2024-04-10 13:27:48 +00:00
|
|
|
{
|
|
|
|
"cells": [
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-04-15 15:05:20 +00:00
|
|
|
"execution_count": 1,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
2024-04-10 13:27:48 +00:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"import os\n",
|
|
|
|
"import cv2\n",
|
|
|
|
"import xlsxwriter\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"import numpy as np\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"def shear_img(image):\n",
|
|
|
|
" rows, cols, _ = image.shape\n",
|
|
|
|
" M = np.float32([[1, -0.22, 0], [0, 1, 0], [0, 0, 1]])\n",
|
|
|
|
" sheared_img = cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))\n",
|
|
|
|
" return cv2.flip(sheared_img, 1)\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"def crop_image(sheared_image, r, target_path):\n",
|
|
|
|
" cropped = sheared_image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
|
|
|
|
" return cropped"
|
2024-04-10 13:27:48 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-04-15 15:05:20 +00:00
|
|
|
"execution_count": 2,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
2024-04-10 13:27:48 +00:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"from datetime import datetime\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"def writeDate(worksheet, row, column, date, format):\n",
|
|
|
|
" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",
|
|
|
|
" parsed_datetime = datetime.strptime(date, original_format)\n",
|
|
|
|
"\n",
|
|
|
|
" worksheet.write_datetime(row, column, parsed_datetime, format)\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"def data2excel(data):\n",
|
|
|
|
" # save the data in an excel file\n",
|
|
|
|
" fileName = \"data.xlsx\"\n",
|
|
|
|
" workbook = xlsxwriter.Workbook(fileName)\n",
|
|
|
|
" worksheet = workbook.add_worksheet()\n",
|
|
|
|
"\n",
|
|
|
|
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
|
|
|
"\n",
|
|
|
|
" # write the data\n",
|
|
|
|
" row = 0\n",
|
|
|
|
" for key, value in data.items():\n",
|
|
|
|
" date = key.split(\" \", 1)[1][:-4]\n",
|
|
|
|
" writeDate(worksheet, row, 0, date, dateFormat)\n",
|
|
|
|
" worksheet.write(row, 1, value)\n",
|
|
|
|
" try:\n",
|
|
|
|
" worksheet.write(row, 2, float(value[:6]))\n",
|
|
|
|
" except Exception as _:\n",
|
|
|
|
" pass\n",
|
|
|
|
" row += 1\n",
|
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" workbook.close()"
|
2024-04-10 13:27:48 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-04-15 15:05:20 +00:00
|
|
|
"execution_count": 3,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
2024-04-10 13:27:48 +00:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
2024-04-15 15:05:20 +00:00
|
|
|
"def peaks(data, boxes=2):\n",
|
|
|
|
" # split data in boxes\n",
|
|
|
|
" data = np.array_split(data, boxes)\n",
|
|
|
|
"\n",
|
|
|
|
" # get the maximum value in each box\n",
|
|
|
|
" data = [np.max(d) for d in data]\n",
|
|
|
|
" data = [d > 150 for d in data]\n",
|
|
|
|
" return np.array(data).astype(int).tolist()\n",
|
|
|
|
"\n",
|
|
|
|
"\n",
|
|
|
|
"digits = {\n",
|
|
|
|
" 0: [[1, 0, 1], [1, 1], [1, 1]],\n",
|
|
|
|
" 1: [[0, 0, 0], [0, 1], [0, 1]],\n",
|
|
|
|
" 2: [[1, 1, 1], [0, 1], [1, 0]],\n",
|
|
|
|
" 3: [[1, 1, 1], [0, 1], [0, 1]],\n",
|
|
|
|
" 4: [[0, 1, 0], [1, 1], [0, 1]],\n",
|
|
|
|
" 5: [[1, 1, 1], [1, 0], [0, 1]],\n",
|
|
|
|
" 6: [[1, 1, 1], [1, 0], [1, 1]],\n",
|
|
|
|
" 7: [[1, 0, 0], [0, 1], [0, 1]],\n",
|
|
|
|
" 8: [[1, 1, 1], [1, 1], [1, 1]],\n",
|
|
|
|
" 9: [[1, 1, 1], [1, 1], [0, 1]],\n",
|
|
|
|
"}\n",
|
|
|
|
"\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"def ownOCR(image):\n",
|
|
|
|
" # get vertical pixel line in the middle of the image\n",
|
|
|
|
" vertical = image[:, image.shape[1] // 2, 0]\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" # get two horizontal lines at 1/3 and 2/3 of the image\n",
|
|
|
|
" horizontal1 = image[image.shape[0] // 3, :, 0]\n",
|
|
|
|
" horizontal2 = image[2 * image.shape[0] // 3, :, 0]\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" # get times it goes above 150, remove subsequent values\n",
|
|
|
|
" digit = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]\n",
|
|
|
|
" digit = [key for key, value in digits.items() if value == digit]\n",
|
|
|
|
" return digit[0]"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 4,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
|
|
|
"outputs": [
|
|
|
|
{
|
|
|
|
"name": "stdout",
|
|
|
|
"output_type": "stream",
|
|
|
|
"text": [
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n",
|
|
|
|
"list index out of range\n"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"source": [
|
|
|
|
"folder = \"../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/\"\n",
|
|
|
|
"images = [f for f in os.listdir(folder) if f.endswith(\".jpg\")]\n",
|
|
|
|
"cropped_folder = os.path.join(folder, \"cropped\")\n",
|
|
|
|
"os.makedirs(cropped_folder, exist_ok=True)\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"# Initial image for ROI selection\n",
|
|
|
|
"init_image_path = os.path.join(folder, images[0])\n",
|
|
|
|
"init_image = cv2.imread(init_image_path)\n",
|
|
|
|
"sheared_init_image = shear_img(init_image)\n",
|
|
|
|
"\n",
|
|
|
|
"regions = []\n",
|
|
|
|
"for i in range(5): # Assume 5 regions as in your original code\n",
|
|
|
|
" r = cv2.selectROI(f\"Select the digit {i+1}\", sheared_init_image)\n",
|
|
|
|
" cv2.destroyAllWindows()\n",
|
|
|
|
" regions.append(r)\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
|
|
|
"data = {}\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"i = 0\n",
|
|
|
|
"# Process all images\n",
|
|
|
|
"for image_name in images:\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
" try:\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" image_path = os.path.join(folder, image_name)\n",
|
|
|
|
" image = cv2.imread(image_path)\n",
|
|
|
|
" sheared_image = shear_img(image)\n",
|
|
|
|
"\n",
|
|
|
|
" for idx, region in enumerate(regions):\n",
|
|
|
|
" target_path = os.path.join(cropped_folder, f\"{image_name[:-4]}_{idx+1}.jpg\")\n",
|
|
|
|
" cropped = crop_image(sheared_image, region, target_path)\n",
|
|
|
|
" digit = ownOCR(cropped)\n",
|
|
|
|
"\n",
|
|
|
|
" if image_name not in data:\n",
|
|
|
|
" data[image_name] = \"\"\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" data[image_name] += str(digit)\n",
|
|
|
|
" except Exception as e:\n",
|
|
|
|
" print(e)\n",
|
|
|
|
" continue"
|
2024-04-10 13:27:48 +00:00
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
2024-04-15 15:05:20 +00:00
|
|
|
"execution_count": 7,
|
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
2024-04-10 13:27:48 +00:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"data2excel(data)"
|
|
|
|
]
|
|
|
|
},
|
|
|
|
{
|
|
|
|
"cell_type": "code",
|
|
|
|
"execution_count": 12,
|
2024-04-15 15:05:20 +00:00
|
|
|
"metadata": {
|
|
|
|
"metadata": {}
|
|
|
|
},
|
2024-04-10 13:27:48 +00:00
|
|
|
"outputs": [],
|
|
|
|
"source": [
|
|
|
|
"# clean excel file\n",
|
|
|
|
"def data2cleanexcel(data):\n",
|
|
|
|
" # save the data in an excel file\n",
|
|
|
|
" fileName = \"data_clean.xlsx\"\n",
|
|
|
|
" workbook = xlsxwriter.Workbook(fileName)\n",
|
|
|
|
" worksheet = workbook.add_worksheet()\n",
|
|
|
|
"\n",
|
|
|
|
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
|
|
|
"\n",
|
|
|
|
" # write the data\n",
|
|
|
|
" row = 0\n",
|
|
|
|
" for key, value in data.items():\n",
|
|
|
|
" try:\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" value = \"6\"+value[1:]\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
" worksheet.write(row, 2, float(value[:6]))\n",
|
|
|
|
" date = key.split(\" \", 1)[1][:-4]\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
" writeDate(worksheet, row, 0, date, dateFormat) \n",
|
2024-04-10 13:27:48 +00:00
|
|
|
" worksheet.write(row, 1, value)\n",
|
|
|
|
" except Exception as _:\n",
|
|
|
|
" continue\n",
|
|
|
|
"\n",
|
|
|
|
" row += 1\n",
|
|
|
|
"\n",
|
|
|
|
" workbook.close()\n",
|
|
|
|
"\n",
|
2024-04-15 15:05:20 +00:00
|
|
|
"\n",
|
2024-04-10 13:27:48 +00:00
|
|
|
"data2cleanexcel(data)"
|
|
|
|
]
|
|
|
|
}
|
|
|
|
],
|
|
|
|
"metadata": {
|
|
|
|
"kernelspec": {
|
|
|
|
"display_name": "Python 3",
|
|
|
|
"language": "python",
|
|
|
|
"name": "python3"
|
|
|
|
},
|
|
|
|
"language_info": {
|
|
|
|
"codemirror_mode": {
|
|
|
|
"name": "ipython",
|
|
|
|
"version": 3
|
|
|
|
},
|
|
|
|
"file_extension": ".py",
|
|
|
|
"mimetype": "text/x-python",
|
|
|
|
"name": "python",
|
|
|
|
"nbconvert_exporter": "python",
|
|
|
|
"pygments_lexer": "ipython3",
|
|
|
|
"version": "3.10.14"
|
|
|
|
}
|
|
|
|
},
|
|
|
|
"nbformat": 4,
|
|
|
|
"nbformat_minor": 2
|
|
|
|
}
|