AI_logging/img2xlsx.ipynb

265 lines
7.8 KiB
Plaintext
Raw Normal View History

2024-04-10 13:27:48 +00:00
{
"cells": [
{
"cell_type": "code",
2024-04-15 15:05:20 +00:00
"execution_count": 1,
"metadata": {
"metadata": {}
},
2024-04-10 13:27:48 +00:00
"outputs": [],
"source": [
"import os\n",
"import cv2\n",
"import xlsxwriter\n",
2024-04-15 15:05:20 +00:00
"import numpy as np\n",
2024-04-10 13:27:48 +00:00
"\n",
"\n",
2024-04-15 15:05:20 +00:00
"def shear_img(image):\n",
" rows, cols, _ = image.shape\n",
" M = np.float32([[1, -0.22, 0], [0, 1, 0], [0, 0, 1]])\n",
" sheared_img = cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))\n",
" return cv2.flip(sheared_img, 1)\n",
2024-04-10 13:27:48 +00:00
"\n",
"\n",
2024-04-15 15:05:20 +00:00
"def crop_image(sheared_image, r, target_path):\n",
" cropped = sheared_image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
" return cropped"
2024-04-10 13:27:48 +00:00
]
},
{
"cell_type": "code",
2024-04-15 15:05:20 +00:00
"execution_count": 2,
"metadata": {
"metadata": {}
},
2024-04-10 13:27:48 +00:00
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"\n",
"def writeDate(worksheet, row, column, date, format):\n",
" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",
" parsed_datetime = datetime.strptime(date, original_format)\n",
"\n",
" worksheet.write_datetime(row, column, parsed_datetime, format)\n",
"\n",
"\n",
"def data2excel(data):\n",
" # save the data in an excel file\n",
" fileName = \"data.xlsx\"\n",
" workbook = xlsxwriter.Workbook(fileName)\n",
" worksheet = workbook.add_worksheet()\n",
"\n",
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
" # write the data\n",
" row = 0\n",
" for key, value in data.items():\n",
" date = key.split(\" \", 1)[1][:-4]\n",
" writeDate(worksheet, row, 0, date, dateFormat)\n",
" worksheet.write(row, 1, value)\n",
" try:\n",
" worksheet.write(row, 2, float(value[:6]))\n",
" except Exception as _:\n",
" pass\n",
" row += 1\n",
"\n",
2024-04-15 15:05:20 +00:00
" workbook.close()"
2024-04-10 13:27:48 +00:00
]
},
{
"cell_type": "code",
2024-04-15 15:05:20 +00:00
"execution_count": 3,
"metadata": {
"metadata": {}
},
2024-04-10 13:27:48 +00:00
"outputs": [],
"source": [
2024-04-15 15:05:20 +00:00
"def peaks(data, boxes=2):\n",
" # split data in boxes\n",
" data = np.array_split(data, boxes)\n",
"\n",
" # get the maximum value in each box\n",
" data = [np.max(d) for d in data]\n",
" data = [d > 150 for d in data]\n",
" return np.array(data).astype(int).tolist()\n",
"\n",
"\n",
"digits = {\n",
" 0: [[1, 0, 1], [1, 1], [1, 1]],\n",
" 1: [[0, 0, 0], [0, 1], [0, 1]],\n",
" 2: [[1, 1, 1], [0, 1], [1, 0]],\n",
" 3: [[1, 1, 1], [0, 1], [0, 1]],\n",
" 4: [[0, 1, 0], [1, 1], [0, 1]],\n",
" 5: [[1, 1, 1], [1, 0], [0, 1]],\n",
" 6: [[1, 1, 1], [1, 0], [1, 1]],\n",
" 7: [[1, 0, 0], [0, 1], [0, 1]],\n",
" 8: [[1, 1, 1], [1, 1], [1, 1]],\n",
" 9: [[1, 1, 1], [1, 1], [0, 1]],\n",
"}\n",
"\n",
2024-04-10 13:27:48 +00:00
"\n",
2024-04-15 15:05:20 +00:00
"def ownOCR(image):\n",
" # get vertical pixel line in the middle of the image\n",
" vertical = image[:, image.shape[1] // 2, 0]\n",
2024-04-10 13:27:48 +00:00
"\n",
2024-04-15 15:05:20 +00:00
" # get two horizontal lines at 1/3 and 2/3 of the image\n",
" horizontal1 = image[image.shape[0] // 3, :, 0]\n",
" horizontal2 = image[2 * image.shape[0] // 3, :, 0]\n",
2024-04-10 13:27:48 +00:00
"\n",
2024-04-15 15:05:20 +00:00
" # get times it goes above 150, remove subsequent values\n",
" digit = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]\n",
" digit = [key for key, value in digits.items() if value == digit]\n",
" return digit[0]"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"metadata": {}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n"
]
}
],
"source": [
"folder = \"../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/\"\n",
"images = [f for f in os.listdir(folder) if f.endswith(\".jpg\")]\n",
"cropped_folder = os.path.join(folder, \"cropped\")\n",
"os.makedirs(cropped_folder, exist_ok=True)\n",
2024-04-10 13:27:48 +00:00
"\n",
2024-04-15 15:05:20 +00:00
"# Initial image for ROI selection\n",
"init_image_path = os.path.join(folder, images[0])\n",
"init_image = cv2.imread(init_image_path)\n",
"sheared_init_image = shear_img(init_image)\n",
"\n",
"regions = []\n",
"for i in range(5): # Assume 5 regions as in your original code\n",
" r = cv2.selectROI(f\"Select the digit {i+1}\", sheared_init_image)\n",
" cv2.destroyAllWindows()\n",
" regions.append(r)\n",
2024-04-10 13:27:48 +00:00
"\n",
"data = {}\n",
2024-04-15 15:05:20 +00:00
"i = 0\n",
"# Process all images\n",
"for image_name in images:\n",
2024-04-10 13:27:48 +00:00
" try:\n",
2024-04-15 15:05:20 +00:00
" image_path = os.path.join(folder, image_name)\n",
" image = cv2.imread(image_path)\n",
" sheared_image = shear_img(image)\n",
"\n",
" for idx, region in enumerate(regions):\n",
" target_path = os.path.join(cropped_folder, f\"{image_name[:-4]}_{idx+1}.jpg\")\n",
" cropped = crop_image(sheared_image, region, target_path)\n",
" digit = ownOCR(cropped)\n",
"\n",
" if image_name not in data:\n",
" data[image_name] = \"\"\n",
2024-04-10 13:27:48 +00:00
"\n",
2024-04-15 15:05:20 +00:00
" data[image_name] += str(digit)\n",
" except Exception as e:\n",
" print(e)\n",
" continue"
2024-04-10 13:27:48 +00:00
]
},
{
"cell_type": "code",
2024-04-15 15:05:20 +00:00
"execution_count": 7,
"metadata": {
"metadata": {}
},
2024-04-10 13:27:48 +00:00
"outputs": [],
"source": [
"data2excel(data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
2024-04-15 15:05:20 +00:00
"metadata": {
"metadata": {}
},
2024-04-10 13:27:48 +00:00
"outputs": [],
"source": [
"# clean excel file\n",
"def data2cleanexcel(data):\n",
" # save the data in an excel file\n",
" fileName = \"data_clean.xlsx\"\n",
" workbook = xlsxwriter.Workbook(fileName)\n",
" worksheet = workbook.add_worksheet()\n",
"\n",
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
" # write the data\n",
" row = 0\n",
" for key, value in data.items():\n",
" try:\n",
2024-04-15 15:05:20 +00:00
" value = \"6\"+value[1:]\n",
2024-04-10 13:27:48 +00:00
" worksheet.write(row, 2, float(value[:6]))\n",
" date = key.split(\" \", 1)[1][:-4]\n",
2024-04-15 15:05:20 +00:00
" writeDate(worksheet, row, 0, date, dateFormat) \n",
2024-04-10 13:27:48 +00:00
" worksheet.write(row, 1, value)\n",
" except Exception as _:\n",
" continue\n",
"\n",
" row += 1\n",
"\n",
" workbook.close()\n",
"\n",
2024-04-15 15:05:20 +00:00
"\n",
2024-04-10 13:27:48 +00:00
"data2cleanexcel(data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}