main
psoubrie 2024-04-15 17:05:20 +02:00
parent bf78be22ca
commit 2bb6a15180
3 changed files with 140 additions and 63 deletions

Binary file not shown.

View File

@ -2,48 +2,36 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 1,
"metadata": {}, "metadata": {
"metadata": {}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from paddleocr import PaddleOCR\n",
"import os\n", "import os\n",
"import cv2\n", "import cv2\n",
"import xlsxwriter\n", "import xlsxwriter\n",
"import numpy as np\n",
"\n", "\n",
"\n", "\n",
"def ocr_image(image_path, ocr):\n", "def shear_img(image):\n",
" result = ocr.ocr(image_path, cls=True)\n", " rows, cols, _ = image.shape\n",
" return result[0][0][1][0]\n", " M = np.float32([[1, -0.22, 0], [0, 1, 0], [0, 0, 1]])\n",
" sheared_img = cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))\n",
" return cv2.flip(sheared_img, 1)\n",
"\n", "\n",
"\n", "\n",
"def cropUI(image_path):\n", "def crop_image(sheared_image, r, target_path):\n",
" # small opencv window to crop the image\n", " cropped = sheared_image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
" image = cv2.imread(image_path)\n", " return cropped"
" r = cv2.selectROI(image)\n",
" cv2.destroyAllWindows()\n",
"\n",
" return r\n",
"\n",
"\n",
"def cropImage(image_path, r, flip, cropped_folder):\n",
" # crop the image and save it\n",
" image = cv2.imread(image_path)\n",
" cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
"\n",
" # save with new name\n",
" if flip == \"y\":\n",
" cropped = cv2.flip(cropped, 1)\n",
"\n",
" # save in subfolder cropped\n",
" new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n",
" cv2.imwrite(new_name, cropped)\n"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 2,
"metadata": {}, "metadata": {
"metadata": {}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"from datetime import datetime\n", "from datetime import datetime\n",
@ -76,52 +64,139 @@
" pass\n", " pass\n",
" row += 1\n", " row += 1\n",
"\n", "\n",
" workbook.close()\n" " workbook.close()"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 3,
"metadata": {}, "metadata": {
"metadata": {}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"FOLDER = \"data/\"\n", "def peaks(data, boxes=2):\n",
"# ocr settings\n", " # split data in boxes\n",
"os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n", " data = np.array_split(data, boxes)\n",
"ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n",
"\n", "\n",
"# *** start GUIs ***\n", " # get the maximum value in each box\n",
"images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n", " data = [np.max(d) for d in data]\n",
"region = cropUI(os.path.join(FOLDER, images[0]))\n", " data = [d > 150 for d in data]\n",
" return np.array(data).astype(int).tolist()\n",
"\n", "\n",
"flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n",
"\n", "\n",
"cropped_folder = os.path.join(FOLDER, \"cropped\")\n", "digits = {\n",
"if not os.path.exists(cropped_folder):\n", " 0: [[1, 0, 1], [1, 1], [1, 1]],\n",
" os.makedirs(cropped_folder)\n", " 1: [[0, 0, 0], [0, 1], [0, 1]],\n",
" 2: [[1, 1, 1], [0, 1], [1, 0]],\n",
" 3: [[1, 1, 1], [0, 1], [0, 1]],\n",
" 4: [[0, 1, 0], [1, 1], [0, 1]],\n",
" 5: [[1, 1, 1], [1, 0], [0, 1]],\n",
" 6: [[1, 1, 1], [1, 0], [1, 1]],\n",
" 7: [[1, 0, 0], [0, 1], [0, 1]],\n",
" 8: [[1, 1, 1], [1, 1], [1, 1]],\n",
" 9: [[1, 1, 1], [1, 1], [0, 1]],\n",
"}\n",
"\n", "\n",
"# *** start cropping ***\n",
"for image in images:\n",
" cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n",
"\n", "\n",
"# *** start OCR ***\n", "def ownOCR(image):\n",
"cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n", " # get vertical pixel line in the middle of the image\n",
"data = {}\n", " vertical = image[:, image.shape[1] // 2, 0]\n",
"for image in cropped_images:\n",
" try:\n",
" path = os.path.join(cropped_folder, image)\n",
" text = ocr_image(path, ocr)\n",
" except Exception as _:\n",
" print(\"Error in cropped image\")\n",
" continue\n",
"\n", "\n",
" data[image] = text\n" " # get two horizontal lines at 1/3 and 2/3 of the image\n",
" horizontal1 = image[image.shape[0] // 3, :, 0]\n",
" horizontal2 = image[2 * image.shape[0] // 3, :, 0]\n",
"\n",
" # get times it goes above 150, remove subsequent values\n",
" digit = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]\n",
" digit = [key for key, value in digits.items() if value == digit]\n",
" return digit[0]"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 4,
"metadata": {}, "metadata": {
"metadata": {}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n",
"list index out of range\n"
]
}
],
"source": [
"folder = \"../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/\"\n",
"images = [f for f in os.listdir(folder) if f.endswith(\".jpg\")]\n",
"cropped_folder = os.path.join(folder, \"cropped\")\n",
"os.makedirs(cropped_folder, exist_ok=True)\n",
"\n",
"# Initial image for ROI selection\n",
"init_image_path = os.path.join(folder, images[0])\n",
"init_image = cv2.imread(init_image_path)\n",
"sheared_init_image = shear_img(init_image)\n",
"\n",
"regions = []\n",
"for i in range(5): # Assume 5 regions as in your original code\n",
" r = cv2.selectROI(f\"Select the digit {i+1}\", sheared_init_image)\n",
" cv2.destroyAllWindows()\n",
" regions.append(r)\n",
"\n",
"data = {}\n",
"i = 0\n",
"# Process all images\n",
"for image_name in images:\n",
" try:\n",
" image_path = os.path.join(folder, image_name)\n",
" image = cv2.imread(image_path)\n",
" sheared_image = shear_img(image)\n",
"\n",
" for idx, region in enumerate(regions):\n",
" target_path = os.path.join(cropped_folder, f\"{image_name[:-4]}_{idx+1}.jpg\")\n",
" cropped = crop_image(sheared_image, region, target_path)\n",
" digit = ownOCR(cropped)\n",
"\n",
" if image_name not in data:\n",
" data[image_name] = \"\"\n",
"\n",
" data[image_name] += str(digit)\n",
" except Exception as e:\n",
" print(e)\n",
" continue"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"metadata": {}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"data2excel(data)" "data2excel(data)"
@ -130,7 +205,9 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 12,
"metadata": {}, "metadata": {
"metadata": {}
},
"outputs": [], "outputs": [],
"source": [ "source": [
"# clean excel file\n", "# clean excel file\n",
@ -146,9 +223,10 @@
" row = 0\n", " row = 0\n",
" for key, value in data.items():\n", " for key, value in data.items():\n",
" try:\n", " try:\n",
" value = \"6\"+value[1:]\n",
" worksheet.write(row, 2, float(value[:6]))\n", " worksheet.write(row, 2, float(value[:6]))\n",
" date = key.split(\" \", 1)[1][:-4]\n", " date = key.split(\" \", 1)[1][:-4]\n",
" writeDate(worksheet, row, 0, date, dateFormat)\n", " writeDate(worksheet, row, 0, date, dateFormat) \n",
" worksheet.write(row, 1, value)\n", " worksheet.write(row, 1, value)\n",
" except Exception as _:\n", " except Exception as _:\n",
" continue\n", " continue\n",
@ -157,6 +235,7 @@
"\n", "\n",
" workbook.close()\n", " workbook.close()\n",
"\n", "\n",
"\n",
"data2cleanexcel(data)" "data2cleanexcel(data)"
] ]
} }

View File

@ -1,2 +0,0 @@
paddlepaddle==2.6.0
paddleocr>=2.0.1