From bf78be22cab617384953e118fa343a56b979b0e5 Mon Sep 17 00:00:00 2001 From: psoubrie Date: Wed, 10 Apr 2024 15:27:48 +0200 Subject: [PATCH] first results --- .gitignore | 4 ++ environment.yml | Bin 0 -> 13854 bytes img2xlsx.ipynb | 185 ++++++++++++++++++++++++++++++++++++++++++++++++ img2xlsx.py | 41 +++-------- readme.md | 56 +++++---------- 5 files changed, 216 insertions(+), 70 deletions(-) create mode 100644 environment.yml create mode 100644 img2xlsx.ipynb diff --git a/.gitignore b/.gitignore index 6322ac8..da3977a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ output/ *.pyc *.pyo env/ +dist/ +build/ +*.spec +data/ \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..1bb48ec606bb2978c35fa0fb934016ef14f310fb GIT binary patch literal 13854 zcmche+fF0P6^8pdQr=prOhx~Z7eREB*&G$G2M~q9;;W;UP_Xo zsHWmDZPDIH`Ug>C&nrFgExm?m)rly1-Dv$2Q66QhHYo>s2Fv~o2c+FdUTmB5;T!ZH z>f51y=I=5cb;$H4IUbYM(8bokf~78}nJ!p$pl67&jX2Ep;AxrQyfLVcHP8@k|4+x1BNi4VMb*+JQqqHAsYb*i;z$*W^ozl;fDf2{S%n~ii^ zrTDnlXL$NepFYXTL>VultRGjXC)E!I22myiRiQVEW=%0){KYSLQXA(pWWcCBL< z+k{?+B!bhko}P*WUJh;AnUvQeYMa+4dywHuRf}~$vo;>NN%piR)M{+IR>ZhIMC3Z7 z_SeT*wUc*|2Wt=?k;D7xs5AGXoT@Jj zfnFPN9LT1#>W^gKt^Qi;x0T)pT8k$i-laTFnMD@gNNS?4tIo8!lZoz7Yw=npy~z$N z1_xX0T-;~+AxF~Mc*?QrY7LK@WbvV7N9M9p1`j-7KJM&}I`$yD#Zd^eA zd_)yFzt$7Tdz$hjDqz2ypKBdFc`ILUC8<4)4XEmE>@X^#)6^U3Nu-DM>U_ry;TmFPKX{uT6)8_A1pP z>ffCx$i!nwc%dhG7Bq-nO7sRwRap zlJ8PdI401Bc#4cdlXK|;vYkkmykgh<={C-Y<7Ab{%!U4f76b?~C@pFZpxKB_Bq1M& z%#moJBOkgb$%k#KWPxbvbxRzP*F)BQl+2)5tZ^f&u#?(|BYDif_y=j4NQ>+|m(3=6 zA9c!r2`FB#S({sjCop!0kEJt2VRLOVx z4J^~ zbC#hRfYNUjDa6TYEWk?w!#CN`1QO39-%|-m@p@ zKVRRPJYwSXc`h|Vz%Rsq@c2_Y5+6;ZNgoGYP2@qc`Aq+I`75|Sr>zTduWwJI6{+1Fg%x@IgMHmwGPF&Yk_DC*pNiZT4pH{YWd(LFlWWUF}z`uu%3; z_rPs^^k5djBocH0GU=yB6QfRKEoL=`HTs%|nrksz`n794NfV|9)E4Z>i9~kPiL8aEqbq2?%VrCo&CWc;YmKCqrCK95yWt& zTm@TjuJAJ}($$cquVwpcyG{7ic-zRmjU2k_|Dz&`*&dW&{uAj6>R^|DncnHD-AKzb zN-7}p5%9WE3zH^uIaQ^@cI?PM;HL}%Gi%aH%AvE40cYO zwQ;7zY^4jc`CKZb#|EY)2bd5uQ}_s-yy&|d)fIOD_EMGsE&9v9lZ4eASYx9ZG8jLQiD$}S z5Djw^5N^2!n1#+8=hRHKKr)VFV%XEpe1bv^7nW^~7CzWPe5$cYsZ|FeJpbwCgE)x2GX3Q9m*jjOnoIEIB#Mi(MH0- zkKO6hyFTQ*D-=}Z)i70@vQ`8w^_8cvKl((EDRVxLhMx7pCAP_ z^_?A;t7@!VR;k<%&~9W6rdyl>;+61Zv+A&aU~f%Sf?@M`jGfW=9nbp|4DIO=-SLoH zpoyl4VC?TYUC%k-j#KCGUq20yI?hn2GBrjpjy+Z_ zXiwZZ-&m&V@*)}8jngS=%hkDXnxcR%OP?+6$GsEu{hZbB6zkkxpdW$B8Ue$ub$?3uyfv2^8$RvG2BbRem~bXcfa z9U@D?@t}F8sCcrB&zjq{WaHMFt1Z#YzQDSfTdCKJC_JC1^M|=NISK0~4(DmE*?x2bx}*+L)9UkmupqfvOLSN}PUoo^%OsC!?I$D*jhJn6lZZ)GyGC>K z354jUVvBaqJ2(2@m(-i@bJc=I=m5QPt(EC zmOG+?TdC9+?jvw!SmtT$bvbA7?LeQovUbEc`jnk)qbTxu3Qzg0hFrihM)AJ6(9G=c zZhr5M9>ofWyt`5KPZhDADS!<6SMH56uRfKxhQu^}0p-!H$-Vadf~~#ijae@!`&IISI2n7Xjzj=j+|$CA4nA<(s}i@<+HS6bt z7}yyt*eim$=wMCXLBaFavWD-EQ9V4}FcCp%MouL?Ynf(%6r6^XE{G?h4xOP?)l^_ znbF7EzR`|!winorNbX~?ug=ciQa6uX7&= zYf4j&=JWfscCYz}2S|`Lv9{NBeIuK?ybB_#`#atQCqRRw^;EJcDRDri#{X=fibS8O zYg9?$$qIMh=uY0d87~vr-8N*UHr+rq94yFAD<*%NRMS8@p@XYwx6ZGP9Wfl@-t9Wk zM_uUFg$_c;=Cpx4n!no_D&w85HQyvJwkC6$QSq_vH3b&%Nh`Z`?tY`CF%D~SvSKgp Ll=uIS|E={uvD<*K literal 0 HcmV?d00001 diff --git a/img2xlsx.ipynb b/img2xlsx.ipynb new file mode 100644 index 0000000..e105970 --- /dev/null +++ b/img2xlsx.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from paddleocr import PaddleOCR\n", + "import os\n", + "import cv2\n", + "import xlsxwriter\n", + "\n", + "\n", + "def ocr_image(image_path, ocr):\n", + " result = ocr.ocr(image_path, cls=True)\n", + " return result[0][0][1][0]\n", + "\n", + "\n", + "def cropUI(image_path):\n", + " # small opencv window to crop the image\n", + " image = cv2.imread(image_path)\n", + " r = cv2.selectROI(image)\n", + " cv2.destroyAllWindows()\n", + "\n", + " return r\n", + "\n", + "\n", + "def cropImage(image_path, r, flip, cropped_folder):\n", + " # crop the image and save it\n", + " image = cv2.imread(image_path)\n", + " cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n", + "\n", + " # save with new name\n", + " if flip == \"y\":\n", + " cropped = cv2.flip(cropped, 1)\n", + "\n", + " # save in subfolder cropped\n", + " new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n", + " cv2.imwrite(new_name, cropped)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "\n", + "def writeDate(worksheet, row, column, date, format):\n", + " original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n", + " parsed_datetime = datetime.strptime(date, original_format)\n", + "\n", + " worksheet.write_datetime(row, column, parsed_datetime, format)\n", + "\n", + "\n", + "def data2excel(data):\n", + " # save the data in an excel file\n", + " fileName = \"data.xlsx\"\n", + " workbook = xlsxwriter.Workbook(fileName)\n", + " worksheet = workbook.add_worksheet()\n", + "\n", + " dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n", + "\n", + " # write the data\n", + " row = 0\n", + " for key, value in data.items():\n", + " date = key.split(\" \", 1)[1][:-4]\n", + " writeDate(worksheet, row, 0, date, dateFormat)\n", + " worksheet.write(row, 1, value)\n", + " try:\n", + " worksheet.write(row, 2, float(value[:6]))\n", + " except Exception as _:\n", + " pass\n", + " row += 1\n", + "\n", + " workbook.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FOLDER = \"data/\"\n", + "# ocr settings\n", + "os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n", + "ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n", + "\n", + "# *** start GUIs ***\n", + "images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n", + "region = cropUI(os.path.join(FOLDER, images[0]))\n", + "\n", + "flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n", + "\n", + "cropped_folder = os.path.join(FOLDER, \"cropped\")\n", + "if not os.path.exists(cropped_folder):\n", + " os.makedirs(cropped_folder)\n", + "\n", + "# *** start cropping ***\n", + "for image in images:\n", + " cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n", + "\n", + "# *** start OCR ***\n", + "cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n", + "data = {}\n", + "for image in cropped_images:\n", + " try:\n", + " path = os.path.join(cropped_folder, image)\n", + " text = ocr_image(path, ocr)\n", + " except Exception as _:\n", + " print(\"Error in cropped image\")\n", + " continue\n", + "\n", + " data[image] = text\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data2excel(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# clean excel file\n", + "def data2cleanexcel(data):\n", + " # save the data in an excel file\n", + " fileName = \"data_clean.xlsx\"\n", + " workbook = xlsxwriter.Workbook(fileName)\n", + " worksheet = workbook.add_worksheet()\n", + "\n", + " dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n", + "\n", + " # write the data\n", + " row = 0\n", + " for key, value in data.items():\n", + " try:\n", + " worksheet.write(row, 2, float(value[:6]))\n", + " date = key.split(\" \", 1)[1][:-4]\n", + " writeDate(worksheet, row, 0, date, dateFormat)\n", + " worksheet.write(row, 1, value)\n", + " except Exception as _:\n", + " continue\n", + "\n", + " row += 1\n", + "\n", + " workbook.close()\n", + "\n", + "data2cleanexcel(data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/img2xlsx.py b/img2xlsx.py index a134645..d9a2d8f 100644 --- a/img2xlsx.py +++ b/img2xlsx.py @@ -18,44 +18,18 @@ def cropUI(image_path): return r -def levelUI(image_path, region, initial_threshold=127): - # Callback function for the trackbar - def on_trackbar(val): - _, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY) - cv2.imshow("Binary Image", binary_image) - - # Load and convert the image to grayscale - image = cv2.imread(image_path) - gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - gray_image = gray_image[ - int(region[1]) : int(region[1] + region[3]), - int(region[0]) : int(region[0] + region[2]), - ] - - # Create a window and a trackbar - cv2.namedWindow("Binary Image") - cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar) - - # Initialize display - on_trackbar(initial_threshold) - cv2.waitKey(0) - cv2.destroyAllWindows() - - -def cropImage(image_path, r, threshold, flip, cropped_folder): +def cropImage(image_path, r, flip, cropped_folder): # crop the image and save it image = cv2.imread(image_path) cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])] - gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY) - gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1] # save with new name if flip == "y": - gray = cv2.flip(gray, 1) + cropped = cv2.flip(cropped, 1) # save in subfolder cropped new_name = os.path.join(cropped_folder, image_path.split("/")[-1]) - cv2.imwrite(new_name, gray) + cv2.imwrite(new_name, cropped) def data2excel(data): @@ -67,7 +41,8 @@ def data2excel(data): # write the data row = 0 for key, value in data.items(): - worksheet.write(row, 0, key) + date = key.split(" ", 1)[1][:-4] + worksheet.write(row, 0, date) worksheet.write(row, 1, value) try: worksheet.write(row, 2, float(value[:6])) @@ -80,12 +55,12 @@ def data2excel(data): def main(): # ocr settings + os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" ocr = PaddleOCR(use_angle_cls=True, lang="en") # *** start GUIs *** images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")] region = cropUI(os.path.join(FOLDER, images[0])) - threshold = levelUI(os.path.join(FOLDER, images[0]), region) flip = input("Do you want to flip the images horizontaly? (y/n): ") @@ -95,7 +70,7 @@ def main(): # *** start cropping *** for image in images: - cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder) + cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder) # *** start OCR *** cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")] @@ -117,5 +92,5 @@ def main(): if __name__ == "__main__": - FOLDER = "." + FOLDER = "data/" main() diff --git a/readme.md b/readme.md index cb00f4e..0f17a45 100644 --- a/readme.md +++ b/readme.md @@ -1,52 +1,34 @@ -## Astral-sh UV - -### Getting started - +## Conda ```bash -# On macOS and Linux. -curl -LsSf https://astral.sh/uv/install.sh | sh +# create environment +conda create -n python=3.8 -# On Windows. -powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +# activate environment +conda activate -# With pip. -pip install uv +# deactivate environment +conda deactivate ``` -### Create virtual environment - +#### Recreate environment ```bash -uv venv # Create a virtual environment at .venv. +conda env create -f environment.yml ``` -### Activate virtual environment - +#### Update environment ```bash -# On macOS and Linux. -source .venv/bin/activate +# save updated environment.yml +conda env export > environment.yml -# On Windows. -.venv\Scripts\activate +# update environment with environment.yml +conda env update -f environment.yml ``` -# Install packages - +#### Conda shell ```bash -uv pip install flask # Install Flask. -uv pip install -r requirements.txt # Install from a requirements.txt file. -uv pip install -e . # Install the current project in editable mode. -uv pip install "package @ ." # Install the current project from disk. -uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra. -``` +conda init (-all) +# then restart terminal -### Generate requirements.txt - -```bash -uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment. -``` - -### Update to requirements.txt - -```bash -uv pip sync requirements.txt +# exit conda shell +conda deactivate ``` \ No newline at end of file