diff --git a/.gitignore b/.gitignore index 6322ac8..da3977a 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ output/ *.pyc *.pyo env/ +dist/ +build/ +*.spec +data/ \ No newline at end of file diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..1bb48ec Binary files /dev/null and b/environment.yml differ diff --git a/img2xlsx.ipynb b/img2xlsx.ipynb new file mode 100644 index 0000000..e105970 --- /dev/null +++ b/img2xlsx.ipynb @@ -0,0 +1,185 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from paddleocr import PaddleOCR\n", + "import os\n", + "import cv2\n", + "import xlsxwriter\n", + "\n", + "\n", + "def ocr_image(image_path, ocr):\n", + " result = ocr.ocr(image_path, cls=True)\n", + " return result[0][0][1][0]\n", + "\n", + "\n", + "def cropUI(image_path):\n", + " # small opencv window to crop the image\n", + " image = cv2.imread(image_path)\n", + " r = cv2.selectROI(image)\n", + " cv2.destroyAllWindows()\n", + "\n", + " return r\n", + "\n", + "\n", + "def cropImage(image_path, r, flip, cropped_folder):\n", + " # crop the image and save it\n", + " image = cv2.imread(image_path)\n", + " cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n", + "\n", + " # save with new name\n", + " if flip == \"y\":\n", + " cropped = cv2.flip(cropped, 1)\n", + "\n", + " # save in subfolder cropped\n", + " new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n", + " cv2.imwrite(new_name, cropped)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from datetime import datetime\n", + "\n", + "\n", + "def writeDate(worksheet, row, column, date, format):\n", + " original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n", + " parsed_datetime = datetime.strptime(date, original_format)\n", + "\n", + " worksheet.write_datetime(row, column, parsed_datetime, format)\n", + "\n", + "\n", + 
"def data2excel(data):\n", + " # save the data in an excel file\n", + " fileName = \"data.xlsx\"\n", + " workbook = xlsxwriter.Workbook(fileName)\n", + " worksheet = workbook.add_worksheet()\n", + "\n", + " dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n", + "\n", + " # write the data\n", + " row = 0\n", + " for key, value in data.items():\n", + " date = key.split(\" \", 1)[1][:-4]\n", + " writeDate(worksheet, row, 0, date, dateFormat)\n", + " worksheet.write(row, 1, value)\n", + " try:\n", + " worksheet.write(row, 2, float(value[:6]))\n", + " except Exception as _:\n", + " pass\n", + " row += 1\n", + "\n", + " workbook.close()\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "FOLDER = \"data/\"\n", + "# ocr settings\n", + "os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n", + "ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n", + "\n", + "# *** start GUIs ***\n", + "images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n", + "region = cropUI(os.path.join(FOLDER, images[0]))\n", + "\n", + "flip = input(\"Do you want to flip the images horizontaly? 
(y/n): \")\n", + "\n", + "cropped_folder = os.path.join(FOLDER, \"cropped\")\n", + "if not os.path.exists(cropped_folder):\n", + " os.makedirs(cropped_folder)\n", + "\n", + "# *** start cropping ***\n", + "for image in images:\n", + " cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n", + "\n", + "# *** start OCR ***\n", + "cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n", + "data = {}\n", + "for image in cropped_images:\n", + " try:\n", + " path = os.path.join(cropped_folder, image)\n", + " text = ocr_image(path, ocr)\n", + " except Exception as _:\n", + " print(\"Error in cropped image\")\n", + " continue\n", + "\n", + " data[image] = text\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "data2excel(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "# clean excel file\n", + "def data2cleanexcel(data):\n", + " # save the data in an excel file\n", + " fileName = \"data_clean.xlsx\"\n", + " workbook = xlsxwriter.Workbook(fileName)\n", + " worksheet = workbook.add_worksheet()\n", + "\n", + " dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n", + "\n", + " # write the data\n", + " row = 0\n", + " for key, value in data.items():\n", + " try:\n", + " worksheet.write(row, 2, float(value[:6]))\n", + " date = key.split(\" \", 1)[1][:-4]\n", + " writeDate(worksheet, row, 0, date, dateFormat)\n", + " worksheet.write(row, 1, value)\n", + " except Exception as _:\n", + " continue\n", + "\n", + " row += 1\n", + "\n", + " workbook.close()\n", + "\n", + "data2cleanexcel(data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/img2xlsx.py b/img2xlsx.py index a134645..d9a2d8f 100644 --- a/img2xlsx.py +++ b/img2xlsx.py @@ -18,44 +18,18 @@ def cropUI(image_path): return r -def levelUI(image_path, region, initial_threshold=127): - # Callback function for the trackbar - def on_trackbar(val): - _, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY) - cv2.imshow("Binary Image", binary_image) - - # Load and convert the image to grayscale - image = cv2.imread(image_path) - gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) - gray_image = gray_image[ - int(region[1]) : int(region[1] + region[3]), - int(region[0]) : int(region[0] + region[2]), - ] - - # Create a window and a trackbar - cv2.namedWindow("Binary Image") - cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar) - - # Initialize display - on_trackbar(initial_threshold) - cv2.waitKey(0) - cv2.destroyAllWindows() - - -def cropImage(image_path, r, threshold, flip, cropped_folder): +def cropImage(image_path, r, flip, cropped_folder): # crop the image and save it image = cv2.imread(image_path) cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])] - gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY) - gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1] # save with new name if flip == "y": - gray = cv2.flip(gray, 1) + cropped = cv2.flip(cropped, 1) # save in subfolder cropped new_name = os.path.join(cropped_folder, image_path.split("/")[-1]) - cv2.imwrite(new_name, gray) + cv2.imwrite(new_name, cropped) def data2excel(data): @@ -67,7 +41,8 @@ def data2excel(data): # write the data row = 0 for key, value in data.items(): - worksheet.write(row, 0, key) + date = key.split(" ", 1)[1][:-4] + worksheet.write(row, 0, date) worksheet.write(row, 1, value) try: worksheet.write(row, 2, float(value[:6])) @@ -80,12 +55,12 @@ 
def data2excel(data): def main(): # ocr settings + os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE" ocr = PaddleOCR(use_angle_cls=True, lang="en") # *** start GUIs *** images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")] region = cropUI(os.path.join(FOLDER, images[0])) - threshold = levelUI(os.path.join(FOLDER, images[0]), region) flip = input("Do you want to flip the images horizontaly? (y/n): ") @@ -95,7 +70,7 @@ def main(): # *** start cropping *** for image in images: - cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder) + cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder) # *** start OCR *** cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")] @@ -117,5 +92,5 @@ def main(): if __name__ == "__main__": - FOLDER = "." + FOLDER = "data/" main() diff --git a/readme.md b/readme.md index cb00f4e..0f17a45 100644 --- a/readme.md +++ b/readme.md @@ -1,52 +1,34 @@ -## Astral-sh UV - -### Getting started - +## Conda ```bash -# On macOS and Linux. -curl -LsSf https://astral.sh/uv/install.sh | sh +# create environment +conda create -n <env_name> python=3.8 -# On Windows. -powershell -c "irm https://astral.sh/uv/install.ps1 | iex" +# activate environment +conda activate <env_name> -# With pip. -pip install uv +# deactivate environment +conda deactivate ``` -### Create virtual environment - +#### Recreate environment ```bash -uv venv # Create a virtual environment at .venv. +conda env create -f environment.yml ``` -### Activate virtual environment - +#### Update environment ```bash -# On macOS and Linux. -source .venv/bin/activate +# save updated environment.yml +conda env export > environment.yml -# On Windows. -.venv\Scripts\activate +# update environment with environment.yml +conda env update -f environment.yml ``` -# Install packages - +#### Conda shell ```bash -uv pip install flask # Install Flask. -uv pip install -r requirements.txt # Install from a requirements.txt file. -uv pip install -e . 
# Install the current project in editable mode. -uv pip install "package @ ." # Install the current project from disk. -uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra. -``` +conda init # add --all to initialize every installed shell +# then restart terminal -### Generate requirements.txt - -```bash -uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment. -``` - -### Update to requirements.txt - -```bash -uv pip sync requirements.txt +# exit conda shell +conda deactivate ``` \ No newline at end of file