From beeb62025e9ab7aa5d513b5c86fa1fcbad9d9b15 Mon Sep 17 00:00:00 2001
From: psoubrie
Date: Mon, 8 Apr 2024 21:23:16 +0200
Subject: [PATCH] done

---
 .gitignore       |   8 ++
 img2xlsx.py      | 191 +++++++++++++++++++++++++++++++++++++++++++++++
 readme.md        |  52 +++++++++++++
 requirements.txt |   2 +
 4 files changed, 253 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 img2xlsx.py
 create mode 100644 readme.md
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..6322ac8
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,8 @@
+*.jpg
+*.xlsx
+cropped/
+output/
+*.pyd
+*.pyc
+*.pyo
+env/
diff --git a/img2xlsx.py b/img2xlsx.py
new file mode 100644
index 0000000..a134645
--- /dev/null
+++ b/img2xlsx.py
@@ -0,0 +1,191 @@
+"""Crop one region out of every .jpg in a folder, OCR it and export to Excel."""
+
+from paddleocr import PaddleOCR
+import os
+import cv2
+import xlsxwriter
+
+# Folder that main() scans for .jpg images; "cropped" is created inside it.
+FOLDER = "."
+
+
+def ocr_image(image_path, ocr):
+    """Run OCR on one image and return a single text string.
+
+    Args:
+        image_path: Path passed to ``ocr.ocr``.
+        ocr: Object with an ``ocr(path, cls=True)`` method; ``main`` passes a
+            ``PaddleOCR`` instance.
+
+    Returns:
+        ``result[0][0][1][0]`` of the OCR result -- presumably the text of the
+        first detected line (confirm against the installed PaddleOCR version).
+
+    Raises:
+        ValueError: If the OCR result is empty for this image.
+    """
+    result = ocr.ocr(image_path, cls=True)
+    # An empty or None first entry would otherwise fail with an opaque TypeError.
+    if not result or not result[0]:
+        raise ValueError(f"no text detected in {image_path}")
+    return result[0][0][1][0]
+
+
+def cropUI(image_path):
+    """Open an OpenCV window in which the user selects the region to crop.
+
+    Args:
+        image_path: Path of the image shown for the selection.
+
+    Returns:
+        The value returned by ``cv2.selectROI``; the rest of this file indexes
+        it as ``(x, y, width, height)``.
+    """
+    image = cv2.imread(image_path)
+    r = cv2.selectROI(image)
+    cv2.destroyAllWindows()
+
+    return r
+
+
+def levelUI(image_path, region, initial_threshold=127):
+    """Preview a binary threshold on the selected region and return it.
+
+    A window with a "Threshold" trackbar (0-255) shows the thresholded region
+    until a key is pressed.
+
+    Args:
+        image_path: Path of the image to preview.
+        region: ``(x, y, width, height)`` rectangle, as returned by ``cropUI``.
+        initial_threshold: Trackbar position when the window opens.
+
+    Returns:
+        The last trackbar value, to be used as ``threshold`` by ``cropImage``.
+    """
+    # Load and convert the image to grayscale
+    image = cv2.imread(image_path)
+    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    gray_image = gray_image[
+        int(region[1]) : int(region[1] + region[3]),
+        int(region[0]) : int(region[0] + region[2]),
+    ]
+
+    # Remember the latest slider position so it survives closing the window.
+    selected = {"threshold": initial_threshold}
+
+    # Callback function for the trackbar
+    def on_trackbar(val):
+        selected["threshold"] = val
+        _, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY)
+        cv2.imshow("Binary Image", binary_image)
+
+    # Create a window and a trackbar
+    cv2.namedWindow("Binary Image")
+    cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar)
+
+    # Initialize display
+    on_trackbar(initial_threshold)
+    cv2.waitKey(0)
+    cv2.destroyAllWindows()
+
+    # Without this return, main() would hand threshold=None to cropImage().
+    return selected["threshold"]
+
+
+def cropImage(image_path, r, threshold, flip, cropped_folder):
+    """Crop, binarise and optionally mirror one image, then save it.
+
+    Args:
+        image_path: Path of the source image.
+        r: ``(x, y, width, height)`` crop rectangle.
+        threshold: Threshold passed to ``cv2.threshold`` (``THRESH_BINARY``).
+        flip: The crop is flipped with ``cv2.flip(gray, 1)`` when this is "y".
+        cropped_folder: Folder that receives the result, saved under the
+            source file's base name.
+    """
+    image = cv2.imread(image_path)
+    cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
+    gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
+    gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
+
+    if flip == "y":
+        gray = cv2.flip(gray, 1)
+
+    # basename() also strips the platform separator that os.path.join inserts,
+    # which splitting on "/" misses on Windows.
+    new_name = os.path.join(cropped_folder, os.path.basename(image_path))
+    cv2.imwrite(new_name, gray)
+
+
+def data2excel(data, file_name="data.xlsx"):
+    """Write the OCR results to an Excel workbook.
+
+    Each item of ``data`` becomes one row: key in the first column, value in
+    the second, and ``float(value[:6])`` in the third when that succeeds.
+
+    Args:
+        data: Mapping whose items are written; ``main`` passes file name -> text.
+        file_name: Path of the workbook to create.
+    """
+    # The context manager closes the workbook even if a write raises.
+    with xlsxwriter.Workbook(file_name) as workbook:
+        worksheet = workbook.add_worksheet()
+        for row, (key, value) in enumerate(data.items()):
+            worksheet.write(row, 0, key)
+            worksheet.write(row, 1, value)
+            try:
+                worksheet.write(row, 2, float(value[:6]))
+            except (TypeError, ValueError):
+                # Not a usable number: leave the third column empty.
+                pass
+
+
+def main():
+    """Run the interactive crop -> threshold -> OCR -> Excel pipeline on FOLDER."""
+    images = sorted(f for f in os.listdir(FOLDER) if f.endswith(".jpg"))
+    if not images:
+        print(f"No .jpg images found in {FOLDER!r}")
+        return
+
+    # ocr settings
+    ocr = PaddleOCR(use_angle_cls=True, lang="en")
+
+    # *** start GUIs ***
+    first_image = os.path.join(FOLDER, images[0])
+    region = cropUI(first_image)
+    if region[2] == 0 or region[3] == 0:
+        # A zero-sized rectangle would make every crop empty.
+        print("No region selected")
+        return
+    threshold = levelUI(first_image, region)
+
+    answer = input("Do you want to flip the images horizontaly? (y/n): ")
+    flip = answer.strip().lower()
+
+    cropped_folder = os.path.join(FOLDER, "cropped")
+    os.makedirs(cropped_folder, exist_ok=True)
+
+    # *** start cropping ***
+    for image in images:
+        cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder)
+
+    # *** start OCR ***
+    # Every .jpg already in the folder is read, not only this run's crops.
+    cropped_images = sorted(f for f in os.listdir(cropped_folder) if f.endswith(".jpg"))
+    data = {}
+    for image in cropped_images:
+        path = os.path.join(cropped_folder, image)
+        try:
+            data[image] = ocr_image(path, ocr)
+        except Exception as exc:
+            # Best effort: one unreadable crop must not abort the whole batch.
+            print(f"Error in cropped image {image}: {exc}")
+
+    # *** save data in excel ***
+    data2excel(data)
+
+    print("All images cropped successfully")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..cb00f4e
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,52 @@
+## Astral-sh UV
+
+### Getting started
+
+```bash
+# On macOS and Linux.
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# On Windows.
+powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
+
+# With pip.
+pip install uv
+```
+
+### Create virtual environment
+
+```bash
+uv venv # Create a virtual environment at .venv.
+```
+
+### Activate virtual environment
+
+```bash
+# On macOS and Linux.
+source .venv/bin/activate
+
+# On Windows.
+.venv\Scripts\activate
+```
+
+# Install packages
+
+```bash
+uv pip install flask # Install Flask.
+uv pip install -r requirements.txt # Install from a requirements.txt file.
+uv pip install -e . # Install the current project in editable mode.
+uv pip install "package @ ." # Install the current project from disk.
+uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra.
+```
+
+### Generate requirements.txt
+
+```bash
+uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment.
+```
+
+### Update to requirements.txt
+
+```bash
+uv pip sync requirements.txt
+```
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f6cbdea
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+paddlepaddle==2.6.0
+paddleocr>=2.0.1
\ No newline at end of file