first results

main
psoubrie 2024-04-10 15:27:48 +02:00
parent beeb62025e
commit bf78be22ca
5 changed files with 216 additions and 70 deletions

4
.gitignore vendored
View File

@ -6,3 +6,7 @@ output/
*.pyc *.pyc
*.pyo *.pyo
env/ env/
dist/
build/
*.spec
data/

BIN
environment.yml Normal file

Binary file not shown.

185
img2xlsx.ipynb Normal file
View File

@ -0,0 +1,185 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from paddleocr import PaddleOCR\n",
"import os\n",
"import cv2\n",
"import xlsxwriter\n",
"\n",
"\n",
"def ocr_image(image_path, ocr):\n",
"    \"\"\"Run OCR on one image and return a single field of the result.\n",
"\n",
"    Calls ``ocr.ocr(image_path, cls=True)`` and returns the element at\n",
"    ``[0][0][1][0]`` of what it gives back -- presumably the text of the\n",
"    first detected line (PaddleOCR result layout, to be confirmed).\n",
"    Indexing errors on an unexpected result propagate to the caller.\n",
"    \"\"\"\n",
"    detections = ocr.ocr(image_path, cls=True)\n",
"    first_entry = detections[0][0]\n",
"    recognized = first_entry[1]\n",
"    return recognized[0]\n",
"\n",
"\n",
"def cropUI(image_path):\n",
"    \"\"\"Let the user select a rectangle on the image and return it.\n",
"\n",
"    The image is shown in an OpenCV window through ``cv2.selectROI``;\n",
"    all OpenCV windows are closed before returning the selection.\n",
"    \"\"\"\n",
"    preview = cv2.imread(image_path)\n",
"    selection = cv2.selectROI(preview)\n",
"    # the selection window is no longer needed once a region is chosen\n",
"    cv2.destroyAllWindows()\n",
"    return selection\n",
"\n",
"\n",
"def cropImage(image_path, r, flip, cropped_folder):\n",
"    \"\"\"Crop one image to the rectangle ``r`` and save the result.\n",
"\n",
"    Args:\n",
"        image_path: path of the image to read.\n",
"        r: rectangle as (x, y, width, height), as returned by cropUI.\n",
"        flip: the string \"y\" mirrors the crop horizontally.\n",
"        cropped_folder: folder that receives the cropped file; the file\n",
"            keeps the base name of image_path.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if OpenCV cannot read image_path.\n",
"    \"\"\"\n",
"    image = cv2.imread(image_path)\n",
"    if image is None:\n",
"        # cv2.imread returns None instead of raising on unreadable files\n",
"        raise FileNotFoundError(f\"Cannot read image: {image_path}\")\n",
"\n",
"    # rows are indexed by y, columns by x\n",
"    cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
"\n",
"    if flip == \"y\":\n",
"        cropped = cv2.flip(cropped, 1)\n",
"\n",
"    # os.path.basename honours the platform separator, whereas splitting\n",
"    # on \"/\" alone keeps the whole path when another separator is used\n",
"    new_name = os.path.join(cropped_folder, os.path.basename(image_path))\n",
"    cv2.imwrite(new_name, cropped)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"\n",
"def writeDate(worksheet, row, column, date, format):\n",
"    \"\"\"Parse a timestamp string and write it as a datetime cell.\n",
"\n",
"    ``date`` must match ``%Y-%m-%d %H-%M-%S-%f``; ``datetime.strptime``\n",
"    raises ValueError otherwise. ``format`` is handed unchanged to\n",
"    ``worksheet.write_datetime``.\n",
"    \"\"\"\n",
"    timestamp = datetime.strptime(date, \"%Y-%m-%d %H-%M-%S-%f\")\n",
"    worksheet.write_datetime(row, column, timestamp, format)\n",
"\n",
"\n",
"def data2excel(data):\n",
"    \"\"\"Write the OCR results to data.xlsx, one row per entry.\n",
"\n",
"    Columns: 0 = timestamp parsed from the key, 1 = raw text,\n",
"    2 = the first six characters of the text as a float (left empty\n",
"    when they cannot be written as a number).\n",
"\n",
"    Args:\n",
"        data: mapping of image file name to recognized text. Each key\n",
"            must contain a space followed by a timestamp and a\n",
"            4-character suffix, e.g.\n",
"            \"<prefix> 2024-04-10 15-27-48-123456.jpg\".\n",
"\n",
"    Raises:\n",
"        IndexError: if a key contains no space.\n",
"        ValueError: if the timestamp part of a key cannot be parsed.\n",
"    \"\"\"\n",
"    # the context manager closes (and therefore saves) the workbook even\n",
"    # when a row raises; a trailing close() call would be skipped then\n",
"    with xlsxwriter.Workbook(\"data.xlsx\") as workbook:\n",
"        worksheet = workbook.add_worksheet()\n",
"        dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
"        for row, (key, value) in enumerate(data.items()):\n",
"            # drop everything up to the first space, then the 4-char suffix\n",
"            date = key.split(\" \", 1)[1][:-4]\n",
"            writeDate(worksheet, row, 0, date, dateFormat)\n",
"            worksheet.write(row, 1, value)\n",
"            try:\n",
"                worksheet.write(row, 2, float(value[:6]))\n",
"            except (TypeError, ValueError):\n",
"                # not a usable number: keep the raw text only\n",
"                pass\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"FOLDER = \"data/\"\n",
"# ocr settings\n",
"# NOTE(review): presumably works around the duplicate OpenMP runtime\n",
"# error raised when several libraries bundle their own copy -- confirm\n",
"os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n",
"ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n",
"\n",
"# *** start GUIs ***\n",
"# sorted: os.listdir returns names in arbitrary order\n",
"images = sorted(f for f in os.listdir(FOLDER) if f.endswith(\".jpg\"))\n",
"if not images:\n",
"    raise FileNotFoundError(f\"No .jpg images found in {FOLDER}\")\n",
"# the region drawn on the first image is reused for every image\n",
"region = cropUI(os.path.join(FOLDER, images[0]))\n",
"\n",
"flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n",
"\n",
"cropped_folder = os.path.join(FOLDER, \"cropped\")\n",
"os.makedirs(cropped_folder, exist_ok=True)\n",
"\n",
"# *** start cropping ***\n",
"for image in images:\n",
"    cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n",
"\n",
"# *** start OCR ***\n",
"# sorted so that the insertion order of `data`, and with it the row\n",
"# order of the exported sheets, is reproducible\n",
"cropped_images = sorted(f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\"))\n",
"data = {}\n",
"for image in cropped_images:\n",
"    path = os.path.join(cropped_folder, image)\n",
"    try:\n",
"        text = ocr_image(path, ocr)\n",
"    except Exception as exc:\n",
"        # best effort: report which image failed and why, then move on\n",
"        print(f\"Error in cropped image {image}: {exc}\")\n",
"        continue\n",
"\n",
"    data[image] = text\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data2excel(data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# clean excel file\n",
"def data2cleanexcel(data):\n",
"    \"\"\"Write only the fully parsable OCR results to data_clean.xlsx.\n",
"\n",
"    An entry is kept when the first six characters of its text convert\n",
"    to a finite float and its key carries a parsable timestamp; every\n",
"    other entry is skipped without leaving anything in the sheet.\n",
"    Columns: 0 = timestamp, 1 = raw text, 2 = numeric value.\n",
"\n",
"    Args:\n",
"        data: mapping of image file name to recognized text.\n",
"    \"\"\"\n",
"    import math\n",
"\n",
"    # the context manager closes (and therefore saves) the workbook even\n",
"    # if writing a row raises\n",
"    with xlsxwriter.Workbook(\"data_clean.xlsx\") as workbook:\n",
"        worksheet = workbook.add_worksheet()\n",
"        dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
"        row = 0\n",
"        for key, value in data.items():\n",
"            # validate everything before touching the sheet, so that a\n",
"            # rejected entry cannot leave a half-written row behind\n",
"            try:\n",
"                number = float(value[:6])\n",
"                if not math.isfinite(number):\n",
"                    # xlsxwriter rejects NaN/inf cells by default\n",
"                    raise ValueError(\"non-finite reading\")\n",
"                date = key.split(\" \", 1)[1][:-4]\n",
"                parsed = datetime.strptime(date, \"%Y-%m-%d %H-%M-%S-%f\")\n",
"            except (IndexError, TypeError, ValueError):\n",
"                continue\n",
"\n",
"            worksheet.write_datetime(row, 0, parsed, dateFormat)\n",
"            worksheet.write(row, 1, value)\n",
"            worksheet.write(row, 2, number)\n",
"            row += 1\n",
"\n",
"data2cleanexcel(data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -18,44 +18,18 @@ def cropUI(image_path):
return r return r
def levelUI(image_path, region, initial_threshold=127): def cropImage(image_path, r, flip, cropped_folder):
# Callback function for the trackbar
def on_trackbar(val):
_, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY)
cv2.imshow("Binary Image", binary_image)
# Load and convert the image to grayscale
image = cv2.imread(image_path)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_image = gray_image[
int(region[1]) : int(region[1] + region[3]),
int(region[0]) : int(region[0] + region[2]),
]
# Create a window and a trackbar
cv2.namedWindow("Binary Image")
cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar)
# Initialize display
on_trackbar(initial_threshold)
cv2.waitKey(0)
cv2.destroyAllWindows()
def cropImage(image_path, r, threshold, flip, cropped_folder):
# crop the image and save it # crop the image and save it
image = cv2.imread(image_path) image = cv2.imread(image_path)
cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])] cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
# save with new name # save with new name
if flip == "y": if flip == "y":
gray = cv2.flip(gray, 1) cropped = cv2.flip(cropped, 1)
# save in subfolder cropped # save in subfolder cropped
new_name = os.path.join(cropped_folder, image_path.split("/")[-1]) new_name = os.path.join(cropped_folder, image_path.split("/")[-1])
cv2.imwrite(new_name, gray) cv2.imwrite(new_name, cropped)
def data2excel(data): def data2excel(data):
@ -67,7 +41,8 @@ def data2excel(data):
# write the data # write the data
row = 0 row = 0
for key, value in data.items(): for key, value in data.items():
worksheet.write(row, 0, key) date = key.split(" ", 1)[1][:-4]
worksheet.write(row, 0, date)
worksheet.write(row, 1, value) worksheet.write(row, 1, value)
try: try:
worksheet.write(row, 2, float(value[:6])) worksheet.write(row, 2, float(value[:6]))
@ -80,12 +55,12 @@ def data2excel(data):
def main(): def main():
# ocr settings # ocr settings
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
ocr = PaddleOCR(use_angle_cls=True, lang="en") ocr = PaddleOCR(use_angle_cls=True, lang="en")
# *** start GUIs *** # *** start GUIs ***
images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")] images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")]
region = cropUI(os.path.join(FOLDER, images[0])) region = cropUI(os.path.join(FOLDER, images[0]))
threshold = levelUI(os.path.join(FOLDER, images[0]), region)
flip = input("Do you want to flip the images horizontaly? (y/n): ") flip = input("Do you want to flip the images horizontaly? (y/n): ")
@ -95,7 +70,7 @@ def main():
# *** start cropping *** # *** start cropping ***
for image in images: for image in images:
cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder) cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)
# *** start OCR *** # *** start OCR ***
cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")] cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")]
@ -117,5 +92,5 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
FOLDER = "." FOLDER = "data/"
main() main()

View File

@ -1,52 +1,34 @@
## Astral-sh UV ## Conda
### Getting started
```bash ```bash
# On macOS and Linux. # create environment
curl -LsSf https://astral.sh/uv/install.sh | sh conda create -n <name-env> python=3.8
# On Windows. # activate environment
powershell -c "irm https://astral.sh/uv/install.ps1 | iex" conda activate <name-env>
# With pip. # deactivate environment
pip install uv conda deactivate
``` ```
### Create virtual environment #### Recreate environment
```bash ```bash
uv venv # Create a virtual environment at .venv. conda env create -f environment.yml
``` ```
### Activate virtual environment #### Update environment
```bash ```bash
# On macOS and Linux. # save updated environment.yml
source .venv/bin/activate conda env export > environment.yml
# On Windows. # update environment with environment.yml
.venv\Scripts\activate conda env update -f environment.yml
``` ```
# Install packages #### Conda shell
```bash ```bash
uv pip install flask # Install Flask. conda init (-all)
uv pip install -r requirements.txt # Install from a requirements.txt file. # then restart terminal
uv pip install -e . # Install the current project in editable mode.
uv pip install "package @ ." # Install the current project from disk. # exit conda shell
uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra. conda deactivate
```
### Generate requirements.txt
```bash
uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment.
```
### Update to requirements.txt
```bash
uv pip sync requirements.txt
``` ```