first results

main
psoubrie 2024-04-10 15:27:48 +02:00
parent beeb62025e
commit bf78be22ca
5 changed files with 216 additions and 70 deletions

4
.gitignore vendored
View File

@ -6,3 +6,7 @@ output/
*.pyc
*.pyo
env/
dist/
build/
*.spec
data/

BIN
environment.yml Normal file

Binary file not shown.

185
img2xlsx.ipynb Normal file
View File

@ -0,0 +1,185 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from paddleocr import PaddleOCR\n",
"import os\n",
"import cv2\n",
"import xlsxwriter\n",
"\n",
"\n",
"def ocr_image(image_path, ocr):\n",
" result = ocr.ocr(image_path, cls=True)\n",
" return result[0][0][1][0]\n",
"\n",
"\n",
"def cropUI(image_path):\n",
" # small opencv window to crop the image\n",
" image = cv2.imread(image_path)\n",
" r = cv2.selectROI(image)\n",
" cv2.destroyAllWindows()\n",
"\n",
" return r\n",
"\n",
"\n",
"def cropImage(image_path, r, flip, cropped_folder):\n",
" # crop the image and save it\n",
" image = cv2.imread(image_path)\n",
" cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
"\n",
" # save with new name\n",
" if flip == \"y\":\n",
" cropped = cv2.flip(cropped, 1)\n",
"\n",
" # save in subfolder cropped\n",
" new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n",
" cv2.imwrite(new_name, cropped)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"\n",
"\n",
"def writeDate(worksheet, row, column, date, format):\n",
" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",
" parsed_datetime = datetime.strptime(date, original_format)\n",
"\n",
" worksheet.write_datetime(row, column, parsed_datetime, format)\n",
"\n",
"\n",
"def data2excel(data):\n",
" # save the data in an excel file\n",
" fileName = \"data.xlsx\"\n",
" workbook = xlsxwriter.Workbook(fileName)\n",
" worksheet = workbook.add_worksheet()\n",
"\n",
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
" # write the data\n",
" row = 0\n",
" for key, value in data.items():\n",
" date = key.split(\" \", 1)[1][:-4]\n",
" writeDate(worksheet, row, 0, date, dateFormat)\n",
" worksheet.write(row, 1, value)\n",
" try:\n",
" worksheet.write(row, 2, float(value[:6]))\n",
" except Exception as _:\n",
" pass\n",
" row += 1\n",
"\n",
" workbook.close()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"FOLDER = \"data/\"\n",
"# ocr settings\n",
"os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n",
"ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n",
"\n",
"# *** start GUIs ***\n",
"images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n",
"region = cropUI(os.path.join(FOLDER, images[0]))\n",
"\n",
"flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n",
"\n",
"cropped_folder = os.path.join(FOLDER, \"cropped\")\n",
"if not os.path.exists(cropped_folder):\n",
" os.makedirs(cropped_folder)\n",
"\n",
"# *** start cropping ***\n",
"for image in images:\n",
" cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n",
"\n",
"# *** start OCR ***\n",
"cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n",
"data = {}\n",
"for image in cropped_images:\n",
" try:\n",
" path = os.path.join(cropped_folder, image)\n",
" text = ocr_image(path, ocr)\n",
" except Exception as _:\n",
" print(\"Error in cropped image\")\n",
" continue\n",
"\n",
" data[image] = text\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data2excel(data)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# clean excel file\n",
"def data2cleanexcel(data):\n",
" # save the data in an excel file\n",
" fileName = \"data_clean.xlsx\"\n",
" workbook = xlsxwriter.Workbook(fileName)\n",
" worksheet = workbook.add_worksheet()\n",
"\n",
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
"\n",
" # write the data\n",
" row = 0\n",
" for key, value in data.items():\n",
" try:\n",
" worksheet.write(row, 2, float(value[:6]))\n",
" date = key.split(\" \", 1)[1][:-4]\n",
" writeDate(worksheet, row, 0, date, dateFormat)\n",
" worksheet.write(row, 1, value)\n",
" except Exception as _:\n",
" continue\n",
"\n",
" row += 1\n",
"\n",
" workbook.close()\n",
"\n",
"data2cleanexcel(data)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.14"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -18,44 +18,18 @@ def cropUI(image_path):
return r
def levelUI(image_path, region, initial_threshold=127):
# Callback function for the trackbar
def on_trackbar(val):
_, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY)
cv2.imshow("Binary Image", binary_image)
# Load and convert the image to grayscale
image = cv2.imread(image_path)
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
gray_image = gray_image[
int(region[1]) : int(region[1] + region[3]),
int(region[0]) : int(region[0] + region[2]),
]
# Create a window and a trackbar
cv2.namedWindow("Binary Image")
cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar)
# Initialize display
on_trackbar(initial_threshold)
cv2.waitKey(0)
cv2.destroyAllWindows()
def cropImage(image_path, r, threshold, flip, cropped_folder):
def cropImage(image_path, r, flip, cropped_folder):
# crop the image and save it
image = cv2.imread(image_path)
cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
# save with new name
if flip == "y":
gray = cv2.flip(gray, 1)
cropped = cv2.flip(cropped, 1)
# save in subfolder cropped
new_name = os.path.join(cropped_folder, image_path.split("/")[-1])
cv2.imwrite(new_name, gray)
cv2.imwrite(new_name, cropped)
def data2excel(data):
@ -67,7 +41,8 @@ def data2excel(data):
# write the data
row = 0
for key, value in data.items():
worksheet.write(row, 0, key)
date = key.split(" ", 1)[1][:-4]
worksheet.write(row, 0, date)
worksheet.write(row, 1, value)
try:
worksheet.write(row, 2, float(value[:6]))
@ -80,12 +55,12 @@ def data2excel(data):
def main():
# ocr settings
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
ocr = PaddleOCR(use_angle_cls=True, lang="en")
# *** start GUIs ***
images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")]
region = cropUI(os.path.join(FOLDER, images[0]))
threshold = levelUI(os.path.join(FOLDER, images[0]), region)
flip = input("Do you want to flip the images horizontaly? (y/n): ")
@ -95,7 +70,7 @@ def main():
# *** start cropping ***
for image in images:
cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder)
cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)
# *** start OCR ***
cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")]
@ -117,5 +92,5 @@ def main():
if __name__ == "__main__":
FOLDER = "."
FOLDER = "data/"
main()

View File

@ -1,52 +1,34 @@
## Astral-sh UV
### Getting started
## Conda
```bash
# On macOS and Linux.
curl -LsSf https://astral.sh/uv/install.sh | sh
# create environment
conda create -n <name-env> python=3.8
# On Windows.
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
# activate environment
conda activate <name-env>
# With pip.
pip install uv
# deactivate environment
conda deactivate
```
### Create virtual environment
#### Recreate environment
```bash
uv venv # Create a virtual environment at .venv.
conda env create -f environment.yml
```
### Activate virtual environment
#### Update environment
```bash
# On macOS and Linux.
source .venv/bin/activate
# save updated environment.yml
conda env export > environment.yml
# On Windows.
.venv\Scripts\activate
# update environment with environment.yml
conda env update -f environment.yml
```
# Install packages
#### Conda shell
```bash
uv pip install flask # Install Flask.
uv pip install -r requirements.txt # Install from a requirements.txt file.
uv pip install -e . # Install the current project in editable mode.
uv pip install "package @ ." # Install the current project from disk.
uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra.
```
conda init  # add --all to initialize every available shell
# then restart terminal
### Generate requirements.txt
```bash
uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment.
```
### Update to requirements.txt
```bash
uv pip sync requirements.txt
# exit conda shell
conda deactivate
```