first results
parent
beeb62025e
commit
bf78be22ca
|
@ -6,3 +6,7 @@ output/
|
||||||
*.pyc
|
*.pyc
|
||||||
*.pyo
|
*.pyo
|
||||||
env/
|
env/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
*.spec
|
||||||
|
data/
|
Binary file not shown.
|
@ -0,0 +1,185 @@
|
||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from paddleocr import PaddleOCR\n",
|
||||||
|
"import os\n",
|
||||||
|
"import cv2\n",
|
||||||
|
"import xlsxwriter\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def ocr_image(image_path, ocr):\n",
|
||||||
|
" result = ocr.ocr(image_path, cls=True)\n",
|
||||||
|
" return result[0][0][1][0]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def cropUI(image_path):\n",
|
||||||
|
" # small opencv window to crop the image\n",
|
||||||
|
" image = cv2.imread(image_path)\n",
|
||||||
|
" r = cv2.selectROI(image)\n",
|
||||||
|
" cv2.destroyAllWindows()\n",
|
||||||
|
"\n",
|
||||||
|
" return r\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def cropImage(image_path, r, flip, cropped_folder):\n",
|
||||||
|
" # crop the image and save it\n",
|
||||||
|
" image = cv2.imread(image_path)\n",
|
||||||
|
" cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
|
||||||
|
"\n",
|
||||||
|
" # save with new name\n",
|
||||||
|
" if flip == \"y\":\n",
|
||||||
|
" cropped = cv2.flip(cropped, 1)\n",
|
||||||
|
"\n",
|
||||||
|
" # save in subfolder cropped\n",
|
||||||
|
" new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n",
|
||||||
|
" cv2.imwrite(new_name, cropped)\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from datetime import datetime\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def writeDate(worksheet, row, column, date, format):\n",
|
||||||
|
" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",
|
||||||
|
" parsed_datetime = datetime.strptime(date, original_format)\n",
|
||||||
|
"\n",
|
||||||
|
" worksheet.write_datetime(row, column, parsed_datetime, format)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def data2excel(data):\n",
|
||||||
|
" # save the data in an excel file\n",
|
||||||
|
" fileName = \"data.xlsx\"\n",
|
||||||
|
" workbook = xlsxwriter.Workbook(fileName)\n",
|
||||||
|
" worksheet = workbook.add_worksheet()\n",
|
||||||
|
"\n",
|
||||||
|
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
||||||
|
"\n",
|
||||||
|
" # write the data\n",
|
||||||
|
" row = 0\n",
|
||||||
|
" for key, value in data.items():\n",
|
||||||
|
" date = key.split(\" \", 1)[1][:-4]\n",
|
||||||
|
" writeDate(worksheet, row, 0, date, dateFormat)\n",
|
||||||
|
" worksheet.write(row, 1, value)\n",
|
||||||
|
" try:\n",
|
||||||
|
" worksheet.write(row, 2, float(value[:6]))\n",
|
||||||
|
" except Exception as _:\n",
|
||||||
|
" pass\n",
|
||||||
|
" row += 1\n",
|
||||||
|
"\n",
|
||||||
|
" workbook.close()\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"FOLDER = \"data/\"\n",
|
||||||
|
"# ocr settings\n",
|
||||||
|
"os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n",
|
||||||
|
"ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n",
|
||||||
|
"\n",
|
||||||
|
"# *** start GUIs ***\n",
|
||||||
|
"images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n",
|
||||||
|
"region = cropUI(os.path.join(FOLDER, images[0]))\n",
|
||||||
|
"\n",
|
||||||
|
"flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n",
|
||||||
|
"\n",
|
||||||
|
"cropped_folder = os.path.join(FOLDER, \"cropped\")\n",
|
||||||
|
"if not os.path.exists(cropped_folder):\n",
|
||||||
|
" os.makedirs(cropped_folder)\n",
|
||||||
|
"\n",
|
||||||
|
"# *** start cropping ***\n",
|
||||||
|
"for image in images:\n",
|
||||||
|
" cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n",
|
||||||
|
"\n",
|
||||||
|
"# *** start OCR ***\n",
|
||||||
|
"cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n",
|
||||||
|
"data = {}\n",
|
||||||
|
"for image in cropped_images:\n",
|
||||||
|
" try:\n",
|
||||||
|
" path = os.path.join(cropped_folder, image)\n",
|
||||||
|
" text = ocr_image(path, ocr)\n",
|
||||||
|
" except Exception as _:\n",
|
||||||
|
" print(\"Error in cropped image\")\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
" data[image] = text\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data2excel(data)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 12,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"# clean excel file\n",
|
||||||
|
"def data2cleanexcel(data):\n",
|
||||||
|
" # save the data in an excel file\n",
|
||||||
|
" fileName = \"data_clean.xlsx\"\n",
|
||||||
|
" workbook = xlsxwriter.Workbook(fileName)\n",
|
||||||
|
" worksheet = workbook.add_worksheet()\n",
|
||||||
|
"\n",
|
||||||
|
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
||||||
|
"\n",
|
||||||
|
" # write the data\n",
|
||||||
|
" row = 0\n",
|
||||||
|
" for key, value in data.items():\n",
|
||||||
|
" try:\n",
|
||||||
|
" worksheet.write(row, 2, float(value[:6]))\n",
|
||||||
|
" date = key.split(\" \", 1)[1][:-4]\n",
|
||||||
|
" writeDate(worksheet, row, 0, date, dateFormat)\n",
|
||||||
|
" worksheet.write(row, 1, value)\n",
|
||||||
|
" except Exception as _:\n",
|
||||||
|
" continue\n",
|
||||||
|
"\n",
|
||||||
|
" row += 1\n",
|
||||||
|
"\n",
|
||||||
|
" workbook.close()\n",
|
||||||
|
"\n",
|
||||||
|
"data2cleanexcel(data)"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.10.14"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
41
img2xlsx.py
41
img2xlsx.py
|
@ -18,44 +18,18 @@ def cropUI(image_path):
|
||||||
return r
|
return r
|
||||||
|
|
||||||
|
|
||||||
def levelUI(image_path, region, initial_threshold=127):
|
def cropImage(image_path, r, flip, cropped_folder):
|
||||||
# Callback function for the trackbar
|
|
||||||
def on_trackbar(val):
|
|
||||||
_, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY)
|
|
||||||
cv2.imshow("Binary Image", binary_image)
|
|
||||||
|
|
||||||
# Load and convert the image to grayscale
|
|
||||||
image = cv2.imread(image_path)
|
|
||||||
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
|
||||||
gray_image = gray_image[
|
|
||||||
int(region[1]) : int(region[1] + region[3]),
|
|
||||||
int(region[0]) : int(region[0] + region[2]),
|
|
||||||
]
|
|
||||||
|
|
||||||
# Create a window and a trackbar
|
|
||||||
cv2.namedWindow("Binary Image")
|
|
||||||
cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar)
|
|
||||||
|
|
||||||
# Initialize display
|
|
||||||
on_trackbar(initial_threshold)
|
|
||||||
cv2.waitKey(0)
|
|
||||||
cv2.destroyAllWindows()
|
|
||||||
|
|
||||||
|
|
||||||
def cropImage(image_path, r, threshold, flip, cropped_folder):
|
|
||||||
# crop the image and save it
|
# crop the image and save it
|
||||||
image = cv2.imread(image_path)
|
image = cv2.imread(image_path)
|
||||||
cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
|
cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
|
||||||
gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
|
|
||||||
gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
|
|
||||||
|
|
||||||
# save with new name
|
# save with new name
|
||||||
if flip == "y":
|
if flip == "y":
|
||||||
gray = cv2.flip(gray, 1)
|
cropped = cv2.flip(cropped, 1)
|
||||||
|
|
||||||
# save in subfolder cropped
|
# save in subfolder cropped
|
||||||
new_name = os.path.join(cropped_folder, image_path.split("/")[-1])
|
new_name = os.path.join(cropped_folder, image_path.split("/")[-1])
|
||||||
cv2.imwrite(new_name, gray)
|
cv2.imwrite(new_name, cropped)
|
||||||
|
|
||||||
|
|
||||||
def data2excel(data):
|
def data2excel(data):
|
||||||
|
@ -67,7 +41,8 @@ def data2excel(data):
|
||||||
# write the data
|
# write the data
|
||||||
row = 0
|
row = 0
|
||||||
for key, value in data.items():
|
for key, value in data.items():
|
||||||
worksheet.write(row, 0, key)
|
date = key.split(" ", 1)[1][:-4]
|
||||||
|
worksheet.write(row, 0, date)
|
||||||
worksheet.write(row, 1, value)
|
worksheet.write(row, 1, value)
|
||||||
try:
|
try:
|
||||||
worksheet.write(row, 2, float(value[:6]))
|
worksheet.write(row, 2, float(value[:6]))
|
||||||
|
@ -80,12 +55,12 @@ def data2excel(data):
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
# ocr settings
|
# ocr settings
|
||||||
|
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
||||||
ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
||||||
|
|
||||||
# *** start GUIs ***
|
# *** start GUIs ***
|
||||||
images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")]
|
images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")]
|
||||||
region = cropUI(os.path.join(FOLDER, images[0]))
|
region = cropUI(os.path.join(FOLDER, images[0]))
|
||||||
threshold = levelUI(os.path.join(FOLDER, images[0]), region)
|
|
||||||
|
|
||||||
flip = input("Do you want to flip the images horizontaly? (y/n): ")
|
flip = input("Do you want to flip the images horizontaly? (y/n): ")
|
||||||
|
|
||||||
|
@ -95,7 +70,7 @@ def main():
|
||||||
|
|
||||||
# *** start cropping ***
|
# *** start cropping ***
|
||||||
for image in images:
|
for image in images:
|
||||||
cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder)
|
cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)
|
||||||
|
|
||||||
# *** start OCR ***
|
# *** start OCR ***
|
||||||
cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")]
|
cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")]
|
||||||
|
@ -117,5 +92,5 @@ def main():
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
FOLDER = "."
|
FOLDER = "data/"
|
||||||
main()
|
main()
|
||||||
|
|
58
readme.md
58
readme.md
|
@ -1,52 +1,34 @@
|
||||||
## Astral-sh UV
|
## Conda
|
||||||
|
|
||||||
### Getting started
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# On macOS and Linux.
|
# create environment
|
||||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
conda create -n <name-env> python=3.8
|
||||||
|
|
||||||
# On Windows.
|
# activate environment
|
||||||
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
|
conda activate <name-env>
|
||||||
|
|
||||||
# With pip.
|
# deactivate environment
|
||||||
pip install uv
|
conda deactivate
|
||||||
```
|
```
|
||||||
|
|
||||||
### Create virtual environment
|
#### Recreate environment
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv venv # Create a virtual environment at .venv.
|
conda env create -f environment.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
### Activate virtual environment
|
#### Update environment
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# On macOS and Linux.
|
# save updated environment.yml
|
||||||
source .venv/bin/activate
|
conda env export > environment.yml
|
||||||
|
|
||||||
# On Windows.
|
# update environment with environment.yml
|
||||||
.venv\Scripts\activate
|
conda env update -f environment.yml
|
||||||
```
|
```
|
||||||
|
|
||||||
# Install packages
|
#### Conda shell
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install flask # Install Flask.
|
conda init  # or: conda init --all (initialize every supported shell)
|
||||||
uv pip install -r requirements.txt # Install from a requirements.txt file.
|
# then restart terminal
|
||||||
uv pip install -e . # Install the current project in editable mode.
|
|
||||||
uv pip install "package @ ." # Install the current project from disk.
|
# exit conda shell
|
||||||
uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra.
|
conda deactivate
|
||||||
```
|
|
||||||
|
|
||||||
### Generate requirements.txt
|
|
||||||
|
|
||||||
```bash
|
|
||||||
uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment.
|
|
||||||
```
|
|
||||||
|
|
||||||
### Update to requirements.txt
|
|
||||||
|
|
||||||
```bash
|
|
||||||
uv pip sync requirements.txt
|
|
||||||
```
|
```
|
Loading…
Reference in New Issue