first results
parent
beeb62025e
commit
bf78be22ca
|
@ -6,3 +6,7 @@ output/
|
|||
*.pyc
|
||||
*.pyo
|
||||
env/
|
||||
dist/
|
||||
build/
|
||||
*.spec
|
||||
data/
|
Binary file not shown.
|
@ -0,0 +1,185 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from paddleocr import PaddleOCR\n",
|
||||
"import os\n",
|
||||
"import cv2\n",
|
||||
"import xlsxwriter\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def ocr_image(image_path, ocr):\n",
|
||||
" result = ocr.ocr(image_path, cls=True)\n",
|
||||
" return result[0][0][1][0]\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def cropUI(image_path):\n",
|
||||
" # small opencv window to crop the image\n",
|
||||
" image = cv2.imread(image_path)\n",
|
||||
" r = cv2.selectROI(image)\n",
|
||||
" cv2.destroyAllWindows()\n",
|
||||
"\n",
|
||||
" return r\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def cropImage(image_path, r, flip, cropped_folder):\n",
|
||||
" # crop the image and save it\n",
|
||||
" image = cv2.imread(image_path)\n",
|
||||
" cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",
|
||||
"\n",
|
||||
" # save with new name\n",
|
||||
" if flip == \"y\":\n",
|
||||
" cropped = cv2.flip(cropped, 1)\n",
|
||||
"\n",
|
||||
" # save in subfolder cropped\n",
|
||||
" new_name = os.path.join(cropped_folder, image_path.split(\"/\")[-1])\n",
|
||||
" cv2.imwrite(new_name, cropped)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from datetime import datetime\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def writeDate(worksheet, row, column, date, format):\n",
|
||||
" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",
|
||||
" parsed_datetime = datetime.strptime(date, original_format)\n",
|
||||
"\n",
|
||||
" worksheet.write_datetime(row, column, parsed_datetime, format)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def data2excel(data):\n",
|
||||
" # save the data in an excel file\n",
|
||||
" fileName = \"data.xlsx\"\n",
|
||||
" workbook = xlsxwriter.Workbook(fileName)\n",
|
||||
" worksheet = workbook.add_worksheet()\n",
|
||||
"\n",
|
||||
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
||||
"\n",
|
||||
" # write the data\n",
|
||||
" row = 0\n",
|
||||
" for key, value in data.items():\n",
|
||||
" date = key.split(\" \", 1)[1][:-4]\n",
|
||||
" writeDate(worksheet, row, 0, date, dateFormat)\n",
|
||||
" worksheet.write(row, 1, value)\n",
|
||||
" try:\n",
|
||||
" worksheet.write(row, 2, float(value[:6]))\n",
|
||||
" except Exception as _:\n",
|
||||
" pass\n",
|
||||
" row += 1\n",
|
||||
"\n",
|
||||
" workbook.close()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"FOLDER = \"data/\"\n",
|
||||
"# ocr settings\n",
|
||||
"os.environ[\"KMP_DUPLICATE_LIB_OK\"] = \"TRUE\"\n",
|
||||
"ocr = PaddleOCR(use_angle_cls=True, lang=\"en\")\n",
|
||||
"\n",
|
||||
"# *** start GUIs ***\n",
|
||||
"images = [f for f in os.listdir(FOLDER) if f.endswith(\".jpg\")]\n",
|
||||
"region = cropUI(os.path.join(FOLDER, images[0]))\n",
|
||||
"\n",
|
||||
"flip = input(\"Do you want to flip the images horizontaly? (y/n): \")\n",
|
||||
"\n",
|
||||
"cropped_folder = os.path.join(FOLDER, \"cropped\")\n",
|
||||
"if not os.path.exists(cropped_folder):\n",
|
||||
" os.makedirs(cropped_folder)\n",
|
||||
"\n",
|
||||
"# *** start cropping ***\n",
|
||||
"for image in images:\n",
|
||||
" cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)\n",
|
||||
"\n",
|
||||
"# *** start OCR ***\n",
|
||||
"cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(\".jpg\")]\n",
|
||||
"data = {}\n",
|
||||
"for image in cropped_images:\n",
|
||||
" try:\n",
|
||||
" path = os.path.join(cropped_folder, image)\n",
|
||||
" text = ocr_image(path, ocr)\n",
|
||||
" except Exception as _:\n",
|
||||
" print(\"Error in cropped image\")\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" data[image] = text\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"data2excel(data)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# clean excel file\n",
|
||||
"def data2cleanexcel(data):\n",
|
||||
" # save the data in an excel file\n",
|
||||
" fileName = \"data_clean.xlsx\"\n",
|
||||
" workbook = xlsxwriter.Workbook(fileName)\n",
|
||||
" worksheet = workbook.add_worksheet()\n",
|
||||
"\n",
|
||||
" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
|
||||
"\n",
|
||||
" # write the data\n",
|
||||
" row = 0\n",
|
||||
" for key, value in data.items():\n",
|
||||
" try:\n",
|
||||
" worksheet.write(row, 2, float(value[:6]))\n",
|
||||
" date = key.split(\" \", 1)[1][:-4]\n",
|
||||
" writeDate(worksheet, row, 0, date, dateFormat)\n",
|
||||
" worksheet.write(row, 1, value)\n",
|
||||
" except Exception as _:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" row += 1\n",
|
||||
"\n",
|
||||
" workbook.close()\n",
|
||||
"\n",
|
||||
"data2cleanexcel(data)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.14"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
41
img2xlsx.py
41
img2xlsx.py
|
@ -18,44 +18,18 @@ def cropUI(image_path):
|
|||
return r
|
||||
|
||||
|
||||
def levelUI(image_path, region, initial_threshold=127):
|
||||
# Callback function for the trackbar
|
||||
def on_trackbar(val):
|
||||
_, binary_image = cv2.threshold(gray_image, val, 255, cv2.THRESH_BINARY)
|
||||
cv2.imshow("Binary Image", binary_image)
|
||||
|
||||
# Load and convert the image to grayscale
|
||||
image = cv2.imread(image_path)
|
||||
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
|
||||
gray_image = gray_image[
|
||||
int(region[1]) : int(region[1] + region[3]),
|
||||
int(region[0]) : int(region[0] + region[2]),
|
||||
]
|
||||
|
||||
# Create a window and a trackbar
|
||||
cv2.namedWindow("Binary Image")
|
||||
cv2.createTrackbar("Threshold", "Binary Image", initial_threshold, 255, on_trackbar)
|
||||
|
||||
# Initialize display
|
||||
on_trackbar(initial_threshold)
|
||||
cv2.waitKey(0)
|
||||
cv2.destroyAllWindows()
|
||||
|
||||
|
||||
def cropImage(image_path, r, threshold, flip, cropped_folder):
|
||||
def cropImage(image_path, r, flip, cropped_folder):
|
||||
# crop the image and save it
|
||||
image = cv2.imread(image_path)
|
||||
cropped = image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
|
||||
gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)
|
||||
gray = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]
|
||||
|
||||
# save with new name
|
||||
if flip == "y":
|
||||
gray = cv2.flip(gray, 1)
|
||||
cropped = cv2.flip(cropped, 1)
|
||||
|
||||
# save in subfolder cropped
|
||||
new_name = os.path.join(cropped_folder, image_path.split("/")[-1])
|
||||
cv2.imwrite(new_name, gray)
|
||||
cv2.imwrite(new_name, cropped)
|
||||
|
||||
|
||||
def data2excel(data):
|
||||
|
@ -67,7 +41,8 @@ def data2excel(data):
|
|||
# write the data
|
||||
row = 0
|
||||
for key, value in data.items():
|
||||
worksheet.write(row, 0, key)
|
||||
date = key.split(" ", 1)[1][:-4]
|
||||
worksheet.write(row, 0, date)
|
||||
worksheet.write(row, 1, value)
|
||||
try:
|
||||
worksheet.write(row, 2, float(value[:6]))
|
||||
|
@ -80,12 +55,12 @@ def data2excel(data):
|
|||
|
||||
def main():
|
||||
# ocr settings
|
||||
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
|
||||
ocr = PaddleOCR(use_angle_cls=True, lang="en")
|
||||
|
||||
# *** start GUIs ***
|
||||
images = [f for f in os.listdir(FOLDER) if f.endswith(".jpg")]
|
||||
region = cropUI(os.path.join(FOLDER, images[0]))
|
||||
threshold = levelUI(os.path.join(FOLDER, images[0]), region)
|
||||
|
||||
flip = input("Do you want to flip the images horizontaly? (y/n): ")
|
||||
|
||||
|
@ -95,7 +70,7 @@ def main():
|
|||
|
||||
# *** start cropping ***
|
||||
for image in images:
|
||||
cropImage(os.path.join(FOLDER, image), region, threshold, flip, cropped_folder)
|
||||
cropImage(os.path.join(FOLDER, image), region, flip, cropped_folder)
|
||||
|
||||
# *** start OCR ***
|
||||
cropped_images = [f for f in os.listdir(cropped_folder) if f.endswith(".jpg")]
|
||||
|
@ -117,5 +92,5 @@ def main():
|
|||
|
||||
|
||||
if __name__ == "__main__":
|
||||
FOLDER = "."
|
||||
FOLDER = "data/"
|
||||
main()
|
||||
|
|
56
readme.md
56
readme.md
|
@ -1,52 +1,34 @@
|
|||
## Astral-sh UV
|
||||
|
||||
### Getting started
|
||||
|
||||
## Conda
|
||||
```bash
|
||||
# On macOS and Linux.
|
||||
curl -LsSf https://astral.sh/uv/install.sh | sh
|
||||
# create environment
|
||||
conda create -n <name-env> python=3.8
|
||||
|
||||
# On Windows.
|
||||
powershell -c "irm https://astral.sh/uv/install.ps1 | iex"
|
||||
# activate environment
|
||||
conda activate <name-env>
|
||||
|
||||
# With pip.
|
||||
pip install uv
|
||||
# deactivate environment
|
||||
conda deactivate
|
||||
```
|
||||
|
||||
### Create virtual environment
|
||||
|
||||
#### Recreate environment
|
||||
```bash
|
||||
uv venv # Create a virtual environment at .venv.
|
||||
conda env create -f environment.yml
|
||||
```
|
||||
|
||||
### Activate virtual environment
|
||||
|
||||
#### Update environment
|
||||
```bash
|
||||
# On macOS and Linux.
|
||||
source .venv/bin/activate
|
||||
# save updated environment.yml
|
||||
conda env export > environment.yml
|
||||
|
||||
# On Windows.
|
||||
.venv\Scripts\activate
|
||||
# update environment with environment.yml
|
||||
conda env update -f environment.yml
|
||||
```
|
||||
|
||||
# Install packages
|
||||
|
||||
#### Conda shell
|
||||
```bash
|
||||
uv pip install flask # Install Flask.
|
||||
uv pip install -r requirements.txt # Install from a requirements.txt file.
|
||||
uv pip install -e . # Install the current project in editable mode.
|
||||
uv pip install "package @ ." # Install the current project from disk.
|
||||
uv pip install "flask[dotenv]" # Install Flask with "dotenv" extra.
|
||||
```
|
||||
conda init (-all)
|
||||
# then restart terminal
|
||||
|
||||
### Generate requirements.txt
|
||||
|
||||
```bash
|
||||
uv pip freeze | uv pip compile - -o requirements.txt # Lock the current environment.
|
||||
```
|
||||
|
||||
### Update to requirements.txt
|
||||
|
||||
```bash
|
||||
uv pip sync requirements.txt
|
||||
# exit conda shell
|
||||
conda deactivate
|
||||
```
|
Loading…
Reference in New Issue