# Source: AI_logging/img2xlsx.py (repository viewer metadata: 97 lines, 2.4 KiB, Python)

from paddleocr import PaddleOCR
import os
import cv2
import xlsxwriter
def ocr_image(image_path, ocr):
    """Run OCR on a single image and return one text string from the result.

    Args:
        image_path: Path of the image; passed unchanged to ``ocr.ocr``.
        ocr: Object exposing ``ocr(path, cls=...)``; the caller in this file
            passes a ``PaddleOCR`` instance.

    Returns:
        The element at index ``[0][0][1][0]`` of the OCR result. This is
        presumably the recognised text of the first detected line on the
        first page (confirm against the PaddleOCR result layout).

    Raises:
        TypeError, IndexError: Propagated from the indexing when the result
            does not contain that element (for example when nothing was
            detected).
    """
    pages = ocr.ocr(image_path, cls=True)
    first_page = pages[0]
    first_line = first_page[0]
    # Each line entry is indexed as [box, recognition]; recognition[0] is the text.
    recognition = first_line[1]
    return recognition[0]
def cropUI(image_path):
    """Show an image in an OpenCV window and let the user select a region.

    Args:
        image_path: Path of the image to display for the selection.

    Returns:
        The rectangle returned by ``cv2.selectROI``. The rest of this file
        indexes it as ``(x, y, width, height)``.

    Raises:
        OSError: If OpenCV cannot load ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising,
        # which would otherwise surface as an opaque error inside selectROI.
        raise OSError(f"Could not read image: {image_path}")
    try:
        region = cv2.selectROI(image)
    finally:
        # Close the selection window even if selectROI fails.
        cv2.destroyAllWindows()
    return region
def cropImage(image_path, r, flip, cropped_folder):
    """Crop one image to a rectangle, optionally mirror it, and save the result.

    Args:
        image_path: Path of the source image.
        r: Crop rectangle, indexed as ``(x, y, width, height)``.
        flip: ``"y"`` to mirror the crop horizontally; any other value leaves
            it unflipped.
        cropped_folder: Directory that receives the cropped file, saved under
            the source file's base name. It must already exist.

    Raises:
        OSError: If the source image cannot be read or the cropped image
            cannot be written.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None on failure; fail with a clear message
        # instead of a TypeError from slicing None.
        raise OSError(f"Could not read image: {image_path}")

    top, bottom = int(r[1]), int(r[1] + r[3])
    left, right = int(r[0]), int(r[0] + r[2])
    cropped = image[top:bottom, left:right]

    if flip == "y":
        # flipCode 1 mirrors around the vertical axis (horizontal flip).
        cropped = cv2.flip(cropped, 1)

    # os.path.basename handles the platform's separators, unlike split("/").
    new_name = os.path.join(cropped_folder, os.path.basename(image_path))
    if not cv2.imwrite(new_name, cropped):
        # imwrite returns False (it does not raise) when the file cannot be
        # saved; surface that instead of silently losing the image.
        raise OSError(f"Could not write cropped image: {new_name}")
def data2excel(data, filename="data.xlsx"):
    """Write OCR results to an Excel workbook, one row per image.

    Columns written per row:
        0: label taken from the image file name (extension removed, and
           everything up to and including the first space removed when the
           name contains one),
        1: the raw OCR text,
        2: the first six characters of the text parsed as a float; left
           empty when they do not form a number.

    Args:
        data: Mapping of image file name (str) to recognised text.
        filename: Path of the workbook to create. Defaults to ``"data.xlsx"``.
    """
    # The context manager closes (and thereby saves) the workbook even if a
    # row fails to be written.
    with xlsxwriter.Workbook(filename) as workbook:
        worksheet = workbook.add_worksheet()
        for row, (key, value) in enumerate(data.items()):
            stem = os.path.splitext(key)[0]
            # Keep the part after the first space; names without a space are
            # used whole instead of raising IndexError.
            label = stem.split(" ", 1)[-1]
            worksheet.write(row, 0, label)
            worksheet.write(row, 1, value)
            try:
                # The write stays inside the try: XlsxWriter raises TypeError
                # for NaN/inf values, which float() accepts (e.g. "nan").
                worksheet.write(row, 2, float(value[:6]))
            except (TypeError, ValueError):
                # Text is not numeric; the numeric column stays empty.
                pass
def main(folder=None):
    """Crop every .jpg in a folder to a user-selected region, OCR it, export to Excel.

    Steps:
        1. List the ``.jpg`` files in the folder (sorted, case-insensitive match).
        2. Let the user select the crop region on the first image and ask
           whether the crops should be flipped horizontally.
        3. Save the cropped images into a ``cropped`` subfolder.
        4. Run OCR on every ``.jpg`` in that subfolder and write the
           results with ``data2excel``.

    Args:
        folder: Directory containing the images. When ``None`` the
            module-level ``FOLDER`` is used, as set by the script entry point.
    """
    if folder is None:
        folder = FOLDER

    # Sorted so that the image shown for selection and the spreadsheet row
    # order are deterministic; os.listdir order is arbitrary.
    images = sorted(f for f in os.listdir(folder) if f.lower().endswith(".jpg"))
    if not images:
        print(f"No .jpg images found in {folder}")
        return

    # ocr settings
    # NOTE(review): presumably works around the duplicate OpenMP runtime
    # abort seen with some native library combinations; confirm it is needed.
    os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
    ocr = PaddleOCR(use_angle_cls=True, lang="en")

    # *** start GUIs ***
    region = cropUI(os.path.join(folder, images[0]))
    if not region[2] or not region[3]:
        # A zero width or height means no usable region was selected.
        print("No crop region selected")
        return
    # Normalised so that answers such as "Y" or " y" also count as yes.
    flip = input("Do you want to flip the images horizontaly? (y/n): ").strip().lower()
    cropped_folder = os.path.join(folder, "cropped")
    os.makedirs(cropped_folder, exist_ok=True)

    # *** start cropping ***
    for image in images:
        cropImage(os.path.join(folder, image), region, flip, cropped_folder)

    # *** start OCR ***
    cropped_images = sorted(
        f for f in os.listdir(cropped_folder) if f.lower().endswith(".jpg")
    )
    data = {}
    for image in cropped_images:
        path = os.path.join(cropped_folder, image)
        try:
            text = ocr_image(path, ocr)
        except Exception as err:
            # Best effort: report which image failed and carry on with the rest.
            print(f"Error in cropped image {image}: {err}")
            continue
        data[image] = text

    # *** save data in excel ***
    data2excel(data)
    print("All images cropped successfully")
# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Folder scanned for .jpg images; main() reads this module-level name.
    FOLDER = "data/"
    main()