In [1]:
import os
import cv2
import xlsxwriter
import numpy as np


def shear_img(image):
    rows, cols, _ = image.shape
    M = np.float32([[1, -0.22, 0], [0, 1, 0], [0, 0, 1]])
    sheared_img = cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))
    return cv2.flip(sheared_img, 1)


def crop_image(sheared_image, r, target_path):
    cropped = sheared_image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]
    return cropped

In [2]:
from datetime import datetime


def writeDate(worksheet, row, column, date, format):
    original_format = "%Y-%m-%d %H-%M-%S-%f"
    parsed_datetime = datetime.strptime(date, original_format)

    worksheet.write_datetime(row, column, parsed_datetime, format)


def data2excel(data):
    # save the data in an excel file
    fileName = "data.xlsx"
    workbook = xlsxwriter.Workbook(fileName)
    worksheet = workbook.add_worksheet()

    dateFormat = workbook.add_format({"num_format": "dd/mm/yy hh:mm:ss"})

    # write the data
    row = 0
    for key, value in data.items():
        date = key.split(" ", 1)[1][:-4]
        writeDate(worksheet, row, 0, date, dateFormat)
        worksheet.write(row, 1, value)
        try:
            worksheet.write(row, 2, float(value[:6]))
        except Exception as _:
            pass
        row += 1

    workbook.close()

In [3]:
def peaks(data, boxes=2):
    # split data in boxes
    data = np.array_split(data, boxes)

    # get the maximum value in each box
    data = [np.max(d) for d in data]
    data = [d > 150 for d in data]
    return np.array(data).astype(int).tolist()


digits = {
    0: [[1, 0, 1], [1, 1], [1, 1]],
    1: [[0, 0, 0], [0, 1], [0, 1]],
    2: [[1, 1, 1], [0, 1], [1, 0]],
    3: [[1, 1, 1], [0, 1], [0, 1]],
    4: [[0, 1, 0], [1, 1], [0, 1]],
    5: [[1, 1, 1], [1, 0], [0, 1]],
    6: [[1, 1, 1], [1, 0], [1, 1]],
    7: [[1, 0, 0], [0, 1], [0, 1]],
    8: [[1, 1, 1], [1, 1], [1, 1]],
    9: [[1, 1, 1], [1, 1], [0, 1]],
}


def ownOCR(image):
    # get vertical pixel line in the middle of the image
    vertical = image[:, image.shape[1] // 2, 0]

    # get two horizontal lines at 1/3 and 2/3 of the image
    horizontal1 = image[image.shape[0] // 3, :, 0]
    horizontal2 = image[2 * image.shape[0] // 3, :, 0]

    # get times it goes above 150, remove subsequent values
    digit = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]
    digit = [key for key, value in digits.items() if value == digit]
    return digit[0]

In [4]:
folder = "../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/"
images = [f for f in os.listdir(folder) if f.endswith(".jpg")]
cropped_folder = os.path.join(folder, "cropped")
os.makedirs(cropped_folder, exist_ok=True)

# Initial image for ROI selection
init_image_path = os.path.join(folder, images[0])
init_image = cv2.imread(init_image_path)
sheared_init_image = shear_img(init_image)

regions = []
for i in range(5):  # Assume 5 regions as in your original code
    r = cv2.selectROI(f"Select the digit {i+1}", sheared_init_image)
    cv2.destroyAllWindows()
    regions.append(r)

data = {}
i = 0
# Process all images
for image_name in images:
    try:
        image_path = os.path.join(folder, image_name)
        image = cv2.imread(image_path)
        sheared_image = shear_img(image)

        for idx, region in enumerate(regions):
            target_path = os.path.join(cropped_folder, f"{image_name[:-4]}_{idx+1}.jpg")
            cropped = crop_image(sheared_image, region, target_path)
            digit = ownOCR(cropped)

            if image_name not in data:
                data[image_name] = ""

            data[image_name] += str(digit)
    except Exception as e:
        print(e)
        continue

list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range
list index out of range


In [7]:
data2excel(data)

In [12]:
# clean excel file
def data2cleanexcel(data):
    # save the data in an excel file
    fileName = "data_clean.xlsx"
    workbook = xlsxwriter.Workbook(fileName)
    worksheet = workbook.add_worksheet()

    dateFormat = workbook.add_format({"num_format": "dd/mm/yy hh:mm:ss"})

    # write the data
    row = 0
    for key, value in data.items():
        try:
            value = "6"+value[1:]
            worksheet.write(row, 2, float(value[:6]))
            date = key.split(" ", 1)[1][:-4]
            writeDate(worksheet, row, 0, date, dateFormat)            
            worksheet.write(row, 1, value)
        except Exception as _:
            continue

        row += 1

    workbook.close()


data2cleanexcel(data)