AI_logging/img2xlsx.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import cv2\n",
    "import xlsxwriter\n",
    "import numpy as np\n",
    "\n",
    "\n",
    "def shear_img(image, shear=-0.22, scale=1.5):\n",
    "    \"\"\"Shear an image horizontally and mirror it left-to-right.\n",
    "\n",
    "    Args:\n",
    "        image: Image array with at least two dimensions (rows, columns, ...).\n",
    "        shear: Horizontal shear factor; each pixel moves to x + shear * y.\n",
    "        scale: Size of the output canvas relative to the input width/height.\n",
    "\n",
    "    Returns:\n",
    "        The sheared image on the enlarged canvas, flipped around the vertical axis.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: If ``image`` is ``None``.\n",
    "    \"\"\"\n",
    "    if image is None:\n",
    "        # cv2.imread returns None instead of raising when a file cannot be read.\n",
    "        raise ValueError(\"shear_img: image is None (unreadable or missing file?)\")\n",
    "\n",
    "    # shape[:2] also accepts single-channel images, which have no third axis.\n",
    "    rows, cols = image.shape[:2]\n",
    "    M = np.float32([[1, shear, 0], [0, 1, 0], [0, 0, 1]])\n",
    "    sheared_img = cv2.warpPerspective(image, M, (int(cols * scale), int(rows * scale)))\n",
    "    return cv2.flip(sheared_img, 1)\n",
    "\n",
    "\n",
    "def crop_image(sheared_image, r, target_path):\n",
    "    \"\"\"Cut the rectangle ``r`` out of ``sheared_image``.\n",
    "\n",
    "    ``r`` is indexed as (x, y, width, height). ``target_path`` is accepted\n",
    "    but not used by this function.\n",
    "    \"\"\"\n",
    "    left, top, width, height = r[0], r[1], r[2], r[3]\n",
    "    row_span = slice(int(top), int(top + height))\n",
    "    col_span = slice(int(left), int(left + width))\n",
    "    return sheared_image[row_span, col_span]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "\n",
    "def writeDate(worksheet, row, column, date, format):\n",
    "    \"\"\"Parse a timestamp string and store it as a datetime cell.\n",
    "\n",
    "    ``date`` must match ``%Y-%m-%d %H-%M-%S-%f``; the parsed value is handed to\n",
    "    ``worksheet.write_datetime`` together with the cell format ``format``.\n",
    "    \"\"\"\n",
    "    timestamp = datetime.strptime(date, \"%Y-%m-%d %H-%M-%S-%f\")\n",
    "    worksheet.write_datetime(row, column, timestamp, format)\n",
    "\n",
    "\n",
    "def data2excel(data, fileName=\"data.xlsx\"):\n",
    "    \"\"\"Write the readings to an Excel workbook, one row per entry of ``data``.\n",
    "\n",
    "    Columns: 0 = timestamp parsed from the key, 1 = the raw value,\n",
    "    2 = the first six characters of the value as a float (left empty when\n",
    "    they are not numeric).\n",
    "\n",
    "    Args:\n",
    "        data: Mapping of image file name -> recognised digit string. The\n",
    "            timestamp is the part of the name after the first space, minus\n",
    "            its last four characters.\n",
    "        fileName: Path of the workbook to create.\n",
    "\n",
    "    Raises:\n",
    "        IndexError: If a key contains no space.\n",
    "        ValueError: If a key's timestamp does not match the expected format.\n",
    "    \"\"\"\n",
    "    # The context manager closes (and thereby saves) the workbook even when a\n",
    "    # row raises, instead of leaving it open and unsaved.\n",
    "    with xlsxwriter.Workbook(fileName) as workbook:\n",
    "        worksheet = workbook.add_worksheet()\n",
    "        dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
    "\n",
    "        for row, (key, value) in enumerate(data.items()):\n",
    "            date = key.split(\" \", 1)[1][:-4]\n",
    "            writeDate(worksheet, row, 0, date, dateFormat)\n",
    "            worksheet.write(row, 1, value)\n",
    "\n",
    "            try:\n",
    "                number = float(value[:6])\n",
    "            except (TypeError, ValueError):\n",
    "                # Non-numeric reading: keep the raw text, leave column 2 empty.\n",
    "                continue\n",
    "            worksheet.write(row, 2, number)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "def peaks(data, boxes=2, threshold=150):\n",
    "    \"\"\"Report, per segment of a pixel line, whether it contains a bright pixel.\n",
    "\n",
    "    Args:\n",
    "        data: 1-D sequence of pixel intensities.\n",
    "        boxes: Number of roughly equal segments ``data`` is split into.\n",
    "        threshold: A segment counts as lit when its maximum exceeds this value.\n",
    "\n",
    "    Returns:\n",
    "        A list of ``boxes`` ints (1 = lit, 0 = dark). Segments that end up\n",
    "        empty because ``data`` is shorter than ``boxes`` are reported as 0.\n",
    "    \"\"\"\n",
    "    segments = np.array_split(data, boxes)\n",
    "    # np.max raises on an empty array, so empty segments are treated as dark.\n",
    "    return [int(seg.size > 0 and seg.max() > threshold) for seg in segments]\n",
    "\n",
    "\n",
    "# Reference pattern for every digit, in the same layout ownOCR builds:\n",
    "#   [centre column split in three, row at 1/3 height split in two,\n",
    "#    row at 2/3 height split in two], where 1 means the segment's maximum\n",
    "#   exceeded the brightness threshold used by peaks and 0 means it did not.\n",
    "# NOTE(review): presumably the segments of a seven-segment display - confirm.\n",
    "digits = {\n",
    "    0: [[1, 0, 1], [1, 1], [1, 1]],\n",
    "    1: [[0, 0, 0], [0, 1], [0, 1]],\n",
    "    2: [[1, 1, 1], [0, 1], [1, 0]],\n",
    "    3: [[1, 1, 1], [0, 1], [0, 1]],\n",
    "    4: [[0, 1, 0], [1, 1], [0, 1]],\n",
    "    5: [[1, 1, 1], [1, 0], [0, 1]],\n",
    "    6: [[1, 1, 1], [1, 0], [1, 1]],\n",
    "    7: [[1, 0, 0], [0, 1], [0, 1]],\n",
    "    8: [[1, 1, 1], [1, 1], [1, 1]],\n",
    "    9: [[1, 1, 1], [1, 1], [0, 1]],\n",
    "}\n",
    "\n",
    "\n",
    "def ownOCR(image):\n",
    "    \"\"\"Recognise a single digit by sampling three pixel lines of the crop.\n",
    "\n",
    "    The first channel is sampled along the centre column (split in three) and\n",
    "    along the rows at 1/3 and 2/3 of the height (split in two each); the\n",
    "    resulting pattern is looked up in ``digits``.\n",
    "\n",
    "    Args:\n",
    "        image: 3-D image array indexed as [row, column, channel].\n",
    "\n",
    "    Returns:\n",
    "        The key of the ``digits`` entry whose pattern equals the sampled one.\n",
    "\n",
    "    Raises:\n",
    "        IndexError: If the sampled pattern matches no entry in ``digits``.\n",
    "    \"\"\"\n",
    "    height, width = image.shape[:2]\n",
    "    vertical = image[:, width // 2, 0]\n",
    "    horizontal1 = image[height // 3, :, 0]\n",
    "    horizontal2 = image[2 * height // 3, :, 0]\n",
    "\n",
    "    pattern = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]\n",
    "    for key, value in digits.items():\n",
    "        if value == pattern:\n",
    "            return key\n",
    "\n",
    "    # Same exception type an empty match list used to produce, but with a\n",
    "    # message that shows which pattern could not be recognised.\n",
    "    raise IndexError(f\"ownOCR: no digit matches pattern {pattern}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "metadata": {}
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n",
      "list index out of range\n"
     ]
    }
   ],
   "source": [
    "folder = \"../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/\"\n",
    "# Sorted so the processing order (and therefore the spreadsheet row order)\n",
    "# is reproducible; os.listdir returns entries in arbitrary order.\n",
    "images = sorted(f for f in os.listdir(folder) if f.endswith(\".jpg\"))\n",
    "if not images:\n",
    "    raise FileNotFoundError(f\"No .jpg images found in {folder!r}\")\n",
    "cropped_folder = os.path.join(folder, \"cropped\")\n",
    "os.makedirs(cropped_folder, exist_ok=True)\n",
    "\n",
    "# Initial image for ROI selection\n",
    "init_image_path = os.path.join(folder, images[0])\n",
    "init_image = cv2.imread(init_image_path)\n",
    "if init_image is None:\n",
    "    raise FileNotFoundError(f\"Could not read {init_image_path!r}\")\n",
    "sheared_init_image = shear_img(init_image)\n",
    "\n",
    "# Ask for one region per digit; the same regions are reused for every image.\n",
    "regions = []\n",
    "for i in range(5):\n",
    "    r = cv2.selectROI(f\"Select the digit {i+1}\", sheared_init_image)\n",
    "    cv2.destroyAllWindows()\n",
    "    regions.append(r)\n",
    "\n",
    "data = {}\n",
    "# Process all images\n",
    "for image_name in images:\n",
    "    try:\n",
    "        image_path = os.path.join(folder, image_name)\n",
    "        image = cv2.imread(image_path)\n",
    "        sheared_image = shear_img(image)\n",
    "\n",
    "        reading = \"\"\n",
    "        for idx, region in enumerate(regions):\n",
    "            target_path = os.path.join(cropped_folder, f\"{image_name[:-4]}_{idx+1}.jpg\")\n",
    "            cropped = crop_image(sheared_image, region, target_path)\n",
    "            reading += str(ownOCR(cropped))\n",
    "    except Exception as e:\n",
    "        # Name the image so a failure can be traced back to its file.\n",
    "        print(f\"{image_name}: {e}\")\n",
    "        continue\n",
    "\n",
    "    # Store only complete readings: a failure on a later digit must not leave\n",
    "    # a truncated number behind that would be exported as if it were valid.\n",
    "    data[image_name] = reading"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# Export every reading collected in `data` (written to data.xlsx by default).\n",
    "data2excel(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "metadata": {}
   },
   "outputs": [],
   "source": [
    "# clean excel file\n",
    "def data2cleanexcel(data, fileName=\"data_clean.xlsx\", leading_digit=\"6\"):\n",
    "    \"\"\"Write a cleaned copy of the readings, skipping entries that cannot be parsed.\n",
    "\n",
    "    The first character of every reading is replaced by ``leading_digit``\n",
    "    before it is written.\n",
    "    NOTE(review): presumably compensates for a misread first digit - confirm.\n",
    "\n",
    "    Columns: 0 = timestamp parsed from the key, 1 = corrected reading,\n",
    "    2 = the first six characters of the corrected reading as a float.\n",
    "\n",
    "    Args:\n",
    "        data: Mapping of image file name -> recognised digit string.\n",
    "        fileName: Path of the workbook to create.\n",
    "        leading_digit: Replacement for the first character of each reading.\n",
    "    \"\"\"\n",
    "    # The context manager closes (and thereby saves) the workbook on every path.\n",
    "    with xlsxwriter.Workbook(fileName) as workbook:\n",
    "        worksheet = workbook.add_worksheet()\n",
    "        dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",
    "\n",
    "        row = 0\n",
    "        for key, value in data.items():\n",
    "            try:\n",
    "                corrected = leading_digit + value[1:]\n",
    "                number = float(corrected[:6])\n",
    "                date = key.split(\" \", 1)[1][:-4]\n",
    "                # writeDate parses before it writes, so a bad timestamp is\n",
    "                # rejected here without leaving any cell of this row filled.\n",
    "                writeDate(worksheet, row, 0, date, dateFormat)\n",
    "            except (IndexError, TypeError, ValueError):\n",
    "                # Unparseable entry: skip it and reuse the row index.\n",
    "                continue\n",
    "\n",
    "            worksheet.write(row, 1, corrected)\n",
    "            worksheet.write(row, 2, number)\n",
    "            row += 1\n",
    "\n",
    "\n",
    "# Export the cleaned readings (written to data_clean.xlsx by default).\n",
    "data2cleanexcel(data)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
first results 2024-04-10 13:27:48 +00:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "code",`
own ocr 2024-04-15 15:05:20 +00:00			`"execution_count": 1,`
			`"metadata": {`
			`"metadata": {}`
			`},`
first results 2024-04-10 13:27:48 +00:00			`"outputs": [],`
			`"source": [`
			`"import os\n",`
			`"import cv2\n",`
			`"import xlsxwriter\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"import numpy as np\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"def shear_img(image):\n",`
			`" rows, cols, _ = image.shape\n",`
			`" M = np.float32([[1, -0.22, 0], [0, 1, 0], [0, 0, 1]])\n",`
			`" sheared_img = cv2.warpPerspective(image, M, (int(cols * 1.5), int(rows * 1.5)))\n",`
			`" return cv2.flip(sheared_img, 1)\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"def crop_image(sheared_image, r, target_path):\n",`
			`" cropped = sheared_image[int(r[1]) : int(r[1] + r[3]), int(r[0]) : int(r[0] + r[2])]\n",`
			`" return cropped"`
first results 2024-04-10 13:27:48 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
own ocr 2024-04-15 15:05:20 +00:00			`"execution_count": 2,`
			`"metadata": {`
			`"metadata": {}`
			`},`
first results 2024-04-10 13:27:48 +00:00			`"outputs": [],`
			`"source": [`
			`"from datetime import datetime\n",`
			`"\n",`
			`"\n",`
			`"def writeDate(worksheet, row, column, date, format):\n",`
			`" original_format = \"%Y-%m-%d %H-%M-%S-%f\"\n",`
			`" parsed_datetime = datetime.strptime(date, original_format)\n",`
			`"\n",`
			`" worksheet.write_datetime(row, column, parsed_datetime, format)\n",`
			`"\n",`
			`"\n",`
			`"def data2excel(data):\n",`
			`" # save the data in an excel file\n",`
			`" fileName = \"data.xlsx\"\n",`
			`" workbook = xlsxwriter.Workbook(fileName)\n",`
			`" worksheet = workbook.add_worksheet()\n",`
			`"\n",`
			`" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",`
			`"\n",`
			`" # write the data\n",`
			`" row = 0\n",`
			`" for key, value in data.items():\n",`
			`" date = key.split(\" \", 1)[1][:-4]\n",`
			`" writeDate(worksheet, row, 0, date, dateFormat)\n",`
			`" worksheet.write(row, 1, value)\n",`
			`" try:\n",`
			`" worksheet.write(row, 2, float(value[:6]))\n",`
			`" except Exception as _:\n",`
			`" pass\n",`
			`" row += 1\n",`
			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" workbook.close()"`
first results 2024-04-10 13:27:48 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
own ocr 2024-04-15 15:05:20 +00:00			`"execution_count": 3,`
			`"metadata": {`
			`"metadata": {}`
			`},`
first results 2024-04-10 13:27:48 +00:00			`"outputs": [],`
			`"source": [`
own ocr 2024-04-15 15:05:20 +00:00			`"def peaks(data, boxes=2):\n",`
			`" # split data in boxes\n",`
			`" data = np.array_split(data, boxes)\n",`
			`"\n",`
			`" # get the maximum value in each box\n",`
			`" data = [np.max(d) for d in data]\n",`
			`" data = [d > 150 for d in data]\n",`
			`" return np.array(data).astype(int).tolist()\n",`
			`"\n",`
			`"\n",`
			`"digits = {\n",`
			`" 0: [[1, 0, 1], [1, 1], [1, 1]],\n",`
			`" 1: [[0, 0, 0], [0, 1], [0, 1]],\n",`
			`" 2: [[1, 1, 1], [0, 1], [1, 0]],\n",`
			`" 3: [[1, 1, 1], [0, 1], [0, 1]],\n",`
			`" 4: [[0, 1, 0], [1, 1], [0, 1]],\n",`
			`" 5: [[1, 1, 1], [1, 0], [0, 1]],\n",`
			`" 6: [[1, 1, 1], [1, 0], [1, 1]],\n",`
			`" 7: [[1, 0, 0], [0, 1], [0, 1]],\n",`
			`" 8: [[1, 1, 1], [1, 1], [1, 1]],\n",`
			`" 9: [[1, 1, 1], [1, 1], [0, 1]],\n",`
			`"}\n",`
			`"\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"def ownOCR(image):\n",`
			`" # get vertical pixel line in the middle of the image\n",`
			`" vertical = image[:, image.shape[1] // 2, 0]\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" # get two horizontal lines at 1/3 and 2/3 of the image\n",`
			`" horizontal1 = image[image.shape[0] // 3, :, 0]\n",`
			`" horizontal2 = image[2 * image.shape[0] // 3, :, 0]\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" # get times it goes above 150, remove subsequent values\n",`
			`" digit = [peaks(vertical, 3), peaks(horizontal1), peaks(horizontal2)]\n",`
			`" digit = [key for key, value in digits.items() if value == digit]\n",`
			`" return digit[0]"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"metadata": {`
			`"metadata": {}`
			`},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n",`
			`"list index out of range\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"folder = \"../../../Downloads/wetransfer_metingen-8-04-tot-15-40-5min_2024-04-15_0905/metingen 8-04 tot 15-40 (5min)/\"\n",`
			`"images = [f for f in os.listdir(folder) if f.endswith(\".jpg\")]\n",`
			`"cropped_folder = os.path.join(folder, \"cropped\")\n",`
			`"os.makedirs(cropped_folder, exist_ok=True)\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"# Initial image for ROI selection\n",`
			`"init_image_path = os.path.join(folder, images[0])\n",`
			`"init_image = cv2.imread(init_image_path)\n",`
			`"sheared_init_image = shear_img(init_image)\n",`
			`"\n",`
			`"regions = []\n",`
			`"for i in range(5): # Assume 5 regions as in your original code\n",`
			`" r = cv2.selectROI(f\"Select the digit {i+1}\", sheared_init_image)\n",`
			`" cv2.destroyAllWindows()\n",`
			`" regions.append(r)\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
			`"data = {}\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"i = 0\n",`
			`"# Process all images\n",`
			`"for image_name in images:\n",`
first results 2024-04-10 13:27:48 +00:00			`" try:\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" image_path = os.path.join(folder, image_name)\n",`
			`" image = cv2.imread(image_path)\n",`
			`" sheared_image = shear_img(image)\n",`
			`"\n",`
			`" for idx, region in enumerate(regions):\n",`
			`" target_path = os.path.join(cropped_folder, f\"{image_name[:-4]}_{idx+1}.jpg\")\n",`
			`" cropped = crop_image(sheared_image, region, target_path)\n",`
			`" digit = ownOCR(cropped)\n",`
			`"\n",`
			`" if image_name not in data:\n",`
			`" data[image_name] = \"\"\n",`
first results 2024-04-10 13:27:48 +00:00			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" data[image_name] += str(digit)\n",`
			`" except Exception as e:\n",`
			`" print(e)\n",`
			`" continue"`
first results 2024-04-10 13:27:48 +00:00			`]`
			`},`
			`{`
			`"cell_type": "code",`
own ocr 2024-04-15 15:05:20 +00:00			`"execution_count": 7,`
			`"metadata": {`
			`"metadata": {}`
			`},`
first results 2024-04-10 13:27:48 +00:00			`"outputs": [],`
			`"source": [`
			`"data2excel(data)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 12,`
own ocr 2024-04-15 15:05:20 +00:00			`"metadata": {`
			`"metadata": {}`
			`},`
first results 2024-04-10 13:27:48 +00:00			`"outputs": [],`
			`"source": [`
			`"# clean excel file\n",`
			`"def data2cleanexcel(data):\n",`
			`" # save the data in an excel file\n",`
			`" fileName = \"data_clean.xlsx\"\n",`
			`" workbook = xlsxwriter.Workbook(fileName)\n",`
			`" worksheet = workbook.add_worksheet()\n",`
			`"\n",`
			`" dateFormat = workbook.add_format({\"num_format\": \"dd/mm/yy hh:mm:ss\"})\n",`
			`"\n",`
			`" # write the data\n",`
			`" row = 0\n",`
			`" for key, value in data.items():\n",`
			`" try:\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" value = \"6\"+value[1:]\n",`
first results 2024-04-10 13:27:48 +00:00			`" worksheet.write(row, 2, float(value[:6]))\n",`
			`" date = key.split(\" \", 1)[1][:-4]\n",`
own ocr 2024-04-15 15:05:20 +00:00			`" writeDate(worksheet, row, 0, date, dateFormat) \n",`
first results 2024-04-10 13:27:48 +00:00			`" worksheet.write(row, 1, value)\n",`
			`" except Exception as _:\n",`
			`" continue\n",`
			`"\n",`
			`" row += 1\n",`
			`"\n",`
			`" workbook.close()\n",`
			`"\n",`
own ocr 2024-04-15 15:05:20 +00:00			`"\n",`
first results 2024-04-10 13:27:48 +00:00			`"data2cleanexcel(data)"`
			`]`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.10.14"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 2`
			`}`