Spaces:
Sleeping
Sleeping
valentynliubchenko
commited on
Commit
·
e69be74
1
Parent(s):
cba413b
nit comment
Browse files- .gitattributes +3 -0
- .gitignore +50 -0
- README.md +6 -5
- algorithm/product.py +8 -0
- algorithm/receipt.py +8 -0
- algorithm/receipt_calculation.py +128 -0
- app.py +263 -0
- custom_flagged_data/test.txt +0 -0
- google_drive_client.py +94 -0
- gpt_processing.py +48 -0
- openai_service.py +63 -0
- process_images_gemini_multithread.py +100 -0
- process_images_gemini_one_hread.py +65 -0
- prompt_v1.txt +49 -0
- prompt_v2.txt +64 -0
- prompt_v3.txt +116 -0
- requirements.txt +15 -0
- system_instruction.txt +1 -0
- utils.py +170 -0
- vertex_ai_example.py +20 -0
- vertex_ai_service.py +168 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
examples/*.JPG filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
examples/*.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
examples/*.* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# These are some examples of commonly ignored file patterns.
|
| 2 |
+
# You should customize this list as applicable to your project.
|
| 3 |
+
# Learn more about .gitignore:
|
| 4 |
+
# https://www.atlassian.com/git/tutorials/saving-changes/gitignore
|
| 5 |
+
|
| 6 |
+
# Node artifact files
|
| 7 |
+
node_modules/
|
| 8 |
+
dist/
|
| 9 |
+
|
| 10 |
+
# Compiled Java class files
|
| 11 |
+
*.class
|
| 12 |
+
|
| 13 |
+
# Compiled Python bytecode
|
| 14 |
+
*.py[cod]
|
| 15 |
+
|
| 16 |
+
# Log files
|
| 17 |
+
*.log
|
| 18 |
+
|
| 19 |
+
# Package files
|
| 20 |
+
*.jar
|
| 21 |
+
|
| 22 |
+
# Maven
|
| 23 |
+
target/
|
| 24 |
+
dist/
|
| 25 |
+
|
| 26 |
+
# JetBrains IDE
|
| 27 |
+
.idea/
|
| 28 |
+
|
| 29 |
+
# Unit test reports
|
| 30 |
+
TEST*.xml
|
| 31 |
+
|
| 32 |
+
# Generated by MacOS
|
| 33 |
+
.DS_Store
|
| 34 |
+
|
| 35 |
+
# Generated by Windows
|
| 36 |
+
Thumbs.db
|
| 37 |
+
|
| 38 |
+
# Applications
|
| 39 |
+
*.app
|
| 40 |
+
*.exe
|
| 41 |
+
*.war
|
| 42 |
+
|
| 43 |
+
# Large media files
|
| 44 |
+
*.mp4
|
| 45 |
+
*.tiff
|
| 46 |
+
*.avi
|
| 47 |
+
*.flv
|
| 48 |
+
*.mov
|
| 49 |
+
*.wmv
|
| 50 |
+
|
README.md
CHANGED
|
@@ -1,12 +1,13 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version: 4.44.
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
| 1 |
---
|
| 2 |
+
title: ReceiptAI
|
| 3 |
+
emoji: 🏢
|
| 4 |
+
colorFrom: pink
|
| 5 |
+
colorTo: pink
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 4.44.0
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
algorithm/product.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Product:
    """One receipt line item: a name, a price and a quantity."""

    def __init__(self, name, price, quantity):
        # The three values are stored exactly as given (no conversion).
        self.name, self.price, self.quantity = name, price, quantity

    def __repr__(self):
        """Return a debug string listing the three stored fields."""
        return f"Product(name={self.name}, price={self.price}, quantity={self.quantity})"
|
algorithm/receipt.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class Receipt:
    """A parsed receipt: its products, the tax and the total amount."""

    def __init__(self, products, tax, total_amount):
        # The three values are stored exactly as given (no conversion).
        self.products, self.tax, self.total_amount = products, tax, total_amount

    def __repr__(self):
        """Return a debug string listing the three stored fields."""
        return f"Receipt(products={self.products}, tax={self.tax}, total_amount={self.total_amount})"
|
algorithm/receipt_calculation.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import copy
|
| 3 |
+
|
| 4 |
+
from algorithm.product import Product
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def calculate_dish_price_with_taxes(_products, taxes, grand_total):
    """Scale every product price so that taxes are spread proportionally.

    :param _products: iterable of objects with a numeric ``price`` attribute;
        the input objects are deep-copied and never modified.
    :param taxes: tax amount included in ``grand_total``.
    :param grand_total: total amount including taxes.
    :return: tuple ``(copies_with_scaled_prices, grand_total)`` where
        ``grand_total`` is recomputed as rounded pre-tax total + rounded taxes.
    """
    payment_total = round(grand_total - taxes, 2)
    grand_total = round(payment_total, 2) + round(taxes, 2)
    _product_with_taxes = copy.deepcopy(_products)

    if payment_total == 0:
        # No pre-tax amount to scale against: dividing would raise
        # ZeroDivisionError, so the copies are returned with unchanged prices.
        return _product_with_taxes, grand_total

    for _product in _product_with_taxes:
        # Each price keeps its share of the pre-tax total, applied to the
        # tax-inclusive total; 5 decimals are kept for the later cent rounding.
        _product.price = round(((_product.price / payment_total) * grand_total), 5)
    return _product_with_taxes, grand_total
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def round_up_two_decimals(_products_total):
    """Return deep copies of the products with prices rounded UP to whole cents.

    :param _products_total: iterable of objects with a numeric ``price``.
    :return: new list of copies; the input objects are not modified.
    """
    _product_with_taxes_rounded = copy.deepcopy(_products_total)
    for _product in _product_with_taxes_rounded:
        # price * 100 can carry binary float noise (0.07 * 100 ==
        # 7.000000000000001), which would make ceil() add a spurious cent.
        # Rounding to 6 decimals first removes that noise.
        cents = math.ceil(round(_product.price * 100, 6))
        _product.price = cents / 100

    return _product_with_taxes_rounded
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def first_algorithm(_products_total_rounded, receipt_subtotal):
    """Remove the gap between the summed prices and the receipt subtotal.

    The gap is split between products in proportion to each price, with every
    correction rounded to cents. Prices are adjusted IN PLACE on the objects
    passed in.

    :param _products_total_rounded: list of objects with a numeric ``price``.
    :param receipt_subtotal: the total the prices should add up to.
    :return: tuple ``(_products_total_rounded, final_total)`` where
        ``final_total`` is the (unrounded) sum of the adjusted prices.
    """
    current_total = round(sum(p.price for p in _products_total_rounded), 2)
    difference = current_total - receipt_subtotal

    # With a zero total there is nothing to distribute proportionally, and
    # dividing by it would raise ZeroDivisionError.
    if current_total != 0:
        for _product in _products_total_rounded:
            correction = round((_product.price / current_total) * difference, 2)
            _product.price = round(_product.price - correction, 2)

    _final_total = sum(p.price for p in _products_total_rounded)
    return _products_total_rounded, _final_total
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def fractional_part_rest(value):
    """Return the decimal digits beyond the second place, read as a float.

    The value is formatted with ten decimals; the first two (the cents) are
    dropped and the remaining eight digits are converted with ``float``.
    For example ``1.2345`` gives ``45000000.0``.
    """
    digits_after_cents = f"{value:.10f}".split('.')[1][2:]
    return float(digits_after_cents)
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def second_algorithm(_products_total, receipt_total):
    """Round prices up to cents, then take back cents until the total matches.

    :param _products_total: list of objects with a numeric ``price`` (more
        than two decimals allowed); not modified.
    :param receipt_total: the total the rounded prices should add up to.
    :return: tuple ``(rounded_products, final_total)``; ``rounded_products``
        are copies, ``final_total`` is their sum (not rounded here).
    """
    _products_total_rounded = round_up_two_decimals(_products_total)

    current_total = sum(p.price for p in _products_total_rounded)

    if current_total == receipt_total:
        return _products_total_rounded, receipt_total

    difference = round(current_total - receipt_total, 2)
    # Count the overshoot in whole cents so the loop below cannot drift the
    # way repeated ``difference -= 0.01`` on a float can.
    cents_over = int(round(difference * 100))

    # Ranking key for each product, computed from the unrounded price.
    # NOTE(review): a product whose price had no digits past the cents gets
    # rest == 0 and therefore ranks first for a decrement; confirm this is
    # the intended ordering.
    ranking = [
        fractional_part_rest(p.price) - math.ceil(p.price)
        for p in _products_total
    ]
    # Visit products in ascending key order. Previously the sorted list was
    # built and then ignored: cents were always taken from the first products
    # in their original order.
    order = sorted(range(len(ranking)), key=ranking.__getitem__)

    for idx in order:
        if cents_over <= 0:
            break
        _products_total_rounded[idx].price -= 0.01
        cents_over -= 1

    # The total is taken before the final per-price rounding, as before.
    _final_total = sum(p.price for p in _products_total_rounded)

    for _product in _products_total_rounded:
        _product.price = round(_product.price, 2)

    return _products_total_rounded, _final_total
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def calculate_tips_and_taxes(items_table, total_amount, tax, tips):
    """Build products from a receipt table and fold tax and tips into prices.

    :param items_table: sequence of rows; column 0 is used as the name,
        column 3 as the quantity and column 5 as the price. A first cell of
        ``"No items"`` marks an empty receipt.
    :param total_amount: receipt total (number or string, ``,`` or ``.`` as
        decimal separator); ``None`` / ``"Not specified"`` count as 0.
    :param tax: tax amount, same accepted forms as ``total_amount``.
    :param tips: tips amount, same accepted forms as ``total_amount``.
    :return: ``(products, total)``. When the receipt is empty the result is
        ``([], 0)``. When total != sum of prices + tax + tips the products are
        returned unadjusted together with the sum of their prices.
    """
    products = []

    # len() rather than truthiness so any sized table type is accepted; an
    # empty table previously raised IndexError on items_table[0].
    if len(items_table) == 0 or items_table[0][0] == "No items":
        return products, 0

    if total_amount == "Not specified" or total_amount is None:
        total_amount = "0.0"

    if tax == "Not specified" or tax is None:
        tax = "0.0"

    if tips == "Not specified" or tips is None:
        # This used to assign to ``tax``, which wiped the tax and left
        # ``tips`` unparsable for the float() conversion below.
        tips = "0.0"

    for item in items_table:
        price = item[5]
        if price == "Not specified":
            price = "0.0"
        item_value = float(str(price).replace(",", ".")) if item[5] is not None else 0.0
        products.append(Product(item[0], item_value, item[3]))

    sum_of_product_prices = round(float(sum(p.price for p in products)), 2)
    total_amount = round(float(str(total_amount).replace(",", ".")), 2)
    tips = round(float(str(tips).replace(",", ".")), 2)
    # From here on ``tax`` means tax + tips: both are spread over the prices.
    tax = round(tips + round(float(str(tax).replace(",", ".")), 2), 2)
    if round(float(total_amount), 2) != round(float(sum_of_product_prices) + float(tax), 2):
        # The numbers do not reconcile, so no redistribution is attempted.
        return products, sum_of_product_prices

    products_total, subtotal = calculate_dish_price_with_taxes(products, taxes=float(tax),
                                                               grand_total=float(total_amount))
    final_prices, final_total = second_algorithm(products_total, subtotal)

    final_total = round(final_total, 2)
    return final_prices, final_total
|
app.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
from PIL import Image
|
| 8 |
+
|
| 9 |
+
from google_drive_client import GoogleDriveClient
|
| 10 |
+
from openai_service import OpenAIService
|
| 11 |
+
from utils import read_prompt_from_file, process_receipt_json, encode_image_to_jpeg_base64, save_to_excel, encode_image_to_webp_base64
|
| 12 |
+
from vertex_ai_service import VertexAIService
|
| 13 |
+
|
| 14 |
+
model_names = ["gemini-1.5-flash", "gemini-1.5-pro", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo"]
|
| 15 |
+
prompt_names = ["prompt_v1", "prompt_v2", "prompt_v3"]
|
| 16 |
+
example_list = [["./examples/" + example] for example in os.listdir("examples")]
|
| 17 |
+
prompt_default = read_prompt_from_file("prompt_v1.txt")
|
| 18 |
+
system_instruction = read_prompt_from_file("system_instruction.txt")
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def process_image(input_image, model_name, prompt_name, temperatura, system_instruction=None, current_prompt_text=None):
    """Send a receipt image to the selected model and unpack its JSON answer.

    :param input_image: path of the image file, or ``None`` if none was given.
    :param model_name: model id; names starting with ``"gpt"`` go to the
        OpenAI client, everything else to the Vertex AI client.
    :param prompt_name: name of the prompt file (without ``.txt``); defaults
        to ``"prompt_v1"`` when ``None``.
    :param temperatura: sampling temperature forwarded to the model client.
    :param system_instruction: system text; ``None`` is treated as ``""``.
    :param current_prompt_text: prompt text to send. When it is ``None`` or
        blank, the prompt is read from ``<prompt_name>.txt`` instead.
    :return: 8-tuple ``(model_name, result_json, store_info, items_table,
        message, update, update, update)``; the last three toggle the save
        buttons.
    """
    if system_instruction is None:
        system_instruction = ""
    if prompt_name is None:
        prompt_name = "prompt_v1"

    if input_image is None:
        # Every exit must produce the same eight values as the normal path;
        # the previous two-value return did not. The save buttons stay
        # disabled because there is no image to save.
        result = json.dumps({"error": "No image provided."})
        store_info, items_table, message = process_receipt_json(result)
        keep_disabled = gr.update(interactive=False)
        return (model_name, result, store_info, items_table, message,
                keep_disabled, keep_disabled, keep_disabled)

    # The text box content wins; the prompt file is only a fallback. The file
    # prompt used to be read and then unconditionally overwritten, so a blank
    # text box sent an empty prompt.
    if current_prompt_text is None or current_prompt_text.strip() == "":
        prompt = read_prompt_from_file(f"{prompt_name}.txt")
    else:
        prompt = current_prompt_text

    print("file name:", input_image)
    print("model_name:", model_name)
    print("prompt_name:", prompt_name)
    print("Temperatura:", temperatura)

    base64_image = encode_image_to_webp_base64(input_image)

    try:
        if model_name.startswith("gpt"):
            result = open_ai_client.process_image(base64_image, model_name, prompt, system_instruction, temperatura)
        else:
            result = vertex_ai_client.process_image(base64_image, model_name, prompt, system_instruction,
                                                    temperatura)
        # Round-trip through json to validate the answer and pretty-print it
        # without escaping non-ASCII characters.
        parsed_result = json.loads(result)
        result = json.dumps(parsed_result, ensure_ascii=False, indent=4)
        print(result)
    except Exception as e:
        # Boundary handler: any client or parsing failure becomes an error
        # JSON so the UI still receives a well-formed result.
        print(f"Exception occurred: {e}")
        result = json.dumps({"error": "Error processing: Check prompt or images"})

    store_info, items_table, message = process_receipt_json(result)
    print(store_info)
    print(items_table)

    return model_name, result, store_info, items_table, message, gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def save_flag_data(save_type, image, model_name, prompt_name, temperatura, current_prompt_text, model_output, json_output,
                   store_info_output, items_list, comments_output, system_instruction, flagging_dir="custom_flagged_data"):
    """Store a rated result locally and upload it to Google Drive.

    Writes three files into ``flagging_dir``: a copy of the image, a JSON file
    with the run metadata, and an Excel export of ``json_output``. All three
    are then uploaded to a fixed Drive folder.

    :param save_type: rating label stored with the record.
    :param image: path of the image file to copy.
    :param model_name: model id, stored and used in the file name.
    :param prompt_name: prompt name, stored and used in the file name.
    :param current_prompt_text: prompt text stored in the JSON record.
    :param json_output: model result stored in the JSON record and exported
        to Excel.
    :param comments_output: free-text comments stored in the JSON record.
    :param system_instruction: system text stored in the JSON record.
    :param flagging_dir: directory the three files are written to.
    :return: 4-tuple ``(update, update, update, links)``: the three updates
        disable the save buttons, ``links`` is a text listing the three upload
        results (``None`` for anything that was not uploaded).

    ``temperatura``, ``model_output``, ``store_info_output`` and
    ``items_list`` are accepted but not used.
    """
    save_button_update = gr.update(interactive=False)
    image_link, json_link, excel_link = None, None, None
    try:

        # List files in the directory (diagnostic output only).
        try:
            files = [f for f in os.listdir(flagging_dir) if os.path.isfile(os.path.join(flagging_dir, f))]
            if files:
                print("Files in directory:", flagging_dir)
                for file in files:
                    print(file)
            else:
                print(f"No files found in directory: {flagging_dir}")
        except Exception as e:
            print(f"Error listing files in directory: {e}")

        image_file_path = image
        print("save_type:", save_type)
        print("Image File Path:", image)
        print("prompt_name:", prompt_name)
        print("Model Name:", model_name)
        print("Result as JSON:", json_output)
        print("comments:", comments_output)
        print("system_instruction:", system_instruction)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        original_filename = os.path.basename(image_file_path)
        filename, file_extension = os.path.splitext(original_filename)
        base_filename = f"{filename}_{model_name}_{prompt_name}_{timestamp}"

        # Save image; the context manager closes the file handle afterwards.
        image_save_path = os.path.join(flagging_dir, f"{base_filename}{file_extension}")
        with Image.open(image_file_path) as source_image:
            source_image.save(image_save_path)

        if os.path.exists(image_save_path):
            with Image.open(image_save_path) as saved_image:
                image_size = saved_image.size
            print(f"Image saved at: {image_save_path}, Size: {image_size}")
        else:
            print(f"Failed to save image at: {image_save_path}")
            # Return the same 4-value shape as the normal exit (this used to
            # be ``return 0``, which does not fit the four wired outputs).
            links = f"Image: {image_link}\nJSON: {json_link}\nExcel: {excel_link}"
            return save_button_update, save_button_update, save_button_update, links

        # Save result as JSON
        json_file_path = os.path.join(flagging_dir, f"{base_filename}.json")
        data_to_save = {
            "image_name": f"{base_filename}{file_extension}",
            "prompt_name": prompt_name,
            "system_instruction": system_instruction,
            "prompt": current_prompt_text,
            "model_name": model_name,
            "result_json": json_output,
            "comments": comments_output,
            "save_type": save_type
        }

        data_to_save_encode = json.dumps(data_to_save, ensure_ascii=False, indent=4)
        print("data_to_save_encode: ", data_to_save_encode)

        with open(json_file_path, 'w', encoding='utf-8') as json_file:
            json_file.write(data_to_save_encode)

        excel_file_path = os.path.join(flagging_dir, f"{base_filename}.xlsx")
        save_to_excel(json_output, excel_file_path)

        # Upload files to Google Drive
        # NOTE(review): a fresh client is built on every call although a
        # module-level ``google_drive_client`` also exists; confirm whether
        # that is intended.
        google_drive_client_current = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
        if google_drive_client_current:
            try:
                image_folder_id = '19yiqYX_Z1rbHDxnFLJTvji0VcbhSO9cq'
                image_link = google_drive_client_current.upload_file(image_save_path, image_folder_id)
                json_link = google_drive_client_current.upload_file(json_file_path, image_folder_id)
                excel_link = google_drive_client_current.upload_file(excel_file_path, image_folder_id)
                print(f"Image uploaded to Google Drive. Link: {image_link}")
                print(f"JSON file uploaded to Google Drive. Link: {json_link}")
                print(f"Excel file uploaded to Google Drive. Link: {excel_link}")
            except Exception as e:
                print(f"Error uploading files to Google Drive: {e}")
        else:
            print(f"Error google_drive_client does not available")

    except Exception as e:
        # Best effort: a failed save is reported on stdout and the links that
        # were not produced stay ``None``.
        print(f"Error while saving flag data: {e}")
    links = f"Image: {image_link}\nJSON: {json_link}\nExcel: {excel_link}"
    return save_button_update, save_button_update, save_button_update, links
|
| 165 |
+
|
| 166 |
+
def update_prompt_from_radio(prompt_name):
    """Return the text of the prompt file for the selected prompt name.

    Any value other than ``"prompt_v2"`` or ``"prompt_v3"`` (including
    ``"prompt_v1"`` itself) loads ``prompt_v1.txt``.
    """
    prompt_file = "prompt_v1.txt"
    if prompt_name == "prompt_v2":
        prompt_file = "prompt_v2.txt"
    elif prompt_name == "prompt_v3":
        prompt_file = "prompt_v3.txt"
    return read_prompt_from_file(prompt_file)
|
| 175 |
+
|
| 176 |
+
# Service clients shared by the handlers in this module.
google_drive_client = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
vertex_ai_client = VertexAIService(json_key_path='GOOGLE_VERTEX_AI_KEY.json')

# The OpenAI key is read from a local file when that file exists; otherwise
# ``key`` stays None and is passed on as-is.
key = None
key_file_path = 'OPENAI_AI_KEY.txt'
if os.path.exists(key_file_path):
    try:
        # Explicit encoding so the result does not depend on the platform's
        # default locale.
        with open(key_file_path, 'r', encoding='utf-8') as key_file:
            key = key_file.read().strip()
    except (OSError, UnicodeDecodeError) as e:
        # An unreadable key file is reported but does not stop start-up here.
        print(f"Error reading file: {e}")

open_ai_client = OpenAIService(api_key=key)
|
| 189 |
+
|
| 190 |
+
with gr.Blocks() as iface:
|
| 191 |
+
gr.Markdown("# ReceptAI")
|
| 192 |
+
gr.Markdown("ReceptAI")
|
| 193 |
+
|
| 194 |
+
with gr.Row():
|
| 195 |
+
with gr.Column(scale=1):
|
| 196 |
+
image_input = gr.Image(type="filepath")
|
| 197 |
+
model_radio = gr.Radio(model_names, label="Choose model", value=model_names[0])
|
| 198 |
+
prompt_radio = gr.Radio(prompt_names, label="Choose prompt", value=prompt_names[0])
|
| 199 |
+
temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label="Temperatura", value=0.0)
|
| 200 |
+
system_instruction = gr.Textbox(label="System Instruction", visible=True, value=system_instruction)
|
| 201 |
+
custom_prompt = gr.Textbox(label="prompt text", visible=True, value=prompt_default)
|
| 202 |
+
with gr.Row():
|
| 203 |
+
submit_button = gr.Button("Submit")
|
| 204 |
+
|
| 205 |
+
with gr.Column(scale=2):
|
| 206 |
+
model_output = gr.Textbox(label="MODEL", lines=1, interactive=False)
|
| 207 |
+
json_output = gr.Textbox(label="Result as json")
|
| 208 |
+
store_info_output = gr.Textbox(label="Store Information", lines=4)
|
| 209 |
+
items_list = gr.Dataframe(
|
| 210 |
+
headers=["Item Name", "Category", "Unit Price", "Quantity", "Unit", "Total Price", "Discount", "Grand Total"],
|
| 211 |
+
label="Items List")
|
| 212 |
+
comments_output = gr.Textbox(label="Comments", visible=True, lines=4, interactive=True)
|
| 213 |
+
with gr.Row():
|
| 214 |
+
save_good_button = gr.Button(value="Save as Good", interactive=False)
|
| 215 |
+
save_average_button = gr.Button(value="Save as Average" , interactive=False)
|
| 216 |
+
save_poor_button = gr.Button(value="Save as Poor", interactive=False)
|
| 217 |
+
file_links_output = gr.Textbox(label="File Links", interactive=False, visible=True)
|
| 218 |
+
submit_button.click(fn=process_image,
|
| 219 |
+
inputs=[image_input, model_radio, prompt_radio, temperature_slider, system_instruction,
|
| 220 |
+
custom_prompt],
|
| 221 |
+
outputs=[model_output, json_output, store_info_output, items_list, comments_output,
|
| 222 |
+
save_good_button, save_average_button, save_poor_button])
|
| 223 |
+
common_inputs = [image_input, model_radio, prompt_radio, temperature_slider, custom_prompt, model_output,
|
| 224 |
+
json_output, store_info_output, items_list, comments_output, system_instruction]
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def save_flag_data_wrapper(save_type, image, model_name, prompt_name, temperatura, custom_prompt, model_output,
                           json_output, store_info_output, items_list, comments_output, system_instruction):
    """Forward a save-button click to ``save_flag_data``.

    ``save_type`` is supplied by the button-specific lambda; the remaining
    arguments are the values of the shared input components, passed through
    unchanged (``custom_prompt`` is handed over as the prompt text).

    :return: whatever ``save_flag_data`` returns for these arguments.
    """
    return save_flag_data(
        save_type, image, model_name, prompt_name, temperatura, custom_prompt, model_output, json_output,
        store_info_output, items_list, comments_output, system_instruction
    )
|
| 240 |
+
|
| 241 |
+
# Use the same common_inputs for all buttons but ensure the correct values are passed
|
| 242 |
+
save_good_button.click(
|
| 243 |
+
fn=lambda *args: save_flag_data_wrapper("Good", *args),
|
| 244 |
+
inputs=common_inputs,
|
| 245 |
+
outputs=[save_good_button, save_average_button, save_poor_button, file_links_output]
|
| 246 |
+
)
|
| 247 |
+
|
| 248 |
+
save_average_button.click(
|
| 249 |
+
fn=lambda *args: save_flag_data_wrapper("Average", *args),
|
| 250 |
+
inputs=common_inputs,
|
| 251 |
+
outputs=[save_good_button, save_average_button, save_poor_button, file_links_output]
|
| 252 |
+
)
|
| 253 |
+
|
| 254 |
+
save_poor_button.click(
|
| 255 |
+
fn=lambda *args: save_flag_data_wrapper("Poor", *args),
|
| 256 |
+
inputs=common_inputs,
|
| 257 |
+
outputs=[save_good_button, save_average_button, save_poor_button, file_links_output]
|
| 258 |
+
)
|
| 259 |
+
prompt_radio.change(fn=update_prompt_from_radio, inputs=[prompt_radio], outputs=[custom_prompt])
|
| 260 |
+
gr.Examples(examples=example_list,
|
| 261 |
+
inputs=[image_input, model_radio, prompt_radio, temperature_slider, custom_prompt])
|
| 262 |
+
|
| 263 |
+
iface.launch()
|
custom_flagged_data/test.txt
ADDED
|
File without changes
|
google_drive_client.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import tempfile
|
| 3 |
+
from google.oauth2 import service_account
|
| 4 |
+
from googleapiclient.discovery import build
|
| 5 |
+
from googleapiclient.http import MediaFileUpload
|
| 6 |
+
from googleapiclient.errors import HttpError
|
| 7 |
+
|
| 8 |
+
class GoogleDriveClient:
    """Small wrapper around the Google Drive v3 API for uploading files.

    Authenticates with a service-account key taken either from a JSON file or
    from an environment variable that holds the JSON text.
    """

    # OAuth scope requested for every credential this client builds.
    _SCOPES = ['https://www.googleapis.com/auth/drive.file']

    def __init__(self, json_key_path=None, json_key_env_var='GOOGLE_SERVICE_ACCOUNT_KEY'):
        """
        Initializes the Google Drive client.

        :param json_key_path: Path to the JSON file (optional)
        :param json_key_env_var: Environment variable name containing the JSON key (default 'GOOGLE_SERVICE_ACCOUNT_KEY')
        :raises ValueError: if no key file is found and the environment variable is not set
        """
        # NOTE(review): the fallback variable is named GOOGLE_VERTEX_KEY_PATH,
        # which looks copied from a Vertex AI client; confirm it is intended
        # for Drive as well.
        self.json_key_path = json_key_path or os.getenv('GOOGLE_VERTEX_KEY_PATH')
        self.json_key_env_var = json_key_env_var
        self.service = self._authenticate_google_drive()

    def _authenticate_google_drive(self):
        """
        Authenticates using the JSON key from a file or environment variable.

        :return: Google Drive API service object
        :raises ValueError: if the key file does not exist and the environment variable is empty or unset
        """
        if self.json_key_path and os.path.isfile(self.json_key_path):
            # Authenticate using the file
            creds = service_account.Credentials.from_service_account_file(
                self.json_key_path,
                scopes=self._SCOPES
            )
        else:
            # Get JSON key from environment variable
            json_key = os.getenv(self.json_key_env_var)

            if not json_key:
                raise ValueError(f"Environment variable {self.json_key_env_var} is not set.")

            # Build the credentials directly from the parsed key. Writing the
            # secret to a temporary file first left it on disk whenever
            # credential loading raised before the file was removed.
            import json

            creds = service_account.Credentials.from_service_account_info(
                json.loads(json_key),
                scopes=self._SCOPES
            )

        # Create and return the Google Drive API service object
        return build('drive', 'v3', credentials=creds)

    def upload_file(self, file_path, folder_id):
        """
        Uploads a file to Google Drive.

        :param file_path: Path to the local file
        :param folder_id: ID of the folder on Google Drive
        :return: URL of the uploaded file, or an error message string if the API call raised HttpError
        """
        try:
            file_metadata = {
                'name': os.path.basename(file_path),
                'parents': [folder_id]  # ID of the folder to upload the file to
            }
            media = MediaFileUpload(file_path, mimetype='application/octet-stream')

            # Upload the file; only the new file's id is requested back.
            file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute()

            # Get the file ID and construct the URL
            file_id = file.get('id')
            file_link = f'https://drive.google.com/file/d/{file_id}/view?usp=drive_link'

            return file_link
        except HttpError as error:
            # Callers receive the failure as text in place of the link.
            return f"An error occurred: {error}"
|
| 79 |
+
except HttpError as error:
|
| 80 |
+
return f"An error occurred: {error}"
|
| 81 |
+
|
| 82 |
+
# Example usage
|
| 83 |
+
if __name__ == '__main__':
    # Manual example: upload one local file into a fixed Drive folder and
    # print what upload_file returned.
    folder_id = '19yiqYX_Z1rbHDxnFLJTvji0VcbhSO9cq'  # target folder on Google Drive
    file_path = './file.txt'  # local file to upload

    client = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
    file_link = client.upload_file(file_path, folder_id)
    print(f'File upload result: {file_link}')
|
| 94 |
+
|
gpt_processing.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
# Read the API key from the environment and report whether it is present;
# the client is created with whatever value was found.
openai_api_key = os.environ.get("OPENAI_API_KEY")
print("OPENAI_API_KEY found." if openai_api_key else "OPENAI_API_KEY not found.")

client = OpenAI(api_key=openai_api_key)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def gpt_process_image(input_image64, model_name, prompt, system_instruction="", temperatura=0.0):
    """Ask an OpenAI chat model about a base64 image and return JSON text.

    :param input_image64: base64-encoded image data; ``None`` short-circuits.
    :param model_name: model id passed to the chat completions call.
    :param prompt: user prompt text sent together with the image.
    :param system_instruction: text of the system message.
    :param temperatura: sampling temperature.
    :return: ``(None, "No objects detected.")`` when no image is given;
        otherwise the model's JSON object, extended with ``input_tokens``,
        ``output_tokens`` and ``total_tokens``, dumped with 4-space indent.
    """
    if input_image64 is None:
        return None, "No objects detected."

    print("model_name:", model_name)
    print("Temperatura:", temperatura)

    # Messages are assembled first: one system message, then one user message
    # carrying the prompt text followed by the image as a data URL.
    system_message = {"role": "system", "content": f"{system_instruction}"}
    text_part = {"type": "text", "text": f"{prompt}"}
    image_part = {"type": "image_url", "image_url": {
        "url": f"data:image/jpg;base64,{input_image64}"}
    }
    user_message = {"role": "user", "content": [text_part, image_part]}

    response = client.chat.completions.create(
        model=model_name,
        messages=[system_message, user_message],
        temperature=temperatura,
        response_format={"type": "json_object"}
    )

    json_content = json.loads(response.choices[0].message.content)

    # Attach the token usage reported by the API to the returned object.
    usage = response.usage
    json_content['input_tokens'] = usage.prompt_tokens
    json_content['output_tokens'] = usage.completion_tokens
    json_content['total_tokens'] = usage.total_tokens

    print(json_content)

    return json.dumps(json_content, indent=4)
|
openai_service.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import base64
|
| 3 |
+
import os
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
class OpenAIService:
    """Small wrapper around the OpenAI chat-completions API for image prompts."""

    def __init__(self, api_key=None):
        """Create the underlying OpenAI client.

        :param api_key: OpenAI API key; when omitted, the OPENAI_API_KEY
            environment variable is used instead
        :raises ValueError: if no key is given and the variable is unset or empty
        """
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY not found.")
        print("OPENAI_API_KEY was found.")

        self.client = OpenAI(api_key=self.api_key)

    def process_image(
        self,
        input_image64,
        model_name,
        prompt,
        system="You are receipt recognizer!",
        temperatura=0.0,
        mime_type="image/webp",
    ):
        """Send a Base64-encoded image and a prompt to a chat model.

        :param input_image64: Base64 string of the image to analyse
        :param model_name: Name of the chat-completions model to call
        :param prompt: User prompt sent together with the image
        :param system: Text of the system message
        :param temperatura: Sampling temperature forwarded to the API
        :param mime_type: Media type written into the image data URL; defaults
            to the previously hard-coded "image/webp" so existing callers are
            unaffected, and lets callers label JPEG/PNG payloads correctly
        :return: ``(None, "No objects detected.")`` when ``input_image64`` is
            empty or None; otherwise a JSON string (indent=4) with the model's
            JSON reply plus ``input_tokens``, ``output_tokens`` and
            ``total_tokens``
        :raises json.JSONDecodeError: if the reply content is not valid JSON
        """
        if not input_image64:
            # NOTE(review): tuple here vs. string on success; kept for callers.
            return None, "No objects detected."

        print("Model name:", model_name)
        print("system:", system)
        print("Temperature:", temperatura)

        image_url = f"data:{mime_type};base64,{input_image64}"

        response = self.client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": f"{system}"},
                {"role": "user", "content": [
                    {"type": "text", "text": f"{prompt}"},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ]},
            ],
            temperature=temperatura,
            # Ask the API for a JSON object so the reply can be parsed below.
            response_format={"type": "json_object"},
        )

        json_content = json.loads(response.choices[0].message.content)

        # Attach token usage so callers can track cost next to the parsed data.
        json_content['input_tokens'] = response.usage.prompt_tokens
        json_content['output_tokens'] = response.usage.completion_tokens
        json_content['total_tokens'] = response.usage.total_tokens

        print(json_content)

        return json.dumps(json_content, indent=4)
|
| 48 |
+
|
| 49 |
+
if __name__ == "__main__":
    # Manual smoke test: send one example receipt to the model and print the reply.
    processor = OpenAIService()

    # Image processing
    # NOTE(review): the file is a .jpg read as raw bytes, while process_image
    # labels the payload "image/webp" — confirm the API tolerates the mismatch.
    image_path = "./examples/lidl2.jpg"
    with open(image_path, "rb") as image_file:
        input_image64 = base64.b64encode(image_file.read()).decode('utf-8')

    system = "You are receipt recognizer "
    prompt = "Recognize the receipt and provide result as json "
    result = processor.process_image(input_image64, "gpt-4o-mini", prompt, system, 0.0)
    print(f'Image processing result: {result}')

    # Re-serialise without ASCII escaping so non-Latin receipt text is readable.
    # The earlier unicode_escape round-trip was never printed, and it also
    # unescaped quotes/newlines inside strings, yielding invalid JSON text.
    print(json.dumps(json.loads(result), ensure_ascii=False, indent=4))
|
process_images_gemini_multithread.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import base64
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
| 6 |
+
|
| 7 |
+
from utils import encode_image_to_jpeg_base64
|
| 8 |
+
# Import your VertexAIService class
|
| 9 |
+
from vertex_ai_service import VertexAIService # Replace with the module name where VertexAIService is defined
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def process_image_file(file_path, client):
    """
    Processes an image file and returns the path to the JSON file with results.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :return: Path to the saved JSON file
    :raises Exception: if the image cannot be encoded or the service reports an error
    """
    input_image64 = encode_image_to_jpeg_base64(file_path)
    # The encoder reports failure as a (None, message) tuple instead of raising;
    # stop here rather than forwarding that tuple as the image payload.
    if isinstance(input_image64, tuple):
        raise Exception(f"Failed to encode {file_path}: {input_image64[1]}")

    # Processing settings
    prompt = "Read the text"
    system = "You are receipt recognizer"

    # Call image processing
    result_img, error = client.process_image(input_image64, "gemini-1.5-flash", prompt, system, 0.0)

    if error:
        raise Exception(error)

    # The result is written next to the image, with a .json extension
    json_file_path = os.path.splitext(file_path)[0] + ".json"

    # Write the result to a JSON file
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json_file.write(result_img)

    return json_file_path
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def process_folder_images(folder_path, json_key_path):
    """
    Processes all images in the specified folder concurrently.

    Each file is submitted to a thread pool and handled by
    process_image_file_with_retry (up to 5 retries); one line per file is
    printed reporting success or failure.

    :param folder_path: Path to the folder containing images
    :param json_key_path: Path to the JSON key for Vertex AI authentication
    """
    # Initialize Vertex AI client
    client = VertexAIService(json_key_path=json_key_path)

    # Match extensions case-insensitively so files such as "IMG_1.JPG" are
    # picked up as well.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = [
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
    ]

    # Use ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor() as executor:
        future_to_file = {
            executor.submit(process_image_file_with_retry, file_path, client, 5): file_path
            for file_path in image_files
        }

        # Report each file as soon as its future finishes, in completion order.
        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                json_file_path = future.result()
                print(f"Processed: {file_path}, result saved to: {json_file_path}")
            except Exception as exc:
                # One failing file must not abort the rest of the batch.
                print(f"Failed to process file {file_path}. Error: {exc}")
|
| 68 |
+
|
| 69 |
+
import random
|
| 70 |
+
|
| 71 |
+
def process_image_file_with_retry(file_path, client, max_retries=5):
    """
    Processes an image file with retry logic and returns the path to the JSON file with results.

    Only errors whose message contains "429" are retried, with an exponential
    back-off (2 ** attempt seconds plus up to one second of jitter). Any other
    error is re-raised immediately.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :param max_retries: Maximum number of attempts before giving up on quota errors
    :return: Path to the saved JSON file
    :raises Exception: "Max retries exceeded ..." once every attempt hit a
        quota error (chained to the last such error), or the original
        exception for any non-quota failure
    """
    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return process_image_file(file_path, client)
        except Exception as exc:
            if "429" not in str(exc):
                # Not a quota problem: propagate with the original traceback.
                raise
            last_error = exc
            if attempt == max_retries:
                # No attempt follows, so do not sleep before giving up.
                break
            wait_time = 2 ** attempt + random.uniform(0, 1)
            print(f"Quota exceeded for {file_path}. Retrying in {wait_time:.2f} seconds... (Attempt {attempt}/{max_retries})")
            time.sleep(wait_time)

    raise Exception(f"Max retries exceeded for file {file_path}") from last_error
|
| 95 |
+
|
| 96 |
+
# Call the function to process the folder
|
| 97 |
+
if __name__ == '__main__':
    # Process every image under ./examples, authenticating with the key file
    # GOOGLE_VERTEX_AI_KEY.json; edit both values to match your setup.
    process_folder_images(
        folder_path='./examples',
        json_key_path='GOOGLE_VERTEX_AI_KEY.json',
    )
|
process_images_gemini_one_hread.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import base64
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
|
| 6 |
+
from utils import encode_image_to_jpeg_base64
|
| 7 |
+
# Import your VertexAIService class
|
| 8 |
+
from vertex_ai_service import VertexAIService # Replace with the module name where VertexAIService is defined
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def process_image_file(file_path, client):
    """
    Processes an image file and returns the path to the JSON file with results.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :return: Path to the saved JSON file
    :raises Exception: if the image cannot be encoded or the service reports an error
    """
    input_image64 = encode_image_to_jpeg_base64(file_path)
    # The encoder reports failure as a (None, message) tuple instead of raising;
    # stop here rather than forwarding that tuple as the image payload.
    if isinstance(input_image64, tuple):
        raise Exception(f"Failed to encode {file_path}: {input_image64[1]}")

    # Processing settings
    prompt = "Read the text"
    system = "You are receipt recognizer"

    # Call image processing
    response = client.process_image(input_image64, "gemini-1.5-flash", prompt, system, 0.0)

    # NOTE(review): the multithreaded script unpacks this call as
    # (result, error); accept that shape as well as a bare string so a tuple
    # is never handed to file.write(). Confirm against VertexAIService.
    if isinstance(response, tuple):
        result_img, error = response
        if error:
            raise Exception(error)
    else:
        result_img = response

    # The result is written next to the image, with a .json extension
    json_file_path = os.path.splitext(file_path)[0] + ".json"

    # Write the result to a JSON file
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json_file.write(result_img)

    return json_file_path
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def process_folder_images_sequentially(folder_path, json_key_path):
    """
    Processes all images in the specified folder sequentially.

    After each successfully processed file the function pauses for 60 seconds
    before moving on; one line per file is printed reporting success or failure.

    :param folder_path: Path to the folder containing images
    :param json_key_path: Path to the JSON key for Vertex AI authentication
    """
    # Initialize Vertex AI client
    client = VertexAIService(json_key_path=json_key_path)

    # Match extensions case-insensitively so files such as "IMG_1.JPG" are
    # picked up as well.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = [
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.lower().endswith(image_extensions)
    ]

    # Process each image file sequentially
    last_index = len(image_files) - 1
    for index, file_path in enumerate(image_files):
        try:
            json_file_path = process_image_file(file_path, client)
            print(f"Processed: {file_path}, result saved to: {json_file_path}")
        except Exception as exc:
            # One failing file must not abort the rest of the batch.
            print(f"Failed to process file {file_path}. Error: {exc}")
            continue

        # Pause between successful requests; nothing follows the last file,
        # so waiting there would only delay exit.
        if index != last_index:
            time.sleep(60)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Call the function to process the folder
|
| 62 |
+
if __name__ == '__main__':
    # Process every image under ./examples one at a time, authenticating with
    # the key file GOOGLE_VERTEX_AI_KEY.json; edit both values to match your setup.
    process_folder_images_sequentially(
        folder_path='./examples',
        json_key_path='GOOGLE_VERTEX_AI_KEY.json',
    )
|
prompt_v1.txt
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Task: Extract structured information from receipt images.
|
| 2 |
+
Receipts: May be in various languages, including non-Latin scripts, and have diverse formats.
|
| 3 |
+
Information to Extract:
|
| 4 |
+
- Store name
|
| 5 |
+
- Store address
|
| 6 |
+
- Currency (e.g., USD, EUR)
|
| 7 |
+
- date (in format "YYYY.MM.DD HH:MM:SS")
|
| 8 |
+
- tax
|
| 9 |
+
- Purchased items:
|
| 10 |
+
- Name
|
| 11 |
+
- Price per unit (format: 0.00)
|
| 12 |
+
- Quantity or weight
|
| 13 |
+
- unit of measurement (string)
|
| 14 |
+
- Total price (format: 0.00)
|
| 15 |
+
- Discount applied (if any, format: 0.00)
|
| 16 |
+
- Category (e.g., groceries, electronics, dining) — determine based on item name / description or context
|
| 17 |
+
- Total amount (format: 0.00)
|
| 18 |
+
- Total discount (format: 0.00)
|
| 19 |
+
- Tax
|
| 20 |
+
- Tips (if tips are included in the Total, write the tip amount; if tips are not included in the Total, write 0.00)
|
| 21 |
+
Output Format: JSON object with the following structure:
|
| 22 |
+
{
|
| 23 |
+
"store_name": "Store Name",
|
| 24 |
+
"store_address": "Store Address",
|
| 25 |
+
"currency": "Currency",
|
| 26 |
+
"date_time": "YYYY.MM.DD HH:MM:SS",
|
| 27 |
+
"payment_method": "card" or "cash",
|
| 28 |
+
"items": [
|
| 29 |
+
{
|
| 30 |
+
"name": "Item Name",
|
| 31 |
+
"unit_price": 0.00,
|
| 32 |
+
"quantity": 0,
|
| 33 |
+
"unit_of_measurement": "Unit of Measurement",
|
| 34 |
+
"total_price": 0.00,
|
| 35 |
+
"discount": 0.00,
|
| 36 |
+
"category": "Category Name"
|
| 37 |
+
}
|
| 38 |
+
],
|
| 39 |
+
"total_amount": 0.00,
|
| 40 |
+
"total_discount": 0.00,
|
| 41 |
+
"tax": 0.00,
|
| 42 |
+
"tips": 0.00
|
| 43 |
+
}
|
| 44 |
+
If no receipt is detected: Return "Receipt not found."
|
| 45 |
+
Additional Notes:
|
| 46 |
+
1. If the receipt is in a non-Latin script, extract the information in its original form unless translation is required.
|
| 47 |
+
2. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
|
| 48 |
+
Write whole json with information about all products.
|
| 49 |
+
3. Note that the name of an item may be a few lines long, but the item necessarily has a price in last line.
|
prompt_v2.txt
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Task: Extract structured information from receipt images.
|
| 2 |
+
Receipts: May be in various languages, including non-Latin scripts, and have diverse formats.
|
| 3 |
+
Information to Extract:
|
| 4 |
+
- Store name
|
| 5 |
+
- Store address
|
| 6 |
+
- Currency (e.g., USD, EUR)
|
| 7 |
+
- date (in format "YYYY.MM.DD HH:MM:SS")
|
| 8 |
+
- tax
|
| 9 |
+
- tips (if tips are included in the Total, write the tip amount; if tips are not included in the Total, write 0.00)
|
| 10 |
+
- Purchased items:
|
| 11 |
+
- Name
|
| 12 |
+
- Price per unit (format: 0.00)
|
| 13 |
+
- Quantity or weight
|
| 14 |
+
- unit of measurement (string)
|
| 15 |
+
- Total price (format: 0.00)
|
| 16 |
+
- Discount applied (if any, format: 0.00)
|
| 17 |
+
- Category (from the following list):
|
| 18 |
+
* Groceries
|
| 19 |
+
* Produce
|
| 20 |
+
* Meat
|
| 21 |
+
* Seafood
|
| 22 |
+
* Dairy
|
| 23 |
+
* Bakery
|
| 24 |
+
* Canned goods
|
| 25 |
+
* Frozen foods
|
| 26 |
+
* Beverages
|
| 27 |
+
* Snacks
|
| 28 |
+
* Cleaning supplies
|
| 29 |
+
* Personal care products
|
| 30 |
+
* Electronics
|
| 31 |
+
* Clothing
|
| 32 |
+
* Dining
|
| 33 |
+
* Home goods
|
| 34 |
+
* Other (specify if not in the list)
|
| 35 |
+
- Total amount (format: 0.00)
|
| 36 |
+
- Total discount (format: 0.00)
|
| 37 |
+
Output Format: JSON object with the following structure:
|
| 38 |
+
{
|
| 39 |
+
"store_name": "Store Name",
|
| 40 |
+
"store_address": "Store Address",
|
| 41 |
+
"currency": "Currency",
|
| 42 |
+
"date_time": "YYYY.MM.DD HH:MM:SS",
|
| 43 |
+
"payment_method": "card" or "cash",
|
| 44 |
+
"items": [
|
| 45 |
+
{
|
| 46 |
+
"name": "Item Name",
|
| 47 |
+
"unit_price": 0.00,
|
| 48 |
+
"quantity": 0,
|
| 49 |
+
"unit_of_measurement": "Unit of Measurement",
|
| 50 |
+
"total_price": 0.00,
|
| 51 |
+
"discount": 0.00,
|
| 52 |
+
"category": "Category Name"
|
| 53 |
+
}
|
| 54 |
+
],
|
| 55 |
+
"total_amount": 0.00,
|
| 56 |
+
"total_discount": 0.00,
|
| 57 |
+
"tax": 0.00,
|
| 58 |
+
"tips": 0.00
|
| 59 |
+
}
|
| 60 |
+
If no receipt is detected: Return "Receipt not found."
|
| 61 |
+
Additional Notes:
|
| 62 |
+
1. If the receipt is in a non-Latin script, extract the information in its original form unless translation is required.
|
| 63 |
+
2. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
|
| 64 |
+
Write whole json with information about all products.
|
prompt_v3.txt
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#Your Task: Receipt Recognition and Data Extraction
|
| 2 |
+
|
| 3 |
+
You are tasked with extracting structured information from receipts. Receipts will come from various countries, in different languages, and can have different layouts and formats. Your goal is to parse the receipt text, identify the receipt type (store, cafe/restaurant, or payment for services), and return the data in JSON format with the required fields. Follow the specific instructions below to ensure accurate extraction.
|
| 4 |
+
|
| 5 |
+
#Required Fields:
|
| 6 |
+
|
| 7 |
+
1. Receipt Type: Identify the type of receipt. It could be from:
|
| 8 |
+
* Store: Typically involves grocery or retail items.
|
| 9 |
+
* Cafe/Restaurant: Typically involves food and beverage items, table numbers, or tipping sections.
|
| 10 |
+
* Payment for Services: This type of receipt may involve service fees or professional services.
|
| 11 |
+
2. Receipt Number: Extract the unique receipt number, typically found at the top of the receipt.
|
| 12 |
+
3. Store/Business Name: Extract the name of the store, cafe, restaurant, or service provider.
|
| 13 |
+
4. Store Address: Extract the address of the store, including city and country if available.
|
| 14 |
+
5. Date: Extract the date of the transaction and format it as YYYY-MM-DD HH:MM.
|
| 15 |
+
6. Currency: Extract the currency if explicitly mentioned (e.g., EUR, USD). If the currency is not specified, detect the language of the receipt and infer the currency based on the country where the language is predominantly used. For example, if the receipt is in Ukrainian, set the currency to UAH (Ukrainian Hryvnia).
|
| 16 |
+
7. Payment Method: Identify whether the payment was made by "card" or "cash."
|
| 17 |
+
8. Total Amount: Extract the total amount of the transaction. This is typically located at the end of the receipt, often highlighted in bold or a larger font.
|
| 18 |
+
9. Total Discount: Extract the total discount if explicitly mentioned. If not, calculate the total discount by summing up the discounts for individual items.
|
| 19 |
+
10. Tax: Extract the total tax amount if it is listed on the receipt.
|
| 20 |
+
|
| 21 |
+
#Item-Level Details:
|
| 22 |
+
|
| 23 |
+
For each item on the receipt, extract the following details:
|
| 24 |
+
|
| 25 |
+
1. Item Name: Extract the full name of each item. Some items may have names split across multiple lines; in this case, concatenate the lines until you encounter a quantity or unit of measurement (e.g., "2ks"), which marks the end of the item name and may appear on the same line or on the next line. Extract the full name up to such a marker, for example "1 ks" or "1 ks * 2".
|
| 26 |
+
2. Unit Price: Extract the price per unit for each item.
|
| 27 |
+
3. Quantity: Extract the quantity of each item, including the unit of measurement (e.g., "ks" for pieces, "kg" for kilograms).
|
| 28 |
+
4. Price: Extract the final price for each item.
|
| 29 |
+
5. Discount: Extract any discount applied to the item. If no discount is provided, set it to 0.
|
| 30 |
+
6. Category: Automatically assign a category based on the item name. For groceries, assign relevant subcategories such as Dairy, Bakery, Fruits, etc. If this receipt was from restaurant - you should put category only from this list: Food, Drinks.
|
| 31 |
+
|
| 32 |
+
#Special Cases:
|
| 33 |
+
|
| 34 |
+
1. Cafe/Restaurant Receipts: If the receipt is from a cafe or restaurant, handle additional fields like:
|
| 35 |
+
* Table Number: Extract the table number if available, often printed near the top of the receipt.
|
| 36 |
+
* Tips: Extract any tip amounts explicitly listed or infer from the total paid amount minus the original bill amount.
|
| 37 |
+
* Service Charges: Some restaurants may include an automatic service charge, which should be listed separately from the tax or tips.
|
| 38 |
+
* Order Type: Identify whether the order was "dine-in" or "takeaway."
|
| 39 |
+
2. Missing Currency: If no currency is mentioned on the receipt, infer the local currency by detecting the language and country of origin. For example, a receipt in French would use EUR, while one in Ukrainian would use UAH.
|
| 40 |
+
3. Multi-line Item Names: If an item name spans multiple lines, merge the lines to form the complete name. Stop merging when a quantity or unit of measurement is encountered.
|
| 41 |
+
4. Total Amount: The total amount is often larger than other numbers or displayed in bold at the bottom of the receipt. Make sure to capture this accurately.
|
| 42 |
+
5. Total Discount: If no total discount is listed, sum the discounts for each individual item.
|
| 43 |
+
6. Rounding Adjustments: Some receipts may include a "rounding" line item, where the total amount is adjusted (typically for cash payments) to avoid dealing with fractions of currency (e.g., rounding to the nearest 0.05 in some countries). If a rounding adjustment is present, extract the value of the rounding adjustment and reflect it in the total amount. For example:
|
| 44 |
+
* Total Before Rounding: 19.97
|
| 45 |
+
* Rounding: -0.02
|
| 46 |
+
* Final Total: 19.95 If the rounding adjustment is found, include it as a separate field in the JSON output under "rounding_adjustment", and ensure that the "total_amount" reflects the final adjusted total.
|
| 47 |
+
7. Taxes: Receipts can handle taxes in various ways, and the system should be prepared to capture these scenarios:
|
| 48 |
+
* Tax-Inclusive Pricing: In some countries or for certain receipts, taxes are already included in the item price and not listed separately. If the receipt mentions that taxes are included in prices, record the "tax" field as 0 and note that taxes are included in the item prices.
|
| 49 |
+
* Multiple Tax Rates: Some receipts may include multiple tax rates (e.g., different VAT rates for different items). In this case, extract each tax rate and the corresponding tax amounts, and store them in a separate list of tax breakdowns. For example, the receipt might show "5% VAT" and "15% VAT" for different categories of goods:
|
| 50 |
+
** "taxes": [{"rate": "5%", "amount": 1.00}, {"rate": "15%", "amount": 3.50}]
|
| 51 |
+
* Missing Tax Information: In some cases, the receipt might not clearly mention taxes, but you may infer them based on standard rates in the country of origin. If no explicit tax amount is listed and you are unable to infer it, set the tax to "unknown" or null in the JSON output.
|
| 52 |
+
* Tax-Exempt Items: Some items on the receipt may be tax-exempt. If this is indicated, ensure that these items are excluded from any tax calculations. Note these in the item-level details with "tax_exempt": true and make sure the "tax" field reflects the correct amount for taxable items only.
|
| 53 |
+
* Service Charges vs. Taxes: Sometimes service charges may be listed separately from taxes (common in restaurants). Ensure that service charges are not included in the tax amount, and store them under the "service_charge" field.
|
| 54 |
+
* Tax Breakdown and Total: If both individual item taxes and a total tax amount are listed, the system should ensure consistency between the sum of item-level taxes and the total tax listed at the bottom of the receipt.
|
| 55 |
+
8. In certain receipt formats, the quantity and unit price may appear before the item name. When processing such receipts, the goal is to correctly extract the quantity, unit price, and item name in their proper order. For example, if one line of the receipt shows "5 * 23.00 = 115.0" and the next line displays "Milk," the system should interpret this as:
|
| 56 |
+
* Quantity: 5 units
|
| 57 |
+
* Unit Price: 23.00
|
| 58 |
+
* Item Name: Milk
|
| 59 |
+
* Total Price: 115.0 This approach should be applied consistently throughout the entire receipt to extract data accurately.
|
| 60 |
+
|
| 61 |
+
#JSON Output Format:
|
| 62 |
+
|
| 63 |
+
{
|
| 64 |
+
"receipt_type": "string",
|
| 65 |
+
"receipt_number": "string",
|
| 66 |
+
"store_name": "string",
|
| 67 |
+
"store_address": "string",
|
| 68 |
+
"date_time": "string",
|
| 69 |
+
"currency": "string",
|
| 70 |
+
"payment_method": "string",
|
| 71 |
+
"total_amount": "number",
|
| 72 |
+
"total_discount": "number",
|
| 73 |
+
"tax": "number",
|
| 74 |
+
"taxes": [
|
| 75 |
+
{
|
| 76 |
+
"rate": "string",
|
| 77 |
+
"amount": "number"
|
| 78 |
+
}
|
| 79 |
+
],
|
| 80 |
+
"rounding_adjustment": "number",
|
| 81 |
+
"rounded_total_amount": "number",
|
| 82 |
+
"items": [
|
| 83 |
+
{
|
| 84 |
+
"name": "string",
|
| 85 |
+
"unit_price": "number",
|
| 86 |
+
"quantity": {
|
| 87 |
+
"amount": "number",
|
| 88 |
+
"unit_of_measurement": "string"
|
| 89 |
+
},
|
| 90 |
+
"price": "number",
|
| 91 |
+
"discount": "number",
|
| 92 |
+
"category": "string",
|
| 93 |
+
"tax_exempt": "boolean"
|
| 94 |
+
}
|
| 95 |
+
],
|
| 96 |
+
"cafe_additional_info": {
|
| 97 |
+
"table_number": "string",
|
| 98 |
+
"tips": "number",
|
| 99 |
+
"service_charge": "number",
|
| 100 |
+
"order_type": "string"
|
| 101 |
+
}
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
#Additional Notes:
|
| 105 |
+
|
| 106 |
+
1. You should handle receipts in various languages and from different countries.
|
| 107 |
+
2. Pay special attention to formatting differences and edge cases, such as multi-line item names, missing currency symbols, or cafe/restaurant-specific information.
|
| 108 |
+
3. Always ensure the output is well-structured and follows the JSON format provided.
|
| 109 |
+
4. The "rounding_adjustment" field should reflect the value by which the total was adjusted due to rounding. If no rounding adjustment is present, it can be set to 0 or omitted from the output.
|
| 110 |
+
5. Ensure that the final "total_amount" field reflects the total after any rounding adjustment has been applied.
|
| 111 |
+
6. Inclusive Taxes: If taxes are included in the item prices, set the "tax" field to 0 and adjust the item prices accordingly.
|
| 112 |
+
7. Multiple Tax Rates: The "taxes" field provides a detailed breakdown for receipts with different tax rates. This field is optional and can be excluded if only a single tax amount is listed.
|
| 113 |
+
8. Tax-Exempt Items: Mark tax-exempt items with the "tax_exempt": true field.
|
| 114 |
+
9. Service Charges vs. Taxes: Ensure that service charges are captured separately from taxes in the "service_charge" field.
|
| 115 |
+
10. Return the full JSON object with all available information. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
|
| 116 |
+
11. Your final response should be in valid JSON format with no additional text.
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch==2.4.1
|
| 2 |
+
torchvision==0.19.1
|
| 3 |
+
gradio==4.44.0
|
| 4 |
+
numpy==1.26.4
|
| 5 |
+
opencv-python==4.9.0.80
|
| 6 |
+
mediapipe==0.10.7
|
| 7 |
+
openai==1.45.0
|
| 8 |
+
google-api-python-client==2.145.0
|
| 9 |
+
google-auth-httplib2==0.2.0
|
| 10 |
+
google-auth-oauthlib==1.2.1
|
| 11 |
+
google-cloud-aiplatform==1.67.0
|
| 12 |
+
aiohttp==3.10.5
|
| 13 |
+
openpyxl==3.1.5
|
| 14 |
+
torch==2.4.1
|
| 15 |
+
torchvision==0.19.1
|
system_instruction.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
You are a receipt recognizer.
|
utils.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import json
|
| 3 |
+
import pandas as pd
|
| 4 |
+
|
| 5 |
+
import base64
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
from PIL import Image
|
| 8 |
+
from algorithm import receipt_calculation
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def read_prompt_from_file(file_path):
    """
    Reads a UTF-8 text file and returns its full contents.

    :param file_path: Path to the prompt file
    :return: The file contents as a single string
    """
    with open(file_path, mode='r', encoding='utf-8') as prompt_file:
        prompt_text = prompt_file.read()
    return prompt_text
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def encode_image(image_path):
    """
    Reads a file's raw bytes and returns them as a Base64 string.

    The bytes are encoded as-is; the image is not decoded or converted.

    :param image_path: Path to the image file
    :return: Base64 encoded string of the file contents
    """
    with open(image_path, "rb") as source:
        raw_bytes = source.read()
    return base64.b64encode(raw_bytes).decode('utf-8')
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def encode_image_from_gradio(input_image):
    """
    Encodes an in-memory image array to JPEG and then to a Base64 string.

    :param input_image: Array-like pixel data (cast to uint8 before use), or None
        — presumably the numpy array handed over by a Gradio image input; confirm with the caller
    :return: Base64 encoded string of the JPEG image, or None when no image is given
    """
    if input_image is None:
        return None

    pil_image = Image.fromarray(np.uint8(input_image))

    # JPEG has no alpha channel, so saving an RGBA image fails; drop alpha the
    # same way the file-based encoders in this module do.
    if pil_image.mode == 'RGBA':
        pil_image = pil_image.convert('RGB')

    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")

    return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def encode_image(image_path):
    """
    Returns the Base64 text form of the raw bytes stored at image_path.

    :param image_path: Path to the image file
    :return: Base64 encoded string of the file contents
    """
    # NOTE(review): this repeats an identical encode_image defined earlier in
    # this module; being later, this definition is the one bound at import time.
    with open(image_path, "rb") as stream:
        encoded = base64.b64encode(stream.read())
    return str(encoded, 'utf-8')
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def encode_image_to_jpeg_base64(filepath):
    """
    Encodes an image file to JPEG and then to a Base64 string.

    :param filepath: Path to the image file
    :return: Base64 encoded string of the JPEG image on success; on failure a
        (None, error message) tuple — callers must check for the tuple form
    """
    if filepath is None:
        return None, "File path is None."

    try:
        # The context manager closes the underlying file handle, which a bare
        # Image.open() would leave open.
        with Image.open(filepath) as source_image:
            # JPEG can only store L, RGB and CMYK data; convert anything else
            # (RGBA, palette "P", "LA", ...) to RGB instead of failing on save.
            if source_image.mode in ('RGB', 'L', 'CMYK'):
                pil_image = source_image
            else:
                pil_image = source_image.convert('RGB')

            buffered = BytesIO()
            pil_image.save(buffered, format="JPEG")

        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    except Exception as e:
        # Deliberately broad: any failure is reported through the return value
        # rather than raised.
        return None, str(e)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def encode_image_to_webp_base64(filepath):
    """
    Encodes an image file to WEBP and then to a Base64 string.

    :param filepath: Path to the image file
    :return: Base64 encoded string of the WEBP image on success; on failure a
             ``(None, error_message)`` tuple
    """
    if filepath is None:
        return None, "File path is None."

    try:
        # The context manager closes the file handle that a bare Image.open()
        # would otherwise keep open until garbage collection.
        with Image.open(filepath) as pil_image:
            # NOTE(review): WEBP can store an alpha channel, so this conversion
            # looks inherited from the JPEG variant. Kept so the encoded output
            # stays the same as before.
            if pil_image.mode == 'RGBA':
                pil_image = pil_image.convert('RGB')

            buffered = BytesIO()
            pil_image.save(buffered, format="WEBP")

        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    except Exception as e:
        # Deliberately broad: any failure to open or encode the file is
        # reported to the caller as text instead of propagating.
        return None, str(e)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def process_receipt_json(json_input):
    """
    Converts a receipt JSON string into display-ready store info and an item table.

    :param json_input: JSON string holding one receipt object
    :return: Tuple ``(store_info, items_table, message)``. ``store_info`` is a
             multi-line text summary, ``items_table`` is a list of rows (one per
             item, or a single ``["No items"]`` row) and ``message`` reports
             whether the recognized total matches the calculated one. When the
             input cannot be parsed into a JSON object the result is
             ``("Error: Invalid JSON format", None, None)``.
    """
    try:
        # Parse the JSON string (TypeError covers None / non-string input)
        data = json.loads(json_input)
    except (json.JSONDecodeError, TypeError):
        # Same arity as the success path so callers can always unpack three values
        return "Error: Invalid JSON format", None, None

    if not isinstance(data, dict):
        # Valid JSON but not an object: the field lookups below need a mapping
        return "Error: Invalid JSON format", None, None

    # Try to extract store information, if available
    store_name = data.get("store_name", "Not specified")
    store_address = data.get("store_address", "Not specified")
    currency = data.get("currency", "Not specified")
    date_time = data.get("date_time", "Not specified")
    payment_method = data.get("payment_method", "Not specified")
    total_amount = data.get("total_amount", "Not specified")
    discount = data.get("total_discount", "Not specified")
    tax = data.get("tax", "Not specified")
    tips = data.get("tips", "Not specified")

    # Format store information
    store_info = \
        (f"Store: {store_name}\nAddress: {store_address}\nCurrency: {currency}"
         f"\nTotal Amount: {total_amount}"
         f"\nDate time: {date_time}"
         f"\nType payment: {payment_method}"
         f"\nTotal discount: {discount}"
         f"\nTax: {tax}"
         )

    # Extract items, if available in JSON
    items = data.get("items", [])
    if items:
        items_table = [[item.get("name", "Not specified"),
                        item.get("category", "Not specified"),
                        item.get("unit_price", "Not specified"),
                        item.get("quantity", "Not specified"),
                        item.get("unit_of_measurement", "Not specified"),
                        item.get("total_price", "Not specified"),
                        item.get("discount", "Not specified")]
                       for item in items]
    else:
        items_table = [["No items"]]

    total_product_prices, total_sum = receipt_calculation.calculate_tips_and_taxes(items_table, total_amount, tax, tips)
    message = "Everything is okay!"
    if items_table[0][0] != "No items":
        # Append the calculated per-item price as the last column of each row
        for index, row in enumerate(items_table):
            row.append(total_product_prices[index].price)

    mismatch_message = "Recognized total sum and products total sum is not equal. Check if AI model correctly created a JSON"
    try:
        # Receipts may use a decimal comma, so normalise it before parsing
        recognized_total = round(float(str(total_amount).replace(",", ".")), 2)
    except ValueError:
        # A missing or non-numeric total cannot match the calculated sum
        message = mismatch_message
    else:
        if total_sum != recognized_total:
            message = mismatch_message
    return store_info, items_table, message
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def save_to_excel(json_output, excel_file_path):
    """
    Writes the receipt described by a JSON string to an Excel workbook.

    The store summary is written at the top of a sheet named 'Receipt' and the
    item table below it, separated by a gap.

    :param json_output: JSON string holding one receipt object
    :param excel_file_path: Destination path of the .xlsx file
    :return: ``excel_file_path`` on success, or the ``"Error: ..."`` text reported
             by ``process_receipt_json`` when the JSON could not be processed
    """
    # Index instead of unpacking so the error return of process_receipt_json is
    # handled whatever its tuple length.
    result = process_receipt_json(json_output)
    store_info, items_table = result[0], result[1]
    if isinstance(store_info, str) and store_info.startswith("Error:"):
        return store_info

    # Split on the first ': ' only so values that contain ': ' stay intact
    store_info_parsed = [
        tuple(line.split(': ', 1))
        for line in store_info.split('\n')
        if ': ' in line
    ]

    store_info_df = pd.DataFrame(store_info_parsed, columns=["Label", "Value"])

    item_columns = [
        "Item Name", "Category", "Unit Price", "Quantity", "Unit", "Total Price", "Discount", "Grand Total"
    ]
    # Rows may be narrower than the header (e.g. the single-cell "No items"
    # placeholder). Pad them, because DataFrame raises on a width mismatch.
    padded_rows = [
        list(row) + [None] * (len(item_columns) - len(row))
        for row in items_table
    ]
    items_df = pd.DataFrame(padded_rows, columns=item_columns)

    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        store_info_df.to_excel(writer, sheet_name='Receipt', index=False, header=False, startrow=0)

        # Skip past the summary rows before writing the item table
        items_df_start_row = len(store_info_df) + 2
        items_df.to_excel(writer, sheet_name='Receipt', index=False, startrow=items_df_start_row)
        worksheet = writer.sheets['Receipt']
        worksheet.column_dimensions['A'].width = 30

    return excel_file_path
|
vertex_ai_example.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import vertexai
|
| 2 |
+
from google.cloud import aiplatform
|
| 3 |
+
from google.oauth2 import service_account
|
| 4 |
+
from vertexai.generative_models import GenerativeModel
|
| 5 |
+
|
| 6 |
+
# Service-account key file used to authenticate the example.
service_account_path = "receiptsai19_09.json"

credentials = service_account.Credentials.from_service_account_file(
    service_account_path
)


def generate():
    """Sends a fixed text prompt to a Gemini model on Vertex AI and prints the reply."""
    vertexai.init(
        project="receiptsai-436007",
        location="us-central1",
        credentials=credentials,
    )
    gemini = GenerativeModel("gemini-1.5-flash-001")
    reply = gemini.generate_content('Create simple example on C++')

    print(reply.text)


generate()
|
vertex_ai_service.py
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import base64
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
|
| 6 |
+
import vertexai
|
| 7 |
+
from google.oauth2 import service_account
|
| 8 |
+
from vertexai.generative_models import GenerativeModel
|
| 9 |
+
from vertexai.generative_models import Part, SafetySetting
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class VertexAIService:
    """Wrapper around Vertex AI generative models for text and image prompts."""

    def __init__(self, json_key_path=None, json_key_env_var='GOOGLE_VERTEX_KEY'):
        """
        Initializes the Vertex AI client.

        :param json_key_path: Path to the JSON file (optional)
        :param json_key_env_var: Environment variable name containing the JSON key (default 'GOOGLE_VERTEX_KEY')
        :raises ValueError: If no key file is found and the environment variable is not set
        """
        self.json_key_path = json_key_path
        self.json_key_env_var = json_key_env_var
        self.credentials = self._authenticate_vertex_ai()

    def _authenticate_vertex_ai(self):
        """
        Authenticates using the JSON key from a file or environment variable.

        The key file is used when ``json_key_path`` points to an existing file;
        otherwise the JSON text stored in the environment variable is used.

        :return: Google Credentials object
        :raises ValueError: If the environment variable is needed but not set
        """
        if self.json_key_path and os.path.isfile(self.json_key_path):
            # Authenticate using the file
            return service_account.Credentials.from_service_account_file(self.json_key_path)

        # Get JSON key from environment variable
        json_key = os.getenv(self.json_key_env_var)

        if not json_key:
            raise ValueError(f"Environment variable {self.json_key_env_var} is not set.")

        # Build the credentials from the parsed key in memory. Writing the
        # secret to a temporary file would leave it on disk whenever credential
        # loading raised before the cleanup step ran.
        return service_account.Credentials.from_service_account_info(json.loads(json_key))

    def process_text(self, project, location, model_name, prompt, system=""):
        """
        Generates content using Vertex AI.

        :param project: GCP project ID
        :param location: GCP region
        :param model_name: The name of the Generative Model
        :param prompt: Prompt for content generation
        :param system: System instruction passed to the model (default empty)
        :return: Generated content or error message
        """
        try:
            # Initialize Vertex AI with credentials
            vertexai.init(project=project, location=location, credentials=self.credentials)
            model = GenerativeModel(model_name, system_instruction=[system])

            # Generate content
            responses = model.generate_content(prompt)
            return responses.text
        except Exception as error:
            # Failures are reported as text instead of being raised
            return f"An error occurred: {error}"

    def process_image(self, input_image64, model_name, prompt, system="You are receipt recognizer", temperatura=0.0,
                      project="receiptsai-436007", location="us-central1", mime_type="image/webp"):
        """
        Processes the image using Vertex AI model.

        :param input_image64: Base64 encoded image string
        :param model_name: Name of the model in Vertex AI
        :param prompt: Text prompt to guide the model
        :param system: System instruction passed to the model
        :param temperatura: Temperature for controlling randomness
        :param project: GCP project ID (defaults to the previously hard-coded project)
        :param location: GCP region (defaults to the previously hard-coded region)
        :param mime_type: MIME type declared for the image payload (default 'image/webp')
        :return: JSON string of the model response with token usage counts added
        :raises ValueError: If no image is given
        :raises RuntimeError: If initialization, generation or response parsing fails
        """
        if input_image64 is None:
            raise ValueError("No objects detected.")

        try:
            # Initialize Vertex AI
            vertexai.init(project=project, location=location, credentials=self.credentials)

            # Load the model
            model = GenerativeModel(model_name, system_instruction=[system])

            # Prepare the image part
            # NOTE(review): the Base64 string is passed through unchanged, as
            # before; confirm the SDK decodes it rather than expecting raw bytes.
            image_part = Part.from_data(
                mime_type=mime_type,
                data=input_image64
            )

            # Set generation configuration
            generation_config = {
                "max_output_tokens": 8192,
                "temperature": temperatura,
                "response_mime_type": "application/json"
            }

            # Set safety settings: every listed category uses the BLOCK_NONE threshold
            harm_categories = (
                SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
                SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
                SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
            )
            safety_settings = [
                SafetySetting(
                    category=category,
                    threshold=SafetySetting.HarmBlockThreshold.BLOCK_NONE
                )
                for category in harm_categories
            ]

            # Generate content using the model
            response = model.generate_content([image_part, prompt],
                                              generation_config=generation_config,
                                              safety_settings=safety_settings)

            json_content = json.loads(response.text)

            # Attach token usage so callers can track consumption per request
            json_content['input_tokens'] = response.usage_metadata.prompt_token_count
            json_content['output_tokens'] = response.usage_metadata.candidates_token_count
            json_content['total_tokens'] = response.usage_metadata.total_token_count

            return json.dumps(json_content, indent=4)

        except Exception as error:
            # Chain the cause so the original traceback is kept
            raise RuntimeError(f"An error occurred during image processing: {error}") from error
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# Example usage
|
| 145 |
+
if __name__ == '__main__':
    # Project and model details
    project = "receiptsai-436007"
    location = "us-central1"
    model_name = "gemini-1.5-flash-001"
    prompt = "Create example on C++"

    # Initialize the client and generate content
    client = VertexAIService(json_key_path='GOOGLE_VERTEX_AI_KEY.json')
    result = client.process_text(project, location, model_name, prompt)
    print(f'Generated result: {result}')

    # Image processing
    # NOTE(review): this sample is a .jpg while process_image declares the
    # payload as image/webp - confirm the model accepts the mismatch.
    image_path = "./examples/lidl2.jpg"
    with open(image_path, "rb") as image_file:
        input_image64 = base64.b64encode(image_file.read()).decode('utf-8')

    prompt = "Read the text"
    system = "You are receipt recognizer"
    result_img = client.process_image(input_image64, "gemini-1.5-pro", prompt, system, 0.0)
    print(f'Image processing result: {result_img}')

    # process_image returns JSON with non-ASCII text escaped as \uXXXX.
    # Re-serialise it with ensure_ascii=False to print readable text. A
    # 'unicode_escape' decode would leave lone surrogates for characters
    # outside the BMP and would also un-escape quotes and newlines.
    decoded_string = json.dumps(json.loads(result_img), ensure_ascii=False, indent=4)

    print(decoded_string)
|