valentynliubchenko committed on
Commit
e69be74
·
1 Parent(s): cba413b

nit comment

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ examples/*.JPG filter=lfs diff=lfs merge=lfs -text
37
+ examples/*.jpg filter=lfs diff=lfs merge=lfs -text
38
+ examples/*.* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # These are some examples of commonly ignored file patterns.
2
+ # You should customize this list as applicable to your project.
3
+ # Learn more about .gitignore:
4
+ # https://www.atlassian.com/git/tutorials/saving-changes/gitignore
5
+
6
+ # Node artifact files
7
+ node_modules/
8
+ dist/
9
+
10
+ # Compiled Java class files
11
+ *.class
12
+
13
+ # Compiled Python bytecode
14
+ *.py[cod]
15
+
16
+ # Log files
17
+ *.log
18
+
19
+ # Package files
20
+ *.jar
21
+
22
+ # Maven
23
+ target/
24
+ dist/
25
+
26
+ # JetBrains IDE
27
+ .idea/
28
+
29
+ # Unit test reports
30
+ TEST*.xml
31
+
32
+ # Generated by MacOS
33
+ .DS_Store
34
+
35
+ # Generated by Windows
36
+ Thumbs.db
37
+
38
+ # Applications
39
+ *.app
40
+ *.exe
41
+ *.war
42
+
43
+ # Large media files
44
+ *.mp4
45
+ *.tiff
46
+ *.avi
47
+ *.flv
48
+ *.mov
49
+ *.wmv
50
+
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: ReceiptSplitAI
3
- emoji: 🌍
4
- colorFrom: green
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.44.1
8
  app_file: app.py
9
  pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: ReceiptAI
3
+ emoji: 🏢
4
+ colorFrom: pink
5
+ colorTo: pink
6
  sdk: gradio
7
+ sdk_version: 4.44.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
algorithm/product.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
class Product:
    """A single receipt line item: its name, price and quantity."""

    def __init__(self, name, price, quantity):
        # Values are stored exactly as passed in; nothing is converted or validated.
        self.name, self.price, self.quantity = name, price, quantity

    def __repr__(self):
        fields = f"name={self.name}, price={self.price}, quantity={self.quantity}"
        return f"Product({fields})"
algorithm/receipt.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
class Receipt:
    """A parsed receipt: its products, the tax and the total amount."""

    def __init__(self, products, tax, total_amount):
        # Values are stored exactly as passed in; nothing is converted or validated.
        self.products, self.tax, self.total_amount = products, tax, total_amount

    def __repr__(self):
        fields = f"products={self.products}, tax={self.tax}, total_amount={self.total_amount}"
        return f"Receipt({fields})"
algorithm/receipt_calculation.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ import copy
3
+
4
+ from algorithm.product import Product
5
+
6
+
7
def calculate_dish_price_with_taxes(_products, taxes, grand_total):
    """Spread the taxes over the products in proportion to their prices.

    :param _products: iterable of objects with a numeric ``price`` attribute;
        it is deep-copied, the originals are never modified
    :param taxes: total tax (and tips) included in ``grand_total``
    :param grand_total: receipt total including taxes
    :return: ``(products_with_taxes, grand_total)`` where each copied product's
        price is scaled by ``grand_total / pre-tax total`` and rounded to
        5 decimals, and ``grand_total`` is rebuilt from the rounded parts
    """
    payment_total = round(grand_total - taxes, 2)
    grand_total = payment_total + round(taxes, 2)
    _product_with_taxes = copy.deepcopy(_products)

    # A pre-tax total of zero (e.g. every price missing) gives no basis for a
    # proportional split; leave the prices untouched instead of dividing by zero.
    if payment_total == 0:
        return _product_with_taxes, grand_total

    for _product in _product_with_taxes:
        _product.price = round((_product.price / payment_total) * grand_total, 5)
    return _product_with_taxes, grand_total
14
+
15
+
16
def round_up_two_decimals(_products_total):
    """Return a deep copy of the products with each price rounded UP to cents.

    :param _products_total: iterable of objects with a numeric ``price``
    :return: deep copy whose prices are the ceiling at two decimals
    """
    _product_with_taxes_rounded = copy.deepcopy(_products_total)
    for _product in _product_with_taxes_rounded:
        # price * 100 can carry binary float noise (1.1 * 100 ==
        # 110.00000000000001); without trimming it, ceil() would add a cent to
        # a price that already has two decimals.
        cents = round(_product.price * 100, 6)
        _product.price = math.ceil(cents) / 100

    return _product_with_taxes_rounded
22
+
23
+
24
def first_algorithm(_products_total_rounded, receipt_subtotal):
    """Remove the rounding surplus from the prices proportionally.

    Each product gives up a share of ``current_total - receipt_subtotal``
    proportional to its own price. The products are modified IN PLACE.

    :param _products_total_rounded: list of objects with a numeric ``price``
    :param receipt_subtotal: the total the prices should add up to
    :return: ``(_products_total_rounded, final_total)`` with ``final_total``
        the plain sum of the corrected prices
    """
    current_total = round(sum(p.price for p in _products_total_rounded), 2)

    # With a zero total there is nothing to distribute, and the proportional
    # share below would divide by zero.
    if current_total != 0:
        difference = current_total - receipt_subtotal
        for _product in _products_total_rounded:
            correction = round((_product.price / current_total) * difference, 2)
            _product.price = round(_product.price - correction, 2)

    _final_total = sum(p.price for p in _products_total_rounded)
    return _products_total_rounded, _final_total
46
+
47
+
48
def fractional_part_rest(value):
    """Return the digits after the second decimal place, read as a number.

    The value is formatted with ten decimals; decimals three through ten are
    returned as a float (1.23456 -> "45600000" -> 45600000.0).
    """
    formatted = format(value, ".10f")
    decimals = formatted.split('.')[1]
    return float(decimals[2:])
52
+
53
+
54
def second_algorithm(_products_total, receipt_total):
    """Round prices up to cents, then take back the surplus one cent at a time.

    :param _products_total: products with unrounded tax-inclusive prices
        (objects with a numeric ``price``); not modified
    :param receipt_total: the total the rounded prices should add up to
    :return: ``(rounded_products, final_total)``; prices and total are rounded
        to two decimals
    """
    _products_total_rounded = round_up_two_decimals(_products_total)
    current_total = sum(p.price for p in _products_total_rounded)

    # Count the surplus in whole cents. Repeatedly subtracting 0.01 from a float
    # can leave a tiny positive residue and take a cent from one item too many.
    cents_over = round(round(current_total - receipt_total, 2) * 100)

    # NOTE(review): the previous version also built a list sorted by
    # fractional_part_rest(price) - ceil(price) but never used it to choose
    # which items to adjust; the cents were always taken from the first items
    # in their original order. That observable behavior is kept here —
    # confirm whether a different selection order was intended.
    if cents_over > 0:
        for _product in _products_total_rounded[:cents_over]:
            _product.price = round(_product.price - 0.01, 2)

    _final_total = round(sum(p.price for p in _products_total_rounded), 2)
    return _products_total_rounded, _final_total
88
+
89
+
90
+ def calculate_tips_and_taxes(items_table, total_amount, tax, tips):
91
+ products = []
92
+
93
+ if items_table[0][0] == "No items":
94
+ return products, 0
95
+
96
+ if total_amount == "Not specified" or total_amount is None:
97
+ total_amount = "0.0"
98
+
99
+ if tax == "Not specified" or tax is None:
100
+ tax = "0.0"
101
+
102
+ if tips == "Not specified" or tips is None:
103
+ tax = "0.0"
104
+
105
+ for item in items_table:
106
+ price = item[5]
107
+ if price == "Not specified":
108
+ price = "0.0"
109
+ item_value = float(str(price).replace(",", ".")) if item[5] is not None else 0.0
110
+ products.append(Product(item[0], item_value, item[3]))
111
+
112
+ sum_of_product_prices = 0
113
+ for _product in products:
114
+ sum_of_product_prices += _product.price
115
+
116
+ sum_of_product_prices = round(float(sum_of_product_prices), 2)
117
+ total_amount = round(float(str(total_amount).replace(",", ".")), 2)
118
+ tips = round(float(str(tips).replace(",", ".")), 2)
119
+ tax = round(tips + round(float(str(tax).replace(",", ".")), 2), 2)
120
+ if round(float(total_amount), 2) != round(float(sum_of_product_prices) + float(tax), 2):
121
+ return products, sum_of_product_prices
122
+
123
+ products_total, subtotal = calculate_dish_price_with_taxes(products, taxes=float(tax),
124
+ grand_total=float(total_amount))
125
+ final_prices, final_total = second_algorithm(products_total, subtotal)
126
+
127
+ final_total = round(final_total, 2)
128
+ return final_prices, final_total
app.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import pandas as pd
3
+ import os
4
+ from datetime import datetime
5
+
6
+ import gradio as gr
7
+ from PIL import Image
8
+
9
+ from google_drive_client import GoogleDriveClient
10
+ from openai_service import OpenAIService
11
+ from utils import read_prompt_from_file, process_receipt_json, encode_image_to_jpeg_base64, save_to_excel, encode_image_to_webp_base64
12
+ from vertex_ai_service import VertexAIService
13
+
14
# Model identifiers offered in the UI. process_image sends names starting with
# "gpt" to the OpenAI client and every other name to the Vertex AI client.
model_names = ["gemini-1.5-flash", "gemini-1.5-pro", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo"]
# Prompt presets; each name maps to a "<name>.txt" prompt file.
prompt_names = ["prompt_v1", "prompt_v2", "prompt_v3"]
# One single-element row per file found in ./examples (read at import time).
example_list = [["./examples/" + example] for example in os.listdir("examples")]
# Initial contents of the prompt and system-instruction text boxes.
prompt_default = read_prompt_from_file("prompt_v1.txt")
system_instruction = read_prompt_from_file("system_instruction.txt")
19
+
20
+
21
def process_image(input_image, model_name, prompt_name, temperatura, system_instruction=None, current_prompt_text=None):
    """Send a receipt image to the selected model and unpack the JSON answer.

    :param input_image: path of the uploaded image (None when nothing is uploaded)
    :param model_name: model identifier; names starting with "gpt" go to the
        OpenAI client, everything else to the Vertex AI client
    :param prompt_name: prompt preset name, defaults to "prompt_v1"
    :param temperatura: sampling temperature passed to the model
    :param system_instruction: system prompt, defaults to an empty string
    :param current_prompt_text: prompt text from the UI; when missing or blank
        the preset file ``<prompt_name>.txt`` is used instead
    :return: 8 values matching the Gradio outputs: model name, JSON text,
        store info, items table, message and three button updates
    """
    if system_instruction is None:
        system_instruction = ""

    if input_image is None:
        # Return one value per wired output; the previous 2-tuple did not match
        # the 8 outputs of the submit handler.
        no_image = json.dumps({"error": "No image provided."})
        locked = gr.update(interactive=False)
        return model_name, no_image, None, None, "No image provided.", locked, locked, locked

    if prompt_name is None:
        prompt_name = "prompt_v1"

    # The edited text box wins; fall back to the preset file only when the
    # text is missing, so the model never receives a None prompt.
    if current_prompt_text is None or not current_prompt_text.strip():
        prompt = read_prompt_from_file(f"{prompt_name}.txt")
    else:
        prompt = current_prompt_text

    print("file name:", input_image)
    print("model_name:", model_name)
    print("prompt_name:", prompt_name)
    print("Temperatura:", temperatura)

    base64_image = encode_image_to_webp_base64(input_image)

    try:
        if model_name.startswith("gpt"):
            result = open_ai_client.process_image(base64_image, model_name, prompt, system_instruction, temperatura)
        else:
            result = vertex_ai_client.process_image(base64_image, model_name, prompt, system_instruction,
                                                    temperatura)
        # Re-serialize so non-ASCII characters are shown readable, not escaped.
        parsed_result = json.loads(result)
        result = json.dumps(parsed_result, ensure_ascii=False, indent=4)
        print(result)
    except Exception as e:
        # UI boundary: report any failure as a JSON error instead of crashing.
        print(f"Exception occurred: {e}")
        result = json.dumps({"error": "Error processing: Check prompt or images"})

    store_info, items_table, message = process_receipt_json(result)
    print(store_info)
    print(items_table)

    return model_name, result, store_info, items_table, message, gr.update(interactive=True), gr.update(interactive=True), gr.update(interactive=True)
76
+
77
+
78
def save_flag_data(save_type, image, model_name, prompt_name, temperatura, current_prompt_text, model_output, json_output,
                   store_info_output, items_list, comments_output, system_instruction, flagging_dir="custom_flagged_data"):
    """Store a rated recognition result locally and upload it to Google Drive.

    Writes a copy of the image, a JSON file with the run metadata and an Excel
    export into ``flagging_dir``, then uploads the three files.

    :param save_type: rating label stored with the result (e.g. "Good")
    :param image: path of the source image
    :param json_output: model result as JSON text
    :param flagging_dir: local output directory, created when missing
    :return: three button updates (all non-interactive) and a text with the
        uploaded file links; links stay ``None`` for anything that failed
    """
    save_button_update = gr.update(interactive=False)
    image_link, json_link, excel_link = None, None, None
    try:
        os.makedirs(flagging_dir, exist_ok=True)

        # List files in the directory (diagnostic output only)
        try:
            files = [f for f in os.listdir(flagging_dir) if os.path.isfile(os.path.join(flagging_dir, f))]
            if files:
                print("Files in directory:", flagging_dir)
                for file in files:
                    print(file)
            else:
                print(f"No files found in directory: {flagging_dir}")
        except Exception as e:
            print(f"Error listing files in directory: {e}")

        image_file_path = image
        print("save_type:", save_type)
        print("Image File Path:", image)
        print("prompt_name:", prompt_name)
        print("Model Name:", model_name)
        print("Result as JSON:", json_output)
        print("comments:", comments_output)
        print("system_instruction:", system_instruction)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        original_filename = os.path.basename(image_file_path)
        filename, file_extension = os.path.splitext(original_filename)
        base_filename = f"{filename}_{model_name}_{prompt_name}_{timestamp}"

        # Save image
        image_save_path = os.path.join(flagging_dir, f"{base_filename}{file_extension}")
        with Image.open(image_file_path) as source_image:
            source_image.save(image_save_path)

        if not os.path.exists(image_save_path):
            # Raise instead of `return 0`: the handler must always return four
            # values, which the common exit path below provides.
            raise OSError(f"Failed to save image at: {image_save_path}")
        with Image.open(image_save_path) as saved_image:
            image_size = saved_image.size
        print(f"Image saved at: {image_save_path}, Size: {image_size}")

        # Save result as JSON
        json_file_path = os.path.join(flagging_dir, f"{base_filename}.json")
        data_to_save = {
            "image_name": f"{base_filename}{file_extension}",
            "prompt_name": prompt_name,
            "system_instruction": system_instruction,
            "prompt": current_prompt_text,
            "model_name": model_name,
            "result_json": json_output,
            "comments": comments_output,
            "save_type": save_type
        }

        data_to_save_encode = json.dumps(data_to_save, ensure_ascii=False, indent=4)
        print("data_to_save_encode: ", data_to_save_encode)

        with open(json_file_path, 'w', encoding='utf-8') as json_file:
            json_file.write(data_to_save_encode)

        excel_file_path = os.path.join(flagging_dir, f"{base_filename}.xlsx")
        save_to_excel(json_output, excel_file_path)

        # Upload files to Google Drive. Best effort: a failure is logged and the
        # corresponding links stay None.
        try:
            google_drive_client_current = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
            image_folder_id = '19yiqYX_Z1rbHDxnFLJTvji0VcbhSO9cq'
            image_link = google_drive_client_current.upload_file(image_save_path, image_folder_id)
            json_link = google_drive_client_current.upload_file(json_file_path, image_folder_id)
            excel_link = google_drive_client_current.upload_file(excel_file_path, image_folder_id)
            print(f"Image uploaded to Google Drive. Link: {image_link}")
            print(f"JSON file uploaded to Google Drive. Link: {json_link}")
            print(f"Excel file uploaded to Google Drive. Link: {excel_link}")
        except Exception as e:
            print(f"Error uploading files to Google Drive: {e}")

    except Exception as e:
        print(f"Error while saving flag data: {e}")
    links = f"Image: {image_link}\nJSON: {json_link}\nExcel: {excel_link}"
    return save_button_update, save_button_update, save_button_update, links
165
+
166
def update_prompt_from_radio(prompt_name):
    """Return the prompt text for the selected preset.

    Any name other than the three known presets falls back to "prompt_v1".
    """
    known_presets = ("prompt_v1", "prompt_v2", "prompt_v3")
    selected = prompt_name if prompt_name in known_presets else "prompt_v1"
    return read_prompt_from_file(selected + ".txt")
175
+
176
# Service clients are created once, at import time, from local key files.
google_drive_client = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
vertex_ai_client = VertexAIService(json_key_path='GOOGLE_VERTEX_AI_KEY.json')

# Optional OpenAI key file. When it is missing or unreadable `key` stays None
# and is passed on as-is to OpenAIService.
key = None
key_file_path = 'OPENAI_AI_KEY.txt'
if os.path.exists(key_file_path):
    try:
        with open(key_file_path, 'r') as key_file:
            key = key_file.read().strip()
    except Exception as e:
        # Best effort: a read failure is only logged.
        print(f"Error reading file: {e}")

open_ai_client = OpenAIService(api_key=key)
189
+
190
# Gradio UI: inputs on the left, recognition results and rating buttons on the right.
with gr.Blocks() as iface:
    gr.Markdown("# ReceptAI")
    gr.Markdown("ReceptAI")

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(type="filepath")
            model_radio = gr.Radio(model_names, label="Choose model", value=model_names[0])
            prompt_radio = gr.Radio(prompt_names, label="Choose prompt", value=prompt_names[0])
            temperature_slider = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label="Temperatura", value=0.0)
            # Bound to its own name so the module-level `system_instruction`
            # string is no longer replaced by a component.
            system_instruction_box = gr.Textbox(label="System Instruction", visible=True, value=system_instruction)
            custom_prompt = gr.Textbox(label="prompt text", visible=True, value=prompt_default)
            with gr.Row():
                submit_button = gr.Button("Submit")

        with gr.Column(scale=2):
            model_output = gr.Textbox(label="MODEL", lines=1, interactive=False)
            json_output = gr.Textbox(label="Result as json")
            store_info_output = gr.Textbox(label="Store Information", lines=4)
            items_list = gr.Dataframe(
                headers=["Item Name", "Category", "Unit Price", "Quantity", "Unit", "Total Price", "Discount", "Grand Total"],
                label="Items List")
            comments_output = gr.Textbox(label="Comments", visible=True, lines=4, interactive=True)
            with gr.Row():
                # Rating buttons stay disabled until a result has been produced.
                save_good_button = gr.Button(value="Save as Good", interactive=False)
                save_average_button = gr.Button(value="Save as Average", interactive=False)
                save_poor_button = gr.Button(value="Save as Poor", interactive=False)
            file_links_output = gr.Textbox(label="File Links", interactive=False, visible=True)

    submit_button.click(fn=process_image,
                        inputs=[image_input, model_radio, prompt_radio, temperature_slider, system_instruction_box,
                                custom_prompt],
                        outputs=[model_output, json_output, store_info_output, items_list, comments_output,
                                 save_good_button, save_average_button, save_poor_button])

    # Same order as the parameters of save_flag_data after `save_type`.
    common_inputs = [image_input, model_radio, prompt_radio, temperature_slider, custom_prompt, model_output,
                     json_output, store_info_output, items_list, comments_output, system_instruction_box]
    save_outputs = [save_good_button, save_average_button, save_poor_button, file_links_output]


    def save_flag_data_wrapper(save_type, image, model_name, prompt_name, temperatura, custom_prompt, model_output,
                               json_output, store_info_output, items_list, comments_output, system_instruction):
        """Forward the current component values to save_flag_data under the given rating."""
        return save_flag_data(
            save_type, image, model_name, prompt_name, temperatura, custom_prompt, model_output, json_output,
            store_info_output, items_list, comments_output, system_instruction
        )

    # All three buttons share the inputs; only the rating label differs.
    save_good_button.click(
        fn=lambda *args: save_flag_data_wrapper("Good", *args),
        inputs=common_inputs,
        outputs=save_outputs
    )

    save_average_button.click(
        fn=lambda *args: save_flag_data_wrapper("Average", *args),
        inputs=common_inputs,
        outputs=save_outputs
    )

    save_poor_button.click(
        fn=lambda *args: save_flag_data_wrapper("Poor", *args),
        inputs=common_inputs,
        outputs=save_outputs
    )
    # Selecting a preset loads its text into the editable prompt box.
    prompt_radio.change(fn=update_prompt_from_radio, inputs=[prompt_radio], outputs=[custom_prompt])
    gr.Examples(examples=example_list,
                inputs=[image_input, model_radio, prompt_radio, temperature_slider, custom_prompt])

iface.launch()
custom_flagged_data/test.txt ADDED
File without changes
google_drive_client.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ from google.oauth2 import service_account
4
+ from googleapiclient.discovery import build
5
+ from googleapiclient.http import MediaFileUpload
6
+ from googleapiclient.errors import HttpError
7
+
8
class GoogleDriveClient:
    """Thin wrapper around the Google Drive v3 API for uploading files."""

    # Scope used for every credential created by this client.
    _SCOPES = ['https://www.googleapis.com/auth/drive.file']

    def __init__(self, json_key_path=None, json_key_env_var='GOOGLE_SERVICE_ACCOUNT_KEY'):
        """
        Initializes the Google Drive client.

        :param json_key_path: Path to the JSON file (optional)
        :param json_key_env_var: Environment variable name containing the JSON key (default 'GOOGLE_SERVICE_ACCOUNT_KEY')
        :raises ValueError: if neither a key file nor the environment variable is available
        """
        # NOTE(review): the fallback reads GOOGLE_VERTEX_KEY_PATH, which looks
        # like a Vertex AI setting — confirm this is intended for Drive.
        self.json_key_path = json_key_path or os.getenv('GOOGLE_VERTEX_KEY_PATH')
        self.json_key_env_var = json_key_env_var
        self.service = self._authenticate_google_drive()

    def _authenticate_google_drive(self):
        """
        Authenticates using the JSON key from a file or environment variable.

        :return: Google Drive API service object
        :raises ValueError: if the key file is absent and the environment variable is not set
        """
        import json

        if self.json_key_path and os.path.isfile(self.json_key_path):
            # Authenticate using the file
            creds = service_account.Credentials.from_service_account_file(
                self.json_key_path,
                scopes=self._SCOPES
            )
        else:
            # Get JSON key from environment variable
            json_key = os.getenv(self.json_key_env_var)

            if not json_key:
                raise ValueError(f"Environment variable {self.json_key_env_var} is not set.")

            # Build the credentials straight from the parsed key. The secret is
            # no longer written to a temporary file, which could be left on
            # disk when credential creation failed.
            creds = service_account.Credentials.from_service_account_info(
                json.loads(json_key),
                scopes=self._SCOPES
            )

        # Create and return the Google Drive API service object
        return build('drive', 'v3', credentials=creds)

    def upload_file(self, file_path, folder_id):
        """
        Uploads a file to Google Drive.

        :param file_path: Path to the local file
        :param folder_id: ID of the folder on Google Drive
        :return: URL of the uploaded file, or an "An error occurred: ..." message
                 when the API call fails with an HttpError
        """
        try:
            file_metadata = {
                'name': os.path.basename(file_path),
                'parents': [folder_id]  # ID of the folder to upload the file to
            }
            media = MediaFileUpload(file_path, mimetype='application/octet-stream')

            # Upload the file; only the new file's id is requested back
            file = self.service.files().create(body=file_metadata, media_body=media, fields='id').execute()

            # Get the file ID and construct the URL
            file_id = file.get('id')
            file_link = f'https://drive.google.com/file/d/{file_id}/view?usp=drive_link'

            return file_link
        except HttpError as error:
            return f"An error occurred: {error}"
81
+
82
+ # Example usage
83
# Manual smoke test: uploads ./file.txt to a fixed Drive folder when the
# module is run directly.
if __name__ == '__main__':
    # Path to the local file
    file_path = './file.txt'

    # ID of the folder on Google Drive
    folder_id = '19yiqYX_Z1rbHDxnFLJTvji0VcbhSO9cq'

    # Initialize the client and upload the file; upload_file returns either
    # the file link or an error message, both are printed the same way.
    client = GoogleDriveClient(json_key_path='GOOGLE_SERVICE_ACCOUNT_KEY.json')
    file_link = client.upload_file(file_path, folder_id)
    print(f'File upload result: {file_link}')
94
+
gpt_processing.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+
4
+ from openai import OpenAI
5
+
6
# The API key is read from the environment once, at import time.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key:
    print("OPENAI_API_KEY found.")
else:
    print("OPENAI_API_KEY not found.")

# Module-level client shared by gpt_process_image. It is created even when the
# key was not found above.
client = OpenAI(
    api_key=openai_api_key
)
16
+
17
+
18
def gpt_process_image(input_image64, model_name, prompt, system_instruction="", temperatura=0.0):
    """Ask an OpenAI chat model to read a base64-encoded JPEG image.

    :param input_image64: base64 text of the image
    :param model_name: OpenAI model identifier
    :param prompt: user prompt sent together with the image
    :param system_instruction: system message content
    :param temperatura: sampling temperature
    :return: JSON text (indent 4) of the model answer with ``input_tokens``,
        ``output_tokens`` and ``total_tokens`` added; when ``input_image64`` is
        None the tuple ``(None, "No objects detected.")`` is returned instead
    """
    if input_image64 is None:
        # NOTE(review): this path returns a tuple while the success path
        # returns a str; kept for backward compatibility.
        return None, "No objects detected."
    print("model_name:", model_name)
    print("Temperatura:", temperatura)

    response = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": f"{system_instruction}"},
            {"role": "user", "content": [
                {"type": "text", "text": f"{prompt}"},
                {"type": "image_url", "image_url": {
                    # "image/jpeg" is the registered media type for JPEG data;
                    # the previous "image/jpg" is not a registered type.
                    "url": f"data:image/jpeg;base64,{input_image64}"}
                 }
            ]}
        ],
        temperature=temperatura,
        # Ask the API for a JSON object so the content can be parsed below.
        response_format={"type": "json_object"}
    )

    json_content = json.loads(response.choices[0].message.content)

    # Attach token usage to the result.
    json_content['input_tokens'] = response.usage.prompt_tokens
    json_content['output_tokens'] = response.usage.completion_tokens
    json_content['total_tokens'] = response.usage.total_tokens

    print(json_content)

    return json.dumps(json_content, indent=4)
openai_service.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import base64
3
+ import os
4
+ from openai import OpenAI
5
+
6
class OpenAIService:
    """Client wrapper that sends a receipt image to an OpenAI chat model."""

    def __init__(self, api_key=None):
        """Use ``api_key`` or the OPENAI_API_KEY environment variable.

        :raises ValueError: when no key is available from either source
        """
        self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
        if not self.api_key:
            raise ValueError("OPENAI_API_KEY not found.")
        print("OPENAI_API_KEY was found.")

        self.client = OpenAI(api_key=self.api_key)

    def process_image(self, input_image64, model_name, prompt, system = "You are receipt recognizer!", temperatura = 0.0):
        """Run the model on a base64 WebP image and return the answer as JSON text.

        Returns ``(None, "No objects detected.")`` when no image is given;
        otherwise the JSON text (indent 4) of the answer with the token usage
        added under ``input_tokens``, ``output_tokens`` and ``total_tokens``.
        """
        if not input_image64:
            return None, "No objects detected."

        print("Model name:", model_name)
        print("system:", system)
        print("Temperature:", temperatura)

        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/webp;base64,{input_image64}"},
        }
        text_part = {"type": "text", "text": f"{prompt}"}
        conversation = [
            {"role": "system", "content": f"{system}"},
            {"role": "user", "content": [text_part, image_part]},
        ]

        response = self.client.chat.completions.create(
            model=model_name,
            messages=conversation,
            temperature=temperatura,
            response_format={"type": "json_object"},
        )

        json_content = json.loads(response.choices[0].message.content)

        # Attach token usage to the result.
        usage = response.usage
        json_content['input_tokens'] = usage.prompt_tokens
        json_content['output_tokens'] = usage.completion_tokens
        json_content['total_tokens'] = usage.total_tokens

        print(json_content)

        return json.dumps(json_content, indent=4)
48
+
49
# Manual smoke test: sends one example receipt to gpt-4o-mini when the module
# is run directly.
if __name__ == "__main__":
    processor = OpenAIService()

    # Image processing
    image_path = "./examples/lidl2.jpg"
    with open(image_path, "rb") as image_file:
        input_image64 = base64.b64encode(image_file.read()).decode('utf-8')

    system = "You are receipt recognizer "
    prompt = "Recognize the receipt and provide result as json "
    result = processor.process_image(input_image64, "gpt-4o-mini", prompt, system, 0.0)
    print(f'Image processing result: {result}')
    # The unused unicode_escape round trip was dropped: its value was never
    # read and the decode can fail on backslashes in the JSON text.

    print(result)
process_images_gemini_multithread.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ import time
5
+ from concurrent.futures import ThreadPoolExecutor, as_completed
6
+
7
+ from utils import encode_image_to_jpeg_base64
8
+ # Import your VertexAIService class
9
+ from vertex_ai_service import VertexAIService # Replace with the module name where VertexAIService is defined
10
+
11
+
12
def process_image_file(file_path, client):
    """
    Processes an image file and returns the path to the JSON file with results.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :return: Path to the saved JSON file
    :raises Exception: if the service reports an error
    """
    input_image64 = encode_image_to_jpeg_base64(file_path)

    # Processing settings
    prompt = "Read the text"
    system = "You are receipt recognizer"

    # Call image processing
    outcome = client.process_image(input_image64, "gemini-1.5-flash", prompt, system, 0.0)

    # NOTE(review): VertexAIService is not visible here. Other call sites use
    # the result as a single JSON string, while this one unpacked a
    # (result, error) pair; accept both shapes instead of failing to unpack a
    # string — confirm the service contract.
    if isinstance(outcome, tuple):
        result_img, error = outcome
    else:
        result_img, error = outcome, None

    if error:
        raise Exception(error)

    # Create path for JSON file (same name as the image, .json extension)
    json_file_path = os.path.splitext(file_path)[0] + ".json"

    # Write the result to a JSON file
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json_file.write(result_img)

    return json_file_path
40
+
41
+
42
def process_folder_images(folder_path, json_key_path):
    """
    Processes all images in the specified folder concurrently.

    Each file's outcome (saved JSON path or error) is printed; a failure on
    one file does not stop the others.

    :param folder_path: Path to the folder containing images
    :param json_key_path: Path to the JSON key for Vertex AI authentication
    """
    # Initialize Vertex AI client
    client = VertexAIService(json_key_path=json_key_path)

    # Get a list of all images in the folder. The extension check ignores case
    # so that files such as "receipt.JPG" are not skipped.
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = [os.path.join(folder_path, f) for f in os.listdir(folder_path)
                   if f.lower().endswith(image_extensions)]

    # Use ThreadPoolExecutor for parallel processing
    with ThreadPoolExecutor() as executor:
        future_to_file = {executor.submit(process_image_file_with_retry, file_path, client, 5): file_path
                          for file_path in image_files}

        for future in as_completed(future_to_file):
            file_path = future_to_file[future]
            try:
                json_file_path = future.result()
                print(f"Processed: {file_path}, result saved to: {json_file_path}")
            except Exception as exc:
                print(f"Failed to process file {file_path}. Error: {exc}")
68
+
69
+ import random
70
+
71
def process_image_file_with_retry(file_path, client, max_retries=5):
    """
    Processes an image file with retry logic and returns the path to the JSON file with results.

    Only errors whose message contains "429" are retried, with exponential
    backoff plus jitter; any other error is re-raised immediately.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :param max_retries: Maximum number of attempts when quota errors occur
    :return: Path to the saved JSON file
    :raises Exception: "Max retries exceeded ..." once every attempt hit a quota error
    """
    last_quota_error = None
    for attempt in range(1, max_retries + 1):
        try:
            return process_image_file(file_path, client)
        except Exception as exc:
            if "429" not in str(exc):
                # Not a quota error: propagate with the original traceback.
                raise
            last_quota_error = exc
            if attempt == max_retries:
                # No attempt left, so waiting would only delay the failure.
                break
            wait_time = 2 ** attempt + random.uniform(0, 1)
            print(f"Quota exceeded for {file_path}. Retrying in {wait_time:.2f} seconds... (Attempt {attempt}/{max_retries})")
            time.sleep(wait_time)

    raise Exception(f"Max retries exceeded for file {file_path}") from last_quota_error
95
+
96
+ # Call the function to process the folder
97
# Script entry point: processes every image in ./examples with the Vertex AI
# key file expected in the working directory.
if __name__ == '__main__':
    folder_path = './examples'  # Set the path to your folder
    json_key_path = 'GOOGLE_VERTEX_AI_KEY.json'  # Set the path to your JSON key
    process_folder_images(folder_path, json_key_path)
process_images_gemini_one_hread.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import base64
3
+ import json
4
+ import time
5
+
6
+ from utils import encode_image_to_jpeg_base64
7
+ # Import your VertexAIService class
8
+ from vertex_ai_service import VertexAIService # Replace with the module name where VertexAIService is defined
9
+
10
+
11
def process_image_file(file_path, client):
    """
    Processes an image file and returns the path to the JSON file with results.

    The result is written next to the image, using the image's name with a
    ".json" extension.

    :param file_path: Path to the image file
    :param client: Instance of VertexAIService
    :return: Path to the saved JSON file
    :raises ValueError: If the image could not be encoded
    """
    input_image64 = encode_image_to_jpeg_base64(file_path)

    # The encoder reports failures as a (None, message) tuple instead of raising;
    # surface that message here rather than sending the tuple to the model.
    if isinstance(input_image64, tuple):
        raise ValueError(f"Could not encode image {file_path}: {input_image64[1]}")

    # Processing settings
    prompt = "Read the text"
    system = "You are receipt recognizer"

    # Call image processing
    result_img = client.process_image(input_image64, "gemini-1.5-flash", prompt, system, 0.0)

    # Create path for JSON file
    json_file_path = os.path.splitext(file_path)[0] + ".json"

    # Write the result to a JSON file
    with open(json_file_path, 'w', encoding='utf-8') as json_file:
        json_file.write(result_img)

    return json_file_path
36
+
37
+
38
def process_folder_images_sequentially(folder_path, json_key_path, delay_seconds=60):
    """
    Processes all images in the specified folder sequentially.

    Files are handled in sorted order. A failure on one file is reported and
    does not stop the remaining files from being processed.

    :param folder_path: Path to the folder containing images
    :param json_key_path: Path to the JSON key for Vertex AI authentication
    :param delay_seconds: Pause after each successfully processed image, used
        to stay under the request quota (default 60)
    """
    # Initialize Vertex AI client
    client = VertexAIService(json_key_path=json_key_path)

    # Match extensions case-insensitively so files such as "IMG_1.JPG" are included
    image_extensions = ('.jpg', '.jpeg', '.png')
    image_files = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(image_extensions)
    )

    last_index = len(image_files) - 1
    for index, file_path in enumerate(image_files):
        try:
            json_file_path = process_image_file(file_path, client)
        except Exception as exc:
            print(f"Failed to process file {file_path}. Error: {exc}")
            continue

        print(f"Processed: {file_path}, result saved to: {json_file_path}")
        # Nothing follows the last file, so waiting there would be wasted time
        if index < last_index:
            time.sleep(delay_seconds)
59
+
60
+
61
# Script entry point: process the example receipts one at a time
if __name__ == '__main__':
    # Folder that holds the images to process
    folder_path = './examples'
    # Service-account key file passed on for Vertex AI authentication
    json_key_path = 'GOOGLE_VERTEX_AI_KEY.json'
    process_folder_images_sequentially(folder_path, json_key_path)
prompt_v1.txt ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Task: Extract structured information from receipt images.
2
+ Receipts: May be in various languages, including non-Latin scripts, and have diverse formats.
3
+ Information to Extract:
4
+ - Store name
5
+ - Store address
6
+ - Currency (e.g., USD, EUR)
7
+ - date (in format "YYYY.MM.DD HH:MM:SS")
8
+ - tax
9
+ - Purchased items:
10
+ - Name
11
+ - Price per unit (format: 0.00)
12
+ - Quantity or weight
13
+ - unit of measurement (string)
14
+ - Total price (format: 0.00)
15
+ - Discount applied (if any, format: 0.00)
16
+ - Category (e.g., groceries, electronics, dining) — determine based on item name / description or context
17
+ - Total amount (format: 0.00)
18
+ - Total discount (format: 0.00)
19
+ - Tax
20
+ - Tips (if tips are included in the Total, write the tips amount; if tips are not included in the Total, write 0.00)
21
+ Output Format: JSON object with the following structure:
22
+ {
23
+ "store_name": "Store Name",
24
+ "store_address": "Store Address",
25
+ "currency": "Currency",
26
+ "date_time": "YYYY.MM.DD HH:MM:SS",
27
+ "payment_method": "card" or "cash",
28
+ "items": [
29
+ {
30
+ "name": "Item Name",
31
+ "unit_price": 0.00,
32
+ "quantity": 0,
33
+ "unit_of_measurement": "Unit of measurement",
34
+ "total_price": 0.00,
35
+ "discount": 0.00,
36
+ "category": "Category Name"
37
+ }
38
+ ],
39
+ "total_amount": 0.00,
40
+ "total_discount": 0.00,
41
+ "tax": 0.00,
42
+ "tips": 0.00
43
+ }
44
+ If no receipt is detected: Return "Receipt not found."
45
+ Additional Notes:
46
+ 1. If the receipt is in a non-Latin script, extract the information in its original form unless translation is required.
47
+ 2. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
48
+ Write whole json with information about all products.
49
+ 3. Note that the name of an item may be a few lines long, but the item necessarily has a price in last line.
prompt_v2.txt ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Task: Extract structured information from receipt images.
2
+ Receipts: May be in various languages, including non-Latin scripts, and have diverse formats.
3
+ Information to Extract:
4
+ - Store name
5
+ - Store address
6
+ - Currency (e.g., USD, EUR)
7
+ - date (in format "YYYY.MM.DD HH:MM:SS")
8
+ - tax
9
+ - tips (if tips are included in the Total, write the tips amount; if tips are not included in the Total, write 0.00)
10
+ - Purchased items:
11
+ - Name
12
+ - Price per unit (format: 0.00)
13
+ - Quantity or weight
14
+ - unit of measurement (string)
15
+ - Total price (format: 0.00)
16
+ - Discount applied (if any, format: 0.00)
17
+ - Category (from the following list):
18
+ * Groceries
19
+ * Produce
20
+ * Meat
21
+ * Seafood
22
+ * Dairy
23
+ * Bakery
24
+ * Canned goods
25
+ * Frozen foods
26
+ * Beverages
27
+ * Snacks
28
+ * Cleaning supplies
29
+ * Personal care products
30
+ * Electronics
31
+ * Clothing
32
+ * Dining
33
+ * Home goods
34
+ * Other (specify if not in the list)
35
+ - Total amount (format: 0.00)
36
+ - Total discount (format: 0.00)
37
+ Output Format: JSON object with the following structure:
38
+ {
39
+ "store_name": "Store Name",
40
+ "store_address": "Store Address",
41
+ "currency": "Currency",
42
+ "date_time": "YYYY.MM.DD HH:MM:SS",
43
+ "payment_method": "card" or "cash",
44
+ "items": [
45
+ {
46
+ "name": "Item Name",
47
+ "unit_price": 0.00,
48
+ "quantity": 0,
49
+ "unit_of_measurement": "Unit of measurement",
50
+ "total_price": 0.00,
51
+ "discount": 0.00,
52
+ "category": "Category Name"
53
+ }
54
+ ],
55
+ "total_amount": 0.00,
56
+ "total_discount": 0.00,
57
+ "tax": 0.00,
58
+ "tips": 0.00
59
+ }
60
+ If no receipt is detected: Return "Receipt not found."
61
+ Additional Notes:
62
+ 1. If the receipt is in a non-Latin script, extract the information in its original form unless translation is required.
63
+ 2. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
64
+ Write whole json with information about all products.
prompt_v3.txt ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #Your Task: Receipt Recognition and Data Extraction
2
+
3
+ You are tasked with extracting structured information from receipts. Receipts will come from various countries, in different languages, and can have different layouts and formats. Your goal is to parse the receipt text, identify the receipt type (store, cafe/restaurant, or payment for services), and return the data in JSON format with the required fields. Follow the specific instructions below to ensure accurate extraction.
4
+
5
+ #Required Fields:
6
+
7
+ 1. Receipt Type: Identify the type of receipt. It could be from:
8
+ * Store: Typically involves grocery or retail items.
9
+ * Cafe/Restaurant: Typically involves food and beverage items, table numbers, or tipping sections.
10
+ * Payment for Services: This type of receipt may involve service fees or professional services.
11
+ 2. Receipt Number: Extract the unique receipt number, typically found at the top of the receipt.
12
+ 3. Store/Business Name: Extract the name of the store, cafe, restaurant, or service provider.
13
+ 4. Store Address: Extract the address of the store, including city and country if available.
14
+ 5. Date: Extract the date of the transaction and format it as YYYY-MM-DD HH:MM.
15
+ 6. Currency: Extract the currency if explicitly mentioned (e.g., EUR, USD). If the currency is not specified, detect the language of the receipt and infer the currency based on the country where the language is predominantly used. For example, if the receipt is in Ukrainian, set the currency to UAH (Ukrainian Hryvnia).
16
+ 7. Payment Method: Identify whether the payment was made by "card" or "cash."
17
+ 8. Total Amount: Extract the total amount of the transaction. This is typically located at the end of the receipt, often highlighted in bold or a larger font.
18
+ 9. Total Discount: Extract the total discount if explicitly mentioned. If not, calculate the total discount by summing up the discounts for individual items.
19
+ 10. Tax: Extract the total tax amount if it is listed on the receipt.
20
+
21
+ #Item-Level Details:
22
+
23
+ For each item on the receipt, extract the following details:
24
+
25
+ 1. Item Name: Extract the full name of each item. Some items have names split across multiple lines; in this case, concatenate the lines until you encounter a quantity or unit of measurement (e.g., "2ks", "1 ks" or "1 ks * 2"), which marks the end of the item name. The quantity may appear on the same line as the end of the name or on the next line.
26
+ 2. Unit Price: Extract the price per unit for each item.
27
+ 3. Quantity: Extract the quantity of each item, including the unit of measurement (e.g., "ks" for pieces, "kg" for kilograms).
28
+ 4. Price: Extract the final price for each item.
29
+ 5. Discount: Extract any discount applied to the item. If no discount is provided, set it to 0.
30
+ 6. Category: Automatically assign a category based on the item name. For groceries, assign relevant subcategories such as Dairy, Bakery, Fruits, etc. If the receipt is from a restaurant, use only a category from this list: Food, Drinks.
31
+
32
+ #Special Cases:
33
+
34
+ 1. Cafe/Restaurant Receipts: If the receipt is from a cafe or restaurant, handle additional fields like:
35
+ * Table Number: Extract the table number if available, often printed near the top of the receipt.
36
+ * Tips: Extract any tip amounts explicitly listed or infer from the total paid amount minus the original bill amount.
37
+ * Service Charges: Some restaurants may include an automatic service charge, which should be listed separately from the tax or tips.
38
+ * Order Type: Identify whether the order was "dine-in" or "takeaway."
39
+ 2. Missing Currency: If no currency is mentioned on the receipt, infer the local currency by detecting the language and country of origin. For example, a receipt in French would use EUR, while one in Ukrainian would use UAH.
40
+ 3. Multi-line Item Names: If an item name spans multiple lines, merge the lines to form the complete name. Stop merging when a quantity or unit of measurement is encountered.
41
+ 4. Total Amount: The total amount is often larger than other numbers or displayed in bold at the bottom of the receipt. Make sure to capture this accurately.
42
+ 5. Total Discount: If no total discount is listed, sum the discounts for each individual item.
43
+ 6. Rounding Adjustments: Some receipts may include a "rounding" line item, where the total amount is adjusted (typically for cash payments) to avoid dealing with fractions of currency (e.g., rounding to the nearest 0.05 in some countries). If a rounding adjustment is present, extract the value of the rounding adjustment and reflect it in the total amount. For example:
44
+ * Total Before Rounding: 19.97
45
+ * Rounding: -0.02
46
+ * Final Total: 19.95 If the rounding adjustment is found, include it as a separate field in the JSON output under "rounding_adjustment", and ensure that the "total_amount" reflects the final adjusted total.
47
+ 7. Taxes: Receipts can handle taxes in various ways, and the system should be prepared to capture these scenarios:
48
+ * Tax-Inclusive Pricing: In some countries or for certain receipts, taxes are already included in the item price and not listed separately. If the receipt mentions that taxes are included in prices, record the "tax" field as 0 and note that taxes are included in the item prices.
49
+ * Multiple Tax Rates: Some receipts may include multiple tax rates (e.g., different VAT rates for different items). In this case, extract each tax rate and the corresponding tax amounts, and store them in a separate list of tax breakdowns. For example, the receipt might show "5% VAT" and "15% VAT" for different categories of goods:
50
+ ** "taxes": [{"rate": "5%", "amount": 1.00}, {"rate": "15%", "amount": 3.50}]
51
+ * Missing Tax Information: In some cases, the receipt might not clearly mention taxes, but you may infer them based on standard rates in the country of origin. If no explicit tax amount is listed and you are unable to infer it, set the tax to "unknown" or null in the JSON output.
52
+ * Tax-Exempt Items: Some items on the receipt may be tax-exempt. If this is indicated, ensure that these items are excluded from any tax calculations. Note these in the item-level details with "tax_exempt": true and make sure the "tax" field reflects the correct amount for taxable items only.
53
+ * Service Charges vs. Taxes: Sometimes service charges may be listed separately from taxes (common in restaurants). Ensure that service charges are not included in the tax amount, and store them under the "service_charge" field.
54
+ * Tax Breakdown and Total: If both individual item taxes and a total tax amount are listed, the system should ensure consistency between the sum of item-level taxes and the total tax listed at the bottom of the receipt.
55
+ 8. In certain receipt formats, the quantity and unit price may appear before the item name. When processing such receipts, the goal is to correctly extract the quantity, unit price, and item name in their proper order. For example, if one line of the receipt shows "5 * 23.00 = 115.0" and the next line displays "Milk," the system should interpret this as:
56
+ * Quantity: 5 units
57
+ * Unit Price: 23.00
58
+ * Item Name: Milk
59
+ * Total Price: 115.0 This approach should be applied consistently throughout the entire receipt to extract data accurately.
60
+
61
+ #JSON Output Format:
62
+
63
+ {
64
+ "receipt_type": "string",
65
+ "receipt_number": "string",
66
+ "store_name": "string",
67
+ "store_address": "string",
68
+ "date_time": "string",
69
+ "currency": "string",
70
+ "payment_method": "string",
71
+ "total_amount": "number",
72
+ "total_discount": "number",
73
+ "tax": "number",
74
+ "taxes": [
75
+ {
76
+ "rate": "string",
77
+ "amount": "number"
78
+ }
79
+ ],
80
+ "rounding_adjustment": "number",
81
+ "rounded_total_amount": "number",
82
+ "items": [
83
+ {
84
+ "name": "string",
85
+ "unit_price": "number",
86
+ "quantity": {
87
+ "amount": "number",
88
+ "unit_of_measurement": "string"
89
+ },
90
+ "price": "number",
91
+ "discount": "number",
92
+ "category": "string",
93
+ "tax_exempt": "boolean"
94
+ }
95
+ ],
96
+ "cafe_additional_info": {
97
+ "table_number": "string",
98
+ "tips": "number",
99
+ "service_charge": "number",
100
+ "order_type": "string"
101
+ }
102
+ }
103
+
104
+ #Additional Notes:
105
+
106
+ 1. You should handle receipts in various languages and from different countries.
107
+ 2. Pay special attention to formatting differences and edge cases, such as multi-line item names, missing currency symbols, or cafe/restaurant-specific information.
108
+ 3. Always ensure the output is well-structured and follows the JSON format provided.
109
+ 4. The "rounding_adjustment" field should reflect the value by which the total was adjusted due to rounding. If no rounding adjustment is present, it can be set to 0 or omitted from the output.
110
+ 5. Ensure that the final "total_amount" field reflects the total after any rounding adjustment has been applied.
111
+ 6. Inclusive Taxes: If taxes are included in the item prices, set the "tax" field to 0 and adjust the item prices accordingly.
112
+ 7. Multiple Tax Rates: The "taxes" field provides a detailed breakdown for receipts with different tax rates. This field is optional and can be excluded if only a single tax amount is listed.
113
+ 8. Tax-Exempt Items: Mark tax-exempt items with the "tax_exempt": true field.
114
+ 9. Service Charges vs. Taxes: Ensure that service charges are captured separately from taxes in the "service_charge" field.
115
+ 10. Return the full JSON object with all available information. If any information is unclear or missing, include it as "unknown" or "not available" in the output.
116
+ 11. Your final response should be in valid JSON format with no additional text.
requirements.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ torch==2.4.1
2
+ torchvision==0.19.1
3
+ gradio==4.44.0
4
+ numpy==1.26.4
5
+ opencv-python==4.9.0.80
6
+ mediapipe==0.10.7
7
+ openai==1.45.0
8
+ google-api-python-client==2.145.0
9
+ google-auth-httplib2==0.2.0
10
+ google-auth-oauthlib==1.2.1
11
+ google-cloud-aiplatform==1.67.0
12
+ aiohttp==3.10.5
13
+ openpyxl==3.1.5
14
+ torch==2.4.1
15
+ torchvision==0.19.1
system_instruction.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ You are a receipt recognizer
utils.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import json
3
+ import pandas as pd
4
+
5
+ import base64
6
+ from io import BytesIO
7
+ from PIL import Image
8
+ from algorithm import receipt_calculation
9
+
10
+
11
def read_prompt_from_file(file_path):
    """Return the entire content of the UTF-8 text file at ``file_path``."""
    with open(file_path, 'r', encoding='utf-8') as prompt_file:
        prompt_text = prompt_file.read()
    return prompt_text
14
+
15
+
16
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a Base64 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
19
+
20
+
21
def encode_image_from_gradio(input_image):
    """
    Encodes an in-memory image array to JPEG and then to a Base64 string.

    :param input_image: Array-like pixel data, or None when no image was supplied
    :return: Base64 encoded string of the JPEG image, or None if input_image is None
    """
    if input_image is None:
        return None

    pil_image = Image.fromarray(np.uint8(input_image))

    # JPEG cannot store alpha or palette data; Pillow raises when asked to save
    # such modes, so normalise anything JPEG cannot hold to RGB first.
    if pil_image.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
        pil_image = pil_image.convert('RGB')

    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")

    return base64.b64encode(buffered.getvalue()).decode('utf-8')
32
+
33
+
34
# NOTE(review): an identical encode_image is already defined earlier in this
# module; this later definition is the one that takes effect.
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a Base64 string."""
    with open(image_path, "rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode('utf-8')
37
+
38
+
39
def encode_image_to_jpeg_base64(filepath):
    """
    Encodes an image file to JPEG and then to a Base64 string.

    :param filepath: Path to the image file
    :return: Base64 encoded string of the JPEG image on success; on failure a
        tuple ``(None, error_message)``
    """
    if filepath is None:
        return None, "File path is None."

    try:
        # The context manager closes the underlying file even if saving fails
        with Image.open(filepath) as pil_image:
            # JPEG cannot store alpha or palette data (RGBA, LA, P, ...);
            # convert anything it cannot hold to RGB before saving.
            if pil_image.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
                pil_image = pil_image.convert('RGB')

            buffered = BytesIO()
            pil_image.save(buffered, format="JPEG")

        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    except Exception as e:
        # Errors are reported through the return value rather than raised
        return None, str(e)
63
+
64
+
65
def encode_image_to_webp_base64(filepath):
    """
    Encodes an image file to WebP and then to a Base64 string.

    :param filepath: Path to the image file
    :return: Base64 encoded string of the WebP image on success; on failure a
        tuple ``(None, error_message)``
    """
    if filepath is None:
        return None, "File path is None."

    try:
        # The context manager closes the underlying file even if saving fails
        with Image.open(filepath) as pil_image:
            # RGBA input is flattened to RGB before encoding
            if pil_image.mode == 'RGBA':
                pil_image = pil_image.convert('RGB')

            buffered = BytesIO()
            pil_image.save(buffered, format="WEBP")

        return base64.b64encode(buffered.getvalue()).decode('utf-8')

    except Exception as e:
        # Errors are reported through the return value rather than raised
        return None, str(e)
89
+
90
+
91
def process_receipt_json(json_input):
    """
    Converts the receipt JSON into a store summary, an items table and a status message.

    :param json_input: JSON string describing a single receipt
    :return: Tuple ``(store_info, items_table, message)``. ``store_info`` is a
        multi-line summary string, ``items_table`` is a list of rows and
        ``message`` reports the result of the totals check. When the input
        cannot be used, ``store_info`` starts with "Error:" and the other two
        elements are None.
    """
    try:
        # Parse the JSON string
        data = json.loads(json_input)
    except (json.JSONDecodeError, TypeError):
        # Same arity as the success path so callers can always unpack three values
        return "Error: Invalid JSON format", None, None

    if not isinstance(data, dict):
        # Valid JSON that is not an object (e.g. a bare string) has no fields to read
        return "Error: Receipt JSON is not an object", None, None

    # Try to extract store information, if available
    store_name = data.get("store_name", "Not specified")
    store_address = data.get("store_address", "Not specified")
    currency = data.get("currency", "Not specified")
    date_time = data.get("date_time", "Not specified")
    payment_method = data.get("payment_method", "Not specified")
    total_amount = data.get("total_amount", "Not specified")
    discount = data.get("total_discount", "Not specified")
    tax = data.get("tax", "Not specified")
    tips = data.get("tips", "Not specified")

    # Format store information
    store_info = \
        (f"Store: {store_name}\nAddress: {store_address}\nCurrency: {currency}"
         f"\nTotal Amount: {total_amount}"
         f"\nDate time: {date_time}"
         f"\nType payment: {payment_method}"
         f"\nTotal discount: {discount}"
         f"\nTax: {tax}"
         )

    # Extract items, if available in JSON
    items = data.get("items", [])
    if items:
        items_table = [[item.get("name", "Not specified"),
                        item.get("category", "Not specified"),
                        item.get("unit_price", "Not specified"),
                        item.get("quantity", "Not specified"),
                        item.get("unit_of_measurement", "Not specified"),
                        item.get("total_price", "Not specified"),
                        item.get("discount", "Not specified")]
                       for item in items]
    else:
        items_table = [["No items"]]

    total_product_prices, total_sum = receipt_calculation.calculate_tips_and_taxes(items_table, total_amount, tax, tips)
    message = "Everything is okay!"
    if items:
        # Append the per-item price computed by the calculation helper as the last column
        for index, row in enumerate(items_table):
            row.append(total_product_prices[index].price)

    # The model may write the total with a decimal comma, or not as a number at all
    try:
        recognized_total = round(float(str(total_amount).replace(",", ".")), 2)
    except ValueError:
        recognized_total = None

    # NOTE(review): the totals check runs whether or not items were found - confirm
    # this matches the intended behaviour.
    if recognized_total is None or total_sum != recognized_total:
        message = "Recognized total sum and products total sum is not equal. Check if AI model correctly created a JSON"
    return store_info, items_table, message
146
+
147
+
148
def save_to_excel(json_output, excel_file_path):
    """
    Writes the receipt described by ``json_output`` to an Excel workbook.

    The store summary is written at the top of the 'Receipt' sheet and the
    items table below it, separated by empty rows.

    :param json_output: JSON string describing a single receipt
    :param excel_file_path: Destination path of the .xlsx file
    :return: ``excel_file_path`` on success, or the "Error: ..." string
        reported by process_receipt_json
    """
    # Index instead of unpacking: the error result may carry fewer elements
    # than the success result.
    result = process_receipt_json(json_output)
    store_info, items_table = result[0], result[1]
    if isinstance(store_info, str) and store_info.startswith("Error:"):
        return store_info

    # Split each "Label: value" line on the first separator only, so values
    # that themselves contain ": " are kept whole.
    store_info_parsed = []
    for line in store_info.split('\n'):
        label, separator, value = line.partition(': ')
        if separator:
            store_info_parsed.append((label, value))

    store_info_df = pd.DataFrame(store_info_parsed, columns=["Label", "Value"])

    item_columns = [
        "Item Name", "Category", "Unit Price", "Quantity", "Unit", "Total Price", "Discount", "Grand Total"
    ]
    # Short rows (such as the single-cell "No items" placeholder) are padded so
    # the frame can still be built.
    padded_rows = [list(row) + [None] * (len(item_columns) - len(row)) for row in items_table]
    items_df = pd.DataFrame(padded_rows, columns=item_columns)

    with pd.ExcelWriter(excel_file_path, engine='openpyxl') as writer:
        store_info_df.to_excel(writer, sheet_name='Receipt', index=False, header=False, startrow=0)

        # Leave a gap between the summary and the items table
        items_df_start_row = len(store_info_df) + 2
        items_df.to_excel(writer, sheet_name='Receipt', index=False, startrow=items_df_start_row)
        worksheet = writer.sheets['Receipt']
        worksheet.column_dimensions['A'].width = 30

    return excel_file_path
vertex_ai_example.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import vertexai
2
+ from google.cloud import aiplatform
3
+ from google.oauth2 import service_account
4
+ from vertexai.generative_models import GenerativeModel
5
+
6
# Service-account key file used to authenticate this example
service_account_path = "receiptsai19_09.json"

# Credentials are loaded once when the script starts and reused by generate()
credentials = service_account.Credentials.from_service_account_file(service_account_path)


def generate():
    """Send a fixed text prompt to a Gemini model on Vertex AI and print the reply."""
    vertexai.init(project="receiptsai-436007", location="us-central1", credentials=credentials)
    gemini_model = GenerativeModel("gemini-1.5-flash-001")
    reply = gemini_model.generate_content('Create simple example on C++')
    print(reply.text)


generate()
vertex_ai_service.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import os
4
+ import tempfile
5
+
6
+ import vertexai
7
+ from google.oauth2 import service_account
8
+ from vertexai.generative_models import GenerativeModel
9
+ from vertexai.generative_models import Part, SafetySetting
10
+
11
+
12
class VertexAIService:
    """
    Wrapper around Vertex AI generative models authenticated with a service account.

    The key is read from a JSON file when ``json_key_path`` points to an
    existing file, otherwise from the environment variable named by
    ``json_key_env_var``.
    """

    def __init__(self, json_key_path=None, json_key_env_var='GOOGLE_VERTEX_KEY'):
        """
        Initializes the Vertex AI client.

        :param json_key_path: Path to the JSON file (optional)
        :param json_key_env_var: Environment variable name containing the JSON key (default 'GOOGLE_VERTEX_KEY')
        :raises ValueError: If no key file exists and the environment variable is unset or invalid
        """
        self.json_key_path = json_key_path
        self.json_key_env_var = json_key_env_var
        self.credentials = self._authenticate_vertex_ai()

    def _authenticate_vertex_ai(self):
        """
        Authenticates using the JSON key from a file or environment variable.

        :return: Google Credentials object
        :raises ValueError: If the environment variable is not set or does not hold valid JSON
        """
        if self.json_key_path and os.path.isfile(self.json_key_path):
            # Authenticate using the file
            return service_account.Credentials.from_service_account_file(self.json_key_path)

        # Get JSON key from environment variable
        json_key = os.getenv(self.json_key_env_var)
        if not json_key:
            raise ValueError(f"Environment variable {self.json_key_env_var} is not set.")

        # Build the credentials directly from the parsed key: the secret is never
        # written to disk, so nothing is left behind if loading fails.
        try:
            key_info = json.loads(json_key)
        except json.JSONDecodeError as error:
            raise ValueError(
                f"Environment variable {self.json_key_env_var} does not contain valid JSON."
            ) from error

        return service_account.Credentials.from_service_account_info(key_info)

    def process_text(self, project, location, model_name, prompt, system=""):
        """
        Generates content using Vertex AI.

        Failures are not raised; they are reported in the returned string.

        :param project: GCP project ID
        :param location: GCP region
        :param model_name: The name of the Generative Model
        :param prompt: Prompt for content generation
        :param system: Optional system instruction; omitted from the request when empty
        :return: Generated content or error message
        """
        try:
            # Initialize Vertex AI with credentials
            vertexai.init(project=project, location=location, credentials=self.credentials)

            # An empty string is not a meaningful instruction, so send none at all
            system_instruction = [system] if system else None
            model = GenerativeModel(model_name, system_instruction=system_instruction)

            # Generate content
            responses = model.generate_content(prompt)
            return responses.text
        except Exception as error:
            return f"An error occurred: {error}"

    def process_image(self, input_image64, model_name, prompt, system="You are receipt recognizer", temperatura=0.0,
                      mime_type="image/webp", project="receiptsai-436007", location="us-central1"):
        """
        Processes the image using Vertex AI model.

        :param input_image64: Base64 encoded image string
        :param model_name: Name of the model in Vertex AI
        :param prompt: Text prompt to guide the model
        :param system: System instruction for the model
        :param temperatura: Temperature for controlling randomness
        :param mime_type: MIME type declared for the image data (default "image/webp")
        :param project: GCP project ID
        :param location: GCP region
        :return: JSON string with the model response plus token usage counters
        :raises ValueError: If input_image64 is None
        :raises RuntimeError: If the request or the parsing of the response fails
        """
        if input_image64 is None:
            raise ValueError("No objects detected.")

        try:
            # Initialize Vertex AI
            vertexai.init(project=project, location=location, credentials=self.credentials)

            # Load the model
            model = GenerativeModel(model_name, system_instruction=[system])

            # Prepare the image part
            image_part = Part.from_data(
                mime_type=mime_type,
                data=input_image64
            )

            # Set generation configuration; the model is asked to answer in JSON
            generation_config = {
                "max_output_tokens": 8192,
                "temperature": temperatura,
                "response_mime_type": "application/json"
            }

            # Set safety settings: blocking is disabled for every listed category
            safety_settings = [
                SafetySetting(
                    category=category,
                    threshold=SafetySetting.HarmBlockThreshold.BLOCK_NONE
                )
                for category in (
                    SafetySetting.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
                    SafetySetting.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
                    SafetySetting.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
                    SafetySetting.HarmCategory.HARM_CATEGORY_HARASSMENT,
                )
            ]

            # Generate content using the model
            response = model.generate_content([image_part, prompt],
                                              generation_config=generation_config,
                                              safety_settings=safety_settings)

            json_content = json.loads(response.text)

            # Attach token usage so callers can track consumption per request
            json_content['input_tokens'] = response.usage_metadata.prompt_token_count
            json_content['output_tokens'] = response.usage_metadata.candidates_token_count
            json_content['total_tokens'] = response.usage_metadata.total_token_count

            return json.dumps(json_content, indent=4)

        except Exception as error:
            # Chain the original error so its traceback stays available
            raise RuntimeError(f"An error occurred during image processing: {error}") from error
142
+
143
+
144
# Example usage
if __name__ == '__main__':
    # Project and model details
    project = "receiptsai-436007"
    location = "us-central1"
    model_name = "gemini-1.5-flash-001"
    prompt = "Create example on C++"

    # Initialize the client and generate content
    client = VertexAIService(json_key_path='GOOGLE_VERTEX_AI_KEY.json')
    result = client.process_text(project, location, model_name, prompt)
    print(f'Generated result: {result}')

    # Image processing
    image_path = "./examples/lidl2.jpg"
    with open(image_path, "rb") as image_file:
        input_image64 = base64.b64encode(image_file.read()).decode('utf-8')

    prompt = "Read the text"
    system = "You are receipt recognizer"
    result_img = client.process_image(input_image64, "gemini-1.5-pro", prompt, system, 0.0)
    print(f'Image processing result: {result_img}')

    # Re-serialise without ASCII escaping so non-Latin receipt text is readable.
    # Decoding the raw text with 'unicode_escape' would also unescape quotes and
    # newlines inside strings, so the printed text would no longer be valid JSON.
    decoded_string = json.dumps(json.loads(result_img), ensure_ascii=False, indent=4)

    print(decoded_string)