Abhinav Deshpande commited on
Commit
a08ab73
·
unverified ·
1 Parent(s): 9c468fa

Add files

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +735 -0
  3. requirements.txt +11 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ secrets.env
app.py ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Flipkart Frontend.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/github/Abhinav-gh/404NotFound/blob/main/Flipkart%20Frontend.ipynb
8
+
9
+ # 1. Install Gradio and Required Libraries
10
+ ### Start by installing Gradio if it's not already installed.
11
+ """
12
+
13
+ # ! pip install gradio
14
+ # ! pip install cv
15
+ # ! pip install ultralytics
16
+ # ! pip install supervision
17
+ # !pip install google-generativeai
18
+ # !pip install paddleocr
19
+ # !pip install paddlepaddle
20
+
21
+ """# 2. Import Libraries
22
+ ### Getting all the necessary Libraries
23
+ """
24
+
25
+ import gradio as gr
26
+ import random
27
+ import numpy as np
28
+ from PIL import Image
29
+ import cv2
30
+ import time
31
+ from ultralytics import YOLO
32
+ import supervision as sv
33
+ import pandas as pd
34
+ from google.colab.patches import cv2_imshow
35
+ from IPython.display import clear_output
36
+ from collections import defaultdict, deque
37
+ import matplotlib.pyplot as plt
38
+ import google.generativeai as genai
39
+ from google.colab import userdata
40
+ from datetime import datetime
41
+ from paddleocr import PaddleOCR
42
+ from google.colab import files
43
+ import os
44
+
45
+ """# Path Variables
46
+
47
+ ### Path used in OCR
48
+ """
49
+
50
+ OCR_M3="Model3_best.pt"
51
+ GOOGLE_API_KEY = os.getenv("GEMINI_API")
52
+ GEMINI_MODEL = 'models/gemini-1.5-flash'
53
+
54
+ """### Path used in Brand Recognition model"""
55
+
56
+ Brand_Recognition_Model ='kitkat_s.pt'
57
+ annotatedOpFile= 'annotated_output.mp4'
58
+
59
+ """# 3. Import Drive
60
+
61
+ """
62
+
63
+ # from google.colab import drive
64
+
65
+ # drive.mount('/content/drive')
66
+
67
+ """# 4. Brand Recognition Backend
68
+
69
+ ### Model for Grocery Detection
70
+ """
71
+
72
# Load the brand-recognition YOLO model into the module-global `model`.
# NOTE(review): later in this file the same global `model` is rebound to the
# OCR weights (model = YOLO(OCR_M3)); brand-recognition handlers that read
# the global after import will therefore run with the OCR model — confirm
# this is intended, or use two distinct globals.
model_path = Brand_Recognition_Model
model = YOLO(model_path)
74
+
75
+ """### Image uploading for Grocery detection"""
76
+
77
def detect_grocery_items(image):
    """Detect grocery items in a single image with the shared YOLO model.

    Args:
        image: Input image as supplied by Gradio (PIL Image / RGB array).

    Returns:
        tuple: (display image in RGB, summary table rows
        [item, count, mean confidence], status message string).
    """
    # Gradio delivers RGB; the detector expects OpenCV's BGR channel order,
    # so reverse the channel axis before inference.
    rgb = np.array(image)
    bgr = rgb[:, :, ::-1]
    results = model(bgr)
    annotated_image = results[0].plot()

    class_ids = results[0].boxes.cls.cpu().numpy()
    confidences = results[0].boxes.conf.cpu().numpy()

    # Keep only reasonably confident detections.
    threshold = 0.4
    class_counts = defaultdict(int)
    class_confidences = defaultdict(list)

    for class_id, confidence in zip(class_ids, confidences):
        if confidence >= threshold:
            class_name = model.names[int(class_id)]
            class_counts[class_name] += 1
            class_confidences[class_name].append(confidence)

    if not class_counts:
        # BUGFIX: return the RGB array (the original returned the BGR copy,
        # so Gradio displayed swapped colours on the failure path).
        return rgb, [], "The model failed to recognize items or the image may contain untrained objects."

    summary_table = [[class_name, count, f"{np.mean(class_confidences[class_name]):.2f}"]
                     for class_name, count in class_counts.items()]

    # results[0].plot() yields BGR; flip back to RGB for display.
    annotated_image_rgb = annotated_image[:, :, ::-1]
    return annotated_image_rgb, summary_table, "Object Recognised Successfully 🥳 "
112
+
113
+ """### Detect Grovcery brand from video"""
114
+
115
def iou(box1, box2):
    """Return the intersection-over-union of two [x1, y1, x2, y2] boxes."""
    # Corners of the overlap rectangle (empty when the boxes are disjoint).
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    overlap = max(0, right - left) * max(0, bottom - top)
    area_a = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_b = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Union = sum of areas minus the double-counted overlap.
    return overlap / float(area_a + area_b - overlap)
128
+
129
def smooth_box(box_history):
    """Element-wise average of a history of boxes; None for an empty history."""
    if box_history:
        return np.mean(box_history, axis=0)
    return None
133
+
134
def process_video(input_path, output_path):
    """Detect grocery brands across a video and write an annotated copy.

    YOLO inference runs on every 5th frame only; between inference frames,
    each tracked item's box is smoothed over its recent history and
    interpolated so the drawn overlay stays stable.

    Args:
        input_path: Path of the source video.
        output_path: Path where the annotated mp4 is written.

    Returns:
        dict: brand name -> rounded average simultaneous count, restricted
        to brands detected in more than 10% of all frames.
    """
    cap = cv2.VideoCapture(input_path)

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize variables for tracking
    detected_items = {}  # item_id -> per-item tracking state dict
    frame_count = 0

    # For result confirmation: brand -> {frame number: count in that frame}
    detections_history = defaultdict(lambda: defaultdict(int))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Run YOLO detection every 5th frame
        if frame_count % 5 == 0:
            results = model(frame)

            current_frame_detections = []

            for r in results:
                boxes = r.boxes
                for box in boxes:
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    conf = box.conf.item()
                    cls = int(box.cls.item())
                    brand = model.names[cls]

                    current_frame_detections.append((brand, [x1, y1, x2, y2], conf))

            # Match current detections with existing items (greedy IoU > 0.5)
            for brand, box, conf in current_frame_detections:
                matched = False
                for item_id, item_info in detected_items.items():
                    if iou(box, item_info['smoothed_box']) > 0.5:
                        item_info['frames_detected'] += 1
                        item_info['total_conf'] += conf
                        item_info['box_history'].append(box)
                        if len(item_info['box_history']) > 10:
                            item_info['box_history'].popleft()
                        item_info['smoothed_box'] = smooth_box(item_info['box_history'])
                        item_info['last_seen'] = frame_count
                        matched = True
                        break

                if not matched:
                    # Start tracking a brand-new item.
                    item_id = len(detected_items)
                    detected_items[item_id] = {
                        'brand': brand,
                        'box_history': deque([box], maxlen=10),
                        'smoothed_box': box,
                        'frames_detected': 1,
                        'total_conf': conf,
                        'last_seen': frame_count
                    }

                detections_history[brand][frame_count] += 1

        # Drop stale items and draw the surviving ones on every frame.
        for item_id, item_info in list(detected_items.items()):
            if frame_count - item_info['last_seen'] > fps * 2:  # 2 seconds
                del detected_items[item_id]
                continue

            # Interpolate box position toward the latest observation so the
            # overlay glides instead of jumping between detection frames.
            if item_info['smoothed_box'] is not None:
                alpha = 0.3  # blend weight toward the newest box
                current_box = item_info['smoothed_box']
                target_box = item_info['box_history'][-1] if item_info['box_history'] else current_box
                interpolated_box = [
                    current_box[i] * (1 - alpha) + target_box[i] * alpha
                    for i in range(4)
                ]
                item_info['smoothed_box'] = interpolated_box

                x1, y1, x2, y2 = map(int, interpolated_box)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{item_info['brand']}",
                            (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        out.write(frame)

    cap.release()
    out.release()

    # Calculate final counts and confirm results
    total_frames = frame_count
    confirmed_items = {}
    for brand, frame_counts in detections_history.items():
        detection_frames = len(frame_counts)
        # Only confirm brands present in more than 10% of the frames.
        if detection_frames > total_frames * 0.1:
            avg_count = sum(frame_counts.values()) / detection_frames
            confirmed_items[brand] = round(avg_count)

    return confirmed_items
241
+
242
def annotate_video(input_video):
    """Annotate *input_video* and return (output path, item rows, status)."""
    output_path = annotatedOpFile
    confirmed_items = process_video(input_video, output_path)

    # Flatten the {brand: quantity} mapping into table rows for Gradio.
    item_list = list(confirmed_items.items())

    status_message = "Video processed successfully!"

    return output_path, item_list, status_message
251
+
252
+ """# 5. OCR Backend
253
+
254
+ ### The PaddleOCR + Gemini combined type model.
255
+
256
+ Run these 3 cells before trying out any model
257
+ """
258
+
259
# Function to draw bounding boxes and show text
def draw_bounding_boxes(image_path):
    """OCR an image file and plot it with red boxes around detected text.

    Args:
        image_path: Path to an image file on disk.

    Returns:
        list[str]: the recognised text strings, in detection order.

    NOTE(review): relies on the module-global `ocr` engine that is only
    initialised further down this file — calling this before the module
    finishes importing would raise NameError.
    """
    # Read the image
    img = Image.open(image_path)
    result = ocr.ocr(image_path, cls=True)  # Get the OCR result

    # Create a figure to display the image
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    ax = plt.gca()
    all_text_data = []
    # Iterate through the results and draw boxes
    for idx, line in enumerate(result[0]):
        box = line[0]  # Get the bounding box coordinates
        text = line[1][0]  # Extracted text
        print(f"[DEBUG] Box {idx + 1}: {text}")  # Display text with box number
        all_text_data.append(f"{text}")

        # Draw the bounding box
        polygon = plt.Polygon(box, fill=None, edgecolor='red', linewidth=2)
        ax.add_patch(polygon)
        # Add text label in the box
        # ax.text(box[0][0], box[0][1] - 5, f"{idx + 1}: {text}", color='blue', fontsize=12)

    plt.axis('off')  # Hide axes
    plt.show()
    return all_text_data
286
+
287
# Set your API key securely (store it in Colab's userdata)
# NOTE(review): GOOGLE_API_KEY comes from os.getenv("GEMINI_API") and is
# None when the variable is unset — Gemini calls would then fail at runtime.
genai.configure(api_key=GOOGLE_API_KEY)
289
+
290
def gemini_context_correction(text):
    """Use Gemini API to refine noisy OCR results and extract MRP details.

    Args:
        text: Raw (possibly noisy) OCR text.

    Returns:
        str: Gemini's formatted answer with Manufacturing Date,
        Expiration Date and MRP lines.
    """
    # CONSISTENCY: use the module-level GEMINI_MODEL constant instead of a
    # duplicated hard-coded model-name literal (same value, one source of
    # truth shared with the other Gemini helpers).
    model = genai.GenerativeModel(GEMINI_MODEL)

    response = model.generate_content(
        f"Identify and extract manufacturing, expiration dates, and MRP from the following text. "
        f"The dates may be written in dd/mm/yyyy format or as <Month_name> <Year> or <day> <Month_Name> <Year>. "
        f"The text may contain noise or unclear information. If only one date is provided, assume it is the Expiration Date. "
        f"Additionally, extract the MRP (e.g., 'MRP: ₹99.00', 'Rs. 99/-'). "
        f"Format the output as:\n"
        f"Manufacturing Date: <MFG Date>\n"
        f"Expiration Date: <EXP Date>\n"
        f"MRP: <MRP Value>\n\n"
        f"Here is the text: {text}"
    )

    return response.text
307
+
308
# Test Gemini with example text (replace with actual OCR output)
# NOTE(review): this executes at import time and performs a live Gemini API
# call on every startup — consider removing it or guarding it behind
# `if __name__ == "__main__":`.
sample_text = "EXP 12/2024 MFD 08/2023 Best Before 06/2025 MRP Rs. 250/-"
refined_output = gemini_context_correction(sample_text)
print("[DEBUG] Gemini Refined Output:\n", refined_output)
312
+
313
def validate_dates_with_gemini(mfg_date, exp_date):
    """Use Gemini API to validate and correct the manufacturing and expiration dates.

    Args:
        mfg_date: Manufacturing date string (or '-1' when unknown).
        exp_date: Expiration date string (or '-1' when unknown).

    Returns:
        str: "Manufacturing Date: <...>, Expiration Date: <...>" from Gemini,
        or an error message when the response carries no parts.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)
    # BUGFIX: the original wrote `response = model.generate_content = (...)`,
    # which *assigned* the prompt string to the attribute instead of calling
    # the API — `response` became a plain str and `response.parts` below
    # would raise AttributeError.
    response = model.generate_content(
        f"Input Manufacturing Date: {mfg_date}, Expiration Date: {exp_date}. "
        f"If either date is '-1', leave it as is. "
        f"1. If the expiration date is earlier than the manufacturing date, swap them. "
        f"2. If both dates are logically incorrect, suggest new valid dates based on typical timeframes. "
        f"Always respond ONLY in the format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )

    # Check if the response contains valid parts
    if response.parts:
        # Process the response to extract final dates
        final_dates = response.parts[0].text.strip()
        return final_dates

    # Return a message or a default value if no valid parts are found
    return "Invalid response from Gemini API."
333
+
334
+
335
def extract_and_validate_with_gemini(refined_text):
    """
    Use Gemini API to extract, validate, and correct manufacturing and expiration dates.

    NOTE(review): this definition is dead code — it is shadowed by the later
    redefinition of `extract_and_validate_with_gemini` below, which is the
    one actually bound at runtime. Consider deleting this version.

    NOTE(review): the strptime calls here use "%Y/%m/%d", while the prompt
    asks Gemini for 'dd/mm/yyyy' — if this version were live, valid replies
    would raise an uncaught ValueError. The later redefinition uses the
    matching "%d/%m/%Y" format.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)

    # Correctly call the generate_content method
    response = model.generate_content(
        f"The extracted text is:\n'{refined_text}'\n\n"
        f"1. Extract the 'Manufacturing Date' and 'Expiration Date' from the above text. "
        f"Ignore unrelated data (e.g., 'MRP: Not Found').\n"
        f"2. If a date is missing or invalid, return -1 for that date.\n"
        f"3. If the 'Expiration Date' is earlier than the 'Manufacturing Date', swap them.\n"
        f"4. Ensure both dates are in 'dd/mm/yyyy' format. If the original dates are not in this format, convert them.\n"
        f"Respond ONLY in this exact format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )
    print("[DEBUG] Response from validation function", response)
    # Ensure the response object is valid and contains the required parts
    if hasattr(response, 'parts') and response.parts:
        final_dates = response.parts[0].text.strip()
        print(f"[DEBUG] Gemini Response: {final_dates}")

        # Extract the dates from the response
        mfg_date_str, exp_date_str = parse_gemini_response(final_dates)

        # Process and swap if necessary
        if mfg_date_str != "-1" and exp_date_str != "-1":
            mfg_date = datetime.strptime(mfg_date_str, "%Y/%m/%d")
            exp_date = datetime.strptime(exp_date_str, "%Y/%m/%d")

            # Swap if Expiration Date is earlier than Manufacturing Date
            if exp_date < mfg_date:
                print("[DEBUG] Swapping dates.")
                mfg_date, exp_date = exp_date, mfg_date

            # Return the formatted swapped dates
            return (
                f"Manufacturing Date: {mfg_date.strftime('%Y/%m/%d')}, "
                f"Expiration Date: {exp_date.strftime('%Y/%m/%d')}"
            )

        # If either date is -1, return them as-is
        return final_dates

    # Handle invalid responses gracefully
    print("[ERROR] Invalid response from Gemini API.")
    return "Invalid response from Gemini API."
383
+
384
def extract_and_validate_with_gemini(refined_text):
    """
    Use Gemini API to extract, validate, correct, and swap dates if necessary.

    This is the live definition (it shadows an earlier function of the same
    name). Dates are parsed and re-emitted in 'dd/mm/yyyy' format.

    Args:
        refined_text: Gemini-refined OCR text containing the date lines.

    Returns:
        str: "Manufacturing Date: <d>, Expiration Date: <d>" (prefixed with
        "Corrected Dates: \n" when the dates had to be swapped), the raw
        Gemini reply when either date is -1, or an error message.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)

    # Generate content using Gemini with the refined prompt
    response = model.generate_content(
        f"The extracted text is:\n'{refined_text}'\n\n"
        f"1. Extract the 'Manufacturing Date' and 'Expiration Date' from the above text. "
        f"Ignore unrelated data (e.g., 'MRP: Not Found').\n"
        f"2. If a date is missing or invalid, return -1 for that date.\n"
        f"3. If the 'Expiration Date' is earlier than the 'Manufacturing Date', swap them.\n"
        f"4. Ensure both dates are in 'dd/mm/yyyy' format. If the original dates are not in this format, convert them.\n"
        f"Respond ONLY in this exact format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )

    # Validate the response and extract dates
    if hasattr(response, 'parts') and response.parts:
        final_dates = response.parts[0].text.strip()
        print(f"[DEBUG] Gemini Response: {final_dates}")

        # Extract the dates from the response
        mfg_date_str, exp_date_str = parse_gemini_response(final_dates)

        # Process and swap if necessary
        if mfg_date_str != "-1" and exp_date_str != "-1":
            mfg_date = datetime.strptime(mfg_date_str, "%d/%m/%Y")
            exp_date = datetime.strptime(exp_date_str, "%d/%m/%Y")

            # Swap if Expiration Date is earlier than Manufacturing Date
            swapping_statement = ""
            if exp_date < mfg_date:
                print("[DEBUG] Swapping dates.")
                mfg_date, exp_date = exp_date, mfg_date
                swapping_statement = "Corrected Dates: \n"

            # Return the formatted swapped dates
            return swapping_statement + (
                f"Manufacturing Date: {mfg_date.strftime('%d/%m/%Y')}, "
                f"Expiration Date: {exp_date.strftime('%d/%m/%Y')}"
            )

        # If either date is -1, return them as-is
        return final_dates

    # Handle invalid responses gracefully
    print("[ERROR] Invalid response from Gemini API.")
    return "Invalid response from Gemini API."
434
+
435
def parse_gemini_response(response_text):
    """Pull the (mfg, exp) date strings out of Gemini's formatted reply.

    Expects the exact shape "Manufacturing Date: <d>, Expiration Date: <d>"
    and returns ("-1", "-1") when the reply does not match it.
    """
    try:
        segments = response_text.split(", ")
        mfg_value = segments[0].split(": ")[1].strip()
        exp_value = segments[1].split(": ")[1].strip()
    except IndexError:
        # Reply did not follow the "Label: value, Label: value" shape.
        print("[ERROR] Failed to parse Gemini response.")
        return "-1", "-1"
    return mfg_value, exp_value
448
+
449
def extract_date(refined_text, date_type):
    """Return the value following *date_type* in a comma-separated summary.

    Scans the "Label: value" segments of *refined_text* and returns the
    stripped value of the first segment mentioning *date_type*; returns
    '-1' when the label is absent or the segment cannot be split.
    """
    if date_type not in refined_text:
        return '-1'  # Label never appears in the text
    try:
        for segment in refined_text.split(','):
            if date_type in segment:
                return segment.split(':')[1].strip()
    except IndexError:
        pass  # Segment had no ':'-separated value
    return '-1'
461
+
462
+
463
+
464
+
465
+ """### **Model 3**
466
+ Using Yolov8 x-large model trained till about 75 epochs
467
+ and
468
+ Gradio as user interface
469
+ (in case model fails, we fall back to the approach from model 1)
470
+
471
+ """
472
+
473
# NOTE(review): this rebinds the shared module-global `model` (previously
# the brand-recognition weights) to the OCR text-region detector. Every
# function that reads the global `model` after import — including the
# brand-recognition handlers above — will run with these OCR weights.
# Verify this is intended; otherwise use two distinct globals.
model_path = OCR_M3
model = YOLO(model_path)
475
+
476
+ """## Driver code to be run after selecting from Model 2 or 3.
477
+ (Note: not needed for model 1)
478
+ """
479
+
480
def new_draw_bounding_boxes(image):
    """Draw bounding boxes around detected text in the image and display it.

    Accepts either an image-file path or a PIL Image, runs PaddleOCR on it,
    plots the image with red boxes and numbered labels, and returns the
    recognised strings.

    Args:
        image: Path to an image file, or a PIL.Image.Image instance.

    Returns:
        list[str]: recognised text strings in detection order.

    Raises:
        ValueError: if *image* is neither a path string nor a PIL Image.
    """
    # If the input is a string (file path), open the image
    if isinstance(image, str):
        img = Image.open(image)
        np_img = np.array(img)  # Convert to NumPy array
        ocr_result = ocr.ocr(np_img, cls=True)  # Perform OCR on the array
    elif isinstance(image, Image.Image):
        np_img = np.array(image)  # Convert PIL Image to NumPy array
        ocr_result = ocr.ocr(np_img, cls=True)  # Perform OCR on the array
    else:
        raise ValueError("Input must be a file path or a PIL Image object.")

    # Create a figure to display the image
    plt.figure(figsize=(10, 10))
    # BUGFIX: plot the decoded pixel array, not the raw argument — when
    # `image` is a file-path string, plt.imshow(image) raises a TypeError.
    plt.imshow(np_img)
    ax = plt.gca()
    all_text_data = []

    # Iterate through the OCR results and draw boxes
    for idx, line in enumerate(ocr_result[0]):
        box = line[0]  # Get the bounding box coordinates
        text = line[1][0]  # Extracted text
        print(f"[DEBUG] Box {idx + 1}: {text}")  # Debug print
        all_text_data.append(text)

        # Draw the bounding box
        polygon = plt.Polygon(box, fill=None, edgecolor='red', linewidth=2)
        ax.add_patch(polygon)

        # Add text label with a small offset for visibility
        x, y = box[0][0], box[0][1]
        ax.text(x, y - 5, f"{idx + 1}: {text}", color='blue', fontsize=12, ha='left')

    plt.axis('off')  # Hide axes
    plt.title("Detected Text with Bounding Boxes", fontsize=16)  # Add a title
    plt.show()

    return all_text_data
519
+
520
# Initialize PaddleOCR
# Shared OCR engine (angle classification on, English model) used by all
# OCR helper functions in this module.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
522
+
523
def detect_and_ocr(image):
    """Detect objects using YOLO, draw bounding boxes, and perform OCR.

    Pipeline: YOLO locates text regions -> PaddleOCR reads each region ->
    Gemini refines the raw text -> Gemini validates/corrects the dates.

    Args:
        image: Input image from Gradio (PIL, RGB).

    Returns:
        tuple: (annotated RGB image, raw OCR text, Gemini-refined text,
        validated date output string).
    """
    # Convert input image from PIL to OpenCV format
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Run inference using YOLO model
    results = model(image)
    boxes = results[0].boxes.xyxy.cpu().numpy()  # Extract bounding box coordinates

    extracted_texts = []
    for (x1, y1, x2, y2) in boxes:
        # Draw bounding box on the original image
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

        # Perform OCR on the detected region using the original image and bounding box coordinates
        # NOTE: the crop happens after the rectangle is drawn, so the green
        # border pixels are included in the region passed to OCR.
        region = image[int(y1):int(y2), int(x1):int(x2)]
        ocr_result = ocr.ocr(region, cls=True)

        # Check if ocr_result is None or empty
        if ocr_result and isinstance(ocr_result, list) and ocr_result[0]:
            for idx, line in enumerate(ocr_result[0]):
                box = line[0]  # Get the bounding box coordinates
                text = line[1][0]  # Extracted text
                print(f"[DEBUG] Box {idx + 1}: {text}")  # Debug output
                extracted_texts.append(text)
        else:
            # Handle case when OCR returns no result
            print(f"[DEBUG] No OCR result for region: ({x1}, {y1}, {x2}, {y2}) or OCR returned None")
            extracted_texts.append("No OCR result found")  # Append a message to indicate no result

    # Convert image to RGB for Gradio display
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Join all extracted texts into a single string
    result_text = "\n".join(str(text) for text in extracted_texts)

    # Call the Gemini context correction function
    refined_text = gemini_context_correction(result_text)
    print("[DEBUG] Gemini Refined Text:\n", refined_text)

    # Validate and correct dates
    validated_output = extract_and_validate_with_gemini(refined_text)

    print("[DEBUG] Validated Output from Gemini:\n", validated_output)

    # Return image with bounding boxes and results
    return image_rgb, result_text, refined_text, validated_output
570
+
571
def further_processing(image, previous_result_text):
    """Re-run full-image OCR and refine the combined text via Gemini.

    Used as the 'Comprehensive OCR' fallback when region-based extraction
    failed: appends whole-image OCR lines to the earlier result text and
    asks Gemini to refine the merged text.
    """
    detected_texts = new_draw_bounding_boxes(image)
    print("[DEBUG] ", detected_texts, type(detected_texts))
    # Append each newly detected line (newline-terminated) to the prior text.
    combined_text = previous_result_text + "".join(f"{t}\n" for t in detected_texts)
    print("[DEBUG] combined text", combined_text)
    # Call Gemini for context correction and refinement
    refined_output = gemini_context_correction(combined_text)
    print("[DEBUG] Gemini Refined Output:\n", refined_output)

    return refined_output  # Return refined output for display
584
+
585
def handle_processing(validated_output):
    """Decide whether to proceed with further processing.

    Returns a gr.update() toggling the 'Comprehensive OCR' button: it is
    shown only when BOTH dates came back as the -1 failure marker.
    """
    # Extract the manufacturing and expiration dates from the string
    try:
        mfg_date_str = validated_output.split("Manufacturing Date: ")[1].split(",")[0].strip()
        exp_date_str = validated_output.split("Expiration Date: ")[1].strip()

        # Convert the extracted values to integers.
        # NOTE: real dates such as "12/08/2024" raise ValueError here, which
        # is caught below and hides the button — only the literal "-1"
        # failure markers survive this int() conversion.
        mfg_date = int(mfg_date_str)
        exp_date = int(exp_date_str)
        print("Further processing: ", mfg_date, exp_date)

    except (IndexError, ValueError) as e:
        print(f"[ERROR] Failed to parse dates: {e}")
        return gr.update(visible=False)  # Hide button on error

    # Check if both dates are -1
    if mfg_date == -1 and exp_date == -1:
        print("[DEBUG] Showing the 'Further Processing' button.")  # Debug print
        return gr.update(visible=True)  # Show 'Further Processing' button
    print("[DEBUG] Hiding the 'Further Processing' button.")  # Debug print
    return gr.update(visible=False)  # Hide button if dates are valid
607
+
608
+ """# 5. Frontend Of Brand Recognition
609
+
610
+ ## Layout for Image interface
611
+ """
612
+
613
def create_image_interface():
    """Build the Gradio Interface for grocery detection on a single image.

    Returns:
        gr.Interface wired to detect_grocery_items: annotated image,
        item/quantity/confidence table, and a status textbox.
    """
    return gr.Interface(
        fn=detect_grocery_items,
        inputs=gr.Image(label="Upload Image", height=400, width=400),
        outputs=[
            gr.Image(label="Image with Bounding Boxes", height=400, width=400),
            gr.Dataframe(headers=["Item", "Quantity", "Avg Confidence"], label="Detected Items and Quantities", elem_id="summary_table"),
            gr.Textbox(label="Status", elem_id="status_message")
        ],
        title="Grocery Item Detection in an Image",
        description="Upload an image for object detection. The model will return an annotated image, item quantities, and average confidence scores.",
        css=".gr-table { font-size: 16px; text-align: left; width: 50%; margin: auto; } #summary_table { margin-top: 20px; }"
    )
626
+
627
+ """## Layout For Video Interface"""
628
+
629
def create_video_interface():
    """Build the Gradio Interface for grocery detection on a video.

    Returns:
        gr.Interface wired to annotate_video: annotated video, item
        quantity table, and a status textbox.
    """
    return gr.Interface(
        fn=annotate_video,  # This is the function that processes the video and returns the results
        inputs=gr.Video(label="Upload Video", height=400, width=400),
        outputs=[
            gr.Video(label="Annotated Video", height=400, width=400),  # To display the annotated video
            gr.Dataframe(headers=["Item", "Quantity"], label="Detected Items and Quantities", elem_id="summary_table"),
            gr.Textbox(label="Status", elem_id="status_message")  # Any additional status messages
        ],
        title="Grocery Item Detection in a Video",
        description="Upload a video for object detection. The model will return an annotated video with bounding boxes and item quantities. Low confidence values may indicate incorrect detection.",
        css="""
        .gr-table { font-size: 16px; text-align: left; width: 50%; margin: auto; }
        #summary_table { margin-top: 20px; }
        """
    )
645
+
646
def create_brand_recog_interface():
    """Compose the image and video brand-recognition demos into one page.

    Returns:
        gr.Blocks with two tabs (Image / Video) rendering the interfaces
        built by create_image_interface and create_video_interface.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Flipkart Grid Robotics Track - Brand Recognition Interface")

        # One tab per input modality.
        with gr.Tabs():
            with gr.Tab("Image"):
                create_image_interface()
            with gr.Tab("Video"):
                create_video_interface()
    return demo
656
+
657
+ Brand_recog = create_brand_recog_interface()
658
+
659
+ """# Frontend Of OCR"""
660
+
661
def create_ocr_interface():
    """Build the OCR tab: upload/detect view plus result textboxes.

    Wiring:
      * 'Analyze' runs detect_and_ocr and fills all three result boxes.
      * 'Comprehensive OCR' (hidden by default) re-runs whole-image OCR via
        further_processing when date extraction failed.
      * handle_processing toggles that button's visibility.

    Returns:
        gr.Blocks for the OCR interface.
    """
    with gr.Blocks() as ocr_interface:
        gr.Markdown("# Flipkart Grid Robotics Track - OCR Interface")

        with gr.Tabs():
            with gr.TabItem("Upload & Detection"):
                with gr.Row():
                    # Input: Upload image
                    input_image = gr.Image(type="pil", label="Upload Image", height=400, width=400)
                    output_image = gr.Image(label="Image with Bounding Boxes", height=400, width=400)

                # Button for Analyze Image & Extract Text
                btn = gr.Button("Analyze Image & Extract Text")

            with gr.TabItem("OCR Results"):
                with gr.Row():
                    extracted_textbox = gr.Textbox(label="Extracted OCR Text", lines=5)
                with gr.Row():
                    refined_textbox = gr.Textbox(label="Refined Text from Gemini", lines=5)
                with gr.Row():
                    validated_textbox = gr.Textbox(label="Validated Output", lines=5)

                # Comprehensive OCR button (Initially hidden)
                further_button = gr.Button("Comprehensive OCR", visible=False)

        # Detect and OCR button click event
        btn.click(
            detect_and_ocr,
            inputs=[input_image],
            outputs=[output_image, extracted_textbox, refined_textbox, validated_textbox]
        )

        # Further processing button click event
        further_button.click(
            further_processing,
            inputs=[input_image, extracted_textbox],
            outputs=refined_textbox
        )

        # Monitor validated output to control button visibility
        # NOTE(review): this listens for changes on refined_textbox but
        # feeds validated_textbox into handle_processing — likely intended
        # to be validated_textbox.change; confirm the trigger is correct.
        refined_textbox.change(
            handle_processing,
            inputs=[validated_textbox],
            outputs=[further_button]
        )

        # Hide the validated_textbox when "Comprehensive OCR" is clicked
        further_button.click(
            lambda: gr.update(visible=False),
            outputs=[validated_textbox]
        )

    return ocr_interface
714
+
715
+ # Create and launch the OCR interface
716
+ ocr_interface = create_ocr_interface()
717
+ # ocr_interface.launch(share=True, debug=True)
718
+
719
+ """# 6. Create a Tabbed Interface for Both Image and Video
720
+ ### Here, we combine the image and video interfaces into a tabbed structure so users can switch between them easily.
721
+ """
722
+
723
def create_tabbed_interface():
    """Combine the brand-recognition and OCR demos into one tabbed UI.

    Returns:
        gr.TabbedInterface over the two module-level Blocks apps.
    """
    return gr.TabbedInterface(
        [Brand_recog, ocr_interface],
        # BUGFIX: tab label typo "Recongnition" -> "Recognition".
        ["Brand Recognition", "OCR"]
    )
728
+
729
+ tabbed_interface = create_tabbed_interface()
730
+
731
+ """# 7. Launch the Gradio Interface
732
+ ### Finally, launch the Gradio interface to make it interactable.
733
+ """
734
+
735
+ tabbed_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.40.1
2
+ opencv-python-headless==4.8.0.74
3
+ ultralytics==8.0.100
4
+ supervision==0.2.0
5
+ google-generativeai==0.1.0
6
+ paddleocr==2.6.1.3
7
+ paddlepaddle==2.5.2
8
+ numpy==1.23.5
9
+ Pillow==9.5.0
10
+ pandas==2.0.3
11
+ matplotlib==3.7.2