Abhinav Deshpande commited on
Commit
a08ab73
·
unverified ·
1 Parent(s): 9c468fa

Add files

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +735 -0
  3. requirements.txt +11 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ secrets.env
app.py ADDED
@@ -0,0 +1,735 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """Flipkart Frontend.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/github/Abhinav-gh/404NotFound/blob/main/Flipkart%20Frontend.ipynb
8
+
9
+ # 1. Install Gradio and Required Libraries
10
+ ### Start by installing Gradio if it's not already installed.
11
+ """
12
+
13
+ # ! pip install gradio
14
+ # ! pip install cv
15
+ # ! pip install ultralytics
16
+ # ! pip install supervision
17
+ # !pip install google-generativeai
18
+ # !pip install paddleocr
19
+ # !pip install paddlepaddle
20
+
21
+ """# 2. Import Libraries
22
+ ### Getting all the necessary Libraries
23
+ """
24
+
25
+ import gradio as gr
26
+ import random
27
+ import numpy as np
28
+ from PIL import Image
29
+ import cv2
30
+ import time
31
+ from ultralytics import YOLO
32
+ import supervision as sv
33
+ import pandas as pd
34
+ from google.colab.patches import cv2_imshow
35
+ from IPython.display import clear_output
36
+ from collections import defaultdict, deque
37
+ import matplotlib.pyplot as plt
38
+ import google.generativeai as genai
39
+ from google.colab import userdata
40
+ from datetime import datetime
41
+ from paddleocr import PaddleOCR
42
+ from google.colab import files
43
+ import os
44
+
45
+ """# Path Variables
46
+
47
+ ### Path used in OCR
48
+ """
49
+
50
+ OCR_M3="Model3_best.pt"
51
+ GOOGLE_API_KEY = os.getenv("GEMINI_API")
52
+ GEMINI_MODEL = 'models/gemini-1.5-flash'
53
+
54
+ """### Path used in Brand Recognition model"""
55
+
56
+ Brand_Recognition_Model ='kitkat_s.pt'
57
+ annotatedOpFile= 'annotated_output.mp4'
58
+
59
+ """# 3. Import Drive
60
+
61
+ """
62
+
63
+ # from google.colab import drive
64
+
65
+ # drive.mount('/content/drive')
66
+
67
+ """# 4. Brand Recognition Backend
68
+
69
+ ### Model for Grocery Detection
70
+ """
71
+
72
# Load the brand-recognition YOLO model into the module-global `model`.
# NOTE(review): later in this file the same global `model` is rebound to the
# OCR weights (model = YOLO(OCR_M3)); brand-recognition handlers that read
# the global after import will therefore run with the OCR model — confirm
# this is intended, or use two distinct globals.
model_path = Brand_Recognition_Model
model = YOLO(model_path)
74
+
75
+ """### Image uploading for Grocery detection"""
76
+
77
def detect_grocery_items(image):
    """Detect grocery items in a single image with the shared YOLO model.

    Args:
        image: Input image as supplied by Gradio (PIL Image / RGB array).

    Returns:
        tuple: (display image in RGB, summary table rows
        [item, count, mean confidence], status message string).
    """
    # Gradio delivers RGB; the detector expects OpenCV's BGR channel order,
    # so reverse the channel axis before inference.
    rgb = np.array(image)
    bgr = rgb[:, :, ::-1]
    results = model(bgr)
    annotated_image = results[0].plot()

    class_ids = results[0].boxes.cls.cpu().numpy()
    confidences = results[0].boxes.conf.cpu().numpy()

    # Keep only reasonably confident detections.
    threshold = 0.4
    class_counts = defaultdict(int)
    class_confidences = defaultdict(list)

    for class_id, confidence in zip(class_ids, confidences):
        if confidence >= threshold:
            class_name = model.names[int(class_id)]
            class_counts[class_name] += 1
            class_confidences[class_name].append(confidence)

    if not class_counts:
        # BUGFIX: return the RGB array (the original returned the BGR copy,
        # so Gradio displayed swapped colours on the failure path).
        return rgb, [], "The model failed to recognize items or the image may contain untrained objects."

    summary_table = [[class_name, count, f"{np.mean(class_confidences[class_name]):.2f}"]
                     for class_name, count in class_counts.items()]

    # results[0].plot() yields BGR; flip back to RGB for display.
    annotated_image_rgb = annotated_image[:, :, ::-1]
    return annotated_image_rgb, summary_table, "Object Recognised Successfully 🥳 "
112
+
113
+ """### Detect Grovcery brand from video"""
114
+
115
def iou(box1, box2):
    """Return the intersection-over-union of two [x1, y1, x2, y2] boxes."""
    # Corners of the overlap rectangle (empty when the boxes are disjoint).
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    overlap = max(0, right - left) * max(0, bottom - top)
    area_a = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_b = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # Union = sum of areas minus the double-counted overlap.
    return overlap / float(area_a + area_b - overlap)
128
+
129
def smooth_box(box_history):
    """Element-wise average of a history of boxes; None for an empty history."""
    if box_history:
        return np.mean(box_history, axis=0)
    return None
133
+
134
def process_video(input_path, output_path):
    """Detect grocery brands across a video and write an annotated copy.

    YOLO inference runs on every 5th frame only; between inference frames,
    each tracked item's box is smoothed over its recent history and
    interpolated so the drawn overlay stays stable.

    Args:
        input_path: Path of the source video.
        output_path: Path where the annotated mp4 is written.

    Returns:
        dict: brand name -> rounded average simultaneous count, restricted
        to brands detected in more than 10% of all frames.
    """
    cap = cv2.VideoCapture(input_path)

    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Initialize video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    # Initialize variables for tracking
    detected_items = {}  # item_id -> per-item tracking state dict
    frame_count = 0

    # For result confirmation: brand -> {frame number: count in that frame}
    detections_history = defaultdict(lambda: defaultdict(int))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1

        # Run YOLO detection every 5th frame
        if frame_count % 5 == 0:
            results = model(frame)

            current_frame_detections = []

            for r in results:
                boxes = r.boxes
                for box in boxes:
                    x1, y1, x2, y2 = box.xyxy[0].tolist()
                    conf = box.conf.item()
                    cls = int(box.cls.item())
                    brand = model.names[cls]

                    current_frame_detections.append((brand, [x1, y1, x2, y2], conf))

            # Match current detections with existing items (greedy IoU > 0.5)
            for brand, box, conf in current_frame_detections:
                matched = False
                for item_id, item_info in detected_items.items():
                    if iou(box, item_info['smoothed_box']) > 0.5:
                        item_info['frames_detected'] += 1
                        item_info['total_conf'] += conf
                        item_info['box_history'].append(box)
                        if len(item_info['box_history']) > 10:
                            item_info['box_history'].popleft()
                        item_info['smoothed_box'] = smooth_box(item_info['box_history'])
                        item_info['last_seen'] = frame_count
                        matched = True
                        break

                if not matched:
                    # Start tracking a brand-new item.
                    item_id = len(detected_items)
                    detected_items[item_id] = {
                        'brand': brand,
                        'box_history': deque([box], maxlen=10),
                        'smoothed_box': box,
                        'frames_detected': 1,
                        'total_conf': conf,
                        'last_seen': frame_count
                    }

                detections_history[brand][frame_count] += 1

        # Drop stale items and draw the surviving ones on every frame.
        for item_id, item_info in list(detected_items.items()):
            if frame_count - item_info['last_seen'] > fps * 2:  # 2 seconds
                del detected_items[item_id]
                continue

            # Interpolate box position toward the latest observation so the
            # overlay glides instead of jumping between detection frames.
            if item_info['smoothed_box'] is not None:
                alpha = 0.3  # blend weight toward the newest box
                current_box = item_info['smoothed_box']
                target_box = item_info['box_history'][-1] if item_info['box_history'] else current_box
                interpolated_box = [
                    current_box[i] * (1 - alpha) + target_box[i] * alpha
                    for i in range(4)
                ]
                item_info['smoothed_box'] = interpolated_box

                x1, y1, x2, y2 = map(int, interpolated_box)
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{item_info['brand']}",
                            (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

        out.write(frame)

    cap.release()
    out.release()

    # Calculate final counts and confirm results
    total_frames = frame_count
    confirmed_items = {}
    for brand, frame_counts in detections_history.items():
        detection_frames = len(frame_counts)
        # Only confirm brands present in more than 10% of the frames.
        if detection_frames > total_frames * 0.1:
            avg_count = sum(frame_counts.values()) / detection_frames
            confirmed_items[brand] = round(avg_count)

    return confirmed_items
241
+
242
def annotate_video(input_video):
    """Annotate *input_video* and return (output path, item rows, status)."""
    output_path = annotatedOpFile
    confirmed_items = process_video(input_video, output_path)

    # Flatten the {brand: quantity} mapping into table rows for Gradio.
    item_list = list(confirmed_items.items())

    status_message = "Video processed successfully!"

    return output_path, item_list, status_message
251
+
252
+ """# 5. OCR Backend
253
+
254
+ ### The PaddleOCR + Gemini combined type model.
255
+
256
+ Run these 3 cells before trying out any model
257
+ """
258
+
259
# Function to draw bounding boxes and show text
def draw_bounding_boxes(image_path):
    """OCR an image file and plot it with red boxes around detected text.

    Args:
        image_path: Path to an image file on disk.

    Returns:
        list[str]: the recognised text strings, in detection order.

    NOTE(review): relies on the module-global `ocr` engine that is only
    initialised further down this file — calling this before the module
    finishes importing would raise NameError.
    """
    # Read the image
    img = Image.open(image_path)
    result = ocr.ocr(image_path, cls=True)  # Get the OCR result

    # Create a figure to display the image
    plt.figure(figsize=(10, 10))
    plt.imshow(img)
    ax = plt.gca()
    all_text_data = []
    # Iterate through the results and draw boxes
    for idx, line in enumerate(result[0]):
        box = line[0]  # Get the bounding box coordinates
        text = line[1][0]  # Extracted text
        print(f"[DEBUG] Box {idx + 1}: {text}")  # Display text with box number
        all_text_data.append(f"{text}")

        # Draw the bounding box
        polygon = plt.Polygon(box, fill=None, edgecolor='red', linewidth=2)
        ax.add_patch(polygon)
        # Add text label in the box
        # ax.text(box[0][0], box[0][1] - 5, f"{idx + 1}: {text}", color='blue', fontsize=12)

    plt.axis('off')  # Hide axes
    plt.show()
    return all_text_data
286
+
287
# Set your API key securely (store it in Colab's userdata)
# NOTE(review): GOOGLE_API_KEY comes from os.getenv("GEMINI_API") and is
# None when the variable is unset — Gemini calls would then fail at runtime.
genai.configure(api_key=GOOGLE_API_KEY)
289
+
290
def gemini_context_correction(text):
    """Use Gemini API to refine noisy OCR results and extract MRP details.

    Args:
        text: Raw (possibly noisy) OCR text.

    Returns:
        str: Gemini's formatted answer with Manufacturing Date,
        Expiration Date and MRP lines.
    """
    # CONSISTENCY: use the module-level GEMINI_MODEL constant instead of a
    # duplicated hard-coded model-name literal (same value, one source of
    # truth shared with the other Gemini helpers).
    model = genai.GenerativeModel(GEMINI_MODEL)

    response = model.generate_content(
        f"Identify and extract manufacturing, expiration dates, and MRP from the following text. "
        f"The dates may be written in dd/mm/yyyy format or as <Month_name> <Year> or <day> <Month_Name> <Year>. "
        f"The text may contain noise or unclear information. If only one date is provided, assume it is the Expiration Date. "
        f"Additionally, extract the MRP (e.g., 'MRP: ₹99.00', 'Rs. 99/-'). "
        f"Format the output as:\n"
        f"Manufacturing Date: <MFG Date>\n"
        f"Expiration Date: <EXP Date>\n"
        f"MRP: <MRP Value>\n\n"
        f"Here is the text: {text}"
    )

    return response.text
307
+
308
# Test Gemini with example text (replace with actual OCR output)
# NOTE(review): this executes at import time and performs a live Gemini API
# call on every startup — consider removing it or guarding it behind
# `if __name__ == "__main__":`.
sample_text = "EXP 12/2024 MFD 08/2023 Best Before 06/2025 MRP Rs. 250/-"
refined_output = gemini_context_correction(sample_text)
print("[DEBUG] Gemini Refined Output:\n", refined_output)
312
+
313
def validate_dates_with_gemini(mfg_date, exp_date):
    """Use Gemini API to validate and correct the manufacturing and expiration dates.

    Args:
        mfg_date: Manufacturing date string (or '-1' when unknown).
        exp_date: Expiration date string (or '-1' when unknown).

    Returns:
        str: "Manufacturing Date: <...>, Expiration Date: <...>" from Gemini,
        or an error message when the response carries no parts.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)
    # BUGFIX: the original wrote `response = model.generate_content = (...)`,
    # which *assigned* the prompt string to the attribute instead of calling
    # the API — `response` became a plain str and `response.parts` below
    # would raise AttributeError.
    response = model.generate_content(
        f"Input Manufacturing Date: {mfg_date}, Expiration Date: {exp_date}. "
        f"If either date is '-1', leave it as is. "
        f"1. If the expiration date is earlier than the manufacturing date, swap them. "
        f"2. If both dates are logically incorrect, suggest new valid dates based on typical timeframes. "
        f"Always respond ONLY in the format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )

    # Check if the response contains valid parts
    if response.parts:
        # Process the response to extract final dates
        final_dates = response.parts[0].text.strip()
        return final_dates

    # Return a message or a default value if no valid parts are found
    return "Invalid response from Gemini API."
333
+
334
+
335
def extract_and_validate_with_gemini(refined_text):
    """
    Use Gemini API to extract, validate, and correct manufacturing and expiration dates.

    NOTE(review): this definition is dead code — it is shadowed by the later
    redefinition of `extract_and_validate_with_gemini` below, which is the
    one actually bound at runtime. Consider deleting this version.

    NOTE(review): the strptime calls here use "%Y/%m/%d", while the prompt
    asks Gemini for 'dd/mm/yyyy' — if this version were live, valid replies
    would raise an uncaught ValueError. The later redefinition uses the
    matching "%d/%m/%Y" format.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)

    # Correctly call the generate_content method
    response = model.generate_content(
        f"The extracted text is:\n'{refined_text}'\n\n"
        f"1. Extract the 'Manufacturing Date' and 'Expiration Date' from the above text. "
        f"Ignore unrelated data (e.g., 'MRP: Not Found').\n"
        f"2. If a date is missing or invalid, return -1 for that date.\n"
        f"3. If the 'Expiration Date' is earlier than the 'Manufacturing Date', swap them.\n"
        f"4. Ensure both dates are in 'dd/mm/yyyy' format. If the original dates are not in this format, convert them.\n"
        f"Respond ONLY in this exact format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )
    print("[DEBUG] Response from validation function", response)
    # Ensure the response object is valid and contains the required parts
    if hasattr(response, 'parts') and response.parts:
        final_dates = response.parts[0].text.strip()
        print(f"[DEBUG] Gemini Response: {final_dates}")

        # Extract the dates from the response
        mfg_date_str, exp_date_str = parse_gemini_response(final_dates)

        # Process and swap if necessary
        if mfg_date_str != "-1" and exp_date_str != "-1":
            mfg_date = datetime.strptime(mfg_date_str, "%Y/%m/%d")
            exp_date = datetime.strptime(exp_date_str, "%Y/%m/%d")

            # Swap if Expiration Date is earlier than Manufacturing Date
            if exp_date < mfg_date:
                print("[DEBUG] Swapping dates.")
                mfg_date, exp_date = exp_date, mfg_date

            # Return the formatted swapped dates
            return (
                f"Manufacturing Date: {mfg_date.strftime('%Y/%m/%d')}, "
                f"Expiration Date: {exp_date.strftime('%Y/%m/%d')}"
            )

        # If either date is -1, return them as-is
        return final_dates

    # Handle invalid responses gracefully
    print("[ERROR] Invalid response from Gemini API.")
    return "Invalid response from Gemini API."
383
+
384
def extract_and_validate_with_gemini(refined_text):
    """
    Use Gemini API to extract, validate, correct, and swap dates if necessary.

    This is the live definition (it shadows an earlier function of the same
    name). Dates are parsed and re-emitted in 'dd/mm/yyyy' format.

    Args:
        refined_text: Gemini-refined OCR text containing the date lines.

    Returns:
        str: "Manufacturing Date: <d>, Expiration Date: <d>" (prefixed with
        "Corrected Dates: \n" when the dates had to be swapped), the raw
        Gemini reply when either date is -1, or an error message.
    """
    model = genai.GenerativeModel(GEMINI_MODEL)

    # Generate content using Gemini with the refined prompt
    response = model.generate_content(
        f"The extracted text is:\n'{refined_text}'\n\n"
        f"1. Extract the 'Manufacturing Date' and 'Expiration Date' from the above text. "
        f"Ignore unrelated data (e.g., 'MRP: Not Found').\n"
        f"2. If a date is missing or invalid, return -1 for that date.\n"
        f"3. If the 'Expiration Date' is earlier than the 'Manufacturing Date', swap them.\n"
        f"4. Ensure both dates are in 'dd/mm/yyyy' format. If the original dates are not in this format, convert them.\n"
        f"Respond ONLY in this exact format:\n"
        f"Manufacturing Date: <MFG Date>, Expiration Date: <EXP Date>"
    )

    # Validate the response and extract dates
    if hasattr(response, 'parts') and response.parts:
        final_dates = response.parts[0].text.strip()
        print(f"[DEBUG] Gemini Response: {final_dates}")

        # Extract the dates from the response
        mfg_date_str, exp_date_str = parse_gemini_response(final_dates)

        # Process and swap if necessary
        if mfg_date_str != "-1" and exp_date_str != "-1":
            mfg_date = datetime.strptime(mfg_date_str, "%d/%m/%Y")
            exp_date = datetime.strptime(exp_date_str, "%d/%m/%Y")

            # Swap if Expiration Date is earlier than Manufacturing Date
            swapping_statement = ""
            if exp_date < mfg_date:
                print("[DEBUG] Swapping dates.")
                mfg_date, exp_date = exp_date, mfg_date
                swapping_statement = "Corrected Dates: \n"

            # Return the formatted swapped dates
            return swapping_statement + (
                f"Manufacturing Date: {mfg_date.strftime('%d/%m/%Y')}, "
                f"Expiration Date: {exp_date.strftime('%d/%m/%Y')}"
            )

        # If either date is -1, return them as-is
        return final_dates

    # Handle invalid responses gracefully
    print("[ERROR] Invalid response from Gemini API.")
    return "Invalid response from Gemini API."
434
+
435
def parse_gemini_response(response_text):
    """Pull the (mfg, exp) date strings out of Gemini's formatted reply.

    Expects the exact shape "Manufacturing Date: <d>, Expiration Date: <d>"
    and returns ("-1", "-1") when the reply does not match it.
    """
    try:
        segments = response_text.split(", ")
        mfg_value = segments[0].split(": ")[1].strip()
        exp_value = segments[1].split(": ")[1].strip()
    except IndexError:
        # Reply did not follow the "Label: value, Label: value" shape.
        print("[ERROR] Failed to parse Gemini response.")
        return "-1", "-1"
    return mfg_value, exp_value
448
+
449
def extract_date(refined_text, date_type):
    """Return the value following *date_type* in a comma-separated summary.

    Scans the "Label: value" segments of *refined_text* and returns the
    stripped value of the first segment mentioning *date_type*; returns
    '-1' when the label is absent or the segment cannot be split.
    """
    if date_type not in refined_text:
        return '-1'  # Label never appears in the text
    try:
        for segment in refined_text.split(','):
            if date_type in segment:
                return segment.split(':')[1].strip()
    except IndexError:
        pass  # Segment had no ':'-separated value
    return '-1'
461
+
462
+
463
+
464
+
465
+ """### **Model 3**
466
+ Using Yolov8 x-large model trained till about 75 epochs
467
+ and
468
+ Gradio as user interface
469
+ (in case model fails, we fall back to the approach from model 1)
470
+
471
+ """
472
+
473
# NOTE(review): this rebinds the shared module-global `model` (previously
# the brand-recognition weights) to the OCR text-region detector. Every
# function that reads the global `model` after import — including the
# brand-recognition handlers above — will run with these OCR weights.
# Verify this is intended; otherwise use two distinct globals.
model_path = OCR_M3
model = YOLO(model_path)
475
+
476
+ """## Driver code to be run after selecting from Model 2 or 3.
477
+ (Note: not needed for model 1)
478
+ """
479
+
480
def new_draw_bounding_boxes(image):
    """Draw bounding boxes around detected text in the image and display it.

    Accepts either an image-file path or a PIL Image, runs PaddleOCR on it,
    plots the image with red boxes and numbered labels, and returns the
    recognised strings.

    Args:
        image: Path to an image file, or a PIL.Image.Image instance.

    Returns:
        list[str]: recognised text strings in detection order.

    Raises:
        ValueError: if *image* is neither a path string nor a PIL Image.
    """
    # If the input is a string (file path), open the image
    if isinstance(image, str):
        img = Image.open(image)
        np_img = np.array(img)  # Convert to NumPy array
        ocr_result = ocr.ocr(np_img, cls=True)  # Perform OCR on the array
    elif isinstance(image, Image.Image):
        np_img = np.array(image)  # Convert PIL Image to NumPy array
        ocr_result = ocr.ocr(np_img, cls=True)  # Perform OCR on the array
    else:
        raise ValueError("Input must be a file path or a PIL Image object.")

    # Create a figure to display the image
    plt.figure(figsize=(10, 10))
    # BUGFIX: plot the decoded pixel array, not the raw argument — when
    # `image` is a file-path string, plt.imshow(image) raises a TypeError.
    plt.imshow(np_img)
    ax = plt.gca()
    all_text_data = []

    # Iterate through the OCR results and draw boxes
    for idx, line in enumerate(ocr_result[0]):
        box = line[0]  # Get the bounding box coordinates
        text = line[1][0]  # Extracted text
        print(f"[DEBUG] Box {idx + 1}: {text}")  # Debug print
        all_text_data.append(text)

        # Draw the bounding box
        polygon = plt.Polygon(box, fill=None, edgecolor='red', linewidth=2)
        ax.add_patch(polygon)

        # Add text label with a small offset for visibility
        x, y = box[0][0], box[0][1]
        ax.text(x, y - 5, f"{idx + 1}: {text}", color='blue', fontsize=12, ha='left')

    plt.axis('off')  # Hide axes
    plt.title("Detected Text with Bounding Boxes", fontsize=16)  # Add a title
    plt.show()

    return all_text_data
519
+
520
# Initialize PaddleOCR
# Shared OCR engine (angle classification on, English model) used by all
# OCR helper functions in this module.
ocr = PaddleOCR(use_angle_cls=True, lang='en')
522
+
523
def detect_and_ocr(image):
    """Detect objects using YOLO, draw bounding boxes, and perform OCR.

    Pipeline: YOLO locates text regions -> PaddleOCR reads each region ->
    Gemini refines the raw text -> Gemini validates/corrects the dates.

    Args:
        image: Input image from Gradio (PIL, RGB).

    Returns:
        tuple: (annotated RGB image, raw OCR text, Gemini-refined text,
        validated date output string).
    """
    # Convert input image from PIL to OpenCV format
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Run inference using YOLO model
    results = model(image)
    boxes = results[0].boxes.xyxy.cpu().numpy()  # Extract bounding box coordinates

    extracted_texts = []
    for (x1, y1, x2, y2) in boxes:
        # Draw bounding box on the original image
        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)

        # Perform OCR on the detected region using the original image and bounding box coordinates
        # NOTE: the crop happens after the rectangle is drawn, so the green
        # border pixels are included in the region passed to OCR.
        region = image[int(y1):int(y2), int(x1):int(x2)]
        ocr_result = ocr.ocr(region, cls=True)

        # Check if ocr_result is None or empty
        if ocr_result and isinstance(ocr_result, list) and ocr_result[0]:
            for idx, line in enumerate(ocr_result[0]):
                box = line[0]  # Get the bounding box coordinates
                text = line[1][0]  # Extracted text
                print(f"[DEBUG] Box {idx + 1}: {text}")  # Debug output
                extracted_texts.append(text)
        else:
            # Handle case when OCR returns no result
            print(f"[DEBUG] No OCR result for region: ({x1}, {y1}, {x2}, {y2}) or OCR returned None")
            extracted_texts.append("No OCR result found")  # Append a message to indicate no result

    # Convert image to RGB for Gradio display
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Join all extracted texts into a single string
    result_text = "\n".join(str(text) for text in extracted_texts)

    # Call the Gemini context correction function
    refined_text = gemini_context_correction(result_text)
    print("[DEBUG] Gemini Refined Text:\n", refined_text)

    # Validate and correct dates
    validated_output = extract_and_validate_with_gemini(refined_text)

    print("[DEBUG] Validated Output from Gemini:\n", validated_output)

    # Return image with bounding boxes and results
    return image_rgb, result_text, refined_text, validated_output
570
+
571
def further_processing(image, previous_result_text):
    """Re-run full-image OCR and refine the combined text via Gemini.

    Used as the 'Comprehensive OCR' fallback when region-based extraction
    failed: appends whole-image OCR lines to the earlier result text and
    asks Gemini to refine the merged text.
    """
    detected_texts = new_draw_bounding_boxes(image)
    print("[DEBUG] ", detected_texts, type(detected_texts))
    # Append each newly detected line (newline-terminated) to the prior text.
    combined_text = previous_result_text + "".join(f"{t}\n" for t in detected_texts)
    print("[DEBUG] combined text", combined_text)
    # Call Gemini for context correction and refinement
    refined_output = gemini_context_correction(combined_text)
    print("[DEBUG] Gemini Refined Output:\n", refined_output)

    return refined_output  # Return refined output for display
584
+
585
def handle_processing(validated_output):
    """Decide whether to proceed with further processing.

    Returns a gr.update() toggling the 'Comprehensive OCR' button: it is
    shown only when BOTH dates came back as the -1 failure marker.
    """
    # Extract the manufacturing and expiration dates from the string
    try:
        mfg_date_str = validated_output.split("Manufacturing Date: ")[1].split(",")[0].strip()
        exp_date_str = validated_output.split("Expiration Date: ")[1].strip()

        # Convert the extracted values to integers.
        # NOTE: real dates such as "12/08/2024" raise ValueError here, which
        # is caught below and hides the button — only the literal "-1"
        # failure markers survive this int() conversion.
        mfg_date = int(mfg_date_str)
        exp_date = int(exp_date_str)
        print("Further processing: ", mfg_date, exp_date)

    except (IndexError, ValueError) as e:
        print(f"[ERROR] Failed to parse dates: {e}")
        return gr.update(visible=False)  # Hide button on error

    # Check if both dates are -1
    if mfg_date == -1 and exp_date == -1:
        print("[DEBUG] Showing the 'Further Processing' button.")  # Debug print
        return gr.update(visible=True)  # Show 'Further Processing' button
    print("[DEBUG] Hiding the 'Further Processing' button.")  # Debug print
    return gr.update(visible=False)  # Hide button if dates are valid
607
+
608
+ """# 5. Frontend Of Brand Recognition
609
+
610
+ ## Layout for Image interface
611
+ """
612
+
613
def create_image_interface():
    """Build the Gradio Interface for grocery detection on a single image.

    Returns:
        gr.Interface wired to detect_grocery_items: annotated image,
        item/quantity/confidence table, and a status textbox.
    """
    return gr.Interface(
        fn=detect_grocery_items,
        inputs=gr.Image(label="Upload Image", height=400, width=400),
        outputs=[
            gr.Image(label="Image with Bounding Boxes", height=400, width=400),
            gr.Dataframe(headers=["Item", "Quantity", "Avg Confidence"], label="Detected Items and Quantities", elem_id="summary_table"),
            gr.Textbox(label="Status", elem_id="status_message")
        ],
        title="Grocery Item Detection in an Image",
        description="Upload an image for object detection. The model will return an annotated image, item quantities, and average confidence scores.",
        css=".gr-table { font-size: 16px; text-align: left; width: 50%; margin: auto; } #summary_table { margin-top: 20px; }"
    )
626
+
627
+ """## Layout For Video Interface"""
628
+
629
def create_video_interface():
    """Build the Gradio Interface for grocery detection on a video.

    Returns:
        gr.Interface wired to annotate_video: annotated video, item
        quantity table, and a status textbox.
    """
    return gr.Interface(
        fn=annotate_video,  # This is the function that processes the video and returns the results
        inputs=gr.Video(label="Upload Video", height=400, width=400),
        outputs=[
            gr.Video(label="Annotated Video", height=400, width=400),  # To display the annotated video
            gr.Dataframe(headers=["Item", "Quantity"], label="Detected Items and Quantities", elem_id="summary_table"),
            gr.Textbox(label="Status", elem_id="status_message")  # Any additional status messages
        ],
        title="Grocery Item Detection in a Video",
        description="Upload a video for object detection. The model will return an annotated video with bounding boxes and item quantities. Low confidence values may indicate incorrect detection.",
        css="""
        .gr-table { font-size: 16px; text-align: left; width: 50%; margin: auto; }
        #summary_table { margin-top: 20px; }
        """
    )
645
+
646
def create_brand_recog_interface():
    """Compose the image and video brand-recognition demos into one page.

    Returns:
        gr.Blocks with two tabs (Image / Video) rendering the interfaces
        built by create_image_interface and create_video_interface.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Flipkart Grid Robotics Track - Brand Recognition Interface")

        # One tab per input modality.
        with gr.Tabs():
            with gr.Tab("Image"):
                create_image_interface()
            with gr.Tab("Video"):
                create_video_interface()
    return demo
656
+
657
+ Brand_recog = create_brand_recog_interface()
658
+
659
+ """# Frontend Of OCR"""
660
+
661
def create_ocr_interface():
    """Build the OCR tab: upload/detect view plus result textboxes.

    Wiring:
      * 'Analyze' runs detect_and_ocr and fills all three result boxes.
      * 'Comprehensive OCR' (hidden by default) re-runs whole-image OCR via
        further_processing when date extraction failed.
      * handle_processing toggles that button's visibility.

    Returns:
        gr.Blocks for the OCR interface.
    """
    with gr.Blocks() as ocr_interface:
        gr.Markdown("# Flipkart Grid Robotics Track - OCR Interface")

        with gr.Tabs():
            with gr.TabItem("Upload & Detection"):
                with gr.Row():
                    # Input: Upload image
                    input_image = gr.Image(type="pil", label="Upload Image", height=400, width=400)
                    output_image = gr.Image(label="Image with Bounding Boxes", height=400, width=400)

                # Button for Analyze Image & Extract Text
                btn = gr.Button("Analyze Image & Extract Text")

            with gr.TabItem("OCR Results"):
                with gr.Row():
                    extracted_textbox = gr.Textbox(label="Extracted OCR Text", lines=5)
                with gr.Row():
                    refined_textbox = gr.Textbox(label="Refined Text from Gemini", lines=5)
                with gr.Row():
                    validated_textbox = gr.Textbox(label="Validated Output", lines=5)

                # Comprehensive OCR button (Initially hidden)
                further_button = gr.Button("Comprehensive OCR", visible=False)

        # Detect and OCR button click event
        btn.click(
            detect_and_ocr,
            inputs=[input_image],
            outputs=[output_image, extracted_textbox, refined_textbox, validated_textbox]
        )

        # Further processing button click event
        further_button.click(
            further_processing,
            inputs=[input_image, extracted_textbox],
            outputs=refined_textbox
        )

        # Monitor validated output to control button visibility
        # NOTE(review): this listens for changes on refined_textbox but
        # feeds validated_textbox into handle_processing — likely intended
        # to be validated_textbox.change; confirm the trigger is correct.
        refined_textbox.change(
            handle_processing,
            inputs=[validated_textbox],
            outputs=[further_button]
        )

        # Hide the validated_textbox when "Comprehensive OCR" is clicked
        further_button.click(
            lambda: gr.update(visible=False),
            outputs=[validated_textbox]
        )

    return ocr_interface
714
+
715
+ # Create and launch the OCR interface
716
+ ocr_interface = create_ocr_interface()
717
+ # ocr_interface.launch(share=True, debug=True)
718
+
719
+ """# 6. Create a Tabbed Interface for Both Image and Video
720
+ ### Here, we combine the image and video interfaces into a tabbed structure so users can switch between them easily.
721
+ """
722
+
723
def create_tabbed_interface():
    """Combine the brand-recognition and OCR demos into one tabbed UI.

    Returns:
        gr.TabbedInterface over the two module-level Blocks apps.
    """
    return gr.TabbedInterface(
        [Brand_recog, ocr_interface],
        # BUGFIX: tab label typo "Recongnition" -> "Recognition".
        ["Brand Recognition", "OCR"]
    )
728
+
729
+ tabbed_interface = create_tabbed_interface()
730
+
731
+ """# 7. Launch the Gradio Interface
732
+ ### Finally, launch the Gradio interface to make it interactable.
733
+ """
734
+
735
+ tabbed_interface.launch()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ gradio==3.40.1
2
+ opencv-python-headless==4.8.0.74
3
+ ultralytics==8.0.100
4
+ supervision==0.2.0
5
+ google-generativeai==0.1.0
6
+ paddleocr==2.6.1.3
7
+ paddlepaddle==2.5.2
8
+ numpy==1.23.5
9
+ Pillow==9.5.0
10
+ pandas==2.0.3
11
+ matplotlib==3.7.2