"""Gradio app that converts a restaurant-menu image into structured JSON.

Pipeline: image preprocessing (Pillow) -> OCR (EasyOCR) -> JSON structuring
(Gemini) -> schema validation -> pretty-printed JSON shown in the UI.
"""

import json
import os
import re

import easyocr
import google.generativeai as genai
import gradio as gr
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

# =========================
# Load API key
# =========================
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# =========================
# Initialize models
# =========================
reader = easyocr.Reader(['en'])
model = genai.GenerativeModel("gemini-3-flash-preview")

# Upper bound on OCR characters forwarded to the model (limits cost/latency).
MAX_OCR_CHARS = 4000
# Default number of Gemini attempts before giving up.
MAX_ATTEMPTS = 3

# Compiled once at import time instead of on every request.
_DOT_LEADERS = re.compile(r'\.{2,}')   # "Item ........ 120" style leaders
_WHITESPACE = re.compile(r'\s+')


# =========================
# Image preprocessing
# =========================
def preprocess_image(image_path):
    """Open the image at *image_path* and return an OCR-friendly copy.

    The image is converted to grayscale, its contrast is doubled, and a
    sharpen filter is applied.

    Args:
        image_path: Filesystem path of the image to load.

    Returns:
        A PIL image in mode "L".
    """
    # convert() produces an independent in-memory image, so the source file
    # handle can be released as soon as the `with` block exits.
    with Image.open(image_path) as source:
        img = source.convert("L")  # grayscale
    img = ImageEnhance.Contrast(img).enhance(2)
    return img.filter(ImageFilter.SHARPEN)


# =========================
# OCR extraction
# =========================
def extract_text(image):
    """Run OCR on *image* and return cleaned, length-limited text.

    Args:
        image: A numpy array, or anything ``np.array`` can convert
            (e.g. a PIL image).

    Returns:
        The recognised text with dot leaders removed and all whitespace runs
        collapsed to single spaces, truncated to ``MAX_OCR_CHARS`` characters.
    """
    if not isinstance(image, np.ndarray):
        image = np.array(image)

    fragments = reader.readtext(image, detail=0)
    text = "\n".join(fragments)

    # Clean text
    text = _DOT_LEADERS.sub(' ', text)
    text = _WHITESPACE.sub(' ', text)

    return text[:MAX_OCR_CHARS]  # limit for cost/speed


# =========================
# Prompt builder
# =========================
def build_prompt(text):
    """Return the Gemini prompt asking for the menu *text* as strict JSON."""
    return f"""
You are a strict JSON generator.

Convert the menu text into VALID JSON.

FORMAT:
{{
  "categories": [
    {{
      "name": "Category Name",
      "items": [
        {{
          "name": "Item Name",
          "sizes": {{
            "Regular": 100
          }}
        }}
      ]
    }}
  ]
}}

RULES:
- Output ONLY JSON (no explanation)
- Detect categories properly
- Each item MUST have "name" and "sizes"
- If 1 price → "Regular"
- If 2 prices → infer labels (Half/Full, Plain/Butter, Gravy/Dry)
- Fix OCR mistakes intelligently

MENU TEXT:
{text}
"""


# =========================
# Extract JSON safely
# =========================
def extract_json(response_text):
    """Parse the outermost ``{...}`` span of *response_text* as JSON.

    Slicing from the first ``{`` to the last ``}`` tolerates extra text
    around the payload (for example a Markdown code fence).

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value, or ``None`` when no parsable object is found.
    """
    try:
        start = response_text.index("{")
        end = response_text.rindex("}") + 1
        return json.loads(response_text[start:end])
    except (ValueError, AttributeError, TypeError):
        # ValueError: brace not found, or json.JSONDecodeError (a subclass).
        # AttributeError/TypeError: response_text is None or not a string.
        return None


# =========================
# Validate output
# =========================
def validate_schema(data):
    """Return True when *data* matches the expected menu structure.

    Expected shape: a dict with a "categories" list; each category is a dict
    with "name" and an "items" list; each item is a dict with "name" and
    "sizes". Any other shape yields False rather than raising.
    """
    if not isinstance(data, dict):
        return False

    categories = data.get("categories")
    if not isinstance(categories, list):
        return False

    for category in categories:
        if not isinstance(category, dict) or "name" not in category:
            return False

        items = category.get("items")
        if not isinstance(items, list):
            return False

        for item in items:
            if not isinstance(item, dict):
                return False
            if "name" not in item or "sizes" not in item:
                return False

    return True


# =========================
# Gemini call with retry
# =========================
def generate_json(text, attempts=MAX_ATTEMPTS):
    """Ask Gemini to structure *text*, retrying until the result validates.

    Args:
        text: OCR text of the menu.
        attempts: Maximum number of model calls to make (default 3).

    Returns:
        The validated menu dict, or ``{"error": "Failed to generate valid
        JSON"}`` when every attempt failed or produced invalid output.
    """
    prompt = build_prompt(text)  # identical for every attempt, so build once

    for _ in range(attempts):
        try:
            response = model.generate_content(prompt)
            parsed = extract_json(response.text)
        except Exception as e:
            # Best effort: log any API/response failure and try again.
            print("Gemini Error:", e)
            continue

        if validate_schema(parsed):
            return parsed

    return {"error": "Failed to generate valid JSON"}


# =========================
# Main pipeline
# =========================
def _to_json(payload):
    """Serialise *payload* for display, keeping non-ASCII text readable."""
    return json.dumps(payload, indent=2, ensure_ascii=False)


def process(image_path):
    """Run the full image -> JSON pipeline for the Gradio UI.

    Args:
        image_path: Path of the uploaded image; falsy when nothing was
            uploaded.

    Returns:
        A pretty-printed JSON string: the structured menu on success, or an
        object with a single "error" key on failure.
    """
    if not image_path:
        return _to_json({"error": "No image provided"})

    try:
        img = preprocess_image(image_path)
        text = extract_text(img)

        print("OCR TEXT:", text[:300])  # debug log

        # Nothing recognised: skip the model call instead of paying for a
        # request that has no menu text to structure.
        if not text.strip():
            return _to_json({"error": "No text detected in image"})

        return _to_json(generate_json(text))

    except Exception as e:
        # Top-level UI boundary: report the failure as JSON instead of
        # letting the exception escape to Gradio.
        return _to_json({"error": str(e)})


# =========================
# Gradio UI
# =========================
app = gr.Interface(
    fn=process,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Code(language="json"),
    title="🍽️ Menu Image → JSON (Production Ready)",
    description="Upload a restaurant menu image and get structured JSON using OCR + Gemini",
)

# Queue for stability
app.queue(max_size=10)

app.launch()