# Hugging Face Space: JARVIS-JI/ocr (status at capture: Sleeping)
# File size: 3,846 bytes

import gradio as gr
import easyocr
import google.generativeai as genai
import os, json, re
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

# =========================
# 🔐 Load API Key
# =========================
# The Gemini API key is read from the GEMINI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so a missing key is not
# reported on this line.
# NOTE(review): presumably a missing key only surfaces on the first API call — confirm.
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# =========================
# 🤖 Initialize Models
# =========================
# Both objects are created once, at import time, and are used as module-level
# globals by the functions below.
# OCR reader configured for English ('en') only.
reader = easyocr.Reader(['en'])
# Gemini model that converts the OCR text into structured JSON.
model = genai.GenerativeModel("gemini-3-flash-preview")

# =========================
# 🧼 Image Preprocessing
# =========================
def preprocess_image(image_path):
    """Load an image and prepare it for OCR.

    The image is converted to grayscale, its contrast is doubled, and a
    sharpening filter is applied.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A new grayscale (mode ``"L"``) ``PIL.Image.Image``.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file.
    """
    # ``convert`` returns an independent copy, so the source file can be
    # closed as soon as the conversion is done instead of staying open until
    # the image object happens to be garbage collected.
    with Image.open(image_path) as source:
        gray = source.convert("L")  # grayscale

    boosted = ImageEnhance.Contrast(gray).enhance(2)
    return boosted.filter(ImageFilter.SHARPEN)

# =========================
# 🔍 OCR Extraction
# =========================
def extract_text(image):
    """Run OCR on ``image`` and return the cleaned, length-capped text.

    Args:
        image: A NumPy array, or any object ``np.array`` can convert
            (the pipeline passes a PIL image).

    Returns:
        The OCR fragments joined into one string, with runs of two or more
        dots and every run of whitespace replaced by a single space,
        truncated to at most 4000 characters.
    """
    # The module-level EasyOCR reader is given an ndarray; anything else is
    # converted first.
    pixels = image if isinstance(image, np.ndarray) else np.array(image)

    fragments = reader.readtext(pixels, detail=0)
    cleaned = "\n".join(fragments)

    # Clean text: dot leaders first, then collapse all whitespace
    # (this also turns the joining newlines into spaces).
    for pattern in (r'\.{2,}', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)

    # limit for cost/speed
    return cleaned[:4000]

# =========================
# 🧠 Prompt Builder
# =========================
def build_prompt(text):
    """Return the Gemini prompt for converting menu text into JSON.

    Args:
        text: The OCR'd menu text; it is formatted into the end of the
            prompt, after the ``MENU TEXT:`` heading.

    Returns:
        The fixed instruction block (format example plus rules) followed by
        ``text`` and a trailing newline.
    """
    # Plain (non-f) literal: the JSON braces of the format example can be
    # written as-is because nothing is interpolated into this part.
    instructions = """
You are a strict JSON generator.

Convert the menu text into VALID JSON.

FORMAT:
{
  "categories": [
    {
      "name": "Category Name",
      "items": [
        {
          "name": "Item Name",
          "sizes": {
            "Regular": 100
          }
        }
      ]
    }
  ]
}

RULES:
- Output ONLY JSON (no explanation)
- Detect categories properly
- Each item MUST have "name" and "sizes"
- If 1 price → "Regular"
- If 2 prices → infer labels (Half/Full, Plain/Butter, Gravy/Dry)
- Fix OCR mistakes intelligently

MENU TEXT:
"""
    return f"{instructions}{text}\n"

# =========================
# 🧹 Extract JSON safely
# =========================
def extract_json(response_text):
    """Extract the first JSON object embedded in a model response.

    The response may wrap the JSON in markdown fences or surround it with
    commentary; decoding starts at the first ``{`` and stops at the end of
    that object, so trailing text (even text containing ``}``) is ignored.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded object (a ``dict``), or ``None`` when ``response_text``
        is not a string, contains no ``{``, or the text starting at the
        first ``{`` is not valid JSON.
    """
    try:
        start = response_text.index("{")
        parsed, _ = json.JSONDecoder().raw_decode(response_text, start)
    except (AttributeError, TypeError, ValueError, RecursionError):
        # AttributeError/TypeError: input is None or not a str.
        # ValueError: no "{" found, or invalid JSON (JSONDecodeError).
        # RecursionError: pathologically nested model output.
        return None
    return parsed

# =========================
# ✅ Validate Output
# =========================
def validate_schema(data):
    """Check that ``data`` has the expected menu structure.

    Expected shape::

        {"categories": [{"name": ..., "items": [{"name": ..., "sizes": ...}]}]}

    Only the presence of the keys and the container types are checked; the
    values of ``name`` and ``sizes`` are not inspected. An empty
    ``categories`` or ``items`` list is accepted.

    Args:
        data: The decoded model output (may be ``None`` or any JSON value).

    Returns:
        ``True`` if the structure matches, ``False`` otherwise. Malformed
        shapes (e.g. ``"categories": null`` or a string where an object is
        expected) yield ``False`` rather than raising.
    """
    if not isinstance(data, dict):
        return False

    categories = data.get("categories")
    if not isinstance(categories, (list, tuple)):
        return False

    for cat in categories:
        # A string would otherwise pass `"name" in cat` by substring match.
        if not isinstance(cat, dict) or "name" not in cat:
            return False

        items = cat.get("items")
        if not isinstance(items, (list, tuple)):
            return False

        for item in items:
            if not isinstance(item, dict):
                return False
            if "name" not in item or "sizes" not in item:
                return False

    return True

# =========================
# 🔁 Gemini Call with Retry
# =========================
def generate_json(text, max_attempts=3):
    """Ask Gemini to convert menu text into schema-valid JSON, with retries.

    Args:
        text: OCR'd menu text to convert.
        max_attempts: Maximum number of model calls before giving up
            (defaults to 3).

    Returns:
        The parsed menu ``dict`` from the first attempt whose output parses
        and passes ``validate_schema``; otherwise a ``dict`` with a single
        ``"error"`` key.
    """
    # Nothing to convert: skip the API calls instead of letting the model
    # invent a menu from an empty prompt.
    if not text or not text.strip():
        return {"error": "No text extracted from image"}

    # The prompt does not change between attempts, so build it once.
    prompt = build_prompt(text)

    for _ in range(max_attempts):  # retry
        try:
            response = model.generate_content(prompt)
            parsed = extract_json(response.text)

            if validate_schema(parsed):
                return parsed

        except Exception as e:
            # Best-effort boundary: any failure of a single attempt (API
            # error, unreadable response) is logged and the call retried.
            print("Gemini Error:", e)

    return {"error": "Failed to generate valid JSON"}

# =========================
# 🚀 Main Pipeline
# =========================
def process(image_path):
    """Run the full pipeline: preprocess -> OCR -> Gemini -> JSON string.

    Args:
        image_path: Path of the uploaded image, or ``None``/empty when the
            form is submitted without an image.

    Returns:
        A JSON string (2-space indent). On success it is the result of
        ``generate_json``; on any failure it is ``{"error": "<message>"}``.
        This function does not raise; every exception is reported in the
        returned JSON.
    """
    # Submitting without an image would otherwise surface as an obscure
    # error from the image loader.
    if image_path is None or image_path == "":
        return json.dumps({"error": "No image provided"}, indent=2)

    try:
        img = preprocess_image(image_path)
        text = extract_text(img)

        print("OCR TEXT:", text[:300])  # debug log

        result = generate_json(text)

        return json.dumps(result, indent=2)

    except Exception as e:
        # Top-level UI boundary: report the failure to the user as JSON.
        return json.dumps({"error": str(e)}, indent=2)

# =========================
# 🎨 Gradio UI
# =========================
# Single-function Gradio interface: the uploaded image is handed to `process`
# as a file path (type="filepath") and the string it returns is shown in a
# JSON code component.
app = gr.Interface(
    fn=process,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Code(language="json"),
    title="🍽️ Menu Image → JSON (Production Ready)",
    description="Upload a restaurant menu image and get structured JSON using OCR + Gemini"
)

# Queue for stability
# NOTE(review): max_size=10 presumably caps the number of waiting requests —
# confirm against the Gradio documentation.
app.queue(max_size=10)

# Launches the app whenever this module is executed or imported; there is no
# `if __name__ == "__main__":` guard.
app.launch()