File size: 3,846 Bytes
31e936b
 
 
 
652a43e
31e936b
 
652a43e
 
 
31e936b
 
652a43e
 
 
31e936b
c4025d8
31e936b
652a43e
 
 
 
 
31e936b
 
 
 
652a43e
 
 
31e936b
652a43e
 
 
31e936b
652a43e
31e936b
 
652a43e
31e936b
 
 
652a43e
31e936b
652a43e
 
 
31e936b
 
 
 
652a43e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31e936b
 
652a43e
 
 
 
 
 
 
 
31e936b
 
 
652a43e
 
 
 
31e936b
652a43e
 
 
31e936b
 
 
652a43e
 
 
31e936b
 
 
 
652a43e
 
 
 
 
 
 
 
31e936b
652a43e
 
 
31e936b
652a43e
31e936b
 
652a43e
 
 
 
 
 
31e936b
652a43e
 
31e936b
 
 
652a43e
 
 
 
 
 
 
 
 
31e936b
652a43e
31e936b
652a43e
31e936b
652a43e
 
31e936b
652a43e
 
 
31e936b
 
 
 
652a43e
 
31e936b
 
652a43e
 
 
31e936b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
import gradio as gr
import easyocr
import google.generativeai as genai
import os, json, re
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter

# =========================
# 🔐 Load API Key
# =========================
# The key is read from the GEMINI_API_KEY environment variable. os.getenv
# returns None when the variable is unset, and that value is passed through
# unchanged (no check is made here).
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))

# =========================
# 🤖 Initialize Models
# =========================
# Both objects are created once at import time and reused by the functions
# below (extract_text uses `reader`, generate_json uses `model`).
# OCR reader configured for English ('en') only.
reader = easyocr.Reader(['en'])
# Handle for the Gemini model that converts OCR text into JSON.
model = genai.GenerativeModel("gemini-3-flash-preview")

# =========================
# 🧼 Image Preprocessing
# =========================
def preprocess_image(image_path):
    """Load an image and prepare it for OCR.

    The image is opened with Pillow, converted to single-channel grayscale
    (mode "L"), contrast-enhanced by a factor of 2 and then run through
    Pillow's SHARPEN filter.

    Args:
        image_path: Location of the image, passed straight to ``Image.open``.

    Returns:
        The processed Pillow image.
    """
    grayscale = Image.open(image_path).convert("L")
    boosted = ImageEnhance.Contrast(grayscale).enhance(2)
    return boosted.filter(ImageFilter.SHARPEN)

# =========================
# 🔍 OCR Extraction
# =========================
def extract_text(image):
    """Run OCR on an image and return a cleaned, length-capped string.

    Args:
        image: A NumPy array, or any object ``np.array`` can convert
            (the pipeline passes the Pillow image from preprocess_image).

    Returns:
        The recognized text fragments joined together, with runs of two or
        more dots replaced by a space, all whitespace runs collapsed to a
        single space, truncated to at most 4000 characters.
    """
    # The OCR reader is fed an ndarray; convert anything else first.
    pixels = image if isinstance(image, np.ndarray) else np.array(image)

    # detail=0 is requested so the result can be joined directly as strings.
    fragments = reader.readtext(pixels, detail=0)
    joined = "\n".join(fragments)

    # Dot runs (e.g. "Tea ..... 10") become a single space, then every
    # whitespace run (including the newlines added above) is collapsed.
    without_dot_runs = re.sub(r'\.{2,}', ' ', joined)
    collapsed = re.sub(r'\s+', ' ', without_dot_runs)

    # Cap the length to limit cost/latency of the downstream model call.
    return collapsed[:4000]

# =========================
# 🧠 Prompt Builder
# =========================
def build_prompt(text):
    """Build the prompt asking the model to turn menu text into JSON.

    The prompt states the target JSON layout (categories -> items -> sizes),
    lists the conversion rules, and ends with the supplied menu text.

    Args:
        text: OCR text of the menu; interpolated verbatim after the
            "MENU TEXT:" marker.

    Returns:
        The complete prompt string.
    """
    # The rule arrows are plain ASCII "->": the previous text carried
    # mis-encoded arrow characters, which ended up in the model prompt.
    # Doubled braces are literal braces inside the f-string.
    return f"""
You are a strict JSON generator.

Convert the menu text into VALID JSON.

FORMAT:
{{
  "categories": [
    {{
      "name": "Category Name",
      "items": [
        {{
          "name": "Item Name",
          "sizes": {{
            "Regular": 100
          }}
        }}
      ]
    }}
  ]
}}

RULES:
- Output ONLY JSON (no explanation)
- Detect categories properly
- Each item MUST have "name" and "sizes"
- If 1 price -> "Regular"
- If 2 prices -> infer labels (Half/Full, Plain/Butter, Gravy/Dry)
- Fix OCR mistakes intelligently

MENU TEXT:
{text}
"""

# =========================
# 🧹 Extract JSON safely
# =========================
def extract_json(response_text):
    """Parse the JSON object embedded in a model response.

    The slice from the first ``{`` to the last ``}`` is decoded, so text
    around the object (explanations, Markdown code fences) is ignored.

    Args:
        response_text: Raw response text expected to contain a JSON object.

    Returns:
        The decoded object, or None when the input has no braces, the
        enclosed text is not valid JSON, or the input is not a string.
    """
    try:
        start = response_text.index("{")
        end = response_text.rindex("}") + 1
        return json.loads(response_text[start:end])
    except (AttributeError, TypeError, ValueError, RecursionError):
        # ValueError: brace not found, or invalid JSON (JSONDecodeError is a
        #   ValueError subclass).
        # AttributeError / TypeError: response_text is None or not a str.
        # RecursionError: pathologically deep nesting in the payload.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not caught.
        return None

# =========================
# ✅ Validate Output
# =========================
def validate_schema(data):
    """Check that parsed model output has the expected menu structure.

    Expected shape:
        {"categories": [{"name": ..., "items": [{"name": ..., "sizes": ...}]}]}

    Container types are checked as well as key presence, so malformed
    output (e.g. ``"categories": null`` or a string where an object is
    expected) yields False instead of raising or passing by accident via
    substring matching.

    Args:
        data: The decoded JSON value, or None.

    Returns:
        True when ``data`` is a dict with a ``categories`` list in which
        every category is a dict with ``name`` and an ``items`` list, and
        every item is a dict with ``name`` and ``sizes``; False otherwise.
        An empty ``categories`` list is accepted.
    """
    if not isinstance(data, dict):
        return False

    categories = data.get("categories")
    if not isinstance(categories, list):
        return False

    for cat in categories:
        if not isinstance(cat, dict) or "name" not in cat:
            return False

        items = cat.get("items")
        if not isinstance(items, list):
            return False

        for item in items:
            if not isinstance(item, dict):
                return False
            if "name" not in item or "sizes" not in item:
                return False

    return True

# =========================
# 🔁 Gemini Call with Retry
# =========================
def generate_json(text, max_attempts=3):
    """Ask the Gemini model for structured menu JSON, retrying on failure.

    Each attempt sends the same prompt, extracts the JSON object from the
    response and validates its structure. The first result that passes
    validation is returned.

    Args:
        text: OCR text of the menu.
        max_attempts: Maximum number of model calls to make (default 3).

    Returns:
        The validated menu dict, or
        ``{"error": "Failed to generate valid JSON"}`` when no attempt
        produced valid output.
    """
    # The prompt depends only on `text`, so build it once rather than on
    # every retry.
    prompt = build_prompt(text)

    for _ in range(max_attempts):
        try:
            response = model.generate_content(prompt)
            parsed = extract_json(response.text)

            if validate_schema(parsed):
                return parsed

        except Exception as e:
            # Any failure of the model call or response handling only
            # consumes one attempt; it is logged and the loop continues.
            print("Gemini Error:", e)

    return {"error": "Failed to generate valid JSON"}

# =========================
# 🚀 Main Pipeline
# =========================
def process(image_path):
    """Run the full pipeline for one image and return pretty-printed JSON.

    Steps: preprocess the image, OCR it, log the first 300 characters of the
    OCR text, then ask the model for structured JSON.

    Args:
        image_path: Path of the uploaded menu image.

    Returns:
        A JSON string (indent=2) holding the pipeline result. If any step
        raises, the string is ``{"error": "<exception message>"}`` instead.
    """
    try:
        ocr_text = extract_text(preprocess_image(image_path))

        print("OCR TEXT:", ocr_text[:300])  # debug log

        menu = generate_json(ocr_text)
        return json.dumps(menu, indent=2)
    except Exception as exc:
        # Top-level boundary: report the failure as JSON rather than raising.
        failure = {"error": str(exc)}
        return json.dumps(failure, indent=2)

# =========================
# 🎨 Gradio UI
# =========================
# Single-function UI: the uploaded image is handed to `process` as a file
# path (type="filepath") and the returned string is shown in a JSON code view.
# The title's arrow was previously a mis-encoded character sequence.
app = gr.Interface(
    fn=process,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Code(language="json"),
    title="🍽️ Menu Image → JSON (Production Ready)",
    description="Upload a restaurant menu image and get structured JSON using OCR + Gemini"
)

# Queue for stability (queue size capped at 10)
app.queue(max_size=10)

# Start the web server; this runs whenever the module is executed or imported.
app.launch()