ocr / app.py
JARVIS-JI's picture
Update app.py
c4025d8 verified
import gradio as gr
import easyocr
import google.generativeai as genai
import os, json, re
import numpy as np
from PIL import Image, ImageEnhance, ImageFilter
# =========================
# 🔐 Load API Key
# =========================
# Hand the Gemini API key to the client library. The key is read from the
# GEMINI_API_KEY environment variable; the lookup yields None when it is unset.
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))
# =========================
# 🤖 Initialize Models
# =========================
# Both clients are created once, when the module is loaded, and are reused by
# every call to the functions below.
reader = easyocr.Reader(lang_list=["en"])
model = genai.GenerativeModel(model_name="gemini-3-flash-preview")
# =========================
# 🧼 Image Preprocessing
# =========================
def preprocess_image(image_path):
    """Load an image from disk and prepare it for OCR.

    The image is rotated according to its EXIF orientation tag (when one is
    present), converted to grayscale, given a 2x contrast boost and sharpened.

    Args:
        image_path: Filesystem path of the image to load.

    Returns:
        The processed PIL image in mode "L".

    Raises:
        ValueError: If ``image_path`` is empty or None.
        Any error raised by ``Image.open`` for an unreadable file propagates.
    """
    # Imported here because the file-level PIL import does not include ImageOps.
    from PIL import ImageOps

    if not image_path:
        raise ValueError("No image provided")

    # The context manager releases the file handle once the pixel data has
    # been copied into the converted image.
    with Image.open(image_path) as source:
        # Image.open does not apply the EXIF orientation tag, so photos taken
        # on a rotated phone would otherwise reach OCR sideways.
        upright = ImageOps.exif_transpose(source)
        gray = upright.convert("L")

    boosted = ImageEnhance.Contrast(gray).enhance(2)
    return boosted.filter(ImageFilter.SHARPEN)
# =========================
# 🔍 OCR Extraction
# =========================
def extract_text(image):
    """Run OCR over ``image`` and return the recognised text as one line.

    Args:
        image: A NumPy array, or any object ``np.array`` can convert
            (for example a PIL image).

    Returns:
        The recognised text with runs of two or more dots replaced by a
        space, all whitespace runs (including newlines) collapsed to a single
        space, and the result truncated to at most 4000 characters.
    """
    # The OCR reader is given an ndarray; anything else is converted first.
    pixels = image if isinstance(image, np.ndarray) else np.array(image)

    # detail=0 requests the simple output form: just the recognised strings.
    fragments = reader.readtext(pixels, detail=0)
    joined = "\n".join(fragments)

    # Dot leaders ("Tea ...... 10") carry no information; turn them into a gap.
    without_leaders = re.sub(r'\.{2,}', ' ', joined)
    single_spaced = re.sub(r'\s+', ' ', without_leaders)

    # Cap the length to bound the cost and latency of the downstream model call.
    return single_spaced[:4000]
# =========================
# 🧠 Prompt Builder
# =========================
def build_prompt(text):
    """Build the instruction prompt that asks the model for menu JSON.

    Args:
        text: OCR text of the menu; it is appended verbatim after the
            ``MENU TEXT:`` marker.

    Returns:
        The full prompt string: the target JSON format, the conversion rules
        and the menu text.
    """
    # Literal braces of the JSON example are doubled because this is an
    # f-string. The rule lines use a real arrow character ("→"); the previous
    # text carried mis-encoded bytes in its place.
    return f"""
You are a strict JSON generator.
Convert the menu text into VALID JSON.
FORMAT:
{{
"categories": [
{{
"name": "Category Name",
"items": [
{{
"name": "Item Name",
"sizes": {{
"Regular": 100
}}
}}
]
}}
]
}}
RULES:
- Output ONLY JSON (no explanation)
- Detect categories properly
- Each item MUST have "name" and "sizes"
- If 1 price → "Regular"
- If 2 prices → infer labels (Half/Full, Plain/Butter, Gravy/Dry)
- Fix OCR mistakes intelligently
MENU TEXT:
{text}
"""
# =========================
# 🧹 Extract JSON safely
# =========================
def extract_json(response_text):
    """Pull a JSON value out of a model response that may contain extra text.

    The span from the first "{" to the last "}" is parsed first. If that
    fails (for example because trailing commentary contains a stray "}"),
    the first complete JSON value starting at the first "{" is parsed instead.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The decoded JSON value, or None when ``response_text`` is not a
        string, contains no braces, or holds no parseable JSON.
    """
    try:
        start = response_text.index("{")
        end = response_text.rindex("}") + 1
    except (AttributeError, TypeError, ValueError):
        # Not a string at all, or no opening/closing brace present.
        return None

    try:
        return json.loads(response_text[start:end])
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; fall through to the lenient parse.
        pass

    try:
        # raw_decode stops at the end of the first complete value and ignores
        # whatever follows it.
        parsed, _ = json.JSONDecoder().raw_decode(response_text, start)
    except (ValueError, RecursionError):
        return None
    return parsed
# =========================
# ✅ Validate Output
# =========================
def validate_schema(data):
    """Check that ``data`` has the expected menu structure.

    The expected shape is a dict with a ``"categories"`` list; every category
    is a dict with ``"name"`` and an ``"items"`` list; every item is a dict
    with ``"name"`` and ``"sizes"``. The values of ``"name"`` and ``"sizes"``
    are not inspected.

    Args:
        data: A decoded JSON value (or None).

    Returns:
        True when the structure matches, False otherwise. Malformed shapes
        (non-dict entries, non-list containers) yield False rather than
        raising.
    """
    if not isinstance(data, dict):
        return False

    categories = data.get("categories")
    if not isinstance(categories, list):
        return False

    for category in categories:
        # The isinstance check matters: `"name" in some_string` would be a
        # substring test, not a key lookup.
        if not isinstance(category, dict) or "name" not in category:
            return False

        items = category.get("items")
        if not isinstance(items, list):
            return False

        for item in items:
            if not isinstance(item, dict):
                return False
            if "name" not in item or "sizes" not in item:
                return False

    return True
# =========================
# 🔁 Gemini Call with Retry
# =========================
def generate_json(text, max_attempts=3):
    """Ask the model to turn OCR text into menu JSON, retrying on failure.

    Args:
        text: OCR text of the menu.
        max_attempts: Number of model calls to make before giving up.
            Defaults to 3.

    Returns:
        The first parsed response that passes ``validate_schema``, or
        ``{"error": "Failed to generate valid JSON"}`` when every attempt
        raises or yields an invalid structure.
    """
    # The prompt depends only on ``text``, so build it once instead of on
    # every attempt.
    prompt = build_prompt(text)

    for _ in range(max_attempts):
        try:
            response = model.generate_content(prompt)
            parsed = extract_json(response.text)
            if validate_schema(parsed):
                return parsed
        except Exception as e:
            # Best-effort retry: report the failure and try again.
            print("Gemini Error:", e)

    return {"error": "Failed to generate valid JSON"}
# =========================
# 🚀 Main Pipeline
# =========================
def process(image_path):
    """Run the full pipeline: preprocess, OCR, then JSON generation.

    Args:
        image_path: Filesystem path of the uploaded menu image.

    Returns:
        A JSON string indented by two spaces. On success it holds the
        generated menu structure; when no text is recognised or any step
        raises, it holds an object with a single ``"error"`` key.
    """
    try:
        img = preprocess_image(image_path)
        text = extract_text(img)
        print("OCR TEXT:", text[:300])  # debug log

        if not text.strip():
            # Nothing legible was recognised; skip the model call instead of
            # sending it an empty menu.
            return json.dumps({"error": "No text detected in image"}, indent=2)

        result = generate_json(text)
        return json.dumps(result, indent=2)
    except Exception as e:
        # Boundary of the UI callback: report the failure as JSON rather than
        # letting the exception escape.
        return json.dumps({"error": str(e)}, indent=2)
# =========================
# 🎨 Gradio UI
# =========================
# Single-function interface: the uploaded image is passed to `process` as a
# file path and the returned string is shown in a JSON code viewer.
app = gr.Interface(
    fn=process,
    inputs=gr.Image(type="filepath"),
    outputs=gr.Code(language="json"),
    # Title restored to the intended emoji and arrow; the previous literal
    # carried mis-encoded bytes.
    title="🍽️ Menu Image → JSON (Production Ready)",
    description="Upload a restaurant menu image and get structured JSON using OCR + Gemini",
)

# Queue for stability: the queue is capped at 10 entries.
app.queue(max_size=10)
app.launch()