Spaces:

JARVIS-JI
/

ocr

Sleeping

App Files Files Community

ocr / app.py

JARVIS-JI

Update app.py

c4025d8 verified 3 months ago

raw

history blame contribute delete

3.85 kB

	import gradio as gr
	import easyocr
	import google.generativeai as genai
	import os, json, re
	import numpy as np
	from PIL import Image, ImageEnhance, ImageFilter

	# =========================
	# 🔐 Load API Key
	# =========================
	# The Gemini API key is taken from the GEMINI_API_KEY environment variable
	# (None is passed through when the variable is unset).
	genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

	# =========================
	# 🤖 Initialize Models
	# =========================
	# Both objects are created once at import time and shared by every request.
	reader = easyocr.Reader(lang_list=["en"])  # English-only OCR reader
	model = genai.GenerativeModel(model_name="gemini-3-flash-preview")

	# =========================
	# 🧼 Image Preprocessing
	# =========================
	def preprocess_image(image_path):
	    """Load an image from disk and prepare it for OCR.

	    The image is converted to grayscale (mode "L"), its contrast is
	    enhanced by a factor of 2, and a sharpening filter is applied.

	    Args:
	        image_path: Path of the image file to open.

	    Returns:
	        A new PIL image holding the preprocessed picture.
	    """
	    # Image.open is lazy and can leave the underlying file handle open
	    # (multi-frame formats keep it open even after loading). The context
	    # manager closes it deterministically; convert() is called inside the
	    # block and yields an in-memory image that outlives the file.
	    with Image.open(image_path) as source:
	        gray = source.convert("L")

	    boosted = ImageEnhance.Contrast(gray).enhance(2)
	    return boosted.filter(ImageFilter.SHARPEN)

	# =========================
	# 🔍 OCR Extraction
	# =========================
	def extract_text(image):
	    """Run OCR on an image and return the cleaned text on a single line.

	    Args:
	        image: A NumPy array, or any object ``np.array`` can convert
	            (for example a PIL image).

	    Returns:
	        The recognised text with dot runs and whitespace runs collapsed to
	        single spaces, truncated to at most 4000 characters.
	    """
	    pixels = image if isinstance(image, np.ndarray) else np.array(image)

	    # detail=0 requests the recognised strings only.
	    fragments = reader.readtext(pixels, detail=0)
	    joined = "\n".join(fragments)

	    # Runs of two or more dots become a space, then every whitespace run
	    # (including the newlines inserted above) is squeezed to one space.
	    without_dot_runs = re.sub(r'\.{2,}', ' ', joined)
	    single_spaced = re.sub(r'\s+', ' ', without_dot_runs)

	    # Cap the length to keep the downstream model call cheap and fast.
	    return single_spaced[:4000]

	# =========================
	# 🧠 Prompt Builder
	# =========================
	def build_prompt(text):
	    """Build the instruction prompt sent to the model for one menu.

	    The prompt describes the target JSON layout (categories -> items ->
	    sizes), lists the conversion rules, and ends with the OCR text.

	    Args:
	        text: OCR text of the menu; interpolated verbatim after "MENU TEXT:".

	    Returns:
	        The complete prompt string.
	    """
	    # Doubled braces are literal braces in the f-string; only {text} is
	    # substituted.
	    prompt = f"""
	You are a strict JSON generator.

	Convert the menu text into VALID JSON.

	FORMAT:
	{{
	"categories": [
	{{
	"name": "Category Name",
	"items": [
	{{
	"name": "Item Name",
	"sizes": {{
	"Regular": 100
	}}
	}}
	]
	}}
	]
	}}

	RULES:
	- Output ONLY JSON (no explanation)
	- Detect categories properly
	- Each item MUST have "name" and "sizes"
	- If 1 price → "Regular"
	- If 2 prices → infer labels (Half/Full, Plain/Butter, Gravy/Dry)
	- Fix OCR mistakes intelligently

	MENU TEXT:
	{text}
	"""
	    return prompt

	# =========================
	# 🧹 Extract JSON safely
	# =========================
	def extract_json(response_text):
	    """Extract the first JSON object embedded in a model response.

	    The response may wrap the JSON in prose or Markdown code fences. Every
	    "{" is tried in order as the start of a JSON document and the first one
	    that decodes is returned; anything after the decoded object is ignored.

	    Args:
	        response_text: Raw text returned by the model.

	    Returns:
	        The decoded JSON object, or None when ``response_text`` is not a
	        string or contains no decodable JSON object.
	    """
	    if not isinstance(response_text, str):
	        return None

	    decoder = json.JSONDecoder()
	    start = response_text.find("{")
	    while start != -1:
	        try:
	            # raw_decode stops at the end of the first complete document,
	            # so trailing prose (even prose containing "}") is harmless.
	            parsed, _end = decoder.raw_decode(response_text, start)
	        except json.JSONDecodeError:
	            # Not valid JSON from this brace; try the next candidate.
	            start = response_text.find("{", start + 1)
	        else:
	            return parsed

	    return None

	# =========================
	# ✅ Validate Output
	# =========================
	def validate_schema(data):
	    """Check that parsed model output has the expected menu structure.

	    Expected shape::

	        {"categories": [{"name": ..., "items": [{"name": ..., "sizes": ...}]}]}

	    Container types are checked explicitly. Without that, a string such as
	    "name sizes" would satisfy the ``in`` key checks by substring match,
	    and non-container values would raise TypeError instead of returning
	    False.

	    Args:
	        data: The decoded JSON value (may be None or any JSON type).

	    Returns:
	        True when the structure matches, otherwise False. The types of leaf
	        values ("name", "sizes") are not inspected.
	    """
	    if not isinstance(data, dict):
	        return False

	    categories = data.get("categories")
	    if not isinstance(categories, list):
	        return False

	    for cat in categories:
	        if not isinstance(cat, dict) or "name" not in cat:
	            return False

	        items = cat.get("items")
	        if not isinstance(items, list):
	            return False

	        for item in items:
	            if not isinstance(item, dict):
	                return False
	            if "name" not in item or "sizes" not in item:
	                return False

	    return True

	# =========================
	# 🔁 Gemini Call with Retry
	# =========================
	def generate_json(text, max_attempts=3):
	    """Ask the model to convert menu text to JSON, retrying on failure.

	    An attempt fails when the model call raises, when no JSON can be
	    extracted from the response, or when the JSON does not pass
	    ``validate_schema``.

	    Args:
	        text: OCR text of the menu.
	        max_attempts: Maximum number of model calls to make (default 3).

	    Returns:
	        The validated menu dict, or
	        ``{"error": "Failed to generate valid JSON"}`` when every attempt
	        fails.
	    """
	    # The prompt is the same for every attempt, so build it once.
	    prompt = build_prompt(text)

	    for attempt in range(1, max_attempts + 1):
	        try:
	            response = model.generate_content(prompt)
	            parsed = extract_json(response.text)

	            if validate_schema(parsed):
	                return parsed

	            # Make silent retries visible in the logs.
	            print(f"Gemini attempt {attempt}/{max_attempts}: response was not valid menu JSON")

	        except Exception as e:
	            # Best-effort boundary: any API/parsing failure just consumes
	            # one attempt.
	            print("Gemini Error:", e)

	    return {"error": "Failed to generate valid JSON"}

	# =========================
	# 🚀 Main Pipeline
	# =========================
	def process(image_path):
	    """Run the full pipeline: preprocess -> OCR -> model -> JSON string.

	    Args:
	        image_path: Path of the uploaded image, or None/empty when nothing
	            was uploaded.

	    Returns:
	        A pretty-printed JSON string. On success it is the structured
	        menu; on any failure it is an object with a single "error" key.
	        This function does not raise.
	    """
	    # Submitting without an image would otherwise surface an opaque
	    # low-level error message.
	    if not image_path:
	        return json.dumps({"error": "No image provided"}, indent=2)

	    try:
	        img = preprocess_image(image_path)
	        text = extract_text(img)

	        print("OCR TEXT:", text[:300])  # debug log

	        # Nothing recognised: skip the (billed, retried) model calls.
	        if not text.strip():
	            return json.dumps({"error": "No text detected in image"}, indent=2)

	        result = generate_json(text)

	        # ensure_ascii=False keeps non-ASCII menu text (currency symbols,
	        # local-language names) readable instead of \uXXXX escapes.
	        return json.dumps(result, indent=2, ensure_ascii=False)

	    except Exception as e:
	        # Top-level UI boundary: report the failure as JSON.
	        return json.dumps({"error": str(e)}, indent=2, ensure_ascii=False)

	# =========================
	# 🎨 Gradio UI
	# =========================
	# One image in, one JSON code panel out, with `process` doing the work.
	app = gr.Interface(
	    title="🍽️ Menu Image → JSON (Production Ready)",
	    description="Upload a restaurant menu image and get structured JSON using OCR + Gemini",
	    fn=process,
	    inputs=gr.Image(type="filepath"),  # the upload reaches `process` as a file path
	    outputs=gr.Code(language="json"),
	)

	# Turn on request queuing with a queue size limit of 10.
	app.queue(max_size=10)

	# Start the web server.
	app.launch()