JARVIS-JI committed on
Commit
652a43e
·
verified ·
1 Parent(s): 5ec6ce0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -36
app.py CHANGED
@@ -2,93 +2,159 @@ import gradio as gr
2
  import easyocr
3
  import google.generativeai as genai
4
  import os, json, re
 
5
  from PIL import Image, ImageEnhance, ImageFilter
6
 
7
- # 🔐 API key
 
 
8
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
 
10
- # OCR
 
 
11
  reader = easyocr.Reader(['en'])
12
- model = genai.GenerativeModel("gemini-3-flash-preview")
13
 
14
-
15
- def preprocess_image(path):
16
- img = Image.open(path).convert("L")
 
 
17
  img = ImageEnhance.Contrast(img).enhance(2)
18
  img = img.filter(ImageFilter.SHARPEN)
19
  return img
20
 
21
-
 
 
22
  def extract_text(image):
 
 
 
23
  result = reader.readtext(image, detail=0)
 
24
  text = "\n".join(result)
25
 
 
26
  text = re.sub(r'\.{2,}', ' ', text)
27
  text = re.sub(r'\s+', ' ', text)
28
 
29
- return text
30
-
31
 
 
 
 
32
  def build_prompt(text):
33
  return f"""
34
  You are a strict JSON generator.
35
 
36
- Convert menu text into VALID JSON.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  RULES:
39
- - Output ONLY JSON
40
- - Detect categories
41
- - Each item has "name" and "sizes"
42
- 1 price → Regular
43
- 2 prices → infer meaning
44
- - Fix OCR errors
45
-
46
- MENU:
47
  {text}
48
  """
49
 
50
-
51
- def extract_json(text):
 
 
52
  try:
53
- start = text.index("{")
54
- end = text.rindex("}") + 1
55
- return json.loads(text[start:end])
56
  except:
57
  return None
58
 
59
-
 
 
60
  def validate_schema(data):
61
  if not data or "categories" not in data:
62
  return False
63
- return True
64
 
 
 
 
 
 
 
 
 
65
 
 
 
 
66
  def generate_json(text):
67
- for _ in range(3):
68
  prompt = build_prompt(text)
69
- response = model.generate_content(prompt)
70
 
71
- parsed = extract_json(response.text)
 
 
 
 
 
72
 
73
- if validate_schema(parsed):
74
- return parsed
75
 
76
  return {"error": "Failed to generate valid JSON"}
77
 
 
 
 
 
 
 
 
 
 
78
 
79
- def process(image):
80
- img = preprocess_image(image)
81
- text = extract_text(img)
82
- result = generate_json(text)
83
 
84
- return json.dumps(result, indent=2)
85
 
 
 
86
 
 
 
 
87
  app = gr.Interface(
88
  fn=process,
89
  inputs=gr.Image(type="filepath"),
90
  outputs=gr.Code(language="json"),
91
- title="🚀 Production Menu Parser (OCR + Gemini)",
 
92
  )
93
 
 
 
 
94
  app.launch()
 
2
  import easyocr
3
  import google.generativeai as genai
4
  import os, json, re
5
+ import numpy as np
6
  from PIL import Image, ImageEnhance, ImageFilter
7
 
8
+ # =========================
9
+ # 🔐 Load API Key
10
+ # =========================
11
  genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
12
 
13
+ # =========================
14
+ # 🤖 Initialize Models
15
+ # =========================
16
  reader = easyocr.Reader(['en'])
17
+ model = genai.GenerativeModel("gemini-1.5-flash")
18
 
19
+ # =========================
20
+ # 🧼 Image Preprocessing
21
+ # =========================
22
+ def preprocess_image(image_path):
23
+ img = Image.open(image_path).convert("L") # grayscale
24
  img = ImageEnhance.Contrast(img).enhance(2)
25
  img = img.filter(ImageFilter.SHARPEN)
26
  return img
27
 
28
+ # =========================
29
+ # 🔍 OCR Extraction
30
+ # =========================
31
  def extract_text(image):
32
+ if not isinstance(image, np.ndarray):
33
+ image = np.array(image) # ✅ FIX
34
+
35
  result = reader.readtext(image, detail=0)
36
+
37
  text = "\n".join(result)
38
 
39
+ # Clean text
40
  text = re.sub(r'\.{2,}', ' ', text)
41
  text = re.sub(r'\s+', ' ', text)
42
 
43
+ return text[:4000] # limit for cost/speed
 
44
 
45
+ # =========================
46
+ # 🧠 Prompt Builder
47
+ # =========================
48
  def build_prompt(text):
49
  return f"""
50
  You are a strict JSON generator.
51
 
52
+ Convert the menu text into VALID JSON.
53
+
54
+ FORMAT:
55
+ {{
56
+ "categories": [
57
+ {{
58
+ "name": "Category Name",
59
+ "items": [
60
+ {{
61
+ "name": "Item Name",
62
+ "sizes": {{
63
+ "Regular": 100
64
+ }}
65
+ }}
66
+ ]
67
+ }}
68
+ ]
69
+ }}
70
 
71
  RULES:
72
+ - Output ONLY JSON (no explanation)
73
+ - Detect categories properly
74
+ - Each item MUST have "name" and "sizes"
75
+ - If 1 price → "Regular"
76
+ - If 2 prices → infer labels (Half/Full, Plain/Butter, Gravy/Dry)
77
+ - Fix OCR mistakes intelligently
78
+
79
+ MENU TEXT:
80
  {text}
81
  """
82
 
83
+ # =========================
84
+ # 🧹 Extract JSON safely
85
+ # =========================
86
+ def extract_json(response_text):
87
  try:
88
+ start = response_text.index("{")
89
+ end = response_text.rindex("}") + 1
90
+ return json.loads(response_text[start:end])
91
  except:
92
  return None
93
 
94
+ # =========================
95
+ # βœ… Validate Output
96
+ # =========================
97
  def validate_schema(data):
98
  if not data or "categories" not in data:
99
  return False
 
100
 
101
+ for cat in data["categories"]:
102
+ if "name" not in cat or "items" not in cat:
103
+ return False
104
+ for item in cat["items"]:
105
+ if "name" not in item or "sizes" not in item:
106
+ return False
107
+
108
+ return True
109
 
110
+ # =========================
111
+ # 🔁 Gemini Call with Retry
112
+ # =========================
113
  def generate_json(text):
114
+ for _ in range(3): # retry
115
  prompt = build_prompt(text)
 
116
 
117
+ try:
118
+ response = model.generate_content(prompt)
119
+ parsed = extract_json(response.text)
120
+
121
+ if validate_schema(parsed):
122
+ return parsed
123
 
124
+ except Exception as e:
125
+ print("Gemini Error:", e)
126
 
127
  return {"error": "Failed to generate valid JSON"}
128
 
129
+ # =========================
130
+ # 🚀 Main Pipeline
131
+ # =========================
132
+ def process(image_path):
133
+ try:
134
+ img = preprocess_image(image_path)
135
+ text = extract_text(img)
136
+
137
+ print("OCR TEXT:", text[:300]) # debug log
138
 
139
+ result = generate_json(text)
 
 
 
140
 
141
+ return json.dumps(result, indent=2)
142
 
143
+ except Exception as e:
144
+ return json.dumps({"error": str(e)}, indent=2)
145
 
146
+ # =========================
147
+ # 🎨 Gradio UI
148
+ # =========================
149
  app = gr.Interface(
150
  fn=process,
151
  inputs=gr.Image(type="filepath"),
152
  outputs=gr.Code(language="json"),
153
+ title="🍽️ Menu Image → JSON (Production Ready)",
154
+ description="Upload a restaurant menu image and get structured JSON using OCR + Gemini"
155
  )
156
 
157
+ # Queue for stability
158
+ app.queue(max_size=10)
159
+
160
  app.launch()