neerajkalyank committed on
Commit
f372bee
·
verified ·
1 Parent(s): 21b70f3

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+ import pytesseract
4
+ from PIL import Image
5
+ import gradio as gr
6
+ import io
7
+
8
# Column names of the IBS Café schema, in the order they are written out.
# NOTE(review): "Priortiy" looks like a misspelling of "Priority"; it is kept
# verbatim because it is a runtime column key — confirm against the target schema.
COLUMNS = [
    "Parent Category",
    "Category",
    "Store Item Name",
    "Item Code",
    "Master Item Name",
    "EAN Code",
    "price",
    "Active",
    "Priortiy",
    "Image",
    "food type",
    "NoOfMains",
    "onlineName",
    "Menu/MRP",
    "itemTaxInclusive",
    "taxPct",
    "brandName",
    "classificationCode",
    "HSN CODE",
]

# Fixed values for the columns that are not derived from the menu text.
DEFAULTS = {
    # Identification fields left blank.
    "Item Code": "",
    "Master Item Name": "",
    "EAN Code": "",
    # Listing flags and presentation.
    "Active": "Y",
    "Priortiy": "",
    "Image": "",
    "food type": "",
    "NoOfMains": "",
    # Tax and branding.
    "itemTaxInclusive": "Y",
    "taxPct": "5",
    "brandName": "Nescafe",
    "classificationCode": "",
    "HSN CODE": "",
}

# A price is a run of 2-4 digits with an optional 1-2 digit decimal part,
# optionally prefixed by "₹" or "Rs"/"Rs.". The lookarounds keep it from
# matching a slice of a longer digit run.
price_pattern = re.compile(
    r'(?<!\d)(?:₹|Rs\.?\s*)?\d{2,4}(?:\.\d{1,2})?(?!\d)'
)
32
+
33
def parse_menu_text(text):
    """Parse OCR'd menu text into a list of schema row dicts.

    Each non-blank line is classified as follows:

    * A line containing at least one price match becomes one or more items.
      If the line contains "/" and more than one price (e.g. "Latte 149/199"),
      one row is produced per price, labelled Regular / Large / XL and then
      "Option N" for any further prices. Otherwise only the first price is
      used.
    * A line with no price that is all upper-case and at most four words is a
      heading. The first heading seen becomes the parent category; every
      later heading replaces the current category.
    * Any other line is ignored.

    Prices are checked before headings so that an all-caps item line such as
    "ESPRESSO 120" is kept as an item instead of being taken for a heading.

    Args:
        text: Raw text to parse; ``None`` or an empty string yields no rows.

    Returns:
        A list of row dicts as produced by ``build_row``. Price values carry
        only the numeric part (any "₹"/"Rs." prefix is removed).
    """
    size_labels = ("Regular", "Large", "XL")
    rows = []
    parent_category, category = "", ""

    for raw_line in (text or "").splitlines():
        line = raw_line.strip()
        if not line:
            continue

        prices = price_pattern.findall(line)

        if not prices:
            if line.isupper() and len(line.split()) <= 4:
                # treat as category or parent
                if not parent_category:
                    parent_category = line.title()
                else:
                    category = line.title()
            continue

        # Remove the prices, collapse the gaps they leave behind, then trim
        # separator punctuation — including the "/" left by "149/199".
        name = " ".join(price_pattern.sub('', line).split()).strip(" -:–/")

        # findall returns the whole match, so drop any currency prefix.
        amounts = [re.sub(r'^\D+', '', price) for price in prices]

        if '/' in line and len(amounts) > 1:
            # multi-size e.g., 149/199
            for i, amount in enumerate(amounts):
                if i < len(size_labels):
                    label = size_labels[i]
                else:
                    label = f"Option {i+1}"
                rows.append(
                    build_row(parent_category, category, f"{name} ({label})", amount)
                )
        else:
            rows.append(build_row(parent_category, category, name, amounts[0]))

    return rows
64
+
65
def build_row(parent, category, name, price):
    """Return a new row dict: a copy of DEFAULTS overlaid with the item fields.

    Args:
        parent: Value for the "Parent Category" column.
        category: Value for the "Category" column.
        name: Item name, stored in both "Store Item Name" and "onlineName".
        price: Item price, stored in both "price" and "Menu/MRP".

    Returns:
        A fresh dict; DEFAULTS itself is not modified.
    """
    item_fields = {
        "Parent Category": parent,
        "Category": category,
        "Store Item Name": name,
        "price": price,
        "onlineName": name,
        "Menu/MRP": price,
    }
    # Defaults first so the item-specific fields are added (or win) afterwards.
    return {**DEFAULTS, **item_fields}
76
+
77
def ocr_and_extract(image):
    """OCR a menu image and convert the recognised text to CSV and JSON.

    Args:
        image: Anything ``PIL.Image.open`` accepts; the UI in this file passes
            a file path. ``None`` means nothing was uploaded.

    Returns:
        A 3-tuple ``(text, csv_text, json_payload)``:

        * ``text`` - the raw OCR text, or a prompt message when ``image`` is
          ``None``.
        * ``csv_text`` - the rows rendered as CSV with the COLUMNS header, or
          ``None`` when ``image`` is ``None``.
        * ``json_payload`` - ``{"rows": [...], "needs_review": []}``, or
          ``None`` when ``image`` is ``None``.
    """
    if image is None:
        return "Please upload an image.", None, None

    # The context manager closes the image's file handle once OCR is done.
    with Image.open(image) as img:
        text = pytesseract.image_to_string(img)

    rows = parse_menu_text(text)

    # to_csv() returns the CSV as a string when no path or buffer is given.
    csv_text = pd.DataFrame(rows, columns=COLUMNS).to_csv(index=False)

    json_output = {"rows": rows, "needs_review": []}
    return text, csv_text, json_output
91
+
92
# Gradio UI
def _extract_with_download(image):
    """Run ocr_and_extract and also save the CSV to a file for downloading.

    Returns the three values from ``ocr_and_extract`` plus a fourth: the path
    of a temporary ``.csv`` file holding the CSV text, or ``None`` when no CSV
    was produced (e.g. no image uploaded).
    """
    import tempfile  # local import: only this UI helper needs it

    text, csv_text, payload = ocr_and_extract(image)
    if csv_text is None:
        return text, csv_text, payload, None

    # delete=False keeps the file on disk after the handle closes so the
    # File component can serve it; newline="" writes the CSV text unchanged.
    with tempfile.NamedTemporaryFile(
        "w", suffix=".csv", delete=False, encoding="utf-8", newline=""
    ) as handle:
        handle.write(csv_text)
    return text, csv_text, payload, handle.name


with gr.Blocks(title="Menu → IBS Schema Extractor") as demo:
    gr.Markdown("## 🧾 Menu OCR to IBS Café Schema")
    gr.Markdown("Upload a menu image and extract structured data in CSV + JSON formats.")

    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Menu Image")

    extract_btn = gr.Button("Extract")

    with gr.Tab("Extracted Text"):
        text_output = gr.Textbox(label="OCR Text", lines=10)

    with gr.Tab("CSV Output"):
        csv_output = gr.Textbox(label="CSV Data", lines=10)
        csv_file = gr.File(label="Download CSV")

    with gr.Tab("JSON Output"):
        json_output = gr.JSON(label="Structured JSON")

    # csv_file is listed as an output so the "Download CSV" component is
    # actually populated with the generated file.
    extract_btn.click(
        _extract_with_download,
        inputs=[image_input],
        outputs=[text_output, csv_output, json_output, csv_file],
    )

demo.launch()