neerajkalyank committed on
Commit
c42bcca
·
verified ·
1 Parent(s): 0ab8f8f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -11
app.py CHANGED
@@ -5,6 +5,9 @@ from PIL import Image
5
  import gradio as gr
6
  import io
7
 
 
 
 
8
  # IBS Café schema columns
9
  COLUMNS = [
10
  "Parent Category", "Category", "Store Item Name", "Item Code", "Master Item Name", "EAN Code",
@@ -48,7 +51,6 @@ def parse_menu_text(text):
48
  if prices:
49
  name = re.sub(price_pattern, '', line).strip(" -:–")
50
  if '/' in line and len(prices) > 1:
51
- # multi-size e.g., 149/199
52
  for i, price in enumerate(prices):
53
  size_label = f" ({['Regular', 'Large', 'XL'][i]})" if i < 3 else f" (Option {i+1})"
54
  store_name = f"{name}{size_label}"
@@ -83,28 +85,31 @@ def ocr_and_extract(image):
83
  rows = parse_menu_text(text)
84
 
85
  df = pd.DataFrame(rows, columns=COLUMNS)
86
- csv_buffer = io.StringIO()
87
- df.to_csv(csv_buffer, index=False)
88
 
 
 
 
 
 
 
89
  json_output = {"rows": rows, "needs_review": []}
90
- return text, csv_buffer.getvalue(), json_output
91
 
92
  # Gradio UI
93
  with gr.Blocks(title="Menu → IBS Schema Extractor") as demo:
94
- gr.Markdown("## 🧾 Menu OCR to IBS Café Schema")
95
- gr.Markdown("Upload a menu image and extract structured data in CSV + JSON formats.")
96
 
97
  with gr.Row():
98
  image_input = gr.Image(type="filepath", label="Upload Menu Image")
99
 
100
  extract_btn = gr.Button("Extract")
101
-
102
  with gr.Tab("Extracted Text"):
103
  text_output = gr.Textbox(label="OCR Text", lines=10)
104
 
105
- with gr.Tab("CSV Output"):
106
- csv_output = gr.Textbox(label="CSV Data", lines=10)
107
- csv_file = gr.File(label="Download CSV")
108
 
109
  with gr.Tab("JSON Output"):
110
  json_output = gr.JSON(label="Structured JSON")
@@ -112,7 +117,7 @@ with gr.Blocks(title="Menu → IBS Schema Extractor") as demo:
112
  extract_btn.click(
113
  ocr_and_extract,
114
  inputs=[image_input],
115
- outputs=[text_output, csv_output, json_output]
116
  )
117
 
118
  demo.launch()
 
5
  import gradio as gr
6
  import io
7
 
8
+ # Uncomment and edit this path if you’re on Windows
9
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
10
+
11
  # IBS Café schema columns
12
  COLUMNS = [
13
  "Parent Category", "Category", "Store Item Name", "Item Code", "Master Item Name", "EAN Code",
 
51
  if prices:
52
  name = re.sub(price_pattern, '', line).strip(" -:–")
53
  if '/' in line and len(prices) > 1:
 
54
  for i, price in enumerate(prices):
55
  size_label = f" ({['Regular', 'Large', 'XL'][i]})" if i < 3 else f" (Option {i+1})"
56
  store_name = f"{name}{size_label}"
 
85
  rows = parse_menu_text(text)
86
 
87
  df = pd.DataFrame(rows, columns=COLUMNS)
 
 
88
 
89
+ # Save Excel file in memory
90
+ excel_buffer = io.BytesIO()
91
+ with pd.ExcelWriter(excel_buffer, engine='openpyxl') as writer:
92
+ df.to_excel(writer, index=False, sheet_name="Menu")
93
+ excel_buffer.seek(0)
94
+
95
  json_output = {"rows": rows, "needs_review": []}
96
+ return text, excel_buffer, json_output
97
 
98
  # Gradio UI
99
  with gr.Blocks(title="Menu → IBS Schema Extractor") as demo:
100
+ gr.Markdown("## 🧾 Menu OCR IBS Café Excel Generator")
101
+ gr.Markdown("Upload a menu image and extract structured data in Excel (.xlsx) + JSON formats.")
102
 
103
  with gr.Row():
104
  image_input = gr.Image(type="filepath", label="Upload Menu Image")
105
 
106
  extract_btn = gr.Button("Extract")
107
+
108
  with gr.Tab("Extracted Text"):
109
  text_output = gr.Textbox(label="OCR Text", lines=10)
110
 
111
+ with gr.Tab("Excel Output"):
112
+ excel_file = gr.File(label="Download Excel (.xlsx)")
 
113
 
114
  with gr.Tab("JSON Output"):
115
  json_output = gr.JSON(label="Structured JSON")
 
117
  extract_btn.click(
118
  ocr_and_extract,
119
  inputs=[image_input],
120
+ outputs=[text_output, excel_file, json_output]
121
  )
122
 
123
  demo.launch()