"""Gradio app: OCR a menu image and export the items in the IBS Café schema."""

import io
import re
import tempfile

import gradio as gr
import pandas as pd
import pytesseract
from PIL import Image

# Uncomment and edit this path if you’re on Windows
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# IBS Café schema columns, in the order they are written to the sheet.
# NOTE(review): the "Priortiy" spelling is kept exactly as found; presumably
# it mirrors the target import template -- confirm before correcting it.
COLUMNS = [
    "Parent Category", "Category", "Store Item Name", "Item Code",
    "Master Item Name", "EAN Code", "price", "Active", "Priortiy",
    "Image", "food type", "NoOfMains", "onlineName", "Menu/MRP",
    "itemTaxInclusive", "taxPct", "brandName", "classificationCode",
    "HSN CODE",
]

# Values used for every column that is not derived from the OCR text.
DEFAULTS = {
    "Item Code": "",
    "Master Item Name": "",
    "EAN Code": "",
    "Active": "Y",
    "Priortiy": "",
    "Image": "",
    "food type": "",
    "NoOfMains": "",
    "itemTaxInclusive": "Y",
    "taxPct": "5",
    "brandName": "Nescafe",
    "classificationCode": "",
    "HSN CODE": "",
}

# Suffixes for items that list several prices; any further price falls back
# to "Option N".
SIZE_LABELS = ("Regular", "Large", "XL")

# NOTE(review): the original pattern was lost (only the leading "(?" of a
# lookbehind survived). This reconstruction matches a standalone integer or
# decimal amount -- confirm against real menu scans.
price_pattern = re.compile(r'(?<![\d.])\d+(?:\.\d{1,2})?(?!\d)')


def parse_menu_text(text):
    """Turn raw OCR text into a list of schema row dicts.

    NOTE(review): everything in this function above the ``len(prices) > 1``
    branch was missing from the source and is a reconstruction: a line
    without a price is treated as a category heading, a line with prices as
    an item named by the text before the first price, and the parent
    category is left blank. Verify against the intended parsing rules.

    Args:
        text: OCR output, one menu entry or heading per line.

    Returns:
        A list of dicts as produced by ``build_row``; an item with several
        prices yields one row per price, suffixed with a size label.
    """
    rows = []
    parent_category = ""
    category = ""

    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue

        matches = list(price_pattern.finditer(line))
        if not matches:
            # No price on the line: assume it is a section heading.
            category = line
            continue

        prices = [m.group() for m in matches]
        # Drop separators / currency marks left between the name and price.
        name = line[:matches[0].start()].strip(" \t-–:.|₹")
        if not name:
            continue

        if len(prices) > 1:
            for i, price in enumerate(prices):
                if i < len(SIZE_LABELS):
                    size_label = f" ({SIZE_LABELS[i]})"
                else:
                    size_label = f" (Option {i + 1})"
                store_name = f"{name}{size_label}"
                rows.append(
                    build_row(parent_category, category, store_name, price)
                )
        else:
            rows.append(build_row(parent_category, category, name, prices[0]))

    return rows


def build_row(parent, category, name, price):
    """Return one schema row: ``DEFAULTS`` plus the item-specific fields.

    The item name is written to both "Store Item Name" and "onlineName",
    and the price to both "price" and "Menu/MRP".
    """
    return {
        **DEFAULTS,
        "Parent Category": parent,
        "Category": category,
        "Store Item Name": name,
        "price": price,
        "onlineName": name,
        "Menu/MRP": price,
    }


def ocr_and_extract(image):
    """OCR an uploaded menu image and build the Excel and JSON outputs.

    Args:
        image: Path of the uploaded image, or ``None`` when nothing was
            uploaded.

    Returns:
        A 3-tuple ``(ocr_text, excel_path, json_payload)`` for the three
        output components. With no image, the first element is a prompt
        message and the other two are ``None``.
    """
    if image is None:
        return "Please upload an image.", None, None

    # Close the image handle as soon as OCR is done.
    with Image.open(image) as img:
        text = pytesseract.image_to_string(img)

    rows = parse_menu_text(text)
    df = pd.DataFrame(rows, columns=COLUMNS)

    # gr.File serves files by path, so write the workbook to a named
    # temporary file instead of handing back an in-memory buffer. The handle
    # is closed before pandas reopens the path (required on Windows).
    with tempfile.NamedTemporaryFile(
        prefix="menu_", suffix=".xlsx", delete=False
    ) as tmp:
        excel_path = tmp.name
    with pd.ExcelWriter(excel_path, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name="Menu")

    json_output = {"rows": rows, "needs_review": []}
    return text, excel_path, json_output


# Gradio UI
# NOTE(review): the nesting of the components below is inferred; the
# original indentation was not preserved.
with gr.Blocks(title="Menu → IBS Schema Extractor") as demo:
    gr.Markdown("## 🧾 Menu OCR → IBS Café Excel Generator")
    gr.Markdown("Upload a menu image and extract structured data in Excel (.xlsx) + JSON formats.")

    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload Menu Image")
        extract_btn = gr.Button("Extract")

    with gr.Tab("Extracted Text"):
        text_output = gr.Textbox(label="OCR Text", lines=10)
    with gr.Tab("Excel Output"):
        excel_file = gr.File(label="Download Excel (.xlsx)")
    with gr.Tab("JSON Output"):
        json_output = gr.JSON(label="Structured JSON")

    extract_btn.click(
        ocr_and_extract,
        inputs=[image_input],
        outputs=[text_output, excel_file, json_output],
    )

demo.launch()