""" Receipt Scanner Upload a receipt photo and extract structured data (items, prices, totals). """ import gradio as gr from huggingface_hub import InferenceClient from PIL import Image import base64 from io import BytesIO import json import pandas as pd import re import os import sys sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from shared.components import create_method_panel, create_premium_hero # Initialize Hugging Face Inference Client client = InferenceClient() EXTRACTION_PROMPT = """Analyze this receipt image and extract ALL information in a structured format. Extract: 1. **Merchant/Store Name** 2. **Date** (in YYYY-MM-DD format if possible) 3. **Time** (if visible) 4. **Items** - List each item with its price 5. **Subtotal** (if shown) 6. **Tax** (if shown) 7. **Total Amount** 8. **Payment Method** (if visible) Format your response EXACTLY as JSON: ```json { "merchant": "Store Name", "date": "YYYY-MM-DD", "time": "HH:MM", "items": [ {"name": "Item 1", "price": 0.00}, {"name": "Item 2", "price": 0.00} ], "subtotal": 0.00, "tax": 0.00, "total": 0.00, "payment_method": "Card/Cash/etc" } ``` Be precise with numbers. If something is unclear, use null.""" def extract_json_from_text(text): """Extract JSON from markdown code blocks or raw text.""" # Try to find JSON in code blocks first json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL) if json_match: return json_match.group(1) # Try to find raw JSON json_match = re.search(r'\{.*\}', text, re.DOTALL) if json_match: return json_match.group(0) return None def scan_receipt(image): """Extract structured data from receipt using VLM.""" if image is None: return "โŒ Please upload a receipt first!", "", "" try: if not os.getenv("HF_TOKEN"): data = { "merchant": "Manual review required", "date": None, "time": None, "items": [], "subtotal": None, "tax": None, "total": None, "payment_method": None, "note": "HF_TOKEN is not configured for hosted vision inference. The image was received, but field extraction needs a Space secret or manual entry.", } summary = """# ๐Ÿงพ Receipt Ready For Review The image uploaded correctly, but hosted vision inference is not configured on this Space. To enable automatic extraction, add a Hugging Face token as a Space secret named `HF_TOKEN`. Until then, this Space still documents the expected schema and downstream JSON shape. """ return summary, pd.DataFrame(columns=["name", "price"]), json.dumps(data, indent=2) # Convert PIL Image to base64 buffered = BytesIO() if isinstance(image, str): image = Image.open(image) image.save(buffered, format="PNG") img_str = base64.b64encode(buffered.getvalue()).decode() # Use Florence-2 or Qwen2-VL for OCR + understanding response = client.chat_completion( model="Qwen/Qwen2-VL-7B-Instruct", messages=[ { "role": "user", "content": [ {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}}, {"type": "text", "text": EXTRACTION_PROMPT} ] } ], max_tokens=1000, temperature=0.1 # Low temperature for accuracy ) raw_response = response.choices[0].message.content # Extract JSON from response json_str = extract_json_from_text(raw_response) if not json_str: return f"โš ๏ธ Could not parse receipt data.\n\nRaw response:\n{raw_response}", "", "" # Parse JSON data = json.loads(json_str) # Create formatted summary summary = f"""# ๐Ÿงพ Receipt Analysis **Merchant**: {data.get('merchant', 'N/A')} **Date**: {data.get('date', 'N/A')} **Time**: {data.get('time', 'N/A')} --- ## ๐Ÿ“ฆ Items """ # Add items table if data.get('items'): for item in data['items']: name = item.get('name', 'Unknown') price = item.get('price', 0.0) summary += f"- **{name}**: ${price:.2f}\n" else: summary += "*No items found*\n" summary += f""" --- ## ๐Ÿ’ฐ Totals - **Subtotal**: ${data.get('subtotal', 0.0):.2f} - **Tax**: ${data.get('tax', 0.0):.2f} - **Total**: ${data.get('total', 0.0):.2f} **Payment**: {data.get('payment_method', 'N/A')} """ # Create DataFrame for table view if data.get('items'): df = pd.DataFrame(data['items']) df['price'] = df['price'].apply(lambda x: f"${x:.2f}") else: df = pd.DataFrame(columns=['name', 'price']) # Format JSON for download json_output = json.dumps(data, indent=2) return summary, df, json_output except json.JSONDecodeError as e: return f"โŒ Error parsing JSON: {str(e)}\n\nRaw response:\n{raw_response}", "", "" except Exception as e: return f"โŒ Error scanning receipt: {str(e)}", "", "" # Gradio Interface with gr.Blocks(theme=gr.themes.Soft()) as demo: create_premium_hero( "Receipt Scanner", "Extract merchant, items, totals, and payment details from receipt images with a vision-language model workflow.", "๐Ÿงพ", badge="Document Vision", highlights=["Vision-language extraction", "Structured JSON", "CSV export"], ) create_method_panel({ "Technique": "Image-to-structured-data extraction with schema parsing and tabular validation.", "What it proves": "You can turn multimodal model output into reliable downstream data products.", "HF capability": "Designed for Hub-hosted VLM inference and lightweight Space deployment.", }) with gr.Row(): with gr.Column(scale=1): image_input = gr.Image( label="๐Ÿ“ธ Upload Receipt Photo", type="pil", height=400 ) scan_btn = gr.Button("๐Ÿ” Scan Receipt", variant="primary", size="lg") gr.Markdown(""" ### ๐Ÿ’ก Tips for Best Results: - Good lighting, minimal shadows - Receipt should be flat and clear - Include the entire receipt - High contrast works best """) with gr.Column(scale=1): summary_output = gr.Markdown(label="๐Ÿ“Š Summary") with gr.Row(): with gr.Column(): table_output = gr.Dataframe( label="๐Ÿ“‹ Items Table", headers=["name", "price"], interactive=False ) with gr.Column(): json_output = gr.Textbox( label="๐Ÿ“„ JSON Data (copy to download)", lines=15, max_lines=20 ) # Event handler scan_btn.click( fn=scan_receipt, inputs=[image_input], outputs=[summary_output, table_output, json_output], api_name="scan" ) gr.Markdown(""" --- ### ๐ŸŽ“ What This App Does: 1. **OCR + Understanding**: Doesn't just read text, understands structure 2. **Data Extraction**: Identifies items, prices, totals, dates 3. **JSON Export**: Download structured data for expense tracking 4. **Table View**: See items in an organized format ### ๐Ÿ“Š Use Cases: - **Expense Tracking**: Digitize receipts for accounting - **Budget Apps**: Auto-import spending data - **Tax Records**: Organize business expenses - **Reimbursements**: Submit itemized claims - **Personal Finance**: Track spending categories *Note: Accuracy depends on receipt clarity and format. Complex layouts may require manual verification.* """) if __name__ == "__main__": demo.launch()