Spaces:
Sleeping
Sleeping
| """ | |
| Receipt Scanner | |
| Upload a receipt photo and extract structured data (items, prices, totals). | |
| """ | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
| from PIL import Image | |
| import base64 | |
| from io import BytesIO | |
| import json | |
| import pandas as pd | |
| import re | |
| import os | |
| import sys | |
| sys.path.append(os.path.join(os.path.dirname(__file__), '..')) | |
| from shared.components import create_method_panel, create_premium_hero | |
# Initialize Hugging Face Inference Client
# Shared, module-level client used by scan_receipt() for the chat-completion
# call. It is constructed with no explicit arguments; scan_receipt() checks the
# HF_TOKEN environment variable itself before using it.
client = InferenceClient()
# Instruction text sent alongside the receipt image in scan_receipt().
# It asks for a single JSON object inside a ```json fence, which is the first
# shape extract_json_from_text() looks for. The keys listed here (merchant,
# date, time, items[name, price], subtotal, tax, total, payment_method) are the
# ones scan_receipt() reads back.
# NOTE: the last sentence tells the model to emit null for unclear fields, so
# any of those values may come back as None and consumers must tolerate that.
EXTRACTION_PROMPT = """Analyze this receipt image and extract ALL information in a structured format.
Extract:
1. **Merchant/Store Name**
2. **Date** (in YYYY-MM-DD format if possible)
3. **Time** (if visible)
4. **Items** - List each item with its price
5. **Subtotal** (if shown)
6. **Tax** (if shown)
7. **Total Amount**
8. **Payment Method** (if visible)
Format your response EXACTLY as JSON:
```json
{
"merchant": "Store Name",
"date": "YYYY-MM-DD",
"time": "HH:MM",
"items": [
{"name": "Item 1", "price": 0.00},
{"name": "Item 2", "price": 0.00}
],
"subtotal": 0.00,
"tax": 0.00,
"total": 0.00,
"payment_method": "Card/Cash/etc"
}
```
Be precise with numbers. If something is unclear, use null."""
def extract_json_from_text(text):
    """Extract the text of a JSON object from a model response.

    Lookup order:

    1. A fenced code block (three backticks, optionally tagged ``json``)
       that contains an object.
    2. The first ``{`` in the text, decoded with the JSON parser so that the
       returned slice ends exactly where that object ends, even when later
       prose contains braces of its own.
    3. A greedy first-``{``-to-last-``}`` match, kept as a last resort so that
       malformed JSON is still handed back and the caller's ``json.loads``
       can report the precise parse error.

    Args:
        text: Raw response text. ``None``, empty, or non-string input is
            treated as "no JSON present".

    Returns:
        The JSON object as a string, or ``None`` when no candidate is found.
        The result is not guaranteed to be valid JSON (see step 3).
    """
    if not text or not isinstance(text, str):
        return None

    # Try to find JSON in code blocks first
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        return fenced.group(1)

    first_brace = text.find('{')
    if first_brace == -1:
        return None

    # Let the JSON parser find where the object really ends; a regex cannot
    # tell a closing brace of the object from a brace in trailing commentary.
    try:
        _, end = json.JSONDecoder().raw_decode(text, first_brace)
    except json.JSONDecodeError:
        pass
    else:
        return text[first_brace:end]

    # Only the first brace is decoded on purpose: scanning later braces could
    # silently return a nested fragment (e.g. one item) of a broken receipt.
    loose = re.search(r'\{.*\}', text, re.DOTALL)
    return loose.group(0) if loose else None
def _empty_items_table():
    """Return an empty items table with the columns the UI table displays."""
    return pd.DataFrame(columns=["name", "price"])


def _display_text(value, default="N/A"):
    """Return *value* for display, substituting *default* for a JSON null."""
    return default if value is None else value


def _format_money(value):
    """Render a price-like value as ``$0.00`` without ever raising."""
    # The extraction prompt tells the model to use null for unclear fields,
    # so None is an expected value here, not an error.
    if value is None or isinstance(value, bool):
        return "N/A"
    try:
        return f"${float(value):.2f}"
    except (TypeError, ValueError):
        # e.g. "$3.50": show what the model produced rather than crash.
        return str(value)


def _receipt_items(data):
    """Return the receipt's line items, keeping only dict-shaped entries."""
    items = data.get("items")
    if not isinstance(items, list):
        return []
    return [item for item in items if isinstance(item, dict)]


def _build_summary(data, items):
    """Build the Markdown summary shown next to the uploaded image."""
    # Blank lines separate the blocks: a '---' directly under a text line is
    # a setext heading underline in Markdown, not a horizontal rule.
    lines = [
        "# π§Ύ Receipt Analysis",
        "",
        f"**Merchant**: {_display_text(data.get('merchant'))}",
        "",
        f"**Date**: {_display_text(data.get('date'))}",
        "",
        f"**Time**: {_display_text(data.get('time'))}",
        "",
        "---",
        "",
        "## π¦ Items",
        "",
    ]
    if items:
        lines.extend(
            f"- **{_display_text(item.get('name'), 'Unknown')}**: "
            f"{_format_money(item.get('price'))}"
            for item in items
        )
    else:
        lines.append("*No items found*")
    lines.extend([
        "",
        "---",
        "",
        "## π° Totals",
        "",
        f"- **Subtotal**: {_format_money(data.get('subtotal'))}",
        f"- **Tax**: {_format_money(data.get('tax'))}",
        f"- **Total**: {_format_money(data.get('total'))}",
        "",
        f"**Payment**: {_display_text(data.get('payment_method'))}",
    ])
    return "\n".join(lines) + "\n"


def _items_table(items):
    """Build the items DataFrame with prices formatted for display."""
    if not items:
        return _empty_items_table()
    table = pd.DataFrame(items)
    # Format from the source dicts, not the column: pandas turns None into
    # NaN in numeric columns, and the column is absent when no item has a
    # price at all.
    table["price"] = [_format_money(item.get("price")) for item in items]
    return table


def scan_receipt(image):
    """Extract structured data from receipt using VLM.

    Args:
        image: The uploaded receipt as a PIL image, a path string to an
            image file, or ``None`` when nothing was uploaded.

    Returns:
        A ``(summary, table, json_text)`` tuple for the three UI outputs:
        Markdown summary (or an error message), a pandas DataFrame of items
        (empty, with ``name``/``price`` columns, whenever there is nothing to
        show), and the pretty-printed JSON (``""`` on failure).

    Missing or null amounts are rendered as ``N/A`` instead of aborting the
    scan. Failures are reported through the summary text; nothing is raised
    to the caller.
    """
    # NOTE(review): several user-facing strings below contain mis-encoded
    # emoji; they are kept as found because the originals cannot be
    # recovered with certainty.
    if image is None:
        # The table slot always gets a DataFrame: a bare string is not a
        # table value (Gradio may try to read a str as a CSV path).
        return "β Please upload a receipt first!", _empty_items_table(), ""

    raw_response = ""
    try:
        if not os.getenv("HF_TOKEN"):
            data = {
                "merchant": "Manual review required",
                "date": None,
                "time": None,
                "items": [],
                "subtotal": None,
                "tax": None,
                "total": None,
                "payment_method": None,
                "note": "HF_TOKEN is not configured for hosted vision inference. The image was received, but field extraction needs a Space secret or manual entry.",
            }
            summary = """# π§Ύ Receipt Ready For Review
The image uploaded correctly, but hosted vision inference is not configured on this Space.
To enable automatic extraction, add a Hugging Face token as a Space secret named `HF_TOKEN`.
Until then, this Space still documents the expected schema and downstream JSON shape.
"""
            return summary, _empty_items_table(), json.dumps(data, indent=2)

        # Convert PIL Image to base64
        buffered = BytesIO()
        if isinstance(image, str):
            image = Image.open(image)
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # Use Florence-2 or Qwen2-VL for OCR + understanding
        response = client.chat_completion(
            model="Qwen/Qwen2-VL-7B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
                        {"type": "text", "text": EXTRACTION_PROMPT}
                    ]
                }
            ],
            max_tokens=1000,
            temperature=0.1  # Low temperature for accuracy
        )
        # Normalise a missing message body to "" so the parsing below and
        # the error messages always deal with a string.
        raw_response = response.choices[0].message.content or ""

        # Extract JSON from response
        json_str = extract_json_from_text(raw_response)
        data = json.loads(json_str) if json_str else None
        if not isinstance(data, dict):
            return (
                f"β οΈ Could not parse receipt data.\n\nRaw response:\n{raw_response}",
                _empty_items_table(),
                "",
            )

        items = _receipt_items(data)
        return _build_summary(data, items), _items_table(items), json.dumps(data, indent=2)
    except json.JSONDecodeError as e:
        return (
            f"β Error parsing JSON: {str(e)}\n\nRaw response:\n{raw_response}",
            _empty_items_table(),
            "",
        )
    except Exception as e:
        # UI boundary: surface the failure in the summary pane instead of
        # letting the event handler raise.
        return f"β Error scanning receipt: {str(e)}", _empty_items_table(), ""
# Gradio Interface
# Declares the page in render order: hero banner, method panel, an
# upload/summary row, a table/JSON row, the click wiring, and a closing
# Markdown section. Component creation order is the layout, so statements
# here should not be reordered casually.
# NOTE(review): the nesting below was reconstructed from a flattened copy of
# this file; confirm in particular that the second gr.Row() sits at the
# Blocks level rather than inside the summary column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Shared project helpers imported from shared.components; their rendering
    # is defined outside this file.
    # NOTE(review): the highlights advertise "CSV export", but no CSV export
    # is wired up anywhere in this file.
    create_premium_hero(
        "Receipt Scanner",
        "Extract merchant, items, totals, and payment details from receipt images with a vision-language model workflow.",
        "π§Ύ",
        badge="Document Vision",
        highlights=["Vision-language extraction", "Structured JSON", "CSV export"],
    )
    create_method_panel({
        "Technique": "Image-to-structured-data extraction with schema parsing and tabular validation.",
        "What it proves": "You can turn multimodal model output into reliable downstream data products.",
        "HF capability": "Designed for Hub-hosted VLM inference and lightweight Space deployment.",
    })
    with gr.Row():
        with gr.Column(scale=1):
            # type="pil" requests a PIL image for scan_receipt(), which
            # also accepts a path string.
            image_input = gr.Image(
                label="πΈ Upload Receipt Photo",
                type="pil",
                height=400
            )
            scan_btn = gr.Button("π Scan Receipt", variant="primary", size="lg")
            gr.Markdown("""
### π‘ Tips for Best Results:
- Good lighting, minimal shadows
- Receipt should be flat and clear
- Include the entire receipt
- High contrast works best
""")
        with gr.Column(scale=1):
            # Receives the first element of scan_receipt()'s return tuple.
            summary_output = gr.Markdown(label="π Summary")
    with gr.Row():
        with gr.Column():
            # Receives the second element of scan_receipt()'s return tuple;
            # headers match the columns of the empty table scan_receipt()
            # builds.
            table_output = gr.Dataframe(
                label="π Items Table",
                headers=["name", "price"],
                interactive=False
            )
        with gr.Column():
            # Receives the third element: the pretty-printed JSON text.
            json_output = gr.Textbox(
                label="π JSON Data (copy to download)",
                lines=15,
                max_lines=20
            )
    # Event handler
    # The order of `outputs` must match the order of scan_receipt()'s
    # three return values.
    scan_btn.click(
        fn=scan_receipt,
        inputs=[image_input],
        outputs=[summary_output, table_output, json_output],
        api_name="scan"
    )
    gr.Markdown("""
---
### π What This App Does:
1. **OCR + Understanding**: Doesn't just read text, understands structure
2. **Data Extraction**: Identifies items, prices, totals, dates
3. **JSON Export**: Download structured data for expense tracking
4. **Table View**: See items in an organized format
### π Use Cases:
- **Expense Tracking**: Digitize receipts for accounting
- **Budget Apps**: Auto-import spending data
- **Tax Records**: Organize business expenses
- **Reimbursements**: Submit itemized claims
- **Personal Finance**: Track spending categories
*Note: Accuracy depends on receipt clarity and format. Complex layouts may require manual verification.*
""")
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()