# receipt-scanner / app.py
# sammoftah's picture
# Add no-token fallback
# 68b4cc3 verified
"""
Receipt Scanner
Upload a receipt photo and extract structured data (items, prices, totals).
"""
import gradio as gr
from huggingface_hub import InferenceClient
from PIL import Image
import base64
from io import BytesIO
import json
import pandas as pd
import re
import os
import sys
sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
from shared.components import create_method_panel, create_premium_hero
# Module-level Hugging Face Inference client, shared by every scan_receipt call.
# It is constructed with no arguments, so neither a model nor a token is pinned
# here; the model is passed per request in scan_receipt.
# NOTE(review): presumably credentials are picked up from the environment
# (scan_receipt checks HF_TOKEN before calling the client) — confirm.
client = InferenceClient()
# Instruction text sent to the vision-language model alongside the receipt image.
# Assembled one line at a time; the fenced JSON skeleton is the shape the model is
# asked to reproduce, using null for anything it cannot read.
EXTRACTION_PROMPT = "\n".join((
    "Analyze this receipt image and extract ALL information in a structured format.",
    "Extract:",
    "1. **Merchant/Store Name**",
    "2. **Date** (in YYYY-MM-DD format if possible)",
    "3. **Time** (if visible)",
    "4. **Items** - List each item with its price",
    "5. **Subtotal** (if shown)",
    "6. **Tax** (if shown)",
    "7. **Total Amount**",
    "8. **Payment Method** (if visible)",
    "Format your response EXACTLY as JSON:",
    "```json",
    "{",
    '"merchant": "Store Name",',
    '"date": "YYYY-MM-DD",',
    '"time": "HH:MM",',
    '"items": [',
    '{"name": "Item 1", "price": 0.00},',
    '{"name": "Item 2", "price": 0.00}',
    "],",
    '"subtotal": 0.00,',
    '"tax": 0.00,',
    '"total": 0.00,',
    '"payment_method": "Card/Cash/etc"',
    "}",
    "```",
    "Be precise with numbers. If something is unclear, use null.",
))
def extract_json_from_text(text):
    """Extract a JSON object string from a model response.

    Lookup order:
      1. A JSON object inside a Markdown code fence (``` or ```json).
      2. The JSON object that starts at the first ``{`` in the text, so that
         trailing commentary (even commentary containing braces) is ignored.
      3. As a last resort, the span from the first ``{`` to the last ``}``;
         this may not be valid JSON, which lets the caller report the parse
         error together with the raw response.

    Args:
        text: Raw model output. ``None``, non-string and empty values are
            tolerated.

    Returns:
        The candidate JSON text, or ``None`` when the input contains no
        ``{...}`` span at all.
    """
    # Chat responses can carry no content; treat that as "nothing to extract"
    # instead of letting re.search raise TypeError.
    if not isinstance(text, str) or not text:
        return None

    # Try to find JSON in code blocks first.
    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
    if fenced:
        return fenced.group(1)

    start = text.find('{')
    if start == -1:
        return None

    # Decode exactly one object starting at the first brace. Only the first
    # brace is tried so that a nested object is never mistaken for the whole
    # receipt when the outer object is malformed.
    try:
        _, end = json.JSONDecoder().raw_decode(text, start)
    except json.JSONDecodeError:
        pass
    else:
        return text[start:end]

    # Fallback: widest brace-delimited span (original behaviour).
    greedy = re.search(r'\{.*\}', text, re.DOTALL)
    return greedy.group(0) if greedy else None
def _empty_items_table():
    """Return an empty items table with the ``name`` / ``price`` columns."""
    return pd.DataFrame(columns=["name", "price"])


def _format_money(value):
    """Format *value* as ``$0.00``; return ``"N/A"`` if missing or not numeric."""
    # The prompt tells the model to answer null for unclear fields, so None is
    # an expected value. bool is excluded because it is an int subclass.
    if value is None or isinstance(value, bool):
        return "N/A"
    if isinstance(value, str):
        # Tolerate amounts returned as text such as "$1,234.50".
        value = value.replace("$", "").replace(",", "").strip()
    try:
        return f"${float(value):.2f}"
    except (TypeError, ValueError):
        return "N/A"


def _normalize_items(raw_items):
    """Return ``(name, price)`` pairs for each dict entry in *raw_items*."""
    if not isinstance(raw_items, list):
        return []
    return [
        (entry.get("name") or "Unknown", entry.get("price"))
        for entry in raw_items
        if isinstance(entry, dict)
    ]


def _build_summary(data, items):
    """Render the parsed receipt *data* and its *items* as a Markdown summary."""
    # Blank lines separate the blocks on purpose: a `---` placed directly under
    # a text line is read by Markdown as a setext heading underline, not a rule.
    lines = [
        "# 🧾 Receipt Analysis",
        "",
        f"**Merchant**: {data.get('merchant') or 'N/A'}",
        "",
        f"**Date**: {data.get('date') or 'N/A'}",
        "",
        f"**Time**: {data.get('time') or 'N/A'}",
        "",
        "---",
        "",
        "## 📦 Items",
        "",
    ]
    if items:
        lines.extend(
            f"- **{name}**: {_format_money(price)}" for name, price in items
        )
    else:
        lines.append("*No items found*")
    lines.extend([
        "",
        "---",
        "",
        "## 💰 Totals",
        "",
        f"- **Subtotal**: {_format_money(data.get('subtotal'))}",
        f"- **Tax**: {_format_money(data.get('tax'))}",
        f"- **Total**: {_format_money(data.get('total'))}",
        "",
        f"**Payment**: {data.get('payment_method') or 'N/A'}",
        "",
    ])
    return "\n".join(lines)


def scan_receipt(image):
    """Extract structured data from a receipt image using a vision-language model.

    Args:
        image: A PIL image, a path to an image file, or ``None`` when nothing
            was uploaded.

    Returns:
        A 3-tuple ``(summary, table, json_text)``:
          * ``summary`` - Markdown text (the analysis, or an error/notice).
          * ``table`` - ``pandas.DataFrame`` with ``name`` and ``price``
            columns; empty whenever no items are available, including on
            every error path.
          * ``json_text`` - the parsed data pretty-printed as JSON, or ``""``
            on error.

    The function never raises; failures are reported through ``summary``.
    """
    # The second output is wired to a table component, so every path returns a
    # DataFrame there (as the no-token fallback already did) rather than "".
    empty_table = _empty_items_table()

    if image is None:
        return "❌ Please upload a receipt first!", empty_table, ""

    if not os.getenv("HF_TOKEN"):
        # No token: skip the hosted call and return the expected schema with
        # placeholder values so the downstream JSON shape is still visible.
        data = {
            "merchant": "Manual review required",
            "date": None,
            "time": None,
            "items": [],
            "subtotal": None,
            "tax": None,
            "total": None,
            "payment_method": None,
            "note": "HF_TOKEN is not configured for hosted vision inference. The image was received, but field extraction needs a Space secret or manual entry.",
        }
        summary = (
            "# 🧾 Receipt Ready For Review\n"
            "The image uploaded correctly, but hosted vision inference is not configured on this Space.\n"
            "To enable automatic extraction, add a Hugging Face token as a Space secret named `HF_TOKEN`.\n"
            "Until then, this Space still documents the expected schema and downstream JSON shape.\n"
        )
        return summary, empty_table, json.dumps(data, indent=2)

    # Bound before the try so every handler below can safely reference it.
    raw_response = ""
    try:
        # Encode the image as a base64 PNG data URL for the chat request.
        if isinstance(image, str):
            image = Image.open(image)
        buffered = BytesIO()
        image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        # One multimodal chat request: the image and the extraction prompt
        # are sent together to Qwen2-VL.
        response = client.chat_completion(
            model="Qwen/Qwen2-VL-7B-Instruct",
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
                        {"type": "text", "text": EXTRACTION_PROMPT},
                    ],
                }
            ],
            max_tokens=1000,
            temperature=0.1,  # Low temperature for accuracy
        )
        # Content may be missing; normalise to "" so parsing reports it cleanly.
        raw_response = response.choices[0].message.content or ""

        json_str = extract_json_from_text(raw_response)
        if not json_str:
            return (
                f"⚠️ Could not parse receipt data.\n\nRaw response:\n{raw_response}",
                empty_table,
                "",
            )

        try:
            data = json.loads(json_str)
        except json.JSONDecodeError as e:
            return (
                f"❌ Error parsing JSON: {e}\n\nRaw response:\n{raw_response}",
                empty_table,
                "",
            )
        if not isinstance(data, dict):
            return (
                f"⚠️ Could not parse receipt data.\n\nRaw response:\n{raw_response}",
                empty_table,
                "",
            )

        items = _normalize_items(data.get("items"))
        summary = _build_summary(data, items)

        # Table view: one row per item, with prices already formatted.
        if items:
            table = pd.DataFrame(
                [{"name": name, "price": _format_money(price)} for name, price in items],
                columns=["name", "price"],
            )
        else:
            table = empty_table

        return summary, table, json.dumps(data, indent=2)
    except Exception as e:
        # UI boundary: surface any failure (I/O, network, unexpected payload)
        # as a message instead of crashing the event handler.
        return f"❌ Error scanning receipt: {e}", empty_table, ""
# Gradio interface: hero + method panel, an input column (upload, scan button,
# tips) next to the summary, then the items table and the raw JSON.
# Emoji in the labels below were mis-encoded in the file and are repaired here.
# NOTE(review): the scan-button glyph had lost its final byte; 🔍 is assumed.
# NOTE(review): block nesting was reconstructed from a whitespace-stripped
# source — confirm the table/JSON row is a sibling of the first row rather
# than nested under the summary column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    create_premium_hero(
        "Receipt Scanner",
        "Extract merchant, items, totals, and payment details from receipt images with a vision-language model workflow.",
        "🧾",
        badge="Document Vision",
        highlights=["Vision-language extraction", "Structured JSON", "CSV export"],
    )
    create_method_panel({
        "Technique": "Image-to-structured-data extraction with schema parsing and tabular validation.",
        "What it proves": "You can turn multimodal model output into reliable downstream data products.",
        "HF capability": "Designed for Hub-hosted VLM inference and lightweight Space deployment.",
    })

    with gr.Row():
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="📸 Upload Receipt Photo",
                type="pil",
                height=400
            )
            scan_btn = gr.Button("🔍 Scan Receipt", variant="primary", size="lg")
            gr.Markdown(
                "\n"
                "### 💡 Tips for Best Results:\n"
                "- Good lighting, minimal shadows\n"
                "- Receipt should be flat and clear\n"
                "- Include the entire receipt\n"
                "- High contrast works best\n"
            )
        with gr.Column(scale=1):
            summary_output = gr.Markdown(label="📊 Summary")

    with gr.Row():
        with gr.Column():
            table_output = gr.Dataframe(
                label="📋 Items Table",
                headers=["name", "price"],
                interactive=False
            )
        with gr.Column():
            json_output = gr.Textbox(
                label="📄 JSON Data (copy to download)",
                lines=15,
                max_lines=20
            )

    # Event handler: the three return values of scan_receipt map, in order,
    # onto the summary, the items table and the JSON textbox.
    scan_btn.click(
        fn=scan_receipt,
        inputs=[image_input],
        outputs=[summary_output, table_output, json_output],
        api_name="scan"
    )

    gr.Markdown(
        "\n"
        "---\n"
        "### 🎓 What This App Does:\n"
        "1. **OCR + Understanding**: Doesn't just read text, understands structure\n"
        "2. **Data Extraction**: Identifies items, prices, totals, dates\n"
        "3. **JSON Export**: Download structured data for expense tracking\n"
        "4. **Table View**: See items in an organized format\n"
        "### 📊 Use Cases:\n"
        "- **Expense Tracking**: Digitize receipts for accounting\n"
        "- **Budget Apps**: Auto-import spending data\n"
        "- **Tax Records**: Organize business expenses\n"
        "- **Reimbursements**: Submit itemized claims\n"
        "- **Personal Finance**: Track spending categories\n"
        # Blank line so the note is its own paragraph instead of being
        # absorbed into the last bullet as a lazy continuation line.
        "\n"
        "*Note: Accuracy depends on receipt clarity and format. Complex layouts may require manual verification.*\n"
    )

if __name__ == "__main__":
    demo.launch()