Spaces:

sammoftah
/

receipt-scanner

Sleeping

App Files Files Community

receipt-scanner / app.py

sammoftah

Add no-token fallback

68b4cc3 verified 27 days ago

raw

history blame contribute delete

8.09 kB

	"""
	Receipt Scanner
	Upload a receipt photo and extract structured data (items, prices, totals).
	"""

	import gradio as gr
	from huggingface_hub import InferenceClient
	from PIL import Image
	import base64
	from io import BytesIO
	import json
	import pandas as pd
	import re
	import os
	import sys

	sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
	from shared.components import create_method_panel, create_premium_hero

	# Module-level Hugging Face Inference client, created once with default
	# arguments and reused by scan_receipt() for its chat_completion() calls.
	client = InferenceClient()

	# Instruction text sent to the vision-language model together with the receipt
	# image. It asks for a single fenced JSON object with the keys merchant, date,
	# time, items (name/price pairs), subtotal, tax, total and payment_method, and
	# tells the model to use null for anything it cannot read.
	EXTRACTION_PROMPT = """Analyze this receipt image and extract ALL information in a structured format.

	Extract:
	1. Merchant/Store Name
	2. Date (in YYYY-MM-DD format if possible)
	3. Time (if visible)
	4. Items - List each item with its price
	5. Subtotal (if shown)
	6. Tax (if shown)
	7. Total Amount
	8. Payment Method (if visible)

	Format your response EXACTLY as JSON:
	```json
	{
	"merchant": "Store Name",
	"date": "YYYY-MM-DD",
	"time": "HH:MM",
	"items": [
	{"name": "Item 1", "price": 0.00},
	{"name": "Item 2", "price": 0.00}
	],
	"subtotal": 0.00,
	"tax": 0.00,
	"total": 0.00,
	"payment_method": "Card/Cash/etc"
	}
	```

	Be precise with numbers. If something is unclear, use null."""


	def extract_json_from_text(text):
	    """Return the JSON object embedded in *text*, or ``None`` if there is none.

	    A fenced Markdown code block (with or without a ``json`` language tag) is
	    preferred, because anything the model writes after the fence may itself
	    contain braces. If no fenced object is found, the span from the first
	    ``{`` to the last ``}`` in the text is returned instead.

	    Args:
	        text: Raw model output. ``None`` or an empty string yields ``None``.

	    Returns:
	        The JSON text as a string (not parsed), or ``None``.
	    """
	    if not text:
	        return None

	    # Fenced block first. The object is matched lazily, but the closing fence
	    # anchors it, so nested objects inside the block are still captured whole.
	    fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', text, re.DOTALL)
	    if fenced:
	        return fenced.group(1)

	    # Fallback: greedy match from the first "{" to the last "}".
	    bare = re.search(r'\{.*\}', text, re.DOTALL)
	    if bare:
	        return bare.group(0)

	    return None


	def _format_money(value):
	    """Format *value* as ``$0.00``; return ``"N/A"`` when it is not numeric."""
	    if isinstance(value, bool):
	        return "N/A"
	    if isinstance(value, str):
	        # Models sometimes quote amounts ("12.50", "$1,204.00"); recover them.
	        try:
	            value = float(value.replace("$", "").replace(",", "").strip())
	        except ValueError:
	            return "N/A"
	    if not isinstance(value, (int, float)):
	        return "N/A"
	    return f"${value:.2f}"


	def _display(value):
	    """Return *value* for display, substituting ``"N/A"`` for null/empty fields."""
	    return "N/A" if value is None or value == "" else value


	def _empty_items_table():
	    """Return an empty items table with the ``name``/``price`` columns."""
	    return pd.DataFrame(columns=["name", "price"])


	def scan_receipt(image):
	    """Extract structured data from a receipt image using a hosted VLM.

	    Args:
	        image: A PIL image, a path to an image file, or ``None``.

	    Returns:
	        A ``(summary, items_table, json_text)`` tuple:

	        * ``summary`` - Markdown summary, or an error/warning message.
	        * ``items_table`` - DataFrame with ``name`` and ``price`` columns
	          (empty on every failure path; never a string, because the Gradio
	          Dataframe component interprets a string value as a CSV file path).
	        * ``json_text`` - the extracted data as indented JSON, or ``""``.

	    Fields the model reports as ``null`` are shown as ``N/A`` instead of
	    aborting the whole scan. Missing amount keys keep the ``$0.00`` default.
	    """
	    if image is None:
	        return "❌ Please upload a receipt first!", _empty_items_table(), ""

	    # Bound before the try so the JSONDecodeError handler can always use it,
	    # even if the decode error is raised by the inference call itself.
	    raw_response = ""

	    try:
	        if not os.getenv("HF_TOKEN"):
	            data = {
	                "merchant": "Manual review required",
	                "date": None,
	                "time": None,
	                "items": [],
	                "subtotal": None,
	                "tax": None,
	                "total": None,
	                "payment_method": None,
	                "note": "HF_TOKEN is not configured for hosted vision inference. The image was received, but field extraction needs a Space secret or manual entry.",
	            }
	            summary = """# 🧾 Receipt Ready For Review

	The image uploaded correctly, but hosted vision inference is not configured on this Space.

	To enable automatic extraction, add a Hugging Face token as a Space secret named `HF_TOKEN`.

	Until then, this Space still documents the expected schema and downstream JSON shape.
	"""
	            return summary, pd.DataFrame(columns=["name", "price"]), json.dumps(data, indent=2)

	        # Convert PIL Image to base64
	        buffered = BytesIO()
	        if isinstance(image, str):
	            image = Image.open(image)
	        image.save(buffered, format="PNG")
	        img_str = base64.b64encode(buffered.getvalue()).decode()

	        # Send the image and the extraction prompt to the vision-language model
	        response = client.chat_completion(
	            model="Qwen/Qwen2-VL-7B-Instruct",
	            messages=[
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_str}"}},
	                        {"type": "text", "text": EXTRACTION_PROMPT}
	                    ]
	                }
	            ],
	            max_tokens=1000,
	            temperature=0.1  # Low temperature for accuracy
	        )

	        raw_response = response.choices[0].message.content

	        # Extract JSON from response
	        json_str = extract_json_from_text(raw_response)
	        if not json_str:
	            return (
	                f"⚠️ Could not parse receipt data.\n\nRaw response:\n{raw_response}",
	                _empty_items_table(),
	                "",
	            )

	        # Parse JSON
	        data = json.loads(json_str)

	        # Keep only well-formed item entries; the model may emit null or a
	        # non-list for "items", or non-object entries inside the list.
	        raw_items = data.get('items')
	        items = [
	            item for item in (raw_items if isinstance(raw_items, list) else [])
	            if isinstance(item, dict)
	        ]

	        # Create formatted summary
	        summary = f"""# 🧾 Receipt Analysis

	Merchant: {_display(data.get('merchant', 'N/A'))}
	Date: {_display(data.get('date', 'N/A'))}
	Time: {_display(data.get('time', 'N/A'))}

	---

	## 📦 Items

	"""
	        # Add items list
	        if items:
	            summary += "".join(
	                f"- {item.get('name', 'Unknown')}: {_format_money(item.get('price', 0.0))}\n"
	                for item in items
	            )
	        else:
	            summary += "No items found\n"

	        summary += f"""
	---

	## 💰 Totals

	- Subtotal: {_format_money(data.get('subtotal', 0.0))}
	- Tax: {_format_money(data.get('tax', 0.0))}
	- Total: {_format_money(data.get('total', 0.0))}

	Payment: {_display(data.get('payment_method', 'N/A'))}
	"""

	        # Create DataFrame for table view. Rows are built explicitly so a
	        # missing or null price cannot raise and the columns always match
	        # the table's name/price headers.
	        df = pd.DataFrame(
	            [
	                {
	                    "name": item.get('name', 'Unknown'),
	                    "price": _format_money(item.get('price', 0.0)),
	                }
	                for item in items
	            ],
	            columns=['name', 'price'],
	        )

	        # Format JSON for download
	        json_output = json.dumps(data, indent=2)

	        return summary, df, json_output

	    except json.JSONDecodeError as e:
	        return (
	            f"❌ Error parsing JSON: {str(e)}\n\nRaw response:\n{raw_response}",
	            _empty_items_table(),
	            "",
	        )
	    except Exception as e:
	        # Top-level UI boundary: surface the failure to the user as text.
	        return f"❌ Error scanning receipt: {str(e)}", _empty_items_table(), ""


	# Gradio interface: declares the page components and wires the scan button
	# to scan_receipt(). The resulting Blocks app is bound to `demo`.
	with gr.Blocks(theme=gr.themes.Soft()) as demo:
	# Hero header rendered by the project-local shared.components helper.
	# NOTE(review): the highlights advertise "CSV export", but this file only
	# produces a table and JSON text — confirm whether CSV export exists elsewhere.
	create_premium_hero(
	"Receipt Scanner",
	"Extract merchant, items, totals, and payment details from receipt images with a vision-language model workflow.",
	"🧾",
	badge="Document Vision",
	highlights=["Vision-language extraction", "Structured JSON", "CSV export"],
	)
	# Method panel (also from shared.components) fed with label -> description pairs.
	create_method_panel({
	"Technique": "Image-to-structured-data extraction with schema parsing and tabular validation.",
	"What it proves": "You can turn multimodal model output into reliable downstream data products.",
	"HF capability": "Designed for Hub-hosted VLM inference and lightweight Space deployment.",
	})

	with gr.Row():
	with gr.Column(scale=1):
	# Input: the receipt photo, handed to the handler as a PIL image (type="pil").
	image_input = gr.Image(
	label="📸 Upload Receipt Photo",
	type="pil",
	height=400
	)

	scan_btn = gr.Button("🔍 Scan Receipt", variant="primary", size="lg")

	# Static tips on taking a usable receipt photo.
	gr.Markdown("""
	### 💡 Tips for Best Results:
	- Good lighting, minimal shadows
	- Receipt should be flat and clear
	- Include the entire receipt
	- High contrast works best
	""")

	with gr.Column(scale=1):
	# Output 1: Markdown summary (or error message) returned by scan_receipt().
	summary_output = gr.Markdown(label="📊 Summary")

	with gr.Row():
	with gr.Column():
	# Output 2: read-only items table with name/price headers.
	table_output = gr.Dataframe(
	label="📋 Items Table",
	headers=["name", "price"],
	interactive=False
	)

	with gr.Column():
	# Output 3: the extracted data as JSON text, for the user to copy.
	json_output = gr.Textbox(
	label="📄 JSON Data (copy to download)",
	lines=15,
	max_lines=20
	)

	# Event handler: clicking the button calls scan_receipt(image_input); its three
	# return values populate summary_output, table_output and json_output in order.
	# api_name="scan" sets the name of the endpoint for this event.
	scan_btn.click(
	fn=scan_receipt,
	inputs=[image_input],
	outputs=[summary_output, table_output, json_output],
	api_name="scan"
	)

	# Static footer describing what the app does and typical use cases.
	gr.Markdown("""
	---
	### 🎓 What This App Does:

	1. OCR + Understanding: Doesn't just read text, understands structure
	2. Data Extraction: Identifies items, prices, totals, dates
	3. JSON Export: Download structured data for expense tracking
	4. Table View: See items in an organized format

	### 📊 Use Cases:

	- Expense Tracking: Digitize receipts for accounting
	- Budget Apps: Auto-import spending data
	- Tax Records: Organize business expenses
	- Reimbursements: Submit itemized claims
	- Personal Finance: Track spending categories

	Note: Accuracy depends on receipt clarity and format. Complex layouts may require manual verification.
	""")

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	demo.launch()