Spaces:

haharvs
/

Qwen-3-VL-8B_OCR_receipts

Running

App Files Files Community

Qwen-3-VL-8B_OCR_receipts / src /streamlit_app.py

haharvs

Update src/streamlit_app.py

df03a2f verified 3 days ago

raw

history blame contribute delete

7.27 kB

	import streamlit as st
	import requests
	import json
	import base64

	# Configuration
	API_URL = "https://openrouter.ai/api/v1/chat/completions"

	def encode_image(uploaded_file) -> str:
	    """Return the uploaded file's bytes as a base64 text string.

	    ``uploaded_file`` only needs to expose ``getvalue()`` returning bytes.
	    """
	    encoded = base64.b64encode(uploaded_file.getvalue())
	    return encoded.decode('utf-8')

	def analyze_receipt(base64_image, prompt_text, api_key, timeout=120):
	    """Send the receipt image and prompt to the OpenRouter chat-completions API.

	    Args:
	        base64_image: Base64-encoded image bytes, without a ``data:`` prefix.
	        prompt_text: Instruction text sent alongside the image.
	        api_key: OpenRouter API key, sent as a Bearer token.
	        timeout: Seconds to wait on the HTTP request before giving up.

	    Returns:
	        The decoded JSON response body on success, or ``{"error": <message>}``
	        when the key is missing, the request fails, or the body is not JSON.
	    """
	    if not api_key:
	        return {"error": "API Key is missing."}

	    headers = {
	        "Authorization": f"Bearer {api_key}",
	        "Content-Type": "application/json",
	    }

	    # PNG uploads must not be labelled as JPEG. The 8-byte PNG signature
	    # always base64-encodes to this 10-character prefix; anything else is
	    # treated as JPEG, which was the previous fixed label.
	    png_signature_b64 = "iVBORw0KGg"
	    is_png = isinstance(base64_image, str) and base64_image.startswith(png_signature_b64)
	    mime_type = "image/png" if is_png else "image/jpeg"
	    data_url = f"data:{mime_type};base64,{base64_image}"

	    payload = {
	        "model": "qwen/qwen3-vl-8b-instruct",
	        "messages": [
	            {
	                "role": "user",
	                "content": [
	                    {"type": "text", "text": prompt_text},
	                    {"type": "image_url", "image_url": {"url": data_url}},
	                ],
	            }
	        ],
	    }

	    try:
	        # Without a timeout a stalled connection would block the app forever.
	        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	        response.raise_for_status()  # Turn 4xx/5xx into an exception
	        return response.json()
	    except requests.exceptions.RequestException as e:
	        return {"error": str(e)}
	    except ValueError as e:
	        # Older requests releases raise a plain ValueError for a non-JSON body.
	        return {"error": f"Invalid JSON in API response: {e}"}

	# Page chrome: browser-tab title and icon, wide layout, then the heading
	# and a one-line description of what the app does.
	st.set_page_config(
	    page_title="Receipt Analyzer",
	    page_icon="🧾",
	    layout="wide",
	)

	st.title("🧾 Receipt Cost Breakdown (Qwen 3-VL-8B)")
	st.markdown("Upload a receipt image to get a JSON breakdown of costs.")

	# Sidebar configuration: API key, which fields to extract, free-form
	# instructions, and a short note about the model.

	# Fields pre-selected in the picker, followed by the full set on offer.
	default_fields = ["Merchant Name", "Total Amount", "Currency", "Date"]
	available_fields = default_fields + ["Tax/VAT", "Address", "Time", "Payment Method"]

	with st.sidebar:
	    st.header("⚙️ Configuration")

	    # The key is typed by the user on every session.
	    st.subheader("API Access")
	    api_key = st.text_input(
	        "OpenRouter API Key",
	        type="password",
	        help="Enter your OpenRouter API Key here.",
	    )
	    if not api_key:
	        st.warning("Please enter your API Key to proceed.")

	    st.divider()

	    # Field picker; an empty selection means "extract everything".
	    st.subheader("Fields to Extract")
	    selected_fields = st.multiselect(
	        "Select fields:",
	        options=available_fields,
	        default=default_fields,
	        help="Leave empty to extract ALL available information automatically.",
	    )
	    if not selected_fields:
	        st.caption("✅ No fields selected. The model will extract everything it finds.")

	    extract_line_items = st.checkbox("Extract Line Items (Name & Price)", value=True)

	    st.divider()

	    # Optional extra rules appended to the prompt.
	    custom_instructions = st.text_input(
	        "Custom Instructions (Optional)",
	        placeholder="e.g., Extract the cashier name",
	        help="Add any specific data points or rules not covered above.",
	    )

	    st.divider()

	    # Model note sits last and stays collapsed by default.
	    with st.expander("ℹ️ About the Model", expanded=False):
	        st.info(
	            "Qwen 3-VL-8B\n\n"
	            "This is an open-source model efficient enough to run locally on consumer hardware."
	        )

	# Assemble the model prompt from the sidebar choices. Pieces are collected
	# in order and joined once at the end.
	prompt_parts = []

	if selected_fields:
	    # Specific fields requested: list them explicitly.
	    prompt_parts.append(
	        "Analyze this receipt image. Extract the following information in JSON format: "
	        + ", ".join(selected_fields)
	        + "."
	    )
	else:
	    # Nothing selected: ask for everything visible.
	    prompt_parts.append(
	        "Analyze this receipt image. Extract all visible information including merchant details, dates, totals, taxes, and address in a structured JSON format."
	    )

	if extract_line_items:
	    prompt_parts.append(" Also include a detailed list of 'items' containing 'name' and 'price'.")

	if custom_instructions:
	    prompt_parts.append(f" Additionally: {custom_instructions}.")

	# Always close by demanding bare JSON so the reply can be parsed directly.
	prompt_parts.append(" Return a single valid JSON object. Do not include markdown formatting.")

	prompt_text = "".join(prompt_parts)

	# Alias under the name the analysis call uses.
	custom_prompt = prompt_text

	# Main area: upload a receipt, preview it on the left, and show the
	# analysis results on the right once the button is pressed.
	uploaded_file = st.file_uploader("Choose a receipt image...", type=["jpg", "jpeg", "png"])

	if uploaded_file is not None:
	    col1, col2 = st.columns(2)

	    with col1:
	        # NOTE(review): newer Streamlit releases deprecate `use_column_width`
	        # on st.image; confirm against the pinned version before changing it.
	        st.image(uploaded_file, caption="Uploaded Receipt", use_column_width=True)
	        analyze = st.button("Analyze Receipt", type="primary", use_container_width=True)

	    with col2:
	        if analyze:
	            if not api_key:
	                st.error("Please enter an API Key in the sidebar.")
	            else:
	                # Only the slow part (encode + network call) sits under the spinner.
	                with st.spinner("Analyzing receipt..."):
	                    base64_image = encode_image(uploaded_file)
	                    api_result = analyze_receipt(base64_image, custom_prompt, api_key)

	                is_dict_result = isinstance(api_result, dict)

	                # Pull out the model's text defensively: an empty `choices`
	                # list, a missing key, or a null content must not crash the page.
	                content = None
	                if is_dict_result and "error" not in api_result:
	                    try:
	                        content = api_result["choices"][0]["message"]["content"]
	                    except (KeyError, IndexError, TypeError):
	                        content = None

	                if is_dict_result and "error" in api_result:
	                    st.error(f"Error calling API: {api_result['error']}")
	                elif not isinstance(content, str):
	                    st.error("Unexpected response format from API.")
	                    st.json(api_result)
	                else:
	                    with st.expander("🔍 Raw Analysis Output"):
	                        st.code(content, language="json")

	                    # The model may wrap its answer in a markdown code fence
	                    # despite the prompt; strip the fence before parsing.
	                    json_str = content.replace("```json", "").replace("```", "").strip()

	                    try:
	                        parsed_json = json.loads(json_str)
	                    except json.JSONDecodeError:
	                        st.warning("Could not parse the response as JSON. See the raw output above.")
	                    else:
	                        st.success("Analysis Complete!")
	                        st.subheader("Structured Data")
	                        st.json(parsed_json)

	                        # The table and metrics only make sense for a JSON
	                        # object; an array or scalar reply has no `.items()`.
	                        if isinstance(parsed_json, dict):
	                            line_items = parsed_json.get("items")
	                            if isinstance(line_items, list):
	                                st.subheader("Itemized Breakdown")
	                                st.dataframe(line_items, use_container_width=True)

	                            # Show simple top-level values as metrics.
	                            for key, value in parsed_json.items():
	                                if key != "items" and isinstance(value, (int, float, str)):
	                                    st.metric(key.title(), value)