# Qwen-3-VL-8B_OCR_receipts / src/streamlit_app.py
# Page residue from the hosting site: "haharvs's picture",
# commit "Update src/streamlit_app.py" (df03a2f, verified).
import streamlit as st
import requests
import json
import base64
# Configuration
# OpenRouter chat-completions endpoint that analyze_receipt() POSTs to.
API_URL = "https://openrouter.ai/api/v1/chat/completions"
def encode_image(uploaded_file):
    """Return the content of *uploaded_file* as a base64 text string.

    The argument only needs a ``getvalue()`` method that yields the raw bytes.
    """
    return str(base64.b64encode(uploaded_file.getvalue()), 'utf-8')
def _guess_image_mime(base64_image, default="image/jpeg"):
    """Guess an image MIME type from the start of its base64 encoding."""
    # "iVBORw0KGg" is the base64 form of the PNG signature 89 50 4E 47 0D 0A 1A 0A.
    if base64_image.startswith("iVBORw0KGg"):
        return "image/png"
    # "/9j/" is the base64 form of the JPEG start-of-image bytes FF D8 FF.
    if base64_image.startswith("/9j/"):
        return "image/jpeg"
    return default


def analyze_receipt(base64_image, prompt_text, api_key, mime_type=None, timeout=60):
    """Send the image and prompt to the OpenRouter API and return its reply.

    Args:
        base64_image: Base64-encoded image bytes (no ``data:`` prefix).
        prompt_text: Instruction text sent alongside the image.
        api_key: OpenRouter API key used as the bearer token.
        mime_type: MIME type to declare in the data URL. When omitted it is
            guessed from the image signature (PNG or JPEG), falling back to
            ``image/jpeg`` as before.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response on success, or ``{"error": <message>}`` when
        the key is missing, the request fails, or the body is not valid JSON.
    """
    if not api_key:
        return {"error": "API Key is missing."}

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # The uploader accepts PNG as well as JPEG, so do not hard-code the type.
    if not mime_type:
        mime_type = _guess_image_mime(base64_image)
    data_url = f"data:{mime_type};base64,{base64_image}"

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt_text,
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": data_url,
                    },
                },
            ],
        }
    ]
    payload = {
        "model": "qwen/qwen3-vl-8b-instruct",
        "messages": messages,
    }

    try:
        # requests has no default timeout; without one a stalled connection
        # would leave the app spinning forever.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status codes
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        return {"error": str(e)}
# Streamlit App UI
# Page-level settings are gathered in one mapping and applied in a single call.
PAGE_SETTINGS = {
    "page_title": "Receipt Analyzer",
    "page_icon": "🧾",
    "layout": "wide",
}
st.set_page_config(**PAGE_SETTINGS)

# Page heading and a one-line description of what the app does.
st.title("🧾 Receipt Cost Breakdown (Qwen 3-VL-8B)")
st.markdown("Upload a receipt image to get a JSON breakdown of costs.")
# Sidebar for configuration (widgets are attached through the sidebar object)
sidebar = st.sidebar
sidebar.header("⚙️ Configuration")

# API key supplied by the user; requests are made with this key.
sidebar.subheader("API Access")
api_key = sidebar.text_input(
    "OpenRouter API Key",
    type="password",
    help="Enter your OpenRouter API Key here.",
)
if not api_key:
    sidebar.warning("Please enter your API Key to proceed.")
sidebar.divider()

# Schema builder: which receipt fields the prompt should ask for.
sidebar.subheader("Fields to Extract")
default_fields = ["Merchant Name", "Total Amount", "Currency", "Date"]
available_fields = default_fields + ["Tax/VAT", "Address", "Time", "Payment Method"]
selected_fields = sidebar.multiselect(
    "Select fields:",
    options=available_fields,
    default=default_fields,
    help="Leave empty to extract **ALL** available information automatically.",
)
if not selected_fields:
    sidebar.caption("✅ *No fields selected. The model will extract everything it finds.*")
extract_line_items = sidebar.checkbox("Extract Line Items (Name & Price)", value=True)
sidebar.divider()

# Free-form extra instructions appended to the generated prompt.
custom_instructions = sidebar.text_input(
    "Custom Instructions (Optional)",
    placeholder="e.g., Extract the cashier name",
    help="Add any specific data points or rules not covered above.",
)
sidebar.divider()

# Collapsed note about the model, kept at the bottom of the sidebar.
with sidebar.expander("ℹ️ About the Model", expanded=False):
    st.info(
        "**Qwen 3-VL-8B**\n\n"
        "This is an open-source model efficient enough to run locally on consumer hardware."
    )
# Construct the prompt dynamically from the sidebar selections.
# The pieces are collected in a list and joined with single spaces.
if not selected_fields:
    # User selected nothing -> Extract all
    prompt_parts = [
        "Analyze this receipt image. Extract **all** visible information including merchant details, dates, totals, taxes, and address in a structured JSON format."
    ]
else:
    # User selected specific fields
    field_str = ", ".join(selected_fields)
    prompt_parts = [
        f"Analyze this receipt image. Extract the following information in JSON format: {field_str}."
    ]

if extract_line_items:
    prompt_parts.append("Also include a detailed list of 'items' containing 'name' and 'price'.")

# Ignore whitespace-only input so the prompt never contains an empty
# "Additionally:" clause, and avoid doubling the final punctuation mark.
extra_instructions = (custom_instructions or "").strip()
if extra_instructions:
    if not extra_instructions.endswith((".", "!", "?")):
        extra_instructions += "."
    prompt_parts.append(f"Additionally: {extra_instructions}")

# Enforce JSON structure
prompt_parts.append("Return a single valid JSON object. Do not include markdown formatting.")
prompt_text = " ".join(prompt_parts)

# Store in variable to match existing function call
custom_prompt = prompt_text
def _parse_model_json(content):
    """Parse the model reply as JSON, tolerating markdown fences and stray prose.

    Raises:
        json.JSONDecodeError: if the reply is not text or no valid JSON can be
            recovered from it.
    """
    if not isinstance(content, str):
        raise json.JSONDecodeError("Model reply is not text", "", 0)
    # Clean up code blocks if present
    cleaned = content.replace("```json", "").replace("```", "").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span when the model wrapped the
        # object in explanatory text.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(cleaned[start:end + 1])


uploaded_file = st.file_uploader("Choose a receipt image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    col1, col2 = st.columns(2)

    with col1:
        # Display the uploaded image
        # NOTE(review): use_column_width appears to be deprecated in newer
        # Streamlit releases - confirm against the pinned version before changing.
        st.image(uploaded_file, caption="Uploaded Receipt", use_column_width=True)
        analyze = st.button("Analyze Receipt", type="primary", use_container_width=True)

    with col2:
        if analyze:
            if not api_key:
                st.error("Please enter an API Key in the sidebar.")
            else:
                with st.spinner("Analyzing receipt..."):
                    # Encode image
                    base64_image = encode_image(uploaded_file)
                    # Call API
                    api_result = analyze_receipt(base64_image, custom_prompt, api_key)

                # Handle response
                if "error" in api_result:
                    st.error(f"Error calling API: {api_result['error']}")
                elif "choices" in api_result:
                    # An empty choices list or a missing/null message must not
                    # crash the app, so treat those as "no content".
                    try:
                        content = api_result["choices"][0]["message"]["content"]
                    except (KeyError, IndexError, TypeError):
                        content = None

                    if not isinstance(content, str):
                        st.error("Unexpected response format from API.")
                        st.json(api_result)
                    else:
                        with st.expander("🔍 Raw Analysis Output"):
                            st.code(content, language="json")

                        try:
                            parsed_json = _parse_model_json(content)
                        except json.JSONDecodeError:
                            st.warning("Could not parse the response as JSON. See the raw output above.")
                        else:
                            st.success("Analysis Complete!")
                            st.subheader("Structured Data")
                            st.json(parsed_json)

                            # The model may return a JSON array or scalar; only
                            # a JSON object has keys to break down further.
                            if isinstance(parsed_json, dict):
                                # Optional: Display as a nice table
                                items = parsed_json.get("items")
                                if isinstance(items, list):
                                    st.subheader("Itemized Breakdown")
                                    st.dataframe(items, use_container_width=True)

                                # Display other top-level keys as metrics if simple
                                for key, value in parsed_json.items():
                                    if key != "items" and isinstance(value, (int, float, str)):
                                        st.metric(key.title(), value)
                else:
                    st.error("Unexpected response format from API.")
                    st.json(api_result)