Spaces:

haharvs
/

Qwen-3-VL-8B_OCR_receipts

Running

File size: 7,272 Bytes

106ab05
df03a2f
 
 
106ab05
df03a2f

import streamlit as st
import requests
import json
import base64

# Configuration
API_URL = "https://openrouter.ai/api/v1/chat/completions"

def encode_image(uploaded_file):
    """Return the uploaded file's contents as base64 text.

    The argument only needs a ``getvalue()`` method that returns the raw
    bytes; the result is the base64 encoding of those bytes as a ``str``.
    """
    raw = uploaded_file.getvalue()
    encoded = base64.b64encode(raw)
    return str(encoded, "utf-8")

def _guess_image_mime(base64_image):
    """Guess the image MIME type from the start of its base64 text."""
    # The base64 encoding of a PNG file always begins with these characters
    # (the 8-byte PNG signature followed by the zero byte of the first chunk
    # length).
    if base64_image.startswith("iVBORw0KGgo"):
        return "image/png"
    # Everything else keeps the JPEG label.
    return "image/jpeg"


def analyze_receipt(base64_image, prompt_text, api_key, timeout=60):
    """Send the image and prompt to the OpenRouter chat completions endpoint.

    Args:
        base64_image: Base64-encoded image bytes (no ``data:`` prefix).
        prompt_text: Instruction text sent alongside the image.
        api_key: OpenRouter API key used as the bearer token.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON body of the response on success, or a dict of the
        form ``{"error": "<message>"}`` when the key is missing, the request
        fails, the server answers with an error status, or the body is not
        valid JSON.
    """
    if not api_key:
        return {"error": "API Key is missing."}

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }

    # Label the data URL with the detected type so PNG uploads are not sent
    # as JPEG.
    data_url = f"data:{_guess_image_mime(base64_image)};base64,{base64_image}"

    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {"type": "image_url", "image_url": {"url": data_url}},
            ],
        }
    ]

    payload = {
        "model": "qwen/qwen3-vl-8b-instruct",
        "messages": messages,
    }

    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(
            API_URL, headers=headers, json=payload, timeout=timeout
        )
        response.raise_for_status()  # Turn 4xx/5xx responses into exceptions
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # decode error is not a RequestException subclass.
        return {"error": str(e)}

# Streamlit App UI
# Page-level configuration first, then the heading and a one-line usage hint.
st.set_page_config(
    layout="wide",
    page_icon="🧾",
    page_title="Receipt Analyzer",
)
st.title("🧾 Receipt Cost Breakdown (Qwen 3-VL-8B)")
st.markdown("Upload a receipt image to get a JSON breakdown of costs.")

def _build_prompt(selected_fields, extract_line_items, custom_instructions):
    """Assemble the extraction prompt from the sidebar choices.

    Args:
        selected_fields: Field names to extract; empty means "extract all".
        extract_line_items: Whether to ask for an 'items' list of name/price.
        custom_instructions: Optional free-text instructions from the user.

    Returns:
        The full prompt string, always ending with the JSON-only instruction.
    """
    if selected_fields:
        field_str = ", ".join(selected_fields)
        prompt = f"Analyze this receipt image. Extract the following information in JSON format: {field_str}."
    else:
        # Nothing selected -> ask the model for everything it can find.
        prompt = "Analyze this receipt image. Extract **all** visible information including merchant details, dates, totals, taxes, and address in a structured JSON format."

    if extract_line_items:
        prompt += " Also include a detailed list of 'items' containing 'name' and 'price'."

    # Trim whitespace and trailing periods so blank input is ignored and the
    # appended sentence does not end in a doubled period.
    extra = (custom_instructions or "").strip().rstrip(".").rstrip()
    if extra:
        prompt += f" Additionally: {extra}."

    # Enforce JSON structure
    prompt += " Return a single valid JSON object. Do not include markdown formatting."
    return prompt


# Sidebar for configuration
with st.sidebar:
    st.header("⚙️ Configuration")

    # API key entered by the user; it is read again when the analysis runs.
    st.subheader("API Access")
    api_key = st.text_input("OpenRouter API Key", type="password", help="Enter your OpenRouter API Key here.")
    if not api_key:
        st.warning("Please enter your API Key to proceed.")

    st.divider()

    # Field picker: the main way to choose what gets extracted.
    st.subheader("Fields to Extract")

    default_fields = ["Merchant Name", "Total Amount", "Currency", "Date"]
    available_fields = default_fields + ["Tax/VAT", "Address", "Time", "Payment Method"]

    selected_fields = st.multiselect(
        "Select fields:",
        options=available_fields,
        default=default_fields,
        help="Leave empty to extract **ALL** available information automatically."
    )

    if not selected_fields:
        st.caption("✅ *No fields selected. The model will extract everything it finds.*")

    extract_line_items = st.checkbox("Extract Line Items (Name & Price)", value=True)

    st.divider()

    # Free-text additions for anything the field picker does not cover.
    custom_instructions = st.text_input(
        "Custom Instructions (Optional)",
        placeholder="e.g., Extract the cashier name",
        help="Add any specific data points or rules not covered above."
    )

    st.divider()

    # Model note, collapsed by default at the bottom of the sidebar.
    with st.expander("ℹ️ About the Model", expanded=False):
        st.info(
            "**Qwen 3-VL-8B**\n\n"
            "This is an open-source model efficient enough to run locally on consumer hardware."
        )

# Build the prompt from the current sidebar state. Both names are kept at
# module level; `custom_prompt` is the one passed to the API call.
prompt_text = _build_prompt(selected_fields, extract_line_items, custom_instructions)
custom_prompt = prompt_text

# Main area: upload a receipt, send it for analysis, and render the result.
uploaded_file = st.file_uploader("Choose a receipt image...", type=["jpg", "jpeg", "png"])

if uploaded_file is not None:
    col1, col2 = st.columns(2)

    with col1:
        # Preview of the upload; use_container_width matches the button and
        # dataframe calls in this script (use_column_width is deprecated).
        st.image(uploaded_file, caption="Uploaded Receipt", use_container_width=True)
        analyze = st.button("Analyze Receipt", type="primary", use_container_width=True)

    with col2:
        if analyze:
            if not api_key:
                st.error("Please enter an API Key in the sidebar.")
            else:
                with st.spinner("Analyzing receipt..."):
                    base64_image = encode_image(uploaded_file)
                    api_result = analyze_receipt(base64_image, custom_prompt, api_key)

                    if "error" in api_result:
                        st.error(f"Error calling API: {api_result['error']}")
                    elif api_result.get("choices"):
                        # An empty "choices" list falls through to the
                        # unexpected-format branch instead of raising.
                        first_choice = api_result["choices"][0]
                        message = first_choice.get("message") or {}
                        # A missing or null content is treated as empty text,
                        # which then fails JSON parsing and shows the warning.
                        content = message.get("content") or ""

                        with st.expander("🔍 Raw Analysis Output"):
                            st.code(content, language="json")

                        # Strip markdown code fences the model may have added
                        # before parsing; only the parse itself is guarded.
                        try:
                            json_str = content.replace("```json", "").replace("```", "").strip()
                            parsed_json = json.loads(json_str)
                        except json.JSONDecodeError:
                            st.warning("Could not parse the response as JSON. See the raw output above.")
                        else:
                            st.success("Analysis Complete!")
                            st.subheader("Structured Data")
                            st.json(parsed_json)

                            # The table and metrics only make sense for a JSON
                            # object; a top-level array is shown as JSON only.
                            if isinstance(parsed_json, dict):
                                items = parsed_json.get("items")
                                if isinstance(items, list):
                                    st.subheader("Itemized Breakdown")
                                    st.dataframe(items, use_container_width=True)

                                # Show simple top-level values as metrics.
                                for key, value in parsed_json.items():
                                    if key != "items" and isinstance(value, (int, float, str)):
                                        st.metric(key.title(), value)
                    else:
                        st.error("Unexpected response format from API.")
                        st.json(api_result)