import json
import os
import re
import urllib.request

import gradio as gr
import spaces
import torch
from transformers import AutoModelForImageTextToText, AutoProcessor

# NOTE(review): the original code referenced "google/gemma-4-E2B-it" with
# AutoModelForMultimodalLM — neither exists (no such Hub checkpoint, no such
# transformers class), so the script crashed before the UI ever launched.
# The real image+audio+text E2B checkpoint is Gemma 3n, loaded through
# AutoModelForImageTextToText.
MODEL_ID = "google/gemma-3n-E2B-it"

print(f"Loading {MODEL_ID}...")
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
print("Model loaded successfully.")

# --- Demo assets -------------------------------------------------------------
# Downloaded once at startup; failures are non-fatal so the app still launches
# offline (the Examples row simply won't have cached files to show).
os.makedirs("sample_data", exist_ok=True)
SAMPLE_IMAGE = "sample_data/car_damage.jpg"
SAMPLE_AUDIO = "sample_data/driver_statement.wav"

_SAMPLE_URLS = {
    SAMPLE_IMAGE: (
        "https://www.driving.org/wp-content/uploads/2023/11/"
        "driver-hand-examining-dented-car-with-damaged-fend-2023-07-17-20-53-56-utc-e1699944140557.jpg"
    ),
    SAMPLE_AUDIO: (
        "https://raw.githubusercontent.com/google-gemma/cookbook/"
        "refs/heads/main/apps/sample-data/journal1.wav"
    ),
}

for _path, _url in _SAMPLE_URLS.items():
    if not os.path.exists(_path):
        try:
            urllib.request.urlretrieve(_url, _path)
        except OSError as exc:  # network down / URL gone: degrade gracefully
            print(f"Warning: could not fetch sample file {_path}: {exc}")


@spaces.GPU
def process_insurance_claim(image_path, audio_path):
    """Generate a structured claim-adjustment report from damage photo + audio.

    Args:
        image_path: Filesystem path to the vehicle-damage image (from gr.Image).
        audio_path: Filesystem path to the driver's recorded statement
            (from gr.Audio).

    Returns:
        dict: The parsed JSON claim report on success, or a dict with an
        ``"error"`` key (and ``"raw_output"`` when JSON parsing failed) so the
        gr.JSON component always has something to render.
    """
    if not image_path or not audio_path:
        return {"error": "Both an image of the damage and an audio statement are required."}

    # Schema-constrained prompt: the model must emit bare JSON (no fences).
    system_prompt = """You are an expert AI Auto Insurance Claim Adjuster.
Your task is to analyze the provided image of vehicle damage and the audio statement from the driver.
Cross-reference the audio description with the visual evidence.
You must output ONLY a valid JSON object. Do not include markdown formatting like ```json.
The JSON must strictly follow this schema:
{
  "damage_severity": "Low|Medium|High|Total Loss",
  "affected_parts": ["list", "of", "damaged", "car", "parts"],
  "driver_statement_summary": "Short 1-sentence summary of the audio transcript",
  "consistency_check": "Match|Mismatch",
  "flagged_for_review": true|false,
  "reasoning": "Brief explanation of why it matches or doesn't match the visual evidence."
}"""

    messages = [
        {
            "role": "system",
            "content": [{"type": "text", "text": system_prompt}],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "url": image_path},
                {"type": "audio", "audio": audio_path},
                {"type": "text", "text": "Analyze this insurance claim and output the JSON report."},
            ],
        },
    ]

    # NOTE(review): dropped enable_thinking=False — that is a Qwen chat-template
    # flag; Gemma templates do not define it.
    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
    ).to(model.device)

    # Remember prompt length so only newly generated tokens are decoded.
    input_len = inputs["input_ids"].shape[-1]

    # Fix: temperature/top_p/top_k are ignored (with a warning) unless
    # do_sample=True; inference_mode avoids building autograd state.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.2,
            top_p=0.95,
            top_k=64,
        )

    response = processor.decode(outputs[0][input_len:], skip_special_tokens=True)

    # Strip markdown code fences the model may emit despite instructions.
    clean_response = re.sub(r"^```(?:json)?\s*", "", response).strip()
    clean_response = re.sub(r"\s*```$", "", clean_response).strip()

    try:
        return json.loads(clean_response)
    except json.JSONDecodeError:
        # Surface the raw text so the user can see what the model produced.
        return {
            "error": "Failed to parse JSON output.",
            "raw_output": response,
        }


css = """
#component-0 { max-width: 900px; margin: auto; }
.gr-button { background-color: #2563eb !important; color: white !important; }
"""

with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown(
        """
        # 🚗 AI Auto Claim Adjuster (Gemma 3n E2B)
        Upload a photo of the vehicle damage alongside an audio statement from the driver describing the incident.
        Gemma 3n E2B natively processes **both the audio wave and the image** simultaneously, transcribing the story,
        analyzing the visual damage, and outputting a structured JSON claim adjustment report.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            img_input = gr.Image(type="filepath", label="1. Upload Vehicle Damage Image")
            audio_input = gr.Audio(type="filepath", label="2. Upload Driver Audio Statement")
            submit_btn = gr.Button("Generate Claim Report", size="lg")
        with gr.Column(scale=1):
            json_output = gr.JSON(label="Structured Claim JSON Output")

    submit_btn.click(
        fn=process_insurance_claim,
        inputs=[img_input, audio_input],
        outputs=[json_output],
    )

    gr.Examples(
        examples=[[SAMPLE_IMAGE, SAMPLE_AUDIO]],
        inputs=[img_input, audio_input],
        outputs=[json_output],
        fn=process_insurance_claim,
        cache_examples=False,
        label="Try Demo Example",
    )

if __name__ == "__main__":
    demo.launch()