import gradio as gr import spaces import torch import json import re import urllib.request import os from transformers import AutoProcessor, AutoModelForMultimodalLM MODEL_ID = "google/gemma-4-E2B-it" print(f"Loading {MODEL_ID}...") processor = AutoProcessor.from_pretrained(MODEL_ID) model = AutoModelForMultimodalLM.from_pretrained( MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto" ) print("Model loaded successfully.") os.makedirs("sample_data", exist_ok=True) SAMPLE_IMAGE = "sample_data/car_damage.jpg" SAMPLE_AUDIO = "sample_data/driver_statement.wav" if not os.path.exists(SAMPLE_IMAGE): urllib.request.urlretrieve( "https://www.driving.org/wp-content/uploads/2023/11/driver-hand-examining-dented-car-with-damaged-fend-2023-07-17-20-53-56-utc-e1699944140557.jpg", SAMPLE_IMAGE ) if not os.path.exists(SAMPLE_AUDIO): urllib.request.urlretrieve( "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/apps/sample-data/journal1.wav", SAMPLE_AUDIO ) @spaces.GPU def process_insurance_claim(image_path, audio_path): if not image_path or not audio_path: return {"error": "Both an image of the damage and an audio statement are required."} system_prompt = """You are an expert AI Auto Insurance Claim Adjuster. Your task is to analyze the provided image of vehicle damage and the audio statement from the driver. Cross-reference the audio description with the visual evidence. You must output ONLY a valid JSON object. Do not include markdown formatting like ```json. The JSON must strictly follow this schema: { "damage_severity": "Low|Medium|High|Total Loss", "affected_parts": ["list", "of", "damaged", "car", "parts"], "driver_statement_summary": "Short 1-sentence summary of the audio transcript", "consistency_check": "Match|Mismatch", "flagged_for_review": true|false, "reasoning": "Brief explanation of why it matches or doesn't match the visual evidence." }""" messages = [ { "role": "system", "content": [ {"type": "text", "text": system_prompt} ] }, { "role": "user", "content": [ {"type": "image", "url": image_path}, {"type": "audio", "audio": audio_path}, {"type": "text", "text": "Analyze this insurance claim and output the JSON report."} ] } ] inputs = processor.apply_chat_template( messages, tokenize=True, return_dict=True, return_tensors="pt", add_generation_prompt=True, enable_thinking=False ).to(model.device) input_len = inputs["input_ids"].shape[-1] outputs = model.generate( **inputs, max_new_tokens=512, temperature=0.2, top_p=0.95, top_k=64 ) response = processor.decode(outputs[0][input_len:], skip_special_tokens=True) clean_response = re.sub(r"^```(?:json)?\s*", "", response).strip() clean_response = re.sub(r"\s*```$", "", clean_response).strip() try: json_output = json.loads(clean_response) return json_output except json.JSONDecodeError: return { "error": "Failed to parse JSON output.", "raw_output": response } css = """ #component-0 { max-width: 900px; margin: auto; } .gr-button { background-color: #2563eb !important; color: white !important; } """ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: gr.Markdown( """ # 🚗 AI Auto Claim Adjuster (Gemma 4 E2B) Upload a photo of the vehicle damage alongside an audio statement from the driver describing the incident. Gemma 4 E2B natively processes **both the audio wave and the image** simultaneously, transcribing the story, analyzing the visual damage, and outputting a structured JSON claim adjustment report. """ ) with gr.Row(): with gr.Column(scale=1): img_input = gr.Image(type="filepath", label="1. Upload Vehicle Damage Image") audio_input = gr.Audio(type="filepath", label="2. Upload Driver Audio Statement") submit_btn = gr.Button("Generate Claim Report", size="lg") with gr.Column(scale=1): json_output = gr.JSON(label="Structured Claim JSON Output") submit_btn.click( fn=process_insurance_claim, inputs=[img_input, audio_input], outputs=[json_output] ) gr.Examples( examples=[[SAMPLE_IMAGE, SAMPLE_AUDIO]], inputs=[img_input, audio_input], outputs=[json_output], fn=process_insurance_claim, cache_examples=False, label="Try Demo Example" ) if __name__ == "__main__": demo.launch()