Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import spaces | |
| import torch | |
| import json | |
| import re | |
| import urllib.request | |
| import os | |
| from transformers import AutoProcessor, AutoModelForMultimodalLM | |
| MODEL_ID = "google/gemma-4-E2B-it" | |
| print(f"Loading {MODEL_ID}...") | |
| processor = AutoProcessor.from_pretrained(MODEL_ID) | |
| model = AutoModelForMultimodalLM.from_pretrained( | |
| MODEL_ID, | |
| torch_dtype=torch.bfloat16, | |
| device_map="auto" | |
| ) | |
| print("Model loaded successfully.") | |
| os.makedirs("sample_data", exist_ok=True) | |
| SAMPLE_IMAGE = "sample_data/car_damage.jpg" | |
| SAMPLE_AUDIO = "sample_data/driver_statement.wav" | |
| if not os.path.exists(SAMPLE_IMAGE): | |
| urllib.request.urlretrieve( | |
| "https://www.driving.org/wp-content/uploads/2023/11/driver-hand-examining-dented-car-with-damaged-fend-2023-07-17-20-53-56-utc-e1699944140557.jpg", | |
| SAMPLE_IMAGE | |
| ) | |
| if not os.path.exists(SAMPLE_AUDIO): | |
| urllib.request.urlretrieve( | |
| "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/apps/sample-data/journal1.wav", | |
| SAMPLE_AUDIO | |
| ) | |
def process_insurance_claim(image_path, audio_path):
    """Analyze a vehicle-damage photo plus a spoken driver statement.

    Feeds both modalities to the model with a JSON-only system prompt and
    parses the reply into a structured claim report.

    Args:
        image_path: Filesystem path to the damage photo (from gr.Image).
        audio_path: Filesystem path to the driver's audio statement (from gr.Audio).

    Returns:
        dict: The parsed claim JSON on success, or a dict with an ``"error"``
        key (and ``"raw_output"`` when the model reply was not valid JSON).
    """
    # Both modalities are required for the cross-reference consistency check.
    if not image_path or not audio_path:
        return {"error": "Both an image of the damage and an audio statement are required."}

    system_prompt = """You are an expert AI Auto Insurance Claim Adjuster.
Your task is to analyze the provided image of vehicle damage and the audio statement from the driver.
Cross-reference the audio description with the visual evidence.
You must output ONLY a valid JSON object. Do not include markdown formatting like ```json.
The JSON must strictly follow this schema:
{
"damage_severity": "Low|Medium|High|Total Loss",
"affected_parts": ["list", "of", "damaged", "car", "parts"],
"driver_statement_summary": "Short 1-sentence summary of the audio transcript",
"consistency_check": "Match|Mismatch",
"flagged_for_review": true|false,
"reasoning": "Brief explanation of why it matches or doesn't match the visual evidence."
}"""

    messages = [
        {
            "role": "system",
            "content": [
                {"type": "text", "text": system_prompt}
            ],
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "url": image_path},
                {"type": "audio", "audio": audio_path},
                {"type": "text", "text": "Analyze this insurance claim and output the JSON report."},
            ],
        },
    ]

    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        enable_thinking=False,
    ).to(model.device)
    # Length of the prompt so we can decode only the newly generated tokens.
    input_len = inputs["input_ids"].shape[-1]

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        # FIX: without do_sample=True, transformers ignores temperature/top_p/
        # top_k and decodes greedily — these settings were silently dead.
        do_sample=True,
        temperature=0.2,
        top_p=0.95,
        top_k=64,
    )
    response = processor.decode(outputs[0][input_len:], skip_special_tokens=True)

    # Strip markdown code fences the model may emit despite the prompt.
    clean_response = re.sub(r"^```(?:json)?\s*", "", response).strip()
    clean_response = re.sub(r"\s*```$", "", clean_response).strip()

    try:
        return json.loads(clean_response)
    except json.JSONDecodeError:
        # Robustness: the model sometimes wraps the JSON in extra prose —
        # try to salvage the outermost {...} span before giving up.
        match = re.search(r"\{.*\}", clean_response, re.DOTALL)
        if match:
            try:
                return json.loads(match.group(0))
            except json.JSONDecodeError:
                pass
        return {
            "error": "Failed to parse JSON output.",
            "raw_output": response,
        }
| css = """ | |
| #component-0 { max-width: 900px; margin: auto; } | |
| .gr-button { background-color: #2563eb !important; color: white !important; } | |
| """ | |
| with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo: | |
| gr.Markdown( | |
| """ | |
| # 🚗 AI Auto Claim Adjuster (Gemma 4 E2B) | |
| Upload a photo of the vehicle damage alongside an audio statement from the driver describing the incident. | |
| Gemma 4 E2B natively processes **both the audio wave and the image** simultaneously, transcribing the story, analyzing the visual damage, and outputting a structured JSON claim adjustment report. | |
| """ | |
| ) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| img_input = gr.Image(type="filepath", label="1. Upload Vehicle Damage Image") | |
| audio_input = gr.Audio(type="filepath", label="2. Upload Driver Audio Statement") | |
| submit_btn = gr.Button("Generate Claim Report", size="lg") | |
| with gr.Column(scale=1): | |
| json_output = gr.JSON(label="Structured Claim JSON Output") | |
| submit_btn.click( | |
| fn=process_insurance_claim, | |
| inputs=[img_input, audio_input], | |
| outputs=[json_output] | |
| ) | |
| gr.Examples( | |
| examples=[[SAMPLE_IMAGE, SAMPLE_AUDIO]], | |
| inputs=[img_input, audio_input], | |
| outputs=[json_output], | |
| fn=process_insurance_claim, | |
| cache_examples=False, | |
| label="Try Demo Example" | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() |