import gradio as gr
import spaces
import torch
import json
import re
import urllib.request
import os
from transformers import AutoProcessor, AutoModelForMultimodalLM
# Multimodal (image + audio + text) checkpoint used for claim analysis.
# NOTE(review): released checkpoints follow the "google/gemma-3n-E2B-it"
# naming — confirm this repo id exists on the Hub.
MODEL_ID = "google/gemma-4-E2B-it"
print(f"Loading {MODEL_ID}...")
# Processor bundles the tokenizer plus the image/audio feature extractors.
processor = AutoProcessor.from_pretrained(MODEL_ID)
# NOTE(review): `AutoModelForMultimodalLM` is not a class in released
# transformers versions (multimodal checkpoints typically load through
# AutoModelForImageTextToText or a model-specific class) — verify the name
# against the transformers version pinned for this Space.
model = AutoModelForMultimodalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.bfloat16,  # half-precision weights; assumes bf16-capable GPU — TODO confirm
    device_map="auto"  # let accelerate place the weights on available devices
)
print("Model loaded successfully.")
# Demo fixtures: fetch each sample file once and reuse the cached copy on
# later restarts (Spaces keep the working directory between quick reloads).
os.makedirs("sample_data", exist_ok=True)
SAMPLE_IMAGE = "sample_data/car_damage.jpg"
SAMPLE_AUDIO = "sample_data/driver_statement.wav"

_SAMPLE_SOURCES = (
    (
        SAMPLE_IMAGE,
        "https://www.driving.org/wp-content/uploads/2023/11/driver-hand-examining-dented-car-with-damaged-fend-2023-07-17-20-53-56-utc-e1699944140557.jpg",
    ),
    (
        SAMPLE_AUDIO,
        "https://raw.githubusercontent.com/google-gemma/cookbook/refs/heads/main/apps/sample-data/journal1.wav",
    ),
)
for _destination, _source_url in _SAMPLE_SOURCES:
    if not os.path.exists(_destination):
        urllib.request.urlretrieve(_source_url, _destination)
@spaces.GPU
def process_insurance_claim(image_path, audio_path):
    """Analyze a damage photo plus a driver's audio statement into a claim report.

    Feeds both modalities to the globally loaded model in a single chat turn,
    asks for a strict-JSON adjustment report, and parses the reply.

    Args:
        image_path: Filesystem path to the vehicle-damage photo
            (``gr.Image(type="filepath")`` output).
        audio_path: Filesystem path to the driver's spoken statement
            (``gr.Audio(type="filepath")`` output).

    Returns:
        dict: The parsed report following the schema in the system prompt,
        or an ``{"error": ...}`` dict when an input is missing or the model
        output is not valid JSON (the raw text is included for debugging).
    """
    if not image_path or not audio_path:
        return {"error": "Both an image of the damage and an audio statement are required."}

    system_prompt = """You are an expert AI Auto Insurance Claim Adjuster.
Your task is to analyze the provided image of vehicle damage and the audio statement from the driver.
Cross-reference the audio description with the visual evidence.
You must output ONLY a valid JSON object. Do not include markdown formatting like ```json.
The JSON must strictly follow this schema:
{
"damage_severity": "Low|Medium|High|Total Loss",
"affected_parts": ["list", "of", "damaged", "car", "parts"],
"driver_statement_summary": "Short 1-sentence summary of the audio transcript",
"consistency_check": "Match|Mismatch",
"flagged_for_review": true|false,
"reasoning": "Brief explanation of why it matches or doesn't match the visual evidence."
}"""

    messages = [
        {
            "role": "system",
            "content": [
                {"type": "text", "text": system_prompt}
            ]
        },
        {
            "role": "user",
            "content": [
                {"type": "image", "url": image_path},
                {"type": "audio", "audio": audio_path},
                {"type": "text", "text": "Analyze this insurance claim and output the JSON report."}
            ]
        }
    ]

    inputs = processor.apply_chat_template(
        messages,
        tokenize=True,
        return_dict=True,
        return_tensors="pt",
        add_generation_prompt=True,
        enable_thinking=False
    ).to(model.device)
    input_len = inputs["input_ids"].shape[-1]

    # BUGFIX: generate() defaults to greedy decoding, which silently ignores
    # temperature/top_p/top_k unless do_sample=True is set — the original
    # sampling settings were never applied. inference_mode() skips autograd
    # bookkeeping during generation.
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=True,
            temperature=0.2,
            top_p=0.95,
            top_k=64
        )
    # Decode only the newly generated tokens, not the echoed prompt.
    response = processor.decode(outputs[0][input_len:], skip_special_tokens=True)

    # The prompt forbids markdown fences, but strip ```json ... ``` wrappers
    # defensively in case the model emits them anyway.
    clean_response = re.sub(r"^```(?:json)?\s*", "", response).strip()
    clean_response = re.sub(r"\s*```$", "", clean_response).strip()
    try:
        return json.loads(clean_response)
    except json.JSONDecodeError:
        # Surface the raw text so the UI shows what actually came back.
        return {
            "error": "Failed to parse JSON output.",
            "raw_output": response
        }
# --- Gradio UI -------------------------------------------------------------
# Narrow the layout and restyle the primary action button.
css = """
#component-0 { max-width: 900px; margin: auto; }
.gr-button { background-color: #2563eb !important; color: white !important; }
"""

# `demo` must stay module-level under this exact name: the Spaces runtime
# looks it up to serve the app.
with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
    gr.Markdown(
        """
# 🚗 AI Auto Claim Adjuster (Gemma 4 E2B)
Upload a photo of the vehicle damage alongside an audio statement from the driver describing the incident.
Gemma 4 E2B natively processes **both the audio wave and the image** simultaneously, transcribing the story, analyzing the visual damage, and outputting a structured JSON claim adjustment report.
"""
    )

    # Two-pane layout: inputs on the left, structured report on the right.
    with gr.Row():
        with gr.Column(scale=1):
            damage_image = gr.Image(type="filepath", label="1. Upload Vehicle Damage Image")
            statement_audio = gr.Audio(type="filepath", label="2. Upload Driver Audio Statement")
            run_button = gr.Button("Generate Claim Report", size="lg")
        with gr.Column(scale=1):
            claim_report = gr.JSON(label="Structured Claim JSON Output")

    run_button.click(
        process_insurance_claim,
        inputs=[damage_image, statement_audio],
        outputs=[claim_report],
    )

    # Pre-wired demo inputs; not cached so the model runs live each time.
    gr.Examples(
        examples=[[SAMPLE_IMAGE, SAMPLE_AUDIO]],
        inputs=[damage_image, statement_audio],
        outputs=[claim_report],
        fn=process_insurance_claim,
        cache_examples=False,
        label="Try Demo Example",
    )

if __name__ == "__main__":
    demo.launch()