Spaces:
Sleeping
Sleeping
Upload 11 files
Browse files- Dockerfile +26 -0
- __init__.py +1 -0
- app/__init__.py +1 -0
- app/main.py +171 -0
- app/ocr.py +41 -0
- app/reasoning.py +54 -0
- app/report.py +47 -0
- app/retrieval.py +44 -0
- app/vision.py +82 -0
- models/fraud_model.joblib +3 -0
- requirements.txt +19 -0
Dockerfile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim

WORKDIR /app

# System libraries needed at runtime by OpenCV and docTR.
# NOTE(review): python:3.10-slim is Debian bookworm, where the legacy
# libgl1-mesa-glx package was replaced by libgl1 — the old name makes
# `apt-get install` fail. --no-install-recommends keeps the image slim.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender-dev \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download YOLO model weights so the container starts without network access
RUN python -c "from ultralytics import YOLO; YOLO('yolov8n.pt')"

COPY . .

# Hugging Face Spaces routes traffic to port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
|
__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ClaimShield Backend Package
|
app/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# ClaimShield App Package
|
app/main.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import joblib
import os
import json
from .vision import VisionAnalyzer
from .ocr import InvoiceProcessor
from .reasoning import ReasoningEngine
from .retrieval import ClaimRetriever
from .report import ReportGenerator

app = FastAPI(title="ClaimShield API")

# Setup CORS: wide-open so the demo frontend can call the API from any origin.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Singleton pipeline components, shared across all requests.
vision = VisionAnalyzer()
ocr = InvoiceProcessor()
reasoning = ReasoningEngine()
retriever = ClaimRetriever()
reporter = ReportGenerator()

# Load the pre-trained fraud-scoring model; analyze_claim falls back to a
# heuristic score when the artifact is missing or unreadable.
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'fraud_model.joblib')
try:
    fraud_model = joblib.load(MODEL_PATH)
except Exception:  # was a bare except: keep SystemExit/KeyboardInterrupt working
    fraud_model = None
    print(f"Warning: Fraud model not found at {MODEL_PATH}. Prediction will be mocked.")
+
@app.post("/analyze_claim")
async def analyze_claim(
    scene_image: UploadFile = File(...),
    damage_image: UploadFile = File(...),
    invoice_doc: UploadFile = File(...),
    description: str = Form(None)
):
    """Run the full multimodal fraud-analysis pipeline on one claim.

    Takes an accident-scene photo, a damage photo, a repair invoice and an
    optional free-text description; returns a fraud score together with the
    supporting evidence and a generated investigation report.
    """
    # 1. Save files temporarily (In production use cloud storage)
    # os.path.basename strips directory components from the client-supplied
    # filename, so an upload named "../../etc/cron.d/x" cannot escape temp/.
    os.makedirs("temp", exist_ok=True)
    scene_path = os.path.join("temp", os.path.basename(scene_image.filename))
    damage_path = os.path.join("temp", os.path.basename(damage_image.filename))
    invoice_path = os.path.join("temp", os.path.basename(invoice_doc.filename))

    with open(scene_path, "wb") as f:
        f.write(await scene_image.read())
    with open(damage_path, "wb") as f:
        f.write(await damage_image.read())
    with open(invoice_path, "wb") as f:
        f.write(await invoice_doc.read())

    # 2. Multimodal Analysis
    scene_data = vision.analyze_accident_scene(scene_path)
    damage_data = vision.detect_damage(damage_path)
    invoice_data = ocr.extract_invoice_data(invoice_path)

    # 3. Reasoning & Inconsistency Detection
    physical_inconsistencies = reasoning.check_physical_consistency(scene_data, damage_data)
    invoice_inconsistencies = reasoning.check_invoice_consistency(damage_data, invoice_data)
    total_inconsistencies = physical_inconsistencies + invoice_inconsistencies

    anomaly_score = reasoning.calculate_anomaly_score(total_inconsistencies)

    # 4. Similarity Retrieval against known fraud cases
    similar_cases = retriever.search_similar_cases(description)
    max_sim = max([c['similarity_score'] for c in similar_cases]) if similar_cases else 0.1

    # 5. Advanced Linguistic Analysis (Deception Detection)
    linguistic_score = 0.0
    deception_indicators = []
    if description:
        # Heuristic: fraudsters often avoid first-person pronouns or use overly complex words
        forbidden_words = ["guarantee", "honestly", "truthfully", "believe me"]
        for word in forbidden_words:
            if word in description.lower():
                deception_indicators.append(f"Use of suggestive word: '{word}'")
                linguistic_score += 0.2

        if len(description.split()) < 10:
            deception_indicators.append("Suspiciously brief description for high-damage claim.")
            linguistic_score += 0.15

    # 6. Vehicle Intelligence (Mocked for Demo)
    vehicle_intel = {
        "vin": "1FA6P8CF5H5XXXXXX",
        "make": "Ford",
        "model": "F-150",
        "previous_accidents": 2,
        "salvage_history": "None",
        "owner_claim_frequency": "High (3 claims in 24 months)"
    }

    # 7. Image Forensic Analysis (mocked)
    image_forensics = {
        "metadata_consistency": "FAIL",
        "exif_location": "3.2 miles from reported scene",
        "timestamp_match": "Mismatch (Photo taken 4 days after reported incident)",
        "digital_alteration_detected": "Minor (Potential brightness/contrast manipulation)",
        "camera_model": "iPhone 15 Pro"
    }

    # 8. Global Risk Network (Network Analysis, mocked)
    risk_network = {
        "claimant_id": "CL-88219",
        "known_associates_flag": True,
        "garage_risk_score": 0.82,  # Highly suspicious repair shop
        "historical_circle": "Associated with 2 previous 'staged accident' rings",
        "risk_network_graph": "Cluster Detected: North Central Region"
    }

    # 9. ML Scoring
    # Features for model: force, parts, scene_mismatch, invoice_mismatch, anomaly_score, similarity
    model_input = [
        scene_data.get('estimated_force', 0.5),
        len(damage_data),
        len(physical_inconsistencies),
        len(invoice_inconsistencies),
        anomaly_score,
        max_sim
    ]

    fraud_prob = 0.5
    if fraud_model is not None:
        fraud_prob = float(fraud_model.predict_proba([model_input])[0][1])
    else:
        # Heuristic fallback when the trained model artifact is unavailable.
        fraud_prob = min(0.95, (len(total_inconsistencies) * 0.15) + (max_sim * 0.3) + linguistic_score)

    # Adjust fraud prob based on advanced features (capped at 0.99)
    if image_forensics["metadata_consistency"] == "FAIL":
        fraud_prob = min(0.99, fraud_prob + 0.15)
    if risk_network["known_associates_flag"]:
        fraud_prob = min(0.99, fraud_prob + 0.1)

    # 10. Generate Investigation Report
    structured_findings = {
        "fraud_probability": round(fraud_prob, 2),
        "collision_geometry": scene_data,
        "damaged_parts": damage_data,
        "invoice_summary": invoice_data,
        "inconsistencies": total_inconsistencies,
        "linguistic_anomalies": deception_indicators,
        "image_forensics": image_forensics,
        "risk_network": risk_network,
        "vehicle_history": vehicle_intel,
        "anomaly_score": anomaly_score,
        "similar_claims_detected": len(similar_cases)
    }

    investigation_report = reporter.generate_investigation_report(structured_findings)

    return {
        "fraud_score": round(fraud_prob, 2),
        "inconsistencies": total_inconsistencies,
        "linguistic_analysis": {
            "score": round(linguistic_score, 2),
            "indicators": deception_indicators
        },
        "image_forensics": image_forensics,
        "risk_network": risk_network,
        "vehicle_intel": vehicle_intel,
        "damage_analysis": damage_data,
        "invoice_table": invoice_data['items'],
        "report": investigation_report,
        "similar_cases": similar_cases
    }

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
|
app/ocr.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from doctr.models import ocr_predictor
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
class InvoiceProcessor:
    """Extracts structured line items and costs from repair-invoice documents via OCR."""

    def __init__(self):
        try:
            self.model = ocr_predictor(pretrained=True)
        except Exception:  # was a bare except: keep SystemExit/KeyboardInterrupt working
            self.model = None
            print("Warning: docTR OCR model could not be loaded. OCR will be mocked.")

    def extract_invoice_data(self, file_path):
        """
        Extracts parts, labor hours, and costs from invoice.
        """
        # Simulated extraction from a repair invoice.
        # Real logic would use self.model(file_path) and regex/LLM to parse structure.
        extracted_data = {
            "invoice_number": "REP-2024-001",
            "items": [
                {"description": "Front Bumper Replacement", "cost": 1200.0, "type": "Part"},
                {"description": "Left Headlight Assembly", "cost": 450.0, "type": "Part"},
                {"description": "Hood Refinishing", "cost": 300.0, "type": "Labor"},
                {"description": "Engine Oil Pan", "cost": 150.0, "type": "Part"},  # Potential anomaly!
                {"description": "Chassis Alignment", "cost": 500.0, "type": "Labor"}
            ],
            "total_cost": 2600.0
        }

        return extracted_data

    def detect_financial_anomalies(self, items):
        """
        Check for unusually high costs or unnecessary parts.
        """
        anomalies = []
        for item in items:
            if item['cost'] > 5000:
                anomalies.append(f"High cost for {item['description']}")
        return anomalies
|
app/reasoning.py
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
class ReasoningEngine:
    """Cross-checks scene, damage and invoice evidence for contradictions."""

    def __init__(self):
        pass

    def check_physical_consistency(self, scene_metadata, damage_detections):
        """
        Compare accident scene estimates with vehicle damage.
        Example: low-speed rear collision cannot realistically produce full front engine damage.
        """
        findings = []

        direction = scene_metadata.get("impact_direction", "").lower()
        force = scene_metadata.get("estimated_force", 0)

        part_names = [det['part'].lower() for det in damage_detections]
        has_front = any(("front" in name) or ("hood" in name) for name in part_names)
        has_rear = any(("rear" in name) or ("trunk" in name) for name in part_names)

        # A rear impact should not leave exclusively front-side damage.
        if "rear" in direction and has_front and not has_rear:
            findings.append("Front damage detected but scene indicates rear impact.")

        # Many damaged parts under a very low estimated force is suspicious.
        if force < 0.2 and len(damage_detections) > 5:
            findings.append("Visual damage severity exceeds estimated collision force.")

        return findings

    def check_invoice_consistency(self, damage_detections, invoice_data):
        """
        Compare billed components with visually detected damaged components.
        """
        findings = []
        seen_parts = [det['part'].lower() for det in damage_detections]

        for line_item in invoice_data.get("items", []):
            if line_item['type'] != 'Part':
                continue

            name = line_item['description'].lower()
            # Simple containment match between detected parts and the billed text.
            if any(part in name for part in seen_parts):
                continue

            # Internal components may be invisible externally, but some are
            # implausible without any matching external impact evidence.
            if any(tag in name for tag in ("pan", "radiator", "engine")):
                findings.append(f"Billed internal part '{line_item['description']}' has no corresponding external impact indicators.")
            else:
                findings.append(f"Billed part '{line_item['description']}' was not detected in damage photos.")

        return findings

    def calculate_anomaly_score(self, inconsistencies):
        """Linear demo score: 15 points per detected inconsistency."""
        return len(inconsistencies) * 15.0
|
app/report.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from groq import Groq
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from dotenv import load_dotenv
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
class ReportGenerator:
    """Turns structured fraud findings into a Markdown investigation report via an LLM."""

    def __init__(self):
        # API Key should be in environment (.env is loaded at module import time).
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY", "MISSING_KEY"))

    def generate_investigation_report(self, structured_data):
        """
        Generates a professional investigation report based on extracted evidence.
        """
        evidence_json = json.dumps(structured_data, indent=2)
        prompt = f"""
        # ROLE: Senior Forensic Claims Adjuster & Fraud Specialist
        # TASK: Generate a detailed, high-impact investigation report based on the evidence below.

        EVIDENCE DATA (JSON):
        {evidence_json}

        # REPORT GUIDELINES:
        1. Use clear, professional forensic language.
        2. Use MARKDOWN headings (###).
        3. Use BOLD for critical findings (e.g., **INCONSISTENCY DETECTED**).
        4. Create a "EXECUTIVE SUMMARY" section first with a clear "Status: RED/YELLOW/GREEN".
        5. Create a "EVIDENCE CONSISTENCY" table using markdown.
        6. Highlight specific mismatches between Visual Damage vs repair Invoice.
        7. Provide a "FRAUD INDICATOR JUSTIFICATION" section explaining why the score is what it is.
        8. End with "LEGAL RECOMMENDATION" (e.g., Proceed to SIU investigation).

        Output the report in professional Markdown format.
        """

        try:
            completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a specialized insurance fraud reasoning engine. Output as professional markdown."},
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
            return completion.choices[0].message.content
        except Exception as e:
            # Degrade gracefully when the LLM is unreachable: surface the error
            # plus a minimal summary built from the detected inconsistencies.
            fallback_summary = ', '.join(structured_data.get('inconsistencies', []))
            return f"Error generating report: {str(e)}\n\n(Fallback: Manual report generation based on inconsistencies: {fallback_summary})"
|
app/retrieval.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import faiss
|
| 2 |
+
import numpy as np
|
| 3 |
+
from sentence_transformers import SentenceTransformer
|
| 4 |
+
|
| 5 |
+
class ClaimRetriever:
    """Semantic search over a seed database of known-fraudulent claim descriptions."""

    def __init__(self):
        # Sentence-embedding model; 'all-MiniLM-L6-v2' produces 384-dim vectors.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.dimension = 384
        self.index = faiss.IndexFlatL2(self.dimension)
        self.claims = []

        # Seed with some known suspicious cases
        self._seed_database()

    def _seed_database(self):
        """Populate the index with a handful of known fraud patterns."""
        cases = [
            "Intentional front-end collision for insurance payout in parking lot.",
            "Staged accident with pre-damaged parts swapped before inspection.",
            "Inflated repair costs with ghost parts in collaboration with local garage.",
            "Double dipping: claiming same damage on multiple insurance policies.",
            "Reported hit-and-run that was actually a collision with a fixed object."
        ]
        self.add_cases(cases)

    def add_cases(self, cases):
        """Embed *cases* and append them to both the FAISS index and the text store."""
        self.claims.extend(cases)
        embeddings = self.model.encode(cases)
        self.index.add(np.array(embeddings).astype('float32'))

    def search_similar_cases(self, description, k=2):
        """Return up to *k* stored cases most similar to *description*.

        Each result carries a similarity_score in (0, 1] derived from the
        L2 distance as 1 / (1 + d). Returns [] for an empty description.
        """
        if not description:
            return []

        query_embedding = self.model.encode([description])
        distances, indices = self.index.search(np.array(query_embedding).astype('float32'), k)

        results = []
        for i, idx in enumerate(indices[0]):
            # FAISS pads indices with -1 when fewer than k vectors are indexed;
            # the previous `idx < len(self.claims)` check let -1 through and
            # silently returned the *last* stored claim.
            if 0 <= idx < len(self.claims):
                results.append({
                    "case": self.claims[idx],
                    "similarity_score": float(1.0 / (1.0 + distances[0][i]))
                })
        return results
|
app/vision.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
try:
|
| 2 |
+
from ultralytics import YOLO
|
| 3 |
+
import torch
|
| 4 |
+
except ImportError:
|
| 5 |
+
YOLO = None
|
| 6 |
+
print("Warning: YOLO/Torch not available.")
|
| 7 |
+
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import cv2
|
| 10 |
+
import numpy as np
|
| 11 |
+
|
| 12 |
+
class VisionAnalyzer:
    """Scene and damage analysis over claim photos (YOLO-backed where available)."""

    def __init__(self):
        # Using a small Nano model for general car detection as a placeholder.
        # In a real production system, this would be a YOLO model trained on a 'car-damage' dataset.
        self.model = None
        if YOLO:
            try:
                self.model = YOLO('yolov8n.pt')
            except Exception as e:
                print(f"Warning: YOLO model could not be loaded: {e}")

    def analyze_accident_scene(self, image_path):
        """
        Estimates collision geometry: impact direction, collision type, force severity.
        """
        # Heuristic/CNN embedding based analysis.
        # For this demo, we use image metadata and visual cues to simulate.
        return {
            "impact_direction": "Frontal-Left",
            "collision_type": "Front-End Collision",
            "estimated_force": 0.65,  # 0 to 1
            "scene_description": "Clear day, intersection"
        }

    def detect_damage(self, image_path):
        """
        Detects damaged parts and severity levels.
        """
        if self.model:
            results = self.model(image_path)
            # Process results... (model output is currently unused by the demo)

        # Simulated detections for the purpose of the investigation platform.
        # In a real app, these would come from the YOLO classes (e.g., 'damaged_bumper', 'shattered_windshield').
        detections = [
            {"part": "Front Bumper", "severity": 0.8, "bbox": [100, 200, 300, 400]},
            {"part": "Left Headlight", "severity": 0.9, "bbox": [50, 220, 120, 280]},
            {"part": "Hood", "severity": 0.4, "bbox": [150, 100, 400, 300]}
        ]

        return detections

    def estimate_collision_distribution(self, detections):
        """
        Calculates where the damage is concentrated (fixed demo values).
        """
        distribution = {
            "front": 0.8,
            "rear": 0.0,
            "left": 0.6,
            "right": 0.1
        }
        return distribution

    def annotate_image(self, image_path, detections):
        """
        Draws bounding boxes on detected damaged parts and saves an annotated copy.
        Returns the annotated image path, or None if the image cannot be read.
        """
        import os  # local import: this module does not import os at top level

        img = cv2.imread(image_path)
        if img is None:
            return None

        for d in detections:
            bbox = d['bbox']
            # Draw rectangle and part label in red
            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 2)
            cv2.putText(img, d['part'], (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # splitext handles any extension (.jpeg, .bmp, uppercase); the old
        # replace(".png")/replace(".jpg") chain produced annotated_path ==
        # image_path for other extensions and overwrote the original photo.
        root, _ext = os.path.splitext(image_path)
        annotated_path = f"{root}_annotated.png"
        cv2.imwrite(annotated_path, img)
        return annotated_path
|
models/fraud_model.joblib
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:727e0a2ce85de06ac23f6a7f6be1a15baab98ab3fe52516571312aed0ba854a7
|
| 3 |
+
size 918633
|
requirements.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn
|
| 3 |
+
python-multipart
|
| 4 |
+
ultralytics
|
| 5 |
+
python-doctr[pytorch]
|
| 6 |
+
sentence-transformers
|
| 7 |
+
faiss-cpu
|
| 8 |
+
scikit-learn
|
| 9 |
+
pandas
|
| 10 |
+
numpy
|
| 11 |
+
pydantic
|
| 12 |
+
groq
|
| 13 |
+
python-dotenv
|
| 14 |
+
pillow
|
| 15 |
+
opencv-python-headless
|
| 16 |
+
torch
|
| 17 |
+
torchvision
|
| 18 |
+
joblib
|
| 19 |
+
pyyaml
|