kshitij230 committed on
Commit
13ae903
·
verified ·
1 Parent(s): eca5829

Upload 11 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# System dependencies for OpenCV (libGL / GLib / X11 runtime libs) and Tesseract OCR.
# FIX: Debian bookworm (the current python:3.10-slim base) no longer ships the
# transitional libgl1-mesa-glx package — the real runtime package is libgl1.
# Likewise the runtime libxrender1 is sufficient; the -dev package is not needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this (slow) layer is cached
# independently of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download YOLO model weights so the container starts without a runtime download
RUN python -c "from ultralytics import YOLO; YOLO('yolov8n.pt')"

COPY . .

# Hugging Face Spaces routes traffic to port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ClaimShield Backend Package
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ClaimShield App Package
app/main.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import joblib
import os
import json
from .vision import VisionAnalyzer
from .ocr import InvoiceProcessor
from .reasoning import ReasoningEngine
from .retrieval import ClaimRetriever
from .report import ReportGenerator

app = FastAPI(title="ClaimShield API")

# Setup CORS — wide open for the demo frontend; restrict origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize analysis components once at import time so each worker process
# loads model weights a single time.
vision = VisionAnalyzer()
ocr = InvoiceProcessor()
reasoning = ReasoningEngine()
retriever = ClaimRetriever()
reporter = ReportGenerator()

# Load the trained fraud classifier; when the artifact is missing or unreadable
# the /analyze_claim endpoint falls back to a heuristic score.
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'fraud_model.joblib')
try:
    fraud_model = joblib.load(MODEL_PATH)
except Exception:  # BUG FIX: was a bare `except:`, which also trapped SystemExit/KeyboardInterrupt
    fraud_model = None
    print(f"Warning: Fraud model not found at {MODEL_PATH}. Prediction will be mocked.")
37
+
38
+ @app.post("/analyze_claim")
39
+ async def analyze_claim(
40
+ scene_image: UploadFile = File(...),
41
+ damage_image: UploadFile = File(...),
42
+ invoice_doc: UploadFile = File(...),
43
+ description: str = Form(None)
44
+ ):
45
+ # 1. Save files temporarily (In production use cloud storage)
46
+ os.makedirs("temp", exist_ok=True)
47
+ scene_path = f"temp/{scene_image.filename}"
48
+ damage_path = f"temp/{damage_image.filename}"
49
+ invoice_path = f"temp/{invoice_doc.filename}"
50
+
51
+ with open(scene_path, "wb") as f: f.write(await scene_image.read())
52
+ with open(damage_path, "wb") as f: f.write(await damage_image.read())
53
+ with open(invoice_path, "wb") as f: f.write(await invoice_doc.read())
54
+
55
+ # 2. Multimodal Analysis
56
+ scene_data = vision.analyze_accident_scene(scene_path)
57
+ damage_data = vision.detect_damage(damage_path)
58
+ invoice_data = ocr.extract_invoice_data(invoice_path)
59
+
60
+ # 3. Reasoning & Inconsistency Detection
61
+ physical_inconsistencies = reasoning.check_physical_consistency(scene_data, damage_data)
62
+ invoice_inconsistencies = reasoning.check_invoice_consistency(damage_data, invoice_data)
63
+ total_inconsistencies = physical_inconsistencies + invoice_inconsistencies
64
+
65
+ anomaly_score = reasoning.calculate_anomaly_score(total_inconsistencies)
66
+
67
+ # 4. Similarity Retrieval
68
+ similar_cases = retriever.search_similar_cases(description)
69
+ max_sim = max([c['similarity_score'] for c in similar_cases]) if similar_cases else 0.1
70
+
71
+ # 5. [NEW] Advanced Linguistic Analysis (Deception Detection)
72
+ linguistic_score = 0.0
73
+ deception_indicators = []
74
+ if description:
75
+ # Heuristic: Fraudsters often avoid first-person pronouns or use overly complex words
76
+ forbidden_words = ["guarantee", "honestly", "truthfully", "believe me"]
77
+ for word in forbidden_words:
78
+ if word in description.lower():
79
+ deception_indicators.append(f"Use of suggestive word: '{word}'")
80
+ linguistic_score += 0.2
81
+
82
+ if len(description.split()) < 10:
83
+ deception_indicators.append("Suspiciously brief description for high-damage claim.")
84
+ linguistic_score += 0.15
85
+
86
+ # 6. [NEW] Vehicle Intelligence (Mocked for Demo)
87
+ vehicle_intel = {
88
+ "vin": "1FA6P8CF5H5XXXXXX",
89
+ "make": "Ford",
90
+ "model": "F-150",
91
+ "previous_accidents": 2,
92
+ "salvage_history": "None",
93
+ "owner_claim_frequency": "High (3 claims in 24 months)"
94
+ }
95
+
96
+ # 7. [NEW] Image Forensic Analysis
97
+ image_forensics = {
98
+ "metadata_consistency": "FAIL",
99
+ "exif_location": "3.2 miles from reported scene",
100
+ "timestamp_match": "Mismatch (Photo taken 4 days after reported incident)",
101
+ "digital_alteration_detected": "Minor (Potential brightness/contrast manipulation)",
102
+ "camera_model": "iPhone 15 Pro"
103
+ }
104
+
105
+ # 8. [NEW] Global Risk Network (Network Analysis)
106
+ risk_network = {
107
+ "claimant_id": "CL-88219",
108
+ "known_associates_flag": True,
109
+ "garage_risk_score": 0.82, # Highly suspicious repair shop
110
+ "historical_circle": "Associated with 2 previous 'staged accident' rings",
111
+ "risk_network_graph": "Cluster Detected: North Central Region"
112
+ }
113
+
114
+ # 9. ML Scoring
115
+ # Features for model: force, parts, scene_mismatch, invoice_mismatch, anomaly_score, similarity
116
+ model_input = [
117
+ scene_data.get('estimated_force', 0.5),
118
+ len(damage_data),
119
+ len(physical_inconsistencies),
120
+ len(invoice_inconsistencies),
121
+ anomaly_score,
122
+ max_sim
123
+ ]
124
+
125
+ fraud_prob = 0.5
126
+ if fraud_model:
127
+ fraud_prob = float(fraud_model.predict_proba([model_input])[0][1])
128
+ else:
129
+ fraud_prob = min(0.95, (len(total_inconsistencies) * 0.15) + (max_sim * 0.3) + linguistic_score)
130
+
131
+ # Adjust fraud prob based on advanced features
132
+ if image_forensics["metadata_consistency"] == "FAIL":
133
+ fraud_prob = min(0.99, fraud_prob + 0.15)
134
+ if risk_network["known_associates_flag"]:
135
+ fraud_prob = min(0.99, fraud_prob + 0.1)
136
+
137
+ # 10. Generate Investigation Report
138
+ structured_findings = {
139
+ "fraud_probability": round(fraud_prob, 2),
140
+ "collision_geometry": scene_data,
141
+ "damaged_parts": damage_data,
142
+ "invoice_summary": invoice_data,
143
+ "inconsistencies": total_inconsistencies,
144
+ "linguistic_anomalies": deception_indicators,
145
+ "image_forensics": image_forensics,
146
+ "risk_network": risk_network,
147
+ "vehicle_history": vehicle_intel,
148
+ "anomaly_score": anomaly_score,
149
+ "similar_claims_detected": len(similar_cases)
150
+ }
151
+
152
+ investigation_report = reporter.generate_investigation_report(structured_findings)
153
+
154
+ return {
155
+ "fraud_score": round(fraud_prob, 2),
156
+ "inconsistencies": total_inconsistencies,
157
+ "linguistic_analysis": {
158
+ "score": round(linguistic_score, 2),
159
+ "indicators": deception_indicators
160
+ },
161
+ "image_forensics": image_forensics,
162
+ "risk_network": risk_network,
163
+ "vehicle_intel": vehicle_intel,
164
+ "damage_analysis": damage_data,
165
+ "invoice_table": invoice_data['items'],
166
+ "report": investigation_report,
167
+ "similar_cases": similar_cases
168
+ }
169
+
170
+ if __name__ == "__main__":
171
+ uvicorn.run(app, host="0.0.0.0", port=8000)
app/ocr.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from doctr.models import ocr_predictor
2
+ import re
3
+
4
class InvoiceProcessor:
    """OCR front-end for repair invoices (docTR-backed, mocked for the demo)."""

    def __init__(self):
        # Load the pretrained docTR OCR pipeline; degrade gracefully when the
        # model (or its weights download) is unavailable.
        try:
            self.model = ocr_predictor(pretrained=True)
        except Exception:  # BUG FIX: was a bare `except:`, which also trapped SystemExit/KeyboardInterrupt
            self.model = None
            print("Warning: docTR OCR model could not be loaded. OCR will be mocked.")

    def extract_invoice_data(self, file_path):
        """
        Extracts parts, labor hours, and costs from invoice.

        NOTE: currently returns a fixed, simulated invoice for the demo; real
        logic would run self.model(file_path) and parse the OCR output with
        regex/LLM into this same structure.
        """
        extracted_data = {
            "invoice_number": "REP-2024-001",
            "items": [
                {"description": "Front Bumper Replacement", "cost": 1200.0, "type": "Part"},
                {"description": "Left Headlight Assembly", "cost": 450.0, "type": "Part"},
                {"description": "Hood Refinishing", "cost": 300.0, "type": "Labor"},
                {"description": "Engine Oil Pan", "cost": 150.0, "type": "Part"},  # Potential anomaly!
                {"description": "Chassis Alignment", "cost": 500.0, "type": "Labor"}
            ],
            "total_cost": 2600.0
        }

        return extracted_data

    def detect_financial_anomalies(self, items):
        """
        Check for unusually high costs or unnecessary parts.

        Returns a list of human-readable anomaly strings (empty when clean).
        """
        anomalies = []
        for item in items:
            if item['cost'] > 5000:
                anomalies.append(f"High cost for {item['description']}")
        return anomalies
app/reasoning.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ReasoningEngine:
    """Cross-checks the independent evidence streams of a claim (scene
    analysis, damage detections, repair invoice) for contradictions."""

    def __init__(self):
        pass

    def check_physical_consistency(self, scene_metadata, damage_detections):
        """
        Compare accident scene estimates with vehicle damage.
        Example: low-speed rear collision cannot realistically produce full front engine damage.
        """
        findings = []

        direction = scene_metadata.get("impact_direction", "").lower()
        force = scene_metadata.get("estimated_force", 0)

        def _zone_hit(*keywords):
            # True when any detection's part name contains any of the keywords.
            return any(kw in det['part'].lower() for det in damage_detections for kw in keywords)

        has_front = _zone_hit("front", "hood")
        has_rear = _zone_hit("rear", "trunk")

        # Rear impact reported, yet only front-end damage is visible.
        if "rear" in direction and has_front and not has_rear:
            findings.append("Front damage detected but scene indicates rear impact.")

        # Many damaged parts despite a very low estimated collision force.
        if force < 0.2 and len(damage_detections) > 5:
            findings.append("Visual damage severity exceeds estimated collision force.")

        return findings

    def check_invoice_consistency(self, damage_detections, invoice_data):
        """
        Compare billed components with visually detected damaged components.
        """
        findings = []
        seen_parts = {det['part'].lower() for det in damage_detections}

        for entry in invoice_data.get("items", []):
            if entry['type'] != 'Part':
                continue  # labor lines are not matched against damage photos

            name = entry['description'].lower()
            if any(part in name for part in seen_parts):
                continue  # billed part matches something visible in the photos

            # Part billed but never seen: internal components get a softer,
            # more specific flag than plainly missing external ones.
            if any(tag in name for tag in ("pan", "radiator", "engine")):
                findings.append(f"Billed internal part '{entry['description']}' has no corresponding external impact indicators.")
            else:
                findings.append(f"Billed part '{entry['description']}' was not detected in damage photos.")

        return findings

    def calculate_anomaly_score(self, inconsistencies):
        """Linear anomaly score: 15 points per detected inconsistency."""
        return 15.0 * len(inconsistencies)
app/report.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ import json
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
class ReportGenerator:
    """Turns the structured fraud findings into a Markdown investigation
    report via the Groq chat-completions API, with a plain-text fallback
    when the API call fails."""

    def __init__(self):
        # API Key should be in environment; the "MISSING_KEY" default lets the
        # app boot without credentials — the API call then raises and the
        # fallback path in generate_investigation_report is used instead.
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY", "MISSING_KEY"))

    def generate_investigation_report(self, structured_data):
        """
        Generates a professional investigation report based on extracted evidence.

        structured_data: dict of findings (serialized into the prompt as JSON).
        Returns the LLM's Markdown report, or an error/fallback string listing
        the detected inconsistencies when the API call raises.
        """
        # The evidence dict is embedded verbatim as JSON; the guideline list
        # shapes the model's output into a fixed report structure.
        prompt = f"""
        # ROLE: Senior Forensic Claims Adjuster & Fraud Specialist
        # TASK: Generate a detailed, high-impact investigation report based on the evidence below.

        EVIDENCE DATA (JSON):
        {json.dumps(structured_data, indent=2)}

        # REPORT GUIDELINES:
        1. Use clear, professional forensic language.
        2. Use MARKDOWN headings (###).
        3. Use BOLD for critical findings (e.g., **INCONSISTENCY DETECTED**).
        4. Create a "EXECUTIVE SUMMARY" section first with a clear "Status: RED/YELLOW/GREEN".
        5. Create a "EVIDENCE CONSISTENCY" table using markdown.
        6. Highlight specific mismatches between Visual Damage vs repair Invoice.
        7. Provide a "FRAUD INDICATOR JUSTIFICATION" section explaining why the score is what it is.
        8. End with "LEGAL RECOMMENDATION" (e.g., Proceed to SIU investigation).

        Output the report in professional Markdown format.
        """

        try:
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a specialized insurance fraud reasoning engine. Output as professional markdown."},
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort fallback: return the error plus the raw inconsistency
            # list so the endpoint still produces a usable report field.
            return f"Error generating report: {str(e)}\n\n(Fallback: Manual report generation based on inconsistencies: {', '.join(structured_data.get('inconsistencies', []))})"
app/retrieval.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+
5
class ClaimRetriever:
    """Semantic similarity search over a small corpus of known-fraud claim
    narratives (MiniLM embeddings in an exact-L2 FAISS index)."""

    def __init__(self):
        # 384 is the embedding width of all-MiniLM-L6-v2.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.dimension = 384
        self.index = faiss.IndexFlatL2(self.dimension)
        self.claims = []

        # Seed with some known suspicious cases
        self._seed_database()

    def _seed_database(self):
        """Populate the index with canonical staged-fraud narratives."""
        cases = [
            "Intentional front-end collision for insurance payout in parking lot.",
            "Staged accident with pre-damaged parts swapped before inspection.",
            "Inflated repair costs with ghost parts in collaboration with local garage.",
            "Double dipping: claiming same damage on multiple insurance policies.",
            "Reported hit-and-run that was actually a collision with a fixed object."
        ]
        self.add_cases(cases)

    def add_cases(self, cases):
        """Encode and index a list of claim-description strings.

        The position in self.claims doubles as the FAISS vector id, so the
        two structures must be appended to in lockstep.
        """
        self.claims.extend(cases)
        embeddings = self.model.encode(cases)
        self.index.add(np.array(embeddings).astype('float32'))

    def search_similar_cases(self, description, k=2):
        """Return up to k seeded cases most similar to `description`.

        Each result is {"case": str, "similarity_score": float in (0, 1]},
        where the score is 1/(1 + L2 distance). Empty/None descriptions
        yield an empty list.
        """
        if not description:
            return []

        query_embedding = self.model.encode([description])
        distances, indices = self.index.search(np.array(query_embedding).astype('float32'), k)

        results = []
        for i, idx in enumerate(indices[0]):
            # BUG FIX: FAISS pads missing neighbours with label -1 when fewer
            # than k vectors are indexed; the old `idx < len(self.claims)`
            # check let -1 through and returned self.claims[-1] (the last
            # seeded case) as a phantom match. Guard the lower bound too.
            if 0 <= idx < len(self.claims):
                results.append({
                    "case": self.claims[idx],
                    "similarity_score": float(1.0 / (1.0 + distances[0][i]))
                })
        return results
app/vision.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ try:
2
+ from ultralytics import YOLO
3
+ import torch
4
+ except ImportError:
5
+ YOLO = None
6
+ print("Warning: YOLO/Torch not available.")
7
+
8
+ from PIL import Image
9
+ import cv2
10
+ import numpy as np
11
+
12
class VisionAnalyzer:
    """Vision stage of the pipeline: scene geometry estimation, damage
    detection (YOLO-backed, mocked for the demo) and image annotation."""

    def __init__(self):
        # Using a small Nano model for general car detection as a placeholder.
        # In a real production system, this would be a YOLO model trained on
        # a 'car-damage' dataset.
        self.model = None
        if YOLO:
            try:
                self.model = YOLO('yolov8n.pt')
            except Exception as e:
                print(f"Warning: YOLO model could not be loaded: {e}")

    def analyze_accident_scene(self, image_path):
        """
        Estimates collision geometry: impact direction, collision type, force severity.

        NOTE: returns fixed demo values; real logic would run heuristic/CNN
        embedding analysis on the image at `image_path`.
        """
        return {
            "impact_direction": "Frontal-Left",
            "collision_type": "Front-End Collision",
            "estimated_force": 0.65,  # 0 to 1
            "scene_description": "Clear day, intersection"
        }

    def detect_damage(self, image_path):
        """
        Detects damaged parts and severity levels.

        Returns a list of {"part", "severity", "bbox"} dicts. The YOLO pass
        runs when a model is loaded, but its output is currently discarded —
        the detections below are simulated for the demo platform.
        """
        if self.model:
            results = self.model(image_path)
            # Process results... (real classes would be e.g. 'damaged_bumper')

        detections = [
            {"part": "Front Bumper", "severity": 0.8, "bbox": [100, 200, 300, 400]},
            {"part": "Left Headlight", "severity": 0.9, "bbox": [50, 220, 120, 280]},
            {"part": "Hood", "severity": 0.4, "bbox": [150, 100, 400, 300]}
        ]

        return detections

    def estimate_collision_distribution(self, detections):
        """
        Calculates where the damage is concentrated.

        NOTE: fixed demo values — `detections` is currently ignored.
        """
        distribution = {
            "front": 0.8,
            "rear": 0.0,
            "left": 0.6,
            "right": 0.1
        }
        return distribution

    def annotate_image(self, image_path, detections):
        """
        Draws bounding boxes on detected damaged parts.

        Returns the path of the annotated PNG copy, or None when the source
        image cannot be read.
        """
        import os  # local import: this module does not import os at top level

        img = cv2.imread(image_path)
        if img is None:
            return None

        for d in detections:
            bbox = d['bbox']
            # Draw rectangle + part label in red
            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 2)
            cv2.putText(img, d['part'], (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # BUG FIX: the old chained .replace(".png"/".jpg") left the path
        # unchanged for any other extension (.jpeg, .bmp, ...), so cv2.imwrite
        # silently overwrote the original evidence image. splitext handles
        # every extension uniformly.
        root, _ext = os.path.splitext(image_path)
        annotated_path = f"{root}_annotated.png"
        cv2.imwrite(annotated_path, img)
        return annotated_path
models/fraud_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727e0a2ce85de06ac23f6a7f6be1a15baab98ab3fe52516571312aed0ba854a7
3
+ size 918633
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ ultralytics
5
+ python-doctr[pytorch]
6
+ sentence-transformers
7
+ faiss-cpu
8
+ scikit-learn
9
+ pandas
10
+ numpy
11
+ pydantic
12
+ groq
13
+ python-dotenv
14
+ pillow
15
+ opencv-python-headless
16
+ torch
17
+ torchvision
18
+ joblib
19
+ pyyaml