kshitij230 committed on
Commit
13ae903
·
verified ·
1 Parent(s): eca5829

Upload 11 files

Browse files
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

WORKDIR /app

# System dependencies for OpenCV (libGL / GLib / X11 runtime libs) and Tesseract OCR.
# FIX: Debian bookworm (the current python:3.10-slim base) no longer ships the
# transitional libgl1-mesa-glx package — the real runtime package is libgl1.
# Likewise the runtime libxrender1 is sufficient; the -dev package is not needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgl1 \
    libglib2.0-0 \
    libsm6 \
    libxext6 \
    libxrender1 \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this (slow) layer is cached
# independently of application-code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download YOLO model weights so the container starts without a runtime download
RUN python -c "from ultralytics import YOLO; YOLO('yolov8n.pt')"

COPY . .

# Hugging Face Spaces routes traffic to port 7860
EXPOSE 7860

CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]
__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ClaimShield Backend Package
app/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # ClaimShield App Package
app/main.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
import uvicorn
import joblib
import os
import json
from .vision import VisionAnalyzer
from .ocr import InvoiceProcessor
from .reasoning import ReasoningEngine
from .retrieval import ClaimRetriever
from .report import ReportGenerator

app = FastAPI(title="ClaimShield API")

# Setup CORS — wide open for the demo frontend; restrict origins in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize analysis components once at import time so each worker process
# loads model weights a single time.
vision = VisionAnalyzer()
ocr = InvoiceProcessor()
reasoning = ReasoningEngine()
retriever = ClaimRetriever()
reporter = ReportGenerator()

# Load the trained fraud classifier; when the artifact is missing or unreadable
# the /analyze_claim endpoint falls back to a heuristic score.
MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'fraud_model.joblib')
try:
    fraud_model = joblib.load(MODEL_PATH)
except Exception:  # BUG FIX: was a bare `except:`, which also trapped SystemExit/KeyboardInterrupt
    fraud_model = None
    print(f"Warning: Fraud model not found at {MODEL_PATH}. Prediction will be mocked.")
37
+
38
+ @app.post("/analyze_claim")
39
+ async def analyze_claim(
40
+ scene_image: UploadFile = File(...),
41
+ damage_image: UploadFile = File(...),
42
+ invoice_doc: UploadFile = File(...),
43
+ description: str = Form(None)
44
+ ):
45
+ # 1. Save files temporarily (In production use cloud storage)
46
+ os.makedirs("temp", exist_ok=True)
47
+ scene_path = f"temp/{scene_image.filename}"
48
+ damage_path = f"temp/{damage_image.filename}"
49
+ invoice_path = f"temp/{invoice_doc.filename}"
50
+
51
+ with open(scene_path, "wb") as f: f.write(await scene_image.read())
52
+ with open(damage_path, "wb") as f: f.write(await damage_image.read())
53
+ with open(invoice_path, "wb") as f: f.write(await invoice_doc.read())
54
+
55
+ # 2. Multimodal Analysis
56
+ scene_data = vision.analyze_accident_scene(scene_path)
57
+ damage_data = vision.detect_damage(damage_path)
58
+ invoice_data = ocr.extract_invoice_data(invoice_path)
59
+
60
+ # 3. Reasoning & Inconsistency Detection
61
+ physical_inconsistencies = reasoning.check_physical_consistency(scene_data, damage_data)
62
+ invoice_inconsistencies = reasoning.check_invoice_consistency(damage_data, invoice_data)
63
+ total_inconsistencies = physical_inconsistencies + invoice_inconsistencies
64
+
65
+ anomaly_score = reasoning.calculate_anomaly_score(total_inconsistencies)
66
+
67
+ # 4. Similarity Retrieval
68
+ similar_cases = retriever.search_similar_cases(description)
69
+ max_sim = max([c['similarity_score'] for c in similar_cases]) if similar_cases else 0.1
70
+
71
+ # 5. [NEW] Advanced Linguistic Analysis (Deception Detection)
72
+ linguistic_score = 0.0
73
+ deception_indicators = []
74
+ if description:
75
+ # Heuristic: Fraudsters often avoid first-person pronouns or use overly complex words
76
+ forbidden_words = ["guarantee", "honestly", "truthfully", "believe me"]
77
+ for word in forbidden_words:
78
+ if word in description.lower():
79
+ deception_indicators.append(f"Use of suggestive word: '{word}'")
80
+ linguistic_score += 0.2
81
+
82
+ if len(description.split()) < 10:
83
+ deception_indicators.append("Suspiciously brief description for high-damage claim.")
84
+ linguistic_score += 0.15
85
+
86
+ # 6. [NEW] Vehicle Intelligence (Mocked for Demo)
87
+ vehicle_intel = {
88
+ "vin": "1FA6P8CF5H5XXXXXX",
89
+ "make": "Ford",
90
+ "model": "F-150",
91
+ "previous_accidents": 2,
92
+ "salvage_history": "None",
93
+ "owner_claim_frequency": "High (3 claims in 24 months)"
94
+ }
95
+
96
+ # 7. [NEW] Image Forensic Analysis
97
+ image_forensics = {
98
+ "metadata_consistency": "FAIL",
99
+ "exif_location": "3.2 miles from reported scene",
100
+ "timestamp_match": "Mismatch (Photo taken 4 days after reported incident)",
101
+ "digital_alteration_detected": "Minor (Potential brightness/contrast manipulation)",
102
+ "camera_model": "iPhone 15 Pro"
103
+ }
104
+
105
+ # 8. [NEW] Global Risk Network (Network Analysis)
106
+ risk_network = {
107
+ "claimant_id": "CL-88219",
108
+ "known_associates_flag": True,
109
+ "garage_risk_score": 0.82, # Highly suspicious repair shop
110
+ "historical_circle": "Associated with 2 previous 'staged accident' rings",
111
+ "risk_network_graph": "Cluster Detected: North Central Region"
112
+ }
113
+
114
+ # 9. ML Scoring
115
+ # Features for model: force, parts, scene_mismatch, invoice_mismatch, anomaly_score, similarity
116
+ model_input = [
117
+ scene_data.get('estimated_force', 0.5),
118
+ len(damage_data),
119
+ len(physical_inconsistencies),
120
+ len(invoice_inconsistencies),
121
+ anomaly_score,
122
+ max_sim
123
+ ]
124
+
125
+ fraud_prob = 0.5
126
+ if fraud_model:
127
+ fraud_prob = float(fraud_model.predict_proba([model_input])[0][1])
128
+ else:
129
+ fraud_prob = min(0.95, (len(total_inconsistencies) * 0.15) + (max_sim * 0.3) + linguistic_score)
130
+
131
+ # Adjust fraud prob based on advanced features
132
+ if image_forensics["metadata_consistency"] == "FAIL":
133
+ fraud_prob = min(0.99, fraud_prob + 0.15)
134
+ if risk_network["known_associates_flag"]:
135
+ fraud_prob = min(0.99, fraud_prob + 0.1)
136
+
137
+ # 10. Generate Investigation Report
138
+ structured_findings = {
139
+ "fraud_probability": round(fraud_prob, 2),
140
+ "collision_geometry": scene_data,
141
+ "damaged_parts": damage_data,
142
+ "invoice_summary": invoice_data,
143
+ "inconsistencies": total_inconsistencies,
144
+ "linguistic_anomalies": deception_indicators,
145
+ "image_forensics": image_forensics,
146
+ "risk_network": risk_network,
147
+ "vehicle_history": vehicle_intel,
148
+ "anomaly_score": anomaly_score,
149
+ "similar_claims_detected": len(similar_cases)
150
+ }
151
+
152
+ investigation_report = reporter.generate_investigation_report(structured_findings)
153
+
154
+ return {
155
+ "fraud_score": round(fraud_prob, 2),
156
+ "inconsistencies": total_inconsistencies,
157
+ "linguistic_analysis": {
158
+ "score": round(linguistic_score, 2),
159
+ "indicators": deception_indicators
160
+ },
161
+ "image_forensics": image_forensics,
162
+ "risk_network": risk_network,
163
+ "vehicle_intel": vehicle_intel,
164
+ "damage_analysis": damage_data,
165
+ "invoice_table": invoice_data['items'],
166
+ "report": investigation_report,
167
+ "similar_cases": similar_cases
168
+ }
169
+
170
+ if __name__ == "__main__":
171
+ uvicorn.run(app, host="0.0.0.0", port=8000)
app/ocr.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from doctr.models import ocr_predictor
2
+ import re
3
+
4
class InvoiceProcessor:
    """OCR front-end for repair invoices (docTR-backed, mocked for the demo)."""

    def __init__(self):
        # Load the pretrained docTR OCR pipeline; degrade gracefully when the
        # model (or its weights download) is unavailable.
        try:
            self.model = ocr_predictor(pretrained=True)
        except Exception:  # BUG FIX: was a bare `except:`, which also trapped SystemExit/KeyboardInterrupt
            self.model = None
            print("Warning: docTR OCR model could not be loaded. OCR will be mocked.")

    def extract_invoice_data(self, file_path):
        """
        Extracts parts, labor hours, and costs from invoice.

        NOTE: currently returns a fixed, simulated invoice for the demo; real
        logic would run self.model(file_path) and parse the OCR output with
        regex/LLM into this same structure.
        """
        extracted_data = {
            "invoice_number": "REP-2024-001",
            "items": [
                {"description": "Front Bumper Replacement", "cost": 1200.0, "type": "Part"},
                {"description": "Left Headlight Assembly", "cost": 450.0, "type": "Part"},
                {"description": "Hood Refinishing", "cost": 300.0, "type": "Labor"},
                {"description": "Engine Oil Pan", "cost": 150.0, "type": "Part"},  # Potential anomaly!
                {"description": "Chassis Alignment", "cost": 500.0, "type": "Labor"}
            ],
            "total_cost": 2600.0
        }

        return extracted_data

    def detect_financial_anomalies(self, items):
        """
        Check for unusually high costs or unnecessary parts.

        Returns a list of human-readable anomaly strings (empty when clean).
        """
        anomalies = []
        for item in items:
            if item['cost'] > 5000:
                anomalies.append(f"High cost for {item['description']}")
        return anomalies
app/reasoning.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class ReasoningEngine:
    """Cross-checks the independent evidence streams of a claim (scene
    analysis, damage detections, repair invoice) for contradictions."""

    def __init__(self):
        pass

    def check_physical_consistency(self, scene_metadata, damage_detections):
        """
        Compare accident scene estimates with vehicle damage.
        Example: low-speed rear collision cannot realistically produce full front engine damage.
        """
        findings = []

        direction = scene_metadata.get("impact_direction", "").lower()
        force = scene_metadata.get("estimated_force", 0)

        def _zone_hit(*keywords):
            # True when any detection's part name contains any of the keywords.
            return any(kw in det['part'].lower() for det in damage_detections for kw in keywords)

        has_front = _zone_hit("front", "hood")
        has_rear = _zone_hit("rear", "trunk")

        # Rear impact reported, yet only front-end damage is visible.
        if "rear" in direction and has_front and not has_rear:
            findings.append("Front damage detected but scene indicates rear impact.")

        # Many damaged parts despite a very low estimated collision force.
        if force < 0.2 and len(damage_detections) > 5:
            findings.append("Visual damage severity exceeds estimated collision force.")

        return findings

    def check_invoice_consistency(self, damage_detections, invoice_data):
        """
        Compare billed components with visually detected damaged components.
        """
        findings = []
        seen_parts = {det['part'].lower() for det in damage_detections}

        for entry in invoice_data.get("items", []):
            if entry['type'] != 'Part':
                continue  # labor lines are not matched against damage photos

            name = entry['description'].lower()
            if any(part in name for part in seen_parts):
                continue  # billed part matches something visible in the photos

            # Part billed but never seen: internal components get a softer,
            # more specific flag than plainly missing external ones.
            if any(tag in name for tag in ("pan", "radiator", "engine")):
                findings.append(f"Billed internal part '{entry['description']}' has no corresponding external impact indicators.")
            else:
                findings.append(f"Billed part '{entry['description']}' was not detected in damage photos.")

        return findings

    def calculate_anomaly_score(self, inconsistencies):
        """Linear anomaly score: 15 points per detected inconsistency."""
        return 15.0 * len(inconsistencies)
app/report.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from groq import Groq
2
+ import json
3
+ import os
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
class ReportGenerator:
    """Turns the structured fraud findings into a Markdown investigation
    report via the Groq chat-completions API, with a plain-text fallback
    when the API call fails."""

    def __init__(self):
        # API Key should be in environment; the "MISSING_KEY" default lets the
        # app boot without credentials — the API call then raises and the
        # fallback path in generate_investigation_report is used instead.
        self.client = Groq(api_key=os.getenv("GROQ_API_KEY", "MISSING_KEY"))

    def generate_investigation_report(self, structured_data):
        """
        Generates a professional investigation report based on extracted evidence.

        structured_data: dict of findings (serialized into the prompt as JSON).
        Returns the LLM's Markdown report, or an error/fallback string listing
        the detected inconsistencies when the API call raises.
        """
        # The evidence dict is embedded verbatim as JSON; the guideline list
        # shapes the model's output into a fixed report structure.
        prompt = f"""
        # ROLE: Senior Forensic Claims Adjuster & Fraud Specialist
        # TASK: Generate a detailed, high-impact investigation report based on the evidence below.

        EVIDENCE DATA (JSON):
        {json.dumps(structured_data, indent=2)}

        # REPORT GUIDELINES:
        1. Use clear, professional forensic language.
        2. Use MARKDOWN headings (###).
        3. Use BOLD for critical findings (e.g., **INCONSISTENCY DETECTED**).
        4. Create a "EXECUTIVE SUMMARY" section first with a clear "Status: RED/YELLOW/GREEN".
        5. Create a "EVIDENCE CONSISTENCY" table using markdown.
        6. Highlight specific mismatches between Visual Damage vs repair Invoice.
        7. Provide a "FRAUD INDICATOR JUSTIFICATION" section explaining why the score is what it is.
        8. End with "LEGAL RECOMMENDATION" (e.g., Proceed to SIU investigation).

        Output the report in professional Markdown format.
        """

        try:
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "You are a specialized insurance fraud reasoning engine. Output as professional markdown."},
                    {"role": "user", "content": prompt}
                ],
                model="llama-3.3-70b-versatile",
            )
            return chat_completion.choices[0].message.content
        except Exception as e:
            # Best-effort fallback: return the error plus the raw inconsistency
            # list so the endpoint still produces a usable report field.
            return f"Error generating report: {str(e)}\n\n(Fallback: Manual report generation based on inconsistencies: {', '.join(structured_data.get('inconsistencies', []))})"
app/retrieval.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import faiss
2
+ import numpy as np
3
+ from sentence_transformers import SentenceTransformer
4
+
5
class ClaimRetriever:
    """Semantic similarity search over a small corpus of known-fraud claim
    narratives (MiniLM embeddings in an exact-L2 FAISS index)."""

    def __init__(self):
        # 384 is the embedding width of all-MiniLM-L6-v2.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.dimension = 384
        self.index = faiss.IndexFlatL2(self.dimension)
        self.claims = []

        # Seed with some known suspicious cases
        self._seed_database()

    def _seed_database(self):
        """Populate the index with canonical staged-fraud narratives."""
        cases = [
            "Intentional front-end collision for insurance payout in parking lot.",
            "Staged accident with pre-damaged parts swapped before inspection.",
            "Inflated repair costs with ghost parts in collaboration with local garage.",
            "Double dipping: claiming same damage on multiple insurance policies.",
            "Reported hit-and-run that was actually a collision with a fixed object."
        ]
        self.add_cases(cases)

    def add_cases(self, cases):
        """Encode and index a list of claim-description strings.

        The position in self.claims doubles as the FAISS vector id, so the
        two structures must be appended to in lockstep.
        """
        self.claims.extend(cases)
        embeddings = self.model.encode(cases)
        self.index.add(np.array(embeddings).astype('float32'))

    def search_similar_cases(self, description, k=2):
        """Return up to k seeded cases most similar to `description`.

        Each result is {"case": str, "similarity_score": float in (0, 1]},
        where the score is 1/(1 + L2 distance). Empty/None descriptions
        yield an empty list.
        """
        if not description:
            return []

        query_embedding = self.model.encode([description])
        distances, indices = self.index.search(np.array(query_embedding).astype('float32'), k)

        results = []
        for i, idx in enumerate(indices[0]):
            # BUG FIX: FAISS pads missing neighbours with label -1 when fewer
            # than k vectors are indexed; the old `idx < len(self.claims)`
            # check let -1 through and returned self.claims[-1] (the last
            # seeded case) as a phantom match. Guard the lower bound too.
            if 0 <= idx < len(self.claims):
                results.append({
                    "case": self.claims[idx],
                    "similarity_score": float(1.0 / (1.0 + distances[0][i]))
                })
        return results
app/vision.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ try:
2
+ from ultralytics import YOLO
3
+ import torch
4
+ except ImportError:
5
+ YOLO = None
6
+ print("Warning: YOLO/Torch not available.")
7
+
8
+ from PIL import Image
9
+ import cv2
10
+ import numpy as np
11
+
12
class VisionAnalyzer:
    """Vision stage of the pipeline: scene geometry estimation, damage
    detection (YOLO-backed, mocked for the demo) and image annotation."""

    def __init__(self):
        # Using a small Nano model for general car detection as a placeholder.
        # In a real production system, this would be a YOLO model trained on
        # a 'car-damage' dataset.
        self.model = None
        if YOLO:
            try:
                self.model = YOLO('yolov8n.pt')
            except Exception as e:
                print(f"Warning: YOLO model could not be loaded: {e}")

    def analyze_accident_scene(self, image_path):
        """
        Estimates collision geometry: impact direction, collision type, force severity.

        NOTE: returns fixed demo values; real logic would run heuristic/CNN
        embedding analysis on the image at `image_path`.
        """
        return {
            "impact_direction": "Frontal-Left",
            "collision_type": "Front-End Collision",
            "estimated_force": 0.65,  # 0 to 1
            "scene_description": "Clear day, intersection"
        }

    def detect_damage(self, image_path):
        """
        Detects damaged parts and severity levels.

        Returns a list of {"part", "severity", "bbox"} dicts. The YOLO pass
        runs when a model is loaded, but its output is currently discarded —
        the detections below are simulated for the demo platform.
        """
        if self.model:
            results = self.model(image_path)
            # Process results... (real classes would be e.g. 'damaged_bumper')

        detections = [
            {"part": "Front Bumper", "severity": 0.8, "bbox": [100, 200, 300, 400]},
            {"part": "Left Headlight", "severity": 0.9, "bbox": [50, 220, 120, 280]},
            {"part": "Hood", "severity": 0.4, "bbox": [150, 100, 400, 300]}
        ]

        return detections

    def estimate_collision_distribution(self, detections):
        """
        Calculates where the damage is concentrated.

        NOTE: fixed demo values — `detections` is currently ignored.
        """
        distribution = {
            "front": 0.8,
            "rear": 0.0,
            "left": 0.6,
            "right": 0.1
        }
        return distribution

    def annotate_image(self, image_path, detections):
        """
        Draws bounding boxes on detected damaged parts.

        Returns the path of the annotated PNG copy, or None when the source
        image cannot be read.
        """
        import os  # local import: this module does not import os at top level

        img = cv2.imread(image_path)
        if img is None:
            return None

        for d in detections:
            bbox = d['bbox']
            # Draw rectangle + part label in red
            cv2.rectangle(img, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 0, 255), 2)
            cv2.putText(img, d['part'], (bbox[0], bbox[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

        # BUG FIX: the old chained .replace(".png"/".jpg") left the path
        # unchanged for any other extension (.jpeg, .bmp, ...), so cv2.imwrite
        # silently overwrote the original evidence image. splitext handles
        # every extension uniformly.
        root, _ext = os.path.splitext(image_path)
        annotated_path = f"{root}_annotated.png"
        cv2.imwrite(annotated_path, img)
        return annotated_path
models/fraud_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:727e0a2ce85de06ac23f6a7f6be1a15baab98ab3fe52516571312aed0ba854a7
3
+ size 918633
requirements.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ ultralytics
5
+ python-doctr[pytorch]
6
+ sentence-transformers
7
+ faiss-cpu
8
+ scikit-learn
9
+ pandas
10
+ numpy
11
+ pydantic
12
+ groq
13
+ python-dotenv
14
+ pillow
15
+ opencv-python-headless
16
+ torch
17
+ torchvision
18
+ joblib
19
+ pyyaml