Spaces:

gcc-insurance-intelligence-lab
/

FraudSimulator-AI

Sleeping

App Files Files Community

BDR-AI commited on Feb 27

Commit

d98efb2

verified ·

1 Parent(s): 44f8189

Add transformer text processor for Maysat method (Step 1/5)

Browse files

Implementing Maysat method multi-model ensemble - Phase 1:

- Created InsuranceTextProcessor class with transformer pipeline
- Extracts linguistic features: urgency, emotional, vague language
- Analyzes claim text for fraud indicators
- Returns fraud score (0-1) with confidence level
- Includes test cases for validation

Next: Integrate with fraud_engine.py for hybrid scoring (40% Transformer + 40% ML + 20% Rules)

Files changed (1) hide show

text_processor.py +119 -0

text_processor.py ADDED Viewed

	@@ -0,0 +1,119 @@

+"""Text Processing Module for Maysat Method Implementation
+This module implements NLP-based fraud detection using transformers.
+Part of the multi-model ensemble: 40% Transformer + 40% ML + 20% Rules
+"""
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import torch
+import warnings
+warnings.filterwarnings('ignore')
+class InsuranceTextProcessor:
+    """Process insurance claim text using transformer models"""
+    def __init__(self):
+        print("Initializing Text Processor...")
+        try:
+            # Load fraud detection model
+            self.fraud_classifier = pipeline(
+                "text-classification",
+                model="distilbert-base-uncased",
+                device=-1  # CPU
+            )
+            print("✓ Fraud classifier loaded")
+        except Exception as e:
+            print(f"Warning: Could not load fraud classifier: {e}")
+            self.fraud_classifier = None
+    def analyze_claim_text(self, claim_text):
+        """Analyze claim text for fraud indicators
+        Args:
+            claim_text (str): The claim description/notes
+        Returns:
+            dict: Analysis results with fraud score and features
+        """
+        if not claim_text or len(claim_text.strip()) < 10:
+            return {
+                'fraud_score': 0.5,
+                'confidence': 0.0,
+                'text_features': self._extract_basic_features(claim_text or "")
+            }
+        # Get transformer prediction
+        fraud_score = 0.5
+        confidence = 0.0
+        if self.fraud_classifier:
+            try:
+                result = self.fraud_classifier(claim_text[:512])[0]  # Limit to 512 tokens
+                # Normalize score to 0-1 range
+                fraud_score = result['score'] if result['label'] == 'LABEL_1' else 1 - result['score']
+                confidence = result['score']
+            except Exception as e:
+                print(f"Prediction error: {e}")
+        # Extract text features
+        text_features = self._extract_text_features(claim_text)
+        return {
+            'fraud_score': fraud_score,
+            'confidence': confidence,
+            'text_features': text_features,
+            'method': 'transformer'
+        }
+    def _extract_text_features(self, text):
+        """Extract linguistic features from text"""
+        text_lower = text.lower()
+        # Urgency indicators
+        urgency_words = ['urgent', 'emergency', 'immediately', 'asap', 'critical']
+        urgency_count = sum(1 for word in urgency_words if word in text_lower)
+        # Emotional manipulation
+        emotional_words = ['desperate', 'suffering', 'dying', 'helpless', 'tragedy']
+        emotional_count = sum(1 for word in emotional_words if word in text_lower)
+        # Vague language
+        vague_words = ['somehow', 'maybe', 'approximately', 'around', 'roughly']
+        vague_count = sum(1 for word in vague_words if word in text_lower)
+        return {
+            'length': len(text),
+            'word_count': len(text.split()),
+            'urgency_score': min(urgency_count / 5.0, 1.0),
+            'emotional_score': min(emotional_count / 5.0, 1.0),
+            'vague_score': min(vague_count / 5.0, 1.0),
+            'has_numbers': any(char.isdigit() for char in text)
+        }
+    def _extract_basic_features(self, text):
+        """Extract basic features when text is too short"""
+        return {
+            'length': len(text),
+            'word_count': len(text.split()) if text else 0,
+            'urgency_score': 0.0,
+            'emotional_score': 0.0,
+            'vague_score': 0.0,
+            'has_numbers': False
+        }
+if __name__ == "__main__":
+    # Test the processor
+    processor = InsuranceTextProcessor()
+    test_claims = [
+        "My car was damaged in an accident yesterday. The front bumper needs replacement.",
+        "URGENT! I need immediate payment for my claim. This is an emergency situation!",
+        "The incident happened somehow around 3pm. Maybe the damage is approximately $5000."
+    ]
+    for i, claim in enumerate(test_claims, 1):
+        print(f"\nTest {i}: {claim[:50]}...")
+        result = processor.analyze_claim_text(claim)
+        print(f"Fraud Score: {result['fraud_score']:.3f}")
+        print(f"Confidence: {result['confidence']:.3f}")
+        print(f"Text Features: {result['text_features']}")