Upload core/ai/advanced_ai_features.py with huggingface_hub
core/ai/advanced_ai_features.py
ADDED
@@ -0,0 +1,614 @@
"""
Advanced AI-Powered Features for Enterprise Fraud Detection
"""

import asyncio
import math
from datetime import datetime
from typing import Any, Dict, List, Tuple

from core.cache.advanced_cache import get_api_cache_manager
from core.logging.advanced_logging import structured_logger


class AdvancedAIFeatures:
    """Advanced AI-powered features for fraud detection"""

    def __init__(self):
        self.logger = structured_logger
        self.cache_manager = get_api_cache_manager()

    async def predictive_fraud_scoring(self, transaction_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Advanced predictive fraud scoring using machine learning models
        """

        # Extract features from transaction data
        features = self._extract_transaction_features(transaction_data)

        # Multi-model scoring
        scores = await asyncio.gather(
            self._score_with_isolation_forest(features),
            self._score_with_autoencoder(features),
            self._score_with_gradient_boosting(features),
            self._score_with_neural_network(features)
        )

        # Ensemble scoring
        ensemble_score = self._calculate_ensemble_score(scores)

        # Risk assessment
        risk_level, confidence = self._assess_risk_level(ensemble_score)

        # Generate explanation
        explanation = self._generate_risk_explanation(features, ensemble_score, scores)

        result = {
            'transaction_id': transaction_data.get('id'),
            'fraud_score': round(ensemble_score, 4),
            'risk_level': risk_level,
            'confidence': round(confidence, 4),
            'model_scores': {
                'isolation_forest': round(scores[0], 4),
                'autoencoder': round(scores[1], 4),
                'gradient_boosting': round(scores[2], 4),
                'neural_network': round(scores[3], 4)
            },
            'explanation': explanation,
            'recommendations': self._generate_recommendations(risk_level, transaction_data),
            'timestamp': datetime.now().isoformat()
        }

        # Cache result
        if self.cache_manager:
            cache_key = f"fraud_score:{transaction_data.get('id')}"
            self.cache_manager.set(cache_key, result, ttl=3600)  # 1 hour

        return result

    def _extract_transaction_features(self, transaction: Dict[str, Any]) -> Dict[str, Any]:
        """Extract numerical features from transaction data"""

        features = {}

        # Amount-based features
        amount = float(transaction.get('amount', 0))
        features['amount'] = amount
        features['amount_log'] = math.log1p(amount) if amount > 0 else 0  # log(1 + amount) compresses large amounts
        features['amount_category'] = self._categorize_amount(amount)

        # Time-based features
        timestamp = transaction.get('timestamp')
        if timestamp:
            dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
            features['hour_of_day'] = dt.hour
            features['day_of_week'] = dt.weekday()
            features['is_weekend'] = 1 if dt.weekday() >= 5 else 0
            features['is_business_hours'] = 1 if 9 <= dt.hour <= 17 else 0

        # Location-based features
        location = transaction.get('location', {})
        features['location_risk'] = self._assess_location_risk(location)

        # Merchant-based features
        merchant = transaction.get('merchant', {})
        features['merchant_category_risk'] = self._assess_merchant_risk(merchant)

        # User behavior features
        user_history = transaction.get('user_history', {})
        features['velocity_1h'] = user_history.get('transactions_last_hour', 0)
        features['velocity_24h'] = user_history.get('transactions_last_24h', 0)
        features['avg_amount_30d'] = user_history.get('avg_amount_last_30d', 0)

        # Device and network features
        device = transaction.get('device', {})
        features['device_fingerprint_risk'] = self._assess_device_risk(device)

        return features

    def _categorize_amount(self, amount: float) -> int:
        """Categorize transaction amount"""
        if amount < 10:
            return 0  # Micro transaction
        elif amount < 100:
            return 1  # Small
        elif amount < 1000:
            return 2  # Medium
        elif amount < 10000:
            return 3  # Large
        else:
            return 4  # Very large

    def _assess_location_risk(self, location: Dict[str, Any]) -> float:
        """Assess location-based risk (0-1 scale)"""
        risk_score = 0.0

        # Check for unusual locations
        if location.get('is_unusual', False):
            risk_score += 0.3

        # Check distance from usual locations
        distance = location.get('distance_from_home', 0)
        if distance > 1000:  # More than 1000km
            risk_score += 0.2
        elif distance > 100:  # More than 100km
            risk_score += 0.1

        # Check for high-risk countries
        country_risk = location.get('country_risk_score', 0)
        risk_score += country_risk * 0.4

        return min(1.0, risk_score)

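    # Illustrative arithmetic for _assess_location_risk, with hypothetical inputs:
    # is_unusual=True (+0.3), distance_from_home=150 km (+0.1), and
    # country_risk_score=0.5 (0.5 * 0.4 = +0.2) yield location_risk = 0.6.
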
    def _assess_merchant_risk(self, merchant: Dict[str, Any]) -> float:
        """Assess merchant-based risk"""
        risk_score = 0.0

        # High-risk merchant categories
        high_risk_categories = ['gambling', 'adult', 'cryptocurrency', 'wire_transfer']
        category = merchant.get('category', '').lower()

        if any(risk_cat in category for risk_cat in high_risk_categories):
            risk_score += 0.4

        # New merchant
        if merchant.get('is_new', False):
            risk_score += 0.2

        # Unusual merchant for user
        if merchant.get('is_unusual_for_user', False):
            risk_score += 0.3

        return min(1.0, risk_score)

    def _assess_device_risk(self, device: Dict[str, Any]) -> float:
        """Assess device and network risk"""
        risk_score = 0.0

        # New device
        if device.get('is_new', False):
            risk_score += 0.3

        # Unusual device fingerprint
        if device.get('fingerprint_changed', False):
            risk_score += 0.2

        # VPN or proxy detection
        if device.get('using_vpn', False) or device.get('using_proxy', False):
            risk_score += 0.1

        # Unusual IP
        if device.get('ip_is_unusual', False):
            risk_score += 0.2

        return min(1.0, risk_score)

    async def _score_with_isolation_forest(self, features: Dict[str, Any]) -> float:
        """Score using Isolation Forest model"""
        # Simulate model prediction (would use actual ML model)
        # High-dimensional anomaly detection
        numeric_values = [x for x in features.values() if isinstance(x, (int, float))]
        if not numeric_values:
            return 0.0
        anomaly_score = sum(abs(x - 0.5) for x in numeric_values) / len(numeric_values)
        return min(1.0, anomaly_score * 2)

    async def _score_with_autoencoder(self, features: Dict[str, Any]) -> float:
        """Score using Autoencoder reconstruction error"""
        # Simulate reconstruction error (would use actual autoencoder)
        # Measures how well the transaction fits expected patterns
        expected_patterns = [0.1, 0.2, 0.3, 0.4, 0.5]  # Expected feature distributions
        numeric_values = [x for x in features.values() if isinstance(x, (int, float))]
        reconstruction_error = sum(
            abs(actual - expected)
            for actual, expected in zip(numeric_values, expected_patterns)
        )
        return min(1.0, reconstruction_error / 2)

    async def _score_with_gradient_boosting(self, features: Dict[str, Any]) -> float:
        """Score using Gradient Boosting model"""
        # Simulate ensemble model prediction
        # Would use features like amount, location, merchant, user behavior
        base_score = 0.1

        # Amount risk
        if features.get('amount', 0) > 1000:
            base_score += 0.3

        # Location risk
        base_score += features.get('location_risk', 0) * 0.2

        # Merchant risk
        base_score += features.get('merchant_category_risk', 0) * 0.2

        # Velocity risk
        velocity_1h = features.get('velocity_1h', 0)
        if velocity_1h > 5:
            base_score += 0.2

        return min(1.0, base_score)

    async def _score_with_neural_network(self, features: Dict[str, Any]) -> float:
        """Score using Neural Network model"""
        # Simulate deep learning model prediction
        # Would use complex feature interactions and temporal patterns

        # Simple neural network simulation
        inputs = [
            features.get('amount_log', 0) / 10,
            features.get('location_risk', 0),
            features.get('merchant_category_risk', 0),
            features.get('velocity_1h', 0) / 10,
            features.get('device_fingerprint_risk', 0)
        ]

        # Simulate hidden layer processing
        hidden_layer = [max(0, x * 0.5 + 0.1) for x in inputs]  # ReLU activation
        output = sum(hidden_layer) / len(hidden_layer)  # Simple average

        return min(1.0, output)

    def _calculate_ensemble_score(self, scores: List[float]) -> float:
        """Calculate ensemble score from multiple models"""
        if not scores:
            return 0.0

        # Weighted ensemble (neural network gets higher weight)
        weights = [0.2, 0.2, 0.3, 0.3]  # Isolation Forest, Autoencoder, GB, NN

        ensemble_score = sum(score * weight for score, weight in zip(scores, weights))

        # Apply sigmoid transformation for better distribution
        return 1 / (1 + math.exp(-(ensemble_score * 5 - 2.5)))

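    # Illustrative arithmetic for _calculate_ensemble_score: with all four model
    # scores at 0.5, the weighted sum is 0.5, the sigmoid input is
    # 0.5 * 5 - 2.5 = 0, and the ensemble score comes out exactly 0.5.
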
    def _assess_risk_level(self, score: float) -> Tuple[str, float]:
        """Assess risk level from fraud score"""

        if score >= 0.8:
            return "critical", 0.95
        elif score >= 0.6:
            return "high", 0.85
        elif score >= 0.4:
            return "medium", 0.75
        elif score >= 0.2:
            return "low", 0.65
        else:
            return "very_low", 0.55

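    # For example, _assess_risk_level(0.72) returns ("high", 0.85) and
    # _assess_risk_level(0.15) returns ("very_low", 0.55).
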
    def _generate_risk_explanation(self, features: Dict[str, Any], ensemble_score: float,
                                   model_scores: List[float]) -> Dict[str, Any]:
        """Generate human-readable risk explanation"""

        explanations = []

        # Amount analysis
        amount = features.get('amount', 0)
        if amount > 1000:
            explanations.append(f"High transaction amount (${amount}) contributes to elevated risk")

        # Location analysis
        location_risk = features.get('location_risk', 0)
        if location_risk > 0.5:
            explanations.append("Transaction location is unusual or high-risk")

        # Velocity analysis
        velocity_1h = features.get('velocity_1h', 0)
        if velocity_1h > 3:
            explanations.append(f"High transaction velocity ({velocity_1h} transactions in last hour)")

        # Merchant analysis
        merchant_risk = features.get('merchant_category_risk', 0)
        if merchant_risk > 0.5:
            explanations.append("Merchant category is considered high-risk")

        # Device analysis
        device_risk = features.get('device_fingerprint_risk', 0)
        if device_risk > 0.5:
            explanations.append("Device or network characteristics are suspicious")

        # Model agreement analysis
        model_agreement = len([s for s in model_scores if s > 0.5]) / len(model_scores)
        if model_agreement > 0.75:
            explanations.append("Multiple AI models agree on elevated risk")
        elif model_agreement < 0.25:
            explanations.append("AI models show low consensus on risk assessment")

        return {
            'score': round(ensemble_score, 4),
            'confidence': round(model_agreement, 2),
            'key_factors': explanations[:5],  # Top 5 factors
            'model_consensus': f"{int(model_agreement * 100)}% of models indicate risk"
        }

    def _generate_recommendations(self, risk_level: str, transaction: Dict[str, Any]) -> List[str]:
        """Generate risk mitigation recommendations"""

        recommendations = []

        if risk_level in ['critical', 'high']:
            recommendations.extend([
                "🚨 IMMEDIATE ACTION REQUIRED",
                "Hold transaction for manual review",
                "Contact customer for verification",
                "Flag account for enhanced monitoring",
                "Consider transaction blocking"
            ])

        elif risk_level == 'medium':
            recommendations.extend([
                "⚠️ ENHANCED MONITORING ADVISED",
                "Send additional verification request",
                "Monitor account for 24 hours",
                "Review transaction pattern",
                "Consider step-up authentication"
            ])

        elif risk_level == 'low':
            recommendations.extend([
                "✅ LOW RISK - PROCEED WITH CAUTION",
                "Log transaction for pattern analysis",
                "Continue normal processing",
                "Monitor for velocity changes"
            ])

        else:  # very_low
            recommendations.extend([
                "✅ VERY LOW RISK - NORMAL PROCESSING",
                "No additional actions required",
                "Continue standard fraud monitoring"
            ])

        # Transaction-specific recommendations
        amount = transaction.get('amount', 0)
        if amount > 5000 and risk_level in ['high', 'critical']:
            recommendations.append(f"⚠️ High-value transaction (${amount}) requires executive approval")

        return recommendations

    async def behavioral_pattern_analysis(self, user_id: str, transaction_history: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze user behavioral patterns for advanced fraud detection
        """

        # Extract temporal patterns
        temporal_patterns = self._analyze_temporal_patterns(transaction_history)

        # Extract amount patterns
        amount_patterns = self._analyze_amount_patterns(transaction_history)

        # Extract merchant patterns
        merchant_patterns = self._analyze_merchant_patterns(transaction_history)

        # Calculate behavioral score
        behavioral_score = self._calculate_behavioral_score(
            temporal_patterns, amount_patterns, merchant_patterns
        )

        # Detect anomalies
        anomalies = self._detect_behavioral_anomalies(
            transaction_history, temporal_patterns, amount_patterns, merchant_patterns
        )

        result = {
            'user_id': user_id,
            'behavioral_score': round(behavioral_score, 4),
            'patterns': {
                'temporal': temporal_patterns,
                'amount': amount_patterns,
                'merchant': merchant_patterns
            },
            'anomalies': anomalies,
            'risk_assessment': self._assess_behavioral_risk(behavioral_score, anomalies),
            'timestamp': datetime.now().isoformat()
        }

        return result

    def _analyze_temporal_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze temporal spending patterns"""

        if not transactions:
            return {'pattern': 'insufficient_data'}

        # Group by hour of day
        hourly_counts = {}
        for tx in transactions:
            timestamp = tx.get('timestamp')
            if timestamp:
                dt = datetime.fromisoformat(timestamp.replace('Z', '+00:00'))
                hour = dt.hour
                hourly_counts[hour] = hourly_counts.get(hour, 0) + 1

        # Find peak hours
        peak_hour = max(hourly_counts.keys(), key=lambda h: hourly_counts[h]) if hourly_counts else 0
        peak_count = hourly_counts.get(peak_hour, 0)

        # Calculate consistency score (clamped to [0, 1]; a sharp peak would otherwise push it negative)
        total_tx = len(transactions)
        expected_per_hour = total_tx / 24
        consistency_score = max(0.0, 1 - (abs(peak_count - expected_per_hour) / max(expected_per_hour, 1)))

        return {
            'peak_hour': peak_hour,
            'peak_transactions': peak_count,
            'consistency_score': round(consistency_score, 3),
            'total_transactions': total_tx
        }

    def _analyze_amount_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze spending amount patterns"""

        if not transactions:
            return {'pattern': 'insufficient_data'}

        amounts = [tx.get('amount', 0) for tx in transactions if tx.get('amount', 0) > 0]

        if not amounts:
            return {'pattern': 'no_amount_data'}

        avg_amount = sum(amounts) / len(amounts)
        median_amount = sorted(amounts)[len(amounts) // 2]

        # Calculate variance
        variance = sum((x - avg_amount) ** 2 for x in amounts) / len(amounts)
        std_dev = variance ** 0.5

        # Categorize spending pattern
        cv = std_dev / avg_amount if avg_amount > 0 else 0  # Coefficient of variation

        if cv < 0.3:
            pattern = 'consistent'
            risk = 'low'
        elif cv < 0.7:
            pattern = 'moderate_variation'
            risk = 'medium'
        else:
            pattern = 'highly_variable'
            risk = 'high'

        return {
            'average_amount': round(avg_amount, 2),
            'median_amount': round(median_amount, 2),
            'standard_deviation': round(std_dev, 2),
            'coefficient_of_variation': round(cv, 3),
            'spending_pattern': pattern,
            'pattern_risk': risk,
            'transaction_count': len(amounts)
        }

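    # Illustrative arithmetic for _analyze_amount_patterns: amounts [90, 100, 110]
    # give mean 100, population std dev ~8.16, and cv ~0.082 < 0.3, so the
    # spending pattern is 'consistent' with 'low' pattern risk.
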
    def _analyze_merchant_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze merchant spending patterns"""

        if not transactions:
            return {'pattern': 'insufficient_data'}

        # Group by merchant
        merchant_counts = {}
        merchant_categories = {}

        for tx in transactions:
            merchant = tx.get('merchant', {}).get('name', 'unknown')
            category = tx.get('merchant', {}).get('category', 'unknown')

            merchant_counts[merchant] = merchant_counts.get(merchant, 0) + 1
            if category != 'unknown':
                merchant_categories[category] = merchant_categories.get(category, 0) + 1

        # Find most frequent merchant and category
        top_merchant = max(merchant_counts.keys(), key=lambda m: merchant_counts[m]) if merchant_counts else None
        top_category = max(merchant_categories.keys(), key=lambda c: merchant_categories[c]) if merchant_categories else None

        # Calculate merchant diversity
        unique_merchants = len(merchant_counts)
        total_transactions = len(transactions)
        merchant_diversity = unique_merchants / total_transactions if total_transactions > 0 else 0

        # Assess pattern
        if merchant_diversity < 0.1:
            pattern = 'highly_repetitive'
            risk = 'medium'  # Could indicate account takeover
        elif merchant_diversity < 0.3:
            pattern = 'somewhat_repetitive'
            risk = 'low'
        else:
            pattern = 'diverse'
            risk = 'low'

        return {
            'top_merchant': top_merchant,
            'top_category': top_category,
            'unique_merchants': unique_merchants,
            'merchant_diversity': round(merchant_diversity, 3),
            'merchant_pattern': pattern,
            'pattern_risk': risk
        }

    def _calculate_behavioral_score(self, temporal: Dict, amount: Dict, merchant: Dict) -> float:
        """Calculate overall behavioral risk score"""

        score = 0.0

        # Temporal consistency (lower consistency = higher risk)
        temporal_consistency = temporal.get('consistency_score', 0.5)
        score += (1 - temporal_consistency) * 0.3

        # Amount variation (higher variation = higher risk)
        amount_cv = amount.get('coefficient_of_variation', 0)
        if amount_cv > 0.5:
            score += 0.3
        elif amount_cv > 0.3:
            score += 0.15

        # Merchant diversity (lower diversity = higher risk)
        merchant_diversity = merchant.get('merchant_diversity', 0.5)
        score += (1 - merchant_diversity) * 0.4

        return min(1.0, score)

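    # Illustrative arithmetic for _calculate_behavioral_score: consistency 0.8
    # contributes (1 - 0.8) * 0.3 = 0.06, cv 0.6 contributes 0.3, and merchant
    # diversity 0.5 contributes (1 - 0.5) * 0.4 = 0.2, for a score of 0.56.
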
    def _detect_behavioral_anomalies(self, transactions: List[Dict], temporal: Dict,
                                     amount: Dict, merchant: Dict) -> List[Dict[str, Any]]:
        """Detect behavioral anomalies"""

        anomalies = []

        # Check for unusual timing
        current_hour = datetime.now().hour
        peak_hour = temporal.get('peak_hour', 12)

        if abs(current_hour - peak_hour) > 8:  # More than 8 hours from usual peak
            anomalies.append({
                'type': 'temporal_anomaly',
                'description': f'Transaction at unusual hour ({current_hour}) vs peak hour ({peak_hour})',
                'severity': 'medium'
            })

        # Check for unusual amounts
        if transactions:
            recent_amounts = [tx.get('amount', 0) for tx in transactions[-10:]]  # Last 10 transactions
            avg_recent = sum(recent_amounts) / len(recent_amounts) if recent_amounts else 0

            if transactions[-1].get('amount', 0) > avg_recent * 3:  # 3x higher than recent average
                anomalies.append({
                    'type': 'amount_anomaly',
                    'description': 'Transaction amount significantly higher than recent average',
                    'severity': 'high'
                })

        # Check for new merchant categories
        # This would require historical merchant data comparison

        return anomalies

    def _assess_behavioral_risk(self, score: float, anomalies: List[Dict]) -> Dict[str, Any]:
        """Assess overall behavioral risk"""

        # Adjust the base score for anomalies, boosted by severity
        anomaly_severity_boost = sum(
            {'low': 0.1, 'medium': 0.2, 'high': 0.3}.get(a.get('severity', 'low'), 0)
            for a in anomalies
        )

        adjusted_score = min(1.0, score + anomaly_severity_boost)

        if adjusted_score >= 0.8:
            final_risk = 'critical'
        elif adjusted_score >= 0.6:
            final_risk = 'high'
        elif adjusted_score >= 0.4:
            final_risk = 'medium'
        else:
            final_risk = 'low'

        return {
            'base_score': round(score, 4),
            'adjusted_score': round(adjusted_score, 4),
            'anomaly_count': len(anomalies),
            'risk_level': final_risk,
            'confidence': round(min(1.0, 0.8 + len(anomalies) * 0.05), 2)  # More anomalies -> higher confidence, capped at 1.0
        }


# Global AI features instance
ai_features = AdvancedAIFeatures()
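

if __name__ == "__main__":
    # Minimal smoke test of the scoring pipeline. The transaction below is a
    # hypothetical sample for illustration, not a schema enforced elsewhere in
    # the codebase; running it assumes the core.cache and core.logging modules
    # are importable in this environment.
    sample_transaction = {
        'id': 'tx-0001',
        'amount': 2500.0,
        'timestamp': '2024-01-15T03:12:00Z',
        'location': {'is_unusual': True, 'distance_from_home': 450, 'country_risk_score': 0.2},
        'merchant': {'name': 'Example Exchange', 'category': 'cryptocurrency', 'is_new': True},
        'user_history': {'transactions_last_hour': 4, 'transactions_last_24h': 9, 'avg_amount_last_30d': 120.0},
        'device': {'is_new': True, 'using_vpn': True},
    }
    report = asyncio.run(ai_features.predictive_fraud_scoring(sample_transaction))
    print(f"fraud_score={report['fraud_score']} risk_level={report['risk_level']}")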