teoat committed on
Commit
9773495
·
verified ·
1 Parent(s): 2a5f5af

Upload core/ai/advanced_ai_features.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. core/ai/advanced_ai_features.py +614 -0
core/ai/advanced_ai_features.py ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Advanced AI-Powered Features for Enterprise Fraud Detection
3
+ """
4
+
5
import asyncio
import math
import statistics
from collections import Counter
from datetime import datetime
from typing import Any, Dict, List, Tuple

from core.cache.advanced_cache import get_api_cache_manager
from core.logging.advanced_logging import structured_logger
11
+
12
+
13
class AdvancedAIFeatures:
    """Advanced AI-powered features for fraud detection.

    The per-model scorers in this class are heuristic simulations (see the
    individual ``_score_with_*`` methods); they do not load trained models.
    All scores are floats on a 0-1 scale, higher meaning riskier.
    """

    # Ensemble weights, in the order the model scores are gathered:
    # isolation forest, autoencoder, gradient boosting, neural network.
    _ENSEMBLE_WEIGHTS = (0.2, 0.2, 0.3, 0.3)

    # Merchant category substrings that are treated as high-risk.
    _HIGH_RISK_CATEGORIES = ('gambling', 'adult', 'cryptocurrency', 'wire_transfer')

    # Score boost contributed by one anomaly of the given severity.
    _SEVERITY_BOOST = {'low': 0.1, 'medium': 0.2, 'high': 0.3}

    def __init__(self):
        self.logger = structured_logger
        self.cache_manager = get_api_cache_manager()

    # ------------------------------------------------------------------
    # Small shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _parse_timestamp(value: Any):
        """Return a ``datetime`` for an ISO-8601 string or ``datetime``.

        Falsy values (``None``, ``""``) yield ``None``. A trailing ``Z`` is
        rewritten to ``+00:00`` so ``datetime.fromisoformat`` accepts it.

        Raises:
            ValueError: if a non-empty string is not valid ISO-8601.
        """
        if not value:
            return None
        if isinstance(value, datetime):
            return value
        return datetime.fromisoformat(value.replace('Z', '+00:00'))

    @staticmethod
    def _hour_distance(hour_a: int, hour_b: int) -> int:
        """Return the distance between two hours on a 24-hour clock (0-12)."""
        diff = abs(hour_a - hour_b) % 24
        return min(diff, 24 - diff)

    # ------------------------------------------------------------------
    # Predictive fraud scoring
    # ------------------------------------------------------------------

    async def predictive_fraud_scoring(self, transaction_data: Dict[str, Any]) -> Dict[str, Any]:
        """Score a single transaction with the simulated model ensemble.

        Args:
            transaction_data: Transaction dict. Keys read here and by the
                helpers: ``id``, ``amount``, ``timestamp``, ``location``,
                ``merchant``, ``user_history``, ``device``.

        Returns:
            Dict with ``transaction_id``, ``fraud_score``, ``risk_level``,
            ``confidence``, ``model_scores``, ``explanation``,
            ``recommendations`` and ``timestamp``.
        """
        features = self._extract_transaction_features(transaction_data)

        # Run the four simulated models concurrently; order matters because
        # the ensemble weights are positional.
        scores = await asyncio.gather(
            self._score_with_isolation_forest(features),
            self._score_with_autoencoder(features),
            self._score_with_gradient_boosting(features),
            self._score_with_neural_network(features),
        )

        ensemble_score = self._calculate_ensemble_score(scores)
        risk_level, confidence = self._assess_risk_level(ensemble_score)
        explanation = self._generate_risk_explanation(features, ensemble_score, scores)

        transaction_id = transaction_data.get('id')
        result = {
            'transaction_id': transaction_id,
            'fraud_score': round(ensemble_score, 4),
            'risk_level': risk_level,
            'confidence': round(confidence, 4),
            'model_scores': {
                'isolation_forest': round(scores[0], 4),
                'autoencoder': round(scores[1], 4),
                'gradient_boosting': round(scores[2], 4),
                'neural_network': round(scores[3], 4),
            },
            'explanation': explanation,
            'recommendations': self._generate_recommendations(risk_level, transaction_data),
            'timestamp': datetime.now().isoformat(),
        }

        # Cache for one hour. Skip transactions without an id: they would all
        # share the key "fraud_score:None" and overwrite one another.
        if self.cache_manager and transaction_id is not None:
            cache_key = f"fraud_score:{transaction_id}"
            self.cache_manager.set(cache_key, result, ttl=3600)

        return result

    def _extract_transaction_features(self, transaction: Dict[str, Any]) -> Dict[str, Any]:
        """Extract numerical features from transaction data.

        Missing or ``None`` sub-structures are treated as empty. The time
        features are only present when the transaction has a timestamp.
        """
        features: Dict[str, Any] = {}

        # Amount-based features
        amount = float(transaction.get('amount') or 0)
        features['amount'] = amount
        # log1p keeps the feature at 0 for a zero amount; non-positive amounts
        # map to 0 as well (log is undefined there).
        features['amount_log'] = math.log1p(amount) if amount > 0 else 0.0
        features['amount_category'] = self._categorize_amount(amount)

        # Time-based features
        dt = self._parse_timestamp(transaction.get('timestamp'))
        if dt is not None:
            features['hour_of_day'] = dt.hour
            features['day_of_week'] = dt.weekday()
            features['is_weekend'] = 1 if dt.weekday() >= 5 else 0
            features['is_business_hours'] = 1 if 9 <= dt.hour <= 17 else 0

        # Location-based features
        features['location_risk'] = self._assess_location_risk(transaction.get('location') or {})

        # Merchant-based features
        features['merchant_category_risk'] = self._assess_merchant_risk(transaction.get('merchant') or {})

        # User behavior features
        user_history = transaction.get('user_history') or {}
        features['velocity_1h'] = user_history.get('transactions_last_hour', 0)
        features['velocity_24h'] = user_history.get('transactions_last_24h', 0)
        features['avg_amount_30d'] = user_history.get('avg_amount_last_30d', 0)

        # Device and network features
        features['device_fingerprint_risk'] = self._assess_device_risk(transaction.get('device') or {})

        return features

    def _categorize_amount(self, amount: float) -> int:
        """Bucket an amount: 0 micro (<10), 1 small, 2 medium, 3 large, 4 very large (>=10000)."""
        for bucket, upper_bound in enumerate((10, 100, 1000, 10000)):
            if amount < upper_bound:
                return bucket
        return 4

    def _assess_location_risk(self, location: Dict[str, Any]) -> float:
        """Assess location-based risk (0-1 scale)."""
        location = location or {}
        risk_score = 0.0

        # Unusual location flag
        if location.get('is_unusual', False):
            risk_score += 0.3

        # Distance from usual locations (compared against 100 and 1000)
        distance = location.get('distance_from_home') or 0
        if distance > 1000:
            risk_score += 0.2
        elif distance > 100:
            risk_score += 0.1

        # Country risk contributes up to 0.4 for a country score of 1.0
        risk_score += (location.get('country_risk_score') or 0) * 0.4

        # Clamp on both sides so a negative country score cannot go below 0.
        return max(0.0, min(1.0, risk_score))

    def _assess_merchant_risk(self, merchant: Dict[str, Any]) -> float:
        """Assess merchant-based risk (0-1 scale)."""
        merchant = merchant or {}
        risk_score = 0.0

        # Substring match, so e.g. "online_gambling" also counts as high-risk.
        category = str(merchant.get('category') or '').lower()
        if any(risk_cat in category for risk_cat in self._HIGH_RISK_CATEGORIES):
            risk_score += 0.4

        # New merchant
        if merchant.get('is_new', False):
            risk_score += 0.2

        # Unusual merchant for user
        if merchant.get('is_unusual_for_user', False):
            risk_score += 0.3

        return min(1.0, risk_score)

    def _assess_device_risk(self, device: Dict[str, Any]) -> float:
        """Assess device and network risk (0-1 scale)."""
        device = device or {}
        risk_score = 0.0

        # New device
        if device.get('is_new', False):
            risk_score += 0.3

        # Device fingerprint changed
        if device.get('fingerprint_changed', False):
            risk_score += 0.2

        # VPN or proxy detection
        if device.get('using_vpn', False) or device.get('using_proxy', False):
            risk_score += 0.1

        # Unusual IP
        if device.get('ip_is_unusual', False):
            risk_score += 0.2

        return min(1.0, risk_score)

    async def _score_with_isolation_forest(self, features: Dict[str, Any]) -> float:
        """Simulated Isolation Forest score (no real model is used).

        Returns twice the mean absolute deviation of the numeric features
        from 0.5, capped at 1.0; 0.0 when there are no numeric features.
        """
        numeric_values = [v for v in features.values() if isinstance(v, (int, float))]
        if not numeric_values:
            return 0.0
        # Average over the values that were actually summed.
        anomaly_score = sum(abs(v - 0.5) for v in numeric_values) / len(numeric_values)
        return min(1.0, anomaly_score * 2)

    async def _score_with_autoencoder(self, features: Dict[str, Any]) -> float:
        """Simulated autoencoder reconstruction-error score (no real model is used).

        NOTE(review): this reads the keys ``feature_0`` .. ``feature_4``, which
        ``_extract_transaction_features`` never produces, so for features built
        by this class every lookup falls back to 0 and the result is a constant
        0.75. Left unchanged because the intended feature mapping is not known.
        """
        expected_patterns = (0.1, 0.2, 0.3, 0.4, 0.5)
        reconstruction_error = sum(
            abs(features.get(f'feature_{i}', 0) - expected)
            for i, expected in enumerate(expected_patterns)
        )
        return min(1.0, reconstruction_error / 2)

    async def _score_with_gradient_boosting(self, features: Dict[str, Any]) -> float:
        """Simulated Gradient Boosting score: additive rule-based heuristic."""
        base_score = 0.1

        # Amount risk
        if features.get('amount', 0) > 1000:
            base_score += 0.3

        # Location risk
        base_score += features.get('location_risk', 0) * 0.2

        # Merchant risk
        base_score += features.get('merchant_category_risk', 0) * 0.2

        # Velocity risk
        if features.get('velocity_1h', 0) > 5:
            base_score += 0.2

        return min(1.0, base_score)

    async def _score_with_neural_network(self, features: Dict[str, Any]) -> float:
        """Simulated neural-network score: one ReLU layer averaged into one output."""
        inputs = [
            features.get('amount_log', 0) / 10,
            features.get('location_risk', 0),
            features.get('merchant_category_risk', 0),
            features.get('velocity_1h', 0) / 10,
            features.get('device_fingerprint_risk', 0),
        ]

        # "Hidden layer": same weight (0.5) and bias (0.1) per input, ReLU activation.
        hidden_layer = [max(0, x * 0.5 + 0.1) for x in inputs]
        output = sum(hidden_layer) / len(hidden_layer)

        return min(1.0, output)

    def _calculate_ensemble_score(self, scores: List[float]) -> float:
        """Combine model scores into one score via a weighted sum and a sigmoid.

        Returns 0.0 for an empty score list. Scores are paired positionally
        with ``_ENSEMBLE_WEIGHTS``; extra scores are ignored.
        """
        if not scores:
            return 0.0

        weighted = sum(score * weight for score, weight in zip(scores, self._ENSEMBLE_WEIGHTS))

        # Sigmoid centred on a weighted score of 0.5 (5 * 0.5 - 2.5 == 0).
        return 1 / (1 + math.exp(-(weighted * 5 - 2.5)))

    def _assess_risk_level(self, score: float) -> Tuple[str, float]:
        """Map a fraud score to a ``(risk_level, confidence)`` pair."""
        thresholds = (
            (0.8, "critical", 0.95),
            (0.6, "high", 0.85),
            (0.4, "medium", 0.75),
            (0.2, "low", 0.65),
        )
        for lower_bound, level, confidence in thresholds:
            if score >= lower_bound:
                return level, confidence
        return "very_low", 0.55

    def _generate_risk_explanation(self, features: Dict[str, Any], ensemble_score: float,
                                   model_scores: List[float]) -> Dict[str, Any]:
        """Generate a human-readable risk explanation.

        Returns:
            Dict with ``score``, ``confidence`` (fraction of models scoring
            above 0.5), ``key_factors`` (at most five) and ``model_consensus``.
        """
        explanations = []

        # Amount analysis
        amount = features.get('amount', 0)
        if amount > 1000:
            explanations.append(f"High transaction amount (${amount}) contributes to elevated risk")

        # Location analysis
        if features.get('location_risk', 0) > 0.5:
            explanations.append("Transaction location is unusual or high-risk")

        # Velocity analysis
        velocity_1h = features.get('velocity_1h', 0)
        if velocity_1h > 3:
            explanations.append(f"High transaction velocity ({velocity_1h} transactions in last hour)")

        # Merchant analysis
        if features.get('merchant_category_risk', 0) > 0.5:
            explanations.append("Merchant category is considered high-risk")

        # Device analysis
        if features.get('device_fingerprint_risk', 0) > 0.5:
            explanations.append("Device or network characteristics are suspicious")

        # Model agreement analysis. With no model scores there is nothing to
        # agree on: report 0 and add no consensus statement (avoids dividing by zero).
        if model_scores:
            model_agreement = sum(1 for s in model_scores if s > 0.5) / len(model_scores)
            if model_agreement > 0.75:
                explanations.append("Multiple AI models agree on elevated risk")
            elif model_agreement < 0.25:
                explanations.append("AI models show low consensus on risk assessment")
        else:
            model_agreement = 0.0

        return {
            'score': round(ensemble_score, 4),
            'confidence': round(model_agreement, 2),
            'key_factors': explanations[:5],  # Top 5 factors
            'model_consensus': f"{int(model_agreement * 100)}% of models indicate risk",
        }

    def _generate_recommendations(self, risk_level: str, transaction: Dict[str, Any]) -> List[str]:
        """Generate risk mitigation recommendations for the given risk level."""
        if risk_level in ('critical', 'high'):
            recommendations = [
                "🚨 IMMEDIATE ACTION REQUIRED",
                "Hold transaction for manual review",
                "Contact customer for verification",
                "Flag account for enhanced monitoring",
                "Consider transaction blocking",
            ]
        elif risk_level == 'medium':
            recommendations = [
                "⚠️ ENHANCED MONITORING ADVISED",
                "Send additional verification request",
                "Monitor account for 24 hours",
                "Review transaction pattern",
                "Consider step-up authentication",
            ]
        elif risk_level == 'low':
            recommendations = [
                "✅ LOW RISK - PROCEED WITH CAUTION",
                "Log transaction for pattern analysis",
                "Continue normal processing",
                "Monitor for velocity changes",
            ]
        else:  # very_low
            recommendations = [
                "✅ VERY LOW RISK - NORMAL PROCESSING",
                "No additional actions required",
                "Continue standard fraud monitoring",
            ]

        # Transaction-specific recommendation. The amount is coerced with
        # float() for the comparison, as feature extraction does, so a numeric
        # string or None does not raise; the message shows the raw value.
        amount = transaction.get('amount', 0)
        if risk_level in ('high', 'critical') and float(amount or 0) > 5000:
            recommendations.append(f"⚠️ High-value transaction (${amount}) requires executive approval")

        return recommendations

    # ------------------------------------------------------------------
    # Behavioral pattern analysis
    # ------------------------------------------------------------------

    async def behavioral_pattern_analysis(self, user_id: str, transaction_history: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze a user's transaction history for behavioral fraud signals.

        Args:
            user_id: Identifier echoed back in the result.
            transaction_history: Transaction dicts; the last element is
                treated as the most recent transaction.

        Returns:
            Dict with ``user_id``, ``behavioral_score``, ``patterns``,
            ``anomalies``, ``risk_assessment`` and ``timestamp``.
        """
        temporal_patterns = self._analyze_temporal_patterns(transaction_history)
        amount_patterns = self._analyze_amount_patterns(transaction_history)
        merchant_patterns = self._analyze_merchant_patterns(transaction_history)

        behavioral_score = self._calculate_behavioral_score(
            temporal_patterns, amount_patterns, merchant_patterns
        )

        anomalies = self._detect_behavioral_anomalies(
            transaction_history, temporal_patterns, amount_patterns, merchant_patterns
        )

        return {
            'user_id': user_id,
            'behavioral_score': round(behavioral_score, 4),
            'patterns': {
                'temporal': temporal_patterns,
                'amount': amount_patterns,
                'merchant': merchant_patterns,
            },
            'anomalies': anomalies,
            'risk_assessment': self._assess_behavioral_risk(behavioral_score, anomalies),
            'timestamp': datetime.now().isoformat(),
        }

    def _analyze_temporal_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze temporal spending patterns (peak hour and consistency)."""
        if not transactions:
            return {'pattern': 'insufficient_data'}

        # Count transactions per hour of day; transactions without a timestamp are skipped.
        hourly_counts: Counter = Counter()
        for tx in transactions:
            dt = self._parse_timestamp(tx.get('timestamp'))
            if dt is not None:
                hourly_counts[dt.hour] += 1

        # Peak hour: first hour (in encounter order) with the highest count.
        peak_hour = max(hourly_counts, key=hourly_counts.get) if hourly_counts else 0
        peak_count = hourly_counts.get(peak_hour, 0)

        # Consistency: how close the peak is to a uniform spread over 24 hours.
        # Clamped at 0 because the behavioral score weights this component as a
        # 0-1 value; unclamped it goes far negative for concentrated histories.
        total_tx = len(transactions)
        expected_per_hour = total_tx / 24
        deviation = abs(peak_count - expected_per_hour) / max(expected_per_hour, 1)
        consistency_score = max(0.0, 1 - deviation)

        return {
            'peak_hour': peak_hour,
            'peak_transactions': peak_count,
            'consistency_score': round(consistency_score, 3),
            'total_transactions': total_tx,
        }

    def _analyze_amount_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze spending amount patterns (mean, median, spread)."""
        if not transactions:
            return {'pattern': 'insufficient_data'}

        # Only strictly positive amounts take part; None counts as 0.
        amounts = [amt for amt in ((tx.get('amount') or 0) for tx in transactions) if amt > 0]
        if not amounts:
            return {'pattern': 'no_amount_data'}

        avg_amount = sum(amounts) / len(amounts)
        # statistics.median averages the two middle values for even-length input.
        median_amount = statistics.median(amounts)

        # Population variance / standard deviation
        variance = sum((x - avg_amount) ** 2 for x in amounts) / len(amounts)
        std_dev = variance ** 0.5

        # Coefficient of variation drives the pattern label.
        cv = std_dev / avg_amount if avg_amount > 0 else 0
        if cv < 0.3:
            pattern, risk = 'consistent', 'low'
        elif cv < 0.7:
            pattern, risk = 'moderate_variation', 'medium'
        else:
            pattern, risk = 'highly_variable', 'high'

        return {
            'average_amount': round(avg_amount, 2),
            'median_amount': round(median_amount, 2),
            'standard_deviation': round(std_dev, 2),
            'coefficient_of_variation': round(cv, 3),
            'spending_pattern': pattern,
            'pattern_risk': risk,
            'transaction_count': len(amounts),
        }

    def _analyze_merchant_patterns(self, transactions: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Analyze merchant spending patterns (top merchant/category, diversity)."""
        if not transactions:
            return {'pattern': 'insufficient_data'}

        merchant_counts: Counter = Counter()
        category_counts: Counter = Counter()
        for tx in transactions:
            merchant = tx.get('merchant') or {}  # tolerate a missing or None merchant
            merchant_counts[merchant.get('name', 'unknown')] += 1
            category = merchant.get('category', 'unknown')
            if category != 'unknown':
                category_counts[category] += 1

        # Most frequent merchant and category (first seen wins ties)
        top_merchant = max(merchant_counts, key=merchant_counts.get) if merchant_counts else None
        top_category = max(category_counts, key=category_counts.get) if category_counts else None

        # Merchant diversity: unique merchants per transaction
        unique_merchants = len(merchant_counts)
        merchant_diversity = unique_merchants / len(transactions)

        if merchant_diversity < 0.1:
            pattern, risk = 'highly_repetitive', 'medium'  # Could indicate account takeover
        elif merchant_diversity < 0.3:
            pattern, risk = 'somewhat_repetitive', 'low'
        else:
            pattern, risk = 'diverse', 'low'

        return {
            'top_merchant': top_merchant,
            'top_category': top_category,
            'unique_merchants': unique_merchants,
            'merchant_diversity': round(merchant_diversity, 3),
            'merchant_pattern': pattern,
            'pattern_risk': risk,
        }

    def _calculate_behavioral_score(self, temporal: Dict, amount: Dict, merchant: Dict) -> float:
        """Calculate the overall behavioral risk score (capped at 1.0).

        Components: temporal inconsistency (weight 0.3), amount variation
        (up to 0.3) and lack of merchant diversity (weight 0.4).
        """
        score = 0.0

        # Temporal consistency (lower consistency = higher risk)
        score += (1 - temporal.get('consistency_score', 0.5)) * 0.3

        # Amount variation (higher variation = higher risk)
        amount_cv = amount.get('coefficient_of_variation', 0)
        if amount_cv > 0.5:
            score += 0.3
        elif amount_cv > 0.3:
            score += 0.15

        # Merchant diversity (lower diversity = higher risk)
        score += (1 - merchant.get('merchant_diversity', 0.5)) * 0.4

        return min(1.0, score)

    def _detect_behavioral_anomalies(self, transactions: List[Dict], temporal: Dict,
                                     amount: Dict, merchant: Dict) -> List[Dict[str, Any]]:
        """Detect behavioral anomalies (timing and amount).

        ``amount`` and ``merchant`` are accepted for interface compatibility
        but are not read.
        """
        anomalies = []

        # Unusual timing: compares the current wall-clock hour with the user's
        # peak hour, measured around the clock so 23:00 and 01:00 are 2 hours apart.
        current_hour = datetime.now().hour
        peak_hour = temporal.get('peak_hour', 12)
        if self._hour_distance(current_hour, peak_hour) > 8:
            anomalies.append({
                'type': 'temporal_anomaly',
                'description': f'Transaction at unusual hour ({current_hour}) vs peak hour ({peak_hour})',
                'severity': 'medium',
            })

        # Unusual amount: latest transaction vs the mean of the last 10
        # (the latest transaction is itself part of that window).
        if transactions:
            recent_amounts = [tx.get('amount') or 0 for tx in transactions[-10:]]
            avg_recent = sum(recent_amounts) / len(recent_amounts)
            if recent_amounts[-1] > avg_recent * 3:
                anomalies.append({
                    'type': 'amount_anomaly',
                    'description': 'Transaction amount significantly higher than recent average',
                    'severity': 'high',
                })

        # New merchant categories are not checked: that would require
        # historical merchant data for comparison.

        return anomalies

    def _assess_behavioral_risk(self, score: float, anomalies: List[Dict]) -> Dict[str, Any]:
        """Assess overall behavioral risk from the base score and detected anomalies.

        Returns:
            Dict with ``base_score``, ``adjusted_score`` (capped at 1.0),
            ``anomaly_count``, ``risk_level`` and ``confidence`` (capped at 1.0).
        """
        # Each anomaly raises the score according to its severity;
        # unrecognised severities contribute nothing.
        anomaly_severity_boost = sum(
            self._SEVERITY_BOOST.get(a.get('severity', 'low'), 0) for a in anomalies
        )
        adjusted_score = min(1.0, score + anomaly_severity_boost)

        if adjusted_score >= 0.8:
            final_risk = 'critical'
        elif adjusted_score >= 0.6:
            final_risk = 'high'
        elif adjusted_score >= 0.4:
            final_risk = 'medium'
        else:
            final_risk = 'low'

        # Confidence grows with the number of anomalies but stays a valid 0-1 value.
        confidence = min(1.0, 0.8 + len(anomalies) * 0.05)

        return {
            'base_score': round(score, 4),
            'adjusted_score': round(adjusted_score, 4),
            'anomaly_count': len(anomalies),
            'risk_level': final_risk,
            'confidence': round(confidence, 2),
        }
611
+
612
+
613
# Global AI features instance, constructed once when this module is imported.
# Construction runs AdvancedAIFeatures.__init__, which calls
# get_api_cache_manager(), so importing the module triggers that call.
ai_features = AdvancedAIFeatures()