""" Fraud Model Explainability Assistant - Shared Utilities This module contains shared tools, mock data generators, and configuration constants used by the Fraud Model Explainability Assistant. """ import os import random import warnings from datetime import datetime, timedelta from typing import Optional # Suppress asyncio "Invalid file descriptor" warnings in containerized environments # These are harmless cleanup warnings during garbage collection warnings.filterwarnings("ignore", category=ResourceWarning) os.environ["PYTHONWARNINGS"] = "ignore::ResourceWarning" from strands import tool # ============================================================================= # MOCK DATA GENERATORS # ============================================================================= # In production, these would connect to your actual data systems # (e.g., Snowflake, feature store, model serving infrastructure) def generate_mock_application(app_id: str) -> dict: """Generate realistic mock application data for demo purposes.""" random.seed(hash(app_id) % 2**32) risk_level = random.choice(["low", "medium", "high", "very_high"]) base_data = { "application_id": app_id, "timestamp": (datetime.now() - timedelta(days=random.randint(0, 30))).isoformat(), "portfolio": random.choice(["Retail Card", "Payment Solutions", "CareCredit"]), "requested_credit_line": random.randint(500, 25000), "fraud_score": { "low": random.randint(150, 350), "medium": random.randint(400, 550), "high": random.randint(600, 750), "very_high": random.randint(800, 950) }[risk_level], "fraud_score_percentile": { "low": random.randint(5, 30), "medium": random.randint(40, 60), "high": random.randint(75, 90), "very_high": random.randint(92, 99) }[risk_level], "decision": "FLAGGED" if risk_level in ["high", "very_high"] else "APPROVED", "risk_level": risk_level, } # Features that contribute to fraud scoring if risk_level in ["high", "very_high"]: base_data["features"] = { "ssn_issue_date_vs_credit_age_mismatch": random.uniform(0.7, 0.95), "device_velocity_30d": random.randint(5, 15), "address_type": random.choice(["CMRA", "PO_BOX", "VACANT"]), "phone_type": random.choice(["VOIP", "PREPAID"]), "email_domain_age_days": random.randint(1, 30), "application_velocity_14d": random.randint(3, 8), "identity_linkage_count": random.randint(4, 12), "credit_inquiry_spike": True, "synthetic_id_score": random.uniform(0.75, 0.98), } else: base_data["features"] = { "ssn_issue_date_vs_credit_age_mismatch": random.uniform(0.0, 0.2), "device_velocity_30d": random.randint(1, 2), "address_type": "RESIDENTIAL", "phone_type": "POSTPAID", "email_domain_age_days": random.randint(365, 3650), "application_velocity_14d": random.randint(0, 1), "identity_linkage_count": random.randint(0, 2), "credit_inquiry_spike": False, "synthetic_id_score": random.uniform(0.05, 0.25), } return base_data # ============================================================================= # FRAUD EXPLAINABILITY TOOLS # ============================================================================= @tool def get_application_summary(application_id: str) -> str: """ Retrieve basic information about a credit application including fraud score, decision, portfolio, and timestamp. Args: application_id: The unique identifier for the application (e.g., "APP-12345") Returns: A summary of the application details and fraud assessment """ app = generate_mock_application(application_id) return f""" APPLICATION SUMMARY ================== Application ID: {app['application_id']} Submission Date: {app['timestamp'][:10]} Portfolio: {app['portfolio']} Requested Credit Line: ${app['requested_credit_line']:,} FRAUD ASSESSMENT ---------------- Fraud Score: {app['fraud_score']} / 1000 Risk Percentile: {app['fraud_score_percentile']}th percentile Risk Level: {app['risk_level'].upper()} Decision: {app['decision']} """ @tool def explain_fraud_score(application_id: str) -> str: """ Get detailed SHAP-style feature attribution explanation for why an application received its fraud score. Shows which factors contributed most to the risk assessment. Args: application_id: The unique identifier for the application Returns: Detailed breakdown of contributing factors with impact scores """ app = generate_mock_application(application_id) features = app["features"] # Simulate SHAP values (in production, these come from your model) explanations = [] if features["ssn_issue_date_vs_credit_age_mismatch"] > 0.5: explanations.append({ "feature": "SSN Issue Date vs Credit Age Mismatch", "value": f"{features['ssn_issue_date_vs_credit_age_mismatch']:.0%}", "impact": "+187 points", "direction": "INCREASES RISK", "explanation": "SSN was issued recently but credit file shows longer history, a key synthetic ID indicator" }) if features["device_velocity_30d"] > 3: explanations.append({ "feature": "Device Velocity (30 days)", "value": f"{features['device_velocity_30d']} applications", "impact": "+142 points", "direction": "INCREASES RISK", "explanation": "Same device fingerprint linked to multiple applications in short period" }) if features["address_type"] in ["CMRA", "PO_BOX", "VACANT"]: explanations.append({ "feature": "Address Type", "value": features["address_type"], "impact": "+98 points", "direction": "INCREASES RISK", "explanation": f"Address classified as {features['address_type']} (Commercial Mail Receiving Agency or high-risk type)" }) if features["synthetic_id_score"] > 0.6: explanations.append({ "feature": "Synthetic Identity Score", "value": f"{features['synthetic_id_score']:.0%}", "impact": "+156 points", "direction": "INCREASES RISK", "explanation": "Composite score from ensemble model indicates high probability of synthetic identity" }) if features["application_velocity_14d"] > 2: explanations.append({ "feature": "Application Velocity (14 days)", "value": f"{features['application_velocity_14d']} applications", "impact": "+78 points", "direction": "INCREASES RISK", "explanation": "Multiple credit applications submitted in short timeframe" }) if features["email_domain_age_days"] < 60: explanations.append({ "feature": "Email Domain Age", "value": f"{features['email_domain_age_days']} days", "impact": "+45 points", "direction": "INCREASES RISK", "explanation": "Email address created very recently" }) if features["phone_type"] in ["VOIP", "PREPAID"]: explanations.append({ "feature": "Phone Type", "value": features["phone_type"], "impact": "+62 points", "direction": "INCREASES RISK", "explanation": "Non-traditional phone type associated with higher fraud rates" }) # If low risk, show protective factors if app["risk_level"] == "low": explanations = [ { "feature": "Established Credit History", "value": "12+ years", "impact": "-120 points", "direction": "DECREASES RISK", "explanation": "Long credit history consistent with SSN issue date" }, { "feature": "Stable Contact Information", "value": "Verified", "impact": "-85 points", "direction": "DECREASES RISK", "explanation": "Phone and address verified with multiple data sources" }, { "feature": "Low Application Velocity", "value": "1 in 90 days", "impact": "-45 points", "direction": "DECREASES RISK", "explanation": "Normal application pattern" } ] # Format output output = f""" FRAUD SCORE EXPLANATION ======================= Application ID: {application_id} Final Fraud Score: {app['fraud_score']} / 1000 Model: XGBoost Fraud Ensemble v3.2 TOP CONTRIBUTING FACTORS (ranked by impact): -------------------------------------------- """ for i, exp in enumerate(sorted(explanations, key=lambda x: abs(int(x["impact"].split()[0])), reverse=True), 1): output += f""" {i}. {exp['feature']} Value: {exp['value']} Impact: {exp['impact']} ({exp['direction']}) → {exp['explanation']} """ return output @tool def compare_to_population(application_id: str, comparison_group: str = "approved") -> str: """ Compare an application's features to the approved or denied population to show how unusual the applicant's characteristics are. Args: application_id: The unique identifier for the application comparison_group: Either "approved" or "denied" population to compare against Returns: Statistical comparison showing how the application differs from typical cases """ app = generate_mock_application(application_id) features = app["features"] # Mock population statistics population_stats = { "approved": { "ssn_credit_mismatch_mean": 0.08, "ssn_credit_mismatch_std": 0.12, "device_velocity_mean": 1.2, "device_velocity_std": 0.8, "synthetic_score_mean": 0.15, "synthetic_score_std": 0.10, "app_velocity_mean": 0.5, "app_velocity_std": 0.7, }, "denied": { "ssn_credit_mismatch_mean": 0.72, "ssn_credit_mismatch_std": 0.18, "device_velocity_mean": 6.5, "device_velocity_std": 3.2, "synthetic_score_mean": 0.78, "synthetic_score_std": 0.15, "app_velocity_mean": 4.2, "app_velocity_std": 2.1, } } stats = population_stats.get(comparison_group, population_stats["approved"]) def calc_z_score(value, mean, std): if std == 0: return 0 return (value - mean) / std comparisons = [ { "feature": "SSN/Credit Age Mismatch", "applicant_value": f"{features['ssn_issue_date_vs_credit_age_mismatch']:.0%}", "population_mean": f"{stats['ssn_credit_mismatch_mean']:.0%}", "z_score": calc_z_score(features['ssn_issue_date_vs_credit_age_mismatch'], stats['ssn_credit_mismatch_mean'], stats['ssn_credit_mismatch_std']) }, { "feature": "Device Velocity (30d)", "applicant_value": str(features['device_velocity_30d']), "population_mean": f"{stats['device_velocity_mean']:.1f}", "z_score": calc_z_score(features['device_velocity_30d'], stats['device_velocity_mean'], stats['device_velocity_std']) }, { "feature": "Synthetic ID Score", "applicant_value": f"{features['synthetic_id_score']:.0%}", "population_mean": f"{stats['synthetic_score_mean']:.0%}", "z_score": calc_z_score(features['synthetic_id_score'], stats['synthetic_score_mean'], stats['synthetic_score_std']) }, { "feature": "Application Velocity (14d)", "applicant_value": str(features['application_velocity_14d']), "population_mean": f"{stats['app_velocity_mean']:.1f}", "z_score": calc_z_score(features['application_velocity_14d'], stats['app_velocity_mean'], stats['app_velocity_std']) }, ] output = f""" POPULATION COMPARISON ANALYSIS ============================== Application ID: {application_id} Comparison Group: {comparison_group.upper()} applications (last 12 months) Sample Size: {'847,293' if comparison_group == 'approved' else '23,847'} applications FEATURE COMPARISON: ------------------- {"Feature":<30} {"Applicant":<15} {"Population Mean":<18} {"Z-Score":<10} {"Assessment"} {"-"*95} """ for comp in comparisons: z = comp["z_score"] if abs(z) > 3: assessment = "⚠️ EXTREME OUTLIER" elif abs(z) > 2: assessment = "🔶 SIGNIFICANT DEVIATION" elif abs(z) > 1: assessment = "🔷 MILD DEVIATION" else: assessment = "✅ WITHIN NORMAL" output += f"{comp['feature']:<30} {comp['applicant_value']:<15} {comp['population_mean']:<18} {z:>+.2f}σ {assessment}\n" # Summary extreme_count = sum(1 for c in comparisons if abs(c["z_score"]) > 2) output += f""" SUMMARY: -------- {extreme_count} of {len(comparisons)} features show significant deviation (|z| > 2σ) from {comparison_group} population. """ if extreme_count >= 2: output += f"This application's profile is statistically unusual compared to typically {comparison_group} applications." return output @tool def check_fair_lending_flags(application_id: str) -> str: """ Check for potential fair lending concerns in the fraud decision. Reviews whether protected class proxies may have influenced the score and provides compliance documentation. Args: application_id: The unique identifier for the application Returns: Fair lending compliance assessment and documentation """ app = generate_mock_application(application_id) # Mock fair lending analysis output = f""" FAIR LENDING COMPLIANCE REVIEW ============================== Application ID: {application_id} Review Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} Model: XGBoost Fraud Ensemble v3.2 PROTECTED CLASS PROXY ANALYSIS: ------------------------------- The following features were analyzed for potential correlation with protected characteristics: ✅ Geography-Based Features: - ZIP code used only for velocity calculations, not scoring - No direct geographic risk scoring applied - Compliant with ECOA geographic restrictions ✅ Name-Based Features: - No name-based features used in fraud model - Identity verification uses SSN/DOB only ✅ Age-Related Features: - Credit age features measure account history, not applicant age - SSN issuance analysis targets synthetic ID patterns, not age discrimination - Model tested for age disparate impact: PASSED (adverse impact ratio: 0.94) ⚠️ REVIEW ITEMS: ----------------- """ if app["features"].get("phone_type") in ["VOIP", "PREPAID"]: output += """ • Phone Type Feature: - VOIP/Prepaid flagged as risk factor - Documented business justification: 73% of confirmed synthetic fraud uses VOIP - Disparate impact testing: PASSED (ratio: 0.89) - Alternative considered: None available with equivalent predictive power """ if app["features"].get("address_type") in ["CMRA", "PO_BOX"]: output += """ • Address Type Feature: - CMRA/PO Box flagged as risk factor - Documented business justification: Required for synthetic ID detection - Disparate impact testing: PASSED (ratio: 0.91) - Accommodations: Manual review pathway available for legitimate CMRA users """ output += f""" MODEL VALIDATION STATUS: ------------------------ Last Disparate Impact Test: 2024-11-15 Last Adverse Action Review: 2024-12-01 Model Risk Rating: LOW SR 11-7 Compliance: COMPLIANT ADVERSE ACTION REASON CODES: ---------------------------- If this application is denied, the following reason codes apply: """ if app["decision"] == "FLAGGED": reasons = [ "FA01 - Unable to verify identity information", "FA03 - Inconsistent application information", "FA07 - High-risk contact information patterns", ] for i, reason in enumerate(reasons, 1): output += f" {i}. {reason}\n" else: output += " N/A - Application approved\n" output += """ DOCUMENTATION: -------------- This analysis is auto-generated for compliance documentation. Full model documentation available in Model Risk Management system. Contact: model-governance@company.com """ return output @tool def get_identity_network(application_id: str) -> str: """ Analyze the identity linkage network for an application, showing connections to other applications via shared attributes (device, phone, email, address, SSN patterns). Args: application_id: The unique identifier for the application Returns: Network analysis showing linked applications and risk patterns """ app = generate_mock_application(application_id) features = app["features"] linkage_count = features.get("identity_linkage_count", 0) output = f""" IDENTITY NETWORK ANALYSIS ========================= Application ID: {application_id} Analysis Date: {datetime.now().strftime('%Y-%m-%d')} LINKAGE SUMMARY: ---------------- Total Linked Applications: {linkage_count} """ if linkage_count > 3: # Generate mock linked applications for high-risk cases random.seed(hash(application_id) % 2**32) link_types = { "device_fingerprint": random.randint(2, min(linkage_count, 8)), "phone_number": random.randint(1, min(linkage_count, 4)), "email_pattern": random.randint(1, min(linkage_count, 3)), "address": random.randint(1, min(linkage_count, 5)), } output += f""" LINKAGE BREAKDOWN: ------------------ • Device Fingerprint Links: {link_types['device_fingerprint']} applications • Phone Number Links: {link_types['phone_number']} applications • Email Pattern Links: {link_types['email_pattern']} applications • Address Links: {link_types['address']} applications LINKED APPLICATION DETAILS: --------------------------- """ statuses = ["CONFIRMED_FRAUD", "FLAGGED", "DENIED", "CHARGED_OFF", "APPROVED"] weights = [0.3, 0.25, 0.2, 0.15, 0.1] if app["risk_level"] in ["high", "very_high"] else [0.05, 0.1, 0.15, 0.1, 0.6] for i in range(min(linkage_count, 6)): linked_id = f"APP-{random.randint(10000, 99999)}" link_type = random.choice(list(link_types.keys())) status = random.choices(statuses, weights=weights)[0] days_ago = random.randint(1, 180) status_emoji = { "CONFIRMED_FRAUD": "🔴", "FLAGGED": "🟠", "DENIED": "🟡", "CHARGED_OFF": "🔴", "APPROVED": "🟢" } output += f" {status_emoji.get(status, '⚪')} {linked_id} | {link_type.replace('_', ' ').title()} | {status} | {days_ago}d ago\n" # Risk assessment fraud_links = sum(1 for _ in range(linkage_count) if random.random() < 0.4) output += f""" NETWORK RISK ASSESSMENT: ------------------------ • Confirmed Fraud in Network: {fraud_links} application(s) • Network Risk Score: {min(100, linkage_count * 12 + fraud_links * 25)}/100 • Ring Pattern Detected: {"YES ⚠️" if linkage_count > 5 else "NO"} • Velocity Anomaly: {"YES ⚠️" if features.get('device_velocity_30d', 0) > 5 else "NO"} RECOMMENDATION: --------------- {"⚠️ HIGH-RISK NETWORK - Manual review recommended" if linkage_count > 5 else "🔶 ELEVATED RISK - Monitor for additional activity"} """ else: output += """ LINKAGE BREAKDOWN: ------------------ • Device Fingerprint Links: 0-1 applications • Phone Number Links: 0 applications • Email Pattern Links: 0 applications • Address Links: 1 application (same household likely) NETWORK RISK ASSESSMENT: ------------------------ • Network Risk Score: LOW • No suspicious patterns detected • Normal application profile ✅ No concerning identity network patterns identified. """ return output @tool def get_model_performance(model_name: str = "xgboost_fraud_v3.2", portfolio: str = "all") -> str: """ Retrieve current performance metrics for a fraud detection model, including precision, recall, KS statistic, and financial impact. Args: model_name: Name of the fraud model (default: xgboost_fraud_v3.2) portfolio: Portfolio to filter by ("Retail Card", "Payment Solutions", "CareCredit", or "all") Returns: Model performance metrics and trends """ output = f""" MODEL PERFORMANCE DASHBOARD =========================== Model: {model_name} Portfolio: {portfolio.upper()} Reporting Period: Last 30 Days Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} DETECTION METRICS: ------------------ Current Prior Month Δ Change Fraud Detection Rate: 87.3% 84.1% +3.2% ✅ Precision (PPV): 34.2% 31.8% +2.4% ✅ False Positive Rate: 2.1% 2.4% -0.3% ✅ KS Statistic: 0.72 0.69 +0.03 ✅ Gini Coefficient: 0.81 0.78 +0.03 ✅ AUC-ROC: 0.91 0.89 +0.02 ✅ FINANCIAL IMPACT: ----------------- Current Prior Month Δ Change Fraud Losses Prevented: $4.2M $3.8M +$400K ✅ False Positive Cost: $890K $920K -$30K ✅ Net Benefit: $3.31M $2.88M +$430K ✅ ROI: 372% 317% +55% ✅ VOLUME METRICS: --------------- Applications Scored: 1,247,832 High-Risk Flags: 26,847 (2.15%) Manual Reviews: 8,421 Confirmed Fraud: 9,182 """ if portfolio != "all": output += f""" PORTFOLIO BREAKDOWN ({portfolio}): {'='*40} Applications: {random.randint(200000, 500000):,} Fraud Rate: {random.uniform(0.5, 1.2):.2f}% Detection Rate: {random.uniform(82, 92):.1f}% """ output += """ MODEL HEALTH: ------------- ✅ Feature Drift (PSI): 0.08 (threshold: 0.25) ✅ Score Distribution: Stable ✅ Latency P99: 45ms (SLA: 100ms) ⚠️ Challenger Model: +2.1% lift in shadow mode - review scheduled TREND ALERT: ------------ 📈 Synthetic ID fraud attempts up 23% MoM - model adapting well 📉 First-party fraud stable at historical levels """ return output # ============================================================================= # SYSTEM PROMPT # ============================================================================= SYSTEM_PROMPT = """ You are a Fraud Model Explainability Assistant for a major financial services company. Your role is to help fraud analysts, data scientists, and executives understand fraud model decisions and their implications. You have access to tools that can: 1. Retrieve application summaries and fraud scores 2. Explain why applications received specific fraud scores (SHAP-style explanations) 3. Compare applications to approved/denied populations statistically 4. Check for fair lending compliance concerns 5. Analyze identity networks and linkages 6. Show model performance metrics When answering questions: - Be precise and data-driven - Highlight the most important risk factors first - Explain technical concepts in business terms when speaking to executives - Always mention fair lending implications when relevant - Provide actionable insights, not just data For flagged applications, structure your response as: 1. Quick summary (score, decision, risk level) 2. Top contributing factors 3. How unusual this is compared to the population 4. Any compliance considerations 5. Recommended next steps Remember: Your explanations may be used in regulatory examinations and audits, so be accurate and thorough. """.strip()