# Provenance: commit 1ecc53e, "Refactoring App Structure (#1)", by chrisjcc.
"""
Fraud Model Explainability Assistant - Shared Utilities
This module contains shared tools, mock data generators, and configuration constants
used by the Fraud Model Explainability Assistant.
"""
import os
import random
import warnings
from datetime import datetime, timedelta
from typing import Optional
# Suppress asyncio "Invalid file descriptor" warnings in containerized environments
# These are harmless cleanup warnings during garbage collection
# NOTE: the filter below ignores *every* ResourceWarning raised in this process,
# not only the asyncio ones described above.
warnings.filterwarnings("ignore", category=ResourceWarning)
# The interpreter reads PYTHONWARNINGS at startup, so setting it here does not
# change the current process; it is exported so that child processes started
# afterwards inherit the same filter.
os.environ["PYTHONWARNINGS"] = "ignore::ResourceWarning"
from strands import tool
# =============================================================================
# MOCK DATA GENERATORS
# =============================================================================
# In production, these would connect to your actual data systems
# (e.g., Snowflake, feature store, model serving infrastructure)
def generate_mock_application(app_id: str) -> dict:
    """Generate realistic mock application data for demo purposes.

    All random fields are drawn from a private generator seeded with the text
    of ``app_id``. The same ID therefore yields the same risk profile in every
    process (the built-in ``hash()`` of a string is salted per interpreter run,
    so it is not a stable seed), and the module-level ``random`` state used by
    other callers is left untouched.

    Args:
        app_id: The unique identifier for the application (e.g., "APP-12345").

    Returns:
        A dict with the keys ``application_id``, ``timestamp``, ``portfolio``,
        ``requested_credit_line``, ``fraud_score``, ``fraud_score_percentile``,
        ``decision``, ``risk_level`` and ``features`` (a nested dict of the
        model inputs). ``timestamp`` is an ISO-8601 string computed relative
        to the current clock, so only its day offset is reproducible.
    """
    # str seeds are hashed deterministically by the random module itself.
    rng = random.Random(str(app_id))

    risk_level = rng.choice(["low", "medium", "high", "very_high"])
    is_high_risk = risk_level in ("high", "very_high")

    # Inclusive (low, high) bounds for each risk tier.
    score_bounds = {
        "low": (150, 350),
        "medium": (400, 550),
        "high": (600, 750),
        "very_high": (800, 950),
    }
    percentile_bounds = {
        "low": (5, 30),
        "medium": (40, 60),
        "high": (75, 90),
        "very_high": (92, 99),
    }

    submitted_at = datetime.now() - timedelta(days=rng.randint(0, 30))
    base_data = {
        "application_id": app_id,
        "timestamp": submitted_at.isoformat(),
        "portfolio": rng.choice(["Retail Card", "Payment Solutions", "CareCredit"]),
        "requested_credit_line": rng.randint(500, 25000),
        "fraud_score": rng.randint(*score_bounds[risk_level]),
        "fraud_score_percentile": rng.randint(*percentile_bounds[risk_level]),
        "decision": "FLAGGED" if is_high_risk else "APPROVED",
        "risk_level": risk_level,
    }

    # Features that contribute to fraud scoring
    if is_high_risk:
        base_data["features"] = {
            "ssn_issue_date_vs_credit_age_mismatch": rng.uniform(0.7, 0.95),
            "device_velocity_30d": rng.randint(5, 15),
            "address_type": rng.choice(["CMRA", "PO_BOX", "VACANT"]),
            "phone_type": rng.choice(["VOIP", "PREPAID"]),
            "email_domain_age_days": rng.randint(1, 30),
            "application_velocity_14d": rng.randint(3, 8),
            "identity_linkage_count": rng.randint(4, 12),
            "credit_inquiry_spike": True,
            "synthetic_id_score": rng.uniform(0.75, 0.98),
        }
    else:
        base_data["features"] = {
            "ssn_issue_date_vs_credit_age_mismatch": rng.uniform(0.0, 0.2),
            "device_velocity_30d": rng.randint(1, 2),
            "address_type": "RESIDENTIAL",
            "phone_type": "POSTPAID",
            "email_domain_age_days": rng.randint(365, 3650),
            "application_velocity_14d": rng.randint(0, 1),
            "identity_linkage_count": rng.randint(0, 2),
            "credit_inquiry_spike": False,
            "synthetic_id_score": rng.uniform(0.05, 0.25),
        }
    return base_data
# =============================================================================
# FRAUD EXPLAINABILITY TOOLS
# =============================================================================
def _ordinal_suffix(number: int) -> str:
    """Return the English ordinal suffix ("st", "nd", "rd" or "th") for number."""
    # 11th, 12th and 13th are the exceptions to the last-digit rule.
    if 10 <= number % 100 <= 20:
        return "th"
    return {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")


@tool
def get_application_summary(application_id: str) -> str:
    """
    Retrieve basic information about a credit application including
    fraud score, decision, portfolio, and timestamp.

    Args:
        application_id: The unique identifier for the application (e.g., "APP-12345")

    Returns:
        A summary of the application details and fraud assessment
    """
    app = generate_mock_application(application_id)
    percentile = app['fraud_score_percentile']
    # The timestamp is an ISO-8601 string; its first 10 characters are the date.
    submission_date = app['timestamp'][:10]
    return f"""
APPLICATION SUMMARY
==================
Application ID: {app['application_id']}
Submission Date: {submission_date}
Portfolio: {app['portfolio']}
Requested Credit Line: ${app['requested_credit_line']:,}
FRAUD ASSESSMENT
----------------
Fraud Score: {app['fraud_score']} / 1000
Risk Percentile: {percentile}{_ordinal_suffix(percentile)} percentile
Risk Level: {app['risk_level'].upper()}
Decision: {app['decision']}
"""
@tool
def explain_fraud_score(application_id: str) -> str:
    """
    Get detailed SHAP-style feature attribution explanation for why an
    application received its fraud score. Shows which factors contributed
    most to the risk assessment.

    Args:
        application_id: The unique identifier for the application

    Returns:
        Detailed breakdown of contributing factors with impact scores
    """
    app = generate_mock_application(application_id)
    features = app["features"]

    # Simulate SHAP values (in production, these come from your model).
    # Each factor is (feature label, displayed value, signed points, explanation);
    # keeping the points numeric avoids parsing them back out of display text.
    if app["risk_level"] == "low":
        # Low-risk applications report protective factors instead.
        factors = [
            (
                "Established Credit History",
                "12+ years",
                -120,
                "Long credit history consistent with SSN issue date",
            ),
            (
                "Stable Contact Information",
                "Verified",
                -85,
                "Phone and address verified with multiple data sources",
            ),
            (
                "Low Application Velocity",
                "1 in 90 days",
                -45,
                "Normal application pattern",
            ),
        ]
    else:
        # (triggered?, feature label, displayed value, points, explanation)
        candidates = [
            (
                features["ssn_issue_date_vs_credit_age_mismatch"] > 0.5,
                "SSN Issue Date vs Credit Age Mismatch",
                f"{features['ssn_issue_date_vs_credit_age_mismatch']:.0%}",
                187,
                "SSN was issued recently but credit file shows longer history, a key synthetic ID indicator",
            ),
            (
                features["device_velocity_30d"] > 3,
                "Device Velocity (30 days)",
                f"{features['device_velocity_30d']} applications",
                142,
                "Same device fingerprint linked to multiple applications in short period",
            ),
            (
                features["address_type"] in ("CMRA", "PO_BOX", "VACANT"),
                "Address Type",
                features["address_type"],
                98,
                f"Address classified as {features['address_type']} (Commercial Mail Receiving Agency or high-risk type)",
            ),
            (
                features["synthetic_id_score"] > 0.6,
                "Synthetic Identity Score",
                f"{features['synthetic_id_score']:.0%}",
                156,
                "Composite score from ensemble model indicates high probability of synthetic identity",
            ),
            (
                features["application_velocity_14d"] > 2,
                "Application Velocity (14 days)",
                f"{features['application_velocity_14d']} applications",
                78,
                "Multiple credit applications submitted in short timeframe",
            ),
            (
                features["email_domain_age_days"] < 60,
                "Email Domain Age",
                f"{features['email_domain_age_days']} days",
                45,
                "Email address created very recently",
            ),
            (
                features["phone_type"] in ("VOIP", "PREPAID"),
                "Phone Type",
                features["phone_type"],
                62,
                "Non-traditional phone type associated with higher fraud rates",
            ),
        ]
        factors = [candidate[1:] for candidate in candidates if candidate[0]]

    # Largest absolute impact first; the sort is stable, so ties keep list order.
    factors.sort(key=lambda factor: abs(factor[2]), reverse=True)

    # Format output
    sections = [f"""
FRAUD SCORE EXPLANATION
=======================
Application ID: {application_id}
Final Fraud Score: {app['fraud_score']} / 1000
Model: XGBoost Fraud Ensemble v3.2
TOP CONTRIBUTING FACTORS (ranked by impact):
--------------------------------------------
"""]
    for rank, (label, value, points, explanation) in enumerate(factors, 1):
        direction = "INCREASES RISK" if points > 0 else "DECREASES RISK"
        sections.append(f"""
{rank}. {label}
Value: {value}
Impact: {points:+d} points ({direction})
→ {explanation}
""")
    if not factors:
        # Medium-risk profiles trip none of the thresholds above; say so
        # rather than leaving the section blank.
        sections.append("""
No individual feature crossed a risk-reporting threshold for this application.
""")
    return "".join(sections)
@tool
def compare_to_population(application_id: str, comparison_group: str = "approved") -> str:
    """
    Compare an application's features to the approved or denied population
    to show how unusual the applicant's characteristics are.

    Args:
        application_id: The unique identifier for the application
        comparison_group: Either "approved" or "denied" population to compare against

    Returns:
        Statistical comparison showing how the application differs from typical cases
    """
    app = generate_mock_application(application_id)
    features = app["features"]

    # Mock population statistics
    population_stats = {
        "approved": {
            "ssn_credit_mismatch_mean": 0.08,
            "ssn_credit_mismatch_std": 0.12,
            "device_velocity_mean": 1.2,
            "device_velocity_std": 0.8,
            "synthetic_score_mean": 0.15,
            "synthetic_score_std": 0.10,
            "app_velocity_mean": 0.5,
            "app_velocity_std": 0.7,
        },
        "denied": {
            "ssn_credit_mismatch_mean": 0.72,
            "ssn_credit_mismatch_std": 0.18,
            "device_velocity_mean": 6.5,
            "device_velocity_std": 3.2,
            "synthetic_score_mean": 0.78,
            "synthetic_score_std": 0.15,
            "app_velocity_mean": 4.2,
            "app_velocity_std": 2.1,
        },
    }
    sample_sizes = {"approved": "847,293", "denied": "23,847"}

    # Match the group case-insensitively and fall back to "approved" for
    # anything unrecognized, so the statistics, the label and the sample size
    # shown in the report always describe the same population.
    requested_group = str(comparison_group).strip().lower()
    group = requested_group if requested_group in population_stats else "approved"
    stats = population_stats[group]

    def calc_z_score(value, mean, std):
        if std == 0:
            return 0
        return (value - mean) / std

    # (row label, key in features, key prefix in stats, shown as a percentage?)
    metric_specs = [
        ("SSN/Credit Age Mismatch", "ssn_issue_date_vs_credit_age_mismatch", "ssn_credit_mismatch", True),
        ("Device Velocity (30d)", "device_velocity_30d", "device_velocity", False),
        ("Synthetic ID Score", "synthetic_id_score", "synthetic_score", True),
        ("Application Velocity (14d)", "application_velocity_14d", "app_velocity", False),
    ]
    comparisons = []
    for label, feature_key, stat_key, is_ratio in metric_specs:
        value = features[feature_key]
        mean = stats[f"{stat_key}_mean"]
        comparisons.append({
            "feature": label,
            "applicant_value": f"{value:.0%}" if is_ratio else str(value),
            "population_mean": f"{mean:.0%}" if is_ratio else f"{mean:.1f}",
            "z_score": calc_z_score(value, mean, stats[f"{stat_key}_std"]),
        })

    table_header = (
        f"{'Feature':<30} {'Applicant':<15} {'Population Mean':<18} "
        f"{'Z-Score':<10} {'Assessment'}"
    )
    output = f"""
POPULATION COMPARISON ANALYSIS
==============================
Application ID: {application_id}
Comparison Group: {group.upper()} applications (last 12 months)
Sample Size: {sample_sizes[group]} applications
FEATURE COMPARISON:
-------------------
{table_header}
{"-" * 95}
"""
    for comp in comparisons:
        z = comp["z_score"]
        magnitude = abs(z)
        if magnitude > 3:
            assessment = "⚠️ EXTREME OUTLIER"
        elif magnitude > 2:
            assessment = "🔶 SIGNIFICANT DEVIATION"
        elif magnitude > 1:
            assessment = "🔷 MILD DEVIATION"
        else:
            assessment = "✅ WITHIN NORMAL"
        output += (
            f"{comp['feature']:<30} {comp['applicant_value']:<15} "
            f"{comp['population_mean']:<18} {z:>+.2f}σ {assessment}\n"
        )

    # Summary
    significant_count = sum(1 for c in comparisons if abs(c["z_score"]) > 2)
    output += f"""
SUMMARY:
--------
{significant_count} of {len(comparisons)} features show significant deviation (|z| > 2σ) from {group} population.
"""
    if significant_count >= 2:
        output += f"This application's profile is statistically unusual compared to typically {group} applications."
    if group != requested_group:
        # Make the fallback visible instead of silently swapping populations.
        output += (
            f"\nNOTE: comparison group {comparison_group!r} was not recognized; "
            f"the {group.upper()} population was used instead.\n"
        )
    return output
@tool
def check_fair_lending_flags(application_id: str) -> str:
    """
    Check for potential fair lending concerns in the fraud decision.
    Reviews whether protected class proxies may have influenced the score
    and provides compliance documentation.

    Args:
        application_id: The unique identifier for the application

    Returns:
        Fair lending compliance assessment and documentation
    """
    app = generate_mock_application(application_id)
    features = app["features"]

    # Mock fair lending analysis
    output = f"""
FAIR LENDING COMPLIANCE REVIEW
==============================
Application ID: {application_id}
Review Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Model: XGBoost Fraud Ensemble v3.2
PROTECTED CLASS PROXY ANALYSIS:
-------------------------------
The following features were analyzed for potential correlation with protected characteristics:
✅ Geography-Based Features:
- ZIP code used only for velocity calculations, not scoring
- No direct geographic risk scoring applied
- Compliant with ECOA geographic restrictions
✅ Name-Based Features:
- No name-based features used in fraud model
- Identity verification uses SSN/DOB only
✅ Age-Related Features:
- Credit age features measure account history, not applicant age
- SSN issuance analysis targets synthetic ID patterns, not age discrimination
- Model tested for age disparate impact: PASSED (adverse impact ratio: 0.94)
⚠️ REVIEW ITEMS:
-----------------
"""
    review_items = []
    if features.get("phone_type") in ("VOIP", "PREPAID"):
        review_items.append("""
• Phone Type Feature:
- VOIP/Prepaid flagged as risk factor
- Documented business justification: 73% of confirmed synthetic fraud uses VOIP
- Disparate impact testing: PASSED (ratio: 0.89)
- Alternative considered: None available with equivalent predictive power
""")
    # NOTE(review): "VACANT" is scored as a high-risk address type by the score
    # explanation tool but is not listed here; confirm whether that is intended.
    if features.get("address_type") in ("CMRA", "PO_BOX"):
        review_items.append("""
• Address Type Feature:
- CMRA/PO Box flagged as risk factor
- Documented business justification: Required for synthetic ID detection
- Disparate impact testing: PASSED (ratio: 0.91)
- Accommodations: Manual review pathway available for legitimate CMRA users
""")
    if not review_items:
        # Avoid printing a section header with nothing underneath it.
        review_items.append("""
• None - no flagged features require additional review for this application
""")
    output += "".join(review_items)

    output += """
MODEL VALIDATION STATUS:
------------------------
Last Disparate Impact Test: 2024-11-15
Last Adverse Action Review: 2024-12-01
Model Risk Rating: LOW
SR 11-7 Compliance: COMPLIANT
ADVERSE ACTION REASON CODES:
----------------------------
If this application is denied, the following reason codes apply:
"""
    if app["decision"] == "FLAGGED":
        reasons = [
            "FA01 - Unable to verify identity information",
            "FA03 - Inconsistent application information",
            "FA07 - High-risk contact information patterns",
        ]
        output += "".join(
            f" {number}. {reason}\n" for number, reason in enumerate(reasons, 1)
        )
    else:
        output += " N/A - Application approved\n"

    output += """
DOCUMENTATION:
--------------
This analysis is auto-generated for compliance documentation.
Full model documentation available in Model Risk Management system.
Contact: model-governance@company.com
"""
    return output
@tool
def get_identity_network(application_id: str) -> str:
    """
    Analyze the identity linkage network for an application, showing
    connections to other applications via shared attributes (device,
    phone, email, address, SSN patterns).

    Args:
        application_id: The unique identifier for the application

    Returns:
        Network analysis showing linked applications and risk patterns
    """
    max_listed = 6  # cap on linked applications printed in the detail section

    app = generate_mock_application(application_id)
    features = app["features"]
    linkage_count = features.get("identity_linkage_count", 0)

    output = f"""
IDENTITY NETWORK ANALYSIS
=========================
Application ID: {application_id}
Analysis Date: {datetime.now().strftime('%Y-%m-%d')}
LINKAGE SUMMARY:
----------------
Total Linked Applications: {linkage_count}
"""
    if linkage_count <= 3:
        output += """
LINKAGE BREAKDOWN:
------------------
• Device Fingerprint Links: 0-1 applications
• Phone Number Links: 0 applications
• Email Pattern Links: 0 applications
• Address Links: 1 application (same household likely)
NETWORK RISK ASSESSMENT:
------------------------
• Network Risk Score: LOW
• No suspicious patterns detected
• Normal application profile
✅ No concerning identity network patterns identified.
"""
        return output

    # Generate mock linked applications for high-risk cases. A private
    # generator seeded from the ID text keeps the network reproducible across
    # processes (built-in hash() of a string is salted per run) and leaves the
    # module-level random state alone.
    rng = random.Random(f"identity-network:{application_id}")
    link_types = {
        "device_fingerprint": rng.randint(2, min(linkage_count, 8)),
        "phone_number": rng.randint(1, min(linkage_count, 4)),
        "email_pattern": rng.randint(1, min(linkage_count, 3)),
        "address": rng.randint(1, min(linkage_count, 5)),
    }
    output += f"""
LINKAGE BREAKDOWN:
------------------
• Device Fingerprint Links: {link_types['device_fingerprint']} applications
• Phone Number Links: {link_types['phone_number']} applications
• Email Pattern Links: {link_types['email_pattern']} applications
• Address Links: {link_types['address']} applications
LINKED APPLICATION DETAILS:
---------------------------
"""
    statuses = ["CONFIRMED_FRAUD", "FLAGGED", "DENIED", "CHARGED_OFF", "APPROVED"]
    if app["risk_level"] in ("high", "very_high"):
        weights = [0.3, 0.25, 0.2, 0.15, 0.1]
    else:
        weights = [0.05, 0.1, 0.15, 0.1, 0.6]
    status_emoji = {
        "CONFIRMED_FRAUD": "🔴",
        "FLAGGED": "🟠",
        "DENIED": "🟡",
        "CHARGED_OFF": "🔴",
        "APPROVED": "🟢",
    }
    link_type_names = list(link_types)

    # Draw every linked application once so that the detail rows and the
    # confirmed-fraud total below are derived from the same records.
    linked_apps = [
        {
            "id": f"APP-{rng.randint(10000, 99999)}",
            "link_type": rng.choice(link_type_names),
            "status": rng.choices(statuses, weights=weights)[0],
            "days_ago": rng.randint(1, 180),
        }
        for _ in range(linkage_count)
    ]
    for linked in linked_apps[:max_listed]:
        icon = status_emoji.get(linked["status"], "⚪")
        link_label = linked["link_type"].replace("_", " ").title()
        output += (
            f" {icon} {linked['id']} | {link_label} | "
            f"{linked['status']} | {linked['days_ago']}d ago\n"
        )
    hidden_count = len(linked_apps) - max_listed
    if hidden_count > 0:
        output += f" ... and {hidden_count} more linked application(s) not shown\n"

    # Risk assessment
    fraud_links = sum(
        1 for linked in linked_apps if linked["status"] == "CONFIRMED_FRAUD"
    )
    is_ring = linkage_count > 5
    output += f"""
NETWORK RISK ASSESSMENT:
------------------------
• Confirmed Fraud in Network: {fraud_links} application(s)
• Network Risk Score: {min(100, linkage_count * 12 + fraud_links * 25)}/100
• Ring Pattern Detected: {"YES ⚠️" if is_ring else "NO"}
• Velocity Anomaly: {"YES ⚠️" if features.get('device_velocity_30d', 0) > 5 else "NO"}
RECOMMENDATION:
---------------
{"⚠️ HIGH-RISK NETWORK - Manual review recommended" if is_ring else "🔶 ELEVATED RISK - Monitor for additional activity"}
"""
    return output
@tool
def get_model_performance(model_name: str = "xgboost_fraud_v3.2", portfolio: str = "all") -> str:
    """
    Retrieve current performance metrics for a fraud detection model,
    including precision, recall, KS statistic, and financial impact.

    Args:
        model_name: Name of the fraud model (default: xgboost_fraud_v3.2)
        portfolio: Portfolio to filter by ("Retail Card", "Payment Solutions", "CareCredit", or "all")

    Returns:
        Model performance metrics and trends
    """
    output = f"""
MODEL PERFORMANCE DASHBOARD
===========================
Model: {model_name}
Portfolio: {portfolio.upper()}
Reporting Period: Last 30 Days
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
DETECTION METRICS:
------------------
Current Prior Month Δ Change
Fraud Detection Rate: 87.3% 84.1% +3.2% ✅
Precision (PPV): 34.2% 31.8% +2.4% ✅
False Positive Rate: 2.1% 2.4% -0.3% ✅
KS Statistic: 0.72 0.69 +0.03 ✅
Gini Coefficient: 0.81 0.78 +0.03 ✅
AUC-ROC: 0.91 0.89 +0.02 ✅
FINANCIAL IMPACT:
-----------------
Current Prior Month Δ Change
Fraud Losses Prevented: $4.2M $3.8M +$400K ✅
False Positive Cost: $890K $920K -$30K ✅
Net Benefit: $3.31M $2.88M +$430K ✅
ROI: 372% 317% +55% ✅
VOLUME METRICS:
---------------
Applications Scored: 1,247,832
High-Risk Flags: 26,847 (2.15%)
Manual Reviews: 8,421
Confirmed Fraud: 9,182
"""
    # Treat "all" case-insensitively so "ALL" / " All " do not trigger a
    # breakdown for a portfolio literally named "all".
    portfolio_key = portfolio.strip().lower()
    if portfolio_key != "all":
        # Seed a private generator from the request so the same model and
        # portfolio always show the same mock figures, independent of whatever
        # state the module-level random generator happens to be in.
        rng = random.Random(f"model-performance:{model_name}:{portfolio_key}")
        output += f"""
PORTFOLIO BREAKDOWN ({portfolio}):
{'=' * 40}
Applications: {rng.randint(200000, 500000):,}
Fraud Rate: {rng.uniform(0.5, 1.2):.2f}%
Detection Rate: {rng.uniform(82, 92):.1f}%
"""
    output += """
MODEL HEALTH:
-------------
✅ Feature Drift (PSI): 0.08 (threshold: 0.25)
✅ Score Distribution: Stable
✅ Latency P99: 45ms (SLA: 100ms)
⚠️ Challenger Model: +2.1% lift in shadow mode - review scheduled
TREND ALERT:
------------
📈 Synthetic ID fraud attempts up 23% MoM - model adapting well
📉 First-party fraud stable at historical levels
"""
    return output
# =============================================================================
# SYSTEM PROMPT
# =============================================================================
# Instruction text for the assistant. It states the assistant's role, lists the
# six tool capabilities it can draw on, gives answering guidelines, and fixes a
# five-part response structure for flagged applications. The trailing .strip()
# removes the leading and trailing newlines introduced by the triple-quoted layout.
SYSTEM_PROMPT = """
You are a Fraud Model Explainability Assistant for a major financial services company.
Your role is to help fraud analysts, data scientists, and executives understand
fraud model decisions and their implications.
You have access to tools that can:
1. Retrieve application summaries and fraud scores
2. Explain why applications received specific fraud scores (SHAP-style explanations)
3. Compare applications to approved/denied populations statistically
4. Check for fair lending compliance concerns
5. Analyze identity networks and linkages
6. Show model performance metrics
When answering questions:
- Be precise and data-driven
- Highlight the most important risk factors first
- Explain technical concepts in business terms when speaking to executives
- Always mention fair lending implications when relevant
- Provide actionable insights, not just data
For flagged applications, structure your response as:
1. Quick summary (score, decision, risk level)
2. Top contributing factors
3. How unusual this is compared to the population
4. Any compliance considerations
5. Recommended next steps
Remember: Your explanations may be used in regulatory examinations and audits,
so be accurate and thorough.
""".strip()