Spaces:

chrisjcc
/

fraud_model_explainability_assistant

Runtime error

App Files Files Community

fraud_model_explainability_assistant / utils.py

chrisjcc

Refactoring App Structure (#1)

1ecc53e 6 months ago

Raw

History Blame Contribute Delete

24.9 kB

	"""
	Fraud Model Explainability Assistant - Shared Utilities

	This module contains shared tools, mock data generators, and configuration constants
	used by the Fraud Model Explainability Assistant.
	"""

	import os
	import random
	import warnings
	from datetime import datetime, timedelta
	from typing import Optional

	# Suppress asyncio "Invalid file descriptor" warnings in containerized environments
	# These are harmless cleanup warnings during garbage collection
	# NOTE: the filter below matches on category only, so it silences every
	# ResourceWarning raised in this process, not just the asyncio ones.
	warnings.filterwarnings("ignore", category=ResourceWarning)
	# NOTE(review): the interpreter reads PYTHONWARNINGS at startup, so setting it
	# here presumably only affects child processes that inherit this
	# environment -- confirm that is the intent.
	os.environ["PYTHONWARNINGS"] = "ignore::ResourceWarning"

	from strands import tool


	# =============================================================================
	# MOCK DATA GENERATORS
	# =============================================================================
	# In production, these would connect to your actual data systems
	# (e.g., Snowflake, feature store, model serving infrastructure)

	def generate_mock_application(app_id: str) -> dict:
	    """Generate realistic mock application data for demo purposes.

	    All random draws come from a private generator seeded with ``app_id``
	    itself, so a given ID maps to the same risk profile on every call and
	    after every interpreter restart. Seeding from ``hash(app_id)`` does not
	    give that guarantee because string hashes are salted per process unless
	    PYTHONHASHSEED is pinned. The module-level ``random`` state is not
	    touched.

	    Args:
	        app_id: Application identifier, e.g. "APP-12345".

	    Returns:
	        A dict with the keys ``application_id``, ``timestamp``,
	        ``portfolio``, ``requested_credit_line``, ``fraud_score``,
	        ``fraud_score_percentile``, ``decision``, ``risk_level`` and
	        ``features``. ``timestamp`` is computed relative to the current
	        time, so only its offset in days is reproducible.
	    """
	    # str() keeps the seed well-defined even if a caller passes a non-string ID.
	    rng = random.Random(str(app_id))

	    risk_level = rng.choice(["low", "medium", "high", "very_high"])
	    is_high_risk = risk_level in ("high", "very_high")

	    # Inclusive (min, max) bounds per risk level.
	    score_bounds = {
	        "low": (150, 350),
	        "medium": (400, 550),
	        "high": (600, 750),
	        "very_high": (800, 950),
	    }
	    percentile_bounds = {
	        "low": (5, 30),
	        "medium": (40, 60),
	        "high": (75, 90),
	        "very_high": (92, 99),
	    }

	    days_back = rng.randint(0, 30)
	    base_data = {
	        "application_id": app_id,
	        "timestamp": (datetime.now() - timedelta(days=days_back)).isoformat(),
	        "portfolio": rng.choice(["Retail Card", "Payment Solutions", "CareCredit"]),
	        "requested_credit_line": rng.randint(500, 25000),
	        "fraud_score": rng.randint(*score_bounds[risk_level]),
	        "fraud_score_percentile": rng.randint(*percentile_bounds[risk_level]),
	        "decision": "FLAGGED" if is_high_risk else "APPROVED",
	        "risk_level": risk_level,
	    }

	    # Features that contribute to fraud scoring
	    if is_high_risk:
	        base_data["features"] = {
	            "ssn_issue_date_vs_credit_age_mismatch": rng.uniform(0.7, 0.95),
	            "device_velocity_30d": rng.randint(5, 15),
	            "address_type": rng.choice(["CMRA", "PO_BOX", "VACANT"]),
	            "phone_type": rng.choice(["VOIP", "PREPAID"]),
	            "email_domain_age_days": rng.randint(1, 30),
	            "application_velocity_14d": rng.randint(3, 8),
	            "identity_linkage_count": rng.randint(4, 12),
	            "credit_inquiry_spike": True,
	            "synthetic_id_score": rng.uniform(0.75, 0.98),
	        }
	    else:
	        base_data["features"] = {
	            "ssn_issue_date_vs_credit_age_mismatch": rng.uniform(0.0, 0.2),
	            "device_velocity_30d": rng.randint(1, 2),
	            "address_type": "RESIDENTIAL",
	            "phone_type": "POSTPAID",
	            "email_domain_age_days": rng.randint(365, 3650),
	            "application_velocity_14d": rng.randint(0, 1),
	            "identity_linkage_count": rng.randint(0, 2),
	            "credit_inquiry_spike": False,
	            "synthetic_id_score": rng.uniform(0.05, 0.25),
	        }

	    return base_data


	# =============================================================================
	# FRAUD EXPLAINABILITY TOOLS
	# =============================================================================

	@tool
	def get_application_summary(application_id: str) -> str:
	    """
	    Retrieve basic information about a credit application including
	    fraud score, decision, portfolio, and timestamp.

	    Args:
	        application_id: The unique identifier for the application (e.g., "APP-12345")

	    Returns:
	        A summary of the application details and fraud assessment
	    """
	    app = generate_mock_application(application_id)

	    # Pick the correct English ordinal suffix: a fixed "th" renders values
	    # such as 21, 42 or 93 as "21th", "42th", "93th". 11-13 are the
	    # exception that always takes "th".
	    percentile = app['fraud_score_percentile']
	    if 11 <= percentile % 100 <= 13:
	        suffix = "th"
	    else:
	        suffix = {1: "st", 2: "nd", 3: "rd"}.get(percentile % 10, "th")

	    # Only the date part (YYYY-MM-DD) of the ISO timestamp is shown.
	    return f"""
	APPLICATION SUMMARY
	==================
	Application ID: {app['application_id']}
	Submission Date: {app['timestamp'][:10]}
	Portfolio: {app['portfolio']}
	Requested Credit Line: ${app['requested_credit_line']:,}

	FRAUD ASSESSMENT
	----------------
	Fraud Score: {app['fraud_score']} / 1000
	Risk Percentile: {percentile}{suffix} percentile
	Risk Level: {app['risk_level'].upper()}
	Decision: {app['decision']}
	"""


	@tool
	def explain_fraud_score(application_id: str) -> str:
	    """
	    Get detailed SHAP-style feature attribution explanation for why an
	    application received its fraud score. Shows which factors contributed
	    most to the risk assessment.

	    Args:
	        application_id: The unique identifier for the application

	    Returns:
	        Detailed breakdown of contributing factors with impact scores
	    """
	    app = generate_mock_application(application_id)
	    features = app["features"]

	    def risk_factor(feature, value, impact, explanation):
	        # Every threshold-triggered factor pushes the score upward.
	        return {
	            "feature": feature,
	            "value": value,
	            "impact": impact,
	            "direction": "INCREASES RISK",
	            "explanation": explanation,
	        }

	    # Simulate SHAP values (in production, these come from your model)
	    if app["risk_level"] == "low":
	        # Low-risk applications report protective factors instead of risk factors.
	        explanations = [
	            {
	                "feature": "Established Credit History",
	                "value": "12+ years",
	                "impact": "-120 points",
	                "direction": "DECREASES RISK",
	                "explanation": "Long credit history consistent with SSN issue date"
	            },
	            {
	                "feature": "Stable Contact Information",
	                "value": "Verified",
	                "impact": "-85 points",
	                "direction": "DECREASES RISK",
	                "explanation": "Phone and address verified with multiple data sources"
	            },
	            {
	                "feature": "Low Application Velocity",
	                "value": "1 in 90 days",
	                "impact": "-45 points",
	                "direction": "DECREASES RISK",
	                "explanation": "Normal application pattern"
	            }
	        ]
	    else:
	        explanations = []

	        if features["ssn_issue_date_vs_credit_age_mismatch"] > 0.5:
	            explanations.append(risk_factor(
	                "SSN Issue Date vs Credit Age Mismatch",
	                f"{features['ssn_issue_date_vs_credit_age_mismatch']:.0%}",
	                "+187 points",
	                "SSN was issued recently but credit file shows longer history, a key synthetic ID indicator",
	            ))

	        if features["device_velocity_30d"] > 3:
	            explanations.append(risk_factor(
	                "Device Velocity (30 days)",
	                f"{features['device_velocity_30d']} applications",
	                "+142 points",
	                "Same device fingerprint linked to multiple applications in short period",
	            ))

	        if features["address_type"] in ["CMRA", "PO_BOX", "VACANT"]:
	            explanations.append(risk_factor(
	                "Address Type",
	                features["address_type"],
	                "+98 points",
	                f"Address classified as {features['address_type']} (Commercial Mail Receiving Agency or high-risk type)",
	            ))

	        if features["synthetic_id_score"] > 0.6:
	            explanations.append(risk_factor(
	                "Synthetic Identity Score",
	                f"{features['synthetic_id_score']:.0%}",
	                "+156 points",
	                "Composite score from ensemble model indicates high probability of synthetic identity",
	            ))

	        if features["application_velocity_14d"] > 2:
	            explanations.append(risk_factor(
	                "Application Velocity (14 days)",
	                f"{features['application_velocity_14d']} applications",
	                "+78 points",
	                "Multiple credit applications submitted in short timeframe",
	            ))

	        if features["email_domain_age_days"] < 60:
	            explanations.append(risk_factor(
	                "Email Domain Age",
	                f"{features['email_domain_age_days']} days",
	                "+45 points",
	                "Email address created very recently",
	            ))

	        if features["phone_type"] in ["VOIP", "PREPAID"]:
	            explanations.append(risk_factor(
	                "Phone Type",
	                features["phone_type"],
	                "+62 points",
	                "Non-traditional phone type associated with higher fraud rates",
	            ))

	    # Format output
	    output = f"""
	FRAUD SCORE EXPLANATION
	=======================
	Application ID: {application_id}
	Final Fraud Score: {app['fraud_score']} / 1000
	Model: XGBoost Fraud Ensemble v3.2

	TOP CONTRIBUTING FACTORS (ranked by impact):
	--------------------------------------------
	"""

	    if not explanations:
	        # Applications that are neither low risk nor over any threshold
	        # (e.g. medium risk) would otherwise get a header with nothing
	        # under it; say so explicitly instead.
	        output += """
	No individual risk factor exceeded its reporting threshold for this application.
	"""
	        return output

	    # Rank by absolute impact; the leading token of "impact" is a signed integer.
	    ranked = sorted(
	        explanations,
	        key=lambda item: abs(int(item["impact"].split()[0])),
	        reverse=True,
	    )
	    for i, exp in enumerate(ranked, 1):
	        output += f"""
	{i}. {exp['feature']}
	Value: {exp['value']}
	Impact: {exp['impact']} ({exp['direction']})
	→ {exp['explanation']}
	"""

	    return output


	@tool
	def compare_to_population(application_id: str, comparison_group: str = "approved") -> str:
	    """
	    Compare an application's features to the approved or denied population
	    to show how unusual the applicant's characteristics are.

	    Args:
	        application_id: The unique identifier for the application
	        comparison_group: Either "approved" or "denied" population to compare against

	    Returns:
	        Statistical comparison showing how the application differs from typical cases
	    """
	    app = generate_mock_application(application_id)
	    features = app["features"]

	    # Mock population statistics
	    population_stats = {
	        "approved": {
	            "ssn_credit_mismatch_mean": 0.08,
	            "ssn_credit_mismatch_std": 0.12,
	            "device_velocity_mean": 1.2,
	            "device_velocity_std": 0.8,
	            "synthetic_score_mean": 0.15,
	            "synthetic_score_std": 0.10,
	            "app_velocity_mean": 0.5,
	            "app_velocity_std": 0.7,
	        },
	        "denied": {
	            "ssn_credit_mismatch_mean": 0.72,
	            "ssn_credit_mismatch_std": 0.18,
	            "device_velocity_mean": 6.5,
	            "device_velocity_std": 3.2,
	            "synthetic_score_mean": 0.78,
	            "synthetic_score_std": 0.15,
	            "app_velocity_mean": 4.2,
	            "app_velocity_std": 2.1,
	        }
	    }
	    sample_sizes = {"approved": "847,293", "denied": "23,847"}

	    # Resolve the group once and use the resolved name everywhere, so the
	    # label, the sample size and the statistics always describe the same
	    # population. Unrecognised values fall back to "approved".
	    group = comparison_group.strip().lower() if isinstance(comparison_group, str) else ""
	    if group not in population_stats:
	        group = "approved"
	    stats = population_stats[group]

	    def calc_z_score(value, mean, std):
	        # A zero spread would divide by zero; report "no deviation" instead.
	        if std == 0:
	            return 0
	        return (value - mean) / std

	    # (label, feature key, stats key prefix, rendered as a percentage?)
	    feature_specs = [
	        ("SSN/Credit Age Mismatch", "ssn_issue_date_vs_credit_age_mismatch", "ssn_credit_mismatch", True),
	        ("Device Velocity (30d)", "device_velocity_30d", "device_velocity", False),
	        ("Synthetic ID Score", "synthetic_id_score", "synthetic_score", True),
	        ("Application Velocity (14d)", "application_velocity_14d", "app_velocity", False),
	    ]

	    comparisons = []
	    for label, feature_key, stats_prefix, as_percent in feature_specs:
	        value = features[feature_key]
	        mean = stats[f"{stats_prefix}_mean"]
	        std = stats[f"{stats_prefix}_std"]
	        comparisons.append({
	            "feature": label,
	            "applicant_value": f"{value:.0%}" if as_percent else str(value),
	            "population_mean": f"{mean:.0%}" if as_percent else f"{mean:.1f}",
	            "z_score": calc_z_score(value, mean, std),
	        })

	    output = f"""
	POPULATION COMPARISON ANALYSIS
	==============================
	Application ID: {application_id}
	Comparison Group: {group.upper()} applications (last 12 months)
	Sample Size: {sample_sizes[group]} applications

	FEATURE COMPARISON:
	-------------------
	{"Feature":<30} {"Applicant":<15} {"Population Mean":<18} {"Z-Score":<10} {"Assessment"}
	{"-"*95}
	"""

	    for comp in comparisons:
	        z = comp["z_score"]
	        if abs(z) > 3:
	            assessment = "⚠️ EXTREME OUTLIER"
	        elif abs(z) > 2:
	            assessment = "🔶 SIGNIFICANT DEVIATION"
	        elif abs(z) > 1:
	            assessment = "🔷 MILD DEVIATION"
	        else:
	            assessment = "✅ WITHIN NORMAL"

	        output += f"{comp['feature']:<30} {comp['applicant_value']:<15} {comp['population_mean']:<18} {z:>+.2f}σ {assessment}\n"

	    # Summary
	    extreme_count = sum(1 for c in comparisons if abs(c["z_score"]) > 2)

	    output += f"""
	SUMMARY:
	--------
	{extreme_count} of {len(comparisons)} features show significant deviation (|z| > 2σ) from {group} population.
	"""

	    if extreme_count >= 2:
	        output += f"This application's profile is statistically unusual compared to typically {group} applications."

	    return output


	@tool
	def check_fair_lending_flags(application_id: str) -> str:
	    """
	    Check for potential fair lending concerns in the fraud decision.
	    Reviews whether protected class proxies may have influenced the score
	    and provides compliance documentation.

	    Args:
	        application_id: The unique identifier for the application

	    Returns:
	        Fair lending compliance assessment and documentation
	    """
	    app = generate_mock_application(application_id)
	    feature_values = app["features"]
	    reviewed_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

	    # The report is assembled as ordered sections and joined at the end.
	    # Mock fair lending analysis: fixed header plus proxy analysis.
	    sections = [f"""
	FAIR LENDING COMPLIANCE REVIEW
	==============================
	Application ID: {application_id}
	Review Date: {reviewed_at}
	Model: XGBoost Fraud Ensemble v3.2

	PROTECTED CLASS PROXY ANALYSIS:
	-------------------------------
	The following features were analyzed for potential correlation with protected characteristics:

	✅ Geography-Based Features:
	- ZIP code used only for velocity calculations, not scoring
	- No direct geographic risk scoring applied
	- Compliant with ECOA geographic restrictions

	✅ Name-Based Features:
	- No name-based features used in fraud model
	- Identity verification uses SSN/DOB only

	✅ Age-Related Features:
	- Credit age features measure account history, not applicant age
	- SSN issuance analysis targets synthetic ID patterns, not age discrimination
	- Model tested for age disparate impact: PASSED (adverse impact ratio: 0.94)

	⚠️ REVIEW ITEMS:
	-----------------
	"""]

	    # Review items appear only when the matching feature value is present.
	    if feature_values.get("phone_type") in ("VOIP", "PREPAID"):
	        sections.append("""
	• Phone Type Feature:
	- VOIP/Prepaid flagged as risk factor
	- Documented business justification: 73% of confirmed synthetic fraud uses VOIP
	- Disparate impact testing: PASSED (ratio: 0.89)
	- Alternative considered: None available with equivalent predictive power
	""")

	    if feature_values.get("address_type") in ("CMRA", "PO_BOX"):
	        sections.append("""
	• Address Type Feature:
	- CMRA/PO Box flagged as risk factor
	- Documented business justification: Required for synthetic ID detection
	- Disparate impact testing: PASSED (ratio: 0.91)
	- Accommodations: Manual review pathway available for legitimate CMRA users
	""")

	    sections.append("""
	MODEL VALIDATION STATUS:
	------------------------
	Last Disparate Impact Test: 2024-11-15
	Last Adverse Action Review: 2024-12-01
	Model Risk Rating: LOW
	SR 11-7 Compliance: COMPLIANT

	ADVERSE ACTION REASON CODES:
	----------------------------
	If this application is denied, the following reason codes apply:
	""")

	    # Reason codes are listed only for flagged applications.
	    if app["decision"] != "FLAGGED":
	        sections.append(" N/A - Application approved\n")
	    else:
	        reason_codes = (
	            "FA01 - Unable to verify identity information",
	            "FA03 - Inconsistent application information",
	            "FA07 - High-risk contact information patterns",
	        )
	        sections.extend(
	            f" {position}. {code}\n"
	            for position, code in enumerate(reason_codes, 1)
	        )

	    sections.append("""
	DOCUMENTATION:
	--------------
	This analysis is auto-generated for compliance documentation.
	Full model documentation available in Model Risk Management system.
	Contact: model-governance@company.com
	""")

	    return "".join(sections)


	@tool
	def get_identity_network(application_id: str) -> str:
	    """
	    Analyze the identity linkage network for an application, showing
	    connections to other applications via shared attributes (device,
	    phone, email, address, SSN patterns).

	    Args:
	        application_id: The unique identifier for the application

	    Returns:
	        Network analysis showing linked applications and risk patterns
	    """
	    app = generate_mock_application(application_id)
	    features = app["features"]

	    linkage_count = features.get("identity_linkage_count", 0)

	    output = f"""
	IDENTITY NETWORK ANALYSIS
	=========================
	Application ID: {application_id}
	Analysis Date: {datetime.now().strftime('%Y-%m-%d')}

	LINKAGE SUMMARY:
	----------------
	Total Linked Applications: {linkage_count}
	"""

	    if linkage_count <= 3:
	        # Few or no links: report the fixed low-risk network profile.
	        output += """
	LINKAGE BREAKDOWN:
	------------------
	• Device Fingerprint Links: 0-1 applications
	• Phone Number Links: 0 applications
	• Email Pattern Links: 0 applications
	• Address Links: 1 application (same household likely)

	NETWORK RISK ASSESSMENT:
	------------------------
	• Network Risk Score: LOW
	• No suspicious patterns detected
	• Normal application profile

	✅ No concerning identity network patterns identified.
	"""
	        return output

	    # Generate mock linked applications for high-risk cases.
	    # A private generator seeded with the ID string keeps the network
	    # identical across calls and interpreter restarts (hash() of a str is
	    # salted per process) and leaves the global random state alone.
	    rng = random.Random(f"identity-network:{application_id}")

	    link_types = {
	        "device_fingerprint": rng.randint(2, min(linkage_count, 8)),
	        "phone_number": rng.randint(1, min(linkage_count, 4)),
	        "email_pattern": rng.randint(1, min(linkage_count, 3)),
	        "address": rng.randint(1, min(linkage_count, 5)),
	    }

	    output += f"""
	LINKAGE BREAKDOWN:
	------------------
	• Device Fingerprint Links: {link_types['device_fingerprint']} applications
	• Phone Number Links: {link_types['phone_number']} applications
	• Email Pattern Links: {link_types['email_pattern']} applications
	• Address Links: {link_types['address']} applications

	LINKED APPLICATION DETAILS:
	---------------------------
	"""

	    statuses = ["CONFIRMED_FRAUD", "FLAGGED", "DENIED", "CHARGED_OFF", "APPROVED"]
	    # Linked applications skew toward bad outcomes when this one is high risk.
	    if app["risk_level"] in ["high", "very_high"]:
	        weights = [0.3, 0.25, 0.2, 0.15, 0.1]
	    else:
	        weights = [0.05, 0.1, 0.15, 0.1, 0.6]

	    status_emoji = {
	        "CONFIRMED_FRAUD": "🔴",
	        "FLAGGED": "🟠",
	        "DENIED": "🟡",
	        "CHARGED_OFF": "🔴",
	        "APPROVED": "🟢"
	    }
	    link_type_names = list(link_types)

	    # At most six linked applications are listed in detail.
	    for _ in range(min(linkage_count, 6)):
	        linked_id = f"APP-{rng.randint(10000, 99999)}"
	        link_type = rng.choice(link_type_names)
	        status = rng.choices(statuses, weights=weights)[0]
	        days_ago = rng.randint(1, 180)

	        output += f" {status_emoji.get(status, '⚪')} {linked_id} | {link_type.replace('_', ' ').title()} | {status} | {days_ago}d ago\n"

	    # Risk assessment
	    # NOTE: this count is sampled over the whole network (40% chance per
	    # link), independently of the statuses listed above.
	    fraud_links = sum(1 for _ in range(linkage_count) if rng.random() < 0.4)

	    network_score = min(100, linkage_count * 12 + fraud_links * 25)
	    ring_detected = linkage_count > 5
	    ring_flag = "YES ⚠️" if ring_detected else "NO"
	    velocity_flag = "YES ⚠️" if features.get('device_velocity_30d', 0) > 5 else "NO"
	    if ring_detected:
	        recommendation = "⚠️ HIGH-RISK NETWORK - Manual review recommended"
	    else:
	        recommendation = "🔶 ELEVATED RISK - Monitor for additional activity"

	    output += f"""
	NETWORK RISK ASSESSMENT:
	------------------------
	• Confirmed Fraud in Network: {fraud_links} application(s)
	• Network Risk Score: {network_score}/100
	• Ring Pattern Detected: {ring_flag}
	• Velocity Anomaly: {velocity_flag}

	RECOMMENDATION:
	---------------
	{recommendation}
	"""

	    return output


	@tool
	def get_model_performance(model_name: str = "xgboost_fraud_v3.2", portfolio: str = "all") -> str:
	    """
	    Retrieve current performance metrics for a fraud detection model,
	    including precision, recall, KS statistic, and financial impact.

	    Args:
	        model_name: Name of the fraud model (default: xgboost_fraud_v3.2)
	        portfolio: Portfolio to filter by ("Retail Card", "Payment Solutions", "CareCredit", or "all")

	    Returns:
	        Model performance metrics and trends
	    """
	    output = f"""
	MODEL PERFORMANCE DASHBOARD
	===========================
	Model: {model_name}
	Portfolio: {portfolio.upper()}
	Reporting Period: Last 30 Days
	Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

	DETECTION METRICS:
	------------------
	Current Prior Month Δ Change
	Fraud Detection Rate: 87.3% 84.1% +3.2% ✅
	Precision (PPV): 34.2% 31.8% +2.4% ✅
	False Positive Rate: 2.1% 2.4% -0.3% ✅
	KS Statistic: 0.72 0.69 +0.03 ✅
	Gini Coefficient: 0.81 0.78 +0.03 ✅
	AUC-ROC: 0.91 0.89 +0.02 ✅

	FINANCIAL IMPACT:
	-----------------
	Current Prior Month Δ Change
	Fraud Losses Prevented: $4.2M $3.8M +$400K ✅
	False Positive Cost: $890K $920K -$30K ✅
	Net Benefit: $3.31M $2.88M +$430K ✅
	ROI: 372% 317% +55% ✅

	VOLUME METRICS:
	---------------
	Applications Scored: 1,247,832
	High-Risk Flags: 26,847 (2.15%)
	Manual Reviews: 8,421
	Confirmed Fraud: 9,182
	"""

	    # Compare case- and whitespace-insensitively so "ALL" or " all " is not
	    # mistaken for a portfolio name.
	    portfolio_key = portfolio.strip().lower()
	    if portfolio_key != "all":
	        # Seed a private generator from the request so the same model and
	        # portfolio always show the same mock figures, instead of whatever
	        # the shared global generator happens to produce next.
	        rng = random.Random(f"model-performance:{model_name}:{portfolio_key}")
	        output += f"""
	PORTFOLIO BREAKDOWN ({portfolio}):
	{'='*40}
	Applications: {rng.randint(200000, 500000):,}
	Fraud Rate: {rng.uniform(0.5, 1.2):.2f}%
	Detection Rate: {rng.uniform(82, 92):.1f}%
	"""

	    output += """
	MODEL HEALTH:
	-------------
	✅ Feature Drift (PSI): 0.08 (threshold: 0.25)
	✅ Score Distribution: Stable
	✅ Latency P99: 45ms (SLA: 100ms)
	⚠️ Challenger Model: +2.1% lift in shadow mode - review scheduled

	TREND ALERT:
	------------
	📈 Synthetic ID fraud attempts up 23% MoM - model adapting well
	📉 First-party fraud stable at historical levels
	"""

	    return output


	# =============================================================================
	# SYSTEM PROMPT
	# =============================================================================

	# Prompt text describing the assistant's role, the six tool capabilities it
	# can draw on, general answering guidelines, and the five-part response
	# structure for flagged applications. Leading and trailing whitespace is
	# removed with .strip().
	# NOTE(review): presumably passed to the agent as its system prompt by the
	# app module -- the consumer is not visible in this file.
	SYSTEM_PROMPT = """
	You are a Fraud Model Explainability Assistant for a major financial services company.
	Your role is to help fraud analysts, data scientists, and executives understand
	fraud model decisions and their implications.

	You have access to tools that can:
	1. Retrieve application summaries and fraud scores
	2. Explain why applications received specific fraud scores (SHAP-style explanations)
	3. Compare applications to approved/denied populations statistically
	4. Check for fair lending compliance concerns
	5. Analyze identity networks and linkages
	6. Show model performance metrics

	When answering questions:
	- Be precise and data-driven
	- Highlight the most important risk factors first
	- Explain technical concepts in business terms when speaking to executives
	- Always mention fair lending implications when relevant
	- Provide actionable insights, not just data

	For flagged applications, structure your response as:
	1. Quick summary (score, decision, risk level)
	2. Top contributing factors
	3. How unusual this is compared to the population
	4. Any compliance considerations
	5. Recommended next steps

	Remember: Your explanations may be used in regulatory examinations and audits,
	so be accurate and thorough.
	""".strip()