Spaces:

AJAYKASU
/

AML_Shield

Running

AJAY KASU

Initial commit AML Shield

7d391cb 23 days ago

2.19 kB

	import pandas as pd
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.cluster import KMeans
	import numpy as np

	def build_customer_profiles(df: pd.DataFrame) -> pd.DataFrame:
	    """Aggregate transaction rows into one KYC profile row per customer.

	    Rows of ``df`` are grouped by ``customer_id`` and reduced to the
	    following per-customer columns:

	    * ``total_transactions``     - count of ``transaction_id``
	    * ``total_volume``           - sum of ``amount``
	    * ``avg_transaction_amount`` - mean of ``amount``
	    * ``max_transaction_amount`` - max of ``amount``
	    * ``international_ratio``    - mean of ``is_international``
	    * ``flagged_ratio``          - mean of ``is_flagged``
	    * ``avg_risk_score``         - mean of ``risk_score``
	    * ``unique_countries``       - number of distinct ``origin_country``
	    * ``structuring_attempts``   - sum of ``structuring_flag``

	    Args:
	        df: Transaction-level frame containing the source columns named
	            above plus ``customer_id``.

	    Returns:
	        A new frame with ``customer_id`` as a regular column (not the
	        index) followed by the aggregated columns, in the order listed.
	    """
	    profile_df = df.groupby('customer_id').agg(
	        total_transactions=('transaction_id', 'count'),
	        total_volume=('amount', 'sum'),
	        avg_transaction_amount=('amount', 'mean'),
	        max_transaction_amount=('amount', 'max'),
	        international_ratio=('is_international', 'mean'),
	        flagged_ratio=('is_flagged', 'mean'),
	        avg_risk_score=('risk_score', 'mean'),
	        unique_countries=('origin_country', 'nunique'),
	        structuring_attempts=('structuring_flag', 'sum'),
	    ).reset_index()
	    return profile_df

	def assign_kyc_tier(profile_df: pd.DataFrame) -> pd.DataFrame:
	    """Attach a clustering-based KYC tier and a 0-100 risk score.

	    The profile features are min-max scaled and clustered with KMeans.
	    Clusters are then ranked by their mean ``avg_risk_score`` and labelled
	    ``'Low'``, ``'Medium'``, ``'High'`` in ascending order. Separately,
	    ``kyc_risk_score`` is each customer's ``avg_risk_score`` min-max
	    rescaled to the range 0-100 across the given customers.

	    Args:
	        profile_df: Per-customer frame holding the feature columns listed
	            in ``features`` below. It is not modified; a copy is returned.

	    Returns:
	        A copy of ``profile_df`` with two extra columns, ``kyc_tier`` and
	        ``kyc_risk_score``. For an empty input the same two columns are
	        added and the (empty) frame is returned without clustering.

	    Notes:
	        Tiers are relative to the customers passed in. When fewer than
	        three clusters can be formed (fewer than three customers, or
	        KMeans collapsing duplicate points), only the lowest labels are
	        used, e.g. ``'Low'``/``'Medium'`` for two clusters.
	    """
	    profile_df = profile_df.copy()

	    features = ['total_transactions', 'total_volume', 'avg_transaction_amount',
	                'max_transaction_amount', 'international_ratio', 'flagged_ratio',
	                'avg_risk_score', 'unique_countries', 'structuring_attempts']
	    # Ordered from the lowest-risk cluster to the highest-risk cluster.
	    tier_labels = ['Low', 'Medium', 'High']

	    # Missing feature values are treated as 0 before scaling.
	    X = profile_df[features].fillna(0)

	    # Nothing to scale or cluster: return the expected schema instead of
	    # letting the scaler fail on zero samples.
	    if profile_df.empty:
	        profile_df['kyc_tier'] = pd.Series(index=profile_df.index, dtype=object)
	        profile_df['kyc_risk_score'] = pd.Series(index=profile_df.index, dtype=float)
	        return profile_df

	    # Normalize so that no single feature dominates the distance metric.
	    scaler = MinMaxScaler()
	    X_scaled = scaler.fit_transform(X)

	    # KMeans cannot fit more clusters than there are samples, so cap the
	    # cluster count for very small inputs.
	    n_clusters = min(len(tier_labels), len(profile_df))
	    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
	    profile_df['cluster'] = kmeans.fit_predict(X_scaled)

	    # Cluster ids carry no ordering, so rank clusters by mean risk score.
	    cluster_risk = (
	        profile_df.groupby('cluster')['avg_risk_score'].mean().sort_values()
	    )

	    # zip() stops at the shorter sequence, so a clustering that produced
	    # fewer distinct clusters than requested no longer raises IndexError.
	    tier_mapping = dict(zip(cluster_risk.index, tier_labels))
	    profile_df['kyc_tier'] = profile_df['cluster'].map(tier_mapping)

	    # kyc_risk_score is a float: avg_risk_score rescaled to 0-100 relative
	    # to the other customers in this frame.
	    risk_scaler = MinMaxScaler(feature_range=(0, 100))
	    profile_df['kyc_risk_score'] = risk_scaler.fit_transform(
	        profile_df[['avg_risk_score']]
	    ).flatten()

	    # The raw cluster id is an implementation detail; do not expose it.
	    profile_df = profile_df.drop(columns=['cluster'])
	    return profile_df