# NOTE(review): removed stray commit-log residue that had been pasted above the
# imports ("soupstick" / "Fix: upgrade to resolve TypeError" / a commit hash);
# it was not valid Python and broke the module at import time.
import gradio as gr
import pandas as pd
import re
import os
import numpy as np
from datetime import datetime, timedelta
from huggingface_hub import InferenceClient, HfApi
import io
import base64
import unicodedata
import warnings
from requests.exceptions import HTTPError
# Optional: phone normalization (graceful fallback if missing)
try:
    import phonenumbers
except Exception:  # ImportError in practice; any failure just disables E.164 parsing
    phonenumbers = None
# Read from Space secret - check both common environment variables
HF_TOKEN = (os.getenv("HF_TOKEN", "") or os.getenv("HF_SPACES", "")).strip()
# Initialize API clients
# NOTE(review): when no token is set, an anonymous InferenceClient is created,
# so downstream calls fail with 401 at call time rather than crashing at import.
api = HfApi(token=HF_TOKEN if HF_TOKEN else None)
client = InferenceClient(token=HF_TOKEN) if HF_TOKEN else InferenceClient()
# Your target models (used as the dropdown choices in the AI consultant UI)
DEFAULT_MODEL_ID = "tiiuae/falcon-7b-instruct"
SECONDARY_MODEL_ID = "HuggingFaceH4/zephyr-7b-beta"
def hf_inference_supported(model_id: str, task: str = "text-generation") -> bool:
    """Check if model is supported on HF's Serverless Inference API"""
    serverless_aliases = {"hf-inference", "hf_serverless", "hf-inference-api"}
    try:
        info = api.model_info(model_id, expand=["inferenceProviderMapping"])
        mapping = (getattr(info, "inference_provider_mapping", None)
                   or getattr(info, "inferenceProviderMapping", None))
        if not mapping:
            return False
        # mapping is a dict: { provider_name: { tasks: [...] , ... } }
        for provider, details in mapping.items():
            if provider.lower() not in serverless_aliases:
                continue
            if task in (details.get("tasks") or []):
                return True
        return False
    except Exception as exc:
        print(f"[HF] Provider check failed for {model_id}: {exc}")
        return False
def _hf_sanity_check():
    """Enhanced sanity check with provider validation"""
    print("[HF] Token present:", bool(HF_TOKEN), "len:", len(HF_TOKEN) if HF_TOKEN else 0)
    if not HF_TOKEN:
        print("[HF] No token provided - API calls will fail")
        return False
    # Probe each candidate model for serverless availability, then try one call.
    for model in (DEFAULT_MODEL_ID, SECONDARY_MODEL_ID):
        supported = hf_inference_supported(model)
        status = '✓ Supported' if supported else '✗ Not available'
        print(f"[HF] {model}: {status} on HF Serverless")
        if not supported:
            continue
        try:
            # Quick test call
            client.text_generation(
                model=model,
                prompt="Hello",
                max_new_tokens=5
            )
        except HTTPError as e:
            code = getattr(e.response, "status_code", None)
            if code == 401:
                print(f"[HF] ✗ {model}: Invalid token")
            elif code == 403:
                print(f"[HF] ✗ {model}: License not accepted")
            else:
                print(f"[HF] ✗ {model}: HTTP {code}")
        except Exception as e:
            print(f"[HF] ✗ {model}: {e}")
        else:
            print(f"[HF] ✓ {model} API test successful")
            return True
    return False


# Run sanity check at import time (prints token/model status to the Space logs)
_hf_sanity_check()
# --------------------------
# Utility Functions
# --------------------------
def generate_summary(prompt: str, model_id: str = DEFAULT_MODEL_ID):
    """Generate AI-powered analysis using HuggingFace models with proper error handling"""
    # Guard clauses first: no token, then no serverless support.
    if not HF_TOKEN:
        return "⚠️ HF token missing. Set HF_TOKEN in Space Secrets and restart the Space."
    if not hf_inference_supported(model_id):
        return (f"⚠️ Model not available on HF's Serverless Inference API: {model_id}\n\n"
                f"This model either:\n"
                f"• Requires a Dedicated Inference Endpoint\n"
                f"• Is not supported for text-generation tasks\n"
                f"• Has restricted access\n\n"
                f"Check the HuggingFace Models Support Matrix for alternatives, "
                f"or deploy a Dedicated Inference Endpoint.")
    try:
        # Use the correct parameter name: prompt (not inputs)
        return client.text_generation(
            model=model_id,
            prompt=prompt,
            max_new_tokens=500,
            temperature=0.7,
            do_sample=True,
        )
    except HTTPError as http_err:
        status = getattr(http_err.response, "status_code", None)
        if status == 401:
            return "⚠️ HF token missing/invalid. Set HF_TOKEN in Space Secrets and restart the Space."
        if status == 403:
            return (f"⚠️ Access denied to {model_id}.\n\n"
                    f"Required action:\n"
                    f"• Visit: https://huggingface.co/{model_id}\n"
                    f"• Accept the model's license with the same HF account as your token\n"
                    f"• Or use a model with open access")
        if status == 503:
            return (f"⚠️ Model {model_id} is currently loading or unavailable.\n"
                    f"This is common with CPU-only inference. Try again in a few minutes,\n"
                    f"or consider GPU-accelerated inference for better reliability.")
        return f"⚠️ Inference error (HTTP {status}): {http_err}"
    except Exception as exc:
        return f"⚠️ Inference error: {exc}"
def create_download_link(df, filename):
    """Create a downloadable CSV data-URI link for *df*.

    Args:
        df: DataFrame to export (index is not written).
        filename: Download name shown to the user.

    Bug fix: the link previously hard-coded a "(unknown)" placeholder and
    ignored the ``filename`` argument entirely.
    """
    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode()).decode()
    return f'<a href="data:file/csv;base64,{payload}" download="{filename}">📥 Download {filename}</a>'
# NOTE(review): a duplicated "AI Consultant" UI block used to live here at
# module level, *outside* the gr.Blocks() context.  It referenced
# chatbot_respond before that function is defined (NameError at import time)
# and duplicated the identical section built inside the Blocks layout further
# down, so it has been removed.
# -------------------------
# Data Validation & Normalization Helpers (Point 1)
# -------------------------
def _standardize_columns(df: pd.DataFrame) -> pd.DataFrame:
df = df.copy()
df.columns = [re.sub(r"\s+", "_", str(c).strip().lower()) for c in df.columns]
return df
def _norm_str(s: str) -> str:
if pd.isna(s):
return ""
s = unicodedata.normalize("NFKC", str(s))
s = re.sub(r"\s+", " ", s).strip()
return s
def _norm_name(name: str) -> str:
    """Title-case name tokens, keeping short all-caps tokens (initials/acronyms) as-is."""
    cleaned = _norm_str(name)
    tokens = []
    for part in cleaned.split(" "):
        tokens.append(part if (len(part) <= 3 and part.isupper()) else part.title())
    return " ".join(tokens)
def _norm_email(email: str):
    """Lower-case and normalize an email; returns (value, issue-or-None)."""
    addr = _norm_str(email).lower()
    if not addr:
        return "", "missing_email"
    if re.match(r"^[a-z0-9._%+\-]+@[a-z0-9.\-]+\.[a-z]{2,}$", addr):
        return addr, None
    return addr, "invalid_email"
def _norm_phone(phone: str, default_region: str = "IN"):
    """Normalize a phone number to E.164 when phonenumbers is installed.

    Falls back to a digits-only heuristic otherwise.
    Returns (value, issue-or-None).
    """
    raw = _norm_str(phone)
    if not raw:
        return "", "missing_phone"
    if phonenumbers:
        try:
            parsed = phonenumbers.parse(raw, default_region)
            plausible = phonenumbers.is_possible_number(parsed) and phonenumbers.is_valid_number(parsed)
            if plausible:
                return phonenumbers.format_number(parsed, phonenumbers.PhoneNumberFormat.E164), None
            return raw, "invalid_phone"
        except Exception:
            return raw, "invalid_phone"
    # Library unavailable: accept anything with at least 8 digits.
    digits = re.sub(r"\D", "", raw)
    return (raw, "invalid_phone") if len(digits) < 8 else ("+" + digits, None)
def _to_datetime(series, errors="coerce"):
return pd.to_datetime(series, errors=errors, utc=False, infer_datetime_format=True)
def _to_numeric(series):
return pd.to_numeric(series, errors="coerce")
def _new_issues_list():
return []
def _add_issue(issues, row_idx, field, issue, value):
issues.append({
"row": int(row_idx) if pd.notna(row_idx) else None,
"field": field,
"issue": issue,
"value": None if pd.isna(value) else str(value)
})
def _issues_df(issues):
return pd.DataFrame(issues, columns=["row", "field", "issue", "value"]) if issues else pd.DataFrame(columns=["row", "field", "issue", "value"])
# ---------- Per-module preparation ----------
def _prepare_transactions_df(df_raw: pd.DataFrame):
    """Validate and normalize a raw transactions frame.

    Requires (post-standardization) columns: customer_id, amount, timestamp.
    Returns (clean_df_or_None, issues_df, quality_message).
    """
    issues = _new_issues_list()
    df = _standardize_columns(df_raw)
    required = {"customer_id", "amount", "timestamp"}
    missing = required - set(df.columns)
    if missing:
        return None, _issues_df([{"row": None, "field": "/".join(sorted(missing)), "issue": "missing_required_columns", "value": ""}]), f"Missing required columns: {sorted(missing)}"
    # Keep pre-coercion values so issue reports can show the original text.
    # (Bug fix: the old code indexed df_raw with the *standardized* column
    # name, which raised KeyError whenever the uploaded header wasn't already
    # snake_case, e.g. "Amount" or "Timestamp".)
    raw_amount = df["amount"].copy()
    raw_timestamp = df["timestamp"].copy()
    df["customer_id"] = df["customer_id"].astype(str).apply(_norm_str)
    df["amount"] = _to_numeric(df["amount"])
    df["timestamp"] = _to_datetime(df["timestamp"])
    for c in ["merchant_category", "merchant", "country", "device_id", "ip_address"]:
        if c in df.columns:
            df[c] = df[c].astype(str).apply(_norm_str)
    for idx, v in df["amount"].items():
        if pd.isna(v):
            _add_issue(issues, idx, "amount", "non_numeric_amount", raw_amount.loc[idx])
    for idx, v in df["timestamp"].items():
        if pd.isna(v):
            _add_issue(issues, idx, "timestamp", "unparseable_timestamp", raw_timestamp.loc[idx])
    clean = df.dropna(subset=["customer_id", "amount", "timestamp"]).copy()
    kept = len(clean); total = len(df_raw)
    quality = f"Validated {total} rows → kept {kept}, dropped {total - kept} due to invalid amount/timestamp/customer_id."
    return clean, _issues_df(issues), quality
def _prepare_kyc_df(df_raw: pd.DataFrame):
    """Validate and normalize KYC records (name/email/phone/DOB).

    Requires (post-standardization) columns: customer_id, name, email, dob.
    Returns (clean_df_or_None, issues_df, quality_message).
    """
    issues = _new_issues_list()
    df = _standardize_columns(df_raw)
    required = {"customer_id", "name", "email", "dob"}
    missing = required - set(df.columns)
    if missing:
        return None, _issues_df([{"row": None, "field": "/".join(sorted(missing)), "issue": "missing_required_columns", "value": ""}]), f"Missing required columns: {sorted(missing)}"
    df["customer_id"] = df["customer_id"].astype(str).apply(_norm_str)
    df["name"] = df["name"].astype(str).apply(_norm_name)
    emails = []
    for idx, e in df["email"].items():
        ne, err = _norm_email(e)
        emails.append(ne)
        if err:
            _add_issue(issues, idx, "email", err, e)
    df["email"] = emails
    if "phone" in df.columns:
        phones = []
        for idx, p in df["phone"].items():
            normalized, err = _norm_phone(p, default_region="IN")
            phones.append(normalized)
            if err:
                _add_issue(issues, idx, "phone", err, p)
        df["phone"] = phones
    df["dob_parsed"] = _to_datetime(df["dob"])
    now = pd.Timestamp.now(tz=None)
    too_old_cutoff = now - pd.DateOffset(years=120)
    for idx, d in df["dob_parsed"].items():
        # Bug fix: report the original value via the *standardized* frame
        # (df["dob"] is untouched); df_raw may not have a column literally
        # named "dob" (e.g. "DOB"), which raised KeyError here.
        if pd.isna(d):
            _add_issue(issues, idx, "dob", "unparseable_dob", df.loc[idx, "dob"])
        elif d > now:
            _add_issue(issues, idx, "dob", "future_dob", df.loc[idx, "dob"])
        elif d < too_old_cutoff:
            _add_issue(issues, idx, "dob", "age_over_120", df.loc[idx, "dob"])
    clean = df.dropna(subset=["customer_id", "name", "email", "dob_parsed"]).copy()
    kept = len(clean); total = len(df_raw)
    quality = f"KYC validated {total} rows → kept {kept}, dropped {total - kept} due to email/phone/DOB issues."
    return clean, _issues_df(issues), quality
def _prepare_sanctions_customer_df(df_raw: pd.DataFrame):
    """Validate and normalize the customer list used for sanctions screening.

    Requires (post-standardization) columns: customer_id, name.
    Returns (clean_df_or_None, issues_df, quality_message).
    """
    issues = _new_issues_list()
    df = _standardize_columns(df_raw)
    required = {"customer_id", "name"}
    missing = required - set(df.columns)
    if missing:
        return None, _issues_df([{"row": None, "field": "/".join(sorted(missing)), "issue": "missing_required_columns", "value": ""}]), f"Missing required columns: {sorted(missing)}"
    df["customer_id"] = df["customer_id"].astype(str).apply(_norm_str)
    df["name"] = df["name"].astype(str).apply(_norm_name)
    if "dob" in df.columns:
        df["dob_parsed"] = _to_datetime(df["dob"])
        for idx, d in df["dob_parsed"].items():
            if pd.isna(d):
                # Bug fix: read the original value from the standardized frame;
                # df_raw may not have a column literally named "dob" (KeyError).
                _add_issue(issues, idx, "dob", "unparseable_dob", df.loc[idx, "dob"])
    if "country" in df.columns:
        df["country"] = df["country"].astype(str).apply(_norm_str)
    clean = df.dropna(subset=["customer_id", "name"]).copy()
    quality = f"Sanctions input validated {len(df_raw)} rows → kept {len(clean)}, dropped {len(df_raw)-len(clean)}."
    return clean, _issues_df(issues), quality
def _prepare_sanctions_list_df(sanctions_file):
    """Load and normalize an uploaded sanctions CSV.

    Returns (df_or_None, issues_df, status_message); None means the caller
    should fall back to the built-in demo list.
    """
    if sanctions_file is None:
        return None, pd.DataFrame(), "Using built-in demo sanctions list."
    try:
        # Bug fix: gr.File(type="filepath") passes a plain path string, which
        # has no ".name" attribute; accept both a path and a file wrapper.
        raw = pd.read_csv(getattr(sanctions_file, "name", sanctions_file))
        df = _standardize_columns(raw)
        if "name" not in df.columns:
            msg_df = _issues_df([{"row": None, "field": "name", "issue": "missing_required_columns", "value": ""}])
            return None, msg_df, "Uploaded sanctions list missing required 'name' column. Using demo list."
        df["name"] = df["name"].astype(str).apply(_norm_name)
        return df, pd.DataFrame(), "Using uploaded sanctions list."
    except Exception as e:
        warn = _issues_df([{"row": None, "field": "file", "issue": "read_error", "value": str(e)}])
        return None, warn, "Failed to read uploaded sanctions list. Using demo list."
def _prepare_credit_df(df_raw: pd.DataFrame):
    """Validate and normalize credit-profile data.

    Numeric fields are coerced (issues recorded) rather than dropped; only a
    missing customer_id removes a row.  Returns (clean, issues_df, quality).
    """
    issues = _new_issues_list()
    df = _standardize_columns(df_raw)
    missing = {"customer_id"} - set(df.columns)
    if missing:
        return None, _issues_df([{"row": None, "field": "/".join(sorted(missing)), "issue": "missing_required_columns", "value": ""}]), f"Missing required columns: {sorted(missing)}"
    df["customer_id"] = df["customer_id"].astype(str).apply(_norm_str)
    # column -> (plausibility check, issue label when the check fails)
    rules = {
        "credit_score": (lambda v: 250 <= v <= 950, "out_of_range_250_950"),
        "utilization_rate": (lambda v: 0 <= v <= 1.5, "out_of_range_0_1.5"),
        "debt_to_income": (lambda v: 0 <= v <= 2, "out_of_range_0_2"),
        "income": (lambda v: v >= 0, "negative_income"),
        "recent_defaults": (lambda v: v >= 0, "negative_count"),
    }
    present = [c for c in rules if c in df.columns]
    # Keep pre-coercion values so issue reports can show the original text.
    # (Bug fix: the old code indexed df_raw with the *standardized* column
    # name, which raised KeyError when the uploaded header differed.)
    originals = {c: df[c].copy() for c in present}
    for c in present:
        df[c] = _to_numeric(df[c])
    for c in present:
        is_plausible, label = rules[c]
        for idx, v in df[c].items():
            if pd.isna(v):
                _add_issue(issues, idx, c, "non_numeric", originals[c].loc[idx])
            elif not is_plausible(v):
                _add_issue(issues, idx, c, label, v)
    clean = df.dropna(subset=["customer_id"]).copy()
    kept = len(clean); total = len(df_raw)
    quality = f"Credit input validated {total} rows → kept {kept}, dropped {total - kept} (non-critical fields coerced with issues recorded)."
    return clean, _issues_df(issues), quality
# -------------------------
# 1. Transaction Fraud (Enhanced + Validation)
# -------------------------
def process_transaction_file(file):
    """Process transaction data for fraud detection.

    Returns (suspicious_df, ai_summary, stats_text, dq_issues_df).
    """
    try:
        # Bug fix: gr.File(type="filepath") passes a plain path string with no
        # ".name" attribute; accept both a path and a file wrapper.
        df_raw = pd.read_csv(getattr(file, "name", file))
        df, dq_issues, quality = _prepare_transactions_df(df_raw)
        if df is None:
            return pd.DataFrame(), quality, "", dq_issues
        # Enhanced fraud detection rules (on the clean frame)
        has_mc = "merchant_category" in df.columns
        high_risk_mc = (df["merchant_category"] == "HIGH_RISK") if has_mc else False
        per_customer_total = df.groupby("customer_id")["amount"].transform("sum")
        flag_mask = (
            (df["amount"] > 10000)
            | (df["amount"] < 0)
            | high_risk_mc
            | (per_customer_total > 50000)
        )
        suspicious = df[flag_mask].copy()

        def _reason(row):
            # First matching rule wins; the aggregate rule is the fallback.
            if row["amount"] > 10000:
                return 'Large Amount'
            if row["amount"] < 0:
                return 'Negative Amount'
            if has_mc and row.get("merchant_category") == "HIGH_RISK":
                return 'High Risk Merchant'
            return 'Daily Limit Exceeded'

        if not suspicious.empty:
            suspicious["risk_reason"] = suspicious.apply(_reason, axis=1)
        prompt = f"""You are a financial fraud analyst. Analyze these suspicious transactions:
Sample:
{df.head(10).to_string()}
Suspicious Found: {len(suspicious)}
Data Quality: {quality}
Provide a risk assessment and recommended actions."""
        summary = generate_summary(prompt)
        stats = f"{quality}\nFound {len(suspicious)} suspicious transactions out of {len(df)} usable rows."
        return suspicious, summary, stats, dq_issues
    except Exception as e:
        return pd.DataFrame(), f"Error processing file: {str(e)}", "", pd.DataFrame()
# -------------------------
# 2. KYC Fraud Analysis (Enhanced + Validation)
# -------------------------
def process_kyc_file(file):
    """Process KYC data for identity fraud detection.

    Returns (flagged_df, ai_summary, stats_text, dq_issues_df).
    """
    try:
        # Bug fix: gr.File(type="filepath") passes a plain path string with no
        # ".name" attribute; accept both a path and a file wrapper.
        df_raw = pd.read_csv(getattr(file, "name", file))
        df, dq_issues, quality = _prepare_kyc_df(df_raw)
        if df is None:
            return pd.DataFrame(), quality, "", dq_issues
        flagged_records = []

        def _flag(subset, reason):
            # Tag a non-empty subset with its flag reason and collect it.
            if not subset.empty:
                tagged = subset.copy()
                tagged['flag_reason'] = reason
                flagged_records.append(tagged)

        _flag(df[df.duplicated('email', keep=False)], 'Duplicate Email')
        if 'phone' in df.columns:
            _flag(df[df.duplicated('phone', keep=False)], 'Duplicate Phone')
        if 'name' in df.columns:
            name_mask = (
                df['name'].str.contains(r'^[A-Z]+$', na=False)
                | df['name'].str.contains(r'\d', na=False)
                | (df['name'].str.len() < 3)
            )
            _flag(df[name_mask], 'Suspicious Name Pattern')
        flagged_df = (pd.concat(flagged_records, ignore_index=True).drop_duplicates()
                      if flagged_records else pd.DataFrame())
        prompt = f"""You are a KYC fraud analyst. Review identity records for potential fraud.
Total: {len(df)}
Flagged: {len(flagged_df)}
Data Quality: {quality}
Flag reasons distribution:
{flagged_df['flag_reason'].value_counts().to_string() if not flagged_df.empty else 'None'}
Recommend verification steps."""
        summary = generate_summary(prompt)
        stats = f"{quality}\nFlagged {len(flagged_df)} suspicious KYC records out of {len(df)} usable rows."
        return flagged_df, summary, stats, dq_issues
    except Exception as e:
        return pd.DataFrame(), f"Error processing KYC file: {str(e)}", "", pd.DataFrame()
# -------------------------
# 3. Sanctions Check (Enhanced + Validation)
# -------------------------
def process_sanctions_file(file, sanctions_file=None):
    """Process customer data against sanctions/PEP lists.

    Returns (flagged_df, ai_summary, stats_text, dq_issues_df).
    """
    try:
        # Bug fix: gr.File(type="filepath") passes a plain path string with no
        # ".name" attribute; accept both a path and a file wrapper.
        customer_raw = pd.read_csv(getattr(file, "name", file))
        customers, cust_issues, cust_quality = _prepare_sanctions_customer_df(customer_raw)
        sanc_df, sanc_issues, sanc_quality = _prepare_sanctions_list_df(sanctions_file)
        if sanc_df is None:
            # Built-in demo watchlist used when no valid list was uploaded.
            default_sanctions = [
                "John Doe", "Jane Smith", "Muhammad Ali", "Vladimir Putin",
                "Kim Jong Un", "Alexander Petrov", "Maria Gonzalez"
            ]
            sanc_df = pd.DataFrame({"name": [_norm_name(x) for x in default_sanctions]})
        has_issues = not cust_issues.empty or not sanc_issues.empty
        dq_issues = pd.concat([cust_issues, sanc_issues], ignore_index=True) if has_issues else pd.DataFrame()
        if customers is None:
            return pd.DataFrame(), f"{cust_quality} | {sanc_quality}", "", dq_issues
        # Exact matches
        watchlist = set(sanc_df["name"])
        exact = customers[customers["name"].isin(watchlist)].copy()
        if not exact.empty:
            exact["match_type"] = "Exact Match"
        # Simple fuzzy matching: two or more shared name tokens
        sanc_tokens = [set(n.lower().split()) for n in sanc_df["name"]]
        fuzzy_idx = []
        for idx, row in customers.iterrows():
            cust_tokens = set(str(row["name"]).lower().split())
            if any(len(cust_tokens & tokens) >= 2 for tokens in sanc_tokens):
                fuzzy_idx.append(idx)
        fuzzy = customers.loc[sorted(set(fuzzy_idx))].copy() if fuzzy_idx else pd.DataFrame()
        if not fuzzy.empty:
            fuzzy["match_type"] = "Fuzzy Match"
        flagged = pd.concat([exact, fuzzy]).drop_duplicates()
        prompt = f"""You are a compliance officer conducting sanctions screening.
Customers screened: {len(customers)}
Potential matches: {len(flagged)}
Customer sample:
{customers.head(5).to_string()}
Data Quality: {cust_quality} | {sanc_quality}
Assess risk and recommend EDD steps for any matches."""
        summary = generate_summary(prompt)
        stats = f"{cust_quality} | {sanc_quality}\nFound {len(flagged)} potential matches out of {len(customers)} customers."
        return flagged, summary, stats, dq_issues
    except Exception as e:
        return pd.DataFrame(), f"Error processing sanctions check: {str(e)}", "", pd.DataFrame()
# -------------------------
# 4. Credit Risk Analysis (Enhanced + Validation)
# -------------------------
def process_credit_file(file):
    """Process credit data for risk assessment.

    Returns (risky_customers_df, ai_summary, stats_text, dq_issues_df).
    """
    try:
        # Bug fix: gr.File(type="filepath") passes a plain path string with no
        # ".name" attribute; accept both a path and a file wrapper.
        df_raw = pd.read_csv(getattr(file, "name", file))
        df, dq_issues, quality = _prepare_credit_df(df_raw)
        if df is None:
            return pd.DataFrame(), quality, "", dq_issues
        masks = []
        if 'credit_score' in df.columns:
            masks.append(df['credit_score'] < 600)
        if 'utilization_rate' in df.columns:
            masks.append(df['utilization_rate'] > 0.8)
        if 'debt_to_income' in df.columns:
            masks.append(df['debt_to_income'] > 0.4)
        if 'recent_defaults' in df.columns:
            masks.append(df['recent_defaults'] > 0)
        if 'income' in df.columns:
            masks.append(df['income'] < 30000)
        if masks:
            # Score on the full frame first, then subset.  (Bug fix: the old
            # code incremented via `risky_customers.loc[m, ...]` with masks
            # indexed over the *full* frame, which raises pandas'
            # "Unalignable boolean Series provided as indexer" error.)
            risk_score = sum(m.astype(int) for m in masks)
            risk_mask = risk_score > 0
            risky_customers = df[risk_mask].copy()
            risky_customers['risk_score'] = risk_score[risk_mask]
            risky_customers['risk_level'] = risky_customers['risk_score'].apply(
                lambda x: 'High' if x >= 3 else 'Medium' if x >= 2 else 'Low'
            )
        else:
            risky_customers = pd.DataFrame()
        prompt = f"""You are a credit risk analyst. Assess these customer credit profiles:
Total Customers: {len(df)}
High-Risk Customers: {len(risky_customers)}
Risk Distribution:
{risky_customers['risk_level'].value_counts().to_string() if not risky_customers.empty else 'No high-risk customers identified'}
Data Quality: {quality}
Provide risk assessment insights and recommend credit policies or monitoring actions."""
        summary = generate_summary(prompt)
        stats = f"{quality}\nIdentified {len(risky_customers)} high-risk customers out of {len(df)} usable rows."
        return risky_customers, summary, stats, dq_issues
    except Exception as e:
        return pd.DataFrame(), f"Error processing credit risk file: {str(e)}", "", pd.DataFrame()
# -------------------------
# 5. Chatbot (Enhanced)
# -------------------------
def chatbot_respond(message, history, model_choice):
    """Enhanced chatbot for fraud and risk analysis queries"""
    # Flatten the (user, assistant) tuple history into a plain transcript.
    conversation = "".join(
        f"User: {past_user}\nAssistant: {past_reply}\n\n" for past_user, past_reply in history
    )
    prompt = f"""You are an expert fraud analyst and risk management consultant. Help users with:
- Transaction fraud detection
- KYC/Identity verification
- Sanctions screening
- Credit risk assessment
- Regulatory compliance
- Financial crime prevention
Previous conversation:
{conversation}
User: {message}
Assistant:"""
    try:
        reply = generate_summary(prompt, model_id=model_choice)
        history.append((message, reply))
    except Exception as e:
        history.append((message, f"I apologize, but I encountered an error: {str(e)}"))
    # Second value clears the input textbox.
    return history, ""
# -------------------------
# Navigation Functions (Updated)
# -------------------------
def update_section(section):
    """Show exactly one module section and toggle the back button.

    Returns seven gr.update objects, one per section Column plus one for the
    back button (hidden only on the overview page).
    """
    section_order = ("overview", "transaction", "kyc", "sanctions", "credit", "ai")
    updates = [gr.update(visible=(name == section)) for name in section_order]
    updates.append(gr.update(visible=(section != "overview")))
    return tuple(updates)
# -------------------------
# Gradio Interface
# -------------------------
# The whole UI lives in one gr.Blocks context; each module is a Column whose
# visibility is toggled by update_section() via the navigation buttons below.
with gr.Blocks(theme=gr.themes.Soft(), title="🛡️ Fraud Detector Analyst") as demo:
    # Overview/Front Page
    with gr.Column(visible=True) as overview_section:
        gr.Markdown(
            """
# 🛡️ Fraud Detector Analyst
## Multi-Module Risk Intelligence Platform
Welcome to the comprehensive fraud detection and risk management platform powered by AI.
Choose a module below to get started with your risk analysis.
"""
        )
        with gr.Row():
            with gr.Column():
                gr.Markdown("### 🔍 Detection Modules")
                transaction_btn = gr.Button(
                    "📊 Transaction Fraud Detection",
                    size="lg",
                    variant="primary"
                )
                gr.Markdown("*Detect suspicious transaction patterns and anomalies*")
                kyc_btn = gr.Button(
                    "🆔 KYC Identity Fraud Analysis",
                    size="lg",
                    variant="primary"
                )
                gr.Markdown("*Identify duplicate accounts and synthetic identities*")
                sanctions_btn = gr.Button(
                    "🌍 Sanctions & PEP Screening",
                    size="lg",
                    variant="primary"
                )
                gr.Markdown("*Screen customers against global sanctions lists*")
            with gr.Column():
                gr.Markdown("### 🎯 Assessment Tools")
                credit_btn = gr.Button(
                    "💳 Credit Risk Assessment",
                    size="lg",
                    variant="primary"
                )
                gr.Markdown("*Evaluate credit risk and default probability*")
                ai_btn = gr.Button(
                    "💬 AI-Powered Risk Consultant",
                    size="lg",
                    variant="secondary"
                )
                gr.Markdown("*Get expert guidance on fraud and risk management*")
        gr.Markdown(
            """
---
### 🚀 Quick Start Guide
1. **Choose a module** from the buttons above based on your analysis needs
2. **Upload your CSV data** following the format requirements
3. **Review AI-powered insights** and flagged records
4. **Download results** for further investigation
### 📋 Supported Data Formats
- **Transactions**: `customer_id, amount, merchant_category, timestamp`
- **KYC Records**: `customer_id, name, email, phone, dob, address`
- **Customer Lists**: `customer_id, name, dob, country`
- **Credit Profiles**: `customer_id, credit_score, utilization_rate, income`
"""
        )
    # Navigation Back Button (for all modules)
    back_btn = gr.Button("← Back to Main Menu", visible=False, variant="secondary")
    # Transaction Fraud Module
    with gr.Column(visible=False) as transaction_section:
        gr.Markdown("## 📊 Transaction Fraud Detection")
        gr.Markdown("### Upload transaction data to detect fraudulent patterns")
        with gr.Row():
            trans_file = gr.File(
                label="Upload Transaction CSV",
                file_types=[".csv"],
                type="filepath"
            )
        with gr.Row():
            with gr.Column():
                trans_summary = gr.Textbox(
                    label="AI Analysis Summary",
                    lines=8,
                    interactive=False
                )
            with gr.Column():
                trans_stats = gr.Textbox(
                    label="Detection Statistics",
                    lines=3,
                    interactive=False
                )
        trans_results = gr.Dataframe(
            label="Suspicious Transactions",
            interactive=False
        )
        trans_issues = gr.Dataframe(
            label="Data Quality Issues",
            interactive=False
        )
        # Uploading a file immediately runs the detection pipeline.
        trans_file.upload(
            process_transaction_file,
            inputs=[trans_file],
            outputs=[trans_results, trans_summary, trans_stats, trans_issues]
        )
    # KYC Fraud Module
    with gr.Column(visible=False) as kyc_section:
        gr.Markdown("## 🆔 KYC Identity Fraud Analysis")
        gr.Markdown("### Detect identity fraud and synthetic accounts in customer onboarding data")
        with gr.Row():
            kyc_file = gr.File(
                label="Upload KYC Customer Data CSV",
                file_types=[".csv"],
                type="filepath"
            )
        with gr.Row():
            with gr.Column():
                kyc_summary = gr.Textbox(
                    label="KYC Fraud Analysis",
                    lines=8,
                    interactive=False
                )
            with gr.Column():
                kyc_stats = gr.Textbox(
                    label="KYC Statistics",
                    lines=3,
                    interactive=False
                )
        kyc_results = gr.Dataframe(
            label="Flagged KYC Records",
            interactive=False
        )
        kyc_issues = gr.Dataframe(
            label="Data Quality Issues",
            interactive=False
        )
        kyc_file.upload(
            process_kyc_file,
            inputs=[kyc_file],
            outputs=[kyc_results, kyc_summary, kyc_stats, kyc_issues]
        )
    # Sanctions Check Module
    with gr.Column(visible=False) as sanctions_section:
        gr.Markdown("## 🌍 Sanctions & PEP Screening")
        gr.Markdown("### Screen customers against sanctions lists and PEP databases")
        with gr.Row():
            sanctions_customer_file = gr.File(
                label="Upload Customer List CSV",
                file_types=[".csv"],
                type="filepath"
            )
            sanctions_list_file = gr.File(
                label="Upload Sanctions List CSV (Optional)",
                file_types=[".csv"],
                type="filepath"
            )
        with gr.Row():
            with gr.Column():
                sanctions_summary = gr.Textbox(
                    label="Sanctions Screening Results",
                    lines=8,
                    interactive=False
                )
            with gr.Column():
                sanctions_stats = gr.Textbox(
                    label="Screening Statistics",
                    lines=3,
                    interactive=False
                )
        sanctions_results = gr.Dataframe(
            label="Flagged Customers",
            interactive=False
        )
        sanctions_issues = gr.Dataframe(
            label="Data Quality Issues",
            interactive=False
        )
        # Screening runs when the *customer* file is uploaded; the optional
        # sanctions list is read as a second input at that moment.
        sanctions_customer_file.upload(
            lambda f1, f2: process_sanctions_file(f1, f2),
            inputs=[sanctions_customer_file, sanctions_list_file],
            outputs=[sanctions_results, sanctions_summary, sanctions_stats, sanctions_issues]
        )
    # Credit Risk Module
    with gr.Column(visible=False) as credit_section:
        gr.Markdown("## 💳 Credit Risk Assessment")
        gr.Markdown("### Assess credit risk and default probability for loan applicants")
        with gr.Row():
            credit_file = gr.File(
                label="Upload Credit Profile CSV",
                file_types=[".csv"],
                type="filepath"
            )
        with gr.Row():
            with gr.Column():
                credit_summary = gr.Textbox(
                    label="Credit Risk Analysis",
                    lines=8,
                    interactive=False
                )
            with gr.Column():
                credit_stats = gr.Textbox(
                    label="Risk Statistics",
                    lines=3,
                    interactive=False
                )
        credit_results = gr.Dataframe(
            label="High-Risk Customers",
            interactive=False
        )
        credit_issues = gr.Dataframe(
            label="Data Quality Issues",
            interactive=False
        )
        credit_file.upload(
            process_credit_file,
            inputs=[credit_file],
            outputs=[credit_results, credit_summary, credit_stats, credit_issues]
        )
    # AI Consultant Module
    with gr.Column(visible=False) as ai_section:
        gr.Markdown("## 💬 AI-Powered Risk Consultant")
        gr.Markdown("### Chat with our AI expert about fraud detection and risk management")
        model_choice = gr.Dropdown(
            choices=[DEFAULT_MODEL_ID, SECONDARY_MODEL_ID],
            label="Choose AI Model",
            value=DEFAULT_MODEL_ID,
            info="Select the language model for analysis"
        )
        chatbot = gr.Chatbot(
            label="Risk Management Consultant",
            height=500
        )
        with gr.Row():
            msg = gr.Textbox(
                label="Ask about fraud detection, risk assessment, compliance...",
                placeholder="e.g., How can I improve my transaction fraud detection?",
                scale=4
            )
            submit_btn = gr.Button("Send", scale=1, variant="primary")
        # Both Enter and the Send button trigger the same handler.
        msg.submit(
            chatbot_respond,
            inputs=[msg, chatbot, model_choice],
            outputs=[chatbot, msg]
        )
        submit_btn.click(
            chatbot_respond,
            inputs=[msg, chatbot, model_choice],
            outputs=[chatbot, msg]
        )
    # Button Event Handlers - Fixed to properly navigate between sections
    transaction_btn.click(
        fn=lambda: update_section("transaction"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    kyc_btn.click(
        fn=lambda: update_section("kyc"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    sanctions_btn.click(
        fn=lambda: update_section("sanctions"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    credit_btn.click(
        fn=lambda: update_section("credit"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    ai_btn.click(
        fn=lambda: update_section("ai"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    # Back button handler - Returns to overview page
    back_btn.click(
        fn=lambda: update_section("overview"),
        inputs=[],
        outputs=[overview_section, transaction_section, kyc_section, sanctions_section, credit_section, ai_section, back_btn]
    )
    # Footer (visible on all pages)
    with gr.Row():
        gr.Markdown(
            """
---
**⚠️ Disclaimer:** This tool is for demonstration purposes. Always validate results with domain experts and comply with relevant regulations.
"""
        )
if __name__ == "__main__":
    demo.launch(
        share=True,
        server_name="0.0.0.0",
        server_port=7860
    )