Spaces:

soupstick
/

fraud-detector-app

Sleeping

soupstick committed on Aug 20, 2025

Commit

2132a21

1 Parent(s): 2382167

Enhanced fraud detection platform with KYC, Sanctions, and Credit Risk modules

- Added KYC identity fraud detection with duplicate/synthetic account detection
- Implemented sanctions screening with fuzzy name matching capabilities
- Integrated credit risk assessment with multi-factor scoring
- Enhanced AI consultant chatbot with fraud domain expertise
- Improved UI/UX with professional theme and statistics panels
- Added comprehensive error handling and data validation
- Updated requirements and documentation

Files changed (1) hide show

app.py +503 -564

app.py CHANGED Viewed

@@ -1,609 +1,548 @@
-import os
-import json
-from datetime import datetime
-import pandas as pd
 import gradio as gr
 from huggingface_hub import InferenceClient
-# local modules
-from database import DatabaseManager
-from models import ModelRouter
-from velocity import detect_burst
-from export_utils import generate_csv_report, generate_pdf_report
-from alerts import EmailNotifier, SlackNotifier
-from rag import RAGStore
-# =========================
-# Config from environment
-# =========================
-HF_TOKEN = os.getenv("HF_TOKEN", "")
-ADMIN_EMAIL = os.getenv("ADMIN_EMAIL", "")
-SLACK_WEBHOOK = os.getenv("SLACK_WEBHOOK", "")
-# =========================
-# Data dirs (writeable in HF Spaces)
-# =========================
-DATA_DIR = os.path.join(os.getcwd(), "app_data")
-os.makedirs(DATA_DIR, exist_ok=True)
-DB_PATH = os.path.join(DATA_DIR, "fraud_detector.db")
-VECTOR_DIR = os.path.join(DATA_DIR, "chroma_collection")
-# =========================
-# Utils / Logging
-# =========================
-def _log_ex(prefix: str, e: Exception):
-    import traceback
-    print(f"{prefix}: {e}")
-    print(traceback.format_exc())
-# =========================
-# Initialize components
-# =========================
-try:
-    db = DatabaseManager(DB_PATH)
-    print("✅ Database initialized")
-except Exception as e:
-    _log_ex("Warning: Database initialization failed", e)
-    db = None
-try:
-    rag = RAGStore(collection_dir=VECTOR_DIR)
-    print("✅ RAG store initialized")
-except Exception as e:
-    _log_ex("Warning: RAG store initialization failed", e)
-    rag = None
-try:
-    model_router = ModelRouter(hf_token=HF_TOKEN)
-    print("✅ Model router initialized")
-except Exception as e:
-    _log_ex("Warning: Model router initialization failed", e)
-    model_router = None
-try:
-    emailer = EmailNotifier()
-    print("✅ Email notifier ready")
-except Exception as e:
-    _log_ex("Warning: Email notifier initialization failed", e)
-    emailer = None
-try:
-    slacknot = SlackNotifier(SLACK_WEBHOOK) if SLACK_WEBHOOK else None
-    if slacknot:
-        print("✅ Slack notifier ready")
-except Exception as e:
-    _log_ex("Warning: Slack notifier initialization failed", e)
-    slacknot = None
-# simple in-memory session (for demo only)
-SESSIONS = {}
-# =========================
-# LLM Clients (Text Generation)
-# =========================
 DEFAULT_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
 QWEN_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
-client_ministral = None
-client_qwen = None
-if HF_TOKEN:
     try:
-        client_ministral = InferenceClient(model=DEFAULT_MODEL_ID, token=HF_TOKEN, timeout=60)
-        print(f"✅ Initialized HF client: {DEFAULT_MODEL_ID}")
     except Exception as e:
-        _log_ex(f"⚠️ Failed to initialize {DEFAULT_MODEL_ID}", e)
     try:
-        client_qwen = InferenceClient(model=QWEN_MODEL_ID, token=HF_TOKEN, timeout=60)
-        print(f"✅ Initialized HF client: {QWEN_MODEL_ID}")
     except Exception as e:
-        _log_ex(f"⚠️ Failed to initialize {QWEN_MODEL_ID}", e)
-else:
-    print("⚠️ No HF_TOKEN provided - chatbot functionality will be limited")
-# FIXED: Match the model labels with what's used in the Gradio interface
-MODEL_LABELS = {
-    "Mistral-7B-Instruct-v0.2": {"id": DEFAULT_MODEL_ID, "client": lambda: client_ministral},
-    "Qwen2.5-Coder-32B-Instruct": {"id": QWEN_MODEL_ID, "client": lambda: client_qwen},
-}
-# System prompt for chat completion
-SYSTEM_PROMPT = "You are a helpful fraud detection analyst AI. Be concise, clear, and practical."
-def build_messages(user_msg: str) -> list:
-    # Chat completion format with system and user messages
-    return [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": user_msg}
-    ]
-# =========================
-# Fraud Detector Functions
-# =========================
-def register_user(username: str, password: str, email: str):
-    if not db:
-        return "Database not available"
-    if not username or not password:
-        return "Username and password required"
-    print(f"🔄 Attempting to register user: '{username}'")
     try:
-        ok = db.create_user(username=username, password=password, email=email)
-        if ok:
-            # Debug: List all users after registration
-            db.list_users()
-            return "✅ Registered successfully"
         else:
-            return "❌ Registration failed (username taken or database error)"
     except Exception as e:
-        print(f"Registration error: {e}")
-        return f"❌ Registration error: {e}"
-def login_user(username: str, password: str):
-    if not db:
-        return "", "Database not available"
-    if not username or not password:
-        return "", "Username and password required"
-    print(f"🔄 Attempting to login user: '{username}'")
     try:
-        # Debug: List all users before authentication
-        db.list_users()
-        user_id = db.authenticate_user(username, password)
-        if user_id:
-            token = f"session-{user_id}-{int(datetime.utcnow().timestamp())}"
-            SESSIONS[token] = user_id
-            print(f"✅ Login successful. Token: {token}")
-            return token, f"✅ Logged in as {username} (ID: {user_id})"
         else:
-            return "", "❌ Invalid credentials - please check username and password"
     except Exception as e:
-        print(f"Login error: {e}")
-        return "", f"❌ Login error: {e}"
-def debug_database():
-    """Debug function to check database state"""
-    if not db:
-        return "Database not available"
     try:
-        users = db.list_users()
-        return f"Found {len(users)} users in database"
-    except Exception as e:
-        return f"Error checking database: {e}"
-def process_file(session_token: str, file, rag_query: str = ""):
-    if session_token not in SESSIONS:
-        return "❌ Please log in first", {}, {}, ""
-    if not file:
-        return "❌ Please upload a CSV file", {}, {}, ""
-    user_id = SESSIONS[session_token]
-    print(f"🔄 Processing file for user {user_id}")
-    try:
-        # Read CSV file
-        try:
-            df = pd.read_csv(file.name)
-            print(f"✅ CSV loaded successfully: {len(df)} rows, {len(df.columns)} columns")
-        except Exception as e:
-            print(f"❌ CSV reading failed: {e}")
-            return f"❌ Error reading CSV: {e}", {}, {}, ""
-        # Validate required columns
-        required_cols = {"transaction_id", "timestamp", "amount", "description", "merchant"}
-        missing_cols = required_cols - set(df.columns)
-        if missing_cols:
-            return f"❌ CSV missing required columns: {missing_cols}", {}, {}, ""
-        print(f"✅ CSV validation passed")
-        # Store transactions in database
-        if db:
-            try:
-                db.store_transactions(user_id, df)
-                print("✅ Transactions stored in database")
-            except Exception as e:
-                _log_ex("Storing transactions failed", e)
-        # Add to RAG store
-        if rag:
-            try:
-                texts = df.apply(
-                    lambda x: f"txn_id:{x['transaction_id']} amount:{x['amount']} merchant:{x['merchant']} desc:{x.get('description','')}",
-                    axis=1
-                ).tolist()
-                metadatas = df.to_dict(orient="records")
-                rag.add(texts=texts, metadatas=metadatas)
-                print("✅ Transactions added to RAG store")
-            except Exception as e:
-                _log_ex("RAG add failed", e)
-        # Velocity detection
-        velocity_flags = []
-        try:
-            velocity_flags = detect_burst(df, window_minutes=10, threshold=5)
-            print(f"✅ Velocity detection completed: {len(velocity_flags)} flags")
-        except Exception as e:
-            _log_ex("Velocity detection failed", e)
-        # Amount anomaly detection
-        amount_flags = []
-        try:
-            if len(df) > 5:
-                mean = df['amount'].mean()
-                std = df['amount'].std()
-                for _, row in df.iterrows():
-                    z = abs((row['amount'] - mean) / (std if std > 0 else 1))
-                    if z > 2.5 or row['amount'] > 1000:
-                        amount_flags.append({
-                            "transaction": row.to_dict(),
-                            "z_score": float(z),
-                            "risk_factor": "amount_anomaly",
-                            "risk_score": float(min(z/3.0, 1.0))
-                        })
-            print(f"✅ Amount anomaly detection completed: {len(amount_flags)} flags")
-        except Exception as e:
-            _log_ex("Amount anomaly detection failed", e)
-        all_flagged = velocity_flags + amount_flags
-        print(f"📊 Total flagged transactions: {len(all_flagged)}")
-        # RAG query processing
-        rag_results = []
-        if rag_query and rag:
-            try:
-                rag_results = rag.query(rag_query, k=5)
-                print(f"✅ RAG query completed: {len(rag_results)} results")
-            except Exception as e:
-                _log_ex("RAG query failed", e)
-        # AI Analysis with multiple fallbacks
-        context = {
-            "num_transactions": len(df),
-            "velocity_flags": len(velocity_flags),
-            "amount_flags": len(amount_flags),
-            "rag_snippets": rag_results[:3]
-        }
-        # Generate AI analysis with multiple fallback options
-        llm_output = None
-        # Try 1: Model router
-        if model_router:
-            try:
-                prompt = f"""
-Analyze these fraud detection results and provide a concise risk assessment:
-Transaction Summary:
-- Total transactions: {len(df)}
-- Velocity anomalies: {len(velocity_flags)} transactions
-- Amount anomalies: {len(amount_flags)} transactions
-- Total flagged: {len(all_flagged)} transactions
-Please provide:
-1. Risk level (LOW/MEDIUM/HIGH)
-2. Key findings
-3. Recommended actions
-4. Next steps
-Keep response under 300 words.
-"""
-                llm_output = model_router.run(prompt, task="analysis")
-                if llm_output and not llm_output.startswith("Error"):
-                    print("✅ Model router analysis successful")
-                else:
-                    print(f"⚠️ Model router returned error: {llm_output}")
-                    llm_output = None
-            except Exception as e:
-                _log_ex("Model router run failed", e)
-                llm_output = None
-        # Try 2: Direct HF client (Mistral)
-        if llm_output is None and client_ministral:
-            try:
-                messages = build_messages(f"""
-Analyze these fraud detection results:
-- Total transactions: {len(df)}
-- Velocity anomalies: {len(velocity_flags)}
-- Amount anomalies: {len(amount_flags)}
-- Total flagged: {len(all_flagged)}
-Provide a brief risk assessment and recommend next steps. Keep it under 250 words.
-""")
-                response = client_ministral.chat_completion(
-                    messages=messages,
-                    max_tokens=300,
-                    temperature=0.3
-                )
-                # Extract response text
-                if hasattr(response, 'choices') and len(response.choices) > 0:
-                    llm_output = response.choices[0].message.content
-                elif isinstance(response, dict) and 'choices' in response:
-                    llm_output = response['choices'][0]['message']['content']
-                else:
-                    llm_output = str(response)
-                if llm_output and llm_output.strip():
-                    print("✅ Direct HF client (Mistral) analysis successful")
-                else:
-                    llm_output = None
-            except Exception as e:
-                _log_ex("Direct HF client (Mistral) analysis failed", e)
-                llm_output = None
-        # Try 3: Direct HF client (Qwen)
-        if llm_output is None and client_qwen:
-            try:
-                messages = build_messages(f"""
-Fraud Analysis Request:
-Transactions: {len(df)} total, {len(all_flagged)} flagged
-Velocity alerts: {len(velocity_flags)}
-Amount alerts: {len(amount_flags)}
-Provide risk assessment and recommendations.
-""")
-                response = client_qwen.chat_completion(
-                    messages=messages,
-                    max_tokens=250,
-                    temperature=0.3
-                )
-                # Extract response text
-                if hasattr(response, 'choices') and len(response.choices) > 0:
-                    llm_output = response.choices[0].message.content
-                elif isinstance(response, dict) and 'choices' in response:
-                    llm_output = response['choices'][0]['message']['content']
-                else:
-                    llm_output = str(response)
-                if llm_output and llm_output.strip():
-                    print("✅ Direct HF client (Qwen) analysis successful")
-                else:
-                    llm_output = None
-            except Exception as e:
-                _log_ex("Direct HF client (Qwen) analysis failed", e)
-                llm_output = None
-        # Final fallback: Structured report
-        if llm_output is None:
-            print("⚠️ All AI models failed, using structured fallback")
-            risk_level = "HIGH" if len(all_flagged) > 5 else "MEDIUM" if len(all_flagged) > 0 else "LOW"
-            risk_color = "🔴" if risk_level == "HIGH" else "🟡" if risk_level == "MEDIUM" else "🟢"
-            llm_output = f"""
-🔍 FRAUD ANALYSIS REPORT
-📊 **Transaction Summary:**
-• Total transactions processed: {len(df):,}
-• Suspicious transactions flagged: {len(all_flagged)}
-• Velocity pattern anomalies: {len(velocity_flags)}
-• Amount-based anomalies: {len(amount_flags)}
-{risk_color} **Risk Assessment: {risk_level} RISK**
-🎯 **Key Findings:**
-{"• Multiple fraud indicators detected across transactions" if len(all_flagged) > 3 else "• Some suspicious patterns identified" if len(all_flagged) > 0 else "• No major anomalies detected in transaction patterns"}
-{"• High velocity transactions may indicate automated attacks" if len(velocity_flags) > 2 else ""}
-{"• Unusual amount patterns suggest potential fraud" if len(amount_flags) > 2 else ""}
-📋 **Recommended Actions:**
-{"• IMMEDIATE: Manual review of all flagged transactions required" if len(all_flagged) > 3 else "• Review flagged transactions within 24 hours" if len(all_flagged) > 0 else "• Continue standard monitoring procedures"}
-{"• URGENT: Consider temporarily blocking high-risk accounts" if len(all_flagged) > 5 else ""}
-{"• Notify compliance team and document findings" if len(all_flagged) > 1 else ""}
-• Download detailed reports for compliance records
-• {"Set up enhanced monitoring for similar patterns" if len(all_flagged) > 0 else "Maintain current monitoring protocols"}
-🔄 **Next Steps:**
-1. Review detailed flagged transactions in the results below
-2. Cross-reference with historical fraud cases
-3. {"Implement additional transaction controls" if len(all_flagged) > 2 else "Continue monitoring"}
-4. Schedule follow-up analysis in 24-48 hours
-"""
-        # Generate reports
-        report_paths = {}
-        if all_flagged:
-            try:
-                ts = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
-                csv_path = os.path.join(DATA_DIR, f"fraud_report_{user_id}_{ts}.csv")
-                pdf_path = os.path.join(DATA_DIR, f"fraud_report_{user_id}_{ts}.pdf")
-                generate_csv_report(all_flagged, csv_path)
-                generate_pdf_report(all_flagged, pdf_path)
-                report_paths = {"csv": csv_path, "pdf": pdf_path}
-                print(f"✅ Reports generated: CSV and PDF")
-            except Exception as e:
-                _log_ex("Report generation failed", e)
-        # Send alerts
-        if all_flagged:
-            try:
-                admin = ADMIN_EMAIL
-                if admin and emailer:
-                    emailer.send_alert(
-                        recipient=admin,
-                        subject=f"Fraud Alert for user {user_id}",
-                        body=f"Detected {len(all_flagged)} suspicious txns. See attached.",
-                        attachment=pdf_path if 'pdf_path' in locals() else None
-                    )
-                if slacknot:
-                    slacknot.send(f"Fraud Alert: user {user_id} has {len(all_flagged)} flagged transactions.")
-                print("✅ Alerts sent")
-            except Exception as e:
-                _log_ex("Alert sending failed", e)
-        status_msg = f"✅ Analysis complete. Found {len(all_flagged)} suspicious transactions."
-        print(f"🎉 Processing completed successfully")
-        return llm_output, all_flagged, report_paths, status_msg
     except Exception as e:
-        error_msg = f"❌ Processing error: {e}"
-        _log_ex("Overall processing error", e)
-        return error_msg, {}, {}, ""
-# =========================
-# Chatbot (Chat Completion)
-# =========================
-def respond(message, history, model_choice):
-    # Guard: empty message
-    if not message or not message.strip():
-        yield history
-        return
-    # Debug: print what model was selected
-    print(f"Selected model: '{model_choice}'")
-    print(f"Available models: {list(MODEL_LABELS.keys())}")
-    # Resolve model/client
-    info = MODEL_LABELS.get(model_choice)
-    if not info:
-        history.append([message, f"❌ Unknown model selected: '{model_choice}'. Available: {list(MODEL_LABELS.keys())}"])
-        yield history
-        return
-    client = info["client"]()
-    if client is None:
-        history.append([message, "❌ Selected model not available. Check HF_TOKEN in Space secrets."])
-        yield history
-        return
-    # Add placeholder assistant message
-    history.append([message, ""])
-    try:
-        messages = build_messages(message)
-        # Use chat_completion instead of text_generation
-        response = client.chat_completion(
-            messages=messages,
-            max_tokens=512,
-            temperature=0.7,
-            # stream=False,  # Set to False for single response
-        )
-        # Extract text from response
-        if hasattr(response, 'choices') and len(response.choices) > 0:
-            # Standard OpenAI-style response
-            text = response.choices[0].message.content
-        elif isinstance(response, dict) and 'choices' in response:
-            # Dict-style response
-            text = response['choices'][0]['message']['content']
-        elif hasattr(response, 'content'):
-            # Direct content attribute
-            text = response.content
-        elif isinstance(response, str):
-            # Direct string response
-            text = response
-        else:
-            # Fallback - convert to string
-            text = str(response)
-        history[-1][1] = text.strip() if text else "❌ Empty response."
-        yield history
     except Exception as e:
-        _log_ex("Chatbot chat_completion failed", e)
-        history[-1][1] = f"❌ Error: {e}"
-        yield history
-def user_submit(message, history, model_choice):
-    # clear the textbox but keep history as-is (respond will append)
-    return "", history
-# =========================
-# Gradio UI
-# =========================
-with gr.Blocks(css=".output_json {height:300px;}", title="🔍 Fraud Detector Analyst") as demo:
-    gr.Markdown("# 🔍 Fraud Detector Analyst")
-    gr.Markdown("Upload transaction data to detect fraud patterns using AI analysis and velocity detection.")
-    with gr.Tab("🔐 Authentication"):
-        gr.Markdown("### Register New Account")
-        with gr.Row():
-            reg_user = gr.Textbox(label="Username", placeholder="Enter username")
-            reg_pass = gr.Textbox(label="Password", type="password", placeholder="Enter password")
-            reg_email = gr.Textbox(label="Email", placeholder="Enter email (optional)")
-        reg_btn = gr.Button("📝 Register", variant="primary")
-        reg_msg = gr.Textbox(label="Registration Status", interactive=False)
-        gr.Markdown("### Login to Existing Account")
         with gr.Row():
-            login_user_tb = gr.Textbox(label="Username", placeholder="Enter username")
-            login_pass_tb = gr.Textbox(label="Password", type="password", placeholder="Enter password")
-        login_btn = gr.Button("🔑 Login", variant="primary")
-        token_out = gr.Textbox(label="Session Token", interactive=False)
-        login_msg = gr.Textbox(label="Login Status", interactive=False)
-        # Debug section
-        gr.Markdown("### 🐛 Debug (for troubleshooting)")
-        debug_btn = gr.Button("📋 Check Database", variant="secondary")
-        debug_msg = gr.Textbox(label="Debug Info", interactive=False)
-    with gr.Tab("📊 Fraud Analysis"):
-        gr.Markdown("### Upload Transaction Data")
-        gr.Markdown("**Required CSV columns:** `transaction_id`, `timestamp`, `amount`, `description`, `merchant`")
-        session_token_tb = gr.Textbox(label="Session Token (from login)", placeholder="Paste your session token here")
-        csv_file = gr.File(label="📄 Upload transactions CSV (.csv)", file_types=[".csv"])
-        rag_query_tb = gr.Textbox(label="🔍 RAG Query (optional)", placeholder="e.g., 'similar gift card purchases'")
-        analyze_btn = gr.Button("🔍 Analyze Transactions", variant="primary", size="lg")
         with gr.Row():
-            analysis_out = gr.Textbox(label="🤖 AI Analysis", lines=8, interactive=False)
-            status_out = gr.Textbox(label="📋 Status", lines=2, interactive=False)
         with gr.Row():
-            flagged_out = gr.JSON(label="⚠️ Flagged Transactions")
-            reports_out = gr.JSON(label="📄 Generated Reports")
-    with gr.Tab("💬 Chatbot"):
-        gr.Markdown("### 🛡 Fraud Detector Chatbot")
-        gr.Markdown("Ask questions about fraud detection, transaction analysis, or get general assistance.")
-        # IMPORTANT: default type uses list of [user, bot] pairs
-        chatbot = gr.Chatbot(height=400, show_label=False)
         with gr.Row():
             msg = gr.Textbox(
-                label="Your message",
-                placeholder="Ask about fraud patterns, transaction analysis, etc...",
                 scale=4
             )
-            # FIXED: Match the model choices with MODEL_LABELS keys
-            model_choice = gr.Radio(
-                ["Mistral-7B-Instruct-v0.2", "Qwen2.5-Coder-32B-Instruct"],
-                value="Mistral-7B-Instruct-v0.2",
-                label="Select Model",
-                scale=1
-            )
-    # Event handlers for Fraud Detector
-    reg_btn.click(fn=register_user, inputs=[reg_user, reg_pass, reg_email], outputs=[reg_msg])
-    login_btn.click(fn=login_user, inputs=[login_user_tb, login_pass_tb], outputs=[token_out, login_msg])
-    debug_btn.click(fn=debug_database, inputs=[], outputs=[debug_msg])
-    analyze_btn.click(fn=process_file, inputs=[session_token_tb, csv_file, rag_query_tb], outputs=[analysis_out, flagged_out, reports_out, status_out])
-    # Event handlers for Chatbot (call respond first, then clear input)
-    msg.submit(respond, [msg, chatbot, model_choice], chatbot).then(
-        lambda: "", None, msg
-    )
-    # Add sample data format
     gr.Markdown("""
-    ### 📋 Sample CSV Format:
-    ```csv
-    transaction_id,timestamp,amount,description,merchant
-    TXN001,2024-01-15 10:30:00,25.99,Coffee Purchase,Starbucks
-    TXN002,2024-01-15 10:31:00,1500.00,Gift Card Purchase,Amazon
-    TXN003,2024-01-15 10:32:00,2000.00,Wire Transfer,Bank Transfer
-    ```
     """)
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import pandas as pd
+import re
+import numpy as np
+from datetime import datetime, timedelta
 from huggingface_hub import InferenceClient
+import io
+import base64
+# -------------------------
+# HF Inference Client Setup
+# -------------------------
+import os
+# Read the API token from the environment (for example a Space secret named
+# HF_TOKEN) instead of hard-coding it, so no credential is ever committed.
+HF_TOKEN = os.getenv("HF_TOKEN", "")
+# Pass None rather than an empty string when no token is configured, so the
+# client applies its own default token handling instead of an empty credential.
+client = InferenceClient(token=HF_TOKEN or None)
 DEFAULT_MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"
 QWEN_MODEL_ID = "Qwen/Qwen2.5-Coder-32B-Instruct"
+# -------------------------
+# Utility Functions
+# -------------------------
+def generate_summary(prompt, model_id=DEFAULT_MODEL_ID):
+    """Generate an AI-written analysis summary for ``prompt``.
+
+    Args:
+        prompt: Full instruction text sent to the model.
+        model_id: Hugging Face model repository id to query; defaults to
+            ``DEFAULT_MODEL_ID``.
+
+    Returns:
+        The generated text. When the request fails, the error is reported
+        in-band as a string beginning with a warning sign and
+        "Error generating AI summary:" rather than being raised.
+    """
     try:
+        # ``text_generation`` takes the prompt as its first parameter and has
+        # no ``inputs`` keyword; passing ``inputs=`` raised TypeError on every
+        # call, which the handler below turned into the error string.
+        return client.text_generation(
+            prompt,
+            model=model_id,
+            max_new_tokens=500,
+            temperature=0.7,
+            do_sample=True,
+        )
     except Exception as e:
+        return f"⚠️ Error generating AI summary: {str(e)}"
+def create_download_link(df, filename):
+    """Build an HTML anchor that downloads ``df`` as a CSV file.
+
+    The CSV (without the index column) is embedded in the link as a base64
+    ``data:`` URI, so no file is written to disk.
+
+    Args:
+        df: DataFrame to export.
+        filename: Name offered to the browser for the download; it is also
+            shown in the link text.
+
+    Returns:
+        An ``<a>`` element as a string.
+    """
+    import html
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode("utf-8")).decode("ascii")
+    # The name lands inside an HTML attribute and in the link text, so escape
+    # it; an unescaped quote or angle bracket would break out of the markup.
+    safe_name = html.escape(str(filename), quote=True)
+    # ``text/csv`` is the registered media type; ``file/csv`` is not one.
+    return (
+        f'<a href="data:text/csv;charset=utf-8;base64,{b64}" '
+        f'download="{safe_name}">📥 Download {safe_name}</a>'
+    )
+# -------------------------
+# 1. Transaction Fraud (Enhanced from original)
+# -------------------------
+def process_transaction_file(file):
+    """Flag suspicious rows in an uploaded transaction CSV and summarise them.
+
+    A row is flagged when its ``amount`` exceeds 10000, is negative, its
+    ``merchant_category`` equals ``HIGH_RISK``, or the customer's total
+    ``amount`` across the whole file exceeds 50000. ``amount`` is required;
+    ``merchant_category`` and ``customer_id`` are optional and their rules are
+    skipped when the column is absent.
+
+    Args:
+        file: Uploaded file object whose ``name`` attribute is the CSV path.
+
+    Returns:
+        A ``(suspicious, summary, status)`` tuple: the flagged rows with an
+        added ``risk_reason`` column, the text from ``generate_summary``, and
+        a one-line count. On any failure an empty DataFrame, an error message
+        and an empty status string are returned instead.
+    """
     try:
+        df = pd.read_csv(file.name)
+        amount = df['amount']
+        no_rows = pd.Series(False, index=df.index)
+        large = amount > 10000
+        negative = amount < 0
+        # Optional columns: a missing column disables its rule instead of
+        # failing the whole analysis with a KeyError.
+        high_risk = (df['merchant_category'] == 'HIGH_RISK') if 'merchant_category' in df.columns else no_rows
+        # The total is per customer over the entire file, not per day; the
+        # 'Daily Limit Exceeded' label is kept for compatibility.
+        over_limit = (
+            df.groupby('customer_id')['amount'].transform('sum') > 50000
+        ) if 'customer_id' in df.columns else no_rows
+        flagged = large | negative | high_risk | over_limit
+        suspicious = df[flagged].copy()
+        # Vectorised labelling with the first matching rule winning. Unlike a
+        # row-wise ``apply`` this also works when no row is flagged.
+        suspicious['risk_reason'] = np.select(
+            [large[flagged].to_numpy(), negative[flagged].to_numpy(), high_risk[flagged].to_numpy()],
+            ['Large Amount', 'Negative Amount', 'High Risk Merchant'],
+            default='Daily Limit Exceeded',
+        )
+        # AI Analysis
+        prompt = f"""You are a financial fraud analyst. Analyze these suspicious transactions:
+Transaction Data Sample:
+{df.head(10).to_string()}
+Suspicious Transactions Found: {len(suspicious)}
+Key Patterns: Large amounts, negative values, high-risk merchants
+Provide a detailed risk assessment and recommended actions."""
+        summary = generate_summary(prompt)
+        return suspicious, summary, f"Found {len(suspicious)} suspicious transactions out of {len(df)} total"
     except Exception as e:
+        return pd.DataFrame(), f"Error processing file: {str(e)}", ""
+# -------------------------
+# 2. KYC Fraud Analysis
+# -------------------------
+def process_kyc_file(file):
+    """Screen a KYC CSV for duplicate contact details and implausible identities.
+
+    Each check runs only when its column is present in the upload:
+
+    * ``email`` / ``phone``: values shared by more than one record
+      (blank values are not treated as duplicates of each other).
+    * ``dob``: unparseable or missing dates, dates in the future, and dates
+      more than 120 years in the past.
+    * ``name``: a single all-uppercase word, any digit, or fewer than three
+      characters.
+
+    A record that trips several checks appears once per reason.
+
+    Args:
+        file: Uploaded file object whose ``name`` attribute is the CSV path.
+
+    Returns:
+        A ``(flagged_df, summary, status)`` tuple: flagged rows with a
+        ``flag_reason`` column (an empty DataFrame when nothing is flagged),
+        the text from ``generate_summary``, and a one-line count. On any
+        failure an empty DataFrame, an error message and an empty status
+        string are returned instead.
+    """
     try:
+        df = pd.read_csv(file.name)
+        flagged_records = []
+        # Duplicate contact details. Both columns are optional, and missing
+        # values are excluded because ``duplicated`` treats NaNs as equal.
+        for column, reason in (('email', 'Duplicate Email'), ('phone', 'Duplicate Phone')):
+            if column not in df.columns:
+                continue
+            repeated = df[df[column].notna() & df.duplicated(column, keep=False)].copy()
+            if not repeated.empty:
+                repeated['flag_reason'] = reason
+                flagged_records.append(repeated)
+        # Check for invalid DOB patterns
+        if 'dob' in df.columns:
+            try:
+                # Parsed into a local Series so no helper column leaks into
+                # the flagged output or the prompt sample.
+                dob_parsed = pd.to_datetime(df['dob'], errors='coerce')
+                now = datetime.now()
+                invalid_dob = df[
+                    dob_parsed.isna()  # Invalid date format
+                    | (dob_parsed > now)  # Future dates
+                    | (dob_parsed < now - timedelta(days=365 * 120))  # Age > 120
+                ].copy()
+            except (TypeError, ValueError, OverflowError):
+                # Best effort: dates that cannot be compared with the naive
+                # current time (e.g. timezone-aware values) skip this check
+                # rather than abort the whole screening.
+                invalid_dob = pd.DataFrame()
+            if not invalid_dob.empty:
+                invalid_dob['flag_reason'] = 'Invalid DOB'
+                flagged_records.append(invalid_dob)
+        # Check for suspicious name patterns
+        if 'name' in df.columns:
+            has_name = df['name'].notna()
+            # Cast so a column parsed as numbers still supports ``.str``.
+            names = df['name'].astype(str)
+            suspicious_names = df[
+                has_name & (
+                    names.str.contains(r'^[A-Z]+$', na=False)  # One all-caps word
+                    | names.str.contains(r'\d', na=False)  # Contains numbers
+                    | (names.str.len() < 3)  # Too short
+                )
+            ].copy()
+            if not suspicious_names.empty:
+                suspicious_names['flag_reason'] = 'Suspicious Name Pattern'
+                flagged_records.append(suspicious_names)
+        # Combine all flagged records
+        if flagged_records:
+            flagged_df = pd.concat(flagged_records, ignore_index=True)
+            flagged_df = flagged_df.drop_duplicates()
         else:
+            flagged_df = pd.DataFrame()
+        # AI Analysis
+        prompt = f"""You are a KYC fraud analyst. Review these customer identity records for potential fraud:
+Total Records: {len(df)}
+Flagged Records: {len(flagged_df)}
+Sample Data:
+{df.head(10).to_string()}
+Flagged Issues:
+{flagged_df[['flag_reason']].value_counts().to_string() if not flagged_df.empty else 'No issues found'}
+Identify patterns of synthetic identities, duplicate accounts, or data quality issues. Recommend verification steps."""
+        summary = generate_summary(prompt)
+        return flagged_df, summary, f"Flagged {len(flagged_df)} suspicious KYC records out of {len(df)} total"
     except Exception as e:
+        return pd.DataFrame(), f"Error processing KYC file: {str(e)}", ""
+# -------------------------
+# 3. Sanctions Check
+# -------------------------
+def process_sanctions_file(file, sanctions_file=None):
+    """Screen customer names against a sanctions/PEP list.
+
+    A customer is an exact match when the lower-cased name equals a list
+    entry, and a fuzzy match when it shares at least two whitespace-separated
+    words with an entry. A row meeting both rules is reported as an exact
+    match. Flagged rows are returned in upload order.
+
+    Args:
+        file: Uploaded customer CSV (object with a ``name`` path attribute);
+            it must contain a ``name`` column.
+        sanctions_file: Optional uploaded CSV with a ``name`` column to screen
+            against. When omitted, or when it cannot be read, a small
+            built-in demo list is used; the fallback is reported in the
+            status text.
+
+    Returns:
+        A ``(flagged_customers, summary, status)`` tuple: matching rows with
+        a ``match_type`` column when any exist, the text from
+        ``generate_summary``, and a one-line count. On any failure an empty
+        DataFrame, an error message and an empty status string are returned.
+    """
     try:
+        df = pd.read_csv(file.name)
+        # Default sanctions list (you can replace with actual data)
+        default_sanctions = [
+            "John Doe", "Jane Smith", "Muhammad Ali", "Vladimir Putin",
+            "Kim Jong Un", "Alexander Petrov", "Maria Gonzalez"
+        ]
+        sanctions_list = [name.lower() for name in default_sanctions]
+        list_note = ""
+        if sanctions_file:
+            try:
+                sanctions_df = pd.read_csv(sanctions_file.name)
+                # Drop blank entries: a NaN in the list would break the word
+                # comparison below.
+                custom_names = sanctions_df['name'].dropna().astype(str).str.lower()
+                sanctions_list = [name for name in custom_names if name.strip()]
+            except (OSError, ValueError, KeyError, AttributeError) as list_error:
+                # Keep the best-effort fallback, but tell the user so a demo
+                # screening is not mistaken for one against their own list.
+                list_note = f" (custom sanctions list could not be read: {list_error}; default list used)"
+        names = df['name'].fillna('').astype(str).str.lower()
+        # Exact match check
+        exact_mask = names.isin(set(sanctions_list))
+        # Fuzzy match check: word sets for the list are built once, not per row.
+        sanction_tokens = [set(entry.split()) for entry in sanctions_list]
+        def shares_two_words(customer_name):
+            words = set(customer_name.split())
+            return any(len(words & tokens) >= 2 for tokens in sanction_tokens)
+        fuzzy_mask = names.map(shares_two_words).astype(bool)
+        # Combine results
+        flagged_mask = exact_mask | fuzzy_mask
+        flagged_customers = df[flagged_mask].copy()
+        if not flagged_customers.empty:
+            # Exact takes precedence: every multi-word exact match also meets
+            # the two-word rule and must not be relabelled as fuzzy.
+            flagged_customers['match_type'] = np.where(
+                exact_mask[flagged_mask].to_numpy(), 'Exact Match', 'Fuzzy Match'
+            )
+        # AI Analysis
+        prompt = f"""You are a compliance officer conducting sanctions screening. Review these results:
+Total Customers Screened: {len(df)}
+Potential Matches Found: {len(flagged_customers)}
+Customer Sample:
+{df.head(5).to_string()}
+Flagged Customers:
+{flagged_customers.to_string() if not flagged_customers.empty else 'No matches found'}
+Assess the risk level and recommend enhanced due diligence procedures for flagged customers."""
+        summary = generate_summary(prompt)
+        return flagged_customers, summary, f"Found {len(flagged_customers)} potential sanctions matches out of {len(df)} customers screened" + list_note
     except Exception as e:
+        return pd.DataFrame(), f"Error processing sanctions check: {str(e)}", ""
+# -------------------------
+# 4. Credit Risk Analysis
+# -------------------------
def process_credit_file(file):
    """Score credit profiles in a CSV against fixed risk rules.

    Each rule is applied only when its column is present. A customer is
    reported when at least one rule fires:

    * ``credit_score`` below 600
    * ``utilization_rate`` above 0.8
    * ``debt_to_income`` above 0.4
    * ``recent_defaults`` above 0
    * ``income`` below 30000

    Args:
        file: Uploaded credit CSV, given either as a path (string or
            path-like) or as an object whose ``name`` attribute is the path.

    Returns:
        A ``(risky_customers, summary, stats)`` tuple. ``risky_customers``
        holds the rows that triggered a rule plus ``risk_score`` (number of
        rules fired), ``risk_level`` ('High' for 3+, 'Medium' for 2, else
        'Low') and ``risk_reasons`` (comma-separated rule labels). It is an
        empty DataFrame when none of the rule columns exist. ``summary`` is
        the text returned by ``generate_summary``. On failure the tuple is
        ``(empty DataFrame, error message, "")``.
    """
    # (column, predicate over the numeric column, human-readable label)
    rules = [
        ('credit_score', lambda s: s < 600, 'Low Credit Score'),
        ('utilization_rate', lambda s: s > 0.8, 'High Utilization'),
        ('debt_to_income', lambda s: s > 0.4, 'High Debt-to-Income'),
        ('recent_defaults', lambda s: s > 0, 'Recent Defaults'),
        ('income', lambda s: s < 30000, 'Low Income'),
    ]
    try:
        # The upload widget is configured with type="filepath", so accept a
        # plain path as well as file-like wrappers that expose `.name`.
        is_path = isinstance(file, str) or hasattr(file, "__fspath__")
        df = pd.read_csv(file if is_path else getattr(file, "name", file))

        triggered = {}
        for column, is_risky, label in rules:
            if column in df.columns:
                # Coerce so one stray non-numeric cell becomes NaN (which
                # never satisfies a rule) instead of raising and aborting.
                values = pd.to_numeric(df[column], errors='coerce')
                triggered[label] = is_risky(values)

        # Combine risk conditions
        if triggered:
            hits = pd.DataFrame(triggered, index=df.index)
            risk_mask = hits.any(axis=1)
            risky_customers = df[risk_mask].copy()
            risky_hits = hits[risk_mask]
            # One point per rule that fired for the customer.
            risky_customers['risk_score'] = risky_hits.sum(axis=1).astype(int)
            risky_customers['risk_level'] = risky_customers['risk_score'].apply(
                lambda x: 'High' if x >= 3 else 'Medium' if x >= 2 else 'Low'
            )
            risky_customers['risk_reasons'] = [
                ', '.join(label for label, hit in zip(risky_hits.columns, row) if hit)
                for row in risky_hits.to_numpy()
            ]
        else:
            risky_customers = pd.DataFrame()

        # AI Analysis
        prompt = f"""You are a credit risk analyst. Assess these customer credit profiles:
Total Customers: {len(df)}
High-Risk Customers: {len(risky_customers)}
Sample Data:
{df.head(10).to_string()}
Risk Distribution:
{risky_customers['risk_level'].value_counts().to_string() if not risky_customers.empty else 'No high-risk customers identified'}
Provide risk assessment insights and recommend credit policies or monitoring actions."""
        summary = generate_summary(prompt)
        return risky_customers, summary, f"Identified {len(risky_customers)} high-risk customers out of {len(df)} total"
    except Exception as e:
        return pd.DataFrame(), f"Error processing credit risk file: {str(e)}", ""
+# -------------------------
+# 5. Chatbot (Enhanced)
+# -------------------------
def chatbot_respond(message, history, model_choice):
    """Answer a chat message as a fraud/risk consultant and extend the history.

    Args:
        message: Text the user just submitted. Blank or ``None`` messages are
            ignored: no model call is made and the history is returned as-is.
        history: Prior ``(user, assistant)`` turns; ``None`` is treated as an
            empty conversation. The caller's list is not mutated.
        model_choice: Model identifier forwarded to ``generate_summary`` as
            ``model_id``.

    Returns:
        ``(history, "")``: the updated list of turns and an empty string,
        which the UI wiring routes to the message box to clear it. If the
        model call raises, the error text is recorded as the assistant turn.
    """
    history = list(history) if history else []

    # Nothing to answer: avoid a pointless model call and an empty chat turn.
    if message is None or not str(message).strip():
        return history, ""

    # Build conversation context
    conversation = "".join(
        f"User: {user_turn}\nAssistant: {bot_turn}\n\n"
        for user_turn, bot_turn in history
    )
    prompt = f"""You are an expert fraud analyst and risk management consultant. Help users with:
- Transaction fraud detection
- KYC/Identity verification
- Sanctions screening
- Credit risk assessment
- Regulatory compliance
- Financial crime prevention
Previous conversation:
{conversation}
User: {message}
Assistant:"""
    try:
        response = generate_summary(prompt, model_id=model_choice)
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI event.
        response = f"I apologize, but I encountered an error: {str(e)}"
    history.append((message, response))
    return history, ""
+# -------------------------
+# Gradio Interface
+# -------------------------
def _csv_upload(label):
    """Create a CSV-only upload widget configured with ``type="filepath"``."""
    return gr.File(label=label, file_types=[".csv"], type="filepath")


def _result_widgets(summary_label, stats_label, table_label):
    """Lay out a summary/statistics row followed by a results table.

    Returns the widgets as ``[table, summary, stats]``, the order in which
    they are wired as outputs of the processing callbacks.
    """
    with gr.Row():
        with gr.Column():
            summary_box = gr.Textbox(label=summary_label, lines=8, interactive=False)
        with gr.Column():
            stats_box = gr.Textbox(label=stats_label, lines=2, interactive=False)
    table = gr.Dataframe(label=table_label, interactive=False)
    return [table, summary_box, stats_box]


# Top-level UI: one tab per analysis module plus a chat tab.
with gr.Blocks(theme=gr.themes.Soft(), title="🛡️ Fraud Detector Analyst") as demo:
    gr.Markdown("""
    # 🛡️ Fraud Detector Analyst (Multi-Module Risk Platform)
    **Comprehensive Risk Intelligence Platform** featuring:
    - 📊 Transaction Fraud Detection
    - 🆔 KYC Identity Fraud Analysis
    - 🌍 Sanctions & PEP Screening
    - 💳 Credit Risk Assessment
    - 💬 AI-Powered Risk Consultant
    """)

    # Transaction fraud: analysis runs as soon as a CSV is uploaded.
    with gr.Tab("📊 Transaction Fraud"):
        gr.Markdown("### Upload transaction data to detect fraudulent patterns")
        with gr.Row():
            trans_file = _csv_upload("Upload Transaction CSV")
        trans_results, trans_summary, trans_stats = _result_widgets(
            "AI Analysis Summary", "Detection Statistics", "Suspicious Transactions"
        )
        trans_file.upload(
            fn=process_transaction_file,
            inputs=[trans_file],
            outputs=[trans_results, trans_summary, trans_stats],
        )

    # KYC identity fraud.
    with gr.Tab("🆔 KYC Fraud Analysis"):
        gr.Markdown("### Detect identity fraud and synthetic accounts in customer onboarding data")
        with gr.Row():
            kyc_file = _csv_upload("Upload KYC Customer Data CSV")
        kyc_results, kyc_summary, kyc_stats = _result_widgets(
            "KYC Fraud Analysis", "KYC Statistics", "Flagged KYC Records"
        )
        kyc_file.upload(
            fn=process_kyc_file,
            inputs=[kyc_file],
            outputs=[kyc_results, kyc_summary, kyc_stats],
        )

    # Sanctions screening: only the customer-list upload triggers a run; the
    # optional sanctions list is read at that moment.
    with gr.Tab("🌍 Sanctions Check"):
        gr.Markdown("### Screen customers against sanctions lists and PEP databases")
        with gr.Row():
            sanctions_customer_file = _csv_upload("Upload Customer List CSV")
            sanctions_list_file = _csv_upload("Upload Sanctions List CSV (Optional)")
        sanctions_results, sanctions_summary, sanctions_stats = _result_widgets(
            "Sanctions Screening Results", "Screening Statistics", "Flagged Customers"
        )
        sanctions_customer_file.upload(
            fn=lambda customers, listing: process_sanctions_file(customers, listing),
            inputs=[sanctions_customer_file, sanctions_list_file],
            outputs=[sanctions_results, sanctions_summary, sanctions_stats],
        )

    # Credit risk.
    with gr.Tab("💳 Credit Risk"):
        gr.Markdown("### Assess credit risk and default probability for loan applicants")
        with gr.Row():
            credit_file = _csv_upload("Upload Credit Profile CSV")
        credit_results, credit_summary, credit_stats = _result_widgets(
            "Credit Risk Analysis", "Risk Statistics", "High-Risk Customers"
        )
        credit_file.upload(
            fn=process_credit_file,
            inputs=[credit_file],
            outputs=[credit_results, credit_summary, credit_stats],
        )

    # Chat consultant.
    with gr.Tab("💬 AI Risk Consultant"):
        gr.Markdown("### Chat with our AI expert about fraud detection and risk management")
        model_choice = gr.Dropdown(
            choices=[DEFAULT_MODEL_ID, QWEN_MODEL_ID],
            label="Choose AI Model",
            value=DEFAULT_MODEL_ID,
            info="Select the language model for analysis",
        )
        chatbot = gr.Chatbot(label="Risk Management Consultant", height=500)
        with gr.Row():
            msg = gr.Textbox(
                label="Ask about fraud detection, risk assessment, compliance...",
                placeholder="e.g., How can I improve my transaction fraud detection?",
                scale=4,
            )
            submit_btn = gr.Button("Send", scale=1, variant="primary")
        # Enter key and Send button share the same handler and wiring.
        chat_wiring = {
            "fn": chatbot_respond,
            "inputs": [msg, chatbot, model_choice],
            "outputs": [chatbot, msg],
        }
        msg.submit(**chat_wiring)
        submit_btn.click(**chat_wiring)

    # Footer
    gr.Markdown("""
    ---
    **⚠️ Disclaimer:** This tool is for demonstration purposes. Always validate results with domain experts and comply with relevant regulations.
    **📋 Supported CSV Formats:**
    - **Transactions:** `customer_id, amount, merchant_category, timestamp`
    - **KYC:** `customer_id, name, email, phone, dob, address`
    - **Sanctions:** `name, dob, country` (customers) | `name, list_type` (sanctions)
    - **Credit:** `customer_id, credit_score, utilization_rate, debt_to_income, income, recent_defaults`
    """)
 if __name__ == "__main__":
+    demo.launch(
+        share=True,
+        server_name="0.0.0.0",
+        server_port=7860
+    )