Spaces:

sourize
/

FraudLens

Sleeping

App Files Files Community

sourize committed on Jun 20, 2025

Commit

7448648

1 Parent(s): fb38b84

Initial Commit

Browse files

Files changed (9) hide show

app.py +420 -27
pages/🏠 Home.py +0 -58
pages/📈 Analytics Dashboard.py +0 -111
pages/📊 Model Insights.py +0 -78
pages/🔍 Fraud Detection.py +0 -130
requirements.txt +11 -11
utils/model_utils.py +0 -33
utils/preprocessing.py +0 -34
utils/visualization.py +0 -47

app.py CHANGED Viewed

@@ -1,49 +1,442 @@
 import streamlit as st
-# Page config
 st.set_page_config(
-    page_title="🔍 FraudLens: An E-Commerce Fraud Detection",
-    page_icon="🛡️",
     layout="wide",
     initial_sidebar_state="expanded"
 )
-# Custom CSS with enhanced styling
 st.markdown("""
 <style>
     .main-header {
         font-size: 2.5rem;
         color: #1f77b4;
         text-align: center;
         margin-bottom: 2rem;
-        text-shadow: 2px 2px 4px rgba(0,0,0,0.1);
     }
 </style>
 """, unsafe_allow_html=True)
-st.markdown('''
-<div class="main-header">
-    🛡️ E-Commerce Fraud Detection System
-</div>
-''', unsafe_allow_html=True)
-st.markdown("""
-## 🎯 Welcome to FraudLens
-This is the landing page for the E-Commerce Fraud Detection System. Use the navigation menu on the left to explore:
-- 🔍 Fraud Detection
-- 📊 Model Insights
-- 📈 Analytics Dashboard
-Protecting businesses and customers from fraudulent transactions with Explainable AI.
-""")
-st.markdown("---")
-st.markdown("""
-<div style='text-align: center; padding: 30px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-           border-radius: 15px; color: white; margin-top: 2rem;'>
-    <h3>🛡️ E-Commerce Fraud Detection System</h3>
-    <p>Powered by <strong>Explainable AI</strong> • Built by <strong>FraudNinjas</strong> for Security</p>
-    <p><em>Protecting businesses and customers from fraudulent transactions</em></p>
-</div>
-""", unsafe_allow_html=True)

 import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+import shap
+import matplotlib.pyplot as plt
+import seaborn as sns
+from datetime import datetime, time
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import warnings
+warnings.filterwarnings('ignore')
+# Configure Streamlit page
 st.set_page_config(
+    page_title="Fraud Detection System",
+    page_icon="🔍",
     layout="wide",
     initial_sidebar_state="expanded"
 )
+# Custom CSS for better styling
 st.markdown("""
 <style>
     .main-header {
         font-size: 2.5rem;
+        font-weight: bold;
         color: #1f77b4;
         text-align: center;
         margin-bottom: 2rem;
+    }
+    .prediction-box {
+        padding: 1rem;
+        border-radius: 10px;
+        margin: 1rem 0;
+        text-align: center;
+        font-size: 1.2rem;
+        font-weight: bold;
+    }
+    .fraud-box {
+        background-color: #ffebee;
+        border: 2px solid #f44336;
+        color: #c62828;
+    }
+    .legitimate-box {
+        background-color: #e8f5e8;
+        border: 2px solid #4caf50;
+        color: #2e7d32;
+    }
+    .metric-card {
+        background-color: #f8f9fa;
+        padding: 1rem;
+        border-radius: 8px;
+        border-left: 4px solid #1f77b4;
     }
 </style>
 """, unsafe_allow_html=True)
+@st.cache_resource
+def load_models():
+    """Load the trained model and the customer-location label encoder.
+
+    Both artifacts are read with joblib from file names that are relative to
+    the current working directory. The function is wrapped in
+    ``st.cache_resource``.
+
+    Returns:
+        tuple: ``(model, label_encoder)`` as loaded from
+        'lightgbm_model.pkl' and 'customer_loc.pkl'.
+
+    Only ``FileNotFoundError`` is handled: two error messages are shown and
+    ``st.stop()`` is called. Any other loading error propagates to the caller.
+    """
+    try:
+        model = joblib.load('lightgbm_model.pkl')
+        label_encoder = joblib.load('customer_loc.pkl')
+        return model, label_encoder
+    except FileNotFoundError as e:
+        st.error(f"Model files not found: {e}")
+        st.error("Please ensure 'lightgbm_model.pkl' and 'customer_loc.pkl' are in the same directory as this app.")
+        st.stop()
+def preprocess_data(transaction_amount, transaction_date, customer_age,
+                   customer_location, account_age_days, transaction_time,
+                   label_encoder):
+    """Build the single-row feature frame that is passed to the model.
+
+    Args:
+        transaction_amount: Stored unchanged under 'Transaction Amount'.
+        transaction_date: Anything ``pd.Timestamp`` accepts; stored as the
+            number of whole days elapsed since 1899-12-30.
+        customer_age: Stored unchanged under 'Customer Age'.
+        customer_location: Label handed to ``label_encoder.transform``.
+        account_age_days: Stored unchanged under 'Account Age Days'.
+        transaction_time: Object exposing ``hour``, ``minute`` and ``second``;
+            stored as a fraction of a 24-hour day (seconds / 86400).
+        label_encoder: Object with a ``transform`` method for location labels.
+
+    Returns:
+        pd.DataFrame: One row holding the six feature columns created below.
+    """
+    # Convert transaction date to Excel serial date format
+    # (days since 1899-12-30 as used in training)
+    reference_date = pd.Timestamp("1899-12-30")
+    transaction_date_serial = (pd.Timestamp(transaction_date) - reference_date).days
+    # Convert transaction time to fraction of day:
+    # seconds since midnight divided by the 86400 seconds in a day
+    transaction_time_fraction = (transaction_time.hour * 3600 +
+                               transaction_time.minute * 60 +
+                               transaction_time.second) / 86400
+    # Encode customer location
+    try:
+        location_encoded = label_encoder.transform([customer_location])[0]
+    except ValueError:
+        # transform() rejected the label: warn and fall back to the hard-coded
+        # code 0. NOTE(review): no "most frequent class" is computed here; 0 is
+        # just a fixed default - confirm this is the intended fallback.
+        st.warning(f"Location '{customer_location}' not seen during training. Using fallback encoding.")
+        location_encoded = 0  # Fixed fallback code
+    # Create feature vector matching training format
+    features = pd.DataFrame({
+        'Transaction Amount': [transaction_amount],
+        'Transaction Date': [transaction_date_serial],
+        'Customer Age': [customer_age],
+        'Account Age Days': [account_age_days],
+        'Transaction Time': [transaction_time_fraction],
+        'Customer Location Encoded': [location_encoded]
+    })
+    return features
+@st.cache_data
+def get_sample_locations(_label_encoder):
+    """Return up to the first 100 location labels known to the encoder.
+
+    Args:
+        _label_encoder: Object exposing a sliceable ``classes_`` attribute.
+            The leading underscore keeps ``st.cache_data`` from hashing it.
+
+    Returns:
+        list: The first 100 entries of ``classes_``, or
+        ``["Unknown Location"]`` when ``classes_`` is missing or cannot be
+        sliced.
+    """
+    try:
+        return list(_label_encoder.classes_[:100])  # First 100 locations for dropdown
+    except (AttributeError, TypeError):
+        # Only the expected failures are handled (no ``classes_`` attribute, or a
+        # value that cannot be sliced/iterated); a bare ``except`` would also
+        # swallow interrupts and Streamlit's own control-flow exceptions.
+        return ["Unknown Location"]
+def create_shap_plots(model, features, feature_names):
+    """Compute SHAP values for ``features`` with a ``shap.TreeExplainer``.
+
+    Despite the name, no figure is drawn here, and ``feature_names`` is
+    accepted but not used.
+
+    Returns:
+        tuple: ``(shap_values_fraud, expected_value, explainer)``. When SHAP
+        returns a list, element 1 of both the values and the expected value
+        is selected; otherwise both are passed through unchanged.
+    """
+    # Initialize SHAP explainer
+    explainer = shap.TreeExplainer(model)
+    shap_values = explainer.shap_values(features)
+    # A list result is treated as per-class output: keep index 1 (fraud class)
+    if isinstance(shap_values, list):
+        shap_values_fraud = shap_values[1]  # Class 1 (fraud)
+        expected_value = explainer.expected_value[1]
+    else:
+        shap_values_fraud = shap_values
+        expected_value = explainer.expected_value
+    return shap_values_fraud, expected_value, explainer
+def plot_shap_waterfall(shap_values, expected_value, features, feature_names):
+    """Draw a cumulative bar chart of SHAP contributions for the first row.
+
+    One bar is drawn for the base value, one per feature showing the running
+    total after that feature's SHAP value is added, and one for the final
+    total. Every bar starts at zero and reaches the running total; the bars
+    are not floating increments.
+
+    Args:
+        shap_values: Indexable whose element 0 holds one SHAP value per feature.
+        expected_value: Numeric base value the running total starts from.
+        features: DataFrame; only row 0 is read, for the tick labels.
+        feature_names: Names paired positionally with the SHAP values.
+
+    Returns:
+        The matplotlib figure created by ``plt.subplots``. It is not closed
+        here.
+    """
+    fig, ax = plt.subplots(figsize=(10, 6))
+    # Get feature values and SHAP values for the single prediction
+    feature_values = features.iloc[0].values
+    shap_vals = shap_values[0]
+    # Parallel lists describing each bar: x position, height, tick label, colour
+    cumulative = expected_value
+    positions = []
+    values = []
+    labels = []
+    colors = []
+    # Add base value
+    positions.append(0)
+    values.append(expected_value)
+    labels.append(f"Base Value\n{expected_value:.3f}")
+    colors.append('gray')
+    # Add each feature contribution (red raises the score, blue lowers it or is zero)
+    for i, (feature, shap_val, feat_val) in enumerate(zip(feature_names, shap_vals, feature_values)):
+        positions.append(i + 1)
+        values.append(cumulative + shap_val)
+        labels.append(f"{feature}\n{feat_val:.3f}\nSHAP: {shap_val:.3f}")
+        colors.append('red' if shap_val > 0 else 'blue')
+        cumulative += shap_val
+    # Add final prediction (same height as the last running total)
+    positions.append(len(feature_names) + 1)
+    values.append(cumulative)
+    labels.append(f"Final Score\n{cumulative:.3f}")
+    colors.append('green' if cumulative > 0 else 'orange')
+    # Create bar plot
+    bars = ax.bar(positions, values, color=colors, alpha=0.7)
+    # Add connecting lines: a dashed horizontal segment at each bar's height,
+    # spanning the gap to the next bar
+    for i in range(len(positions) - 1):
+        ax.plot([positions[i] + 0.4, positions[i + 1] - 0.4],
+               [values[i], values[i]], 'k--', alpha=0.5)
+    ax.set_xticks(positions)
+    ax.set_xticklabels(labels, rotation=45, ha='right')
+    ax.set_ylabel('SHAP Value Contribution')
+    ax.set_title('SHAP Waterfall Plot - Feature Contributions to Fraud Prediction')
+    ax.grid(True, alpha=0.3)
+    ax.axhline(y=0, color='black', linestyle='-', alpha=0.5)
+    plt.tight_layout()
+    return fig
+def main():
+    """Render the fraud-detection page.
+
+    Loads the model and encoder, collects the transaction fields from the
+    sidebar and, once the "Analyze Transaction" button is pressed, shows the
+    prediction, a risk gauge and the SHAP explanation. Before the button is
+    pressed an informational landing view is shown instead.
+    """
+    st.markdown('<div class="main-header">🔍 Fraud Detection System</div>', unsafe_allow_html=True)
+    # Load models
+    model, label_encoder = load_models()
+    # Get sample locations for dropdown
+    sample_locations = get_sample_locations(label_encoder)
+    # Sidebar for input
+    st.sidebar.header("Transaction Details")
+    # Input fields
+    transaction_amount = st.sidebar.number_input(
+        "Transaction Amount ($)",
+        min_value=0.01,
+        max_value=10000.0,
+        value=100.0,
+        step=0.01,
+        help="Enter the transaction amount in dollars"
+    )
+    transaction_date = st.sidebar.date_input(
+        "Transaction Date",
+        value=datetime.now().date(),
+        help="Select the date of the transaction"
+    )
+    transaction_time = st.sidebar.time_input(
+        "Transaction Time",
+        value=time(12, 0),
+        help="Select the time of the transaction"
+    )
+    customer_age = st.sidebar.slider(
+        "Customer Age",
+        min_value=16,
+        max_value=100,
+        value=35,
+        help="Customer's age in years"
+    )
+    account_age_days = st.sidebar.number_input(
+        "Account Age (Days)",
+        min_value=1,
+        max_value=3650,
+        value=365,
+        help="How many days old is the customer's account"
+    )
+    customer_location = st.sidebar.selectbox(
+        "Customer Location",
+        options=sample_locations,
+        index=0,
+        help="Select customer's location"
+    )
+    # Alternative: a non-empty manual entry overrides the dropdown choice
+    manual_location = st.sidebar.text_input(
+        "Or enter location manually:",
+        placeholder="Type location name",
+        help="Enter a specific location if not in dropdown"
+    )
+    if manual_location:
+        customer_location = manual_location
+    # Prediction button
+    if st.sidebar.button("🔍 Analyze Transaction", type="primary"):
+        # Preprocess data
+        features = preprocess_data(
+            transaction_amount, transaction_date, customer_age,
+            customer_location, account_age_days, transaction_time, label_encoder
+        )
+        # Make prediction; index 1 of the probability row is used as the fraud probability
+        prediction_proba = model.predict_proba(features)[0]
+        prediction = model.predict(features)[0]
+        fraud_probability = prediction_proba[1]
+        # Main content area
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            # Display prediction
+            if prediction == 1:
+                st.markdown(
+                    f'<div class="prediction-box fraud-box">⚠️ FRAUD DETECTED<br>'
+                    f'Fraud Probability: {fraud_probability:.2%}</div>',
+                    unsafe_allow_html=True
+                )
+            else:
+                st.markdown(
+                    f'<div class="prediction-box legitimate-box">✅ LEGITIMATE TRANSACTION<br>'
+                    f'Fraud Probability: {fraud_probability:.2%}</div>',
+                    unsafe_allow_html=True
+                )
+            # Summary of the submitted inputs (raw values, not importances)
+            st.subheader("📊 Feature Analysis")
+            # Display input features
+            st.write("**Input Features:**")
+            feature_df = pd.DataFrame({
+                'Feature': ['Transaction Amount', 'Transaction Date', 'Customer Age',
+                           'Account Age Days', 'Transaction Time', 'Customer Location'],
+                'Value': [f"${transaction_amount:.2f}", str(transaction_date), f"{customer_age} years",
+                         f"{account_age_days} days", str(transaction_time), customer_location]
+            })
+            st.dataframe(feature_df, use_container_width=True)
+        with col2:
+            # Risk metrics
+            st.subheader("🎯 Risk Metrics")
+            # Risk level bands: >=0.8 very high, >=0.6 high, >=0.4 medium, else low
+            if fraud_probability >= 0.8:
+                risk_level = "🔴 Very High"
+                risk_color = "#f44336"
+            elif fraud_probability >= 0.6:
+                risk_level = "🟠 High"
+                risk_color = "#ff9800"
+            elif fraud_probability >= 0.4:
+                risk_level = "🟡 Medium"
+                risk_color = "#ffc107"
+            else:
+                risk_level = "🟢 Low"
+                risk_color = "#4caf50"
+            st.markdown(f"**Risk Level:** {risk_level}")
+            st.markdown(f"**Confidence:** {max(fraud_probability, 1-fraud_probability):.2%}")
+            # Probability gauge
+            fig_gauge = go.Figure(go.Indicator(
+                mode = "gauge+number+delta",
+                value = fraud_probability * 100,
+                domain = {'x': [0, 1], 'y': [0, 1]},
+                title = {'text': "Fraud Probability (%)"},
+                delta = {'reference': 50},
+                gauge = {
+                    'axis': {'range': [None, 100]},
+                    'bar': {'color': risk_color},
+                    'steps': [
+                        {'range': [0, 25], 'color': "lightgray"},
+                        {'range': [25, 50], 'color': "gray"},
+                        {'range': [50, 75], 'color': "orange"},
+                        {'range': [75, 100], 'color': "red"}
+                    ],
+                    'threshold': {
+                        'line': {'color': "red", 'width': 4},
+                        'thickness': 0.75,
+                        'value': 90
+                    }
+                }
+            ))
+            fig_gauge.update_layout(height=300)
+            st.plotly_chart(fig_gauge, use_container_width=True)
+        # SHAP Explanations
+        st.subheader("🎯 AI Explanation (SHAP)")
+        try:
+            # Compute SHAP values for the submitted row
+            shap_values, expected_value, explainer = create_shap_plots(
+                model, features, features.columns.tolist()
+            )
+            # Two-column layout: SHAP bar chart on the left, waterfall on the right
+            col1, col2 = st.columns(2)
+            with col1:
+                st.write("**Feature Contributions:**")
+                # Create a simple bar plot of SHAP values
+                shap_df = pd.DataFrame({
+                    'Feature': features.columns,
+                    'SHAP Value': shap_values[0],
+                    'Feature Value': features.iloc[0].values
+                })
+                # Order rows by absolute SHAP value, largest first
+                shap_df = shap_df.reindex(shap_df['SHAP Value'].abs().sort_values(ascending=False).index)
+                fig_bar = px.bar(
+                    shap_df,
+                    x='SHAP Value',
+                    y='Feature',
+                    orientation='h',
+                    color='SHAP Value',
+                    color_continuous_scale=['blue', 'white', 'red'],
+                    title="SHAP Feature Importance"
+                )
+                fig_bar.update_layout(height=400)
+                st.plotly_chart(fig_bar, use_container_width=True)
+            with col2:
+                st.write("**Waterfall Explanation:**")
+                # Create waterfall plot
+                fig_waterfall = plot_shap_waterfall(
+                    shap_values, expected_value, features, features.columns.tolist()
+                )
+                st.pyplot(fig_waterfall)
+                # st.pyplot does not clear a figure that is passed in explicitly, so
+                # close it here; otherwise pyplot keeps one open figure per analysis.
+                plt.close(fig_waterfall)
+            # Explanation text
+            st.write("**How to interpret SHAP values:**")
+            st.write("- 🔴 **Positive values (red)**: Push prediction towards FRAUD")
+            st.write("- 🔵 **Negative values (blue)**: Push prediction towards LEGITIMATE")
+            st.write("- **Magnitude**: Larger absolute values have stronger influence")
+            # Top contributing features (shap_df is already sorted by |SHAP|)
+            top_features = shap_df.head(3)
+            st.write("**Top 3 Contributing Features:**")
+            for _, row in top_features.iterrows():
+                direction = "towards FRAUD" if row['SHAP Value'] > 0 else "towards LEGITIMATE"
+                st.write(f"• **{row['Feature']}** (value: {row['Feature Value']:.3f}): "
+                        f"Contributes {abs(row['SHAP Value']):.3f} {direction}")
+        except Exception as e:
+            # Deliberate best-effort boundary: the prediction above has already been
+            # rendered, so an explanation failure is reported instead of raised.
+            st.error(f"Error generating SHAP explanations: {str(e)}")
+            st.write("SHAP explanations are not available, but the prediction is still valid.")
+    else:
+        # Default view when no prediction is made
+        st.info("👈 Enter transaction details in the sidebar and click 'Analyze Transaction' to get started!")
+        # Show some information about the model
+        st.subheader("ℹ️ About This System")
+        col1, col2, col3 = st.columns(3)
+        with col1:
+            st.markdown("""
+            **🤖 Model Information**
+            - Algorithm: LightGBM
+            - Training: SMOTE-balanced data
+            - Features: 6 key transaction attributes
+            """)
+        with col2:
+            st.markdown("""
+            **🎯 Key Features**
+            - Transaction amount & timing
+            - Customer demographics
+            - Account age
+            - Geographic location
+            """)
+        with col3:
+            st.markdown("""
+            **🔍 AI Explainability**
+            - SHAP values for interpretability
+            - Feature contribution analysis
+            - Waterfall explanations
+            """)
+if __name__ == "__main__":
+    main()

pages/🏠 Home.py DELETED Viewed

@@ -1,58 +0,0 @@
-import streamlit as st
-st.set_page_config(page_title="🏠 Home")
-def home_page():
-    col1, col2 = st.columns([2, 1])
-    with col1:
-        st.markdown("""
-        ## 🎯 Welcome to Our AI-Powered Fraud Detection System
-        Our cutting-edge system combines **Machine Learning** and **Explainable AI** to protect
-        e-commerce platforms from fraudulent transactions.
-        ### ✨ Key Features
-        🤖 **Advanced ML Model**: LightGBM classifier with 75.2% ROC AUC
-        🔍 **Real-time Detection**: Instant fraud risk assessment
-        📊 **Explainable AI**: SHAP-based feature impact analysis
-        📈 **Interactive Dashboard**: Comprehensive analytics and insights
-        🛡️ **Robust Security**: Production-ready fraud prevention
-        ### 🚀 How It Works
-        1. **Input Transaction Data**: Enter transaction details
-        2. **AI Analysis**: Our model processes 6 key features
-        3. **Risk Assessment**: Get instant fraud probability
-        4. **Explanation**: Understand why decisions are made
-        """)
-    with col2:
-        st.markdown("### 📊 Model Performance")
-        metrics = [
-            ("🎯 ROC AUC Score", "75.2%", "#1f77b4"),
-            ("🎲 Precision", "19.0%", "#ff7f0e"),
-            ("🔍 Recall", "58.0%", "#2ca02c"),
-            ("⚖️ F1-Score", "29.0%", "#d62728")
-        ]
-        for metric, value, color in metrics:
-            st.markdown(f"""
-            <div style=\"background: linear-gradient(135deg, {color}20, {color}10);
-                       padding: 1rem; border-radius: 10px; margin: 0.5rem 0;
-                       border-left: 4px solid {color};\">
-                <h4 style=\"margin: 0; color: {color};\">{metric}</h4>
-                <h2 style=\"margin: 0; color: {color};\">{value}</h2>
-            </div>
-            """, unsafe_allow_html=True)
-    st.markdown("---")
-    st.markdown("### 🔧 Technology Stack")
-    tech_cols = st.columns(4)
-    technologies = [
-        ("🤖 Machine Learning", "LightGBM\nScikit-learn\nIMBLEARN"),
-        ("🧠 Explainable AI", "SHAP\nDiCE-ML\nSurrogate Models"),
-        ("📊 Visualization", "Plotly\nMatplotlib\nSeaborn"),
-        ("🚀 Deployment", "Streamlit\nPandas\nNumPy")
-    ]
-    for i, (title, tech) in enumerate(technologies):
-        with tech_cols[i]:
-            st.markdown(f"""
-            <div style=\"text-align: center; padding: 1rem; background: #f0f4ff; border-radius: 10px; height: 120px; color: #222;\">
-                <h4>{title}</h4>
-                <p style=\"font-size: 0.9em; color: #333;\">{tech}</p>
-            </div>
-            """, unsafe_allow_html=True)
-home_page()

pages/📈 Analytics Dashboard.py DELETED Viewed

@@ -1,111 +0,0 @@
-import streamlit as st
-st.set_page_config(page_title="📈 Analytics Dashboard")
-import numpy as np
-import pandas as pd
-import plotly.express as px
-def analytics_dashboard_page():
-    st.markdown("## 📈 Fraud Analytics Dashboard")
-    st.markdown("*Simulated data for demonstration purposes*")
-    np.random.seed(42)
-    n_transactions = 5000
-    dates = pd.date_range('2024-01-01', periods=n_transactions, freq='15min')
-    hours = dates.hour
-    fraud_prob_base = 0.02
-    fraud_prob_night = np.where((hours < 6) | (hours > 22), 0.08, fraud_prob_base)
-    transactions = pd.DataFrame({
-        'Date': dates,
-        'Hour': hours,
-        'Amount': np.random.lognormal(4, 1.2, n_transactions),
-        'Customer_Age': np.random.normal(40, 15, n_transactions).clip(18, 80),
-        'Account_Age': np.random.exponential(200, n_transactions).clip(1, 2000),
-        'Is_Fraud': np.random.binomial(1, fraud_prob_night)
-    })
-    high_amount_mask = transactions['Amount'] > transactions['Amount'].quantile(0.9)
-    transactions.loc[high_amount_mask, 'Is_Fraud'] = np.random.binomial(
-        1, 0.15, high_amount_mask.sum()
-    )
-    total_transactions = len(transactions)
-    fraud_count = transactions['Is_Fraud'].sum()
-    fraud_rate = fraud_count / total_transactions
-    total_amount = transactions['Amount'].sum()
-    fraud_amount = transactions[transactions['Is_Fraud'] == 1]['Amount'].sum()
-    kpi_col1, kpi_col2, kpi_col3, kpi_col4 = st.columns(4)
-    with kpi_col1:
-        st.metric("📊 Total Transactions", f"{total_transactions:,}")
-    with kpi_col2:
-        st.metric("🚨 Fraud Cases", f"{fraud_count:,}", delta=f"{fraud_rate:.2%}")
-    with kpi_col3:
-        st.metric("💰 Total Volume", f"₹{total_amount:,.0f}")
-    with kpi_col4:
-        st.metric("⚠️ Fraud Loss", f"₹{fraud_amount:,.0f}")
-    st.markdown("---")
-    st.markdown("### ⏰ Time-Based Fraud Patterns")
-    col1, col2 = st.columns(2)
-    with col1:
-        hourly_stats = transactions.groupby('Hour').agg({
-            'Is_Fraud': ['count', 'sum', 'mean']
-        }).round(3)
-        hourly_stats.columns = ['Total_Transactions', 'Fraud_Count', 'Fraud_Rate']
-        hourly_stats = hourly_stats.reset_index()
-        st.write("hourly_stats", hourly_stats)  # Debug output
-        fig = px.line(
-            hourly_stats,
-            x='Hour',
-            y='Fraud_Rate',
-            title="Fraud Rate by Hour of Day",
-            markers=True
-        )
-        fig.update_layout(height=400)
-        st.plotly_chart(fig, use_container_width=True)
-    with col2:
-        fig = px.bar(
-            hourly_stats,
-            x='Hour',
-            y='Total_Transactions',
-            title="Transaction Volume by Hour",
-            color='Fraud_Rate',
-            color_continuous_scale='reds'
-        )
-        fig.update_layout(height=400)
-        st.plotly_chart(fig, use_container_width=True)
-    st.markdown("### 💵 Transaction Amount Analysis")
-    col1, col2 = st.columns(2)
-    with col1:
-        st.write("transactions", transactions)  # Debug output
-        fig = px.histogram(
-            transactions,
-            x='Amount',
-            color='Is_Fraud',
-            nbins=50,
-            title="Transaction Amount Distribution",
-            labels={'Is_Fraud': 'Fraud Status'},
-            marginal="box"
-        )
-        st.plotly_chart(fig, use_container_width=True)
-    with col2:
-        fig = px.box(
-            transactions,
-            x='Is_Fraud',
-            y='Amount',
-            title="Amount Distribution: Normal vs Fraud",
-            labels={'Is_Fraud': 'Fraud Status', 'Amount': 'Transaction Amount (₹)'}
-        )
-        st.plotly_chart(fig, use_container_width=True)
-    st.markdown("### 👥 Customer Demographics & Fraud Risk")
-    age_bins = pd.cut(transactions['Customer_Age'], bins=6, precision=0)
-    age_stats = transactions.groupby(age_bins)['Is_Fraud'].agg(['count', 'sum', 'mean']).reset_index()
-    age_stats.columns = ['Age_Group', 'Total', 'Fraud_Count', 'Fraud_Rate']
-    age_stats['Age_Group'] = age_stats['Age_Group'].astype(str)  # Fix Interval serialization
-    st.write("age_stats", age_stats)  # Debug output
-    fig = px.bar(
-        age_stats,
-        x='Age_Group',
-        y='Fraud_Rate',
-        title="Fraud Rate by Customer Age Group",
-        color='Fraud_Rate',
-        color_continuous_scale='reds'
-    )
-    st.plotly_chart(fig, use_container_width=True)
-analytics_dashboard_page()

pages/📊 Model Insights.py DELETED Viewed

@@ -1,78 +0,0 @@
-import streamlit as st
-st.set_page_config(page_title="📊 Model Insights")
-import numpy as np
-import pandas as pd
-import plotly.express as px
-import plotly.graph_objects as go
-from utils.model_utils import create_demo_model
-def model_insights_page():
-    # Use a demo model for feature importance display
-    model, _ = create_demo_model()
-    st.markdown("## 📊 Model Performance & Insights")
-    feature_names = ['Transaction Amount', 'Transaction Date', 'Customer Age',
-                    'Account Age Days', 'Transaction Time', 'Customer Location Encoded']
-    try:
-        if hasattr(model, 'feature_importances_'):
-            importance = model.feature_importances_
-        else:
-            importance = np.random.rand(len(feature_names))
-            importance = importance / importance.sum()
-        importance_df = pd.DataFrame({
-            'Feature': feature_names,
-            'Importance': importance
-        }).sort_values('Importance', ascending=True)
-        col1, col2 = st.columns(2)
-        with col1:
-            st.markdown("### 🎯 Feature Importance Ranking")
-            fig = px.bar(
-                importance_df,
-                x='Importance',
-                y='Feature',
-                orientation='h',
-                color='Importance',
-                color_continuous_scale='blues',
-                title="How Much Each Feature Influences Predictions"
-            )
-            fig.update_layout(height=400)
-            st.plotly_chart(fig, use_container_width=True)
-        with col2:
-            st.markdown("### 🥧 Feature Distribution")
-            fig = px.pie(
-                importance_df,
-                values='Importance',
-                names='Feature',
-                title="Relative Feature Importance",
-                color_discrete_sequence=px.colors.qualitative.Set3
-            )
-            st.plotly_chart(fig, use_container_width=True)
-    except Exception as e:
-        st.error(f"Error displaying feature importance: {e}")
-    st.markdown("---")
-    st.markdown("### 🏆 Model Performance Dashboard")
-    metrics_data = {
-        'Metric': ['ROC AUC', 'Precision (Fraud)', 'Recall (Fraud)', 'F1-Score (Fraud)', 'Accuracy'],
-        'Score': [0.752, 0.19, 0.58, 0.29, 0.86],
-        'Benchmark': [0.7, 0.2, 0.5, 0.3, 0.85]
-    }
-    col1, col2 = st.columns(2)
-    with col1:
-        fig = go.Figure()
-        fig.add_trace(go.Bar(name='Our Model', x=metrics_data['Metric'], y=metrics_data['Score']))
-        fig.add_trace(go.Bar(name='Industry Benchmark', x=metrics_data['Metric'], y=metrics_data['Benchmark']))
-        fig.update_layout(
-            title="Model vs Industry Benchmark",
-            barmode='group',
-            height=400
-        )
-        st.plotly_chart(fig, use_container_width=True)
-    with col2:
-        for metric, score, benchmark in zip(metrics_data['Metric'], metrics_data['Score'], metrics_data['Benchmark']):
-            delta = score - benchmark
-            st.metric(
-                metric,
-                f"{score:.3f}",
-                delta=f"{delta:+.3f}" if delta != 0 else None
-            )
-model_insights_page()

pages/🔍 Fraud Detection.py DELETED Viewed

@@ -1,130 +0,0 @@
-import streamlit as st
-st.set_page_config(page_title="🔍 Fraud Detection")
-import pandas as pd
-from utils.preprocessing import get_location_options, preprocess_inputs
-from utils.visualization import create_risk_gauge, explain_prediction_simple
-from utils.model_utils import load_models
-def fraud_detection_page():
-    model, label_encoder, models_loaded = load_models()
-    st.markdown("## 🔍 Real-Time Fraud Detection")
-    st.markdown("Enter transaction details below to get instant fraud risk assessment:")
-    location_options = get_location_options(label_encoder)
-    with st.form("fraud_detection_form", clear_on_submit=False):
-        col1, col2, col3 = st.columns(3)
-        with col1:
-            st.markdown("### 💰 Transaction Info")
-            amount = st.number_input(
-                "Transaction Amount (₹)",
-                min_value=0.01, max_value=50000.0, value=150.0, step=0.01,
-                help="Enter the transaction amount in INR"
-            )
-            date = st.date_input(
-                "Transaction Date",
-                value=pd.Timestamp.now().date(),
-                help="Select the date of transaction"
-            )
-        with col2:
-            st.markdown("### 👤 Customer Info")
-            age = st.number_input(
-                "Customer Age",
-                min_value=16, max_value=100, value=35, step=1,
-                help="Age of the customer making the transaction"
-            )
-            account_age = st.number_input(
-                "Account Age (Days)",
-                min_value=1, max_value=3650, value=180, step=1,
-                help="How many days since account was created"
-            )
-        with col3:
-            st.markdown("### 📍 Additional Details")
-            trans_time = st.time_input(
-                "Transaction Time",
-                value=pd.Timestamp.now().time().replace(hour=14, minute=30, second=0, microsecond=0),
-                help="Time when transaction occurred"
-            )
-            location = st.selectbox(
-                "Customer Location",
-                options=location_options,
-                index=0,
-                help="Select customer's location"
-            )
-        st.markdown("---")
-        col1, col2, col3 = st.columns([1, 2, 1])
-        with col2:
-            submitted = st.form_submit_button("🚀 Analyze Transaction", use_container_width=True)
-    if submitted:
-        processed_data = preprocess_inputs(amount, date, age, account_age, trans_time, location, label_encoder)
-        if processed_data is not None:
-            input_df = pd.DataFrame([processed_data])
-            prediction_proba = model.predict_proba(input_df)[0]
-            prediction = model.predict(input_df)[0]
-            fraud_probability = prediction_proba[1] if len(prediction_proba) > 1 else prediction_proba[0]
-            st.markdown("---")
-            st.markdown("## 🎯 Analysis Results")
-            col1, col2 = st.columns([1, 2])
-            with col1:
-                fig_gauge = create_risk_gauge(fraud_probability)
-                st.plotly_chart(fig_gauge, use_container_width=True)
-            with col2:
-                if prediction == 1 or fraud_probability > 0.5:
-                    st.markdown(f'''
-                    <div class="fraud-alert">
-                        <h2>⚠️ HIGH FRAUD RISK</h2>
-                        <h3>Risk Score: {fraud_probability:.1%}</h3>
-                        <p><strong>Recommendation:</strong> Review this transaction carefully</p>
-                        <p>Multiple fraud indicators detected</p>
-                    </div>
-                    ''', unsafe_allow_html=True)
-                else:
-                    st.markdown(f'''
-                    <div class="safe-alert">
-                        <h2>✅ LOW FRAUD RISK</h2>
-                        <h3>Risk Score: {fraud_probability:.1%}</h3>
-                        <p><strong>Recommendation:</strong> Transaction appears legitimate</p>
-                        <p>Normal transaction pattern detected</p>
-                    </div>
-                    ''', unsafe_allow_html=True)
-            st.markdown("---")
-            st.markdown("### 🔬 AI Explanation - Why This Decision?")
-            explanation_df = explain_prediction_simple(model, processed_data)
-            if explanation_df is not None:
-                col1, col2 = st.columns(2)
-                with col1:
-                    st.markdown("#### 📊 Feature Impact Analysis")
-                    for _, row in explanation_df.head(4).iterrows():
-                        importance_pct = row['Importance'] * 100
-                        st.markdown(f"""
-                        <div class=\"feature-impact\">
-                            <strong>{row['Feature']}</strong><br>
-                            Value: {row['Value']:.3f} | Impact: {importance_pct:.1f}%
-                        </div>
-                        """, unsafe_allow_html=True)
-                with col2:
-                    st.markdown("#### 📈 Feature Importance Chart")
-                    import plotly.express as px
-                    fig = px.bar(
-                        explanation_df.head(6),
-                        x='Importance',
-                        y='Feature',
-                        orientation='h',
-                        color='Importance',
-                        color_continuous_scale='viridis',
-                        title="Feature Contribution to Decision"
-                    )
-                    fig.update_layout(height=400, showlegend=False)
-                    st.plotly_chart(fig, use_container_width=True)
-            st.markdown("---")
-            st.markdown("### 📋 Transaction Summary")
-            summary_data = {
-                "Field": ["Amount", "Date", "Customer Age", "Account Age", "Time", "Location"],
-                "Value": [f"₹{amount:.2f}", str(date), f"{age} years", f"{account_age} days",
-                         str(trans_time), location]
-            }
-            summary_df = pd.DataFrame(summary_data)
-            # Indent the table by placing it in the center column of a 3-column layout
-            col1, col2, col3 = st.columns([1,2,1])
-            with col2:
-                st.table(summary_df)
-fraud_detection_page()

requirements.txt CHANGED Viewed

@@ -1,11 +1,11 @@
-streamlit
-pandas
-numpy
-scikit-learn
-lightgbm
-matplotlib
-seaborn
-plotly
-joblib
-shap
-dice-ml

+streamlit==1.28.1
+pandas==2.0.3
+numpy==1.24.3
+joblib==1.3.2
+shap==0.42.1
+matplotlib==3.7.2
+seaborn==0.12.2
+plotly==5.17.0
+lightgbm==4.1.0
+scikit-learn==1.3.0
+imbalanced-learn==0.11.0

utils/model_utils.py DELETED Viewed

@@ -1,33 +0,0 @@
-import joblib
-import streamlit as st
-import numpy as np
-def load_models():
-    """Load the persisted model and the location label encoder from disk.
-    Returns:
-        A ``(model, label_encoder, True)`` tuple when both pickles load, or
-        ``(None, None, False)`` after reporting the problem in the Streamlit
-        UI when either file is missing.
-    """
-    artifact_paths = ('lightgbm_model.pkl', 'customer_loc.pkl')
-    try:
-        # Loaded in order: the model first, then the encoder.
-        artifacts = [joblib.load(path) for path in artifact_paths]
-    except FileNotFoundError as e:
-        st.error(f"⚠️ Model files not found: {e}")
-        st.info("Please ensure 'lightgbm_model.pkl' and 'customer_loc.pkl' are in the app directory.")
-        return None, None, False
-    model, label_encoder = artifacts
-    return model, label_encoder, True
-def create_demo_model():
-    """Create a demo model and encoder for when the real artifacts are unavailable.
-    The classifier is a small random forest fitted on synthetic Gaussian
-    features (6 columns) with labels drawn roughly 95% / 5%; it carries no
-    real fraud signal.
-    Returns:
-        A ``(demo_model, demo_encoder)`` tuple: the fitted
-        ``RandomForestClassifier`` and a ``LabelEncoder`` fitted on ten
-        hard-coded city names.
-    """
-    from sklearn.ensemble import RandomForestClassifier
-    from sklearn.preprocessing import LabelEncoder
-    # Use a private generator instead of np.random.seed(42): it yields the
-    # same draws but does not reset the process-wide NumPy random state that
-    # other code in the app may rely on.
-    rng = np.random.RandomState(42)
-    n_samples = 1000
-    X_demo = rng.randn(n_samples, 6)
-    y_demo = rng.choice([0, 1], n_samples, p=[0.95, 0.05])
-    # Train demo model
-    demo_model = RandomForestClassifier(n_estimators=10, random_state=42)
-    demo_model.fit(X_demo, y_demo)
-    # Create demo encoder
-    demo_encoder = LabelEncoder()
-    demo_locations = ["New York", "Los Angeles", "Chicago", "Houston", "Phoenix",
-                     "Philadelphia", "San Antonio", "San Diego", "Dallas", "San Jose"]
-    demo_encoder.fit(demo_locations)
-    return demo_model, demo_encoder

utils/preprocessing.py DELETED Viewed

@@ -1,34 +0,0 @@
-import pandas as pd
-import streamlit as st
-def get_location_options(label_encoder):
-    """Return the encoder's known classes as a plain list.
-    Falls back to ``["Unknown"]`` when the encoder has no ``classes_``
-    attribute (for example when it is ``None``) or when ``classes_`` has no
-    ``tolist`` method.
-    """
-    try:
-        return label_encoder.classes_.tolist()
-    except AttributeError:
-        pass
-    return ["Unknown"]
-def preprocess_inputs(amount, date, age, account_age, trans_time, location, label_encoder):
-    """Convert raw form inputs into the numeric feature dict.
-    The date becomes a whole-day offset from 1899-12-30, the time becomes a
-    fraction of a day, and the location is label-encoded. When no encoder is
-    given the location code is 0; when the encoder rejects the location with
-    ``ValueError`` a warning is shown and the middle class index is used.
-    Returns:
-        A dict keyed by feature name, or ``None`` (after showing an error in
-        the Streamlit UI) if any step raises.
-    """
-    try:
-        days_since_epoch = (pd.to_datetime(date) - pd.Timestamp("1899-12-30")).days
-        seconds_into_day = trans_time.hour * 3600 + trans_time.minute * 60 + trans_time.second
-        day_fraction = seconds_into_day / 86400
-        if label_encoder is None:
-            location_code = 0
-        else:
-            try:
-                location_code = label_encoder.transform([location])[0]
-            except ValueError:
-                # Unseen location: fall back to the middle class index.
-                location_code = len(label_encoder.classes_) // 2
-                st.warning(f"⚠️ Location '{location}' not in training data. Using fallback encoding.")
-        features = {}
-        features['Transaction Amount'] = float(amount)
-        features['Transaction Date'] = int(days_since_epoch)
-        features['Customer Age'] = int(age)
-        features['Account Age Days'] = int(account_age)
-        features['Transaction Time'] = float(day_fraction)
-        features['Customer Location Encoded'] = int(location_code)
-        return features
-    except Exception as e:
-        st.error(f"Error in preprocessing: {e}")
-        return None

utils/visualization.py DELETED Viewed

@@ -1,47 +0,0 @@
-import plotly.graph_objects as go
-import plotly.express as px
-import pandas as pd
-import numpy as np
-import streamlit as st
-def create_risk_gauge(fraud_probability):
-    """Build a Plotly gauge figure for the fraud risk score.
-    ``fraud_probability`` is multiplied by 100 for display (presumably a
-    fraction in [0, 1] -- confirm against the caller). The gauge has four
-    coloured bands, a delta relative to 50 and a threshold marker at 70.
-    Returns:
-        The ``go.Figure`` with its height set to 300.
-    """
-    risk_bands = [
-        (0, 25, "lightgreen"),
-        (25, 50, "yellow"),
-        (50, 75, "orange"),
-        (75, 100, "red"),
-    ]
-    gauge_spec = {
-        'axis': {'range': [None, 100]},
-        'bar': {'color': "darkblue"},
-        'steps': [{'range': [low, high], 'color': shade} for low, high, shade in risk_bands],
-        'threshold': {
-            'line': {'color': "red", 'width': 4},
-            'thickness': 0.75,
-            'value': 70,
-        },
-    }
-    indicator = go.Indicator(
-        mode="gauge+number+delta",
-        value=fraud_probability * 100,
-        domain={'x': [0, 1], 'y': [0, 1]},
-        title={'text': "Fraud Risk Score (%)"},
-        delta={'reference': 50},
-        gauge=gauge_spec,
-    )
-    fig = go.Figure(indicator)
-    fig.update_layout(height=300)
-    return fig
-def explain_prediction_simple(model, input_data):
-    """Build a per-feature importance table for one prediction.
-    Importances come from ``model.feature_importances_`` and are normalised
-    to sum to 1, so the 'Importance' column is always a fraction regardless of
-    whether the model reports fractions or raw counts. When the model exposes
-    no importances, every feature gets an equal share rather than a made-up
-    ranking.
-    Args:
-        model: Fitted estimator, optionally exposing ``feature_importances_``.
-        input_data: Mapping of feature name to the value used for prediction.
-    Returns:
-        A DataFrame with 'Feature', 'Importance' and 'Value' columns sorted by
-        descending importance, or ``None`` (after showing an error in the
-        Streamlit UI) if the table cannot be built.
-    """
-    try:
-        feature_names = list(input_data.keys())
-        if hasattr(model, 'feature_importances_'):
-            importances = np.asarray(model.feature_importances_, dtype=float)
-        else:
-            # No importances available: use a uniform weighting instead of
-            # random numbers, which would present noise as an explanation.
-            importances = np.ones(len(feature_names), dtype=float)
-        total = importances.sum()
-        # Skip the division when everything is zero (or there are no features).
-        if total > 0:
-            importances = importances / total
-        explanation_df = pd.DataFrame({
-            'Feature': feature_names,
-            'Importance': importances,
-            'Value': [input_data[feat] for feat in feature_names]
-        }).sort_values('Importance', ascending=False, kind='stable')
-        return explanation_df
-    except Exception as e:
-        st.error(f"Error generating explanation: {e}")
-        return None