Upload folder using huggingface_hub

Browse files

Files changed (16) hide show

.gitattributes +1 -0
app.py +67 -0
class_distribution.png +0 -0
creditcard.csv +3 -0
distributions.png +0 -0
eda.py +42 -0
fraud_model.joblib +3 -0
implementation_plan.md +43 -0
requirements.txt +8 -0
scaler.joblib +3 -0
scaler_amount.joblib +3 -0
scaler_time.joblib +3 -0
static/style.css +234 -0
task.md +10 -0
templates/index.html +176 -0
train_model.py +59 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+creditcard.csv filter=lfs diff=lfs merge=lfs -text

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+from flask import Flask, render_template, request, jsonify
+import joblib
+import pandas as pd
+import numpy as np
+import os
+app = Flask(__name__)
+# Load model and scalers
+MODEL_PATH = 'c:/card/fraud_model.joblib'
+SCALER_AMOUNT_PATH = 'c:/card/scaler_amount.joblib'
+SCALER_TIME_PATH = 'c:/card/scaler_time.joblib'
+DATA_PATH = 'c:/card/creditcard.csv'
+model = joblib.load(MODEL_PATH)
+scaler_amount = joblib.load(SCALER_AMOUNT_PATH)
+scaler_time = joblib.load(SCALER_TIME_PATH)
+# Cache some samples for the frontend
+df_all = pd.read_csv(DATA_PATH)
+fraud_samples = df_all[df_all['Class'] == 1].sample(10).to_dict('records')
+normal_samples = df_all[df_all['Class'] == 0].sample(10).to_dict('records')
+@app.route('/')
+def index():
+    return render_template('index.html')
+@app.route('/get_samples', methods=['GET'])
+def get_samples():
+    return jsonify({
+        "fraud": fraud_samples,
+        "normal": normal_samples
+    })
+@app.route('/predict', methods=['POST'])
+def predict():
+    try:
+        data = request.json
+        # Prepare feature vector (V1-V28, scaled_amount, scaled_time)
+        v_features = [float(data.get(f'V{i}', 0)) for i in range(1, 29)]
+        amount = float(data.get('Amount', 0))
+        time = float(data.get('Time', 0))
+        scaled_amount = scaler_amount.transform([[amount]])[0][0]
+        scaled_time = scaler_time.transform([[time]])[0][0]
+        # Combine all features
+        # Training script Order: X = df.drop('Class', axis=1)
+        # df had columns in order: V1...V28, scaled_amount, scaled_time (since original were dropped)
+        feature_vector = np.array(v_features + [scaled_amount, scaled_time]).reshape(1, -1)
+        prediction = int(model.predict(feature_vector)[0])
+        probability = model.predict_proba(feature_vector)[0].tolist()
+        return jsonify({
+            "is_fraud": prediction == 1,
+            "confidence": max(probability) * 100,
+            "class": "Fraudulent" if prediction == 1 else "Legitimate"
+        })
+    except Exception as e:
+        return jsonify({"error": str(e)}), 400
+if __name__ == '__main__':
+    app.run(debug=True, port=5000)

class_distribution.png ADDED Viewed

creditcard.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76274b691b16a6c49d3f159c883398e03ccd6d1ee12d9d8ee38f4b4b98551a89
+size 150828752

distributions.png ADDED Viewed

eda.py ADDED Viewed

	@@ -0,0 +1,42 @@

+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+# Load dataset
+df = pd.read_csv('c:/card/creditcard.csv')
+# Basic info
+print("Dataset Shape:", df.shape)
+print("\nFirst 5 rows:")
+print(df.head())
+# Check for missing values
+print("\nMissing values:")
+print(df.isnull().sum().max())
+# Class distribution
+print("\nClass Distribution (0: Normal, 1: Fraud):")
+print(df['Class'].value_counts())
+print("\nPercentage:")
+print(df['Class'].value_counts(normalize=True) * 100)
+# Statistics
+print("\nSummary Statistics:")
+print(df.describe())
+# Plotting class distribution
+plt.figure(figsize=(8, 6))
+sns.countplot(x='Class', data=df, palette='viridis')
+plt.title('Class Distribution (0: Normal, 1: Fraud)')
+plt.savefig('c:/card/class_distribution.png')
+# Plotting distributions of Time and Amount
+plt.figure(figsize=(12, 4))
+plt.subplot(1, 2, 1)
+sns.histplot(df['Amount'], bins=50, kde=True, color='blue')
+plt.title('Transaction Amount Distribution')
+plt.subplot(1, 2, 2)
+sns.histplot(df['Time'], bins=50, kde=True, color='red')
+plt.title('Transaction Time Distribution')
+plt.savefig('c:/card/distributions.png')

fraud_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:71576f9d6260ca53b4c0df6561821ba0847ef38c25d8339c34a1af6a346f38ac
+size 2047769

implementation_plan.md ADDED Viewed

	@@ -0,0 +1,43 @@

+# Implementation Plan - Credit Card Fraud Detection
+## Overview
+Build a machine learning pipeline to detect fraudulent credit card transactions and provide a web interface for real-time inference.
+## Tech Stack
+- **Dataset**: `creditcard.csv` (provided)
+- **ML Framework**: Scikit-learn, Pandas, Numpy, Imbalanced-learn (SMOTE)
+- **Model**: Random Forest or XGBoost
+- **Backend**: Flask (Python)
+- **Frontend**: HTML5, Vanilla CSS (Modern/Premium design), JavaScript
+## Steps
+### 1. Data Preparation & EDA
+- Load `creditcard.csv`.
+- Analyze class distribution (fraud vs. non-fraud).
+- Visualize correlations and feature distributions.
+- Check for missing values.
+### 2. Preprocessing
+- Scale `Time` and `Amount` features (V1-V28 are already PCA-transformed).
+- Split data into training and testing sets.
+- Apply SMOTE (Synthetic Minority Over-sampling Technique) to the training set only to handle class imbalance; the test set keeps its original class distribution.
+### 3. Model Engineering
+- Train multiple models (Logistic Regression, Random Forest).
+- Evaluate using Precision-Recall curves and F1-score.
+- Save the best model using `joblib`.
+### 4. Backend (Flask)
+- Create an API endpoint `/predict`.
+- Load the trained model and the fitted `Amount` and `Time` scalers.
+- Handle POST requests with transaction data.
+### 5. Frontend (Web UI)
+- Build a premium, glassmorphic UI.
+- Form to input transaction details (or sample details).
+- Display prediction result with a visual indicator (Safe vs. Fraud).
+### 6. Deployment Readiness
+- Create `requirements.txt`.
+- Ensure scripts are well-documented.

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+pandas
+numpy
+matplotlib
+seaborn
+scikit-learn
+imbalanced-learn
+joblib
+flask

scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:209e91b39c7def96a51d697fc57bb271a76d66abee000512fa2a166d3325ca11
+size 623

scaler_amount.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d94e1a17e6e54764c3e296d299d861512708da0cf52fb610b8da63dbc9d792da
+size 623

scaler_time.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:209e91b39c7def96a51d697fc57bb271a76d66abee000512fa2a166d3325ca11
+size 623

static/style.css ADDED Viewed

	@@ -0,0 +1,234 @@

+:root {
+    --primary: #6366f1;
+    --primary-hover: #4f46e5;
+    --danger: #ef4444;
+    --success: #10b981;
+    --bg-dark: #0f172a;
+    --glass-bg: rgba(255, 255, 255, 0.05);
+    --glass-border: rgba(255, 255, 255, 0.1);
+}
+body {
+    margin: 0;
+    font-family: 'Inter', system-ui, -apple-system, sans-serif;
+    background-color: var(--bg-dark);
+    color: #f8fafc;
+    min-height: 100vh;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    background-image:
+        radial-gradient(circle at 0% 0%, rgba(99, 102, 241, 0.15) 0%, transparent 50%),
+        radial-gradient(circle at 100% 100%, rgba(239, 68, 68, 0.1) 0%, transparent 50%);
+}
+.container {
+    width: 90%;
+    max-width: 1000px;
+    background: var(--glass-bg);
+    backdrop-filter: blur(12px);
+    -webkit-backdrop-filter: blur(12px);
+    border: 1px solid var(--glass-border);
+    border-radius: 24px;
+    padding: 2.5rem;
+    box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
+}
+header {
+    text-align: center;
+    margin-bottom: 2.5rem;
+}
+header h1 {
+    font-size: 2.5rem;
+    margin: 0;
+    background: linear-gradient(to right, #818cf8, #f472b6);
+    -webkit-background-clip: text;
+    background-clip: text;
+    -webkit-text-fill-color: transparent;
+    font-weight: 800;
+}
+header p {
+    color: #94a3b8;
+    font-size: 1.1rem;
+    margin-top: 0.5rem;
+}
+.main-layout {
+    display: grid;
+    grid-template-columns: 1fr 1fr;
+    gap: 2rem;
+}
+.card {
+    background: rgba(255, 255, 255, 0.03);
+    border-radius: 16px;
+    padding: 1.5rem;
+    border: 1px solid var(--glass-border);
+}
+.card-title {
+    font-size: 1.25rem;
+    font-weight: 600;
+    margin-bottom: 1.25rem;
+    display: flex;
+    align-items: center;
+    gap: 0.75rem;
+}
+.samples-list {
+    display: flex;
+    flex-direction: column;
+    gap: 0.75rem;
+}
+.sample-item {
+    background: rgba(255, 255, 255, 0.05);
+    padding: 1rem;
+    border-radius: 12px;
+    cursor: pointer;
+    transition: all 0.2s;
+    border: 1px solid transparent;
+}
+.sample-item:hover {
+    background: rgba(255, 255, 255, 0.08);
+    border-color: var(--primary);
+    transform: translateY(-2px);
+}
+.sample-item .meta {
+    display: flex;
+    justify-content: space-between;
+    font-size: 0.875rem;
+    color: #94a3b8;
+}
+.sample-item .amount {
+    font-weight: 700;
+    color: #fff;
+    font-size: 1rem;
+}
+.form-group {
+    margin-bottom: 1rem;
+    display: grid;
+    grid-template-columns: 1fr 1fr;
+    gap: 1rem;
+}
+.input-field {
+    background: rgba(0, 0, 0, 0.2);
+    border: 1px solid var(--glass-border);
+    border-radius: 8px;
+    padding: 0.75rem;
+    color: white;
+    width: 100%;
+    box-sizing: border-box;
+}
+.input-field:focus {
+    outline: none;
+    border-color: var(--primary);
+}
+.btn {
+    background: var(--primary);
+    color: white;
+    border: none;
+    padding: 1rem;
+    border-radius: 12px;
+    font-size: 1.1rem;
+    font-weight: 600;
+    cursor: pointer;
+    width: 100%;
+    transition: all 0.2s;
+    margin-top: 1rem;
+}
+.btn:hover {
+    background: var(--primary-hover);
+    box-shadow: 0 0 20px rgba(99, 102, 241, 0.4);
+}
+#result-area {
+    margin-top: 2rem;
+    padding: 1.5rem;
+    border-radius: 16px;
+    text-align: center;
+    display: none;
+    animation: fadeIn 0.3s ease-out;
+}
+.result-fraud {
+    background: rgba(239, 68, 68, 0.1);
+    border: 1px solid var(--danger);
+    color: #fca5a5;
+}
+.result-safe {
+    background: rgba(16, 185, 129, 0.1);
+    border: 1px solid var(--success);
+    color: #6ee7b7;
+}
+.result-title {
+    font-size: 1.5rem;
+    font-weight: 800;
+    margin-bottom: 0.5rem;
+}
+.result-conf {
+    font-size: 0.9rem;
+    opacity: 0.8;
+}
+@keyframes fadeIn {
+    from {
+        opacity: 0;
+        transform: translateY(10px);
+    }
+    to {
+        opacity: 1;
+        transform: translateY(0);
+    }
+}
+.tabs {
+    display: flex;
+    gap: 1rem;
+    margin-bottom: 1rem;
+}
+.tab {
+    padding: 0.5rem 1rem;
+    border-radius: 20px;
+    cursor: pointer;
+    background: rgba(255, 255, 255, 0.05);
+    font-size: 0.875rem;
+    transition: all 0.2s;
+}
+.tab.active {
+    background: var(--primary);
+}
+/* Scrollbar */
+::-webkit-scrollbar {
+    width: 8px;
+}
+::-webkit-scrollbar-track {
+    background: rgba(0, 0, 0, 0.1);
+}
+::-webkit-scrollbar-thumb {
+    background: rgba(255, 255, 255, 0.1);
+    border-radius: 4px;
+}
+::-webkit-scrollbar-thumb:hover {
+    background: rgba(255, 255, 255, 0.2);
+}

task.md ADDED Viewed

	@@ -0,0 +1,10 @@

+# Tasks: End-to-End Credit Card Fraud Detection
+- [x] Project Setup & Data Inspection <!-- id: 0 -->
+- [x] Exploratory Data Analysis (EDA) <!-- id: 1 -->
+- [x] Data Preprocessing & Balancing <!-- id: 2 -->
+- [x] Model Selection & Training <!-- id: 3 -->
+- [x] Model Evaluation & Saving <!-- id: 4 -->
+- [x] Backend API Development (Flask) <!-- id: 5 -->
+- [x] Frontend Web Interface Development <!-- id: 6 -->
+- [x] Testing & Final Polish <!-- id: 7 -->

templates/index.html ADDED Viewed

	@@ -0,0 +1,176 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>FraudShield AI | Transaction Monitoring</title>
+    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap" rel="stylesheet">
+</head>
+<body>
+    <div class="container">
+        <header>
+            <h1>FraudShield AI</h1>
+            <p>Advanced Real-time Credit Card Fraud Detection Pipeline</p>
+        </header>
+        <div class="main-layout">
+            <div class="card">
+                <div class="card-title">
+                    <span>🔍</span> Sample Transactions
+                </div>
+                <div class="tabs">
+                    <div class="tab active" id="tab-fraud">Fraud Cases</div>
+                    <div class="tab" id="tab-normal">Legitimate</div>
+                </div>
+                <div class="samples-list" id="samples-container">
+                    <!-- Samples will be injected here -->
+                    <p style="text-align: center; color: #64748b;">Loading samples...</p>
+                </div>
+            </div>
+            <div class="card">
+                <div class="card-title">
+                    <span>⚙️</span> Analysis Engine
+                </div>
+                <form id="prediction-form">
+                    <div class="form-group">
+                        <div>
+                            <label style="font-size: 0.8rem; color: #94a3b8;">Amount ($)</label>
+                            <input type="number" id="Amount" class="input-field" step="0.01" required placeholder="89.99">
+                        </div>
+                        <div>
+                            <label style="font-size: 0.8rem; color: #94a3b8;">Time (Seconds)</label>
+                            <input type="number" id="Time" class="input-field" required placeholder="0">
+                        </div>
+                    </div>
+                    <div style="margin-bottom: 0.5rem; font-size: 0.8rem; color: #94a3b8;">PCA Components (V1 - V28)</div>
+                    <div style="max-height: 200px; overflow-y: auto; padding-right: 5px;" id="v-inputs">
+                        <!-- V1 to V28 inputs will be injected -->
+                    </div>
+                    <button type="submit" class="btn">Analyze Transaction</button>
+                </form>
+                <div id="result-area">
+                    <div class="result-title" id="res-title">SAFE</div>
+                    <div class="result-conf" id="res-conf">Confidence: 99.8%</div>
+                </div>
+            </div>
+        </div>
+    </div>
+    <script>
+        let samples = { fraud: [], normal: [] };
+        let activeTab = 'fraud';
+        // Initialize V inputs
+        const vContainer = document.getElementById('v-inputs');
+        for (let i = 1; i <= 28; i++) {
+            const div = document.createElement('div');
+            div.className = 'form-group';
+            div.style.marginBottom = '0.5rem';
+            div.innerHTML = `
+                <div style="grid-column: span 2">
+                    <input type="number" step="0.000001" id="V${i}" class="input-field" placeholder="V${i} component" value="0">
+                </div>
+            `;
+            vContainer.appendChild(div);
+        }
+        // Fetch samples
+        fetch('/get_samples')
+            .then(res => res.json())
+            .then(data => {
+                samples = data;
+                renderSamples();
+            });
+        function renderSamples() {
+            const container = document.getElementById('samples-container');
+            container.innerHTML = '';
+            const list = activeTab === 'fraud' ? samples.fraud : samples.normal;
+            list.forEach(item => {
+                const el = document.createElement('div');
+                el.className = 'sample-item';
+                el.innerHTML = `
+                    <div class="amount">$${item.Amount.toFixed(2)}</div>
+                    <div class="meta">
+                        <span>Time: ${Math.floor(item.Time)}s</span>
+                        <span>PCA: Mixed</span>
+                    </div>
+                `;
+                el.onclick = () => fillForm(item);
+                container.appendChild(el);
+            });
+        }
+        function fillForm(item) {
+            document.getElementById('Amount').value = item.Amount;
+            document.getElementById('Time').value = item.Time;
+            for (let i = 1; i <= 28; i++) {
+                document.getElementById(`V${i}`).value = item[`V${i}`];
+            }
+            // Trigger animation
+            const card = document.querySelectorAll('.card')[1];
+            card.style.borderColor = 'var(--primary)';
+            setTimeout(() => card.style.borderColor = 'var(--glass-border)', 500);
+        }
+        // Tabs
+        document.getElementById('tab-fraud').onclick = () => {
+            activeTab = 'fraud';
+            document.getElementById('tab-fraud').classList.add('active');
+            document.getElementById('tab-normal').classList.remove('active');
+            renderSamples();
+        };
+        document.getElementById('tab-normal').onclick = () => {
+            activeTab = 'normal';
+            document.getElementById('tab-normal').classList.add('active');
+            document.getElementById('tab-fraud').classList.remove('active');
+            renderSamples();
+        };
+        // Form Submit
+        document.getElementById('prediction-form').onsubmit = async (e) => {
+            e.preventDefault();
+            const btn = e.target.querySelector('button');
+            btn.innerHTML = 'Processing...';
+            btn.disabled = true;
+            const data = {
+                Amount: document.getElementById('Amount').value,
+                Time: document.getElementById('Time').value
+            };
+            for (let i = 1; i <= 28; i++) {
+                data[`V${i}`] = document.getElementById(`V${i}`).value;
+            }
+            try {
+                const res = await fetch('/predict', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify(data)
+                });
+                const result = await res.json();
+                const resArea = document.getElementById('result-area');
+                resArea.style.display = 'block';
+                resArea.className = result.is_fraud ? 'result-fraud' : 'result-safe';
+                document.getElementById('res-title').innerText = result.class;
+                document.getElementById('res-conf').innerText = `Confidence: ${result.confidence.toFixed(2)}%`;
+            } catch (err) {
+                alert('Analysis failed: ' + err.message);
+            } finally {
+                btn.innerHTML = 'Analyze Transaction';
+                btn.disabled = false;
+            }
+        };
+    </script>
+</body>
+</html>

train_model.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc
+from imblearn.over_sampling import SMOTE
+import joblib
+import os
+# Load data
+print("Loading data...")
+df = pd.read_csv('c:/card/creditcard.csv')
+# Preprocessing
+print("Preprocessing...")
+scaler_amount = StandardScaler()
+scaler_time = StandardScaler()
+df['scaled_amount'] = scaler_amount.fit_transform(df['Amount'].values.reshape(-1, 1))
+df['scaled_time'] = scaler_time.fit_transform(df['Time'].values.reshape(-1, 1))
+# Drop original Time and Amount
+df.drop(['Time', 'Amount'], axis=1, inplace=True)
+# Define X and y
+X = df.drop('Class', axis=1)
+y = df['Class']
+# Split data
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
+# Handle imbalance with SMOTE
+print("Applying SMOTE to balance training data...")
+sm = SMOTE(random_state=42)
+X_train_res, y_train_res = sm.fit_resample(X_train, y_train)
+print(f"Original training shape: {X_train.shape}")
+print(f"Resampled training shape: {X_train_res.shape}")
+# Train Model
+print("Training Random Forest Classifier (this might take a minute)...")
+model = RandomForestClassifier(n_estimators=50, max_depth=10, random_state=42, n_jobs=-1)
+model.fit(X_train_res, y_train_res)
+# Evaluate
+print("Evaluating model...")
+y_pred = model.predict(X_test)
+print("\nConfusion Matrix:")
+print(confusion_matrix(y_test, y_pred))
+print("\nClassification Report:")
+print(classification_report(y_test, y_pred))
+# Save model and scalers
+print("Saving model and scalers...")
+joblib.dump(model, 'c:/card/fraud_model.joblib')
+joblib.dump(scaler_amount, 'c:/card/scaler_amount.joblib')
+joblib.dump(scaler_time, 'c:/card/scaler_time.joblib')
+print("Done! Files saved to c:/card/")