Mishal23 committed on
Commit
2f3185c
·
verified ·
1 Parent(s): fb17031

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +117 -0
app.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import StandardScaler
5
+ from sklearn.utils import resample
6
+ from sklearn.metrics import accuracy_score, classification_report
7
+ from sklearn.linear_model import LogisticRegression
8
+ from imblearn.over_sampling import SMOTE
9
+ from transformers import pipeline
10
+ import gradio as gr
11
+ from google.colab import files
12
+
13
# Load the creditcard.csv dataset from Google Drive or Colab local file upload.
# files.upload() prompts for the file; the CSV is then read by name.
uploaded = files.upload()
df = pd.read_csv('creditcard.csv')

# Display basic information: the column index and the first rows.
print("Columns in the dataset:", df.columns)
print(df.head())

# Preprocessing: selecting relevant columns.
# Assumes the dataset has 'Time', 'Amount' and 'Class' columns along with the
# 'V1' to 'V28' features.
time_col, amount_col, class_col = 'Time', 'Amount', 'Class'

# Every column other than the label and the timestamp is a model feature;
# the original column order is preserved.
feature_cols = [
    name for name in df.columns
    if name != class_col and name != time_col
]
27
+
28
# Handle missing values: replace NaNs with the per-column mean.
df = df.fillna(df.mean())

# Downsample the majority class (Class == 0) to the size of the minority class
# (Class == 1). A fixed random_state keeps the sampled rows, and therefore the
# trained model and its reported metrics, reproducible between runs.
df_majority = df[df[class_col] == 0]
df_minority = df[df[class_col] == 1]
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=42,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

X = df_balanced[feature_cols]
y = df_balanced[class_col]

# Train-test split is done BEFORE scaling so that the held-out rows do not
# contribute to the scaler's mean/variance (avoids test-set leakage).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling: fit on the training rows only, then apply the same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# The random split can leave the two training classes slightly uneven; SMOTE
# oversamples the smaller one. Seeded for reproducibility.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)
49
+
50
# Logistic Regression Model (fit() returns the fitted estimator itself).
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)

# Predictions on the held-out split.
y_pred = model.predict(X_test)

# Model evaluation: overall accuracy, then the per-class report.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)

# Initialize the retrieval pipeline with a lightweight model (if required).
# NOTE(review): nothing else in the visible source references
# retrieval_pipeline -- confirm it is still needed before paying for the
# model download at startup.
retrieval_pipeline = pipeline(
    "feature-extraction",
    model="distilbert-base-uncased",
)
63
+
64
def retrieve_explanation(prediction):
    """Return a fixed one-sentence explanation for a predicted class.

    Args:
        prediction: Predicted label; a value equal to 1 means fraud, any
            other value is reported as non-fraudulent.

    Returns:
        The explanation sentence as a string.
    """
    if prediction != 1:
        return "The transaction is classified as non-fraudulent based on the provided features."
    return "The transaction is classified as fraudulent based on the provided features."
70
+
71
+ # Gradio prediction function with complete feature padding
72
def fraud_detection_predictor(V1, V2, V3, Amount):
    """Classify one transaction from a reduced set of features.

    Only V1, V2, V3 and Amount are supplied by the caller; every other column
    in ``feature_cols`` is padded with 0 before scaling.

    Args:
        V1: Value for the 'V1' feature.
        V2: Value for the 'V2' feature.
        V3: Value for the 'V3' feature.
        Amount: Value for the 'Amount' feature.
            Any argument may be None (an empty Gradio number field); it is
            then treated as 0, like the padded features.

    Returns:
        A ``(fraud_status, explanation)`` tuple of strings, where
        ``fraud_status`` is "Fraudulent" or "Non-Fraudulent".

    Raises:
        ValueError: If one of the four columns is missing from
            ``feature_cols``.
    """
    provided = {'V1': V1, 'V2': V2, 'V3': V3, 'Amount': Amount}

    # Start with zeros for every feature, then place the user inputs at the
    # positions of their columns.
    input_features = [0.0] * len(feature_cols)
    for column, value in provided.items():
        input_features[feature_cols.index(column)] = 0.0 if value is None else value

    # The scaler was fitted on a DataFrame, so hand it named columns as well;
    # a bare list would trigger scikit-learn's feature-name warning.
    input_frame = pd.DataFrame([input_features], columns=feature_cols)
    input_data = scaler.transform(input_frame)

    # Make a prediction for the single row.
    prediction = model.predict(input_data)[0]
    fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"

    # Get explanation
    explanation = retrieve_explanation(prediction)
    return fraud_status, explanation
98
+
99
# Define Gradio Interface: four numeric inputs are passed to
# fraud_detection_predictor, and its two returned strings fill the two text
# outputs.
interface = gr.Interface(
    fn=fraud_detection_predictor,
    inputs=[gr.Number(label=name) for name in ("V1", "V2", "V3", "Amount")],
    outputs=[gr.Textbox(label=caption) for caption in ("Fraud Status", "Explanation")],
    title="Simplified Credit Card Fraud Detection",
    description="Enter a few transaction features (V1, V2, V3, Amount) to predict fraud status.",
)

# Launch Gradio Interface
interface.launch()