Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.utils import resample | |
| from sklearn.metrics import accuracy_score, classification_report | |
| from sklearn.linear_model import LogisticRegression | |
| from imblearn.over_sampling import SMOTE | |
| from transformers import pipeline | |
| import gradio as gr | |
| from google.colab import files | |
# ---------------------------------------------------------------------------
# Training script: load creditcard.csv, balance the classes, fit a logistic
# regression fraud classifier, and print its hold-out metrics.
#
# Module-level names consumed further down the file:
#   feature_cols - ordered list of model input columns
#   scaler       - StandardScaler fitted on the training split
#   model        - fitted LogisticRegression
# ---------------------------------------------------------------------------
from pathlib import Path  # local to this script section; only used for the existence check

# Fixed seed so under-sampling, the split and SMOTE give the same result every run.
RANDOM_STATE = 42
DATA_PATH = Path("creditcard.csv")

# Only prompt for an upload when the CSV is not already on disk; re-running the
# script would otherwise ask for the same file again.
# NOTE(review): `files` comes from `google.colab`, which is only importable inside
# Colab - confirm how the dataset is meant to be provided in other environments.
uploaded = files.upload() if not DATA_PATH.exists() else {}
df = pd.read_csv(DATA_PATH)

# Display basic information
print("Columns in the dataset:", df.columns)
print(df.head())

# Preprocessing: selecting relevant columns.
# Assumes the dataset has 'Time', 'Amount' and 'Class' columns along with the
# 'V1' to 'V28' features - TODO confirm against the actual CSV header.
time_col = 'Time'
amount_col = 'Amount'
class_col = 'Class'
# Every column except the label and the timestamp is a model input.
feature_cols = [col for col in df.columns if col not in [class_col, time_col]]

# Handle missing values: fill each numeric column with its own mean.
# numeric_only=True keeps this from raising if a non-numeric column is present.
df = df.fillna(df.mean(numeric_only=True))

# Downsample the class-0 rows to the number of class-1 rows to handle imbalance.
df_majority = df[df[class_col] == 0]
df_minority = df[df[class_col] == 1]
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=RANDOM_STATE,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

X = df_balanced[feature_cols]
y = df_balanced[class_col]

# Train-test split happens BEFORE scaling so that the scaler's mean/variance are
# learned from training rows only and the test metrics are not contaminated.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Feature scaling (fit on the training split, then applied to the test split).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# SMOTE evens out whatever small class imbalance the random split left in the
# training rows; the test rows are never resampled.
smote = SMOTE(random_state=RANDOM_STATE)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions
y_pred = model.predict(X_test)

# Model evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Initialize the retrieval pipeline with a lightweight model (if required).
# NOTE(review): nothing in the visible code reads `retrieval_pipeline`; it is kept
# so the module-level name still exists, but loading it costs a model download.
retrieval_pipeline = pipeline("feature-extraction", model="distilbert-base-uncased")
def retrieve_explanation(prediction: int) -> str:
    """Return the user-facing sentence that describes a model prediction.

    Args:
        prediction: Predicted class label; ``1`` means fraudulent, any other
            value is reported as non-fraudulent.

    Returns:
        A one-sentence explanation of the classification.
    """
    if prediction == 1:
        return "The transaction is classified as fraudulent based on the provided features."
    return "The transaction is classified as non-fraudulent based on the provided features."
# Gradio prediction function with complete feature padding
def fraud_detection_predictor(V1, V2, V3, Amount):
    """Classify one transaction from a handful of user-supplied features.

    The model expects every column in ``feature_cols``; the four values given
    here are placed in their columns and every other feature is padded with 0
    before the row is scaled with the pre-fitted ``scaler``.

    Args:
        V1: Value for the ``V1`` feature; ``None`` is treated as 0.
        V2: Value for the ``V2`` feature; ``None`` is treated as 0.
        V3: Value for the ``V3`` feature; ``None`` is treated as 0.
        Amount: Value for the ``Amount`` feature; ``None`` is treated as 0.

    Returns:
        A ``(fraud_status, explanation)`` tuple of strings, where
        ``fraud_status`` is ``"Fraudulent"`` or ``"Non-Fraudulent"``.

    Raises:
        ValueError: If one of the four features is not in ``feature_cols``.
    """
    provided = {"V1": V1, "V2": V2, "V3": V3, "Amount": Amount}

    # Start from an all-zero row so features the UI does not expose are padded.
    row = {col: 0.0 for col in feature_cols}
    for name, value in provided.items():
        if name not in row:
            raise ValueError(f"{name!r} is not in feature_cols")
        # A Gradio Number left empty arrives as None; fall back to the same
        # zero default used for the features that are not exposed at all.
        row[name] = 0.0 if value is None else float(value)

    # Build a one-row frame with the training column names and order so the
    # scaler sees the same layout it was fitted on.
    input_frame = pd.DataFrame([row], columns=feature_cols)

    # Scale input data using the pre-fitted scaler
    input_data = scaler.transform(input_frame)

    # Make a prediction (cast the numpy scalar to a plain int)
    prediction = int(model.predict(input_data)[0])
    fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"

    # Get explanation
    explanation = retrieve_explanation(prediction)
    return fraud_status, explanation
# Build the Gradio UI: four numeric inputs, in the same order as the
# parameters of fraud_detection_predictor, and two text outputs for its
# (fraud_status, explanation) return value.
_number_inputs = [gr.Number(label=field) for field in ("V1", "V2", "V3", "Amount")]
_text_outputs = [gr.Textbox(label=caption) for caption in ("Fraud Status", "Explanation")]

interface = gr.Interface(
    fn=fraud_detection_predictor,
    inputs=_number_inputs,
    outputs=_text_outputs,
    title="Simplified Credit Card Fraud Detection",
    description="Enter a few transaction features (V1, V2, V3, Amount) to predict fraud status.",
)

# Start serving the interface.
interface.launch()