Spaces:
Runtime error
Runtime error
| import pandas as pd | |
| import numpy as np | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.preprocessing import StandardScaler | |
| from sklearn.utils import resample | |
| from sklearn.metrics import accuracy_score, classification_report | |
| from sklearn.linear_model import LogisticRegression | |
| from imblearn.over_sampling import SMOTE | |
| from transformers import pipeline | |
| import gradio as gr | |
# ---------------------------------------------------------------------------
# Training script: load the data, balance the classes, fit a scaler and a
# logistic-regression classifier, and report hold-out metrics.
# The names `feature_cols`, `scaler` and `model` are read later by the
# prediction function.
# ---------------------------------------------------------------------------

# Single seed shared by every random step so that repeated runs produce the
# same balanced sample, split, synthetic samples and metrics.
RANDOM_STATE = 42

# Load the creditcard.csv dataset from the local directory.
file_path = 'creditcard.csv'  # Make sure this file is in the same directory as your script
df = pd.read_csv(file_path)

# Display basic information
print("Columns in the dataset:", df.columns)
print(df.head())

# Preprocessing: every column except the label and the timestamp is a feature.
time_col = 'Time'
amount_col = 'Amount'
class_col = 'Class'
feature_cols = [col for col in df.columns if col not in [class_col, time_col]]

# Handle missing values by filling each column with its mean.
df = df.fillna(df.mean())

# Downsample the majority class (label 0) to the size of the minority class
# (label 1). Seeded so the retained majority rows are the same on every run.
df_majority = df[df[class_col] == 0]
df_minority = df[df[class_col] == 1]
df_majority_downsampled = resample(
    df_majority,
    replace=False,
    n_samples=len(df_minority),
    random_state=RANDOM_STATE,
)
df_balanced = pd.concat([df_majority_downsampled, df_minority])

X = df_balanced[feature_cols]
y = df_balanced[class_col]

# Split BEFORE scaling so the hold-out rows never influence the scaler's
# mean/variance (fitting the scaler on all rows leaks test information).
# Stratify to keep both classes equally represented in train and test.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE, stratify=y
)

# Feature scaling: statistics come from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Scaled copy of the full balanced set, kept for anything that still reads
# this module-level name; it is not used for fitting or evaluation.
X_scaled = scaler.transform(X)

# Top up any residual imbalance left in the training split with SMOTE.
smote = SMOTE(random_state=RANDOM_STATE)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Logistic Regression Model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predictions on the untouched hold-out split
y_pred = model.predict(X_test)

# Model evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Initialize the retrieval pipeline with a lightweight model (if required).
# NOTE(review): nothing in the visible code reads `retrieval_pipeline`;
# loading it costs a model download and memory at startup. Confirm whether it
# is still needed before keeping it.
retrieval_pipeline = pipeline("feature-extraction", model="distilbert-base-uncased")
def retrieve_explanation(prediction):
    """Return a one-sentence explanation for a predicted class label.

    A label equal to 1 yields the "fraudulent" sentence; any other value
    yields the "non-fraudulent" sentence.
    """
    fraud_text = "The transaction is classified as fraudulent based on the provided features."
    legit_text = "The transaction is classified as non-fraudulent based on the provided features."
    return fraud_text if prediction == 1 else legit_text
# Gradio prediction function
def fraud_detection_predictor(V1, V2, V3, Amount):
    """Classify a single transaction from the four features exposed in the UI.

    Args:
        V1, V2, V3, Amount: Raw (unscaled) feature values. ``None`` is
            treated as 0, the same default used for every feature the form
            does not expose.

    Returns:
        A ``(fraud_status, explanation)`` tuple of strings, where
        ``fraud_status`` is ``"Fraudulent"`` or ``"Non-Fraudulent"``.

    Raises:
        ValueError: If one of the four feature names is not present in
            ``feature_cols``.
    """
    # Start from an all-zero row in training column order, then overwrite the
    # four user-supplied entries.
    input_features = [0.0] * len(feature_cols)
    supplied = (('V1', V1), ('V2', V2), ('V3', V3), ('Amount', Amount))
    for name, value in supplied:
        # A cleared numeric field is delivered as None; fall back to 0 rather
        # than letting the scaler fail on a non-numeric entry.
        input_features[feature_cols.index(name)] = 0.0 if value is None else float(value)

    # Scale with the training-time scaler. Wrapping the row in a DataFrame
    # carrying the training column names avoids scikit-learn's
    # "X does not have valid feature names" warning.
    input_frame = pd.DataFrame([input_features], columns=feature_cols)
    input_data = scaler.transform(input_frame)

    # Make a prediction
    prediction = model.predict(input_data)[0]
    fraud_status = "Fraudulent" if prediction == 1 else "Non-Fraudulent"
    explanation = retrieve_explanation(prediction)
    return fraud_status, explanation
# Define Gradio Interface: four numeric inputs feed the predictor, and its two
# returned strings are shown in two text boxes.
_input_widgets = [
    gr.Number(label="V1"),
    gr.Number(label="V2"),
    gr.Number(label="V3"),
    gr.Number(label="Amount"),
]
_output_widgets = [
    gr.Textbox(label="Fraud Status"),
    gr.Textbox(label="Explanation"),
]

interface = gr.Interface(
    fn=fraud_detection_predictor,
    inputs=_input_widgets,
    outputs=_output_widgets,
    title="Credit Card Fraud Detection",
    description="Enter transaction features (V1, V2, V3, Amount) to predict fraud status.",
)

# Launch Gradio Interface
interface.launch()