Spaces:

marianeft
/

Fraud_Detection

Sleeping

App Files Community

marianeft committed on Mar 4, 2025

Commit

d99ae65

verified ·

1 Parent(s): 8bc7c7b

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -36

app.py CHANGED Viewed

@@ -13,11 +13,13 @@ import joblib
 # Generate sample data
 def load_data():
     X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
     return X, y
 # Train models
 def train_models(X_train, y_train):
     models = {
         'Logistic Regression': LogisticRegression(),
         'Random Forest': RandomForestClassifier(),
@@ -25,20 +27,24 @@ def train_models(X_train, y_train):
     }
     trained_models = {}
     for name, model in models.items():
         model.fit(X_train, y_train)
-        trained_models[name] = model
     return trained_models
 # Predict and evaluate
 def evaluate_models(models, X_test, y_test):
     results = {}
     for name, model in models.items():
-        y_pred = model.predict(X_test)
         y_prob = model.predict_proba(X_test)[:, 1]  # Probability estimates for ROC
         accuracy = model.score(X_test, y_test)
         roc_auc = roc_auc_score(y_test, y_prob)
         conf_matrix = confusion_matrix(y_test, y_pred)
         class_report = classification_report(y_test, y_pred)
@@ -52,50 +58,25 @@ def evaluate_models(models, X_test, y_test):
 # Streamlit app
 def main():
-st.title("Model Performance and Predictions")
-st.subheader("Make Predictions")
-input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
-if input_data:
-    try:
-        # Convert input data to numpy array and reshape
-        input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
-        # Check if the number of features matches the model's input
-        if input_features.shape[1] != X_train_scaled.shape[1]:
-            st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
-        else:
-            # Transform input features using the same scaler
-            input_features_scaled = scaler.transform(input_features)
-            # Predict using the selected model
-            prediction = selected_model.predict(input_features_scaled)
-            prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
-            st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
-            st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
-    except ValueError:
-        st.error("Please enter valid numerical values separated by commas.")
-    except Exception as e:
-        st.error(f"An error occurred: {e}")
-    # Load and split data
     X, y = load_data()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     scaler = StandardScaler()
-    X_train_scaled = scaler.fit_transform(X_train)
-    X_test_scaled = scaler.transform(X_test)
-    # Train models
     models = train_models(X_train_scaled, y_train)
-    # Model selection
     st.sidebar.header("Model Selection")
     model_names = list(models.keys())
     selected_model_name = st.sidebar.selectbox("Select Model", model_names)
     selected_model = models[selected_model_name]
-    # Evaluate selected model
     results = evaluate_models(models, X_test_scaled, y_test)
     metrics = results[selected_model_name]
@@ -116,7 +97,7 @@ if input_data:
     y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
     fpr, tpr, _ = roc_curve(y_test, y_prob)
     plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
-    plt.plot([0, 1], [0, 1], 'k--')
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver Operating Characteristic (ROC) Curve')
@@ -135,5 +116,30 @@ if input_data:
         ax.set_title(f'Feature Importance - {selected_model_name}')
         st.pyplot(fig)
 if __name__ == "__main__":
-    main()

 # Generate sample data
 def load_data():
+    # Create a synthetic dataset for classification with 1000 samples and 20 features
     X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
     return X, y
 # Train models
 def train_models(X_train, y_train):
+    # Dictionary of models to train
     models = {
         'Logistic Regression': LogisticRegression(),
         'Random Forest': RandomForestClassifier(),
     }
     trained_models = {}
+    # Train each model using the training data
     for name, model in models.items():
         model.fit(X_train, y_train)
+        trained_models[name] = model  # Store trained models in a dictionary
     return trained_models
 # Predict and evaluate
 def evaluate_models(models, X_test, y_test):
     results = {}
+    # Evaluate each model using the test data
     for name, model in models.items():
+        y_pred = model.predict(X_test)  # Predict class labels
         y_prob = model.predict_proba(X_test)[:, 1]  # Probability estimates for ROC
+        # Calculate accuracy and ROC AUC score
         accuracy = model.score(X_test, y_test)
         roc_auc = roc_auc_score(y_test, y_prob)
+        # Compute confusion matrix and classification report
         conf_matrix = confusion_matrix(y_test, y_pred)
         class_report = classification_report(y_test, y_pred)
 # Streamlit app
 def main():
+    st.title("Model Performance and Predictions")
+    # Load and split data into training and test sets
     X, y = load_data()
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     scaler = StandardScaler()
+    X_train_scaled = scaler.fit_transform(X_train)  # Scale training data
+    X_test_scaled = scaler.transform(X_test)  # Scale test data
+    # Train models using scaled training data
     models = train_models(X_train_scaled, y_train)
+    # Sidebar for model selection
     st.sidebar.header("Model Selection")
     model_names = list(models.keys())
     selected_model_name = st.sidebar.selectbox("Select Model", model_names)
     selected_model = models[selected_model_name]
+    # Evaluate selected model using test data
     results = evaluate_models(models, X_test_scaled, y_test)
     metrics = results[selected_model_name]
     y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
     fpr, tpr, _ = roc_curve(y_test, y_prob)
     plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
+    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random guessing
     plt.xlabel('False Positive Rate')
     plt.ylabel('True Positive Rate')
     plt.title('Receiver Operating Characteristic (ROC) Curve')
         ax.set_title(f'Feature Importance - {selected_model_name}')
         st.pyplot(fig)
+    st.subheader("Make Predictions")
+    input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
+    if input_data:
+        try:
+            # Convert input data to numpy array and reshape
+            input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
+            # Check if the number of features matches the model's input
+            if input_features.shape[1] != X_train_scaled.shape[1]:
+                st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
+            else:
+                # Transform input features using the same scaler
+                input_features_scaled = scaler.transform(input_features)
+                # Predict using the selected model
+                prediction = selected_model.predict(input_features_scaled)
+                prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
+                st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
+                st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
+        except ValueError:
+            st.error("Please enter valid numerical values separated by commas.")
+        except Exception as e:
+            st.error(f"An error occurred: {e}")
 if __name__ == "__main__":
+    main()