import streamlit as st
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.datasets import make_classification
import joblib

# Generate sample data
def load_data():
    # Create a synthetic dataset for classification with 1000 samples and 20 features
    X, y = make_classification(n_samples=1000, n_features=20, random_state=42)
    return X, y

# Train models
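# Cache the trained models across Streamlit reruns so they are not refit on
# every widget interaction. A small addition assuming Streamlit >= 1.18,
# where st.cache_resource is available; on older versions, remove the decorator.
@st.cache_resource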
def train_models(X_train, y_train):
    # Models to train; fixed random_state keeps the ensemble results reproducible
    models = {
        'Logistic Regression': LogisticRegression(max_iter=1000),
        'Random Forest': RandomForestClassifier(random_state=42),
        'Gradient Boosting': GradientBoostingClassifier(random_state=42)
    }
    
    trained_models = {}
    # Train each model using the training data
    for name, model in models.items():
        model.fit(X_train, y_train)
        trained_models[name] = model  # Store trained models in a dictionary
    return trained_models
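
# Optional persistence: a minimal sketch (using the joblib import above) for
# saving the trained models to disk and reloading them without retraining.
# The file name 'trained_models.joblib' is an arbitrary placeholder.
def save_models(models, path='trained_models.joblib'):
    joblib.dump(models, path)

def load_saved_models(path='trained_models.joblib'):
    return joblib.load(path)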

# Predict and evaluate
def evaluate_models(models, X_test, y_test):
    results = {}
    # Evaluate each model using the test data
    for name, model in models.items():
        y_pred = model.predict(X_test)  # Predict class labels
        y_prob = model.predict_proba(X_test)[:, 1]  # Probability estimates for ROC
        
        # Calculate accuracy and ROC AUC score
        accuracy = model.score(X_test, y_test)
        roc_auc = roc_auc_score(y_test, y_prob)
        # Compute confusion matrix and classification report
        conf_matrix = confusion_matrix(y_test, y_pred)
        class_report = classification_report(y_test, y_pred)
        
        results[name] = {
            'Accuracy': accuracy,
            'ROC AUC': roc_auc,
            'Confusion Matrix': conf_matrix,
            'Classification Report': class_report
        }
    return results

# Streamlit app
def main():
    st.title("Model Performance and Predictions")

    # Load and split data into training and test sets
    X, y = load_data()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)  # Scale training data
    X_test_scaled = scaler.transform(X_test)  # Scale test data

    # Train models using scaled training data
    models = train_models(X_train_scaled, y_train)
    
    # Sidebar for model selection
    st.sidebar.header("Model Selection")
    model_names = list(models.keys())
    selected_model_name = st.sidebar.selectbox("Select Model", model_names)
    selected_model = models[selected_model_name]

    # Evaluate all models on the test data (only the selected one is displayed)
    results = evaluate_models(models, X_test_scaled, y_test)
    metrics = results[selected_model_name]

    st.header(f"Model: {selected_model_name}")
    
    st.subheader("Metrics")
    st.write(f"**Accuracy:** {metrics['Accuracy']:.4f}")
    st.write(f"**ROC AUC:** {metrics['ROC AUC']:.4f}")
    
    st.write("**Confusion Matrix:**")
    st.write(metrics['Confusion Matrix'])
    
    st.write("**Classification Report:**")
    st.text(metrics['Classification Report'])
    
    st.subheader("ROC Curve")
    plt.figure(figsize=(10, 7))
    y_prob = selected_model.predict_proba(X_test_scaled)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_prob)
    plt.plot(fpr, tpr, label=f'{selected_model_name} (AUC = {metrics["ROC AUC"]:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # Diagonal line for random guessing
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc='lower right')
    st.pyplot(plt)

    st.subheader("Feature Importance")
    if selected_model_name in ['Random Forest', 'Gradient Boosting']:
        feature_importances = selected_model.feature_importances_
        feature_names = [f'Feature {i}' for i in range(X_test_scaled.shape[1])]
        importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importances})
        importance_df = importance_df.sort_values(by='Importance', ascending=False)
        
        fig, ax = plt.subplots(figsize=(10, 7))
        sns.barplot(x='Importance', y='Feature', data=importance_df, ax=ax)
        ax.set_title(f'Feature Importance - {selected_model_name}')
        st.pyplot(fig)

    st.subheader("Make Predictions")
    input_data = st.text_input("Enter features separated by commas (e.g., 0.1, 0.2, ..., 0.5)")
    if input_data:
        try:
            # Convert input data to numpy array and reshape
            input_features = np.array([float(i) for i in input_data.split(',')]).reshape(1, -1)
            
            # Check if the number of features matches the model's input
            if input_features.shape[1] != X_train_scaled.shape[1]:
                st.error(f"Number of features should be {X_train_scaled.shape[1]}.")
            else:
                # Transform input features using the same scaler
                input_features_scaled = scaler.transform(input_features)
                
                # Predict using the selected model
                prediction = selected_model.predict(input_features_scaled)
                prediction_proba = selected_model.predict_proba(input_features_scaled)[:, 1]
                st.write(f"Prediction: {'Positive' if prediction[0] == 1 else 'Negative'}")
                st.write(f"Probability of Positive: {prediction_proba[0]:.4f}")
                
        except ValueError:
            st.error("Please enter valid numerical values separated by commas.")
        except Exception as e:
            st.error(f"An error occurred: {e}")

if __name__ == "__main__":
    main()
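
# To launch the app locally (assuming this file is saved as app.py):
#   streamlit run app.py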