saherPervaiz committed on
Commit
0665d1a
·
verified ·
1 Parent(s): ff625a6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder
5
+ from sklearn.ensemble import RandomForestClassifier
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sklearn.svm import SVC
8
+ from sklearn.neighbors import KNeighborsClassifier
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.naive_bayes import GaussianNB
11
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
12
+ from tabulate import tabulate
13
+
# Streamlit app: upload a CSV, pick a target column, train several
# scikit-learn classifiers on the remaining columns and report their metrics.

# A numeric target with more distinct values than this is treated as
# continuous (a regression problem), which the classifiers below cannot model.
_MAX_CLASS_LABELS = 10


def _is_classification_target(y: pd.Series) -> bool:
    """Return True when ``y`` looks like class labels.

    A target is considered categorical when it is non-numeric or when it has
    at most ``_MAX_CLASS_LABELS`` distinct non-missing values.
    """
    if not pd.api.types.is_numeric_dtype(y):
        return True
    return y.nunique() <= _MAX_CLASS_LABELS


def _prepare_features(X: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``X`` that scikit-learn estimators can consume.

    * Columns that are entirely missing are dropped.
    * Missing numeric values are replaced by the column median.
    * Non-numeric columns are integer-encoded with ``LabelEncoder``; missing
      values become their own ``"nan"`` category.

    NOTE: the integer codes impose an arbitrary order on categories, and the
    median is computed on the full dataset (before the train/test split).
    Both are acceptable for this quick comparison tool.
    """
    prepared = X.dropna(axis=1, how="all").copy()
    for col in prepared.columns:
        column = prepared[col]
        if pd.api.types.is_numeric_dtype(column):
            if column.isna().any():
                prepared[col] = column.fillna(column.median())
        else:
            prepared[col] = LabelEncoder().fit_transform(column.astype(str))
    return prepared


def _build_classifiers() -> dict:
    """Return the display-name -> estimator mapping of models to compare."""
    return {
        'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
        'Decision Tree': DecisionTreeClassifier(),
        'Random Forest': RandomForestClassifier(),
        'Support Vector Machine (SVM)': SVC(),
        'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
        'Naive Bayes': GaussianNB(),
    }


def _evaluate_classifiers(X_train, X_test, y_train, y_test) -> tuple:
    """Fit every classifier and score it on the held-out split.

    Returns:
        A ``(metrics, failures)`` tuple. ``metrics`` is a list of dicts with
        the keys ``Model``, ``Accuracy``, ``Precision``, ``Recall`` and
        ``F1-Score`` (macro-averaged, rounded to 2 decimals). ``failures`` is
        a list of human-readable messages for models that could not be
        trained or evaluated.
    """
    metrics = []
    failures = []
    for name, classifier in _build_classifiers().items():
        try:
            classifier.fit(X_train, y_train)
            y_pred = classifier.predict(X_test)
        except ValueError as exc:
            # e.g. only one class in the training split, or fewer training
            # samples than k-NN neighbours; report it and keep going.
            failures.append(f"{name}: {exc}")
            continue

        metrics.append({
            'Model': name,
            'Accuracy': round(accuracy_score(y_test, y_pred), 2),
            'Precision': round(precision_score(y_test, y_pred, zero_division=1, average='macro'), 2),
            'Recall': round(recall_score(y_test, y_pred, zero_division=1, average='macro'), 2),
            'F1-Score': round(f1_score(y_test, y_pred, zero_division=1, average='macro'), 2),
        })
    return metrics, failures


def _render_metrics(metrics_df: pd.DataFrame) -> None:
    """Show the metrics table and offer it as a CSV download."""
    # Plain headers: st.text does not interpret ANSI escape sequences, so
    # terminal-style bold codes would show up as stray characters.
    table = tabulate(
        metrics_df,
        headers=list(metrics_df.columns),
        tablefmt="fancy_grid",
        showindex=False,
        numalign="center",
        stralign="center",
    )

    st.subheader("Model Performance Metrics")
    st.text(table)

    st.download_button(
        label="Download Model Report",
        data=metrics_df.to_csv(index=False),
        file_name="model_report.csv",
        mime="text/csv",
    )


def _render_training_section(df: pd.DataFrame) -> None:
    """Render the target/size widgets, train the models and show the results."""
    st.subheader("Model Training")
    if df.empty:
        st.warning("The dataset is empty. Please upload a valid CSV file.")
        return

    target = st.selectbox("Select Target Variable", df.columns)
    features = [col for col in df.columns if col != target]

    # Rows without a label cannot be used for supervised training.
    labelled = df[df[target].notna()]
    y = labelled[target]
    X = _prepare_features(labelled[features])

    # Covers: no feature columns (single-column CSV), no usable feature
    # columns (all entirely missing) and no labelled rows.
    if X.empty:
        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
        return

    if not _is_classification_target(y):
        st.warning(
            f"The selected target looks continuous (numeric with more than "
            f"{_MAX_CLASS_LABELS} distinct values). Only classification models "
            f"are available, so please choose a categorical target."
        )
        return

    # Split the data into training and test sets with customizable training size
    train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=1 - train_size, random_state=42
        )
    except ValueError as exc:
        # Raised when the dataset is too small for the requested split.
        st.error(f"Could not split the dataset: {exc}")
        return

    metrics, failures = _evaluate_classifiers(X_train, X_test, y_train, y_test)
    for message in failures:
        st.warning(f"Skipped {message}")

    if not metrics:
        st.error("None of the models could be trained on this dataset.")
        return

    _render_metrics(pd.DataFrame(metrics))


# File uploader
st.title("Model Training with Metrics")
uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])

if uploaded_file is not None:
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        st.stop()  # nothing else can be rendered without a dataset

    # Show the dataset
    st.write("Dataset:")
    st.dataframe(df)

    _render_training_section(df)

    # Option to download the dataset
    st.download_button(
        label="Download Dataset",
        data=df.to_csv(index=False),
        file_name="dataset.csv",
        mime="text/csv",
    )