saherPervaiz committed on
Commit
35ca176
·
verified ·
1 Parent(s): 85d20ed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -59
app.py CHANGED
@@ -1,63 +1,116 @@
1
- from tabulate import tabulate
 
 
 
 
 
 
 
 
 
2
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
3
 
4
- # Split the data into training and testing sets
5
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
6
-
7
- # List of classifiers to evaluate
8
- classifiers = {
9
- 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
10
- 'Decision Tree': DecisionTreeClassifier(),
11
- 'Random Forest': RandomForestClassifier(),
12
- 'Support Vector Machine (SVM)': SVC(),
13
- 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
14
- 'Naive Bayes': GaussianNB()
15
- }
16
-
17
- # Initialize results storage
18
- predictions = pd.DataFrame()
19
- metrics = []
20
-
21
- # Train and evaluate each model
22
- for name, classifier in classifiers.items():
23
- # Train the model
24
- classifier.fit(X_train, y_train)
25
-
26
- # Make predictions
27
- y_pred = classifier.predict(X_test)
28
- predictions[name] = y_pred # Store predictions
29
 
30
- # Evaluate metrics
31
- accuracy = accuracy_score(y_test, y_pred)
32
- precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
33
- recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
34
- f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
35
 
36
- metrics.append({
37
- 'Model': name,
38
- 'Accuracy': round(accuracy, 2),
39
- 'Precision': round(precision, 2),
40
- 'Recall': round(recall, 2),
41
- 'F1-Score': round(f1, 2)
42
- })
43
-
44
- # Create a metrics DataFrame
45
- metrics_df = pd.DataFrame(metrics)
46
-
47
- # Add bold formatting to the headers
48
- bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
49
-
50
- # Format table with tabulate
51
- table = tabulate(
52
- metrics_df,
53
- headers=bold_headers,
54
- tablefmt="fancy_grid",
55
- showindex=False,
56
- numalign="center",
57
- stralign="center"
58
- )
59
-
60
- # Add spacing for a larger table
61
- print(f"\033[1m{'Model Performance Metrics'.center(80)}\033[0m") # Bold title
62
- print(table.center(120)) # Center align the table for larger width
63
- print("\n" + "=" * 80)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder
5
+ from sklearn.ensemble import RandomForestClassifier
6
+ from sklearn.linear_model import LogisticRegression
7
+ from sklearn.svm import SVC
8
+ from sklearn.neighbors import KNeighborsClassifier
9
+ from sklearn.tree import DecisionTreeClassifier
10
+ from sklearn.naive_bayes import GaussianNB
11
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
12
+ from tabulate import tabulate
13
 
14
+ # File uploader
15
+ st.title("Model Training with Metrics")
16
+ uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
17
+
18
+ if uploaded_file is not None:
19
+ df = pd.read_csv(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ # Show the dataset
22
+ st.write("Dataset:")
23
+ st.dataframe(df)
 
 
24
 
25
+ # Model Training Section
26
+ st.subheader("Model Training")
27
+ if df.empty:
28
+ st.warning("The dataset is empty. Please upload a valid CSV file.")
29
+ else:
30
+ target = st.selectbox("Select Target Variable", df.columns)
31
+ features = [col for col in df.columns if col != target]
32
+ X = df[features]
33
+ y = df[target]
34
+
35
+ # Determine if the target is continuous or categorical
36
+ is_classification = y.dtype == 'object' or len(y.unique()) <= 10 # If target is categorical or has few unique values, treat as classification
37
+
38
+ # Ensure there is enough data before proceeding with train-test split
39
+ if len(X) == 0 or len(y) == 0:
40
+ st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
41
+ else:
42
+ # Split the data into training and test sets with customizable training size
43
+ train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
44
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
45
+
46
+ # List of classifiers to evaluate
47
+ classifiers = {
48
+ 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
49
+ 'Decision Tree': DecisionTreeClassifier(),
50
+ 'Random Forest': RandomForestClassifier(),
51
+ 'Support Vector Machine (SVM)': SVC(),
52
+ 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
53
+ 'Naive Bayes': GaussianNB()
54
+ }
55
+
56
+ # Initialize results storage
57
+ metrics = []
58
+
59
+ # Train and evaluate each model
60
+ for name, classifier in classifiers.items():
61
+ # Train the model
62
+ classifier.fit(X_train, y_train)
63
+
64
+ # Make predictions
65
+ y_pred = classifier.predict(X_test)
66
+
67
+ # Evaluate metrics
68
+ accuracy = accuracy_score(y_test, y_pred)
69
+ precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
70
+ recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
71
+ f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
72
+
73
+ metrics.append({
74
+ 'Model': name,
75
+ 'Accuracy': round(accuracy, 2),
76
+ 'Precision': round(precision, 2),
77
+ 'Recall': round(recall, 2),
78
+ 'F1-Score': round(f1, 2)
79
+ })
80
+
81
+ # Create a metrics DataFrame
82
+ metrics_df = pd.DataFrame(metrics)
83
+
84
+ # Add bold formatting to the headers for tabulate
85
+ bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
86
+
87
+ # Format table with tabulate
88
+ table = tabulate(
89
+ metrics_df,
90
+ headers=bold_headers,
91
+ tablefmt="fancy_grid",
92
+ showindex=False,
93
+ numalign="center",
94
+ stralign="center"
95
+ )
96
+
97
+ # Display results in Streamlit
98
+ st.subheader("Model Performance Metrics")
99
+ st.markdown(f"**Model Performance Metrics**")
100
+ st.text(table)
101
+
102
+ # Option to download the model performance metrics (Results Table)
103
+ st.download_button(
104
+ label="Download Model Report",
105
+ data=metrics_df.to_csv(index=False),
106
+ file_name="model_report.csv",
107
+ mime="text/csv"
108
+ )
109
+
110
+ # Option to download the dataset
111
+ st.download_button(
112
+ label="Download Dataset",
113
+ data=df.to_csv(index=False),
114
+ file_name="dataset.csv",
115
+ mime="text/csv"
116
+ )