saherPervaiz committed on
Commit
85d20ed
·
verified ·
1 Parent(s): 97a2e91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -100
app.py CHANGED
@@ -1,107 +1,63 @@
1
- import streamlit as st
2
- import pandas as pd
3
- from sklearn.model_selection import train_test_split
4
- from sklearn.preprocessing import LabelEncoder, StandardScaler
5
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
6
- from sklearn.linear_model import LogisticRegression, LinearRegression
7
- from sklearn.svm import SVC, SVR
8
- from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
9
- from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
10
- from sklearn.naive_bayes import GaussianNB
11
- from sklearn.metrics import accuracy_score, mean_squared_error
12
 
13
- # File uploader
14
- st.title("Model Training")
15
- uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
16
 
17
- if uploaded_file is not None:
18
- df = pd.read_csv(uploaded_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- # Show the dataset
21
- st.write("Dataset:")
22
- st.dataframe(df)
23
 
24
- # Model Training Section
25
- st.subheader("Model Training")
26
- if df.empty:
27
- st.warning("The dataset is empty. Please upload a valid CSV file.")
28
- else:
29
- target = st.selectbox("Select Target Variable", df.columns)
30
- features = [col for col in df.columns if col != target]
31
- X = df[features]
32
- y = df[target]
33
-
34
- # Determine if the target is continuous or categorical
35
- is_classification = y.dtype == 'object' or len(y.unique()) <= 10 # If target is categorical or has few unique values, treat as classification
36
-
37
- # Ensure there is enough data before proceeding with train-test split
38
- if len(X) == 0 or len(y) == 0:
39
- st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
40
- else:
41
- # Split the data into training and test sets with customizable training size
42
- train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
43
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
44
-
45
- # Store results in a dictionary
46
- results = []
47
 
48
- # Model Selection and Evaluation (For Classification)
49
- if is_classification:
50
- model_choices = [
51
- ("Random Forest", RandomForestClassifier(n_estimators=50)),
52
- ("Logistic Regression", LogisticRegression(max_iter=1000)),
53
- ("SVM", SVC()),
54
- ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
55
- ("Decision Tree", DecisionTreeClassifier()),
56
- ("Naive Bayes", GaussianNB())
57
- ]
58
-
59
- for name, model in model_choices:
60
- model.fit(X_train, y_train)
61
- y_pred = model.predict(X_test)
62
- accuracy = accuracy_score(y_test, y_pred)
63
- results.append([name, accuracy])
64
-
65
- # Display results in a table
66
- st.subheader("Model Performance Results")
67
- results_df = pd.DataFrame(results, columns=["Model", "Accuracy"])
68
- st.markdown(f"**Model Performance Results**")
69
- st.dataframe(results_df)
70
-
71
- # For Regression
72
- else:
73
- model_choices = [
74
- ("Random Forest", RandomForestRegressor(n_estimators=50)),
75
- ("Linear Regression", LinearRegression()),
76
- ("SVR", SVR()),
77
- ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
78
- ("Decision Tree", DecisionTreeRegressor()),
79
- ]
80
-
81
- for name, model in model_choices:
82
- model.fit(X_train, y_train)
83
- y_pred = model.predict(X_test)
84
- mse = mean_squared_error(y_test, y_pred)
85
- results.append([name, mse])
86
-
87
- # Display results in a table
88
- st.subheader("Model Performance Results")
89
- results_df = pd.DataFrame(results, columns=["Model", "Mean Squared Error"])
90
- st.markdown(f"**Model Performance Results**")
91
- st.dataframe(results_df)
92
 
93
- # Option to download the model performance metrics (Results Table)
94
- st.download_button(
95
- label="Download Model Report",
96
- data=results_df.to_csv(index=False),
97
- file_name="model_report.csv",
98
- mime="text/csv"
99
- )
 
 
100
 
101
- # Option to download the dataset
102
- st.download_button(
103
- label="Download Dataset",
104
- data=df.to_csv(index=False),
105
- file_name="dataset.csv",
106
- mime="text/csv"
107
- )
 
1
+ from tabulate import tabulate
2
+ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
 
 
 
 
 
 
 
 
 
3
 
4
+ # Split the data into training and testing sets
5
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
 
6
 
7
+ # List of classifiers to evaluate
8
+ classifiers = {
9
+ 'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
10
+ 'Decision Tree': DecisionTreeClassifier(),
11
+ 'Random Forest': RandomForestClassifier(),
12
+ 'Support Vector Machine (SVM)': SVC(),
13
+ 'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
14
+ 'Naive Bayes': GaussianNB()
15
+ }
16
+
17
+ # Initialize results storage
18
+ predictions = pd.DataFrame()
19
+ metrics = []
20
+
21
+ # Train and evaluate each model
22
+ for name, classifier in classifiers.items():
23
+ # Train the model
24
+ classifier.fit(X_train, y_train)
25
 
26
+ # Make predictions
27
+ y_pred = classifier.predict(X_test)
28
+ predictions[name] = y_pred # Store predictions
29
 
30
+ # Evaluate metrics
31
+ accuracy = accuracy_score(y_test, y_pred)
32
+ precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
33
+ recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
34
+ f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
35
+
36
+ metrics.append({
37
+ 'Model': name,
38
+ 'Accuracy': round(accuracy, 2),
39
+ 'Precision': round(precision, 2),
40
+ 'Recall': round(recall, 2),
41
+ 'F1-Score': round(f1, 2)
42
+ })
43
+
44
+ # Create a metrics DataFrame
45
+ metrics_df = pd.DataFrame(metrics)
 
 
 
 
 
 
 
46
 
47
+ # Add bold formatting to the headers
48
+ bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ # Format table with tabulate
51
+ table = tabulate(
52
+ metrics_df,
53
+ headers=bold_headers,
54
+ tablefmt="fancy_grid",
55
+ showindex=False,
56
+ numalign="center",
57
+ stralign="center"
58
+ )
59
 
60
+ # Add spacing for a larger table
61
+ print(f"\033[1m{'Model Performance Metrics'.center(80)}\033[0m") # Bold title
62
+ print(table.center(120)) # Center align the table for larger width
63
+ print("\n" + "=" * 80)