saherPervaiz committed on
Commit
ff625a6
·
verified ·
1 Parent(s): 35ca176

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -116
app.py DELETED
@@ -1,116 +0,0 @@
"""Streamlit app: upload a CSV, train several classifiers, compare their metrics.

Run with ``streamlit run app.py``. The user picks a target column and a
training-set fraction; every classifier in ``_build_classifiers`` is trained on
the same split and scored with accuracy plus macro-averaged precision, recall
and F1. The metrics table and the uploaded dataset can be downloaded as CSV.
"""

from typing import Optional

import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from tabulate import tabulate

# Shared seed so the split and the stochastic models give the same numbers on
# every Streamlit rerun (any widget interaction re-executes the whole script).
RANDOM_STATE = 42

# A float target with more distinct values than this, and at least one
# non-integral value, is treated as continuous (i.e. a regression problem).
MAX_FLOAT_CLASSES = 10


def _load_csv(uploaded_file) -> Optional[pd.DataFrame]:
    """Parse the uploaded CSV into a DataFrame.

    A file with no content yields an empty DataFrame so the caller's
    "dataset is empty" warning is shown instead of a traceback. A file that
    cannot be parsed or decoded shows an error in the UI and yields ``None``.
    """
    try:
        return pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        return pd.DataFrame()
    except (pd.errors.ParserError, UnicodeDecodeError) as err:
        st.error(f"Could not read the CSV file: {err}")
        return None


def _is_classification_target(y: pd.Series) -> bool:
    """Return whether ``y`` can sensibly be used as class labels.

    Non-float columns (text, integers, booleans) are always accepted. A float
    column is accepted when it has few distinct values or holds only
    whole numbers; otherwise it is considered continuous.
    """
    if not pd.api.types.is_float_dtype(y):
        return True
    values = y.dropna()
    return values.nunique() <= MAX_FLOAT_CLASSES or bool((values % 1 == 0).all())


def _prepare_data(df: pd.DataFrame, target: str):
    """Split ``df`` into a model-ready feature matrix and the raw target column.

    Feature columns that contain no values at all are discarded (they would
    otherwise cause every row to be dropped), rows with any remaining missing
    value are removed, and non-numeric feature columns are one-hot encoded.

    Returns:
        ``(X, y)`` where ``X`` may have zero columns or zero rows; the caller
        is responsible for checking that enough data is left.
    """
    feature_cols = [
        col for col in df.columns if col != target and df[col].notna().any()
    ]
    data = df[feature_cols + [target]].dropna()
    X = data[feature_cols]
    if feature_cols:
        # get_dummies has nothing to concatenate for a frame with no columns,
        # so it is only called when at least one feature exists.
        X = pd.get_dummies(X)
    return X, data[target]


def _encode_target(y: pd.Series) -> pd.Series:
    """Map class labels to integer codes, keeping the original row index."""
    if y.dtype == object:
        # Mixed str/number labels cannot be sorted by LabelEncoder.
        y = y.astype(str)
    codes = LabelEncoder().fit_transform(y)
    return pd.Series(codes, index=y.index, name=y.name)


def _build_classifiers() -> dict:
    """Return fresh, unfitted classifiers keyed by their display name."""
    return {
        'Logistic Regression': LogisticRegression(
            max_iter=5000, solver='saga', penalty='l1', random_state=RANDOM_STATE
        ),
        'Decision Tree': DecisionTreeClassifier(random_state=RANDOM_STATE),
        'Random Forest': RandomForestClassifier(random_state=RANDOM_STATE),
        'Support Vector Machine (SVM)': SVC(),
        'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
        'Naive Bayes': GaussianNB(),
    }


def _evaluate_classifier(name, classifier, X_train, X_test, y_train, y_test) -> dict:
    """Fit ``classifier`` and return one row of rounded metrics for the report.

    Raises:
        ValueError: propagated from scikit-learn when the model cannot be
            fitted or scored on the given data.
    """
    classifier.fit(X_train, y_train)
    y_pred = classifier.predict(X_test)

    # NOTE(review): zero_division=1 is kept from the original script; it makes
    # an undefined precision/recall/F1 count as 1 rather than 0 -- confirm
    # this is the intended reporting convention.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
    recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
    f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')

    return {
        'Model': name,
        'Accuracy': round(accuracy, 2),
        'Precision': round(precision, 2),
        'Recall': round(recall, 2),
        'F1-Score': round(f1, 2),
    }


def _render_report(metrics_df: pd.DataFrame) -> None:
    """Show the metrics table and offer it as a CSV download."""
    # Plain headers: ANSI bold escapes are terminal-only and would be shown
    # as raw control characters by st.text.
    table = tabulate(
        metrics_df,
        headers=list(metrics_df.columns),
        tablefmt="fancy_grid",
        showindex=False,
        numalign="center",
        stralign="center",
    )

    st.subheader("Model Performance Metrics")
    st.markdown("**Model Performance Metrics**")
    st.text(table)

    st.download_button(
        label="Download Model Report",
        data=metrics_df.to_csv(index=False),
        file_name="model_report.csv",
        mime="text/csv",
    )


def _render_training(df: pd.DataFrame) -> None:
    """Let the user choose a target and split, then train and report all models."""
    target = st.selectbox("Select Target Variable", df.columns)
    X, y = _prepare_data(df, target)

    dropped = len(df) - len(X)
    if dropped:
        st.info(f"Dropped {dropped} row(s) with missing values before training.")

    # At least one feature column and two rows are needed to split and train.
    if X.shape[1] == 0 or len(y) < 2:
        st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
        return
    if not _is_classification_target(y):
        st.warning(
            "The selected target looks continuous. "
            "Please choose a categorical target for classification."
        )
        return
    if y.nunique() < 2:
        st.warning("The selected target needs at least two distinct classes.")
        return

    y = _encode_target(y)

    train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
    try:
        # Pass train_size directly: 1 - train_size picks up float rounding
        # error (e.g. 1 - 0.7 == 0.30000000000000004), which can push an
        # extra row into the test set.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, train_size=train_size, random_state=RANDOM_STATE
        )
    except ValueError as err:
        st.warning(f"Could not split the data: {err}")
        return

    metrics = []
    for name, classifier in _build_classifiers().items():
        try:
            metrics.append(
                _evaluate_classifier(name, classifier, X_train, X_test, y_train, y_test)
            )
        except ValueError as err:
            # One unsuitable model must not hide the results of the others.
            st.warning(f"{name} could not be trained on this dataset: {err}")

    if not metrics:
        st.warning("No model could be trained on this dataset.")
        return

    _render_report(pd.DataFrame(metrics))


def main() -> None:
    """Render the page: file upload, dataset preview, training and downloads."""
    st.title("Model Training with Metrics")
    uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    df = _load_csv(uploaded_file)
    if df is None:
        return

    st.write("Dataset:")
    st.dataframe(df)

    st.subheader("Model Training")
    if df.empty:
        st.warning("The dataset is empty. Please upload a valid CSV file.")
        return

    _render_training(df)

    st.download_button(
        label="Download Dataset",
        data=df.to_csv(index=False),
        file_name="dataset.csv",
        mime="text/csv",
    )


if __name__ == "__main__":
    main()