saherPervaiz committed on
Commit
05285ce
·
verified ·
1 Parent(s): 581a17c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -16
app.py CHANGED
@@ -6,10 +6,10 @@ import matplotlib.pyplot as plt
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
- from sklearn.ensemble import RandomForestClassifier
10
- from sklearn.linear_model import LogisticRegression
11
- from sklearn.svm import SVC
12
- from sklearn.metrics import classification_report, accuracy_score
13
  from scipy import stats
14
 
15
  # File uploader
@@ -42,6 +42,9 @@ if uploaded_file is not None:
42
  scaler = StandardScaler()
43
  df[df.select_dtypes(include=['number']).columns] = scaler.fit_transform(df.select_dtypes(include=['number']))
44
 
 
 
 
45
  # Ensure that all columns are numeric before using in models
46
  for column in df.select_dtypes(include=['object']).columns:
47
  df[column] = pd.to_numeric(df[column], errors='coerce')
@@ -81,6 +84,9 @@ if uploaded_file is not None:
81
  X = df_cleaned[features]
82
  y = df_cleaned[target]
83
 
 
 
 
84
  # Ensure there is enough data before proceeding with train-test split
85
  if len(X) == 0 or len(y) == 0:
86
  st.warning("Insufficient data after cleaning. Please adjust the cleaning parameters.")
@@ -90,20 +96,34 @@ if uploaded_file is not None:
90
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
91
 
92
  # Model Selection
93
- model_type = st.selectbox("Choose Model", ["Random Forest", "Logistic Regression", "SVM"])
94
- if model_type == "Random Forest":
95
- n_estimators = st.slider("Number of Trees", 10, 100, 50)
96
- model = RandomForestClassifier(n_estimators=n_estimators)
97
- elif model_type == "Logistic Regression":
98
- model = LogisticRegression(max_iter=1000)
99
- elif model_type == "SVM":
100
- model = SVC()
 
 
 
 
 
 
 
 
 
 
101
 
102
  # Train and Evaluate Model
103
  model.fit(X_train, y_train)
104
  y_pred = model.predict(X_test)
105
- st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
106
- st.text(classification_report(y_test, y_pred))
 
 
 
 
107
 
108
  # Option to download the cleaned dataset
109
  st.download_button(
@@ -116,14 +136,14 @@ if uploaded_file is not None:
116
  # Option to download model performance metrics
117
  st.download_button(
118
  label="Download Model Report",
119
- data=classification_report(y_test, y_pred),
120
  file_name="model_report.txt",
121
  mime="text/plain"
122
  )
123
 
124
  # Save and provide a download option for the model accuracy plot
125
  fig, ax = plt.subplots(figsize=(6, 4))
126
- sns.barplot(x=['Accuracy'], y=[accuracy_score(y_test, y_pred)], ax=ax)
127
  st.pyplot(fig)
128
 
129
  # Option to download the accuracy plot
 
6
  from sklearn.model_selection import train_test_split
7
  from sklearn.impute import SimpleImputer
8
  from sklearn.preprocessing import LabelEncoder, StandardScaler
9
+ from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
10
+ from sklearn.linear_model import LogisticRegression, LinearRegression
11
+ from sklearn.svm import SVC, SVR
12
+ from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
13
  from scipy import stats
14
 
15
  # File uploader
 
42
  scaler = StandardScaler()
43
  df[df.select_dtypes(include=['number']).columns] = scaler.fit_transform(df.select_dtypes(include=['number']))
44
 
45
+ # Drop rows with any null values
46
+ df = df.dropna()
47
+
48
  # Ensure that all columns are numeric before using in models
49
  for column in df.select_dtypes(include=['object']).columns:
50
  df[column] = pd.to_numeric(df[column], errors='coerce')
 
84
  X = df_cleaned[features]
85
  y = df_cleaned[target]
86
 
87
+ # Determine if the target is continuous or categorical
88
+ is_classification = y.nunique() <= 10 # If target has fewer than or equal to 10 unique values, treat as classification
89
+
90
  # Ensure there is enough data before proceeding with train-test split
91
  if len(X) == 0 or len(y) == 0:
92
  st.warning("Insufficient data after cleaning. Please adjust the cleaning parameters.")
 
96
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
97
 
98
  # Model Selection
99
+ if is_classification:
100
+ model_type = st.selectbox("Choose Classification Model", ["Random Forest", "Logistic Regression", "SVM"])
101
+ if model_type == "Random Forest":
102
+ n_estimators = st.slider("Number of Trees", 10, 100, 50)
103
+ model = RandomForestClassifier(n_estimators=n_estimators)
104
+ elif model_type == "Logistic Regression":
105
+ model = LogisticRegression(max_iter=1000)
106
+ elif model_type == "SVM":
107
+ model = SVC()
108
+ else:
109
+ model_type = st.selectbox("Choose Regression Model", ["Random Forest", "Linear Regression", "SVR"])
110
+ if model_type == "Random Forest":
111
+ n_estimators = st.slider("Number of Trees", 10, 100, 50)
112
+ model = RandomForestRegressor(n_estimators=n_estimators)
113
+ elif model_type == "Linear Regression":
114
+ model = LinearRegression()
115
+ elif model_type == "SVR":
116
+ model = SVR()
117
 
118
  # Train and Evaluate Model
119
  model.fit(X_train, y_train)
120
  y_pred = model.predict(X_test)
121
+
122
+ if is_classification:
123
+ st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
124
+ st.text(classification_report(y_test, y_pred))
125
+ else:
126
+ st.write(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
127
 
128
  # Option to download the cleaned dataset
129
  st.download_button(
 
136
  # Option to download model performance metrics
137
  st.download_button(
138
  label="Download Model Report",
139
+ data=classification_report(y_test, y_pred) if is_classification else f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}",
140
  file_name="model_report.txt",
141
  mime="text/plain"
142
  )
143
 
144
  # Save and provide a download option for the model accuracy plot
145
  fig, ax = plt.subplots(figsize=(6, 4))
146
+ sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
147
  st.pyplot(fig)
148
 
149
  # Option to download the accuracy plot