Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -6,10 +6,10 @@ import matplotlib.pyplot as plt
|
|
| 6 |
from sklearn.model_selection import train_test_split
|
| 7 |
from sklearn.impute import SimpleImputer
|
| 8 |
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
| 9 |
-
from sklearn.ensemble import RandomForestClassifier
|
| 10 |
-
from sklearn.linear_model import LogisticRegression
|
| 11 |
-
from sklearn.svm import SVC
|
| 12 |
-
from sklearn.metrics import classification_report, accuracy_score
|
| 13 |
from scipy import stats
|
| 14 |
|
| 15 |
# File uploader
|
|
@@ -42,6 +42,9 @@ if uploaded_file is not None:
|
|
| 42 |
scaler = StandardScaler()
|
| 43 |
df[df.select_dtypes(include=['number']).columns] = scaler.fit_transform(df.select_dtypes(include=['number']))
|
| 44 |
|
|
|
|
|
|
|
|
|
|
| 45 |
# Ensure that all columns are numeric before using in models
|
| 46 |
for column in df.select_dtypes(include=['object']).columns:
|
| 47 |
df[column] = pd.to_numeric(df[column], errors='coerce')
|
|
@@ -81,6 +84,9 @@ if uploaded_file is not None:
|
|
| 81 |
X = df_cleaned[features]
|
| 82 |
y = df_cleaned[target]
|
| 83 |
|
|
|
|
|
|
|
|
|
|
| 84 |
# Ensure there is enough data before proceeding with train-test split
|
| 85 |
if len(X) == 0 or len(y) == 0:
|
| 86 |
st.warning("Insufficient data after cleaning. Please adjust the cleaning parameters.")
|
|
@@ -90,20 +96,34 @@ if uploaded_file is not None:
|
|
| 90 |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
|
| 91 |
|
| 92 |
# Model Selection
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# Train and Evaluate Model
|
| 103 |
model.fit(X_train, y_train)
|
| 104 |
y_pred = model.predict(X_test)
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# Option to download the cleaned dataset
|
| 109 |
st.download_button(
|
|
@@ -116,14 +136,14 @@ if uploaded_file is not None:
|
|
| 116 |
# Option to download model performance metrics
|
| 117 |
st.download_button(
|
| 118 |
label="Download Model Report",
|
| 119 |
-
data=classification_report(y_test, y_pred),
|
| 120 |
file_name="model_report.txt",
|
| 121 |
mime="text/plain"
|
| 122 |
)
|
| 123 |
|
| 124 |
# Save and provide a download option for the model accuracy plot
|
| 125 |
fig, ax = plt.subplots(figsize=(6, 4))
|
| 126 |
-
sns.barplot(x=['Accuracy'], y=[accuracy_score(y_test, y_pred)], ax=ax)
|
| 127 |
st.pyplot(fig)
|
| 128 |
|
| 129 |
# Option to download the accuracy plot
|
|
|
|
| 6 |
from sklearn.model_selection import train_test_split
|
| 7 |
from sklearn.impute import SimpleImputer
|
| 8 |
from sklearn.preprocessing import LabelEncoder, StandardScaler
|
| 9 |
+
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
|
| 10 |
+
from sklearn.linear_model import LogisticRegression, LinearRegression
|
| 11 |
+
from sklearn.svm import SVC, SVR
|
| 12 |
+
from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
|
| 13 |
from scipy import stats
|
| 14 |
|
| 15 |
# File uploader
|
|
|
|
# Standardize every numeric column so features share a common scale.
scaler = StandardScaler()
numeric_cols = df.select_dtypes(include=['number']).columns
df[numeric_cols] = scaler.fit_transform(df[numeric_cols])

# Ensure that all columns are numeric before using in models.
# NOTE: this must run BEFORE dropna() — errors='coerce' turns
# unparseable strings into NaN, and those NaNs would otherwise be
# created after the null-drop and leak into the models.
for column in df.select_dtypes(include=['object']).columns:
    df[column] = pd.to_numeric(df[column], errors='coerce')

# Drop rows with any null values, including NaNs just introduced by coercion.
df = df.dropna()
|
|
|
|
# Pull the model inputs and the prediction target out of the cleaned frame.
X = df_cleaned[features]
y = df_cleaned[target]

# Task-type heuristic: a target with at most 10 distinct values is treated
# as categorical (classification); anything richer is handled as regression.
is_classification = y.nunique() <= 10

# Bail out with a warning when cleaning has removed every row.
if not (len(X) and len(y)):
    st.warning("Insufficient data after cleaning. Please adjust the cleaning parameters.")
|
|
|
|
# Hold out a test split; the test fraction is the complement of the chosen
# training fraction. The fixed seed keeps the split reproducible across reruns.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)

# Model Selection — offer classifiers or regressors depending on the detected
# task type; "Random Forest" additionally exposes a tree-count slider.
if is_classification:
    model_type = st.selectbox("Choose Classification Model", ["Random Forest", "Logistic Regression", "SVM"])
    if model_type == "Random Forest":
        n_estimators = st.slider("Number of Trees", 10, 100, 50)
        # random_state matches the split seed so results are reproducible.
        model = RandomForestClassifier(n_estimators=n_estimators, random_state=42)
    elif model_type == "Logistic Regression":
        # max_iter raised from the 100 default so the solver converges.
        model = LogisticRegression(max_iter=1000)
    else:
        # Final option ("SVM") handled as else so `model` is always bound.
        model = SVC()
else:
    model_type = st.selectbox("Choose Regression Model", ["Random Forest", "Linear Regression", "SVR"])
    if model_type == "Random Forest":
        n_estimators = st.slider("Number of Trees", 10, 100, 50)
        model = RandomForestRegressor(n_estimators=n_estimators, random_state=42)
    elif model_type == "Linear Regression":
        model = LinearRegression()
    else:
        # Final option ("SVR") handled as else so `model` is always bound.
        model = SVR()
|
| 117 |
|
# Train and Evaluate Model: fit the chosen estimator on the training split,
# then score predictions on the hold-out set.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report the metric that matches the detected task type.
if is_classification:
    st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
    report = classification_report(y_test, y_pred)
    st.text(report)
else:
    st.write(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
|
| 127 |
|
| 128 |
# Option to download the cleaned dataset
|
| 129 |
st.download_button(
|
|
|
|
| 136 |
# Option to download model performance metrics
report_text = (
    classification_report(y_test, y_pred)
    if is_classification
    else f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}"
)
st.download_button(
    label="Download Model Report",
    data=report_text,
    file_name="model_report.txt",
    mime="text/plain"
)

# Save and provide a download option for the model accuracy plot
fig, ax = plt.subplots(figsize=(6, 4))
if is_classification:
    metric_name, metric_value = 'Accuracy', accuracy_score(y_test, y_pred)
else:
    metric_name, metric_value = 'MSE', mean_squared_error(y_test, y_pred)
sns.barplot(x=[metric_name], y=[metric_value], ax=ax)
st.pyplot(fig)

# Option to download the accuracy plot