Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Community

saherPervaiz committed on Jan 12, 2025

Commit

edf0043

verified ·

1 Parent(s): 05285ce

Update app.py

Browse files

Files changed (1) hide show

app.py +87 -56

app.py CHANGED Viewed

@@ -7,8 +7,11 @@ from sklearn.model_selection import train_test_split
 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import LabelEncoder, StandardScaler
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.linear_model import LogisticRegression, LinearRegression
 from sklearn.svm import SVC, SVR
 from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
 from scipy import stats
@@ -57,23 +60,6 @@ if uploaded_file is not None:
     st.write("Cleaned Dataset:")
     st.dataframe(df_cleaned)
-    # Plot the correlation heatmap
-    st.subheader("Correlation Heatmap")
-    # Select only numeric columns for correlation matrix
-    correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
-    fig, ax = plt.subplots(figsize=(8, 6))  # Small graph
-    sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
-    st.pyplot(fig)
-    # Display Histograms of Numerical Columns
-    st.subheader("Histograms of Numerical Columns")
-    for column in df_cleaned.select_dtypes(include=['number']).columns:
-        fig, ax = plt.subplots(figsize=(5, 4))  # Small graph
-        df_cleaned[column].plot(kind="hist", bins=20, ax=ax, title=column)
-        st.pyplot(fig)
     # Model Training Section
     st.subheader("Model Training")
     if df_cleaned.empty:
@@ -95,35 +81,64 @@ if uploaded_file is not None:
             train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
-            # Model Selection
             if is_classification:
-                model_type = st.selectbox("Choose Classification Model", ["Random Forest", "Logistic Regression", "SVM"])
-                if model_type == "Random Forest":
-                    n_estimators = st.slider("Number of Trees", 10, 100, 50)
-                    model = RandomForestClassifier(n_estimators=n_estimators)
-                elif model_type == "Logistic Regression":
-                    model = LogisticRegression(max_iter=1000)
-                elif model_type == "SVM":
-                    model = SVC()
             else:
-                model_type = st.selectbox("Choose Regression Model", ["Random Forest", "Linear Regression", "SVR"])
-                if model_type == "Random Forest":
-                    n_estimators = st.slider("Number of Trees", 10, 100, 50)
-                    model = RandomForestRegressor(n_estimators=n_estimators)
-                elif model_type == "Linear Regression":
-                    model = LinearRegression()
-                elif model_type == "SVR":
-                    model = SVR()
-            # Train and Evaluate Model
-            model.fit(X_train, y_train)
-            y_pred = model.predict(X_test)
-            if is_classification:
-                st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
-                st.text(classification_report(y_test, y_pred))
-            else:
-                st.write(f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}")
             # Option to download the cleaned dataset
             st.download_button(
@@ -133,25 +148,41 @@ if uploaded_file is not None:
                 mime="text/csv"
             )
-            # Option to download model performance metrics
             st.download_button(
                 label="Download Model Report",
-                data=classification_report(y_test, y_pred) if is_classification else f"Mean Squared Error: {mean_squared_error(y_test, y_pred):.2f}",
-                file_name="model_report.txt",
-                mime="text/plain"
             )
-            # Save and provide a download option for the model accuracy plot
-            fig, ax = plt.subplots(figsize=(6, 4))
-            sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
             st.pyplot(fig)
-            # Option to download the accuracy plot
-            fig.savefig("/tmp/model_accuracy.png")
-            with open("/tmp/model_accuracy.png", "rb") as f:
                 st.download_button(
-                    label="Download Accuracy Plot",
                     data=f,
-                    file_name="model_accuracy.png",
                     mime="image/png"
                 )

 from sklearn.impute import SimpleImputer
 from sklearn.preprocessing import LabelEncoder, StandardScaler
 from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
+from sklearn.linear_model import LogisticRegression, LinearRegression, Ridge
 from sklearn.svm import SVC, SVR
+from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.naive_bayes import GaussianNB
 from sklearn.metrics import classification_report, accuracy_score, mean_squared_error
 from scipy import stats
     st.write("Cleaned Dataset:")
     st.dataframe(df_cleaned)
     # Model Training Section
     st.subheader("Model Training")
     if df_cleaned.empty:
             train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
+            # Collect results as a list of [model name, accuracy, mse] rows
+            results = []
+            # Model Selection and Evaluation
+            models = []
             if is_classification:
+                model_choices = [
+                    ("Random Forest", RandomForestClassifier(n_estimators=50)),
+                    ("Logistic Regression", LogisticRegression(max_iter=1000)),
+                    ("SVM", SVC()),
+                    ("K-Nearest Neighbors", KNeighborsClassifier(n_neighbors=5)),
+                    ("Decision Tree", DecisionTreeClassifier()),
+                    ("Naive Bayes", GaussianNB())
+                ]
+                for name, model in model_choices:
+                    model.fit(X_train, y_train)
+                    y_pred = model.predict(X_test)
+                    accuracy = accuracy_score(y_test, y_pred)
+                    results.append([name, accuracy, None])
             else:
+                model_choices = [
+                    ("Random Forest", RandomForestRegressor(n_estimators=50)),
+                    ("Linear Regression", LinearRegression()),
+                    ("SVR", SVR()),
+                    ("K-Nearest Neighbors", KNeighborsRegressor(n_neighbors=5)),
+                    ("Decision Tree", DecisionTreeRegressor()),
+                    ("Ridge Regression", Ridge())
+                ]
+                for name, model in model_choices:
+                    model.fit(X_train, y_train)
+                    y_pred = model.predict(X_test)
+                    mse = mean_squared_error(y_test, y_pred)
+                    results.append([name, None, mse])
+            # Display results in a table
+            st.subheader("Model Performance Results")
+            results_df = pd.DataFrame(results, columns=["Model", "Accuracy" if is_classification else "Accuracy (N/A)", "Mean Squared Error" if not is_classification else "MSE (N/A)"])
+            # Show a bold title above the results table
+            st.markdown(f"**Model Performance Results**")
+            st.dataframe(results_df)
+            # Bar plot of a single score: y_pred holds only the last model's predictions from the loop above
+            fig, ax = plt.subplots(figsize=(6, 4))
+            sns.barplot(x=['Accuracy' if is_classification else 'MSE'], y=[accuracy_score(y_test, y_pred) if is_classification else mean_squared_error(y_test, y_pred)], ax=ax)
+            st.pyplot(fig)
+            # Save and provide download option
+            fig.savefig("/tmp/model_accuracy.png")
+            with open("/tmp/model_accuracy.png", "rb") as f:
+                st.download_button(
+                    label="Download Accuracy Plot",
+                    data=f,
+                    file_name="model_accuracy.png",
+                    mime="image/png"
+                )
             # Option to download the cleaned dataset
             st.download_button(
                 mime="text/csv"
             )
+            # Option to download model performance metrics (Results Table)
             st.download_button(
                 label="Download Model Report",
+                data=results_df.to_csv(index=False),
+                file_name="model_report.csv",
+                mime="text/csv"
             )
+            # Download correlation heatmap
+            st.subheader("Correlation Heatmap")
+            correlation_matrix = df_cleaned.select_dtypes(include=['number']).corr()
+            fig, ax = plt.subplots(figsize=(8, 6))
+            sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap="coolwarm", ax=ax)
             st.pyplot(fig)
+            fig.savefig("/tmp/correlation_heatmap.png")
+            with open("/tmp/correlation_heatmap.png", "rb") as f:
                 st.download_button(
+                    label="Download Correlation Heatmap",
                     data=f,
+                    file_name="correlation_heatmap.png",
                     mime="image/png"
                 )
+            # Display Histograms of Numerical Columns
+            st.subheader("Histograms of Numerical Columns")
+            for column in df_cleaned.select_dtypes(include=['number']).columns:
+                fig, ax = plt.subplots(figsize=(5, 4))  # Small graph
+                df_cleaned[column].plot(kind="hist", bins=20, ax=ax, title=column)
+                st.pyplot(fig)
+                # Save and provide download option for histogram
+                fig.savefig(f"/tmp/{column}_histogram.png")
+                with open(f"/tmp/{column}_histogram.png", "rb") as f:
+                    st.download_button(
+                        label=f"Download {column} Histogram",
+                        data=f,
+                        file_name=f"{column}_histogram.png",
+                        mime="image/png"
+                    )