Spaces:

saherPervaiz
/

ModelTrain

Running

App Files Files Community

saherPervaiz committed on Jan 12, 2025

Commit

ff625a6

verified ·

1 Parent(s): 35ca176

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -116

app.py DELETED Viewed

@@ -1,116 +0,0 @@
-import streamlit as st
-import pandas as pd
-from sklearn.model_selection import train_test_split
-from sklearn.preprocessing import LabelEncoder
-from sklearn.ensemble import RandomForestClassifier
-from sklearn.linear_model import LogisticRegression
-from sklearn.svm import SVC
-from sklearn.neighbors import KNeighborsClassifier
-from sklearn.tree import DecisionTreeClassifier
-from sklearn.naive_bayes import GaussianNB
-from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
-from tabulate import tabulate
-# File uploader
-st.title("Model Training with Metrics")
-uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
-if uploaded_file is not None:
-    df = pd.read_csv(uploaded_file)
-    # Show the dataset
-    st.write("Dataset:")
-    st.dataframe(df)
-    # Model Training Section
-    st.subheader("Model Training")
-    if df.empty:
-        st.warning("The dataset is empty. Please upload a valid CSV file.")
-    else:
-        target = st.selectbox("Select Target Variable", df.columns)
-        features = [col for col in df.columns if col != target]
-        X = df[features]
-        y = df[target]
-        # Determine if the target is continuous or categorical
-        is_classification = y.dtype == 'object' or len(y.unique()) <= 10  # If target is categorical or has few unique values, treat as classification
-        # Ensure there is enough data before proceeding with train-test split
-        if len(X) == 0 or len(y) == 0:
-            st.warning("Insufficient data. Please ensure there are valid feature and target columns.")
-        else:
-            # Split the data into training and test sets with customizable training size
-            train_size = st.slider("Select Training Size", min_value=0.1, max_value=0.9, value=0.8)
-            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_size, random_state=42)
-            # List of classifiers to evaluate
-            classifiers = {
-                'Logistic Regression': LogisticRegression(max_iter=5000, solver='saga', penalty='l1'),
-                'Decision Tree': DecisionTreeClassifier(),
-                'Random Forest': RandomForestClassifier(),
-                'Support Vector Machine (SVM)': SVC(),
-                'K-Nearest Neighbors (k-NN)': KNeighborsClassifier(),
-                'Naive Bayes': GaussianNB()
-            }
-            # Initialize results storage
-            metrics = []
-            # Train and evaluate each model
-            for name, classifier in classifiers.items():
-                # Train the model
-                classifier.fit(X_train, y_train)
-                # Make predictions
-                y_pred = classifier.predict(X_test)
-                # Evaluate metrics
-                accuracy = accuracy_score(y_test, y_pred)
-                precision = precision_score(y_test, y_pred, zero_division=1, average='macro')
-                recall = recall_score(y_test, y_pred, zero_division=1, average='macro')
-                f1 = f1_score(y_test, y_pred, zero_division=1, average='macro')
-                metrics.append({
-                    'Model': name,
-                    'Accuracy': round(accuracy, 2),
-                    'Precision': round(precision, 2),
-                    'Recall': round(recall, 2),
-                    'F1-Score': round(f1, 2)
-                })
-            # Create a metrics DataFrame
-            metrics_df = pd.DataFrame(metrics)
-            # Add bold formatting to the headers for tabulate
-            bold_headers = [f"\033[1m{header}\033[0m" for header in metrics_df.columns]
-            # Format table with tabulate
-            table = tabulate(
-                metrics_df,
-                headers=bold_headers,
-                tablefmt="fancy_grid",
-                showindex=False,
-                numalign="center",
-                stralign="center"
-            )
-            # Display results in Streamlit
-            st.subheader("Model Performance Metrics")
-            st.markdown(f"**Model Performance Metrics**")
-            st.text(table)
-            # Option to download the model performance metrics (Results Table)
-            st.download_button(
-                label="Download Model Report",
-                data=metrics_df.to_csv(index=False),
-                file_name="model_report.csv",
-                mime="text/csv"
-            )
-            # Option to download the dataset
-            st.download_button(
-                label="Download Dataset",
-                data=df.to_csv(index=False),
-                file_name="dataset.csv",
-                mime="text/csv"
-            )