"""Streamlit app that compares the accuracy of several scikit-learn classifiers.

The user uploads a CSV file, picks feature and target columns, and the app
trains each selected model on an 80/20 split and reports test accuracy.
"""

import pandas as pd
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier


def build_and_evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit *model* on the training split and score it on the test split.

    Args:
        model: An estimator exposing ``fit`` and ``predict``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training labels.
        y_test: Test labels; must support ``astype`` (e.g. a pandas Series).

    Returns:
        The accuracy returned by ``accuracy_score``, or the string
        ``"Not Applicable: <error>"`` if fitting, predicting or scoring
        raised. Callers must check for the string case before doing
        arithmetic on the result.
    """
    try:
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        # Compare labels as strings so predictions and ground truth share a
        # dtype regardless of how the target column was parsed.
        y_pred = y_pred.astype(str)
        accuracy = accuracy_score(y_test.astype(str), y_pred)
        return accuracy
    except Exception as e:
        # Deliberate best-effort: one failing model must not abort the
        # comparison, so the error is reported as the model's result.
        return f"Not Applicable: {str(e)}"


def main():
    """Render the Streamlit page and run the model comparison on demand."""
    # Placeholder for custom CSS used to style the title.
    custom_css = """ """
    st.markdown(custom_css, unsafe_allow_html=True)

    # Title and tagline.
    # NOTE(review): the original HTML wrapper around these two strings was
    # lost (the literals were split across lines, which is a syntax error);
    # the heading tags below are a reconstruction - confirm intended markup.
    st.markdown("<h1>ModelMetrics Hub</h1>", unsafe_allow_html=True)
    st.markdown("<h3>Optimal Model Explorer</h3>", unsafe_allow_html=True)

    st.sidebar.title("Model Selection")
    model_names = ["Random Forest", "Decision Tree", "KNN", "SVM", "Logistic Regression"]
    selected_models = st.sidebar.multiselect(
        "Choose models to compare", model_names, default=model_names
    )

    uploaded_file = st.file_uploader("Upload a CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # latin-1 decodes any byte sequence, so arbitrary uploads do not raise
    # UnicodeDecodeError.
    data = pd.read_csv(uploaded_file, encoding='latin-1')
    st.write("Preview of the dataset:")
    st.write(data.head())

    feature_columns = st.multiselect("Select feature columns", data.columns.tolist())
    target_column = st.selectbox("Select target column", data.columns.tolist())

    if not st.button("Run Models"):
        return

    # Guard against selections that would make every model fail.
    if not feature_columns:
        st.warning("Select at least one feature column.")
        return
    if not selected_models:
        st.warning("Select at least one model to compare.")
        return

    X = data[feature_columns]
    y = data[target_column].astype(str)

    # One-hot encode categorical columns. With columns=None, get_dummies
    # encodes every object/string/category column and leaves numeric ones
    # untouched.
    X = pd.get_dummies(X, drop_first=True)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    models = {
        "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
        "Decision Tree": DecisionTreeClassifier(random_state=42),
        "KNN": KNeighborsClassifier(n_neighbors=5),
        "SVM": SVC(kernel='rbf', random_state=42),
        "Logistic Regression": LogisticRegression(max_iter=1000, random_state=42),
    }

    # build_and_evaluate_model returns a message string on failure; keep
    # those apart so sorting and percentage formatting only see numbers.
    scores = {}
    failures = {}
    for model_name in selected_models:
        outcome = build_and_evaluate_model(
            models[model_name], X_train, X_test, y_train, y_test
        )
        if isinstance(outcome, str):
            failures[model_name] = outcome
        else:
            scores[model_name] = outcome

    st.subheader("Accuracy")
    # Best-performing model first.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    for model_name, accuracy in ranked:
        st.write(f"{model_name}: {accuracy * 100:.2f}%")
    for model_name, message in failures.items():
        st.write(f"{model_name}: {message}")


if __name__ == "__main__":
    main()