import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    r2_score,
    recall_score,
)
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils.multiclass import type_of_target

# One seed shared by the train/test split and every estimator that draws
# random numbers, so repeated calls on the same data give the same table.
_RANDOM_STATE = 42


def _compatible_task_types(y):
    """Return the task types whose models can be fitted to target *y*.

    Both task types are kept unless the target rules one out:

    * a continuous target is rejected by scikit-learn classifiers, so
      ``'classification'`` is dropped;
    * a target that cannot be converted to floats cannot be regressed on,
      so ``'regression'`` is dropped.

    Integer-valued numeric targets keep both task types.
    """
    compatible = {"classification", "regression"}

    if type_of_target(y).startswith("continuous"):
        compatible.discard("classification")

    try:
        np.asarray(y, dtype=float)
    except (TypeError, ValueError):
        compatible.discard("regression")

    return compatible


def train_all_models(X, y):
    """Train the applicable models and return their evaluation metrics.

    The data is split once into 70% train / 30% test with a fixed seed. Each
    model whose task type is compatible with ``y`` is fitted on the training
    part and scored on the test part. Models that cannot be fitted to the
    target (classifiers on a continuous target, regressors on a non-numeric
    target) are skipped instead of aborting the whole run.

    Args:
        X: Feature matrix accepted by scikit-learn estimators.
        y: Target values aligned with the rows of ``X``.

    Returns:
        A ``pandas.DataFrame`` with one row per trained model. Classification
        rows fill ``Model``, ``Accuracy``, ``Precision``, ``Recall`` and
        ``F1 Score``; regression rows fill ``Model``, ``MSE``, ``MAE`` and
        ``R2 Score``. When both kinds of rows are present, the columns that
        do not apply to a row hold NaN.

    Raises:
        ValueError: Propagated from scikit-learn when the data cannot be
            split, fitted or scored (for example inconsistent lengths).
    """
    models = [
        ("Logistic Regression", LogisticRegression(), "classification"),
        (
            "Random Forest",
            RandomForestClassifier(random_state=_RANDOM_STATE),
            "classification",
        ),
        ("SVM", SVC(), "classification"),
        ("KNN", KNeighborsClassifier(), "classification"),
        (
            "Decision Tree",
            DecisionTreeClassifier(random_state=_RANDOM_STATE),
            "classification",
        ),
        ("Naive Bayes", GaussianNB(), "classification"),
        ("Linear Regression", LinearRegression(), "regression"),
        (
            "Random Forest Regressor",
            RandomForestRegressor(random_state=_RANDOM_STATE),
            "regression",
        ),
        ("SVR", SVR(), "regression"),
        ("KNN Regressor", KNeighborsRegressor(), "regression"),
        (
            "Decision Tree Regressor",
            DecisionTreeRegressor(random_state=_RANDOM_STATE),
            "regression",
        ),
    ]

    runnable_tasks = _compatible_task_types(y)

    # The split is seeded, hence identical for every model: compute it once
    # instead of once per loop iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=_RANDOM_STATE
    )

    model_results = []
    for model_name, model, task_type in models:
        if task_type not in runnable_tasks:
            continue

        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        if task_type == "classification":
            # Weighted averaging supports multi-class targets; zero_division=0
            # scores a class that was never predicted as 0 without a warning.
            metrics = {
                "Model": model_name,
                "Accuracy": accuracy_score(y_test, y_pred),
                "Precision": precision_score(
                    y_test, y_pred, average="weighted", zero_division=0
                ),
                "Recall": recall_score(
                    y_test, y_pred, average="weighted", zero_division=0
                ),
                "F1 Score": f1_score(
                    y_test, y_pred, average="weighted", zero_division=0
                ),
            }
        else:
            metrics = {
                "Model": model_name,
                "MSE": mean_squared_error(y_test, y_pred),
                "MAE": mean_absolute_error(y_test, y_pred),
                "R2 Score": r2_score(y_test, y_pred),
            }
        model_results.append(metrics)

    # Rows with different keys are aligned by column name; gaps become NaN.
    return pd.DataFrame(model_results)