"""Tune a RandomForestClassifier on the Iris dataset with Optuna.

Running this file as a script:

* searches ``n_estimators`` and ``max_depth`` for ``N_TRIALS`` trials,
* writes the trial table to ``models/study_trials.csv``,
* writes the best parameters to ``models/best_params.yaml``,
* fits a model with those parameters and dumps it to
  ``models/best_model.pkl``,
* saves the optimization-history and parameter-importance plots under
  ``plots/``.
"""
import functools
import os

import joblib
import matplotlib.pyplot as plt
import optuna
import pandas as pd
import yaml
from optuna.visualization.matplotlib import (
    plot_optimization_history,
    plot_param_importances,
)
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# One seed drives the data split, the forests and the Optuna sampler so that
# two runs of the script produce the same study and the same saved model.
SEED = 42
N_TRIALS = 30
MODEL_DIR = "models"
PLOT_DIR = "plots"


@functools.lru_cache(maxsize=1)
def _iris_split():
    """Return ``(X_train, X_test, y_train, y_test)`` for the Iris dataset.

    The split is computed once and cached: every trial and the final model
    must see the *same* partition, otherwise trial scores are not comparable
    with each other.
    """
    data = load_iris()
    return tuple(
        train_test_split(
            data.data,
            data.target,
            random_state=SEED,
            stratify=data.target,
        )
    )


def objective(trial):
    """Score one hyperparameter candidate.

    Args:
        trial: The Optuna trial used to sample ``n_estimators`` (50-300)
            and ``max_depth`` (2-32).

    Returns:
        Accuracy of the fitted forest on the held-out part of the shared
        split; the study maximizes this value.
    """
    X_train, X_test, y_train, y_test = _iris_split()
    clf = RandomForestClassifier(
        n_estimators=trial.suggest_int("n_estimators", 50, 300),
        max_depth=trial.suggest_int("max_depth", 2, 32),
        # Seeded so score differences come from the parameters, not from the
        # forest's own randomness.
        random_state=SEED,
    )
    clf.fit(X_train, y_train)
    return accuracy_score(y_test, clf.predict(X_test))


def _save_plot(plot_fn, study, path):
    """Render ``plot_fn(study)``, write it to ``path`` and close the figure."""
    ax = plot_fn(study)
    # Save through the Axes' own figure rather than relying on pyplot's
    # implicit "current figure", then close it so figures do not accumulate.
    fig = ax.figure
    fig.savefig(path)
    plt.close(fig)


def main():
    """Run the study and write the trial table, parameters, model and plots."""
    os.makedirs(MODEL_DIR, exist_ok=True)
    os.makedirs(PLOT_DIR, exist_ok=True)

    study = optuna.create_study(
        direction="maximize",
        sampler=optuna.samplers.TPESampler(seed=SEED),
    )
    study.optimize(objective, n_trials=N_TRIALS)

    # Save trial results
    study.trials_dataframe().to_csv(
        os.path.join(MODEL_DIR, "study_trials.csv"), index=False
    )

    # Save best parameters
    best_params = study.best_trial.params
    params_path = os.path.join(MODEL_DIR, "best_params.yaml")
    with open(params_path, "w", encoding="utf-8") as f:
        yaml.dump(best_params, f)

    # Train final model with best parameters, on the same training partition
    # the trials were fitted on.
    X_train, _, y_train, _ = _iris_split()
    final_model = RandomForestClassifier(**best_params, random_state=SEED)
    final_model.fit(X_train, y_train)

    # Save model
    joblib.dump(final_model, os.path.join(MODEL_DIR, "best_model.pkl"))

    # Save Optuna plots
    _save_plot(
        plot_optimization_history,
        study,
        os.path.join(PLOT_DIR, "optimization_history.png"),
    )
    _save_plot(
        plot_param_importances,
        study,
        os.path.join(PLOT_DIR, "param_importances.png"),
    )


if __name__ == "__main__":
    main()