# scripts/export_mlflow_results.py
"""Export MLflow runs to CSV and compare OldData vs. NewData runs.

The script reads every experiment from the configured tracking URI and
writes up to four CSV files into ``OUT_DIR``:

* ``mlflow_cv_results_export.csv`` - selected columns of every run.
* ``compare_NewData_vs_OldData_by_target.csv`` - per-target means of
  ``metrics.mae_mean`` / ``metrics.pcc_mean`` for each dataset tag, plus
  New-minus-Old deltas when both tags are present.
* ``compare_NewData_vs_OldData_overall.csv`` - per-tag means of the
  ``*_overall_mean`` metrics.
* ``compare_NewData_vs_OldData_fold_summary.csv`` - per tag and target
  mean/std of ``metrics.mae`` / ``metrics.pcc`` and summed test counts.

A run is assigned to a dataset tag when its run name contains ``TAG_A`` or
``TAG_B`` (case-insensitive). Runs matching neither tag are left out of the
comparisons. A comparison is skipped (with a message) when the columns it
needs were not logged by any run.
"""
import os

import mlflow
import pandas as pd

OUT_DIR = "results"
RUN_NAME_TAG = "tags.mlflow.runName"


def _missing_columns(frame, required):
    """Return the names in *required* that are not columns of *frame*."""
    return [name for name in required if name not in frame.columns]


os.makedirs(OUT_DIR, exist_ok=True)

# Change if your tracking URI is different
mlflow.set_tracking_uri("file:./mlruns")

experiments = mlflow.search_experiments()
rows = []
for exp in experiments:
    runs = mlflow.search_runs(
        experiment_ids=[exp.experiment_id],
        output_format="pandas",
    )
    if runs.empty:
        continue
    runs["experiment_name"] = exp.name
    rows.append(runs)

if not rows:
    print("No runs found. Run train_eval.py first.")
    # SystemExit instead of the `exit` helper, which is only injected by the
    # `site` module and is not guaranteed to exist in every interpreter setup.
    raise SystemExit(0)

all_runs = pd.concat(rows, ignore_index=True)

# MLflow stores the run name under the runName tag in most versions; expose it
# as `run_name` so the export below does not silently drop it.
if "run_name" not in all_runs.columns and RUN_NAME_TAG in all_runs.columns:
    all_runs["run_name"] = all_runs[RUN_NAME_TAG]

# Keep useful columns only
cols = [
    "experiment_name",
    "run_name",
    "params.target",
    "params.knn_k",
    "metrics.mae",
    "metrics.pcc",
    "metrics.n_test_observed",
    "metrics.n_test_total",
    "metrics.mae_mean",
    "metrics.pcc_mean",
    "metrics.mae_overall_mean",
    "metrics.pcc_overall_mean",
]
# A metrics/params column only exists if at least one run logged it.
cols = [c for c in cols if c in all_runs.columns]
out = all_runs[cols]

export_path = os.path.join(OUT_DIR, "mlflow_cv_results_export.csv")
out.to_csv(export_path, index=False)
print(out)
print(f"\nSaved to {export_path}")

# ============================================================
# Compare two experiment tags: NewData vs OldData
# ============================================================
TAG_A = "OldData"
TAG_B = "NewData"

compare_df = all_runs.copy()

# MLflow stores run name here in most versions
if RUN_NAME_TAG in compare_df.columns:
    compare_df["run_name_full"] = compare_df[RUN_NAME_TAG]
elif "run_name" in compare_df.columns:
    compare_df["run_name_full"] = compare_df["run_name"]
else:
    raise ValueError("Could not find MLflow run name column.")

# Detect dataset tag from run name. Tags are matched as literal substrings
# (not regular expressions). TAG_B is applied last, so a run name containing
# both tags ends up labelled TAG_B.
run_names = compare_df["run_name_full"].astype(str)
compare_df["dataset_tag"] = None
for tag in (TAG_A, TAG_B):
    tag_mask = run_names.str.contains(tag, case=False, na=False, regex=False)
    compare_df.loc[tag_mask, "dataset_tag"] = tag

compare_df = compare_df[compare_df["dataset_tag"].isin([TAG_A, TAG_B])]

# ----------------------------
# 1. Per-target comparison
# ----------------------------
missing = _missing_columns(compare_df, ["params.target", "metrics.mae_mean"])
if missing:
    print(f"\nSkipping per-target comparison; missing columns: {missing}")
else:
    target_results = compare_df[
        compare_df["params.target"].notna()
        & compare_df["metrics.mae_mean"].notna()
    ].copy()

    # metrics.pcc_mean is optional: compare it only when some run logged it.
    target_metrics = [
        metric
        for metric in ("metrics.mae_mean", "metrics.pcc_mean")
        if metric in target_results.columns
    ]
    target_cols = ["dataset_tag", "params.target", *target_metrics]
    target_results = target_results[target_cols]

    target_compare = target_results.pivot_table(
        index="params.target",
        columns="dataset_tag",
        values=target_metrics,
        aggfunc="mean",
    )
    # Flatten the (metric, tag) column pairs into "<metric>_<tag>" names.
    target_compare.columns = [
        f"{metric}_{tag}" for metric, tag in target_compare.columns
    ]
    target_compare = target_compare.reset_index()

    # Add differences: NewData - OldData. Column names are derived from the
    # tag constants so the lookups stay in sync with the pivot above; a delta
    # is only added when both tags produced a column.
    mae_new = target_compare.get(f"metrics.mae_mean_{TAG_B}")
    mae_old = target_compare.get(f"metrics.mae_mean_{TAG_A}")
    pcc_new = target_compare.get(f"metrics.pcc_mean_{TAG_B}")
    pcc_old = target_compare.get(f"metrics.pcc_mean_{TAG_A}")

    if mae_new is not None and mae_old is not None:
        target_compare["delta_mae_New_minus_Old"] = mae_new - mae_old
    if pcc_new is not None and pcc_old is not None:
        target_compare["delta_pcc_New_minus_Old"] = pcc_new - pcc_old

    by_target_path = os.path.join(
        OUT_DIR, "compare_NewData_vs_OldData_by_target.csv"
    )
    target_compare.to_csv(by_target_path, index=False)
    print("\nPer-target comparison:")
    print(target_compare)
    print(f"\nSaved to {by_target_path}")

# ----------------------------
# 2. Overall comparison
# ----------------------------
overall_metrics = [
    metric
    for metric in ("metrics.mae_overall_mean", "metrics.pcc_overall_mean")
    if metric in compare_df.columns
]
if not overall_metrics:
    print("\nSkipping overall comparison; no overall metrics were logged.")
else:
    # Keep runs that logged at least one of the overall metrics.
    overall_results = compare_df[
        compare_df[overall_metrics].notna().any(axis=1)
    ].copy()

    overall_compare = overall_results.groupby(
        "dataset_tag", as_index=False
    ).agg({metric: "mean" for metric in overall_metrics})

    overall_path = os.path.join(
        OUT_DIR, "compare_NewData_vs_OldData_overall.csv"
    )
    overall_compare.to_csv(overall_path, index=False)
    print("\nOverall comparison:")
    print(overall_compare)
    print(f"\nSaved to {overall_path}")

# ----------------------------
# 3. Fold-level comparison
# ----------------------------
missing = _missing_columns(
    compare_df, ["params.target", "metrics.mae", "metrics.pcc"]
)
if missing:
    print(f"\nSkipping fold-level comparison; missing columns: {missing}")
else:
    # Fold-level rows are the ones that logged both per-fold metrics.
    fold_results = compare_df[
        compare_df["metrics.mae"].notna() & compare_df["metrics.pcc"].notna()
    ].copy()

    fold_agg = {
        "metrics.mae": ["mean", "std"],
        "metrics.pcc": ["mean", "std"],
    }
    # Test-set counts are summed only when they were logged.
    for count_col in ("metrics.n_test_observed", "metrics.n_test_total"):
        if count_col in fold_results.columns:
            fold_agg[count_col] = "sum"

    fold_compare = fold_results.groupby(
        ["dataset_tag", "params.target"], as_index=False
    ).agg(fold_agg)
    # Flatten (column, statistic) pairs into "<column>_<statistic>"; the group
    # keys carry an empty statistic, so the trailing "_" is stripped.
    fold_compare.columns = [
        "_".join(col).strip("_") for col in fold_compare.columns
    ]

    fold_path = os.path.join(
        OUT_DIR, "compare_NewData_vs_OldData_fold_summary.csv"
    )
    fold_compare.to_csv(fold_path, index=False)
    print("\nFold-level summary:")
    print(fold_compare)
    print(f"\nSaved to {fold_path}")