# mvppred/scripts/export_mlflow_results.py
# Md Wasi Ul Kabir
# CC Edit
# 41d1183
# scripts/export_mlflow_results.py
import os
import mlflow
import pandas as pd
# Directory that receives every CSV written by this script.
OUT_DIR = "results"
os.makedirs(OUT_DIR, exist_ok=True)

# Change if your tracking URI is different
mlflow.set_tracking_uri("file:./mlruns")

# Collect the runs of every experiment, one DataFrame per experiment, and
# label each with its experiment name so they can be analysed together.
experiments = mlflow.search_experiments()
rows = []
for exp in experiments:
    runs = mlflow.search_runs(
        experiment_ids=[exp.experiment_id],
        output_format="pandas",
    )
    if runs.empty:
        continue
    runs["experiment_name"] = exp.name
    rows.append(runs)

if not rows:
    print("No runs found. Run train_eval.py first.")
    # `exit` is a helper injected by the `site` module for interactive use
    # and may be absent (e.g. `python -S`); SystemExit is always available.
    raise SystemExit(0)

all_runs = pd.concat(rows, ignore_index=True)
# Keep useful columns only
cols = [
    "experiment_name",
    "run_name",
    "params.target",
    "params.knn_k",
    "metrics.mae",
    "metrics.pcc",
    "metrics.n_test_observed",
    "metrics.n_test_total",
    "metrics.mae_mean",
    "metrics.pcc_mean",
    "metrics.mae_overall_mean",
    "metrics.pcc_overall_mean",
]

# The run name is usually stored under "tags.mlflow.runName" rather than
# "run_name". Without this fallback the presence filter below silently drops
# the run name from the export.
export_source = all_runs
if (
    "run_name" not in all_runs.columns
    and "tags.mlflow.runName" in all_runs.columns
):
    # assign() returns a new frame, so all_runs itself is left untouched.
    export_source = all_runs.assign(run_name=all_runs["tags.mlflow.runName"])

# Only keep the requested columns that are actually present.
cols = [c for c in cols if c in export_source.columns]
out = export_source[cols]

export_path = os.path.join(OUT_DIR, "mlflow_cv_results_export.csv")
out.to_csv(export_path, index=False)
print(out)
print(f"\nSaved to {export_path}")
# ============================================================
# Compare two experiment tags: NewData vs OldData
# ============================================================
TAG_A = "OldData"
TAG_B = "NewData"

compare_df = all_runs.copy()

# Locate the run name; MLflow stores it under the tag column in most
# versions, so that candidate is tried first.
for name_col in ("tags.mlflow.runName", "run_name"):
    if name_col in compare_df.columns:
        compare_df["run_name_full"] = compare_df[name_col]
        break
else:
    raise ValueError("Could not find MLflow run name column.")

# Detect the dataset tag with a case-insensitive match on the run name.
run_names = compare_df["run_name_full"].astype(str)
has_tag_a = run_names.str.contains(TAG_A, case=False, na=False)
has_tag_b = run_names.str.contains(TAG_B, case=False, na=False)

compare_df["dataset_tag"] = None
compare_df.loc[has_tag_a, "dataset_tag"] = TAG_A
# Assigned second, so a run name matching both tags ends up as TAG_B.
compare_df.loc[has_tag_b, "dataset_tag"] = TAG_B

# Keep only the runs that matched one of the two tags.
compare_df = compare_df[has_tag_a | has_tag_b]
# ----------------------------
# 1. Per-target comparison
# ----------------------------
# Pivot the per-target summary metrics so that each dataset tag gets its own
# column, then add the TAG_B - TAG_A differences.
target_required = ["params.target", "metrics.mae_mean"]
target_missing = [c for c in target_required if c not in compare_df.columns]

if target_missing:
    # Skip instead of aborting with a KeyError so that the remaining
    # comparisons below still run.
    print(f"\nPer-target comparison skipped; missing columns: {target_missing}")
else:
    # PCC is optional here: compare on MAE alone when it was not logged.
    target_metrics = [
        c for c in ("metrics.mae_mean", "metrics.pcc_mean")
        if c in compare_df.columns
    ]
    target_cols = ["dataset_tag", "params.target", *target_metrics]
    # Same row filter as before: target and MAE summary must both be set.
    target_results = compare_df.dropna(subset=target_required)[target_cols]

    target_compare = target_results.pivot_table(
        index="params.target",
        columns="dataset_tag",
        values=target_metrics,
        aggfunc="mean",
    )
    # Flatten the (metric, tag) column pairs into "metric_tag" names.
    target_compare.columns = [
        f"{metric}_{tag}" for metric, tag in target_compare.columns
    ]
    target_compare = target_compare.reset_index()

    # Add differences: NewData - OldData. The columns are looked up through
    # TAG_A/TAG_B so the deltas are still computed if the tags are changed.
    # The delta column names are kept as-is for existing consumers of the CSV.
    mae_new = target_compare.get(f"metrics.mae_mean_{TAG_B}")
    mae_old = target_compare.get(f"metrics.mae_mean_{TAG_A}")
    pcc_new = target_compare.get(f"metrics.pcc_mean_{TAG_B}")
    pcc_old = target_compare.get(f"metrics.pcc_mean_{TAG_A}")

    if mae_new is not None and mae_old is not None:
        target_compare["delta_mae_New_minus_Old"] = mae_new - mae_old
    if pcc_new is not None and pcc_old is not None:
        target_compare["delta_pcc_New_minus_Old"] = pcc_new - pcc_old

    # Resolves to the original file name for the default tags.
    by_target_path = os.path.join(
        OUT_DIR, f"compare_{TAG_B}_vs_{TAG_A}_by_target.csv"
    )
    target_compare.to_csv(by_target_path, index=False)

    print("\nPer-target comparison:")
    print(target_compare)
    print(f"\nSaved to {by_target_path}")
# ----------------------------
# 2. Overall comparison
# ----------------------------
# Average the overall summary metrics per dataset tag. Only the metrics that
# were actually logged are used, so a missing column no longer raises KeyError.
overall_metrics = [
    c for c in ("metrics.mae_overall_mean", "metrics.pcc_overall_mean")
    if c in compare_df.columns
]

if not overall_metrics:
    print("\nOverall comparison skipped; no overall metrics were logged.")
else:
    # Keep runs that carry at least one of the overall metrics.
    overall_results = compare_df.dropna(subset=overall_metrics, how="all")

    overall_compare = overall_results.groupby("dataset_tag", as_index=False).agg(
        {metric: "mean" for metric in overall_metrics}
    )

    # Resolves to the original file name for the default tags.
    overall_path = os.path.join(
        OUT_DIR, f"compare_{TAG_B}_vs_{TAG_A}_overall.csv"
    )
    overall_compare.to_csv(overall_path, index=False)

    print("\nOverall comparison:")
    print(overall_compare)
    print(f"\nSaved to {overall_path}")
# ----------------------------
# 3. Fold-level comparison
# ----------------------------
# Summarise the per-run MAE/PCC values (mean and std) for every
# (dataset tag, target) pair, plus the summed test counts when available.
fold_required = ["params.target", "metrics.mae", "metrics.pcc"]
fold_missing = [c for c in fold_required if c not in compare_df.columns]

if fold_missing:
    # Skip instead of aborting with a KeyError on columns that were not logged.
    print(f"\nFold-level summary skipped; missing columns: {fold_missing}")
else:
    # Only runs that report both metrics contribute to the summary.
    fold_results = compare_df.dropna(subset=["metrics.mae", "metrics.pcc"])

    fold_agg = {
        "metrics.mae": ["mean", "std"],
        "metrics.pcc": ["mean", "std"],
    }
    # The count columns are optional; aggregate them only when present.
    for count_col in ("metrics.n_test_observed", "metrics.n_test_total"):
        if count_col in fold_results.columns:
            fold_agg[count_col] = "sum"

    fold_compare = fold_results.groupby(
        ["dataset_tag", "params.target"],
        as_index=False,
    ).agg(fold_agg)

    # Flatten (column, function) pairs into "column_function"; the group keys
    # have an empty second level, hence the strip of the trailing "_".
    fold_compare.columns = [
        "_".join(col).strip("_") for col in fold_compare.columns
    ]

    # Resolves to the original file name for the default tags.
    fold_path = os.path.join(
        OUT_DIR, f"compare_{TAG_B}_vs_{TAG_A}_fold_summary.csv"
    )
    fold_compare.to_csv(fold_path, index=False)

    print("\nFold-level summary:")
    print(fold_compare)
    print(f"\nSaved to {fold_path}")