llm-zero-lite-experiments / src /plot_results.py
kishan51's picture
Add files using upload-large-folder tool
4f99f73 verified
Raw
History Blame Contribute Delete
1.74 kB
import argparse
from pathlib import Path
import matplotlib.pyplot as plt
import pandas as pd
# (metric column, y-axis label, output filename) for each per-stage plot.
_PLOT_SPECS = (
    ("eval_accuracy", "Evaluation accuracy", "eval_accuracy_by_stage.png"),
    ("eval_sampled_pass_at_1", "Sampled pass@1", "sampled_pass_at_1_by_stage.png"),
    ("eval_sampled_pass_at_4", "Sampled pass@4", "sampled_pass_at_4_by_stage.png"),
    ("train_reward_mean", "Train reward mean", "reward_mean_by_stage.png"),
    ("kl_mean", "KL mean", "kl_by_stage.png"),
    ("avg_completion_length", "Completion length", "completion_length_by_stage.png"),
)


def _load_histories(runs_dir):
    """Concatenate every ``<runs_dir>/*/history.csv`` into one DataFrame.

    Raises:
        SystemExit: if no ``history.csv`` file is found.
    """
    # Path.glob yields entries in filesystem-dependent order; sorting keeps
    # the row order of the combined table reproducible across machines.
    history_paths = sorted(runs_dir.glob("*/history.csv"))
    if not history_paths:
        raise SystemExit("No histories found")
    frames = [pd.read_csv(path) for path in history_paths]
    return pd.concat(frames, ignore_index=True)


def _plot_metric(by_stage, column, ylabel, out_path):
    """Save a line plot of ``column`` against ``stage``, one line per run.

    ``by_stage`` must already be sorted by ``stage`` so that each run's line
    is drawn left to right instead of following file order.
    """
    fig, ax = plt.subplots(figsize=(7, 4))
    try:
        # groupby keeps the row order inside each group, so the stage
        # ordering of ``by_stage`` carries over to every run.
        for run_name, group in by_stage.groupby("run_name"):
            ax.plot(group["stage"], group[column], marker="o", label=run_name)
        ax.set_xlabel("Stage")
        ax.set_ylabel(ylabel)
        ax.legend()
        fig.tight_layout()
        fig.savefig(out_path, dpi=160)
    finally:
        # Always release the figure, even when saving fails.
        plt.close(fig)


def main(argv=None) -> None:
    """Aggregate per-run training histories into summary tables and plots.

    Reads every ``<runs-dir>/*/history.csv``, then writes into ``<runs-dir>``:

    * ``summary_table.csv`` -- all history rows concatenated;
    * ``summary_table.md`` -- the highest-``stage`` row of each ``run_name``
      (``DataFrame.to_markdown`` needs the optional ``tabulate`` package);
    * one PNG per metric in ``_PLOT_SPECS`` whose column is present and not
      entirely NaN.

    The per-run final rows are also printed to stdout. The histories are
    expected to contain ``run_name`` and ``stage`` columns.

    Args:
        argv: Command-line arguments excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Raises:
        SystemExit: if no history files exist under the runs directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--runs-dir", default="runs")
    args = parser.parse_args(argv)
    runs_dir = Path(args.runs_dir)

    results = _load_histories(runs_dir)
    results.to_csv(runs_dir / "summary_table.csv", index=False)

    # A stable sort makes the "last row per run" deterministic when several
    # rows share the same stage (the later row in file order wins).
    by_stage = results.sort_values("stage", kind="stable")
    final = by_stage.groupby("run_name", as_index=False).tail(1)
    (runs_dir / "summary_table.md").write_text(
        final.to_markdown(index=False), encoding="utf-8"
    )

    for column, ylabel, filename in _PLOT_SPECS:
        # Skip metrics that were never logged by any run.
        if column not in results or results[column].isna().all():
            continue
        _plot_metric(by_stage, column, ylabel, runs_dir / filename)

    print(final.to_string(index=False))
# Run the aggregation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()