| """ |
| experiments.py — dated experiment folder convention. |
| |
| Every pipeline run goes into experiments/<YYYY-MM-DD_HHMMSS>_<name>/ |
| with a README + config.json so we can compare runs over time. |
| |
| Layout: |
| experiments/ |
| ├── 2026-04-07_193000_first-yt-batch/ |
| │   ├── README.md        ← what this run was, parameters, observations |
| │   ├── config.json      ← exact CLI args |
| │   ├── gather/          ← gather_v2 outputs (images go to drone-dataset-v2/) |
| │   │   ├── manifest.json |
| │   │   └── stats.json |
| │   ├── filter_qwen/     ← run_qwen_filter outputs |
| │   │   ├── keep_list.json |
| │   │   └── stats.json |
| │   ├── label_falcon/    ← pod_label outputs (from RunPod) |
| │   │   ├── coco.json |
| │   │   └── stats.json |
| │   ├── verify_qwen/     ← verify_vlm outputs (from RunPod) |
| │   │   ├── verified.json |
| │   │   └── stats.json |
| │   └── reviews/         ← human verdicts from the web UI |
| │       └── reviews.json |
| └── latest -> 2026-04-07_193000_first-yt-batch/   ← symlink to most recent |
| |
| The drone-dataset-v2/ images themselves are SHARED across experiments — |
| each experiment writes labels/filters/verifications referencing those images, |
| not copies of them. |
| """ |
|
|
| import json |
| import os |
| import sys |
| import time |
| from datetime import datetime |
| from pathlib import Path |
|
|
|
|
def make_experiment_dir(name: str = "", base: str = "experiments") -> str:
    """Create a timestamped experiment directory and its stage subfolders.

    The folder is named ``<YYYY-MM-DD_HHMMSS>`` or, when *name* is non-blank,
    ``<YYYY-MM-DD_HHMMSS>_<name>`` with spaces turned into ``-`` and ``/``
    turned into ``_``. Existing directories are reused rather than rejected.

    Returns the absolute path of the experiment directory.
    """
    stamp = datetime.now().strftime("%Y-%m-%d_%H%M%S")

    suffix = ""
    if name:
        suffix = name.strip().replace(" ", "-").replace("/", "_")

    if suffix:
        dirname = f"{stamp}_{suffix}"
    else:
        dirname = stamp

    root = os.path.abspath(os.path.join(base, dirname))
    os.makedirs(root, exist_ok=True)

    # One subfolder per pipeline stage.
    stages = ("gather", "filter_qwen", "label_falcon", "verify_qwen", "reviews")
    for stage in stages:
        os.makedirs(os.path.join(root, stage), exist_ok=True)

    return root
|
|
|
|
def write_readme(experiment_dir: str, name: str, description: str, params: dict):
    """Write a small markdown README capturing what this experiment is.

    Args:
        experiment_dir: Directory that receives ``README.md`` (overwritten if
            it already exists).
        name: Title of the experiment; when empty, the directory's own name
            is used instead.
        description: Free-form text; a placeholder is written when empty.
        params: JSON-serializable parameters, rendered in a fenced block.

    Raises:
        TypeError: If *params* cannot be serialized by ``json.dumps``.
        OSError: If the README cannot be written.
    """
    readme_path = os.path.join(experiment_dir, "README.md")
    # normpath drops a trailing separator; without it basename() would be ""
    # for "path/to/exp/" and the fallback title would come out blank.
    title = name or os.path.basename(os.path.normpath(experiment_dir))
    lines = [
        f"# Experiment: {title}",
        "",
        f"**Started:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
        f"**Path:** `{experiment_dir}`",
        "",
        "## Description",
        "",
        description or "(no description)",
        "",
        "## Parameters",
        "",
        "```json",
        json.dumps(params, indent=2),
        "```",
        "",
        "## Pipeline stages",
        "",
        "1. **gather/** — image gathering manifest",
        "2. **filter_qwen/** — image-level Qwen YES/NO filter results",
        "3. **label_falcon/** — Falcon Perception bbox grounding (COCO format)",
        "4. **verify_qwen/** — per-bbox Qwen verification",
        "5. **reviews/** — human verdicts from the web UI",
        "",
    ]
    # Explicit UTF-8: the text contains non-ASCII characters (the dashes above,
    # plus whatever the caller passes), so the locale default is not safe.
    with open(readme_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
|
|
|
|
def write_config(experiment_dir: str, config: dict):
    """Persist *config* as ``config.json`` inside *experiment_dir*.

    Any existing file is overwritten; the JSON is pretty-printed with a
    two-space indent.
    """
    config_path = os.path.join(experiment_dir, "config.json")
    with open(config_path, mode="w") as handle:
        json.dump(config, fp=handle, indent=2)
|
|
|
|
def update_latest_symlink(experiment_dir: str, base: str = "experiments"):
    """Point the ``<base>/latest`` symlink at the given experiment.

    The link target is the experiment's folder name (relative), so the link
    stays valid if the whole *base* directory is moved.

    Args:
        experiment_dir: Path of the experiment directory; a trailing path
            separator is tolerated.
        base: Directory that holds the experiments and the ``latest`` link.

    The call is a no-op when no usable folder name can be derived from
    *experiment_dir*, or when ``latest`` exists but is not a symlink.
    Failure to create the link is ignored (best effort).
    """
    link = os.path.join(os.path.abspath(base), "latest")

    # normpath drops a trailing separator; otherwise basename("exp/") is ""
    # and the symlink call below would fail after the old link was removed.
    target = os.path.basename(os.path.normpath(os.fspath(experiment_dir)))
    if target in ("", ".", ".."):
        # Nothing sensible to point at — keep whatever link exists.
        return

    if os.path.islink(link):
        os.unlink(link)
    elif os.path.exists(link):
        # A real file or directory is named "latest"; do not clobber it.
        return
    try:
        os.symlink(target, link)
    except OSError:
        # Best effort: symlinks may be unsupported or not permitted here.
        pass
|
|
|
|
def list_experiments(base: str = "experiments") -> list:
    """List experiment directories under *base*, newest first.

    Ordering is reverse-lexicographic on the folder name, which matches
    chronological order for the ``YYYY-MM-DD_HHMMSS`` prefix. The ``latest``
    entry and anything that is not a directory are skipped.

    Args:
        base: Directory containing the experiment folders.

    Returns:
        A list of dicts with keys ``name`` (folder name), ``path`` (joined
        with *base*), ``config`` (parsed ``config.json``, or ``{}`` when it is
        missing or unreadable) and ``has_readme`` (bool). Empty when *base*
        does not exist or is not a directory.
    """
    if not os.path.isdir(base):
        return []
    out = []
    for entry in sorted(os.listdir(base), reverse=True):
        if entry == "latest":
            continue
        full = os.path.join(base, entry)
        if not os.path.isdir(full):
            continue
        cfg = {}
        try:
            # Context manager so the handle is closed even if parsing fails.
            with open(os.path.join(full, "config.json"), encoding="utf-8") as f:
                cfg = json.load(f)
        except (OSError, ValueError):
            # Missing, unreadable or malformed config: fall back to {}.
            # (JSONDecodeError and UnicodeDecodeError are ValueErrors.)
            pass
        out.append({
            "name": entry,
            "path": full,
            "config": cfg,
            "has_readme": os.path.exists(os.path.join(full, "README.md")),
        })
    return out
|
|
|
|
if __name__ == "__main__":
    # Minimal CLI: `new` creates a dated experiment folder (README + latest
    # symlink) and prints its path; `list` prints existing experiment names.
    import argparse

    parser = argparse.ArgumentParser()
    commands = parser.add_subparsers(dest="cmd")

    new_cmd = commands.add_parser("new", help="Create a new dated experiment folder")
    new_cmd.add_argument("--name", default="", help="Optional human-readable suffix")
    new_cmd.add_argument("--description", default="")

    commands.add_parser("list", help="List existing experiments")

    cli = parser.parse_args()
    if cli.cmd == "list":
        for experiment in list_experiments():
            print(f" {experiment['name']}")
    elif cli.cmd == "new":
        created = make_experiment_dir(cli.name)
        write_readme(created, cli.name, cli.description, {})
        update_latest_symlink(created)
        print(created)
    else:
        # No sub-command given.
        parser.print_help()
|
|