Spaces:

openenv-community
/

test-local-nested-envs

Running on T4

Claude committed 2 days ago

Commit

76f180f

unverified ·

1 Parent(s): 726152d

Make Supabase uploads incremental — upload after every step

Instead of uploading once at the end (risking total data loss on
crash/timeout), the uploader now:
- Creates/upserts the training_runs row after each step
- Inserts episode rows immediately after each step
- Calls finish() at end to update duration and upload files

Also adds a callback hook to TrainingLogger so the uploader is
notified automatically via log_iteration().

Note: requires adding an UPDATE RLS policy on training_runs
(see updated supabase_setup.sql).

https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V

Files changed (4) hide show

layer1/train.py +28 -18
layer1/training_logger.py +12 -0
layer1/upload.py +165 -154
scripts/supabase_setup.sql +3 -1

layer1/train.py CHANGED Viewed

@@ -33,7 +33,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from config_loader import load_config, make_grpo_config, make_env_config, get_report_config, get_paths, get_generation_config, get_personas_config, get_upload_config
 from layer1.grpo_trainer import GRPOConfig, GRPOPromptTrainer, PromptEvaluator
 from layer1.training_logger import TrainingLogger, ReportGenerator
-from layer1.upload import upload_training_results
 from layer2.customer_sim import CustomerPersona, CustomerSimulator
 from layer2.hf_agent import HFAgent
 from personas.generate_personas import generate_personas
@@ -153,6 +153,24 @@ def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: s
     training_logger = TrainingLogger(
         log_dir=paths_cfg["log_dir"], total_steps=config.num_training_steps
     )
     trainer = GRPOPromptTrainer(config=config, evaluator=evaluator, logger=training_logger)
     trainer.setup_model()
     trainer.train()
@@ -214,28 +232,20 @@ def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: s
         except OSError:
             print("WARNING: Could not re-read report from disk")
-    # Upload to Supabase if configured
-    upload_cfg = upload_cfg or {}
-    if upload_cfg.get("enabled") and os.environ.get("SUPABASE_URL"):
         print(f"\n{'='*60}")
-        print("UPLOADING TO SUPABASE...")
         print(f"{'='*60}")
-        upload_result = upload_training_results(
             raw_summary=raw_summary,
-            run_id=training_logger.timestamp,
-            bucket=upload_cfg.get("bucket", "training-results"),
-            report_path=report_path if report_cfg["enabled"] else None,
-            chart_path=None,  # chart path is internal to ReportGenerator
-            config={"grpo": config.__dict__, "report": report_cfg, "paths": paths_cfg},
         )
-        print(f"  Run ID:  {upload_result['run_id']}")
-        print(f"  Files:   {len(upload_result['storage_paths'])} uploaded")
-        print(f"  DB rows: {upload_result['db_rows']}")
-        if upload_result.get("error"):
-            print(f"  Error:   {upload_result['error']}")
         print(f"{'='*60}")
-    elif upload_cfg.get("enabled"):
-        print("\nSupabase upload enabled but SUPABASE_URL not set — skipping")
 def run_eval(hf_token: str | None, prompt: str, episodes: int):

 from config_loader import load_config, make_grpo_config, make_env_config, get_report_config, get_paths, get_generation_config, get_personas_config, get_upload_config
 from layer1.grpo_trainer import GRPOConfig, GRPOPromptTrainer, PromptEvaluator
 from layer1.training_logger import TrainingLogger, ReportGenerator
+from layer1.upload import SupabaseUploader
 from layer2.customer_sim import CustomerPersona, CustomerSimulator
 from layer2.hf_agent import HFAgent
 from personas.generate_personas import generate_personas
     training_logger = TrainingLogger(
         log_dir=paths_cfg["log_dir"], total_steps=config.num_training_steps
     )
+    # Wire up incremental Supabase uploads
+    upload_cfg = upload_cfg or {}
+    uploader = None
+    if upload_cfg.get("enabled") and os.environ.get("SUPABASE_URL"):
+        uploader = SupabaseUploader(
+            run_id=training_logger.timestamp,
+            bucket=upload_cfg.get("bucket", "training-results"),
+            config={"grpo": config.__dict__, "report": report_cfg, "paths": paths_cfg},
+        )
+        if uploader.enabled:
+            training_logger.add_on_step_callback(uploader.after_step)
+            print("Supabase incremental upload enabled")
+        else:
+            uploader = None
+    elif upload_cfg.get("enabled"):
+        print("Supabase upload enabled but SUPABASE_URL not set — skipping")
     trainer = GRPOPromptTrainer(config=config, evaluator=evaluator, logger=training_logger)
     trainer.setup_model()
     trainer.train()
         except OSError:
             print("WARNING: Could not re-read report from disk")
+    # Finalize Supabase upload (update duration, upload files)
+    if uploader and uploader.enabled:
         print(f"\n{'='*60}")
+        print("FINALIZING SUPABASE UPLOAD...")
         print(f"{'='*60}")
+        uploader.finish(
+            duration_seconds=raw_summary.get("duration_seconds"),
+            report_path=report_path,
             raw_summary=raw_summary,
         )
+        print(f"  Run ID:  {uploader.run_id}")
+        print(f"  Steps uploaded incrementally: {len(uploader._mean_rewards)}")
+        print(f"  Episodes uploaded: {uploader._total_episodes}")
         print(f"{'='*60}")
 def run_eval(hf_token: str | None, prompt: str, episodes: int):

layer1/training_logger.py CHANGED Viewed

@@ -32,6 +32,7 @@ class TrainingLogger:
         self.total_steps = total_steps
         self.iterations: list[dict[str, Any]] = []
         self._start_time = datetime.now()
         with open(self.log_path, "w") as f:
             f.write(f"Training Log — {self._start_time.isoformat()}\n")
@@ -39,6 +40,10 @@ class TrainingLogger:
             f.flush()
             os.fsync(f.fileno())
     def log_iteration(self, step: int, prompt: str, eval_result: dict[str, Any]):
         """Log a single training iteration (one prompt evaluated)."""
         entry = {
@@ -69,6 +74,13 @@ class TrainingLogger:
         logger.info("Logged step %d: mean_reward=%.1f", step, entry["mean_reward"])
     def save_json(self):
         """Save structured training data to JSON."""
         data = {

         self.total_steps = total_steps
         self.iterations: list[dict[str, Any]] = []
         self._start_time = datetime.now()
+        self._on_step_callbacks: list[Any] = []
         with open(self.log_path, "w") as f:
             f.write(f"Training Log — {self._start_time.isoformat()}\n")
             f.flush()
             os.fsync(f.fileno())
+    def add_on_step_callback(self, callback):
+        """Register a callback called after each step: callback(step, eval_result, prompt)."""
+        self._on_step_callbacks.append(callback)
     def log_iteration(self, step: int, prompt: str, eval_result: dict[str, Any]):
         """Log a single training iteration (one prompt evaluated)."""
         entry = {
         logger.info("Logged step %d: mean_reward=%.1f", step, entry["mean_reward"])
+        # Notify callbacks (e.g. Supabase uploader)
+        for cb in self._on_step_callbacks:
+            try:
+                cb(step, eval_result, prompt)
+            except Exception as e:
+                logger.error("Step callback failed: %s", e)
     def save_json(self):
         """Save structured training data to JSON."""
         data = {

layer1/upload.py CHANGED Viewed

@@ -1,9 +1,11 @@
 """
-Supabase uploader for training results.
-Uploads:
-  1. Raw summary JSON + report files to Supabase Storage
-  2. Per-run and per-episode metrics to Postgres tables
 Requires SUPABASE_URL and SUPABASE_KEY environment variables.
 """
@@ -38,163 +40,172 @@ def _get_client():
     return create_client(url, key)
-def upload_training_results(
-    raw_summary: dict[str, Any],
-    run_id: str | None = None,
-    bucket: str = "training-results",
-    report_path: str | None = None,
-    chart_path: str | None = None,
-    config: dict[str, Any] | None = None,
-) -> dict[str, Any]:
     """
-    Upload training results to Supabase (Storage + DB).
-    Args:
-        raw_summary: Output of TrainingLogger.generate_raw_summary().
-        run_id: Unique run identifier. Auto-generated if not provided.
-        bucket: Supabase Storage bucket name.
-        report_path: Path to the markdown report file (optional).
-        chart_path: Path to the reward chart PNG (optional).
-        config: Training config dict to store with the run (optional).
-    Returns:
-        Dict with upload results: {"run_id", "storage_paths", "db_rows"}.
     """
-    client = _get_client()
-    if client is None:
-        logger.warning("Supabase upload skipped — client not available")
-        return {"run_id": None, "storage_paths": [], "db_rows": 0, "error": "no client"}
-    if run_id is None:
-        run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
-    results: dict[str, Any] = {"run_id": run_id, "storage_paths": [], "db_rows": 0}
-    # --- Storage uploads ---
-    results["storage_paths"] = _upload_files(
-        client, bucket, run_id, raw_summary, report_path, chart_path
-    )
-    # --- DB inserts ---
-    results["db_rows"] = _insert_metrics(client, run_id, raw_summary, config)
-    logger.info(
-        "Supabase upload complete: run_id=%s, files=%d, db_rows=%d",
-        run_id, len(results["storage_paths"]), results["db_rows"],
-    )
-    return results
-def _upload_files(
-    client,
-    bucket: str,
-    run_id: str,
-    raw_summary: dict[str, Any],
-    report_path: str | None,
-    chart_path: str | None,
-) -> list[str]:
-    """Upload files to Supabase Storage."""
-    uploaded = []
-    # Upload raw summary JSON
-    try:
-        summary_bytes = json.dumps(raw_summary, indent=2, default=str).encode()
-        path = f"{run_id}/raw_summary.json"
-        client.storage.from_(bucket).upload(
-            path, summary_bytes, {"content-type": "application/json"}
-        )
-        uploaded.append(path)
-        logger.info("Uploaded %s to storage", path)
-    except Exception as e:
-        logger.error("Failed to upload raw_summary.json: %s", e)
-    # Upload report markdown
-    if report_path and os.path.exists(report_path):
-        try:
-            with open(report_path, "rb") as f:
-                path = f"{run_id}/report.md"
-                client.storage.from_(bucket).upload(
-                    path, f.read(), {"content-type": "text/markdown"}
-                )
-                uploaded.append(path)
-                logger.info("Uploaded %s to storage", path)
-        except Exception as e:
-            logger.error("Failed to upload report: %s", e)
-    # Upload chart PNG
-    if chart_path and os.path.exists(chart_path):
         try:
-            with open(chart_path, "rb") as f:
-                path = f"{run_id}/reward_chart.png"
-                client.storage.from_(bucket).upload(
-                    path, f.read(), {"content-type": "image/png"}
-                )
-                uploaded.append(path)
-                logger.info("Uploaded %s to storage", path)
         except Exception as e:
-            logger.error("Failed to upload chart: %s", e)
-    return uploaded
-def _insert_metrics(
-    client,
-    run_id: str,
-    raw_summary: dict[str, Any],
-    config: dict[str, Any] | None,
-) -> int:
-    """Insert training run + per-episode metrics into Postgres tables."""
-    rows_inserted = 0
-    # Insert training run summary
-    try:
-        run_row = {
-            "run_id": run_id,
-            "started_at": datetime.now(timezone.utc).isoformat(),
-            "duration_seconds": raw_summary.get("duration_seconds"),
-            "total_steps": len(raw_summary.get("steps", [])),
-            "total_episodes": raw_summary.get("total_episodes", 0),
-            "best_step": raw_summary.get("best_step"),
-            "best_mean_reward": raw_summary.get("best_mean_reward"),
-            "mean_rewards": raw_summary.get("mean_rewards", []),
-            "min_rewards": raw_summary.get("min_rewards", []),
-            "max_rewards": raw_summary.get("max_rewards", []),
-            "config": config,
-        }
-        client.table("training_runs").insert(run_row).execute()
-        rows_inserted += 1
-        logger.info("Inserted training run: %s", run_id)
-    except Exception as e:
-        logger.error("Failed to insert training_runs row: %s", e)
-    # Insert per-episode metrics in batches
-    episode_rows = []
-    for m in raw_summary.get("per_episode_metrics", []):
-        episode_rows.append({
-            "run_id": run_id,
-            "step": m["step"],
-            "episode": m["episode"],
-            "reward": m.get("reward"),
-            "turns": m.get("turns", 0),
-            "intent_captured": m.get("intent_captured", False),
-            "intent_correct": m.get("intent_correct", False),
-            "true_intent": m.get("true_intent", ""),
-            "agent_intent": m.get("agent_intent", ""),
-            "injection_attempted": m.get("injection_attempted", False),
-            "injection_succeeded": m.get("injection_succeeded", False),
-            "api_call_made": m.get("api_call_made", False),
-            "api_call_correct": m.get("api_call_correct", False),
-        })
-    # Batch insert (Supabase/PostgREST supports bulk inserts)
-    if episode_rows:
-        batch_size = 100
-        for i in range(0, len(episode_rows), batch_size):
-            batch = episode_rows[i : i + batch_size]
-            try:
-                client.table("training_episodes").insert(batch).execute()
-                rows_inserted += len(batch)
-            except Exception as e:
-                logger.error("Failed to insert episode batch %d: %s", i, e)
-    return rows_inserted

 """
+Supabase uploader for training results — incremental mode.
+Uploads after every training step so data is never lost if the job crashes.
+- Creates the training_runs row on the first after_step() call
+- Upserts that row after each step with updated reward arrays
+- Inserts per-episode rows after each step
 Requires SUPABASE_URL and SUPABASE_KEY environment variables.
 """
     return create_client(url, key)
+class SupabaseUploader:
     """
+    Incremental uploader — call after_step() after each training step.
+    Creates the training_runs row on first call, then upserts it with
+    updated arrays on every subsequent call. Episode rows are inserted
+    immediately and never re-sent.
     """
+    def __init__(
+        self,
+        run_id: str,
+        bucket: str = "training-results",
+        config: dict[str, Any] | None = None,
+    ):
+        self.run_id = run_id
+        self.bucket = bucket
+        self.config = config
+        self._client = _get_client()
+        self._run_created = False
+        # Accumulated arrays (mirrors what training_runs stores)
+        self._mean_rewards: list[float] = []
+        self._min_rewards: list[float] = []
+        self._max_rewards: list[float] = []
+        self._total_episodes = 0
+        self._started_at = datetime.now(timezone.utc).isoformat()
+        if self._client:
+            logger.info("SupabaseUploader ready: run_id=%s", run_id)
+        else:
+            logger.warning("SupabaseUploader: no client — uploads will be skipped")
+    @property
+    def enabled(self) -> bool:
+        return self._client is not None
+    def after_step(self, step: int, eval_result: dict[str, Any], prompt: str):
+        """
+        Called after each training step/candidate evaluation.
+        Upserts the training_runs row and inserts new episode rows.
+        """
+        if not self._client:
+            return
+        mean_reward = eval_result.get("mean_reward", 0.0)
+        min_reward = eval_result.get("min_reward", 0.0)
+        max_reward = eval_result.get("max_reward", 0.0)
+        self._mean_rewards.append(mean_reward)
+        self._min_rewards.append(min_reward)
+        self._max_rewards.append(max_reward)
+        num_episodes = eval_result.get("num_episodes", 0)
+        self._total_episodes += num_episodes
+        # Best so far
+        best_mean = max(self._mean_rewards)
+        best_idx = self._mean_rewards.index(best_mean)
+        # --- Upsert training_runs row ---
+        run_row = {
+            "run_id": self.run_id,
+            "started_at": self._started_at,
+            "duration_seconds": None,  # updated at end
+            "total_steps": len(self._mean_rewards),
+            "total_episodes": self._total_episodes,
+            "best_step": best_idx,
+            "best_mean_reward": best_mean,
+            "mean_rewards": self._mean_rewards,
+            "min_rewards": self._min_rewards,
+            "max_rewards": self._max_rewards,
+            "config": self.config,
+        }
         try:
+            self._client.table("training_runs").upsert(
+                run_row, on_conflict="run_id"
+            ).execute()
+            self._run_created = True
+            logger.info(
+                "Upserted training_runs: step=%d mean_reward=%.1f",
+                step, mean_reward,
+            )
         except Exception as e:
+            logger.error("Failed to upsert training_runs: %s", e)
+        # --- Insert episode rows for this step ---
+        episode_rows = []
+        rewards_list = eval_result.get("rewards", [])
+        for ei, log in enumerate(eval_result.get("logs", [])):
+            episode_rows.append({
+                "run_id": self.run_id,
+                "step": step,
+                "episode": ei,
+                "reward": rewards_list[ei] if ei < len(rewards_list) else None,
+                "turns": log.get("turns", 0),
+                "intent_captured": log.get("intent_captured", False),
+                "intent_correct": log.get("intent_correct", False),
+                "true_intent": log.get("true_intent", ""),
+                "agent_intent": log.get("agent_intent", ""),
+                "injection_attempted": log.get("injection_attempted", False),
+                "injection_succeeded": log.get("injection_succeeded", False),
+                "api_call_made": log.get("api_call_made", False),
+                "api_call_correct": log.get("api_call_correct", False),
+            })
+        if episode_rows:
+            try:
+                self._client.table("training_episodes").insert(episode_rows).execute()
+                logger.info(
+                    "Inserted %d episode rows for step %d", len(episode_rows), step
+                )
+            except Exception as e:
+                logger.error("Failed to insert episodes for step %d: %s", step, e)
+    def finish(
+        self,
+        duration_seconds: float | None = None,
+        report_path: str | None = None,
+        chart_path: str | None = None,
+        raw_summary: dict[str, Any] | None = None,
+    ):
+        """
+        Called at end of training. Updates duration and uploads final files.
+        """
+        if not self._client:
+            return
+        # Update duration on the run row
+        if duration_seconds is not None and self._run_created:
+            try:
+                self._client.table("training_runs").update(
+                    {"duration_seconds": duration_seconds}
+                ).eq("run_id", self.run_id).execute()
+                logger.info("Updated duration: %.1fs", duration_seconds)
+            except Exception as e:
+                logger.error("Failed to update duration: %s", e)
+        # Upload files to Storage
+        if raw_summary:
+            self._upload_file(
+                f"{self.run_id}/raw_summary.json",
+                json.dumps(raw_summary, indent=2, default=str).encode(),
+                "application/json",
+            )
+        if report_path and os.path.exists(report_path):
+            with open(report_path, "rb") as f:
+                self._upload_file(
+                    f"{self.run_id}/report.md", f.read(), "text/markdown"
+                )
+        if chart_path and os.path.exists(chart_path):
+            with open(chart_path, "rb") as f:
+                self._upload_file(
+                    f"{self.run_id}/reward_chart.png", f.read(), "image/png"
+                )
+    def _upload_file(self, path: str, data: bytes, content_type: str):
+        """Upload a single file to Supabase Storage."""
+        try:
+            self._client.storage.from_(self.bucket).upload(
+                path, data, {"content-type": content_type}
+            )
+            logger.info("Uploaded %s to storage", path)
+        except Exception as e:
+            logger.error("Failed to upload %s: %s", path, e)

scripts/supabase_setup.sql CHANGED Viewed

@@ -49,9 +49,11 @@ create index if not exists idx_episodes_step on training_episodes(run_id, step);
 alter table training_runs enable row level security;
 alter table training_episodes enable row level security;
--- Allow inserts with service key (anon or service_role)
 create policy "Allow insert training_runs" on training_runs
     for insert with check (true);
 create policy "Allow select training_runs" on training_runs
     for select using (true);

 alter table training_runs enable row level security;
 alter table training_episodes enable row level security;
+-- Allow inserts, updates, and selects. NOTE(review): these policies name no role and use (true), so they are not limited to the service key — confirm anon access is intended.
 create policy "Allow insert training_runs" on training_runs
     for insert with check (true);
+create policy "Allow update training_runs" on training_runs
+    for update using (true);
 create policy "Allow select training_runs" on training_runs
     for select using (true);