Claude committed on
Commit
28bcb40
·
unverified ·
1 Parent(s): 71b0977

Add Supabase upload for training results (Storage + DB)

Browse files

- layer1/upload.py: uploads raw summary JSON, report, and chart to
Supabase Storage; inserts per-run and per-episode metrics into
training_runs and training_episodes Postgres tables
- scripts/supabase_setup.sql: migration to create tables, indexes,
and RLS policies — run in the Supabase SQL Editor before the first training run
- config.yaml: upload section with enabled flag and bucket name
- config_loader.py: get_upload_config() for new section
- pyproject.toml: supabase>=2.0.0 as optional [upload] dependency

Requires SUPABASE_URL and SUPABASE_KEY env vars. Gracefully skips
upload if not configured (logs warning, training still completes).

https://claude.ai/code/session_01DPirJ78YYN4fJUvUFJ5D6V

config.yaml CHANGED
@@ -97,6 +97,15 @@ report:
97
  example_customers: 5 # Example conversations in report
98
 
99
 
 
 
 
 
 
 
 
 
 
100
  # --- Paths ---
101
 
102
  paths:
 
97
  example_customers: 5 # Example conversations in report
98
 
99
 
100
+ # --- Upload: Supabase ---
101
+ # Upload training results to Supabase for analysis.
102
+ # Requires SUPABASE_URL and SUPABASE_KEY environment variables.
103
+
104
+ upload:
105
+ enabled: true
106
+ bucket: "training-results" # Supabase Storage bucket name
107
+
108
+
109
  # --- Paths ---
110
 
111
  paths:
config_loader.py CHANGED
@@ -126,6 +126,15 @@ def get_generation_config(cfg: dict[str, Any]) -> dict[str, Any]:
126
  }
127
 
128
 
 
 
 
 
 
 
 
 
 
129
  def get_personas_config(cfg: dict[str, Any]) -> dict[str, Any]:
130
  """Extract persona settings from config."""
131
  personas = cfg.get("personas", {})
 
126
  }
127
 
128
 
129
def get_upload_config(cfg: dict[str, Any]) -> dict[str, Any]:
    """Extract Supabase upload settings from config.

    Args:
        cfg: Full parsed config dict (output of load_config).

    Returns:
        Dict with "enabled" (bool, default False) and "bucket" (str,
        default "training-results").
    """
    # `or {}` guards against a YAML file containing a bare `upload:` section
    # with no keys, which parses to None and would crash the .get() below.
    upload = cfg.get("upload") or {}
    return {
        "enabled": upload.get("enabled", False),
        "bucket": upload.get("bucket", "training-results"),
    }
136
+
137
+
138
  def get_personas_config(cfg: dict[str, Any]) -> dict[str, Any]:
139
  """Extract persona settings from config."""
140
  personas = cfg.get("personas", {})
layer1/train.py CHANGED
@@ -30,9 +30,10 @@ load_dotenv(os.path.join(os.path.dirname(os.path.dirname(os.path.abspath(__file_
30
 
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
33
- from config_loader import load_config, make_grpo_config, make_env_config, get_report_config, get_paths, get_generation_config, get_personas_config
34
  from layer1.grpo_trainer import GRPOConfig, GRPOPromptTrainer, PromptEvaluator
35
  from layer1.training_logger import TrainingLogger, ReportGenerator
 
36
  from layer2.customer_sim import CustomerPersona, CustomerSimulator
37
  from layer2.hf_agent import HFAgent
38
  from personas.generate_personas import generate_personas
@@ -138,7 +139,7 @@ def _print_config_banner(config: GRPOConfig, report_cfg: dict, paths_cfg: dict):
138
  print(f"{'='*70}\n")
139
 
140
 
141
- def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: str | None, gen_cfg: dict | None = None, personas_cfg: dict | None = None):
142
  """Run GRPO training."""
143
  _print_config_banner(config, report_cfg, paths_cfg)
144
 
@@ -188,6 +189,7 @@ def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: s
188
  print(f"\nFull raw JSON: {summary_path}")
189
  print(f"{'='*60}")
190
 
 
191
  if report_cfg["enabled"]:
192
  print(f"\n{'='*60}")
193
  print("GENERATING TRAINING REPORT...")
@@ -212,6 +214,29 @@ def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: s
212
  except OSError:
213
  print("WARNING: Could not re-read report from disk")
214
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
  def run_eval(hf_token: str | None, prompt: str, episodes: int):
217
  """Evaluate a single prompt."""
@@ -268,6 +293,7 @@ def main():
268
  paths_cfg = get_paths(cfg)
269
  gen_cfg = get_generation_config(cfg)
270
  personas_cfg = get_personas_config(cfg)
 
271
 
272
  # CLI overrides
273
  if args.steps is not None:
@@ -289,7 +315,7 @@ def main():
289
  report_cfg["example_customers"] = args.example_customers
290
 
291
  if args.mode == "train":
292
- run_train(grpo_config, report_cfg, paths_cfg, args.hf_token, gen_cfg=gen_cfg, personas_cfg=personas_cfg)
293
  elif args.mode == "eval":
294
  if not args.prompt:
295
  parser.error("--prompt is required for eval mode")
 
30
 
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
33
+ from config_loader import load_config, make_grpo_config, make_env_config, get_report_config, get_paths, get_generation_config, get_personas_config, get_upload_config
34
  from layer1.grpo_trainer import GRPOConfig, GRPOPromptTrainer, PromptEvaluator
35
  from layer1.training_logger import TrainingLogger, ReportGenerator
36
+ from layer1.upload import upload_training_results
37
  from layer2.customer_sim import CustomerPersona, CustomerSimulator
38
  from layer2.hf_agent import HFAgent
39
  from personas.generate_personas import generate_personas
 
139
  print(f"{'='*70}\n")
140
 
141
 
142
+ def run_train(config: GRPOConfig, report_cfg: dict, paths_cfg: dict, hf_token: str | None, gen_cfg: dict | None = None, personas_cfg: dict | None = None, upload_cfg: dict | None = None):
143
  """Run GRPO training."""
144
  _print_config_banner(config, report_cfg, paths_cfg)
145
 
 
189
  print(f"\nFull raw JSON: {summary_path}")
190
  print(f"{'='*60}")
191
 
192
+ report_path = None
193
  if report_cfg["enabled"]:
194
  print(f"\n{'='*60}")
195
  print("GENERATING TRAINING REPORT...")
 
214
  except OSError:
215
  print("WARNING: Could not re-read report from disk")
216
 
217
+ # Upload to Supabase if configured
218
+ upload_cfg = upload_cfg or {}
219
+ if upload_cfg.get("enabled") and os.environ.get("SUPABASE_URL"):
220
+ print(f"\n{'='*60}")
221
+ print("UPLOADING TO SUPABASE...")
222
+ print(f"{'='*60}")
223
+ upload_result = upload_training_results(
224
+ raw_summary=raw_summary,
225
+ run_id=training_logger.timestamp,
226
+ bucket=upload_cfg.get("bucket", "training-results"),
227
+ report_path=report_path if report_cfg["enabled"] else None,
228
+ chart_path=None, # chart path is internal to ReportGenerator
229
+ config={"grpo": config.__dict__, "report": report_cfg, "paths": paths_cfg},
230
+ )
231
+ print(f" Run ID: {upload_result['run_id']}")
232
+ print(f" Files: {len(upload_result['storage_paths'])} uploaded")
233
+ print(f" DB rows: {upload_result['db_rows']}")
234
+ if upload_result.get("error"):
235
+ print(f" Error: {upload_result['error']}")
236
+ print(f"{'='*60}")
237
+ elif upload_cfg.get("enabled"):
238
+ print("\nSupabase upload enabled but SUPABASE_URL not set — skipping")
239
+
240
 
241
  def run_eval(hf_token: str | None, prompt: str, episodes: int):
242
  """Evaluate a single prompt."""
 
293
  paths_cfg = get_paths(cfg)
294
  gen_cfg = get_generation_config(cfg)
295
  personas_cfg = get_personas_config(cfg)
296
+ upload_cfg = get_upload_config(cfg)
297
 
298
  # CLI overrides
299
  if args.steps is not None:
 
315
  report_cfg["example_customers"] = args.example_customers
316
 
317
  if args.mode == "train":
318
+ run_train(grpo_config, report_cfg, paths_cfg, args.hf_token, gen_cfg=gen_cfg, personas_cfg=personas_cfg, upload_cfg=upload_cfg)
319
  elif args.mode == "eval":
320
  if not args.prompt:
321
  parser.error("--prompt is required for eval mode")
layer1/upload.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Supabase uploader for training results.
3
+
4
+ Uploads:
5
+ 1. Raw summary JSON + report files to Supabase Storage
6
+ 2. Per-run and per-episode metrics to Postgres tables
7
+
8
+ Requires SUPABASE_URL and SUPABASE_KEY environment variables.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import logging
15
+ import os
16
+ from datetime import datetime, timezone
17
+ from typing import Any
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def _get_client():
    """Create a Supabase client from environment variables.

    Returns None (after logging an error) when either the ``supabase``
    package is not installed or the required environment variables are
    missing; callers treat None as "skip the upload".
    """
    try:
        from supabase import create_client
    except ImportError:
        logger.error(
            "supabase package not installed. Install with: pip install 'nested-rl-envs[upload]'"
        )
        return None

    url = os.environ.get("SUPABASE_URL")
    key = os.environ.get("SUPABASE_KEY")
    if url and key:
        return create_client(url, key)

    logger.error("SUPABASE_URL and SUPABASE_KEY must be set")
    return None
39
+
40
+
41
def upload_training_results(
    raw_summary: dict[str, Any],
    run_id: str | None = None,
    bucket: str = "training-results",
    report_path: str | None = None,
    chart_path: str | None = None,
    config: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """
    Upload training results to Supabase (Storage + DB).

    Best-effort by design: failures inside the helpers are logged and
    swallowed so that an upload problem never aborts training.

    Args:
        raw_summary: Output of TrainingLogger.generate_raw_summary().
        run_id: Unique run identifier. Auto-generated (UTC timestamp) if not provided.
        bucket: Supabase Storage bucket name.
        report_path: Path to the markdown report file (optional).
        chart_path: Path to the reward chart PNG (optional).
        config: Training config dict to store with the run (optional).

    Returns:
        Dict with keys "run_id", "storage_paths", "db_rows", and "error".
        "error" is None on success — the key is now always present because
        the caller in train.py inspects upload_result.get("error").
    """
    client = _get_client()
    if client is None:
        logger.warning("Supabase upload skipped — client not available")
        return {"run_id": None, "storage_paths": [], "db_rows": 0, "error": "no client"}

    if run_id is None:
        run_id = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")

    # Uniform result schema for both the success and the failure path.
    results: dict[str, Any] = {
        "run_id": run_id,
        "storage_paths": [],
        "db_rows": 0,
        "error": None,
    }

    # --- Storage uploads (summary JSON, report, chart) ---
    results["storage_paths"] = _upload_files(
        client, bucket, run_id, raw_summary, report_path, chart_path
    )

    # --- DB inserts (run summary + per-episode metrics) ---
    results["db_rows"] = _insert_metrics(client, run_id, raw_summary, config)

    logger.info(
        "Supabase upload complete: run_id=%s, files=%d, db_rows=%d",
        run_id, len(results["storage_paths"]), results["db_rows"],
    )
    return results
86
+
87
+
88
+ def _upload_files(
89
+ client,
90
+ bucket: str,
91
+ run_id: str,
92
+ raw_summary: dict[str, Any],
93
+ report_path: str | None,
94
+ chart_path: str | None,
95
+ ) -> list[str]:
96
+ """Upload files to Supabase Storage."""
97
+ uploaded = []
98
+
99
+ # Upload raw summary JSON
100
+ try:
101
+ summary_bytes = json.dumps(raw_summary, indent=2, default=str).encode()
102
+ path = f"{run_id}/raw_summary.json"
103
+ client.storage.from_(bucket).upload(
104
+ path, summary_bytes, {"content-type": "application/json"}
105
+ )
106
+ uploaded.append(path)
107
+ logger.info("Uploaded %s to storage", path)
108
+ except Exception as e:
109
+ logger.error("Failed to upload raw_summary.json: %s", e)
110
+
111
+ # Upload report markdown
112
+ if report_path and os.path.exists(report_path):
113
+ try:
114
+ with open(report_path, "rb") as f:
115
+ path = f"{run_id}/report.md"
116
+ client.storage.from_(bucket).upload(
117
+ path, f.read(), {"content-type": "text/markdown"}
118
+ )
119
+ uploaded.append(path)
120
+ logger.info("Uploaded %s to storage", path)
121
+ except Exception as e:
122
+ logger.error("Failed to upload report: %s", e)
123
+
124
+ # Upload chart PNG
125
+ if chart_path and os.path.exists(chart_path):
126
+ try:
127
+ with open(chart_path, "rb") as f:
128
+ path = f"{run_id}/reward_chart.png"
129
+ client.storage.from_(bucket).upload(
130
+ path, f.read(), {"content-type": "image/png"}
131
+ )
132
+ uploaded.append(path)
133
+ logger.info("Uploaded %s to storage", path)
134
+ except Exception as e:
135
+ logger.error("Failed to upload chart: %s", e)
136
+
137
+ return uploaded
138
+
139
+
140
+ def _insert_metrics(
141
+ client,
142
+ run_id: str,
143
+ raw_summary: dict[str, Any],
144
+ config: dict[str, Any] | None,
145
+ ) -> int:
146
+ """Insert training run + per-episode metrics into Postgres tables."""
147
+ rows_inserted = 0
148
+
149
+ # Insert training run summary
150
+ try:
151
+ run_row = {
152
+ "run_id": run_id,
153
+ "started_at": datetime.now(timezone.utc).isoformat(),
154
+ "duration_seconds": raw_summary.get("duration_seconds"),
155
+ "total_steps": len(raw_summary.get("steps", [])),
156
+ "total_episodes": raw_summary.get("total_episodes", 0),
157
+ "best_step": raw_summary.get("best_step"),
158
+ "best_mean_reward": raw_summary.get("best_mean_reward"),
159
+ "mean_rewards": raw_summary.get("mean_rewards", []),
160
+ "min_rewards": raw_summary.get("min_rewards", []),
161
+ "max_rewards": raw_summary.get("max_rewards", []),
162
+ "config": config,
163
+ }
164
+ client.table("training_runs").insert(run_row).execute()
165
+ rows_inserted += 1
166
+ logger.info("Inserted training run: %s", run_id)
167
+ except Exception as e:
168
+ logger.error("Failed to insert training_runs row: %s", e)
169
+
170
+ # Insert per-episode metrics in batches
171
+ episode_rows = []
172
+ for m in raw_summary.get("per_episode_metrics", []):
173
+ episode_rows.append({
174
+ "run_id": run_id,
175
+ "step": m["step"],
176
+ "episode": m["episode"],
177
+ "reward": m.get("reward"),
178
+ "turns": m.get("turns", 0),
179
+ "intent_captured": m.get("intent_captured", False),
180
+ "intent_correct": m.get("intent_correct", False),
181
+ "true_intent": m.get("true_intent", ""),
182
+ "agent_intent": m.get("agent_intent", ""),
183
+ "injection_attempted": m.get("injection_attempted", False),
184
+ "injection_succeeded": m.get("injection_succeeded", False),
185
+ "api_call_made": m.get("api_call_made", False),
186
+ "api_call_correct": m.get("api_call_correct", False),
187
+ })
188
+
189
+ # Batch insert (Supabase/PostgREST supports bulk inserts)
190
+ if episode_rows:
191
+ batch_size = 100
192
+ for i in range(0, len(episode_rows), batch_size):
193
+ batch = episode_rows[i : i + batch_size]
194
+ try:
195
+ client.table("training_episodes").insert(batch).execute()
196
+ rows_inserted += len(batch)
197
+ except Exception as e:
198
+ logger.error("Failed to insert episode batch %d: %s", i, e)
199
+
200
+ return rows_inserted
pyproject.toml CHANGED
@@ -32,6 +32,9 @@ train = [
32
  "accelerate>=0.27.0",
33
  "datasets>=2.18.0",
34
  ]
 
 
 
35
  dev = [
36
  "pytest>=8.0",
37
  "ruff>=0.3.0",
 
32
  "accelerate>=0.27.0",
33
  "datasets>=2.18.0",
34
  ]
35
+ upload = [
36
+ "supabase>=2.0.0",
37
+ ]
38
  dev = [
39
  "pytest>=8.0",
40
  "ruff>=0.3.0",
scripts/supabase_setup.sql ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- Supabase schema for training results
-- Run this in your Supabase SQL Editor to create the tables.

-- Training runs: one row per training run
create table if not exists training_runs (
  id bigint generated always as identity primary key,
  run_id text unique not null,          -- external identifier used by the uploader
  started_at timestamptz default now(),
  duration_seconds real,
  total_steps int,
  total_episodes int,
  best_step int,
  best_mean_reward real,
  mean_rewards jsonb,                   -- array of mean rewards per step
  min_rewards jsonb,                    -- array of min rewards per step
  max_rewards jsonb,                    -- array of max rewards per step
  config jsonb,                         -- full training config snapshot
  created_at timestamptz default now()
);

-- Per-episode metrics: one row per episode
create table if not exists training_episodes (
  id bigint generated always as identity primary key,
  -- cascade delete: removing a run removes all its episode rows
  run_id text not null references training_runs(run_id) on delete cascade,
  step int not null,
  episode int not null,
  reward real,
  turns int,
  intent_captured boolean default false,
  intent_correct boolean default false,
  true_intent text,
  agent_intent text,
  injection_attempted boolean default false,
  injection_succeeded boolean default false,
  api_call_made boolean default false,
  api_call_correct boolean default false,
  created_at timestamptz default now()
);

-- Index for fast queries by run
create index if not exists idx_episodes_run_id on training_episodes(run_id);
create index if not exists idx_episodes_step on training_episodes(run_id, step);

-- Create the storage bucket (run via Supabase Dashboard > Storage > New Bucket)
-- Bucket name: training-results
-- Public: false (use service key for uploads)

-- Enable Row Level Security (optional but recommended)
alter table training_runs enable row level security;
alter table training_episodes enable row level security;

-- Allow inserts with service key (anon or service_role)
-- NOTE(review): `with check (true)` / `using (true)` makes these policies
-- fully permissive for ANY role, including anon — confirm this is intended
-- before exposing the anon key publicly.
create policy "Allow insert training_runs" on training_runs
  for insert with check (true);
create policy "Allow select training_runs" on training_runs
  for select using (true);

create policy "Allow insert training_episodes" on training_episodes
  for insert with check (true);
create policy "Allow select training_episodes" on training_episodes
  for select using (true);