XcodeAddy committed on
Commit
c47715e
·
1 Parent(s): 1835d90

Fix HF training deps and Space API fallback

Browse files
requirements-train.txt CHANGED
@@ -1,10 +1,10 @@
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
- trl<0.13
3
- transformers>=4.46
4
- datasets
5
- accelerate
6
- peft
7
- bitsandbytes
8
  matplotlib
9
  seaborn
10
  pandas
 
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
+ trl>=0.18.2,<0.25,!=0.19.0
3
+ transformers>=4.56,<5
4
+ datasets>=3.0,<5
5
+ accelerate>=1.4
6
+ peft>=0.14
7
+ bitsandbytes>=0.45
8
  matplotlib
9
  seaborn
10
  pandas
training/launch_hf_job.py CHANGED
@@ -48,12 +48,28 @@ def train_command(args: argparse.Namespace) -> str:
48
  f"--batch-size {args.batch_size}",
49
  f"--learning-rate {args.learning_rate}",
50
  f"--lora-rank {args.lora_rank}",
 
51
  f"--max-seq-length {args.max_seq_length}",
52
  f"--output-dir {shlex.quote(args.output_dir)}",
53
  ]
54
  )
55
  )
56
  if args.mode == "train-full":
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  lines.extend(
58
  [
59
  "python -c \"from training.replay import record_trained_actions; "
@@ -67,21 +83,7 @@ def train_command(args: argparse.Namespace) -> str:
67
  "cp outputs/eval_post.json outputs/evaluation_results.json",
68
  "python -m training.plots --pre outputs/eval_pre.json "
69
  "--post outputs/eval_post.json --out-dir outputs/charts",
70
- "python - <<'PY'\n"
71
- "import os\n"
72
- "from huggingface_hub import HfApi\n"
73
- "token = os.environ.get('HF_TOKEN')\n"
74
- "api = HfApi(token=token)\n"
75
- "model_repo = os.environ.get('SENTINEL_MODEL_REPO', 'XcodeAddy/sentinel-grpo-qwen05')\n"
76
- "artifact_repo = os.environ.get('SENTINEL_ARTIFACT_REPO', 'XcodeAddy/sentinel-env-artifacts')\n"
77
- "job_id = os.environ.get('JOB_ID', 'manual')\n"
78
- "api.create_repo(model_repo, repo_type='model', exist_ok=True)\n"
79
- f"api.upload_folder(folder_path='{args.output_dir}', repo_id=model_repo, repo_type='model')\n"
80
- "api.create_repo(artifact_repo, repo_type='dataset', exist_ok=True)\n"
81
- "api.upload_folder(folder_path='outputs', repo_id=artifact_repo, repo_type='dataset', path_in_repo=f'job-{job_id}/outputs')\n"
82
- "print('Uploaded model adapter to', model_repo)\n"
83
- "print('Uploaded outputs to', artifact_repo, 'under', f'job-{job_id}/outputs')\n"
84
- "PY",
85
  ]
86
  )
87
  return shell_join(lines)
@@ -105,6 +107,7 @@ def parse_args() -> argparse.Namespace:
105
  parser.add_argument("--batch-size", type=int, default=2)
106
  parser.add_argument("--learning-rate", type=float, default=5e-6)
107
  parser.add_argument("--lora-rank", type=int, default=8)
 
108
  parser.add_argument("--max-seq-length", type=int, default=1024)
109
  parser.add_argument("--output-dir", default="training/sentinel_qwen05_grpo")
110
  return parser.parse_args()
 
48
  f"--batch-size {args.batch_size}",
49
  f"--learning-rate {args.learning_rate}",
50
  f"--lora-rank {args.lora_rank}",
51
+ f"--num-generations {args.num_generations}",
52
  f"--max-seq-length {args.max_seq_length}",
53
  f"--output-dir {shlex.quote(args.output_dir)}",
54
  ]
55
  )
56
  )
57
  if args.mode == "train-full":
58
+ upload_code = (
59
+ "import os; "
60
+ "from huggingface_hub import HfApi; "
61
+ "token=os.environ.get('HF_TOKEN'); "
62
+ "api=HfApi(token=token); "
63
+ "model_repo=os.environ.get('SENTINEL_MODEL_REPO','XcodeAddy/sentinel-grpo-qwen05'); "
64
+ "artifact_repo=os.environ.get('SENTINEL_ARTIFACT_REPO','XcodeAddy/sentinel-env-artifacts'); "
65
+ "job_id=os.environ.get('JOB_ID','manual'); "
66
+ "api.create_repo(model_repo, repo_type='model', exist_ok=True); "
67
+ f"api.upload_folder(folder_path='{args.output_dir}', repo_id=model_repo, repo_type='model'); "
68
+ "api.create_repo(artifact_repo, repo_type='dataset', exist_ok=True); "
69
+ "api.upload_folder(folder_path='outputs', repo_id=artifact_repo, repo_type='dataset', path_in_repo=f'job-{job_id}/outputs'); "
70
+ "print('Uploaded model adapter to', model_repo); "
71
+ "print('Uploaded outputs to', artifact_repo, 'under', f'job-{job_id}/outputs')"
72
+ )
73
  lines.extend(
74
  [
75
  "python -c \"from training.replay import record_trained_actions; "
 
83
  "cp outputs/eval_post.json outputs/evaluation_results.json",
84
  "python -m training.plots --pre outputs/eval_pre.json "
85
  "--post outputs/eval_post.json --out-dir outputs/charts",
86
+ f"python -c {shlex.quote(upload_code)}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ]
88
  )
89
  return shell_join(lines)
 
107
  parser.add_argument("--batch-size", type=int, default=2)
108
  parser.add_argument("--learning-rate", type=float, default=5e-6)
109
  parser.add_argument("--lora-rank", type=int, default=8)
110
+ parser.add_argument("--num-generations", type=int, default=2)
111
  parser.add_argument("--max-seq-length", type=int, default=1024)
112
  parser.add_argument("--output-dir", default="training/sentinel_qwen05_grpo")
113
  return parser.parse_args()
training/train.py CHANGED
@@ -174,6 +174,7 @@ def run_grpo(args) -> None:
174
  learning_rate=args.learning_rate,
175
  num_train_epochs=args.epochs,
176
  per_device_train_batch_size=args.batch_size,
 
177
  logging_steps=10,
178
  save_steps=50,
179
  max_prompt_length=args.max_seq_length,
@@ -210,6 +211,7 @@ def main() -> None:
210
  parser.add_argument("--learning-rate", type=float, default=5e-6)
211
  parser.add_argument("--max-seq-length", type=int, default=1024)
212
  parser.add_argument("--lora-rank", type=int, default=16)
 
213
  args = parser.parse_args()
214
 
215
  if args.dry_run:
 
174
  learning_rate=args.learning_rate,
175
  num_train_epochs=args.epochs,
176
  per_device_train_batch_size=args.batch_size,
177
+ num_generations=args.num_generations,
178
  logging_steps=10,
179
  save_steps=50,
180
  max_prompt_length=args.max_seq_length,
 
211
  parser.add_argument("--learning-rate", type=float, default=5e-6)
212
  parser.add_argument("--max-seq-length", type=int, default=1024)
213
  parser.add_argument("--lora-rank", type=int, default=16)
214
+ parser.add_argument("--num-generations", type=int, default=2)
215
  args = parser.parse_args()
216
 
217
  if args.dry_run:
ui/app/hooks/useSentinel.ts CHANGED
@@ -9,6 +9,8 @@ import type {
9
 
10
  /* ── helpers ──────────────────────────────────────────── */
11
 
 
 
12
  function bestSpec(obs: Observation | null): string {
13
  if (!obs) return "S0";
14
  return [...obs.available_specialists].sort(
@@ -92,12 +94,12 @@ export function useSentinel() {
92
 
93
  /* load evaluation data once */
94
  useEffect(() => {
95
- fetch(`${process.env.NEXT_PUBLIC_API_URL}/assets/evaluation_results.json`)
96
  .then((r) => r.json())
97
  .then(setEval)
98
  .catch(() => null);
99
 
100
- fetch(`${process.env.NEXT_PUBLIC_API_URL}/assets/trained_policy_replay.jsonl`)
101
  .then((r) => r.ok ? r.text() : "")
102
  .then((txt) => {
103
  const table = new Map<string, ReplayRow>();
@@ -151,7 +153,7 @@ export function useSentinel() {
151
  const payload = { task_type: t, seed: s };
152
  setLastReq({ method: "POST", path: "/reset", body: payload });
153
  try {
154
- const res = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/reset`, {
155
  method: "POST",
156
  headers: { "Content-Type": "application/json" },
157
  body: JSON.stringify(payload),
@@ -203,7 +205,7 @@ export function useSentinel() {
203
  };
204
  setLastReq({ method: "POST", path: `/step?session_id=${sid}`, body: payload });
205
  try {
206
- const res = await fetch(`${process.env.NEXT_PUBLIC_API_URL}/step?session_id=${encodeURIComponent(sid)}`, {
207
  method: "POST",
208
  headers: { "Content-Type": "application/json" },
209
  body: JSON.stringify(payload),
 
9
 
10
  /* ── helpers ──────────────────────────────────────────── */
11
 
12
+ const API_BASE = process.env.NEXT_PUBLIC_API_URL || "";
13
+
14
  function bestSpec(obs: Observation | null): string {
15
  if (!obs) return "S0";
16
  return [...obs.available_specialists].sort(
 
94
 
95
  /* load evaluation data once */
96
  useEffect(() => {
97
+ fetch(`${API_BASE}/assets/evaluation_results.json`)
98
  .then((r) => r.json())
99
  .then(setEval)
100
  .catch(() => null);
101
 
102
+ fetch(`${API_BASE}/assets/trained_policy_replay.jsonl`)
103
  .then((r) => r.ok ? r.text() : "")
104
  .then((txt) => {
105
  const table = new Map<string, ReplayRow>();
 
153
  const payload = { task_type: t, seed: s };
154
  setLastReq({ method: "POST", path: "/reset", body: payload });
155
  try {
156
+ const res = await fetch(`${API_BASE}/reset`, {
157
  method: "POST",
158
  headers: { "Content-Type": "application/json" },
159
  body: JSON.stringify(payload),
 
205
  };
206
  setLastReq({ method: "POST", path: `/step?session_id=${sid}`, body: payload });
207
  try {
208
+ const res = await fetch(`${API_BASE}/step?session_id=${encodeURIComponent(sid)}`, {
209
  method: "POST",
210
  headers: { "Content-Type": "application/json" },
211
  body: JSON.stringify(payload),