XcodeAddy committed on
Commit
db820a9
·
1 Parent(s): ed29027

Fix HF Jobs GRPO runtime stack

Browse files
README.md CHANGED
@@ -79,9 +79,14 @@ Deployment contract: run one server worker for the submitted Space. Active `Sent
79
  ## Live Submission Targets
80
 
81
  - GitHub: `https://github.com/ADITYAGABA1322/sentinel-env`
82
- - Hugging Face Space: `https://xcodeaddy-sentinel-env.hf.space`
 
83
  - OpenEnv base URL: `https://xcodeaddy-sentinel-env.hf.space`
84
 
 
 
 
 
85
  ## Specialist Behaviors
86
 
87
  | Public Slot | Hidden Behavior |
 
79
  ## Live Submission Targets
80
 
81
  - GitHub: `https://github.com/ADITYAGABA1322/sentinel-env`
82
+ - Hugging Face Space repo/settings: `https://huggingface.co/spaces/XcodeAddy/sentinel-env`
83
+ - Hugging Face live app: `https://xcodeaddy-sentinel-env.hf.space`
84
  - OpenEnv base URL: `https://xcodeaddy-sentinel-env.hf.space`
85
 
86
+ Local note: run uvicorn with `--host 0.0.0.0`, but open the app in a browser at
87
+ `http://127.0.0.1:7860/` or `http://localhost:7860/`. `0.0.0.0` is a bind
88
+ address, not the page URL to demo.
89
+
90
  ## Specialist Behaviors
91
 
92
  | Public Slot | Hidden Behavior |
docs/TRAINING_RUNBOOK.md CHANGED
@@ -148,6 +148,29 @@ Use a Hugging Face token in Colab for:
148
 
149
  The Space itself does not need GPU to run the replay demo.
150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  ## Hugging Face Credits
152
 
153
  Best use:
@@ -155,10 +178,36 @@ Best use:
155
  - keep the Space on CPU for normal judging,
156
  - optionally upgrade the Space to T4 only during the final live demo if the UI
157
  needs extra responsiveness,
158
- - avoid doing full training inside the Space.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- Training belongs in Colab. The Space is for serving the environment and replay
161
- demo.
162
 
163
  ## Success Criteria
164
 
 
148
 
149
  The Space itself does not need GPU to run the replay demo.
150
 
151
+ ## Hugging Face App URLs
152
+
153
+ Use these two Hugging Face URLs for different jobs:
154
+
155
+ ```text
156
+ https://huggingface.co/spaces/XcodeAddy/sentinel-env
157
+ ```
158
+
159
+ This is the Space repository/settings page. Use it to inspect files, Settings,
160
+ hardware, build logs, variables, secrets, and commits. It is not the iframe app
161
+ URL you demo to judges.
162
+
163
+ ```text
164
+ https://xcodeaddy-sentinel-env.hf.space/
165
+ ```
166
+
167
+ This is the real live app URL. Use this for the dashboard, API smoke tests, and
168
+ as the OpenEnv base URL.
169
+
170
+ When running locally, start uvicorn with `--host 0.0.0.0`, but open the browser
171
+ at `http://127.0.0.1:7860/` or `http://localhost:7860/`. Do not browse to
172
+ `http://0.0.0.0:7860/`; `0.0.0.0` is only a bind address.
173
+
174
  ## Hugging Face Credits
175
 
176
  Best use:
 
178
  - keep the Space on CPU for normal judging,
179
  - optionally upgrade the Space to T4 only during the final live demo if the UI
180
  needs extra responsiveness,
181
+ - avoid doing full training inside the Space,
182
+ - use Hugging Face Jobs or Colab for the actual GRPO run.
183
+
184
+ The Space is for serving the environment and replay demo. Training belongs in
185
+ Colab or in a Hugging Face GPU Job.
186
+
187
+ HF Jobs smoke path:
188
+
189
+ ```bash
190
+ .venv/bin/python training/launch_hf_job.py \
191
+ --mode import-smoke \
192
+ --timeout 45m
193
+
194
+ .venv/bin/python training/launch_hf_job.py \
195
+ --mode train-smoke \
196
+ --episodes 50 \
197
+ --timeout 2h
198
+ ```
199
+
200
+ If both smoke jobs pass, run the full job:
201
+
202
+ ```bash
203
+ .venv/bin/python training/launch_hf_job.py \
204
+ --mode train-full \
205
+ --episodes 200 \
206
+ --timeout 4h
207
+ ```
208
 
209
+ The launcher uses `pytorch/pytorch:2.11.0-cuda12.8-cudnn9-devel` because the
210
+ current Unsloth stack pulls `torchao`, which expects torch `>=2.11`.
211
 
212
  ## Success Criteria
213
 
pyproject.toml CHANGED
@@ -18,10 +18,12 @@ server = "server.app:main"
18
  [project.optional-dependencies]
19
  dev = ["pytest>=8.0.0"]
20
  training = [
21
- "trl",
22
- "transformers",
23
- "datasets",
24
- "accelerate",
 
 
25
  "unsloth",
26
  ]
27
 
 
18
  [project.optional-dependencies]
19
  dev = ["pytest>=8.0.0"]
20
  training = [
21
+ "trl==0.24.0",
22
+ "transformers==4.57.6",
23
+ "datasets==4.3.0",
24
+ "accelerate==1.13.0",
25
+ "peft==0.19.1",
26
+ "bitsandbytes==0.49.2",
27
  "unsloth",
28
  ]
29
 
requirements-train.txt CHANGED
@@ -1,11 +1,11 @@
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
- trl>=0.18.2,<0.25,!=0.19.0
3
- transformers>=4.56,<5
4
- datasets>=3.0,<5
5
- accelerate>=1.4
6
- peft>=0.14
7
- bitsandbytes>=0.45
8
- matplotlib
9
- seaborn
10
- pandas
11
- huggingface_hub
 
1
  unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
2
+ trl==0.24.0
3
+ transformers==4.57.6
4
+ datasets==4.3.0
5
+ accelerate==1.13.0
6
+ peft==0.19.1
7
+ bitsandbytes==0.49.2
8
+ matplotlib==3.10.9
9
+ seaborn==0.13.2
10
+ pandas==3.0.2
11
+ huggingface_hub>=0.36,<1
training/colab_notebook.ipynb CHANGED
@@ -74,7 +74,7 @@
74
  " \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\",\n",
75
  " ])\n",
76
  " subprocess.check_call([\"pip\", \"install\", \"-q\", \"--no-deps\",\n",
77
- " \"trl<0.13\", \"transformers>=4.46\", \"datasets\", \"accelerate\", \"peft\", \"bitsandbytes\",\n",
78
  " ])\n",
79
  "except subprocess.CalledProcessError as exc:\n",
80
  " print(f\"Training extras failed to install ({exc}); continuing with heuristic-fallback path.\")\n",
 
74
  " \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\",\n",
75
  " ])\n",
76
  " subprocess.check_call([\"pip\", \"install\", \"-q\", \"--no-deps\",\n",
77
+ " \"trl==0.24.0\", \"transformers==4.57.6\", \"datasets==4.3.0\", \"accelerate==1.13.0\", \"peft==0.19.1\", \"bitsandbytes==0.49.2\",\n",
78
  " ])\n",
79
  "except subprocess.CalledProcessError as exc:\n",
80
  " print(f\"Training extras failed to install ({exc}); continuing with heuristic-fallback path.\")\n",
training/launch_hf_job.py CHANGED
@@ -9,7 +9,9 @@ from textwrap import dedent
9
  from huggingface_hub import run_job
10
 
11
 
12
- DEFAULT_IMAGE = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
 
 
13
  DEFAULT_REPO = "https://github.com/ADITYAGABA1322/sentinel-env"
14
  DEFAULT_MODEL = "unsloth/Qwen2.5-0.5B-Instruct"
15
 
@@ -27,6 +29,14 @@ def bootstrap_repo(repo_url: str) -> list[str]:
27
  "python -m pip install --upgrade pip",
28
  "pip install -r requirements.txt",
29
  "pip install -r requirements-train.txt",
 
 
 
 
 
 
 
 
30
  ]
31
 
32
 
@@ -34,8 +44,11 @@ def gpu_test_command() -> str:
34
  return "python -c 'import torch; print(torch.cuda.get_device_name())'"
35
 
36
 
37
- def train_command(args: argparse.Namespace) -> str:
38
  lines = bootstrap_repo(args.repo_url)
 
 
 
39
  lines.append(
40
  " ".join(
41
  [
@@ -93,7 +106,11 @@ def parse_args() -> argparse.Namespace:
93
  parser = argparse.ArgumentParser(
94
  description="Launch SENTINEL training on Hugging Face Jobs without shell quoting pain."
95
  )
96
- parser.add_argument("--mode", choices=["gpu-test", "train-smoke", "train-full"], default="gpu-test")
 
 
 
 
97
  parser.add_argument("--namespace", default=os.environ.get("HF_NAMESPACE", "XcodeAddy"))
98
  parser.add_argument("--flavor", default="a10g-small")
99
  parser.add_argument("--timeout", default="2h")
@@ -130,7 +147,12 @@ def main() -> None:
130
  ).strip()
131
  )
132
 
133
- command = gpu_test_command() if args.mode == "gpu-test" else train_command(args)
 
 
 
 
 
134
  print("Launching HF Job:")
135
  print(f" mode = {args.mode}")
136
  print(f" namespace = {args.namespace}")
 
9
  from huggingface_hub import run_job
10
 
11
 
12
+ # Current Unsloth pulls torchao, which expects torch >= 2.11. Keep the Jobs
13
+ # image aligned so a failed GRPO import signals a real code issue, not a torch mismatch.
14
+ DEFAULT_IMAGE = "pytorch/pytorch:2.11.0-cuda12.8-cudnn9-devel"
15
  DEFAULT_REPO = "https://github.com/ADITYAGABA1322/sentinel-env"
16
  DEFAULT_MODEL = "unsloth/Qwen2.5-0.5B-Instruct"
17
 
 
29
  "python -m pip install --upgrade pip",
30
  "pip install -r requirements.txt",
31
  "pip install -r requirements-train.txt",
32
+ (
33
+ "python -c \"import torch; "
34
+ "print('torch', torch.__version__); "
35
+ "print('gpu', torch.cuda.get_device_name() if torch.cuda.is_available() else 'none'); "
36
+ "from transformers import PreTrainedModel; "
37
+ "from trl import GRPOConfig, GRPOTrainer; "
38
+ "print('training imports ok')\""
39
+ ),
40
  ]
41
 
42
 
 
44
  return "python -c 'import torch; print(torch.cuda.get_device_name())'"
45
 
46
 
47
+ def train_command(args: argparse.Namespace, train: bool = True) -> str:
48
  lines = bootstrap_repo(args.repo_url)
49
+ if not train:
50
+ return shell_join(lines)
51
+
52
  lines.append(
53
  " ".join(
54
  [
 
106
  parser = argparse.ArgumentParser(
107
  description="Launch SENTINEL training on Hugging Face Jobs without shell quoting pain."
108
  )
109
+ parser.add_argument(
110
+ "--mode",
111
+ choices=["gpu-test", "import-smoke", "train-smoke", "train-full"],
112
+ default="gpu-test",
113
+ )
114
  parser.add_argument("--namespace", default=os.environ.get("HF_NAMESPACE", "XcodeAddy"))
115
  parser.add_argument("--flavor", default="a10g-small")
116
  parser.add_argument("--timeout", default="2h")
 
147
  ).strip()
148
  )
149
 
150
+ if args.mode == "gpu-test":
151
+ command = gpu_test_command()
152
+ elif args.mode == "import-smoke":
153
+ command = train_command(args, train=False)
154
+ else:
155
+ command = train_command(args)
156
  print("Launching HF Job:")
157
  print(f" mode = {args.mode}")
158
  print(f" namespace = {args.namespace}")