XcodeAddy committed on
Commit
cf2cf65
·
1 Parent(s): abef90f

Colab Notebook Added

Browse files
training/colab_notebook.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
training/launch_hf_job.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import os
5
+ import shlex
6
+ import sys
7
+ from textwrap import dedent
8
+
9
+ from huggingface_hub import run_job
10
+
11
+
12
# Default container image for the job (used as the --image default).
+ DEFAULT_IMAGE = "pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel"
13
# Default Git repository cloned inside the job (used as the --repo-url default).
+ DEFAULT_REPO = "https://github.com/ADITYAGABA1322/sentinel-env"
14
# Default base model forwarded to training/train.py (used as the --model default).
+ DEFAULT_MODEL = "unsloth/Qwen2.5-0.5B-Instruct"
15
+
16
+
17
def shell_join(lines: list[str]) -> str:
    """Chain shell steps into one command line joined by ``&&``.

    Each entry is stripped of surrounding whitespace; entries that are empty
    after stripping are dropped.
    """
    steps = []
    for raw in lines:
        trimmed = raw.strip()
        if trimmed:
            steps.append(trimmed)
    return " && ".join(steps)
19
+
20
+
21
def bootstrap_repo(repo_url: str) -> list[str]:
    """Return the shell steps that prepare a checkout inside the job.

    The steps enable strict/verbose shell mode, install git when it is not on
    PATH, clone ``repo_url`` into ``sentinel-env``, enter it, upgrade pip and
    install both requirements files.
    """
    # The URL is the only caller-supplied piece, so it is the only part quoted.
    clone_step = f"git clone {shlex.quote(repo_url)} sentinel-env"

    steps = [
        "set -eux",
        "command -v git || (apt-get update && apt-get install -y git)",
    ]
    steps.append(clone_step)
    steps += [
        "cd sentinel-env",
        "python -m pip install --upgrade pip",
        "pip install -r requirements.txt",
        "pip install -r requirements-train.txt",
    ]
    return steps
31
+
32
+
33
def gpu_test_command() -> str:
    """Return the one-line command for ``gpu-test`` mode.

    The command imports torch and prints the name of the CUDA device.
    """
    probe = "import torch; print(torch.cuda.get_device_name())"
    return f"python -c '{probe}'"
35
+
36
+
37
def train_command(args: argparse.Namespace) -> str:
    """Build the bash command line executed by the training modes.

    The command bootstraps the repository (see ``bootstrap_repo``) and runs
    ``training/train.py`` with the hyper-parameters taken from ``args``.
    When ``args.mode`` is ``"train-full"`` it additionally records a replay
    of the trained policy, evaluates it, plots the results and uploads the
    adapter folder and the ``outputs`` directory to the Hugging Face Hub.

    Every value taken from ``args`` is escaped for the context it lands in:
    shell words go through ``shlex.quote`` and values embedded in generated
    Python source are rendered with ``repr`` so quotes, ``$`` or backticks in
    a path or model name cannot break (or inject into) the command.

    Args:
        args: Parsed command-line options; reads ``repo_url``, ``episodes``,
            ``task``, ``seed``, ``model``, ``epochs``, ``batch_size``,
            ``learning_rate``, ``lora_rank``, ``max_seq_length``,
            ``output_dir`` and ``mode``.

    Returns:
        A single string of shell steps joined with ``&&``.
    """

    def word(value: object) -> str:
        # Render any option value as exactly one safe shell word.
        return shlex.quote(str(value))

    train_step = " ".join(
        [
            "python training/train.py",
            f"--episodes {word(args.episodes)}",
            f"--task {word(args.task)}",
            f"--seed {word(args.seed)}",
            f"--model {word(args.model)}",
            f"--epochs {word(args.epochs)}",
            f"--batch-size {word(args.batch_size)}",
            f"--learning-rate {word(args.learning_rate)}",
            f"--lora-rank {word(args.lora_rank)}",
            f"--max-seq-length {word(args.max_seq_length)}",
            f"--output-dir {word(args.output_dir)}",
        ]
    )

    lines = bootstrap_repo(args.repo_url)
    lines.append(train_step)

    if args.mode == "train-full":
        # repr() yields a valid Python string literal whatever characters the
        # value contains (quotes and newlines are escaped).
        output_dir_literal = repr(str(args.output_dir))
        model_literal = repr(str(args.model))

        replay_code = (
            "from training.replay import record_trained_actions; "
            f"record_trained_actions(adapter_path={output_dir_literal}, "
            f"base_model={model_literal}, tasks=['task1','task2','task3'], "
            "seeds=range(30), out_path='outputs/trained_policy_replay.jsonl')"
        )

        # The heredoc delimiter is quoted ('PY'), so bash performs no
        # expansion inside the script; only Python-level escaping is needed.
        # This step must stay last: the closing PY has to end its own line.
        upload_step = (
            "python - <<'PY'\n"
            "import os\n"
            "from huggingface_hub import HfApi\n"
            "token = os.environ.get('HF_TOKEN')\n"
            "api = HfApi(token=token)\n"
            "model_repo = os.environ.get('SENTINEL_MODEL_REPO', 'XcodeAddy/sentinel-grpo-qwen05')\n"
            "artifact_repo = os.environ.get('SENTINEL_ARTIFACT_REPO', 'XcodeAddy/sentinel-env-artifacts')\n"
            "job_id = os.environ.get('JOB_ID', 'manual')\n"
            "api.create_repo(model_repo, repo_type='model', exist_ok=True)\n"
            f"api.upload_folder(folder_path={output_dir_literal}, repo_id=model_repo, repo_type='model')\n"
            "api.create_repo(artifact_repo, repo_type='dataset', exist_ok=True)\n"
            "api.upload_folder(folder_path='outputs', repo_id=artifact_repo, repo_type='dataset', path_in_repo=f'job-{job_id}/outputs')\n"
            "print('Uploaded model adapter to', model_repo)\n"
            "print('Uploaded outputs to', artifact_repo, 'under', f'job-{job_id}/outputs')\n"
            "PY"
        )

        lines.extend(
            [
                # Quote the whole program so the shell passes it to python verbatim.
                f"python -c {shlex.quote(replay_code)}",
                "python training/evaluate.py --episodes 30 --task all "
                "--policies random,heuristic,oracle_lite,trained "
                "--replay outputs/trained_policy_replay.jsonl "
                "--out outputs/eval_post.json --no-plot",
                "cp outputs/eval_post.json outputs/evaluation_results.json",
                "python -m training.plots --pre outputs/eval_pre.json "
                "--post outputs/eval_post.json --out-dir outputs/charts",
                upload_step,
            ]
        )
    return shell_join(lines)
88
+
89
+
90
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the launcher's command-line options.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the original behaviour; passing a
            list allows the parser to be driven programmatically.

    Returns:
        The populated namespace with job settings (``mode``, ``namespace``,
        ``flavor``, ``timeout``, ``image``, ``repo_url``) and the training
        hyper-parameters forwarded to ``training/train.py``.
    """
    parser = argparse.ArgumentParser(
        description="Launch SENTINEL training on Hugging Face Jobs without shell quoting pain."
    )

    # Job-level settings.
    parser.add_argument("--mode", choices=["gpu-test", "train-smoke", "train-full"], default="gpu-test")
    # The namespace default can be overridden through the HF_NAMESPACE variable.
    parser.add_argument("--namespace", default=os.environ.get("HF_NAMESPACE", "XcodeAddy"))
    parser.add_argument("--flavor", default="a10g-small")
    parser.add_argument("--timeout", default="2h")
    parser.add_argument("--image", default=DEFAULT_IMAGE)
    parser.add_argument("--repo-url", default=DEFAULT_REPO)

    # Training settings.
    parser.add_argument("--model", default=DEFAULT_MODEL)
    parser.add_argument("--episodes", type=int, default=50)
    parser.add_argument("--task", choices=["task1", "task2", "task3", "all"], default="all")
    parser.add_argument("--seed", type=int, default=0)
    parser.add_argument("--epochs", type=int, default=1)
    parser.add_argument("--batch-size", type=int, default=2)
    parser.add_argument("--learning-rate", type=float, default=5e-6)
    parser.add_argument("--lora-rank", type=int, default=8)
    parser.add_argument("--max-seq-length", type=int, default=1024)
    parser.add_argument("--output-dir", default="training/sentinel_qwen05_grpo")

    return parser.parse_args(argv)
111
+
112
+
113
def main() -> None:
    """Build the job command for the selected mode and submit it with ``run_job``.

    Exits with an explanatory message when ``HF_TOKEN`` is missing from the
    environment. Otherwise prints a summary of the launch settings, starts
    the job, and prints its URL, ID and a command for following the logs.
    """
    args = parse_args()

    hf_token = os.environ.get("HF_TOKEN")
    if not hf_token:
        missing_token_help = "\n".join(
            (
                "HF_TOKEN is not set.",
                "",
                "Run:",
                "read -s HF_TOKEN",
                "export HF_TOKEN",
                "Then paste your Hugging Face write token.",
            )
        )
        raise SystemExit(missing_token_help)

    if args.mode == "gpu-test":
        command = gpu_test_command()
    else:
        command = train_command(args)

    # Only a bounded prefix of the (potentially long) command is echoed.
    preview = command[:260]
    if len(command) > 260:
        preview += "..."

    print("Launching HF Job:")
    summary = (
        ("mode", args.mode),
        ("namespace", args.namespace),
        ("flavor", args.flavor),
        ("timeout", args.timeout),
        ("image", args.image),
    )
    for label, value in summary:
        print(f" {label} = {value}")
    print(" command = bash -lc", shlex.quote(preview))

    job_env = {
        "SENTINEL_MODEL_REPO": "XcodeAddy/sentinel-grpo-qwen05",
        "SENTINEL_ARTIFACT_REPO": "XcodeAddy/sentinel-env-artifacts",
    }
    job = run_job(
        image=args.image,
        command=["bash", "-lc", command],
        flavor=args.flavor,
        timeout=args.timeout,
        namespace=args.namespace,
        token=hf_token,
        # The token is also passed as a job secret named HF_TOKEN.
        secrets={"HF_TOKEN": hf_token},
        env=job_env,
        labels={"project": "sentinel", "mode": args.mode},
    )

    print("Job launched.")
    print("URL:", job.url)
    print("ID:", job.id)
    print()
    print("Follow logs with:")
    follow_hint = f" .venv/bin/hf jobs logs -f {job.id} --namespace {args.namespace} --token \"$HF_TOKEN\""
    print(follow_hint)
156
+ print()
157
+ print("Follow logs with:")
158
+ print(f" .venv/bin/hf jobs logs -f {job.id} --namespace {args.namespace} --token \"$HF_TOKEN\"")
159
+
160
+
161
if __name__ == "__main__":
    # Script entry point: a Ctrl-C during launch exits with status 130
    # instead of printing a traceback.
    try:
        main()
    except KeyboardInterrupt:
        raise SystemExit(130)
training/sentinel-env ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit 5427e15ed4573661c53b1d9cb77a02922c8502fc