Spaces:
Running
Running
Fix HF training deps and Space API fallback
Browse files
- requirements-train.txt +6 -6
- training/launch_hf_job.py +18 -15
- training/train.py +2 -0
- ui/app/hooks/useSentinel.ts +6 -4
requirements-train.txt
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
|
| 2 |
-
trl<0.
|
| 3 |
-
transformers>=4.
|
| 4 |
-
datasets
|
| 5 |
-
accelerate
|
| 6 |
-
peft
|
| 7 |
-
bitsandbytes
|
| 8 |
matplotlib
|
| 9 |
seaborn
|
| 10 |
pandas
|
|
|
|
| 1 |
unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git
|
| 2 |
+
trl>=0.18.2,<0.25,!=0.19.0
|
| 3 |
+
transformers>=4.56,<5
|
| 4 |
+
datasets>=3.0,<5
|
| 5 |
+
accelerate>=1.4
|
| 6 |
+
peft>=0.14
|
| 7 |
+
bitsandbytes>=0.45
|
| 8 |
matplotlib
|
| 9 |
seaborn
|
| 10 |
pandas
|
training/launch_hf_job.py
CHANGED
|
@@ -48,12 +48,28 @@ def train_command(args: argparse.Namespace) -> str:
|
|
| 48 |
f"--batch-size {args.batch_size}",
|
| 49 |
f"--learning-rate {args.learning_rate}",
|
| 50 |
f"--lora-rank {args.lora_rank}",
|
|
|
|
| 51 |
f"--max-seq-length {args.max_seq_length}",
|
| 52 |
f"--output-dir {shlex.quote(args.output_dir)}",
|
| 53 |
]
|
| 54 |
)
|
| 55 |
)
|
| 56 |
if args.mode == "train-full":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
lines.extend(
|
| 58 |
[
|
| 59 |
"python -c \"from training.replay import record_trained_actions; "
|
|
@@ -67,21 +83,7 @@ def train_command(args: argparse.Namespace) -> str:
|
|
| 67 |
"cp outputs/eval_post.json outputs/evaluation_results.json",
|
| 68 |
"python -m training.plots --pre outputs/eval_pre.json "
|
| 69 |
"--post outputs/eval_post.json --out-dir outputs/charts",
|
| 70 |
-
"python -
|
| 71 |
-
"import os\n"
|
| 72 |
-
"from huggingface_hub import HfApi\n"
|
| 73 |
-
"token = os.environ.get('HF_TOKEN')\n"
|
| 74 |
-
"api = HfApi(token=token)\n"
|
| 75 |
-
"model_repo = os.environ.get('SENTINEL_MODEL_REPO', 'XcodeAddy/sentinel-grpo-qwen05')\n"
|
| 76 |
-
"artifact_repo = os.environ.get('SENTINEL_ARTIFACT_REPO', 'XcodeAddy/sentinel-env-artifacts')\n"
|
| 77 |
-
"job_id = os.environ.get('JOB_ID', 'manual')\n"
|
| 78 |
-
"api.create_repo(model_repo, repo_type='model', exist_ok=True)\n"
|
| 79 |
-
f"api.upload_folder(folder_path='{args.output_dir}', repo_id=model_repo, repo_type='model')\n"
|
| 80 |
-
"api.create_repo(artifact_repo, repo_type='dataset', exist_ok=True)\n"
|
| 81 |
-
"api.upload_folder(folder_path='outputs', repo_id=artifact_repo, repo_type='dataset', path_in_repo=f'job-{job_id}/outputs')\n"
|
| 82 |
-
"print('Uploaded model adapter to', model_repo)\n"
|
| 83 |
-
"print('Uploaded outputs to', artifact_repo, 'under', f'job-{job_id}/outputs')\n"
|
| 84 |
-
"PY",
|
| 85 |
]
|
| 86 |
)
|
| 87 |
return shell_join(lines)
|
|
@@ -105,6 +107,7 @@ def parse_args() -> argparse.Namespace:
|
|
| 105 |
parser.add_argument("--batch-size", type=int, default=2)
|
| 106 |
parser.add_argument("--learning-rate", type=float, default=5e-6)
|
| 107 |
parser.add_argument("--lora-rank", type=int, default=8)
|
|
|
|
| 108 |
parser.add_argument("--max-seq-length", type=int, default=1024)
|
| 109 |
parser.add_argument("--output-dir", default="training/sentinel_qwen05_grpo")
|
| 110 |
return parser.parse_args()
|
|
|
|
| 48 |
f"--batch-size {args.batch_size}",
|
| 49 |
f"--learning-rate {args.learning_rate}",
|
| 50 |
f"--lora-rank {args.lora_rank}",
|
| 51 |
+
f"--num-generations {args.num_generations}",
|
| 52 |
f"--max-seq-length {args.max_seq_length}",
|
| 53 |
f"--output-dir {shlex.quote(args.output_dir)}",
|
| 54 |
]
|
| 55 |
)
|
| 56 |
)
|
| 57 |
if args.mode == "train-full":
|
| 58 |
+
upload_code = (
|
| 59 |
+
"import os; "
|
| 60 |
+
"from huggingface_hub import HfApi; "
|
| 61 |
+
"token=os.environ.get('HF_TOKEN'); "
|
| 62 |
+
"api=HfApi(token=token); "
|
| 63 |
+
"model_repo=os.environ.get('SENTINEL_MODEL_REPO','XcodeAddy/sentinel-grpo-qwen05'); "
|
| 64 |
+
"artifact_repo=os.environ.get('SENTINEL_ARTIFACT_REPO','XcodeAddy/sentinel-env-artifacts'); "
|
| 65 |
+
"job_id=os.environ.get('JOB_ID','manual'); "
|
| 66 |
+
"api.create_repo(model_repo, repo_type='model', exist_ok=True); "
|
| 67 |
+
f"api.upload_folder(folder_path='{args.output_dir}', repo_id=model_repo, repo_type='model'); "
|
| 68 |
+
"api.create_repo(artifact_repo, repo_type='dataset', exist_ok=True); "
|
| 69 |
+
"api.upload_folder(folder_path='outputs', repo_id=artifact_repo, repo_type='dataset', path_in_repo=f'job-{job_id}/outputs'); "
|
| 70 |
+
"print('Uploaded model adapter to', model_repo); "
|
| 71 |
+
"print('Uploaded outputs to', artifact_repo, 'under', f'job-{job_id}/outputs')"
|
| 72 |
+
)
|
| 73 |
lines.extend(
|
| 74 |
[
|
| 75 |
"python -c \"from training.replay import record_trained_actions; "
|
|
|
|
| 83 |
"cp outputs/eval_post.json outputs/evaluation_results.json",
|
| 84 |
"python -m training.plots --pre outputs/eval_pre.json "
|
| 85 |
"--post outputs/eval_post.json --out-dir outputs/charts",
|
| 86 |
+
f"python -c {shlex.quote(upload_code)}",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
]
|
| 88 |
)
|
| 89 |
return shell_join(lines)
|
|
|
|
| 107 |
parser.add_argument("--batch-size", type=int, default=2)
|
| 108 |
parser.add_argument("--learning-rate", type=float, default=5e-6)
|
| 109 |
parser.add_argument("--lora-rank", type=int, default=8)
|
| 110 |
+
parser.add_argument("--num-generations", type=int, default=2)
|
| 111 |
parser.add_argument("--max-seq-length", type=int, default=1024)
|
| 112 |
parser.add_argument("--output-dir", default="training/sentinel_qwen05_grpo")
|
| 113 |
return parser.parse_args()
|
training/train.py
CHANGED
|
@@ -174,6 +174,7 @@ def run_grpo(args) -> None:
|
|
| 174 |
learning_rate=args.learning_rate,
|
| 175 |
num_train_epochs=args.epochs,
|
| 176 |
per_device_train_batch_size=args.batch_size,
|
|
|
|
| 177 |
logging_steps=10,
|
| 178 |
save_steps=50,
|
| 179 |
max_prompt_length=args.max_seq_length,
|
|
@@ -210,6 +211,7 @@ def main() -> None:
|
|
| 210 |
parser.add_argument("--learning-rate", type=float, default=5e-6)
|
| 211 |
parser.add_argument("--max-seq-length", type=int, default=1024)
|
| 212 |
parser.add_argument("--lora-rank", type=int, default=16)
|
|
|
|
| 213 |
args = parser.parse_args()
|
| 214 |
|
| 215 |
if args.dry_run:
|
|
|
|
| 174 |
learning_rate=args.learning_rate,
|
| 175 |
num_train_epochs=args.epochs,
|
| 176 |
per_device_train_batch_size=args.batch_size,
|
| 177 |
+
num_generations=args.num_generations,
|
| 178 |
logging_steps=10,
|
| 179 |
save_steps=50,
|
| 180 |
max_prompt_length=args.max_seq_length,
|
|
|
|
| 211 |
parser.add_argument("--learning-rate", type=float, default=5e-6)
|
| 212 |
parser.add_argument("--max-seq-length", type=int, default=1024)
|
| 213 |
parser.add_argument("--lora-rank", type=int, default=16)
|
| 214 |
+
parser.add_argument("--num-generations", type=int, default=2)
|
| 215 |
args = parser.parse_args()
|
| 216 |
|
| 217 |
if args.dry_run:
|
ui/app/hooks/useSentinel.ts
CHANGED
|
@@ -9,6 +9,8 @@ import type {
|
|
| 9 |
|
| 10 |
/* ── helpers ──────────────────────────────────────────── */
|
| 11 |
|
|
|
|
|
|
|
| 12 |
function bestSpec(obs: Observation | null): string {
|
| 13 |
if (!obs) return "S0";
|
| 14 |
return [...obs.available_specialists].sort(
|
|
@@ -92,12 +94,12 @@ export function useSentinel() {
|
|
| 92 |
|
| 93 |
/* load evaluation data once */
|
| 94 |
useEffect(() => {
|
| 95 |
-
fetch(`${
|
| 96 |
.then((r) => r.json())
|
| 97 |
.then(setEval)
|
| 98 |
.catch(() => null);
|
| 99 |
|
| 100 |
-
fetch(`${
|
| 101 |
.then((r) => r.ok ? r.text() : "")
|
| 102 |
.then((txt) => {
|
| 103 |
const table = new Map<string, ReplayRow>();
|
|
@@ -151,7 +153,7 @@ export function useSentinel() {
|
|
| 151 |
const payload = { task_type: t, seed: s };
|
| 152 |
setLastReq({ method: "POST", path: "/reset", body: payload });
|
| 153 |
try {
|
| 154 |
-
const res = await fetch(`${
|
| 155 |
method: "POST",
|
| 156 |
headers: { "Content-Type": "application/json" },
|
| 157 |
body: JSON.stringify(payload),
|
|
@@ -203,7 +205,7 @@ export function useSentinel() {
|
|
| 203 |
};
|
| 204 |
setLastReq({ method: "POST", path: `/step?session_id=${sid}`, body: payload });
|
| 205 |
try {
|
| 206 |
-
const res = await fetch(`${
|
| 207 |
method: "POST",
|
| 208 |
headers: { "Content-Type": "application/json" },
|
| 209 |
body: JSON.stringify(payload),
|
|
|
|
| 9 |
|
| 10 |
/* ── helpers ──────────────────────────────────────────── */
|
| 11 |
|
| 12 |
+
const API_BASE = process.env.NEXT_PUBLIC_API_URL || "";
|
| 13 |
+
|
| 14 |
function bestSpec(obs: Observation | null): string {
|
| 15 |
if (!obs) return "S0";
|
| 16 |
return [...obs.available_specialists].sort(
|
|
|
|
| 94 |
|
| 95 |
/* load evaluation data once */
|
| 96 |
useEffect(() => {
|
| 97 |
+
fetch(`${API_BASE}/assets/evaluation_results.json`)
|
| 98 |
.then((r) => r.json())
|
| 99 |
.then(setEval)
|
| 100 |
.catch(() => null);
|
| 101 |
|
| 102 |
+
fetch(`${API_BASE}/assets/trained_policy_replay.jsonl`)
|
| 103 |
.then((r) => r.ok ? r.text() : "")
|
| 104 |
.then((txt) => {
|
| 105 |
const table = new Map<string, ReplayRow>();
|
|
|
|
| 153 |
const payload = { task_type: t, seed: s };
|
| 154 |
setLastReq({ method: "POST", path: "/reset", body: payload });
|
| 155 |
try {
|
| 156 |
+
const res = await fetch(`${API_BASE}/reset`, {
|
| 157 |
method: "POST",
|
| 158 |
headers: { "Content-Type": "application/json" },
|
| 159 |
body: JSON.stringify(payload),
|
|
|
|
| 205 |
};
|
| 206 |
setLastReq({ method: "POST", path: `/step?session_id=${sid}`, body: payload });
|
| 207 |
try {
|
| 208 |
+
const res = await fetch(`${API_BASE}/step?session_id=${encodeURIComponent(sid)}`, {
|
| 209 |
method: "POST",
|
| 210 |
headers: { "Content-Type": "application/json" },
|
| 211 |
body: JSON.stringify(payload),
|