Upload 137 files
Browse files
- hugging/td_fuse/validate.py +1 -1
- hugging/td_lang/compiler.py +10 -10
hugging/td_fuse/validate.py
CHANGED
|
@@ -64,7 +64,7 @@ def validate_merged_model(
|
|
| 64 |
results["canary"] = {
|
| 65 |
"passed": passed_canaries,
|
| 66 |
"total": total_canaries,
|
| 67 |
-
"ok": passed_canaries >= cfg.canary_pass_threshold,
|
| 68 |
"details": canary_results,
|
| 69 |
}
|
| 70 |
|
|
|
|
| 64 |
results["canary"] = {
|
| 65 |
"passed": passed_canaries,
|
| 66 |
"total": total_canaries,
|
| 67 |
+
"ok": passed_canaries >= min(cfg.canary_pass_threshold, total_canaries),
|
| 68 |
"details": canary_results,
|
| 69 |
}
|
| 70 |
|
hugging/td_lang/compiler.py
CHANGED
|
@@ -1411,7 +1411,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 1411 |
self._emit("args=grpo_config,")
|
| 1412 |
self._emit("train_dataset=train_data,")
|
| 1413 |
self._emit("reward_funcs=reward_fn,")
|
| 1414 |
-
self._emit("
|
| 1415 |
self._emit("callbacks=[EarlyStopper()],")
|
| 1416 |
self._indent -= 1
|
| 1417 |
self._emit(")")
|
|
@@ -1477,7 +1477,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 1477 |
self._emit("trainer = SFTTrainer(")
|
| 1478 |
self._indent += 1
|
| 1479 |
self._emit("model=model,")
|
| 1480 |
-
self._emit("
|
| 1481 |
self._emit("args=training_args,")
|
| 1482 |
self._emit("train_dataset=train_data,")
|
| 1483 |
self._emit('dataset_text_field="text",')
|
|
@@ -1504,7 +1504,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 1504 |
self._emit("ref_model=None,")
|
| 1505 |
self._emit("beta=0.1,")
|
| 1506 |
self._emit("train_dataset=train_data,")
|
| 1507 |
-
self._emit("
|
| 1508 |
self._emit("args=training_args,")
|
| 1509 |
self._emit('loss_type="sigmoid",')
|
| 1510 |
self._indent -= 1
|
|
@@ -3727,7 +3727,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 3727 |
self._emit("gradient_checkpointing=True,")
|
| 3728 |
self._indent -= 1
|
| 3729 |
self._emit(")")
|
| 3730 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=level_data, args=training_args,
|
| 3731 |
self._emit("trainer.train()")
|
| 3732 |
self._emit("trainer.save_model(level_out)")
|
| 3733 |
self._emit("checkpoint = level_out # next level starts from this")
|
|
@@ -3879,7 +3879,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 3879 |
self._emit("training_args = TrainingArguments(output_dir=star_out, max_steps=32,")
|
| 3880 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 3881 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 3882 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args,
|
| 3883 |
self._emit("trainer.train()")
|
| 3884 |
self._emit("trainer.save_model(star_out)")
|
| 3885 |
self._emit("checkpoint = star_out")
|
|
@@ -4033,7 +4033,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 4033 |
self._emit(f"training_args = TrainingArguments(output_dir=bon_out, max_steps={cmd.steps},")
|
| 4034 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4035 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 4036 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args,
|
| 4037 |
self._emit("trainer.train()")
|
| 4038 |
self._emit("trainer.save_model(bon_out)")
|
| 4039 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = bon_out')
|
|
@@ -4216,7 +4216,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 4216 |
self._emit(f"training_args = TrainingArguments(output_dir=exploit_out, max_steps={cmd.steps},")
|
| 4217 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4218 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 4219 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args,
|
| 4220 |
self._emit("trainer.train()")
|
| 4221 |
self._emit("trainer.save_model(exploit_out)")
|
| 4222 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = exploit_out')
|
|
@@ -4555,7 +4555,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 4555 |
self._emit(f"training_args = TrainingArguments(output_dir=arena_out, max_steps={cmd.steps},")
|
| 4556 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4557 |
self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
|
| 4558 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args,
|
| 4559 |
self._emit("trainer.train()")
|
| 4560 |
self._emit("trainer.save_model(arena_out)")
|
| 4561 |
self._emit("checkpoint = arena_out # next round uses improved model")
|
|
@@ -5126,7 +5126,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 5126 |
self._emit(f"training_args = TrainingArguments(output_dir=ra_out, max_steps={cmd.steps},")
|
| 5127 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 5128 |
self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
|
| 5129 |
-
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args,
|
| 5130 |
self._emit("trainer.train()")
|
| 5131 |
self._emit("trainer.save_model(ra_out)")
|
| 5132 |
self._emit("checkpoint = ra_out")
|
|
@@ -5399,7 +5399,7 @@ DO NOT EDIT - regenerate from the .td file instead.
|
|
| 5399 |
self._emit("model=student_model,")
|
| 5400 |
self._emit("train_dataset=ds,")
|
| 5401 |
self._emit("args=training_args,")
|
| 5402 |
-
self._emit("
|
| 5403 |
self._indent -= 1
|
| 5404 |
self._emit(")")
|
| 5405 |
self._emit('print(f"[td_lang] Training student for {training_args.max_steps} steps...")')
|
|
|
|
| 1411 |
self._emit("args=grpo_config,")
|
| 1412 |
self._emit("train_dataset=train_data,")
|
| 1413 |
self._emit("reward_funcs=reward_fn,")
|
| 1414 |
+
self._emit("processing_class=tok,")
|
| 1415 |
self._emit("callbacks=[EarlyStopper()],")
|
| 1416 |
self._indent -= 1
|
| 1417 |
self._emit(")")
|
|
|
|
| 1477 |
self._emit("trainer = SFTTrainer(")
|
| 1478 |
self._indent += 1
|
| 1479 |
self._emit("model=model,")
|
| 1480 |
+
self._emit("processing_class=tok,")
|
| 1481 |
self._emit("args=training_args,")
|
| 1482 |
self._emit("train_dataset=train_data,")
|
| 1483 |
self._emit('dataset_text_field="text",')
|
|
|
|
| 1504 |
self._emit("ref_model=None,")
|
| 1505 |
self._emit("beta=0.1,")
|
| 1506 |
self._emit("train_dataset=train_data,")
|
| 1507 |
+
self._emit("processing_class=tok,")
|
| 1508 |
self._emit("args=training_args,")
|
| 1509 |
self._emit('loss_type="sigmoid",')
|
| 1510 |
self._indent -= 1
|
|
|
|
| 3727 |
self._emit("gradient_checkpointing=True,")
|
| 3728 |
self._indent -= 1
|
| 3729 |
self._emit(")")
|
| 3730 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=level_data, args=training_args, processing_class=tok)")
|
| 3731 |
self._emit("trainer.train()")
|
| 3732 |
self._emit("trainer.save_model(level_out)")
|
| 3733 |
self._emit("checkpoint = level_out # next level starts from this")
|
|
|
|
| 3879 |
self._emit("training_args = TrainingArguments(output_dir=star_out, max_steps=32,")
|
| 3880 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 3881 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 3882 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
|
| 3883 |
self._emit("trainer.train()")
|
| 3884 |
self._emit("trainer.save_model(star_out)")
|
| 3885 |
self._emit("checkpoint = star_out")
|
|
|
|
| 4033 |
self._emit(f"training_args = TrainingArguments(output_dir=bon_out, max_steps={cmd.steps},")
|
| 4034 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4035 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 4036 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
|
| 4037 |
self._emit("trainer.train()")
|
| 4038 |
self._emit("trainer.save_model(bon_out)")
|
| 4039 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = bon_out')
|
|
|
|
| 4216 |
self._emit(f"training_args = TrainingArguments(output_dir=exploit_out, max_steps={cmd.steps},")
|
| 4217 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4218 |
self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
|
| 4219 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
|
| 4220 |
self._emit("trainer.train()")
|
| 4221 |
self._emit("trainer.save_model(exploit_out)")
|
| 4222 |
self._emit(f'models["{cmd.target}"]["checkpoint"] = exploit_out')
|
|
|
|
| 4555 |
self._emit(f"training_args = TrainingArguments(output_dir=arena_out, max_steps={cmd.steps},")
|
| 4556 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 4557 |
self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
|
| 4558 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
|
| 4559 |
self._emit("trainer.train()")
|
| 4560 |
self._emit("trainer.save_model(arena_out)")
|
| 4561 |
self._emit("checkpoint = arena_out # next round uses improved model")
|
|
|
|
| 5126 |
self._emit(f"training_args = TrainingArguments(output_dir=ra_out, max_steps={cmd.steps},")
|
| 5127 |
self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
|
| 5128 |
self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
|
| 5129 |
+
self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
|
| 5130 |
self._emit("trainer.train()")
|
| 5131 |
self._emit("trainer.save_model(ra_out)")
|
| 5132 |
self._emit("checkpoint = ra_out")
|
|
|
|
| 5399 |
self._emit("model=student_model,")
|
| 5400 |
self._emit("train_dataset=ds,")
|
| 5401 |
self._emit("args=training_args,")
|
| 5402 |
+
self._emit("processing_class=student_tok,")
|
| 5403 |
self._indent -= 1
|
| 5404 |
self._emit(")")
|
| 5405 |
self._emit('print(f"[td_lang] Training student for {training_args.max_steps} steps...")')
|