td-builder committed on
Commit
d30aa8a
·
verified ·
1 Parent(s): c524109

Upload 137 files

Browse files
hugging/td_fuse/validate.py CHANGED
@@ -64,7 +64,7 @@ def validate_merged_model(
64
  results["canary"] = {
65
  "passed": passed_canaries,
66
  "total": total_canaries,
67
- "ok": passed_canaries >= cfg.canary_pass_threshold,
68
  "details": canary_results,
69
  }
70
 
 
64
  results["canary"] = {
65
  "passed": passed_canaries,
66
  "total": total_canaries,
67
+ "ok": passed_canaries >= min(cfg.canary_pass_threshold, total_canaries),
68
  "details": canary_results,
69
  }
70
 
hugging/td_lang/compiler.py CHANGED
@@ -1411,7 +1411,7 @@ DO NOT EDIT - regenerate from the .td file instead.
1411
  self._emit("args=grpo_config,")
1412
  self._emit("train_dataset=train_data,")
1413
  self._emit("reward_funcs=reward_fn,")
1414
- self._emit("tokenizer=tok,")
1415
  self._emit("callbacks=[EarlyStopper()],")
1416
  self._indent -= 1
1417
  self._emit(")")
@@ -1477,7 +1477,7 @@ DO NOT EDIT - regenerate from the .td file instead.
1477
  self._emit("trainer = SFTTrainer(")
1478
  self._indent += 1
1479
  self._emit("model=model,")
1480
- self._emit("tokenizer=tok,")
1481
  self._emit("args=training_args,")
1482
  self._emit("train_dataset=train_data,")
1483
  self._emit('dataset_text_field="text",')
@@ -1504,7 +1504,7 @@ DO NOT EDIT - regenerate from the .td file instead.
1504
  self._emit("ref_model=None,")
1505
  self._emit("beta=0.1,")
1506
  self._emit("train_dataset=train_data,")
1507
- self._emit("tokenizer=tok,")
1508
  self._emit("args=training_args,")
1509
  self._emit('loss_type="sigmoid",')
1510
  self._indent -= 1
@@ -3727,7 +3727,7 @@ DO NOT EDIT - regenerate from the .td file instead.
3727
  self._emit("gradient_checkpointing=True,")
3728
  self._indent -= 1
3729
  self._emit(")")
3730
- self._emit("trainer = SFTTrainer(model=model, train_dataset=level_data, args=training_args, tokenizer=tok)")
3731
  self._emit("trainer.train()")
3732
  self._emit("trainer.save_model(level_out)")
3733
  self._emit("checkpoint = level_out # next level starts from this")
@@ -3879,7 +3879,7 @@ DO NOT EDIT - regenerate from the .td file instead.
3879
  self._emit("training_args = TrainingArguments(output_dir=star_out, max_steps=32,")
3880
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
3881
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
3882
- self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, tokenizer=tok)")
3883
  self._emit("trainer.train()")
3884
  self._emit("trainer.save_model(star_out)")
3885
  self._emit("checkpoint = star_out")
@@ -4033,7 +4033,7 @@ DO NOT EDIT - regenerate from the .td file instead.
4033
  self._emit(f"training_args = TrainingArguments(output_dir=bon_out, max_steps={cmd.steps},")
4034
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4035
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
4036
- self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, tokenizer=tok)")
4037
  self._emit("trainer.train()")
4038
  self._emit("trainer.save_model(bon_out)")
4039
  self._emit(f'models["{cmd.target}"]["checkpoint"] = bon_out')
@@ -4216,7 +4216,7 @@ DO NOT EDIT - regenerate from the .td file instead.
4216
  self._emit(f"training_args = TrainingArguments(output_dir=exploit_out, max_steps={cmd.steps},")
4217
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4218
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
4219
- self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, tokenizer=tok)")
4220
  self._emit("trainer.train()")
4221
  self._emit("trainer.save_model(exploit_out)")
4222
  self._emit(f'models["{cmd.target}"]["checkpoint"] = exploit_out')
@@ -4555,7 +4555,7 @@ DO NOT EDIT - regenerate from the .td file instead.
4555
  self._emit(f"training_args = TrainingArguments(output_dir=arena_out, max_steps={cmd.steps},")
4556
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4557
  self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
4558
- self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, tokenizer=tok)")
4559
  self._emit("trainer.train()")
4560
  self._emit("trainer.save_model(arena_out)")
4561
  self._emit("checkpoint = arena_out # next round uses improved model")
@@ -5126,7 +5126,7 @@ DO NOT EDIT - regenerate from the .td file instead.
5126
  self._emit(f"training_args = TrainingArguments(output_dir=ra_out, max_steps={cmd.steps},")
5127
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
5128
  self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
5129
- self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, tokenizer=tok)")
5130
  self._emit("trainer.train()")
5131
  self._emit("trainer.save_model(ra_out)")
5132
  self._emit("checkpoint = ra_out")
@@ -5399,7 +5399,7 @@ DO NOT EDIT - regenerate from the .td file instead.
5399
  self._emit("model=student_model,")
5400
  self._emit("train_dataset=ds,")
5401
  self._emit("args=training_args,")
5402
- self._emit("tokenizer=student_tok,")
5403
  self._indent -= 1
5404
  self._emit(")")
5405
  self._emit('print(f"[td_lang] Training student for {training_args.max_steps} steps...")')
 
1411
  self._emit("args=grpo_config,")
1412
  self._emit("train_dataset=train_data,")
1413
  self._emit("reward_funcs=reward_fn,")
1414
+ self._emit("processing_class=tok,")
1415
  self._emit("callbacks=[EarlyStopper()],")
1416
  self._indent -= 1
1417
  self._emit(")")
 
1477
  self._emit("trainer = SFTTrainer(")
1478
  self._indent += 1
1479
  self._emit("model=model,")
1480
+ self._emit("processing_class=tok,")
1481
  self._emit("args=training_args,")
1482
  self._emit("train_dataset=train_data,")
1483
  self._emit('dataset_text_field="text",')
 
1504
  self._emit("ref_model=None,")
1505
  self._emit("beta=0.1,")
1506
  self._emit("train_dataset=train_data,")
1507
+ self._emit("processing_class=tok,")
1508
  self._emit("args=training_args,")
1509
  self._emit('loss_type="sigmoid",')
1510
  self._indent -= 1
 
3727
  self._emit("gradient_checkpointing=True,")
3728
  self._indent -= 1
3729
  self._emit(")")
3730
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=level_data, args=training_args, processing_class=tok)")
3731
  self._emit("trainer.train()")
3732
  self._emit("trainer.save_model(level_out)")
3733
  self._emit("checkpoint = level_out # next level starts from this")
 
3879
  self._emit("training_args = TrainingArguments(output_dir=star_out, max_steps=32,")
3880
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
3881
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
3882
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
3883
  self._emit("trainer.train()")
3884
  self._emit("trainer.save_model(star_out)")
3885
  self._emit("checkpoint = star_out")
 
4033
  self._emit(f"training_args = TrainingArguments(output_dir=bon_out, max_steps={cmd.steps},")
4034
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4035
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
4036
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
4037
  self._emit("trainer.train()")
4038
  self._emit("trainer.save_model(bon_out)")
4039
  self._emit(f'models["{cmd.target}"]["checkpoint"] = bon_out')
 
4216
  self._emit(f"training_args = TrainingArguments(output_dir=exploit_out, max_steps={cmd.steps},")
4217
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4218
  self._emit(" learning_rate=5e-5, logging_steps=8, bf16=True, gradient_checkpointing=True)")
4219
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
4220
  self._emit("trainer.train()")
4221
  self._emit("trainer.save_model(exploit_out)")
4222
  self._emit(f'models["{cmd.target}"]["checkpoint"] = exploit_out')
 
4555
  self._emit(f"training_args = TrainingArguments(output_dir=arena_out, max_steps={cmd.steps},")
4556
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
4557
  self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
4558
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
4559
  self._emit("trainer.train()")
4560
  self._emit("trainer.save_model(arena_out)")
4561
  self._emit("checkpoint = arena_out # next round uses improved model")
 
5126
  self._emit(f"training_args = TrainingArguments(output_dir=ra_out, max_steps={cmd.steps},")
5127
  self._emit(" per_device_train_batch_size=1, gradient_accumulation_steps=4,")
5128
  self._emit(" learning_rate=5e-5, logging_steps=16, bf16=True, gradient_checkpointing=True)")
5129
+ self._emit("trainer = SFTTrainer(model=model, train_dataset=ds, args=training_args, processing_class=tok)")
5130
  self._emit("trainer.train()")
5131
  self._emit("trainer.save_model(ra_out)")
5132
  self._emit("checkpoint = ra_out")
 
5399
  self._emit("model=student_model,")
5400
  self._emit("train_dataset=ds,")
5401
  self._emit("args=training_args,")
5402
+ self._emit("processing_class=student_tok,")
5403
  self._indent -= 1
5404
  self._emit(")")
5405
  self._emit('print(f"[td_lang] Training student for {training_args.max_steps} steps...")')