explcre commited on
Commit
7ca9a68
·
verified ·
1 Parent(s): bfe646e

Upload exp_phase8_t3_rl_edit_tight_dapo_s1_20260506_083012/manifest.json with huggingface_hub

Browse files
exp_phase8_t3_rl_edit_tight_dapo_s1_20260506_083012/manifest.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": "train_mdlm_rl_t3_motif.py",
3
+ "algo": "MDLM-GRPO with motif-triggered chained reward",
4
+ "alpha_oracle": 0.0,
5
+ "beta_motif": 1.0,
6
+ "gamma_jsd": 0.0,
7
+ "delta_kd": 0.0,
8
+ "gamma_step": 0.99,
9
+ "lora_r": 16,
10
+ "rollouts_per_prompt": 4,
11
+ "num_mdlm_steps": 8,
12
+ "num_rl_steps": 200
13
+ }