Upload exp_phase8_through_mdlm_gspo_rich_20260501_123414/log.jsonl with huggingface_hub
Browse files
exp_phase8_through_mdlm_gspo_rich_20260501_123414/log.jsonl
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 1, "elapsed_s": 63.52334523200989, "wallclock_s_per_step": 63.523345947265625, "loss/total": 1.239776611328125e-05, "loss/per_rollout_mean": 1.2400560081005096e-05, "loss/per_rollout_std": 0.999969016007374, "reward/total_mean": -0.04760657464869405, "reward/total_std": 0.058985706160465196, "reward/total_min": -0.12572324872016905, "reward/total_max": 0.05238215403236916, "reward/argmax_match": 0.0, "reward/oracle_target_sigmoid": 1.6807002421751143e-09, "reward/oracle_offtarget_sigmoid": 0.49202191829681396, "reward/motif_hit_frac_topK": 0.25, "reward/r_argmax_term": 0.0, "reward/r_target_term": 8.403501210875571e-10, "reward/r_motif_term": 0.1, "reward/r_offtarget_term": -0.14760657548904418, "advantage/mean": 8.102506399154663e-08, "advantage/std": 0.9999529671815963, "advantage/abs_mean": 0.8042738074436784, "ratio/mean": 1.0, "ratio/std": 3.0517578125e-05, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": 0.0, "kl/abs_mean": 9.5367431640625e-07, "reward/cell_Ex": -0.04760657464869405}
|
| 2 |
+
{"step": 2, "elapsed_s": 112.03887796401978, "wallclock_s_per_step": 56.01943922042847, "loss/total": -0.029226824641227722, "loss/per_rollout_mean": -0.029226835817098618, "loss/per_rollout_std": 0.9388866324455706, "reward/total_mean": -0.06383666344202651, "reward/total_std": 0.044283511884477225, "reward/total_min": -0.11953117847442626, "reward/total_max": 0.0024095058441162137, "reward/argmax_match": 0.0, "reward/oracle_target_sigmoid": 0.0002118065630725141, "reward/oracle_offtarget_sigmoid": 0.3381418890785426, "reward/motif_hit_frac_topK": 0.09375, "reward/r_argmax_term": 0.0, "reward/r_target_term": 0.00010590328153625705, "reward/r_motif_term": 0.037500000000000006, "reward/r_offtarget_term": -0.10144256672356278, "advantage/mean": -8.754432201385498e-08, "advantage/std": 0.9999748563442401, "advantage/abs_mean": 0.7882378259673715, "ratio/mean": 0.9705291390419006, "ratio/std": 0.06939278513265741, "ratio/clip_frac_low": 0.125, "ratio/clip_frac_high": 0.0, "kl/mean": -0.0020444393157958984, "kl/abs_mean": 0.0030171871185302734, "reward/cell_Ex": -0.06383666344202651}
|
| 3 |
+
{"step": 3, "elapsed_s": 161.13213658332825, "wallclock_s_per_step": 53.71071227391561, "loss/total": 0.027997612953186035, "loss/per_rollout_mean": 0.02799762273207307, "loss/per_rollout_std": 1.038729253939765, "reward/total_mean": -0.06908574184103598, "reward/total_std": 0.07553512255758027, "reward/total_min": -0.14694018363952635, "reward/total_max": 0.05025631336611696, "reward/argmax_match": 0.0, "reward/oracle_target_sigmoid": 0.00010634361433403683, "reward/oracle_offtarget_sigmoid": 0.4596297121606767, "reward/motif_hit_frac_topK": 0.171875, "reward/r_argmax_term": 0.0, "reward/r_target_term": 5.3171807167018414e-05, "reward/r_motif_term": 0.06875, "reward/r_offtarget_term": -0.137888913648203, "advantage/mean": -2.193264663219452e-07, "advantage/std": 0.99992903661654, "advantage/abs_mean": 0.7969256457872689, "ratio/mean": 1.0284800231456757, "ratio/std": 0.058280375830392785, "ratio/clip_frac_low": 0.0, "ratio/clip_frac_high": 0.0, "kl/mean": 0.0016582012176513672, "kl/abs_mean": 0.0025594234466552734, "reward/cell_Ex": -0.06908574184103598}
|
| 4 |
+
{"step": 4, "elapsed_s": 210.11376333236694, "wallclock_s_per_step": 52.528440952301025, "loss/total": -0.06153355911374092, "loss/per_rollout_mean": -0.061533560045063496, "loss/per_rollout_std": 1.064852041859415, "reward/total_mean": 0.4117187967841951, "reward/total_std": 0.47762255479895765, "reward/total_min": -0.04952777322111045, "reward/total_max": 1.0505658319152418, "reward/argmax_match": 0.375, "reward/oracle_target_sigmoid": 0.054907934229886976, "reward/oracle_offtarget_sigmoid": 0.15661723443582787, "reward/motif_hit_frac_topK": 0.140625, "reward/r_argmax_term": 0.375, "reward/r_target_term": 0.027453967114943488, "reward/r_motif_term": 0.05625000000000001, "reward/r_offtarget_term": -0.04698517033074836, "advantage/mean": 3.67872416973114e-08, "advantage/std": 0.9999936606411358, "advantage/abs_mean": 0.8320146133191884, "ratio/mean": 0.9814127758145332, "ratio/std": 0.1600353887731184, "ratio/clip_frac_low": 0.125, "ratio/clip_frac_high": 0.125, "kl/mean": -0.002094745635986328, "kl/abs_mean": 0.008144855499267578, "reward/cell_Ex": 0.4117187967841951}
|