Upload exp_phase8_bridge_recipeSOTA_2000step_060246/log.jsonl with huggingface_hub
Browse files
exp_phase8_bridge_recipeSOTA_2000step_060246/log.jsonl
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "loss": 2.1940178871154785, "loss_mdlm": 1.2641350030899048, "loss_lm": 1.8597657680511475, "loss_ct": 0.0, "lr": 0.0, "gnorm": 3.7878217697143555, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.0, "elapsed_s": 3.500497579574585}
|
| 2 |
+
{"step": 100, "loss": 1.9080125093460083, "loss_mdlm": 1.3239084482192993, "loss_lm": 1.168208122253418, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 3.446755886077881, "bridge_gate_avg": 0.9999873638153076, "bridge_out_proj_avg": 0.0, "elapsed_s": 39.796420097351074}
|
| 3 |
+
{"step": 200, "loss": 2.072253704071045, "loss_mdlm": 1.3281831741333008, "loss_lm": 1.4881409406661987, "loss_ct": 0.0, "lr": 5e-05, "gnorm": 4.12031888961792, "bridge_gate_avg": 0.9999498128890991, "bridge_out_proj_avg": 0.0, "elapsed_s": 73.87015223503113}
|
| 4 |
+
{"step": 300, "loss": 1.6183552742004395, "loss_mdlm": 1.286725640296936, "loss_lm": 0.6632593274116516, "loss_ct": 0.0, "lr": 4.962019382530521e-05, "gnorm": 2.6898059844970703, "bridge_gate_avg": 0.9999021291732788, "bridge_out_proj_avg": 0.0, "elapsed_s": 107.52856540679932}
|
| 5 |
+
{"step": 400, "loss": 1.7649610042572021, "loss_mdlm": 1.313080072402954, "loss_lm": 0.9037617444992065, "loss_ct": 0.0, "lr": 4.849231551964771e-05, "gnorm": 3.3303797245025635, "bridge_gate_avg": 0.9998544454574585, "bridge_out_proj_avg": 0.0, "elapsed_s": 140.82858324050903}
|
| 6 |
+
{"step": 500, "loss": 1.7265830039978027, "loss_mdlm": 1.3373931646347046, "loss_lm": 0.7783797383308411, "loss_ct": 0.0, "lr": 4.665063509461097e-05, "gnorm": 3.316387176513672, "bridge_gate_avg": 0.9998067617416382, "bridge_out_proj_avg": 0.0, "elapsed_s": 172.91190099716187}
|
| 7 |
+
{"step": 600, "loss": 1.7463237047195435, "loss_mdlm": 1.3941558599472046, "loss_lm": 0.7043356895446777, "loss_ct": 0.0, "lr": 4.415111107797445e-05, "gnorm": 2.328320026397705, "bridge_gate_avg": 0.9997603297233582, "bridge_out_proj_avg": 0.0, "elapsed_s": 204.65343356132507}
|
| 8 |
+
{"step": 700, "loss": 1.6398268938064575, "loss_mdlm": 1.2805469036102295, "loss_lm": 0.718559980392456, "loss_ct": 0.0, "lr": 4.1069690242163484e-05, "gnorm": 2.5977656841278076, "bridge_gate_avg": 0.9997186064720154, "bridge_out_proj_avg": 0.0, "elapsed_s": 236.53532028198242}
|
| 9 |
+
{"step": 800, "loss": 1.8185946941375732, "loss_mdlm": 1.3217191696166992, "loss_lm": 0.9937509298324585, "loss_ct": 0.0, "lr": 3.7500000000000003e-05, "gnorm": 3.3088250160217285, "bridge_gate_avg": 0.999678909778595, "bridge_out_proj_avg": 0.0, "elapsed_s": 268.55660367012024}
|
| 10 |
+
{"step": 900, "loss": 1.658094882965088, "loss_mdlm": 1.303091049194336, "loss_lm": 0.7100077867507935, "loss_ct": 0.0, "lr": 3.355050358314172e-05, "gnorm": 2.7152211666107178, "bridge_gate_avg": 0.9996431469917297, "bridge_out_proj_avg": 0.0, "elapsed_s": 300.8158440589905}
|
| 11 |
+
{"step": 1000, "loss": 1.6346511840820312, "loss_mdlm": 1.3079025745391846, "loss_lm": 0.6534972786903381, "loss_ct": 0.0, "lr": 2.9341204441673266e-05, "gnorm": 2.118523120880127, "bridge_gate_avg": 0.9996122717857361, "bridge_out_proj_avg": 0.0, "elapsed_s": 333.2249026298523}
|
| 12 |
+
{"step": 1100, "loss": 1.4774835109710693, "loss_mdlm": 1.3124980926513672, "loss_lm": 0.32997095584869385, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 1.6270368099212646, "bridge_gate_avg": 0.999584972858429, "bridge_out_proj_avg": 0.0, "elapsed_s": 365.17956709861755}
|
| 13 |
+
{"step": 1200, "loss": 1.6549875736236572, "loss_mdlm": 1.3105659484863281, "loss_lm": 0.6888431906700134, "loss_ct": 0.0, "lr": 2.0658795558326743e-05, "gnorm": 2.342125177383423, "bridge_gate_avg": 0.9995614290237427, "bridge_out_proj_avg": 0.0, "elapsed_s": 397.37986302375793}
|
| 14 |
+
{"step": 1300, "loss": 1.623676061630249, "loss_mdlm": 1.3133928775787354, "loss_lm": 0.6205662488937378, "loss_ct": 0.0, "lr": 1.6449496416858284e-05, "gnorm": 2.877574920654297, "bridge_gate_avg": 0.9995435476303101, "bridge_out_proj_avg": 0.0, "elapsed_s": 429.3381826877594}
|
| 15 |
+
{"step": 1400, "loss": 1.6278972625732422, "loss_mdlm": 1.2783905267715454, "loss_lm": 0.6990134716033936, "loss_ct": 0.0, "lr": 1.2500000000000006e-05, "gnorm": 3.270975112915039, "bridge_gate_avg": 0.9995293617248535, "bridge_out_proj_avg": 0.0, "elapsed_s": 460.8725802898407}
|
| 16 |
+
{"step": 1500, "loss": 1.7069332599639893, "loss_mdlm": 1.3135886192321777, "loss_lm": 0.7866894006729126, "loss_ct": 0.0, "lr": 8.930309757836517e-06, "gnorm": 2.563070058822632, "bridge_gate_avg": 0.9995175004005432, "bridge_out_proj_avg": 0.0, "elapsed_s": 492.18294072151184}
|
| 17 |
+
{"step": 1600, "loss": 1.7398147583007812, "loss_mdlm": 1.340402364730835, "loss_lm": 0.7988247871398926, "loss_ct": 0.0, "lr": 5.848888922025553e-06, "gnorm": 2.626972198486328, "bridge_gate_avg": 0.9995115399360657, "bridge_out_proj_avg": 0.0, "elapsed_s": 523.6092762947083}
|
| 18 |
+
{"step": 1700, "loss": 1.5254170894622803, "loss_mdlm": 1.3161180019378662, "loss_lm": 0.41859811544418335, "loss_ct": 0.0, "lr": 3.3493649053890326e-06, "gnorm": 2.1489484310150146, "bridge_gate_avg": 0.9995055794715881, "bridge_out_proj_avg": 0.0, "elapsed_s": 555.3625299930573}
|
| 19 |
+
{"step": 1800, "loss": 1.7507048845291138, "loss_mdlm": 1.3215141296386719, "loss_lm": 0.8583815693855286, "loss_ct": 0.0, "lr": 1.5076844803522922e-06, "gnorm": 3.274841785430908, "bridge_gate_avg": 0.999504566192627, "bridge_out_proj_avg": 0.0, "elapsed_s": 586.8554952144623}
|
| 20 |
+
{"step": 1900, "loss": 1.6954622268676758, "loss_mdlm": 1.3421494960784912, "loss_lm": 0.7066253423690796, "loss_ct": 0.0, "lr": 3.7980617469479953e-07, "gnorm": 3.112032413482666, "bridge_gate_avg": 0.999504566192627, "bridge_out_proj_avg": 0.0, "elapsed_s": 618.8787922859192}
|