Upload exp_phase8_bridge_recipeSOTA_FIXED_2000step_063956/log.jsonl with huggingface_hub
Browse files
exp_phase8_bridge_recipeSOTA_FIXED_2000step_063956/log.jsonl
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"step": 0, "loss": 2.2276010513305664, "loss_mdlm": 1.3085075616836548, "loss_lm": 1.8381868600845337, "loss_ct": 0.0, "lr": 0.0, "gnorm": 3.259474277496338, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.01804453320801258, "elapsed_s": 4.6939074993133545}
|
| 2 |
+
{"step": 100, "loss": 1.9404067993164062, "loss_mdlm": 1.3262494802474976, "loss_lm": 1.2283146381378174, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 5.9164838790893555, "bridge_gate_avg": 1.0007342100143433, "bridge_out_proj_avg": 0.018048123456537724, "elapsed_s": 50.786860704422}
|
| 3 |
+
{"step": 200, "loss": 2.0613019466400146, "loss_mdlm": 1.310669183731079, "loss_lm": 1.501265525817871, "loss_ct": 0.0, "lr": 5e-05, "gnorm": 4.004824161529541, "bridge_gate_avg": 1.0018727779388428, "bridge_out_proj_avg": 0.018056116066873074, "elapsed_s": 93.75502300262451}
|
| 4 |
+
{"step": 300, "loss": 1.6354931592941284, "loss_mdlm": 1.2979412078857422, "loss_lm": 0.6751038432121277, "loss_ct": 0.0, "lr": 4.962019382530521e-05, "gnorm": 2.778289318084717, "bridge_gate_avg": 1.0025665760040283, "bridge_out_proj_avg": 0.01806130399927497, "elapsed_s": 136.46136236190796}
|
| 5 |
+
{"step": 400, "loss": 1.7922266721725464, "loss_mdlm": 1.3502998352050781, "loss_lm": 0.8838537335395813, "loss_ct": 0.0, "lr": 4.849231551964771e-05, "gnorm": 3.1240482330322266, "bridge_gate_avg": 1.003765881061554, "bridge_out_proj_avg": 0.018068622797727585, "elapsed_s": 178.86642813682556}
|
| 6 |
+
{"step": 500, "loss": 1.7229456901550293, "loss_mdlm": 1.3220018148422241, "loss_lm": 0.8018878102302551, "loss_ct": 0.0, "lr": 4.665063509461097e-05, "gnorm": 3.6189119815826416, "bridge_gate_avg": 1.0048598647117615, "bridge_out_proj_avg": 0.01807444030418992, "elapsed_s": 220.51574516296387}
|
| 7 |
+
{"step": 600, "loss": 1.6756023168563843, "loss_mdlm": 1.326345443725586, "loss_lm": 0.6985136866569519, "loss_ct": 0.0, "lr": 4.415111107797445e-05, "gnorm": 2.2997756004333496, "bridge_gate_avg": 1.0059041380882263, "bridge_out_proj_avg": 0.018081323709338903, "elapsed_s": 261.85207533836365}
|
| 8 |
+
{"step": 700, "loss": 1.6469870805740356, "loss_mdlm": 1.2904990911483765, "loss_lm": 0.7129759788513184, "loss_ct": 0.0, "lr": 4.1069690242163484e-05, "gnorm": 2.5828800201416016, "bridge_gate_avg": 1.0066717863082886, "bridge_out_proj_avg": 0.018088807817548513, "elapsed_s": 303.50137996673584}
|
| 9 |
+
{"step": 800, "loss": 1.8167022466659546, "loss_mdlm": 1.3145910501480103, "loss_lm": 1.0042223930358887, "loss_ct": 0.0, "lr": 3.7500000000000003e-05, "gnorm": 3.4258389472961426, "bridge_gate_avg": 1.0072508752346039, "bridge_out_proj_avg": 0.01809403533115983, "elapsed_s": 345.00909209251404}
|
| 10 |
+
{"step": 900, "loss": 1.6602590084075928, "loss_mdlm": 1.3005056381225586, "loss_lm": 0.7195068597793579, "loss_ct": 0.0, "lr": 3.355050358314172e-05, "gnorm": 2.8302524089813232, "bridge_gate_avg": 1.0078189074993134, "bridge_out_proj_avg": 0.018098872154951096, "elapsed_s": 386.60695242881775}
|
| 11 |
+
{"step": 1000, "loss": 1.635662317276001, "loss_mdlm": 1.3037633895874023, "loss_lm": 0.663797914981842, "loss_ct": 0.0, "lr": 2.9341204441673266e-05, "gnorm": 2.201650619506836, "bridge_gate_avg": 1.0081505179405212, "bridge_out_proj_avg": 0.018101483583450317, "elapsed_s": 428.2988369464874}
|
| 12 |
+
{"step": 1100, "loss": 1.4774835109710693, "loss_mdlm": 1.312296748161316, "loss_lm": 0.3303734064102173, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 1.6615906953811646, "bridge_gate_avg": 1.0084218978881836, "bridge_out_proj_avg": 0.018103872425854206, "elapsed_s": 469.85372519493103}
|
| 13 |
+
{"step": 1200, "loss": 1.6948537826538086, "loss_mdlm": 1.3354556560516357, "loss_lm": 0.7187961339950562, "loss_ct": 0.0, "lr": 2.0658795558326743e-05, "gnorm": 2.3477625846862793, "bridge_gate_avg": 1.0087869763374329, "bridge_out_proj_avg": 0.01810592133551836, "elapsed_s": 511.1933786869049}
|
| 14 |
+
{"step": 1300, "loss": 1.637911319732666, "loss_mdlm": 1.3226854801177979, "loss_lm": 0.6304517388343811, "loss_ct": 0.0, "lr": 1.6449496416858284e-05, "gnorm": 2.9021830558776855, "bridge_gate_avg": 1.0089676082134247, "bridge_out_proj_avg": 0.018107495736330748, "elapsed_s": 551.6636514663696}
|
| 15 |
+
{"step": 1400, "loss": 1.6334675550460815, "loss_mdlm": 1.2775675058364868, "loss_lm": 0.7118001580238342, "loss_ct": 0.0, "lr": 1.2500000000000006e-05, "gnorm": 3.2425525188446045, "bridge_gate_avg": 1.0091034471988678, "bridge_out_proj_avg": 0.018108748830854893, "elapsed_s": 583.1440794467926}
|
| 16 |
+
{"step": 1500, "loss": 1.7273304462432861, "loss_mdlm": 1.329987645149231, "loss_lm": 0.7946854829788208, "loss_ct": 0.0, "lr": 8.930309757836517e-06, "gnorm": 2.711003541946411, "bridge_gate_avg": 1.009172111749649, "bridge_out_proj_avg": 0.01810926804319024, "elapsed_s": 614.4573774337769}
|
| 17 |
+
{"step": 1600, "loss": 1.6967413425445557, "loss_mdlm": 1.2931792736053467, "loss_lm": 0.8071240186691284, "loss_ct": 0.0, "lr": 5.848888922025553e-06, "gnorm": 2.6251730918884277, "bridge_gate_avg": 1.0092514753341675, "bridge_out_proj_avg": 0.018109755590558052, "elapsed_s": 645.9658761024475}
|
| 18 |
+
{"step": 1700, "loss": 1.498193621635437, "loss_mdlm": 1.2892467975616455, "loss_lm": 0.4178936779499054, "loss_ct": 0.0, "lr": 3.3493649053890326e-06, "gnorm": 2.219194173812866, "bridge_gate_avg": 1.0092409253120422, "bridge_out_proj_avg": 0.018109961412847042, "elapsed_s": 677.4470465183258}
|
| 19 |
+
{"step": 1800, "loss": 1.7271722555160522, "loss_mdlm": 1.3017436265945435, "loss_lm": 0.8508573174476624, "loss_ct": 0.0, "lr": 1.5076844803522922e-06, "gnorm": 3.232522964477539, "bridge_gate_avg": 1.0092484652996063, "bridge_out_proj_avg": 0.018110081553459167, "elapsed_s": 708.7637624740601}
|
| 20 |
+
{"step": 1900, "loss": 1.6814062595367432, "loss_mdlm": 1.331810474395752, "loss_lm": 0.6991914510726929, "loss_ct": 0.0, "lr": 3.7980617469479953e-07, "gnorm": 3.043192148208618, "bridge_gate_avg": 1.0092499256134033, "bridge_out_proj_avg": 0.0181101206690073, "elapsed_s": 740.7570207118988}
|