explcre commited on
Commit
74b7993
·
verified ·
1 Parent(s): f4705bf

Upload exp_phase8_bridge_recipeB_only_2000step_045922/log.jsonl with huggingface_hub

Browse files
exp_phase8_bridge_recipeB_only_2000step_045922/log.jsonl ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"step": 0, "loss": 2.267940044403076, "loss_mdlm": 1.3468172550201416, "loss_lm": 1.8422456979751587, "loss_ct": 0.0, "lr": 0.0, "gnorm": 2.9235055446624756, "bridge_gate_avg": 1.0, "bridge_out_proj_avg": 0.0, "elapsed_s": 3.9034204483032227}
2
+ {"step": 100, "loss": 1.911407232284546, "loss_mdlm": 1.3321900367736816, "loss_lm": 1.158434271812439, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 3.5377378463745117, "bridge_gate_avg": 1.0006812512874603, "bridge_out_proj_avg": 0.000266664574155584, "elapsed_s": 60.06942415237427}
3
+ {"step": 200, "loss": 2.0708160400390625, "loss_mdlm": 1.3140462636947632, "loss_lm": 1.5135395526885986, "loss_ct": 0.0, "lr": 5e-05, "gnorm": 4.148580551147461, "bridge_gate_avg": 1.0030148029327393, "bridge_out_proj_avg": 0.0011600741418078542, "elapsed_s": 114.25046133995056}
4
+ {"step": 300, "loss": 1.6196874380111694, "loss_mdlm": 1.2812386751174927, "loss_lm": 0.6768975853919983, "loss_ct": 0.0, "lr": 4.962019382530521e-05, "gnorm": 2.762939929962158, "bridge_gate_avg": 1.0039019584655762, "bridge_out_proj_avg": 0.0014975214435253292, "elapsed_s": 168.0911946296692}
5
+ {"step": 400, "loss": 1.782671570777893, "loss_mdlm": 1.3548566102981567, "loss_lm": 0.8556299209594727, "loss_ct": 0.0, "lr": 4.849231551964771e-05, "gnorm": 3.107063055038452, "bridge_gate_avg": 1.0053487420082092, "bridge_out_proj_avg": 0.0017116115195676684, "elapsed_s": 222.1491940021515}
6
+ {"step": 500, "loss": 1.7088299989700317, "loss_mdlm": 1.3281694650650024, "loss_lm": 0.7613210082054138, "loss_ct": 0.0, "lr": 4.665063509461097e-05, "gnorm": 3.235947608947754, "bridge_gate_avg": 1.0064159333705902, "bridge_out_proj_avg": 0.0018412444915156811, "elapsed_s": 275.55312609672546}
7
+ {"step": 600, "loss": 1.7435210943222046, "loss_mdlm": 1.3852931261062622, "loss_lm": 0.71645587682724, "loss_ct": 0.0, "lr": 4.415111107797445e-05, "gnorm": 2.4082448482513428, "bridge_gate_avg": 1.0078760385513306, "bridge_out_proj_avg": 0.0019543488742783666, "elapsed_s": 329.28782081604004}
8
+ {"step": 700, "loss": 1.6808569431304932, "loss_mdlm": 1.3212289810180664, "loss_lm": 0.7192559838294983, "loss_ct": 0.0, "lr": 4.1069690242163484e-05, "gnorm": 2.4556784629821777, "bridge_gate_avg": 1.008607178926468, "bridge_out_proj_avg": 0.002023645443841815, "elapsed_s": 383.1740515232086}
9
+ {"step": 800, "loss": 1.8492941856384277, "loss_mdlm": 1.3321248292922974, "loss_lm": 1.0343385934829712, "loss_ct": 0.0, "lr": 3.7500000000000003e-05, "gnorm": 3.4557995796203613, "bridge_gate_avg": 1.00934499502182, "bridge_out_proj_avg": 0.002089552581310272, "elapsed_s": 436.84439301490784}
10
+ {"step": 900, "loss": 1.734226107597351, "loss_mdlm": 1.3686087131500244, "loss_lm": 0.7312347888946533, "loss_ct": 0.0, "lr": 3.355050358314172e-05, "gnorm": 2.734409809112549, "bridge_gate_avg": 1.0100584924221039, "bridge_out_proj_avg": 0.0021218453184701502, "elapsed_s": 490.6255536079407}
11
+ {"step": 1000, "loss": 1.6673076152801514, "loss_mdlm": 1.3422633409500122, "loss_lm": 0.6500885486602783, "loss_ct": 0.0, "lr": 2.9341204441673266e-05, "gnorm": 2.064119815826416, "bridge_gate_avg": 1.0106571018695831, "bridge_out_proj_avg": 0.002149562817066908, "elapsed_s": 544.1826660633087}
12
+ {"step": 1100, "loss": 1.498210072517395, "loss_mdlm": 1.3351895809173584, "loss_lm": 0.3260408937931061, "loss_ct": 0.0, "lr": 2.5e-05, "gnorm": 1.5977047681808472, "bridge_gate_avg": 1.0109351575374603, "bridge_out_proj_avg": 0.0021784051205031574, "elapsed_s": 597.6268215179443}
13
+ {"step": 1200, "loss": 1.692859172821045, "loss_mdlm": 1.3478386402130127, "loss_lm": 0.6900410652160645, "loss_ct": 0.0, "lr": 2.0658795558326743e-05, "gnorm": 2.3296940326690674, "bridge_gate_avg": 1.011233389377594, "bridge_out_proj_avg": 0.0021987237269058824, "elapsed_s": 651.2023386955261}
14
+ {"step": 1300, "loss": 1.628414511680603, "loss_mdlm": 1.3156527280807495, "loss_lm": 0.625523567199707, "loss_ct": 0.0, "lr": 1.6449496416858284e-05, "gnorm": 2.8393197059631348, "bridge_gate_avg": 1.0115362405776978, "bridge_out_proj_avg": 0.0022089772392064333, "elapsed_s": 704.5901415348053}
15
+ {"step": 1400, "loss": 1.6050933599472046, "loss_mdlm": 1.2551257610321045, "loss_lm": 0.6999351382255554, "loss_ct": 0.0, "lr": 1.2500000000000006e-05, "gnorm": 3.109539747238159, "bridge_gate_avg": 1.011936604976654, "bridge_out_proj_avg": 0.002219657471869141, "elapsed_s": 757.624299287796}
16
+ {"step": 1500, "loss": 1.7466274499893188, "loss_mdlm": 1.3470587730407715, "loss_lm": 0.7991373538970947, "loss_ct": 0.0, "lr": 8.930309757836517e-06, "gnorm": 2.5259363651275635, "bridge_gate_avg": 1.0122175216674805, "bridge_out_proj_avg": 0.0022250370820984244, "elapsed_s": 811.0120060443878}
17
+ {"step": 1600, "loss": 1.7561360597610474, "loss_mdlm": 1.3499571084976196, "loss_lm": 0.8123579621315002, "loss_ct": 0.0, "lr": 5.848888922025553e-06, "gnorm": 2.596261501312256, "bridge_gate_avg": 1.0123358368873596, "bridge_out_proj_avg": 0.0022298440453596413, "elapsed_s": 863.898199558258}
18
+ {"step": 1700, "loss": 1.5279524326324463, "loss_mdlm": 1.324222207069397, "loss_lm": 0.40746039152145386, "loss_ct": 0.0, "lr": 3.3493649053890326e-06, "gnorm": 2.219634771347046, "bridge_gate_avg": 1.0123794972896576, "bridge_out_proj_avg": 0.002231507038231939, "elapsed_s": 917.3566710948944}
19
+ {"step": 1800, "loss": 1.7677135467529297, "loss_mdlm": 1.3367701768875122, "loss_lm": 0.861886739730835, "loss_ct": 0.0, "lr": 1.5076844803522922e-06, "gnorm": 3.1980390548706055, "bridge_gate_avg": 1.0124304592609406, "bridge_out_proj_avg": 0.0022326772450469434, "elapsed_s": 970.7601613998413}
20
+ {"step": 1900, "loss": 1.701881766319275, "loss_mdlm": 1.3423289060592651, "loss_lm": 0.7191056609153748, "loss_ct": 0.0, "lr": 3.7980617469479953e-07, "gnorm": 3.1211397647857666, "bridge_gate_avg": 1.0124506652355194, "bridge_out_proj_avg": 0.0022332649677991867, "elapsed_s": 1026.0011944770813}