Sssplendid's picture
Add 1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110
b12805a verified
# Extension-based Git LFS rules.
# Each rule below assigns the same four attributes to every path matching the
# glob on the left:
#   filter=lfs  - run the file through the "lfs" clean/smudge filter
#   diff=lfs    - use the "lfs" diff driver
#   merge=lfs   - use the "lfs" merge driver
#   -text       - unset the text attribute (no end-of-line normalization)
# NOTE(review): this list looks like the stock template generated for new
# Hugging Face repositories - confirm before pruning or reordering entries.
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
# Everything at any depth below a saved_model/ directory at the repository root.
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
# Any file whose name contains "tfevents".
*tfevents* filter=lfs diff=lfs merge=lfs -text
# Checkpoint artifacts of the 340m_archs_lrtuning experiment runs.
#
# Every run keeps its checkpoints in the same layout:
#   340m_archs_lrtuning/<run_name>/exp_data/checkpoint/step-<N>/.metadata
#   340m_archs_lrtuning/<run_name>/exp_data/checkpoint/step-<N>/__<shard>_0.distcp
#
# The two anchored globs below replace the former one-line-per-file listing
# (nine lines per checkpoint step). They match every path that was listed
# explicitly and carry the same attributes, so already-tracked files are
# unaffected; new runs, steps and shards under this layout are picked up
# without further edits.
#
# Pattern notes: a pattern containing "/" is matched relative to the
# directory of this .gitattributes file, and "*" never matches a "/", so each
# "*" stands for exactly one path component (run name, step directory) or the
# shard file's base name.
340m_archs_lrtuning/*/exp_data/checkpoint/step-*/.metadata filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/*/exp_data/checkpoint/step-*/*.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/transformer_340m_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260506_010507/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/transformer_340m_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260506_010507/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/transformer_340m_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260506_010507/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
340m_archs_lrtuning/deltanet_340m_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260504_063226/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
# Git LFS rules for checkpoint artifacts under gated_deltanet_1b_v3/.
# Two glob rules stand in for the per-file entries that used to be listed here
# (one `.metadata` file and the `__N_0.distcp` shards of each run).
#   - the first `*` matches exactly one run directory (`*` never crosses `/`)
#   - `step-*` matches any checkpoint step directory, e.g. step-30720
# Every path previously listed is still matched with the same attributes;
# new runs or steps under this directory are picked up without editing this file.
gated_deltanet_1b_v3/*/exp_data/checkpoint/step-*/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_1b_v3/*/exp_data/checkpoint/step-*/*.distcp filter=lfs diff=lfs merge=lfs -text
# Git LFS rules for checkpoint artifacts under gated_deltanet_340m_v3/.
# Two glob rules stand in for the per-file entries that used to be listed here
# (one `.metadata` file and the `__N_0.distcp` shards of each run).
#   - the first `*` matches exactly one run directory (`*` never crosses `/`)
#   - `step-*` matches any checkpoint step directory, e.g. step-30720
# Every path previously listed is still matched with the same attributes;
# new runs or steps under this directory are picked up without editing this file.
gated_deltanet_340m_v3/*/exp_data/checkpoint/step-*/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/*/exp_data/checkpoint/step-*/*.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_came_lr1e_3_b1_0_9_b2_0_98_eps_1e_12_20260504_195846/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_lion_lr2e_4_b1_0_9_b2_0_98_eps_1e_8_scale2_0_rank512_20260504_084244/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_adan_lr3e_3_b1_0_9_b2_0_92_eps_1e_8_b3_0_99_20260504_110921/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
gated_deltanet_340m_v3/gated_deltanet_340m_mars_adamw_lr5e_3_b1_0_95_b2_0_99_eps_1e_15_20260503_225007/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_apollo_rank256_scale1_channel_std_gap200_lr1e_2_b1_0_9_b2_0_99_eps_1e_15_20260501_205845/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_muon_ns5_nesterov_lr5e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_233332/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260430_102319/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_rmnp_adamlr1e_3_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260502_095526/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181638/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260511_072915/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_1b_c4/transformer_pp_1b_c4_valc4_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260508_191338/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260426_181628/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_muon_ns5_nesterov_lr3e_3_adamlr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260501_103148/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_apollo_rank512_scale2_channel_std_gap200_lr3e_3_b1_0_9_b2_0_99_eps_1e_15_20260503_043204/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_15_20260507_202117/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_soap_pdim2048_pfreq10_lr1e_3_b1_0_9_b2_0_95_eps_1e_15_20260427_111844/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_rmnp_adamlr1e_3_lr5e_3_b1_0_9_b2_0_99_eps_1e_15_20260508_073939/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
transformer_pp_340m_c4/transformer_pp_340m_c4_valc4_lion_lr1e_4_b1_0_9_b2_0_99_eps_1e_15_20260427_054600/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_043356/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_043118/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260519_181436/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_lion_lr3e_3_b1_0_9_b2_0_99_20260520_105611/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260521_163520/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260520_225034/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/deltanet_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260520_161429/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260524_054039/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_082210/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_214758/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260518_105241/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_131434/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_180301/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_adan_lr3e_3_b1_0_9_b2_0_92_b3_0_99_eps_1e_8_20260518_235004/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_lion_lr3e_4_b1_0_9_b2_0_99_20260519_061741/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_marsadamw_lr3e_3_b1_0_95_b2_0_99_eps_1e_8_20260517_044559/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_adamw_lr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260517_170019/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_rmnp_lr3e_3_adamlr1e_3_b1_0_9_b2_0_99_eps_1e_15_20260516_164940/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-20000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_muon_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260526_051916/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/gla_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195107/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/.metadata filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
1b_archs_fwe/transformer_1b_fwe_soap_pdim2048_pfreq10_lr3e_3_b1_0_9_b2_0_95_eps_1e_15_20260515_195110/exp_data/checkpoint/step-30720/__7_0.distcp filter=lfs diff=lfs merge=lfs -text