| *.7z filter=lfs diff=lfs merge=lfs -text | |
| *.arrow filter=lfs diff=lfs merge=lfs -text | |
| *.bin filter=lfs diff=lfs merge=lfs -text | |
| *.bz2 filter=lfs diff=lfs merge=lfs -text | |
| *.ckpt filter=lfs diff=lfs merge=lfs -text | |
| *.ftz filter=lfs diff=lfs merge=lfs -text | |
| *.gz filter=lfs diff=lfs merge=lfs -text | |
| *.h5 filter=lfs diff=lfs merge=lfs -text | |
| *.joblib filter=lfs diff=lfs merge=lfs -text | |
| *.lfs.* filter=lfs diff=lfs merge=lfs -text | |
| *.mlmodel filter=lfs diff=lfs merge=lfs -text | |
| *.model filter=lfs diff=lfs merge=lfs -text | |
| *.msgpack filter=lfs diff=lfs merge=lfs -text | |
| *.npy filter=lfs diff=lfs merge=lfs -text | |
| *.npz filter=lfs diff=lfs merge=lfs -text | |
| *.onnx filter=lfs diff=lfs merge=lfs -text | |
| *.ot filter=lfs diff=lfs merge=lfs -text | |
| *.parquet filter=lfs diff=lfs merge=lfs -text | |
| *.pb filter=lfs diff=lfs merge=lfs -text | |
| *.pickle filter=lfs diff=lfs merge=lfs -text | |
| *.pkl filter=lfs diff=lfs merge=lfs -text | |
| *.pt filter=lfs diff=lfs merge=lfs -text | |
| *.pth filter=lfs diff=lfs merge=lfs -text | |
| *.rar filter=lfs diff=lfs merge=lfs -text | |
| *.safetensors filter=lfs diff=lfs merge=lfs -text | |
| saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| *.tar.* filter=lfs diff=lfs merge=lfs -text | |
| *.tar filter=lfs diff=lfs merge=lfs -text | |
| *.tflite filter=lfs diff=lfs merge=lfs -text | |
| *.tgz filter=lfs diff=lfs merge=lfs -text | |
| *.wasm filter=lfs diff=lfs merge=lfs -text | |
| *.xz filter=lfs diff=lfs merge=lfs -text | |
| *.zip filter=lfs diff=lfs merge=lfs -text | |
| *.zst filter=lfs diff=lfs merge=lfs -text | |
| *tfevents* filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.0-bs_32-seq_1024-iters_25000/250730_045056/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.95-bs_32-seq_1024-iters_25000/250730_000401/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/combined_cuv_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_001000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_013000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_011000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_009000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_023000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_019000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_013000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_021000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_009000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_017000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_011000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_019000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_001000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_023000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_017000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_021000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.95-bs_32-seq_1024-iters_9536/250622_025948/eos/step_002500/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_000500/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_005000/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_006000/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_003000/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_002500/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_004500/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001500/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250728_063551/training.log filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_007000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_005000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_015000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_003000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_025000/combined_eos_analysis_ori.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.01-wd_0.0001-m_0.98-bs_32-seq_1024-iters_25000/250730_022725/eos/step_025000/combined_eos_analysis.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_001000/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/sgd/constant/lr_0.03-wd_0.0001-m_0.0-bs_32-seq_1024-iters_9536/250622_011417/eos/step_003500/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |
| fineweb-10B/gpt2/eos/adam/linear/linear-256-2048-15000-0.1/lr_0.0018-wd_0.1-betas_0.9_0.95-bs_32-seq_1024-iters_15000/250622_035242/eos/step_007000/losses_lr.png filter=lfs diff=lfs merge=lfs -text | |