Ctrl+K
- a-0718-true-28k-load-optim-lr-2e-5-bk-tie-no-warmup-no-decay-attn-influencedsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-2e-5-bs-128-gpus-8-seqlen-28672
- a-0718-true-28k-load-optim-lr-2e-5-bk-tie-no-warmup-no-decay-streamingLLMdsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-2e-5-bs-128-gpus-8-seqlen-28672
- a-0718-true-28k-no-load-optim-lr-0.001-bk-tie-no-warmup-no-decay-streamingLLMdsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-0.001-minlr-0.001-bs-128-gpus-8-seqlen-28672
- a-0718-true-4k-load-optim-lr-2e-5-bk-tie-no-warmup-no-decay-streamingLLMdsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-2e-5-bs-1024-gpus-8-seqlen-4096-ckpt-68
- a-0718-true-4k-no-load-optim-lr-0.001-bk-tie-no-warmup-no-decay-attn-influencedsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-0.001-minlr-0.001-bs-128-gpus-8-seqlen-4096
- a-0718-true-4k-no-load-optim-lr-0.001-bk-tie-no-warmup-no-decay-streamingLLMdsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-0.001-minlr-0.001-bs-128-gpus-8-seqlen-4096
- attn-influencedsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672
- minferencedsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672
- mp_rank_00
- mp_rank_01
- randomdsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672
- streamingLLM-4-1020dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672-ckpt-27
- streamingLLM-4-1020dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672-ckpt-54
- streamingLLM-4-1020dsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672
- tokenized_10B_5B_5B_baselinedsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672-ckpt-68
- tokenized_10B_5B_5B_modernbert_scoreddsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672-ckpt-68
- tokenized_10B_5B_5B_qwen3_scoreddsv3-0.5b-q16-kv2-ep-16-sep-0-top2-cf-2-bias-1e-3-bf16-ep4-mp2-pp1-lr-2e-5-minlr-7e-7-bs-1024-gpus-8-seqlen-28672-ckpt-68
- 1.52 kB