JinghuiLuAstronaut commited on
Commit
bdca108
·
verified ·
1 Parent(s): a6f32a2

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LTA_openwebtext_dualt/logs/_smoke4_lm1b_dualtline_cmax16_nwguard_20260503_232708.log +43 -0
  2. LTA_openwebtext_dualt/logs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b128_resume3000_20260504_202541.log +84 -0
  3. LTA_openwebtext_dualt/logs/ar_owt_gpt2_len1024_from100k_modelonly_lr1e4_wd0p1_b2p95_cosine_8gpu.log +0 -0
  4. LTA_openwebtext_dualt/logs/bench_lta_dualt_1gpu_b32_len1024_20260428_223957.log +101 -0
  5. LTA_openwebtext_dualt/logs/compact_gpt2bpe_v2048_stream1024_fullycoupled_mask1_wd0p1_fp32_8gpu/lta_owt_compact_gpt2bpe_v2048_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_logitnormal_m1p5_s0p8_hardce_mask1p0-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260517_141027.log +0 -0
  6. LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/20260517_queued_ctx1024_sweep.log +1 -0
  7. LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/ctx1024_sweep_selected_20260517_210705.log +1306 -0
  8. LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/ctx1024_sweep_selected_20260517_210705.nohup +0 -0
  9. LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/queued_ctx1024_sweep.nohup +1 -0
  10. LTA_openwebtext_dualt/logs/ctx1024_sampledpath_sweep_4gpu/ctx1024_sampledpath_20260517_223933.nohup +246 -0
  11. LTA_openwebtext_dualt/logs/ctx1024_sampledpath_sweep_4gpu/ctx1024_sampledpath_true_20260517_224139.nohup +985 -0
  12. LTA_openwebtext_dualt/logs/decode_timegrid_trace_len256_copied_20260517_155402.log +0 -0
  13. LTA_openwebtext_dualt/logs/elf_lm1b_t5small_elfb_aligned_datasetfix_len128_4gpu_tinysmoke_20260513.log +18 -0
  14. LTA_openwebtext_dualt/logs/elfaligned_t5record_4gpu/lta_owt_t5record_len1024_elfaligned_dditelf_muon_logitnormal_m1p5_s0p8_none_floor0p0_zeroout_tf32_gbs512_4gpu_20260516_011722.log +296 -0
  15. LTA_openwebtext_dualt/logs/eval_20260506/ar_8gpu_latest_temp_sweep_20260506_110706.log +47 -0
  16. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_eta1_stateweight_latest_20260506_113031.log +13 -0
  17. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_finalsample_hightemp_quick_20260506_114232.log +14 -0
  18. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_finalsample_latest_20260506_113603.log +41 -0
  19. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_rolling_quick512_20260506_112740.log +9 -0
  20. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_rolling_sweep_latest_20260506_112546.log +11 -0
  21. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_rolling_noise_focus_latest_20260506_112101.log +5 -0
  22. LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_rolling_noise_sweep_latest_20260506_110706.log +41 -0
  23. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_key3_state_n256.log +1 -0
  24. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_quick2_128steps_n64.log +4 -0
  25. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_quick2_n64.log +4 -0
  26. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_256steps_n64.log +4 -0
  27. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_4096steps_n64.log +4 -0
  28. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps16_n64.log +4 -0
  29. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps32_n64.log +4 -0
  30. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps64_n64.log +4 -0
  31. LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps8_n64.log +4 -0
  32. LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_1024steps_n64.log +4 -0
  33. LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_diffusion_noise_steps_128steps_n64.log +88 -0
  34. LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_step146k_128steps_n64.log +4 -0
  35. LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_temp_push43_128steps_n64.log +56 -0
  36. LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_temp_schedule_128steps_n64.log +32 -0
  37. LTA_openwebtext_dualt/logs/eval_20260508/mauve_step124k_n64_features.log +21 -0
  38. LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_dirres_n16_s256_20260514_023314.log +5 -0
  39. LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_online_dirres_n16_s256.log +4 -0
  40. LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_online_dirres_n8_s128_smoke.log +6 -0
  41. LTA_openwebtext_dualt/logs/fullycoupled_tpow2_wd0p1_fp32_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_tpow2_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260515_003246.log +0 -0
  42. LTA_openwebtext_dualt/logs/fullycoupled_uniform_mask1_swiglu_wd0p1_fp32_4gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638.log +163 -0
  43. LTA_openwebtext_dualt/logs/fullycoupled_uniform_mask1_swiglu_wd0p1_fp32_4gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638.outer.log +174 -0
  44. LTA_openwebtext_dualt/logs/genppl_lm1b_step_latest_k1024_s128_flm.log +0 -0
  45. LTA_openwebtext_dualt/logs/infer_owt_compact_v2048_ckpt_sweep_steps128_c256_temps_n8_large_20260520_205159.log +0 -0
  46. LTA_openwebtext_dualt/logs/infer_owt_compact_v8192_probe_flow_onehot_steps128_c1024_t1p45_n8_large_20260520_201801.log +42 -0
  47. LTA_openwebtext_dualt/logs/infer_owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large_20260520_202516.log +184 -0
  48. LTA_openwebtext_dualt/logs/infer_owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8.log +58 -0
  49. LTA_openwebtext_dualt/logs/infer_owt_t5_2node_latest_trainmatched_dirres_grid_n8.log +32 -0
  50. LTA_openwebtext_dualt/logs/infer_owt_t5_2node_step290000_compare_n8_20260520_200659.log +44 -0
LTA_openwebtext_dualt/logs/_smoke4_lm1b_dualtline_cmax16_nwguard_20260503_232708.log ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ *****************************************
3
+ Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
4
+ *****************************************
5
+ NCCL version 2.25.1+cuda12.8
6
+ {
7
+ "device": "cuda:0",
8
+ "rank": 0,
9
+ "world_size": 4,
10
+ "samples": "wrapped_streaming",
11
+ "vocab_size": 30522,
12
+ "save_dir": "runs/_smoke4_lm1b_dualtline_cmax16_nwguard_20260503_232708",
13
+ "batch_size": 1,
14
+ "grad_accum": 1,
15
+ "effective_batch_size": 4,
16
+ "global_batch_size": 4,
17
+ "lr_schedule": "constant_warmup",
18
+ "warmup_steps": 1,
19
+ "adam_beta1": 0.9,
20
+ "adam_beta2": 0.999,
21
+ "adam_eps": 1e-08,
22
+ "model_type": "ddit",
23
+ "dual_t": true,
24
+ "corrupt_t_mode": "independent",
25
+ "corrupt_min_t": 0.0,
26
+ "corrupt_max_t": 1.0,
27
+ "dirichlet_endpoint_mode": "dual_t_line",
28
+ "dirichlet_semantic_t_mode": "independent",
29
+ "dirichlet_semantic_t_value": 0.0,
30
+ "torch_compile": false,
31
+ "compile_mode": "max-autotune",
32
+ "state_format": "prob",
33
+ "target_loss": "hard_ce",
34
+ "meanflow_weight": 0.0,
35
+ "bridge_noise_init": "logistic_normal",
36
+ "noise_sigma": -1.0,
37
+ "wrap": true,
38
+ "openwebtext_split": "all",
39
+ "num_workers": 0,
40
+ "latest_every": 0,
41
+ "resume_path": ""
42
+ }
43
+ step=1 micro_steps=1 elapsed=0.8s lr=3.000000e-04 loss_all=10.3125 acc_all=0.0000 loss_corrupt=10.3125 acc_corrupt=0.0000 corrupt_frac=0.2812 loss=10.3125 loss_recon=10.3125 loss_meanflow=0.0000 mean_model_t=0.0669 mean_corrupt_t=0.6396 wrong_frac=1.0000 init_acc_corrupt=0.4444 init_gold_top10=1.0000 init_gold_top100=1.0000
LTA_openwebtext_dualt/logs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b128_resume3000_20260504_202541.log ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ *****************************************
3
+ Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
4
+ *****************************************
5
+ NCCL version 2.25.1+cuda12.8
6
+ resumed_from=runs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_20260504_201527/latest.pt start_step=3001
7
+ {
8
+ "task": "ar_lm",
9
+ "device": "cuda:0",
10
+ "rank": 0,
11
+ "world_size": 4,
12
+ "samples": "wrapped_streaming",
13
+ "vocab_size": 30522,
14
+ "bos_id": 101,
15
+ "eos_id": 102,
16
+ "save_dir": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b128_resume3000_20260504_202541",
17
+ "params": 108440832,
18
+ "batch_size": 128,
19
+ "grad_accum": 1,
20
+ "effective_batch_size": 512,
21
+ "global_batch_size": 512,
22
+ "max_len": 128,
23
+ "wrap": true,
24
+ "text_detokenizer": "lm1b",
25
+ "openwebtext_split": "all",
26
+ "torch_compile": false
27
+ }
28
+ step=3020 micro_steps=3020 elapsed=6.0s lr=3.000000e-04 loss=4.0947 ppl=60.0657 acc=0.3150 tokens=16256.0000
29
+ step=3040 micro_steps=3040 elapsed=4.4s lr=3.000000e-04 loss=4.0777 ppl=59.0408 acc=0.3169 tokens=16256.0000
30
+ step=3060 micro_steps=3060 elapsed=4.4s lr=3.000000e-04 loss=4.0853 ppl=59.4897 acc=0.3171 tokens=16256.0000
31
+ step=3080 micro_steps=3080 elapsed=4.4s lr=3.000000e-04 loss=4.0716 ppl=58.6893 acc=0.3176 tokens=16256.0000
32
+ step=3100 micro_steps=3100 elapsed=4.4s lr=3.000000e-04 loss=4.0606 ppl=58.0390 acc=0.3181 tokens=16256.0000
33
+ step=3120 micro_steps=3120 elapsed=4.4s lr=3.000000e-04 loss=4.0630 ppl=58.1939 acc=0.3180 tokens=16256.0000
34
+ step=3140 micro_steps=3140 elapsed=4.4s lr=3.000000e-04 loss=4.0699 ppl=58.5920 acc=0.3186 tokens=16256.0000
35
+ step=3160 micro_steps=3160 elapsed=4.4s lr=3.000000e-04 loss=4.0768 ppl=58.9850 acc=0.3161 tokens=16256.0000
36
+ step=3180 micro_steps=3180 elapsed=4.4s lr=3.000000e-04 loss=4.0478 ppl=57.2913 acc=0.3189 tokens=16256.0000
37
+ step=3200 micro_steps=3200 elapsed=4.4s lr=3.000000e-04 loss=4.0432 ppl=57.0342 acc=0.3195 tokens=16256.0000
38
+ step=3220 micro_steps=3220 elapsed=4.4s lr=3.000000e-04 loss=4.0407 ppl=56.9072 acc=0.3205 tokens=16256.0000
39
+ step=3240 micro_steps=3240 elapsed=4.4s lr=3.000000e-04 loss=4.0478 ppl=57.2947 acc=0.3191 tokens=16256.0000
40
+ step=3260 micro_steps=3260 elapsed=4.4s lr=3.000000e-04 loss=4.0492 ppl=57.3790 acc=0.3194 tokens=16256.0000
41
+ step=3280 micro_steps=3280 elapsed=4.4s lr=3.000000e-04 loss=4.0349 ppl=56.5688 acc=0.3210 tokens=16256.0000
42
+ step=3300 micro_steps=3300 elapsed=4.4s lr=3.000000e-04 loss=4.0456 ppl=57.1688 acc=0.3195 tokens=16256.0000
43
+ step=3320 micro_steps=3320 elapsed=4.4s lr=3.000000e-04 loss=4.0447 ppl=57.1197 acc=0.3188 tokens=16256.0000
44
+ step=3340 micro_steps=3340 elapsed=4.4s lr=3.000000e-04 loss=4.0445 ppl=57.1161 acc=0.3209 tokens=16256.0000
45
+ step=3360 micro_steps=3360 elapsed=4.4s lr=3.000000e-04 loss=4.0363 ppl=56.6692 acc=0.3201 tokens=16256.0000
46
+ step=3380 micro_steps=3380 elapsed=4.4s lr=3.000000e-04 loss=4.0198 ppl=55.7166 acc=0.3226 tokens=16256.0000
47
+ step=3400 micro_steps=3400 elapsed=4.4s lr=3.000000e-04 loss=4.0166 ppl=55.5595 acc=0.3225 tokens=16256.0000
48
+ step=3420 micro_steps=3420 elapsed=4.4s lr=3.000000e-04 loss=4.0025 ppl=54.7572 acc=0.3250 tokens=16256.0000
49
+ step=3440 micro_steps=3440 elapsed=4.4s lr=3.000000e-04 loss=4.0096 ppl=55.1576 acc=0.3227 tokens=16256.0000
50
+ step=3460 micro_steps=3460 elapsed=4.4s lr=3.000000e-04 loss=4.0131 ppl=55.3352 acc=0.3235 tokens=16256.0000
51
+ step=3480 micro_steps=3480 elapsed=4.4s lr=3.000000e-04 loss=4.0070 ppl=55.0338 acc=0.3237 tokens=16256.0000
52
+ step=3500 micro_steps=3500 elapsed=4.4s lr=3.000000e-04 loss=4.0074 ppl=55.0277 acc=0.3236 tokens=16256.0000
53
+ step=3520 micro_steps=3520 elapsed=4.4s lr=3.000000e-04 loss=4.0005 ppl=54.6736 acc=0.3242 tokens=16256.0000
54
+ step=3540 micro_steps=3540 elapsed=4.4s lr=3.000000e-04 loss=4.0080 ppl=55.0657 acc=0.3232 tokens=16256.0000
55
+ step=3560 micro_steps=3560 elapsed=4.4s lr=3.000000e-04 loss=4.0036 ppl=54.8304 acc=0.3232 tokens=16256.0000
56
+ step=3580 micro_steps=3580 elapsed=4.4s lr=3.000000e-04 loss=3.9893 ppl=54.0694 acc=0.3265 tokens=16256.0000
57
+ step=3600 micro_steps=3600 elapsed=4.4s lr=3.000000e-04 loss=4.0006 ppl=54.6650 acc=0.3238 tokens=16256.0000
58
+ step=3620 micro_steps=3620 elapsed=4.4s lr=3.000000e-04 loss=3.9700 ppl=53.0214 acc=0.3266 tokens=16256.0000
59
+ step=3640 micro_steps=3640 elapsed=4.4s lr=3.000000e-04 loss=3.9747 ppl=53.2832 acc=0.3263 tokens=16256.0000
60
+ step=3660 micro_steps=3660 elapsed=4.4s lr=3.000000e-04 loss=3.9813 ppl=53.6207 acc=0.3253 tokens=16256.0000
61
+ step=3680 micro_steps=3680 elapsed=4.4s lr=3.000000e-04 loss=3.9763 ppl=53.3634 acc=0.3270 tokens=16256.0000
62
+ step=3700 micro_steps=3700 elapsed=4.4s lr=3.000000e-04 loss=3.9751 ppl=53.2994 acc=0.3264 tokens=16256.0000
63
+ step=3720 micro_steps=3720 elapsed=4.4s lr=3.000000e-04 loss=3.9777 ppl=53.4121 acc=0.3253 tokens=16256.0000
64
+ step=3740 micro_steps=3740 elapsed=4.4s lr=3.000000e-04 loss=3.9552 ppl=52.2290 acc=0.3273 tokens=16256.0000
65
+ step=3760 micro_steps=3760 elapsed=4.4s lr=3.000000e-04 loss=3.9726 ppl=53.1452 acc=0.3267 tokens=16256.0000
66
+ step=3780 micro_steps=3780 elapsed=4.4s lr=3.000000e-04 loss=3.9734 ppl=53.2219 acc=0.3265 tokens=16256.0000
67
+ step=3800 micro_steps=3800 elapsed=4.4s lr=3.000000e-04 loss=3.9694 ppl=52.9792 acc=0.3259 tokens=16256.0000
68
+ step=3820 micro_steps=3820 elapsed=4.4s lr=3.000000e-04 loss=3.9546 ppl=52.2044 acc=0.3272 tokens=16256.0000
69
+ step=3840 micro_steps=3840 elapsed=4.4s lr=3.000000e-04 loss=3.9565 ppl=52.3033 acc=0.3292 tokens=16256.0000
70
+ step=3860 micro_steps=3860 elapsed=4.4s lr=3.000000e-04 loss=3.9553 ppl=52.2470 acc=0.3278 tokens=16256.0000
71
+ step=3880 micro_steps=3880 elapsed=4.4s lr=3.000000e-04 loss=3.9563 ppl=52.2977 acc=0.3282 tokens=16256.0000
72
+ step=3900 micro_steps=3900 elapsed=4.4s lr=3.000000e-04 loss=3.9502 ppl=51.9592 acc=0.3286 tokens=16256.0000
73
+ step=3920 micro_steps=3920 elapsed=4.4s lr=3.000000e-04 loss=3.9436 ppl=51.6124 acc=0.3299 tokens=16256.0000
74
+ step=3940 micro_steps=3940 elapsed=4.4s lr=3.000000e-04 loss=3.9517 ppl=52.0682 acc=0.3288 tokens=16256.0000
75
+ step=3960 micro_steps=3960 elapsed=4.4s lr=3.000000e-04 loss=3.9499 ppl=51.9636 acc=0.3275 tokens=16256.0000
76
+ step=3980 micro_steps=3980 elapsed=4.4s lr=3.000000e-04 loss=3.9439 ppl=51.6394 acc=0.3281 tokens=16256.0000
77
+ step=4000 micro_steps=4000 elapsed=4.4s lr=3.000000e-04 loss=3.9314 ppl=51.0094 acc=0.3300 tokens=16256.0000
78
+ [sample step=4000] [CLS] bashir on thursday. [SEP] he had often played cocaine and used a baseball used to talk about the prime minister's success. [SEP] i am glad you put a lot of in my swelling pool and 314 of the same. [SEP] "if the president didn't address that situation [monday], that celebration and he would accept it. [SEP] being an honour will soon be largely symbolic. [SEP] so we haven't experienced much of the season," hamilton was quoted as saying. [SEP] i'm sure that the administration of john f. kennedy has to realize that he's too rotten to move on from working with his [SEP]
79
+ step=4020 micro_steps=4020 elapsed=7.2s lr=3.000000e-04 loss=3.9248 ppl=50.6715 acc=0.3315 tokens=16256.0000
80
+ step=4040 micro_steps=4040 elapsed=4.5s lr=3.000000e-04 loss=3.9372 ppl=51.3072 acc=0.3292 tokens=16256.0000
81
+ step=4060 micro_steps=4060 elapsed=4.4s lr=3.000000e-04 loss=3.9534 ppl=52.1452 acc=0.3275 tokens=16256.0000
82
+ step=4080 micro_steps=4080 elapsed=4.4s lr=3.000000e-04 loss=3.9326 ppl=51.0598 acc=0.3308 tokens=16256.0000
83
+ step=4100 micro_steps=4100 elapsed=4.4s lr=3.000000e-04 loss=3.9261 ppl=50.7547 acc=0.3310 tokens=16256.0000
84
+ step=4120 micro_steps=4120 elapsed=4.4s lr=3.000000e-04 loss=3.9235 ppl=50.6324 acc=0.3313 tokens=16256.0000
LTA_openwebtext_dualt/logs/ar_owt_gpt2_len1024_from100k_modelonly_lr1e4_wd0p1_b2p95_cosine_8gpu.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/bench_lta_dualt_1gpu_b32_len1024_20260428_223957.log ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "device": "cuda:0",
3
+ "rank": 0,
4
+ "world_size": 1,
5
+ "samples": "wrapped_streaming",
6
+ "vocab_size": 50257,
7
+ "save_dir": "runs/bench_lta_dualt_1gpu_b32_len1024_20260428_223957",
8
+ "batch_size": 32,
9
+ "grad_accum": 1,
10
+ "effective_batch_size": 32,
11
+ "global_batch_size": 32,
12
+ "lr_schedule": "constant_warmup",
13
+ "warmup_steps": 1,
14
+ "model_type": "ddit",
15
+ "dual_t": true,
16
+ "corrupt_t_mode": "independent",
17
+ "corrupt_min_t": 0.0,
18
+ "corrupt_max_t": 1.0,
19
+ "torch_compile": false,
20
+ "compile_mode": "max-autotune",
21
+ "state_format": "prob",
22
+ "target_loss": "soft_ce",
23
+ "meanflow_weight": 0.0,
24
+ "bridge_noise_init": "logistic_normal",
25
+ "noise_sigma": -1.0,
26
+ "wrap": true,
27
+ "num_workers": 4
28
+ }
29
+ Traceback (most recent call last):
30
+ File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 456, in <module>
31
+ main()
32
+ File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 367, in main
33
+ batch = next(data_iter)
34
+ ^^^^^^^^^^^^^^^
35
+ File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 708, in __next__
36
+ data = self._next_data()
37
+ ^^^^^^^^^^^^^^^^^
38
+ File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1487, in _next_data
39
+ return self._process_data(data, worker_id)
40
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
41
+ File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/dataloader.py", line 1522, in _process_data
42
+ data.reraise()
43
+ File "/usr/local/lib/python3.12/dist-packages/torch/_utils.py", line 733, in reraise
44
+ raise exception
45
+ zipfile.BadZipFile: Caught BadZipFile in DataLoader worker process 0.
46
+ Original Traceback (most recent call last):
47
+ File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/_utils/worker.py", line 349, in _worker_loop
48
+ data = fetcher.fetch(index) # type: ignore[possibly-undefined]
49
+ ^^^^^^^^^^^^^^^^^^^^
50
+ File "/usr/local/lib/python3.12/dist-packages/torch/utils/data/_utils/fetch.py", line 33, in fetch
51
+ data.append(next(self.dataset_iter))
52
+ ^^^^^^^^^^^^^^^^^^^^^^^
53
+ File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/flowtext_lab/data.py", line 230, in __iter__
54
+ for text in iter_text_records(
55
+ File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/flowtext_lab/data.py", line 111, in iter_text_records
56
+ yield from _iter_parquet(path, text_column)
57
+ File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/flowtext_lab/data.py", line 69, in _iter_parquet
58
+ from datasets import Dataset as HFDataset
59
+ File "/usr/local/lib/python3.12/dist-packages/datasets/__init__.py", line 17, in <module>
60
+ from .arrow_dataset import Dataset
61
+ File "/usr/local/lib/python3.12/dist-packages/datasets/arrow_dataset.py", line 54, in <module>
62
+ import fsspec
63
+ File "/usr/local/lib/python3.12/dist-packages/fsspec/__init__.py", line 69, in <module>
64
+ process_entries()
65
+ File "/usr/local/lib/python3.12/dist-packages/fsspec/__init__.py", line 43, in process_entries
66
+ eps = entry_points()
67
+ ^^^^^^^^^^^^^^
68
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 913, in entry_points
69
+ return EntryPoints(eps).select(**params)
70
+ ^^^^^^^^^^^^^^^^
71
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 910, in <genexpr>
72
+ eps = itertools.chain.from_iterable(
73
+ ^
74
+ File "/usr/lib/python3.12/importlib/metadata/_itertools.py", line 16, in unique_everseen
75
+ k = key(element)
76
+ ^^^^^^^^^^^^
77
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 835, in _normalized_name
78
+ or super()._normalized_name
79
+ ^^^^^^^^^^^^^^^^^^^^^^^^
80
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 462, in _normalized_name
81
+ return Prepared.normalize(self.name)
82
+ ^^^^^^^^^
83
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 457, in name
84
+ return self.metadata['Name']
85
+ ^^^^^^^^^^^^^
86
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 445, in metadata
87
+ or self.read_text('PKG-INFO')
88
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^
89
+ File "/usr/lib/python3.12/importlib/metadata/__init__.py", line 819, in read_text
90
+ return self._path.joinpath(filename).read_text(encoding='utf-8')
91
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
92
+ File "/usr/lib/python3.12/zipfile/_path/__init__.py", line 339, in read_text
93
+ with self.open('r', encoding, *args, **kwargs) as strm:
94
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
95
+ File "/usr/lib/python3.12/zipfile/_path/__init__.py", line 305, in open
96
+ stream = self.root.open(self.at, zip_mode, pwd=pwd)
97
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
98
+ File "/usr/lib/python3.12/zipfile/__init__.py", line 1625, in open
99
+ raise BadZipFile("Bad magic number for file header")
100
+ zipfile.BadZipFile: Bad magic number for file header
101
+
LTA_openwebtext_dualt/logs/compact_gpt2bpe_v2048_stream1024_fullycoupled_mask1_wd0p1_fp32_8gpu/lta_owt_compact_gpt2bpe_v2048_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_logitnormal_m1p5_s0p8_hardce_mask1p0-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260517_141027.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/20260517_queued_ctx1024_sweep.log ADDED
@@ -0,0 +1 @@
 
 
1
+ [ctx1024-sweep] waiting for run=train8_rollin_len1024_rollin_p50_s4_i32_20260517_1855ctx1024bs128
LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/ctx1024_sweep_selected_20260517_210705.log ADDED
@@ -0,0 +1,1306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [ctx1024-sweep] waiting for run=train8_rollin_len1024_rollin_p50_s4_i32_20260517_1855ctx1024bs128
2
+ [ctx1024-sweep] start stamp=ctx1024_sweep_selected_20260517_210705 len=1024 vocab=2664 out=docs/lta_samples/metrics_20260517/ctx1024_rollin_sweep_bs512_ode128_ctx1024_sweep_selected_20260517_210705
3
+ [ctx1024-sweep] config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 p=0.75 steps=4 infer=32 outwd=-1 sync_t=0
4
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=0 to=1000
5
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=1000
6
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=1000 soft=none
7
+ [decode] max_len=1024 generated=64/64
8
+ {
9
+ "num_rows": 1,
10
+ "best_by_run": {
11
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
12
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
13
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0001000.pt",
14
+ "ckpt_step": 1000,
15
+ "endpoint_softening": "none",
16
+ "decode_rule": "flowmap",
17
+ "steps": 128,
18
+ "time_schedule": "logit_normal",
19
+ "model_t_mode": "post",
20
+ "final_from": "state",
21
+ "n_gen": 64,
22
+ "n_refs": 8,
23
+ "token_acc_mean": 0.0558013916015625,
24
+ "token_acc_min": 0.029296875,
25
+ "token_acc_max": 0.119140625,
26
+ "exact_acc": 0.0,
27
+ "exact_count": 0,
28
+ "exact_ref_coverage": 0.0,
29
+ "exact_ref_count": 0,
30
+ "exact_ref_hits": [],
31
+ "best_ref_idx": [
32
+ 7,
33
+ 7,
34
+ 4,
35
+ 7,
36
+ 6,
37
+ 7,
38
+ 2,
39
+ 6,
40
+ 6,
41
+ 6,
42
+ 6,
43
+ 6,
44
+ 2,
45
+ 6,
46
+ 2,
47
+ 6,
48
+ 6,
49
+ 7,
50
+ 7,
51
+ 6,
52
+ 6,
53
+ 6,
54
+ 6,
55
+ 4,
56
+ 6,
57
+ 6,
58
+ 5,
59
+ 6,
60
+ 6,
61
+ 6,
62
+ 3,
63
+ 6,
64
+ 6,
65
+ 2,
66
+ 4,
67
+ 4,
68
+ 4,
69
+ 6,
70
+ 2,
71
+ 5,
72
+ 6,
73
+ 6,
74
+ 7,
75
+ 6,
76
+ 6,
77
+ 6,
78
+ 1,
79
+ 6,
80
+ 2,
81
+ 4,
82
+ 6,
83
+ 6,
84
+ 7,
85
+ 6,
86
+ 6,
87
+ 5,
88
+ 6,
89
+ 4,
90
+ 2,
91
+ 6,
92
+ 6,
93
+ 7,
94
+ 4,
95
+ 6
96
+ ],
97
+ "best_token_acc": [
98
+ 0.04296875,
99
+ 0.060546875,
100
+ 0.0478515625,
101
+ 0.0537109375,
102
+ 0.0517578125,
103
+ 0.046875,
104
+ 0.08203125,
105
+ 0.046875,
106
+ 0.0341796875,
107
+ 0.119140625,
108
+ 0.0556640625,
109
+ 0.0576171875,
110
+ 0.0546875,
111
+ 0.0556640625,
112
+ 0.052734375,
113
+ 0.0703125,
114
+ 0.05859375,
115
+ 0.0634765625,
116
+ 0.046875,
117
+ 0.0751953125,
118
+ 0.08203125,
119
+ 0.0361328125,
120
+ 0.044921875,
121
+ 0.0341796875,
122
+ 0.0517578125,
123
+ 0.072265625,
124
+ 0.0322265625,
125
+ 0.0615234375,
126
+ 0.0615234375,
127
+ 0.0517578125,
128
+ 0.037109375,
129
+ 0.0703125,
130
+ 0.056640625,
131
+ 0.0869140625,
132
+ 0.0478515625,
133
+ 0.0380859375,
134
+ 0.037109375,
135
+ 0.0419921875,
136
+ 0.0703125,
137
+ 0.0341796875,
138
+ 0.056640625,
139
+ 0.033203125,
140
+ 0.072265625,
141
+ 0.0380859375,
142
+ 0.0478515625,
143
+ 0.0380859375,
144
+ 0.0322265625,
145
+ 0.0693359375,
146
+ 0.0712890625,
147
+ 0.0419921875,
148
+ 0.0576171875,
149
+ 0.0869140625,
150
+ 0.0615234375,
151
+ 0.03515625,
152
+ 0.0517578125,
153
+ 0.080078125,
154
+ 0.0380859375,
155
+ 0.0341796875,
156
+ 0.1171875,
157
+ 0.0791015625,
158
+ 0.029296875,
159
+ 0.0478515625,
160
+ 0.0595703125,
161
+ 0.06640625
162
+ ]
163
+ }
164
+ },
165
+ "first_exact_by_run": {}
166
+ }
167
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=1000 views=512000 token_acc=0.0558 exact=0/64 exact_refs=0 hits=[]
168
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=1000 to=2000
169
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=2000
170
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=2000 soft=none
171
+ [decode] max_len=1024 generated=64/64
172
+ {
173
+ "num_rows": 1,
174
+ "best_by_run": {
175
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
176
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
177
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0002000.pt",
178
+ "ckpt_step": 2000,
179
+ "endpoint_softening": "none",
180
+ "decode_rule": "flowmap",
181
+ "steps": 128,
182
+ "time_schedule": "logit_normal",
183
+ "model_t_mode": "post",
184
+ "final_from": "state",
185
+ "n_gen": 64,
186
+ "n_refs": 8,
187
+ "token_acc_mean": 0.970245361328125,
188
+ "token_acc_min": 0.3955078125,
189
+ "token_acc_max": 0.9990234375,
190
+ "exact_acc": 0.0,
191
+ "exact_count": 0,
192
+ "exact_ref_coverage": 0.0,
193
+ "exact_ref_count": 0,
194
+ "exact_ref_hits": [],
195
+ "best_ref_idx": [
196
+ 4,
197
+ 7,
198
+ 4,
199
+ 4,
200
+ 4,
201
+ 7,
202
+ 4,
203
+ 4,
204
+ 7,
205
+ 7,
206
+ 4,
207
+ 1,
208
+ 4,
209
+ 7,
210
+ 4,
211
+ 7,
212
+ 4,
213
+ 7,
214
+ 7,
215
+ 5,
216
+ 4,
217
+ 4,
218
+ 4,
219
+ 4,
220
+ 4,
221
+ 4,
222
+ 7,
223
+ 2,
224
+ 0,
225
+ 4,
226
+ 4,
227
+ 7,
228
+ 4,
229
+ 0,
230
+ 4,
231
+ 4,
232
+ 4,
233
+ 0,
234
+ 4,
235
+ 4,
236
+ 2,
237
+ 4,
238
+ 4,
239
+ 3,
240
+ 0,
241
+ 4,
242
+ 2,
243
+ 0,
244
+ 5,
245
+ 0,
246
+ 0,
247
+ 7,
248
+ 7,
249
+ 4,
250
+ 4,
251
+ 4,
252
+ 7,
253
+ 4,
254
+ 7,
255
+ 0,
256
+ 7,
257
+ 4,
258
+ 7,
259
+ 4
260
+ ],
261
+ "best_token_acc": [
262
+ 0.986328125,
263
+ 0.982421875,
264
+ 0.92578125,
265
+ 0.9404296875,
266
+ 0.9599609375,
267
+ 0.990234375,
268
+ 0.990234375,
269
+ 0.98828125,
270
+ 0.3955078125,
271
+ 0.9853515625,
272
+ 0.939453125,
273
+ 0.984375,
274
+ 0.97265625,
275
+ 0.984375,
276
+ 0.978515625,
277
+ 0.9833984375,
278
+ 0.9833984375,
279
+ 0.982421875,
280
+ 0.8916015625,
281
+ 0.998046875,
282
+ 0.9794921875,
283
+ 0.9873046875,
284
+ 0.986328125,
285
+ 0.9658203125,
286
+ 0.9912109375,
287
+ 0.9873046875,
288
+ 0.9873046875,
289
+ 0.9814453125,
290
+ 0.994140625,
291
+ 0.8408203125,
292
+ 0.98828125,
293
+ 0.9912109375,
294
+ 0.9658203125,
295
+ 0.98046875,
296
+ 0.9833984375,
297
+ 0.98828125,
298
+ 0.990234375,
299
+ 0.99609375,
300
+ 0.9873046875,
301
+ 0.9833984375,
302
+ 0.994140625,
303
+ 0.9892578125,
304
+ 0.9658203125,
305
+ 0.9697265625,
306
+ 0.9873046875,
307
+ 0.9853515625,
308
+ 0.982421875,
309
+ 0.99609375,
310
+ 0.9990234375,
311
+ 0.9931640625,
312
+ 0.9970703125,
313
+ 0.9912109375,
314
+ 0.984375,
315
+ 0.9892578125,
316
+ 0.9677734375,
317
+ 0.990234375,
318
+ 0.98828125,
319
+ 0.990234375,
320
+ 0.986328125,
321
+ 0.994140625,
322
+ 0.98828125,
323
+ 0.98828125,
324
+ 0.9892578125,
325
+ 0.990234375
326
+ ]
327
+ }
328
+ },
329
+ "first_exact_by_run": {}
330
+ }
331
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=2000 views=1024000 token_acc=0.9702 exact=0/64 exact_refs=0 hits=[]
332
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=2000 to=3000
333
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=3000
334
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=3000 soft=none
335
+ [decode] max_len=1024 generated=64/64
336
+ {
337
+ "num_rows": 1,
338
+ "best_by_run": {
339
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
340
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
341
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0003000.pt",
342
+ "ckpt_step": 3000,
343
+ "endpoint_softening": "none",
344
+ "decode_rule": "flowmap",
345
+ "steps": 128,
346
+ "time_schedule": "logit_normal",
347
+ "model_t_mode": "post",
348
+ "final_from": "state",
349
+ "n_gen": 64,
350
+ "n_refs": 8,
351
+ "token_acc_mean": 0.919891357421875,
352
+ "token_acc_min": 0.8154296875,
353
+ "token_acc_max": 0.97265625,
354
+ "exact_acc": 0.0,
355
+ "exact_count": 0,
356
+ "exact_ref_coverage": 0.0,
357
+ "exact_ref_count": 0,
358
+ "exact_ref_hits": [],
359
+ "best_ref_idx": [
360
+ 6,
361
+ 6,
362
+ 5,
363
+ 6,
364
+ 6,
365
+ 1,
366
+ 5,
367
+ 6,
368
+ 6,
369
+ 5,
370
+ 6,
371
+ 3,
372
+ 6,
373
+ 3,
374
+ 1,
375
+ 6,
376
+ 6,
377
+ 6,
378
+ 6,
379
+ 6,
380
+ 3,
381
+ 6,
382
+ 4,
383
+ 6,
384
+ 7,
385
+ 6,
386
+ 5,
387
+ 1,
388
+ 6,
389
+ 7,
390
+ 6,
391
+ 6,
392
+ 6,
393
+ 6,
394
+ 6,
395
+ 6,
396
+ 3,
397
+ 6,
398
+ 6,
399
+ 6,
400
+ 6,
401
+ 3,
402
+ 6,
403
+ 6,
404
+ 6,
405
+ 7,
406
+ 5,
407
+ 6,
408
+ 0,
409
+ 6,
410
+ 6,
411
+ 6,
412
+ 6,
413
+ 6,
414
+ 6,
415
+ 6,
416
+ 5,
417
+ 6,
418
+ 6,
419
+ 6,
420
+ 6,
421
+ 6,
422
+ 6,
423
+ 6
424
+ ],
425
+ "best_token_acc": [
426
+ 0.921875,
427
+ 0.908203125,
428
+ 0.96484375,
429
+ 0.90234375,
430
+ 0.912109375,
431
+ 0.955078125,
432
+ 0.94921875,
433
+ 0.91796875,
434
+ 0.9111328125,
435
+ 0.9404296875,
436
+ 0.9130859375,
437
+ 0.939453125,
438
+ 0.916015625,
439
+ 0.955078125,
440
+ 0.9189453125,
441
+ 0.9267578125,
442
+ 0.9091796875,
443
+ 0.8955078125,
444
+ 0.9296875,
445
+ 0.9248046875,
446
+ 0.94921875,
447
+ 0.9013671875,
448
+ 0.9609375,
449
+ 0.9091796875,
450
+ 0.962890625,
451
+ 0.9140625,
452
+ 0.9521484375,
453
+ 0.9482421875,
454
+ 0.8896484375,
455
+ 0.966796875,
456
+ 0.916015625,
457
+ 0.8916015625,
458
+ 0.908203125,
459
+ 0.912109375,
460
+ 0.9228515625,
461
+ 0.900390625,
462
+ 0.916015625,
463
+ 0.89453125,
464
+ 0.9091796875,
465
+ 0.92578125,
466
+ 0.91015625,
467
+ 0.919921875,
468
+ 0.88671875,
469
+ 0.9208984375,
470
+ 0.9091796875,
471
+ 0.9619140625,
472
+ 0.9697265625,
473
+ 0.90625,
474
+ 0.8154296875,
475
+ 0.91796875,
476
+ 0.9033203125,
477
+ 0.8828125,
478
+ 0.90625,
479
+ 0.9091796875,
480
+ 0.9267578125,
481
+ 0.916015625,
482
+ 0.97265625,
483
+ 0.892578125,
484
+ 0.9169921875,
485
+ 0.912109375,
486
+ 0.923828125,
487
+ 0.9033203125,
488
+ 0.9150390625,
489
+ 0.9111328125
490
+ ]
491
+ }
492
+ },
493
+ "first_exact_by_run": {}
494
+ }
495
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=3000 views=1536000 token_acc=0.9199 exact=0/64 exact_refs=0 hits=[]
496
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=3000 to=4000
497
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=4000
498
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=4000 soft=none
499
+ [decode] max_len=1024 generated=64/64
500
+ {
501
+ "num_rows": 1,
502
+ "best_by_run": {
503
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
504
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
505
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0004000.pt",
506
+ "ckpt_step": 4000,
507
+ "endpoint_softening": "none",
508
+ "decode_rule": "flowmap",
509
+ "steps": 128,
510
+ "time_schedule": "logit_normal",
511
+ "model_t_mode": "post",
512
+ "final_from": "state",
513
+ "n_gen": 64,
514
+ "n_refs": 8,
515
+ "token_acc_mean": 0.972259521484375,
516
+ "token_acc_min": 0.951171875,
517
+ "token_acc_max": 1.0,
518
+ "exact_acc": 0.015625,
519
+ "exact_count": 1,
520
+ "exact_ref_coverage": 0.125,
521
+ "exact_ref_count": 1,
522
+ "exact_ref_hits": [
523
+ 4
524
+ ],
525
+ "best_ref_idx": [
526
+ 6,
527
+ 6,
528
+ 6,
529
+ 6,
530
+ 7,
531
+ 6,
532
+ 6,
533
+ 2,
534
+ 6,
535
+ 6,
536
+ 6,
537
+ 7,
538
+ 7,
539
+ 6,
540
+ 6,
541
+ 7,
542
+ 6,
543
+ 6,
544
+ 5,
545
+ 6,
546
+ 6,
547
+ 6,
548
+ 6,
549
+ 6,
550
+ 6,
551
+ 7,
552
+ 2,
553
+ 7,
554
+ 7,
555
+ 6,
556
+ 6,
557
+ 6,
558
+ 6,
559
+ 6,
560
+ 0,
561
+ 6,
562
+ 6,
563
+ 0,
564
+ 6,
565
+ 6,
566
+ 6,
567
+ 6,
568
+ 6,
569
+ 6,
570
+ 6,
571
+ 6,
572
+ 6,
573
+ 7,
574
+ 6,
575
+ 4,
576
+ 6,
577
+ 7,
578
+ 6,
579
+ 6,
580
+ 6,
581
+ 2,
582
+ 6,
583
+ 6,
584
+ 2,
585
+ 6,
586
+ 6,
587
+ 6,
588
+ 2,
589
+ 0
590
+ ],
591
+ "best_token_acc": [
592
+ 0.951171875,
593
+ 0.9658203125,
594
+ 0.955078125,
595
+ 0.9658203125,
596
+ 0.9931640625,
597
+ 0.96484375,
598
+ 0.970703125,
599
+ 0.9912109375,
600
+ 0.970703125,
601
+ 0.9677734375,
602
+ 0.966796875,
603
+ 0.9931640625,
604
+ 0.998046875,
605
+ 0.96875,
606
+ 0.9619140625,
607
+ 0.994140625,
608
+ 0.9619140625,
609
+ 0.9560546875,
610
+ 0.998046875,
611
+ 0.966796875,
612
+ 0.9638671875,
613
+ 0.962890625,
614
+ 0.9609375,
615
+ 0.966796875,
616
+ 0.9658203125,
617
+ 0.994140625,
618
+ 0.98828125,
619
+ 0.9951171875,
620
+ 0.9921875,
621
+ 0.951171875,
622
+ 0.9697265625,
623
+ 0.9736328125,
624
+ 0.966796875,
625
+ 0.958984375,
626
+ 0.9990234375,
627
+ 0.9658203125,
628
+ 0.9677734375,
629
+ 0.9931640625,
630
+ 0.958984375,
631
+ 0.9599609375,
632
+ 0.966796875,
633
+ 0.9638671875,
634
+ 0.9658203125,
635
+ 0.966796875,
636
+ 0.962890625,
637
+ 0.96875,
638
+ 0.953125,
639
+ 0.9970703125,
640
+ 0.958984375,
641
+ 1.0,
642
+ 0.9560546875,
643
+ 0.99609375,
644
+ 0.9697265625,
645
+ 0.953125,
646
+ 0.9658203125,
647
+ 0.9921875,
648
+ 0.96875,
649
+ 0.962890625,
650
+ 0.9912109375,
651
+ 0.9677734375,
652
+ 0.9658203125,
653
+ 0.9638671875,
654
+ 0.9912109375,
655
+ 0.958984375
656
+ ]
657
+ }
658
+ },
659
+ "first_exact_by_run": {
660
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
661
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
662
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0004000.pt",
663
+ "ckpt_step": 4000,
664
+ "endpoint_softening": "none",
665
+ "decode_rule": "flowmap",
666
+ "steps": 128,
667
+ "time_schedule": "logit_normal",
668
+ "model_t_mode": "post",
669
+ "final_from": "state",
670
+ "n_gen": 64,
671
+ "n_refs": 8,
672
+ "token_acc_mean": 0.972259521484375,
673
+ "token_acc_min": 0.951171875,
674
+ "token_acc_max": 1.0,
675
+ "exact_acc": 0.015625,
676
+ "exact_count": 1,
677
+ "exact_ref_coverage": 0.125,
678
+ "exact_ref_count": 1,
679
+ "exact_ref_hits": [
680
+ 4
681
+ ],
682
+ "best_ref_idx": [
683
+ 6,
684
+ 6,
685
+ 6,
686
+ 6,
687
+ 7,
688
+ 6,
689
+ 6,
690
+ 2,
691
+ 6,
692
+ 6,
693
+ 6,
694
+ 7,
695
+ 7,
696
+ 6,
697
+ 6,
698
+ 7,
699
+ 6,
700
+ 6,
701
+ 5,
702
+ 6,
703
+ 6,
704
+ 6,
705
+ 6,
706
+ 6,
707
+ 6,
708
+ 7,
709
+ 2,
710
+ 7,
711
+ 7,
712
+ 6,
713
+ 6,
714
+ 6,
715
+ 6,
716
+ 6,
717
+ 0,
718
+ 6,
719
+ 6,
720
+ 0,
721
+ 6,
722
+ 6,
723
+ 6,
724
+ 6,
725
+ 6,
726
+ 6,
727
+ 6,
728
+ 6,
729
+ 6,
730
+ 7,
731
+ 6,
732
+ 4,
733
+ 6,
734
+ 7,
735
+ 6,
736
+ 6,
737
+ 6,
738
+ 2,
739
+ 6,
740
+ 6,
741
+ 2,
742
+ 6,
743
+ 6,
744
+ 6,
745
+ 2,
746
+ 0
747
+ ],
748
+ "best_token_acc": [
749
+ 0.951171875,
750
+ 0.9658203125,
751
+ 0.955078125,
752
+ 0.9658203125,
753
+ 0.9931640625,
754
+ 0.96484375,
755
+ 0.970703125,
756
+ 0.9912109375,
757
+ 0.970703125,
758
+ 0.9677734375,
759
+ 0.966796875,
760
+ 0.9931640625,
761
+ 0.998046875,
762
+ 0.96875,
763
+ 0.9619140625,
764
+ 0.994140625,
765
+ 0.9619140625,
766
+ 0.9560546875,
767
+ 0.998046875,
768
+ 0.966796875,
769
+ 0.9638671875,
770
+ 0.962890625,
771
+ 0.9609375,
772
+ 0.966796875,
773
+ 0.9658203125,
774
+ 0.994140625,
775
+ 0.98828125,
776
+ 0.9951171875,
777
+ 0.9921875,
778
+ 0.951171875,
779
+ 0.9697265625,
780
+ 0.9736328125,
781
+ 0.966796875,
782
+ 0.958984375,
783
+ 0.9990234375,
784
+ 0.9658203125,
785
+ 0.9677734375,
786
+ 0.9931640625,
787
+ 0.958984375,
788
+ 0.9599609375,
789
+ 0.966796875,
790
+ 0.9638671875,
791
+ 0.9658203125,
792
+ 0.966796875,
793
+ 0.962890625,
794
+ 0.96875,
795
+ 0.953125,
796
+ 0.9970703125,
797
+ 0.958984375,
798
+ 1.0,
799
+ 0.9560546875,
800
+ 0.99609375,
801
+ 0.9697265625,
802
+ 0.953125,
803
+ 0.9658203125,
804
+ 0.9921875,
805
+ 0.96875,
806
+ 0.962890625,
807
+ 0.9912109375,
808
+ 0.9677734375,
809
+ 0.9658203125,
810
+ 0.9638671875,
811
+ 0.9912109375,
812
+ 0.958984375
813
+ ]
814
+ }
815
+ }
816
+ }
817
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=4000 views=2048000 token_acc=0.9723 exact=1/64 exact_refs=1 hits=[4]
818
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=4000 to=5000
819
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=5000
820
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=5000 soft=none
821
+ [decode] max_len=1024 generated=64/64
822
+ {
823
+ "num_rows": 1,
824
+ "best_by_run": {
825
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
826
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
827
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0005000.pt",
828
+ "ckpt_step": 5000,
829
+ "endpoint_softening": "none",
830
+ "decode_rule": "flowmap",
831
+ "steps": 128,
832
+ "time_schedule": "logit_normal",
833
+ "model_t_mode": "post",
834
+ "final_from": "state",
835
+ "n_gen": 64,
836
+ "n_refs": 8,
837
+ "token_acc_mean": 0.9835357666015625,
838
+ "token_acc_min": 0.587890625,
839
+ "token_acc_max": 0.9970703125,
840
+ "exact_acc": 0.0,
841
+ "exact_count": 0,
842
+ "exact_ref_coverage": 0.0,
843
+ "exact_ref_count": 0,
844
+ "exact_ref_hits": [],
845
+ "best_ref_idx": [
846
+ 6,
847
+ 5,
848
+ 6,
849
+ 6,
850
+ 5,
851
+ 6,
852
+ 6,
853
+ 6,
854
+ 6,
855
+ 6,
856
+ 0,
857
+ 6,
858
+ 5,
859
+ 7,
860
+ 1,
861
+ 6,
862
+ 3,
863
+ 0,
864
+ 6,
865
+ 3,
866
+ 6,
867
+ 6,
868
+ 6,
869
+ 6,
870
+ 5,
871
+ 0,
872
+ 5,
873
+ 5,
874
+ 6,
875
+ 1,
876
+ 6,
877
+ 6,
878
+ 1,
879
+ 2,
880
+ 1,
881
+ 4,
882
+ 6,
883
+ 2,
884
+ 6,
885
+ 4,
886
+ 6,
887
+ 6,
888
+ 6,
889
+ 6,
890
+ 0,
891
+ 6,
892
+ 6,
893
+ 0,
894
+ 6,
895
+ 1,
896
+ 5,
897
+ 1,
898
+ 6,
899
+ 6,
900
+ 6,
901
+ 6,
902
+ 6,
903
+ 6,
904
+ 6,
905
+ 6,
906
+ 1,
907
+ 6,
908
+ 3,
909
+ 0
910
+ ],
911
+ "best_token_acc": [
912
+ 0.98828125,
913
+ 0.99609375,
914
+ 0.9921875,
915
+ 0.9931640625,
916
+ 0.9970703125,
917
+ 0.9912109375,
918
+ 0.9921875,
919
+ 0.9912109375,
920
+ 0.986328125,
921
+ 0.9931640625,
922
+ 0.9931640625,
923
+ 0.587890625,
924
+ 0.994140625,
925
+ 0.9814453125,
926
+ 0.9951171875,
927
+ 0.9951171875,
928
+ 0.97265625,
929
+ 0.9541015625,
930
+ 0.9833984375,
931
+ 0.984375,
932
+ 0.9921875,
933
+ 0.98828125,
934
+ 0.9931640625,
935
+ 0.994140625,
936
+ 0.9970703125,
937
+ 0.9921875,
938
+ 0.99609375,
939
+ 0.9931640625,
940
+ 0.9931640625,
941
+ 0.9814453125,
942
+ 0.994140625,
943
+ 0.9892578125,
944
+ 0.9833984375,
945
+ 0.9736328125,
946
+ 0.9970703125,
947
+ 0.984375,
948
+ 0.9921875,
949
+ 0.98046875,
950
+ 0.98828125,
951
+ 0.9931640625,
952
+ 0.994140625,
953
+ 0.9833984375,
954
+ 0.9931640625,
955
+ 0.9931640625,
956
+ 0.9921875,
957
+ 0.9931640625,
958
+ 0.9921875,
959
+ 0.9970703125,
960
+ 0.9921875,
961
+ 0.994140625,
962
+ 0.994140625,
963
+ 0.9892578125,
964
+ 0.9951171875,
965
+ 0.9765625,
966
+ 0.9921875,
967
+ 0.98828125,
968
+ 0.9951171875,
969
+ 0.9931640625,
970
+ 0.9921875,
971
+ 0.990234375,
972
+ 0.9970703125,
973
+ 0.9794921875,
974
+ 0.990234375,
975
+ 0.9892578125
976
+ ]
977
+ }
978
+ },
979
+ "first_exact_by_run": {}
980
+ }
981
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=5000 views=2560000 token_acc=0.9835 exact=0/64 exact_refs=0 hits=[]
982
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=5000 to=6000
983
+ [ctx1024-sweep] eval config=p75_s4_i32_outwdm1 step=6000
984
+ [eval-decode-acc] train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 step=6000 soft=none
985
+ [decode] max_len=1024 generated=64/64
986
+ {
987
+ "num_rows": 1,
988
+ "best_by_run": {
989
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
990
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
991
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0006000.pt",
992
+ "ckpt_step": 6000,
993
+ "endpoint_softening": "none",
994
+ "decode_rule": "flowmap",
995
+ "steps": 128,
996
+ "time_schedule": "logit_normal",
997
+ "model_t_mode": "post",
998
+ "final_from": "state",
999
+ "n_gen": 64,
1000
+ "n_refs": 8,
1001
+ "token_acc_mean": 0.9924468994140625,
1002
+ "token_acc_min": 0.9052734375,
1003
+ "token_acc_max": 1.0,
1004
+ "exact_acc": 0.0625,
1005
+ "exact_count": 4,
1006
+ "exact_ref_coverage": 0.25,
1007
+ "exact_ref_count": 2,
1008
+ "exact_ref_hits": [
1009
+ 3,
1010
+ 5
1011
+ ],
1012
+ "best_ref_idx": [
1013
+ 1,
1014
+ 6,
1015
+ 6,
1016
+ 5,
1017
+ 4,
1018
+ 3,
1019
+ 5,
1020
+ 6,
1021
+ 6,
1022
+ 5,
1023
+ 6,
1024
+ 1,
1025
+ 2,
1026
+ 3,
1027
+ 4,
1028
+ 1,
1029
+ 1,
1030
+ 6,
1031
+ 2,
1032
+ 3,
1033
+ 1,
1034
+ 6,
1035
+ 6,
1036
+ 0,
1037
+ 2,
1038
+ 6,
1039
+ 1,
1040
+ 4,
1041
+ 6,
1042
+ 6,
1043
+ 6,
1044
+ 1,
1045
+ 0,
1046
+ 1,
1047
+ 6,
1048
+ 7,
1049
+ 6,
1050
+ 1,
1051
+ 5,
1052
+ 2,
1053
+ 6,
1054
+ 1,
1055
+ 6,
1056
+ 6,
1057
+ 7,
1058
+ 1,
1059
+ 6,
1060
+ 6,
1061
+ 2,
1062
+ 2,
1063
+ 6,
1064
+ 1,
1065
+ 6,
1066
+ 7,
1067
+ 5,
1068
+ 5,
1069
+ 3,
1070
+ 6,
1071
+ 1,
1072
+ 1,
1073
+ 5,
1074
+ 6,
1075
+ 2,
1076
+ 6
1077
+ ],
1078
+ "best_token_acc": [
1079
+ 0.9951171875,
1080
+ 0.99609375,
1081
+ 0.998046875,
1082
+ 1.0,
1083
+ 0.9453125,
1084
+ 0.9970703125,
1085
+ 0.9990234375,
1086
+ 0.9970703125,
1087
+ 0.9990234375,
1088
+ 1.0,
1089
+ 0.9990234375,
1090
+ 0.99609375,
1091
+ 0.9970703125,
1092
+ 0.9990234375,
1093
+ 0.9951171875,
1094
+ 0.9990234375,
1095
+ 0.9990234375,
1096
+ 0.998046875,
1097
+ 0.9970703125,
1098
+ 0.9990234375,
1099
+ 0.998046875,
1100
+ 0.998046875,
1101
+ 0.994140625,
1102
+ 0.9052734375,
1103
+ 0.9970703125,
1104
+ 0.9970703125,
1105
+ 0.9990234375,
1106
+ 0.994140625,
1107
+ 0.998046875,
1108
+ 0.9970703125,
1109
+ 0.998046875,
1110
+ 0.9970703125,
1111
+ 0.9189453125,
1112
+ 0.9970703125,
1113
+ 0.9951171875,
1114
+ 0.9736328125,
1115
+ 0.998046875,
1116
+ 0.9970703125,
1117
+ 0.9970703125,
1118
+ 0.9970703125,
1119
+ 0.9970703125,
1120
+ 0.9970703125,
1121
+ 0.9970703125,
1122
+ 0.9970703125,
1123
+ 0.953125,
1124
+ 0.9990234375,
1125
+ 0.998046875,
1126
+ 0.998046875,
1127
+ 0.9970703125,
1128
+ 0.99609375,
1129
+ 0.998046875,
1130
+ 0.998046875,
1131
+ 0.9970703125,
1132
+ 0.962890625,
1133
+ 0.994140625,
1134
+ 1.0,
1135
+ 1.0,
1136
+ 0.9970703125,
1137
+ 0.9990234375,
1138
+ 0.9970703125,
1139
+ 0.9990234375,
1140
+ 0.998046875,
1141
+ 0.99609375,
1142
+ 0.998046875
1143
+ ]
1144
+ }
1145
+ },
1146
+ "first_exact_by_run": {
1147
+ "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705::none": {
1148
+ "run": "train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705",
1149
+ "checkpoint": "runs/train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705/step_0006000.pt",
1150
+ "ckpt_step": 6000,
1151
+ "endpoint_softening": "none",
1152
+ "decode_rule": "flowmap",
1153
+ "steps": 128,
1154
+ "time_schedule": "logit_normal",
1155
+ "model_t_mode": "post",
1156
+ "final_from": "state",
1157
+ "n_gen": 64,
1158
+ "n_refs": 8,
1159
+ "token_acc_mean": 0.9924468994140625,
1160
+ "token_acc_min": 0.9052734375,
1161
+ "token_acc_max": 1.0,
1162
+ "exact_acc": 0.0625,
1163
+ "exact_count": 4,
1164
+ "exact_ref_coverage": 0.25,
1165
+ "exact_ref_count": 2,
1166
+ "exact_ref_hits": [
1167
+ 3,
1168
+ 5
1169
+ ],
1170
+ "best_ref_idx": [
1171
+ 1,
1172
+ 6,
1173
+ 6,
1174
+ 5,
1175
+ 4,
1176
+ 3,
1177
+ 5,
1178
+ 6,
1179
+ 6,
1180
+ 5,
1181
+ 6,
1182
+ 1,
1183
+ 2,
1184
+ 3,
1185
+ 4,
1186
+ 1,
1187
+ 1,
1188
+ 6,
1189
+ 2,
1190
+ 3,
1191
+ 1,
1192
+ 6,
1193
+ 6,
1194
+ 0,
1195
+ 2,
1196
+ 6,
1197
+ 1,
1198
+ 4,
1199
+ 6,
1200
+ 6,
1201
+ 6,
1202
+ 1,
1203
+ 0,
1204
+ 1,
1205
+ 6,
1206
+ 7,
1207
+ 6,
1208
+ 1,
1209
+ 5,
1210
+ 2,
1211
+ 6,
1212
+ 1,
1213
+ 6,
1214
+ 6,
1215
+ 7,
1216
+ 1,
1217
+ 6,
1218
+ 6,
1219
+ 2,
1220
+ 2,
1221
+ 6,
1222
+ 1,
1223
+ 6,
1224
+ 7,
1225
+ 5,
1226
+ 5,
1227
+ 3,
1228
+ 6,
1229
+ 1,
1230
+ 1,
1231
+ 5,
1232
+ 6,
1233
+ 2,
1234
+ 6
1235
+ ],
1236
+ "best_token_acc": [
1237
+ 0.9951171875,
1238
+ 0.99609375,
1239
+ 0.998046875,
1240
+ 1.0,
1241
+ 0.9453125,
1242
+ 0.9970703125,
1243
+ 0.9990234375,
1244
+ 0.9970703125,
1245
+ 0.9990234375,
1246
+ 1.0,
1247
+ 0.9990234375,
1248
+ 0.99609375,
1249
+ 0.9970703125,
1250
+ 0.9990234375,
1251
+ 0.9951171875,
1252
+ 0.9990234375,
1253
+ 0.9990234375,
1254
+ 0.998046875,
1255
+ 0.9970703125,
1256
+ 0.9990234375,
1257
+ 0.998046875,
1258
+ 0.998046875,
1259
+ 0.994140625,
1260
+ 0.9052734375,
1261
+ 0.9970703125,
1262
+ 0.9970703125,
1263
+ 0.9990234375,
1264
+ 0.994140625,
1265
+ 0.998046875,
1266
+ 0.9970703125,
1267
+ 0.998046875,
1268
+ 0.9970703125,
1269
+ 0.9189453125,
1270
+ 0.9970703125,
1271
+ 0.9951171875,
1272
+ 0.9736328125,
1273
+ 0.998046875,
1274
+ 0.9970703125,
1275
+ 0.9970703125,
1276
+ 0.9970703125,
1277
+ 0.9970703125,
1278
+ 0.9970703125,
1279
+ 0.9970703125,
1280
+ 0.9970703125,
1281
+ 0.953125,
1282
+ 0.9990234375,
1283
+ 0.998046875,
1284
+ 0.998046875,
1285
+ 0.9970703125,
1286
+ 0.99609375,
1287
+ 0.998046875,
1288
+ 0.998046875,
1289
+ 0.9970703125,
1290
+ 0.962890625,
1291
+ 0.994140625,
1292
+ 1.0,
1293
+ 1.0,
1294
+ 0.9970703125,
1295
+ 0.9990234375,
1296
+ 0.9970703125,
1297
+ 0.9990234375,
1298
+ 0.998046875,
1299
+ 0.99609375,
1300
+ 0.998046875
1301
+ ]
1302
+ }
1303
+ }
1304
+ }
1305
+ RESULT config=p75_s4_i32_outwdm1 run=train8_ctx1024_p75_s4_i32_outwdm1_ctx1024_sweep_selected_20260517_210705 ckpt_step=6000 views=3072000 token_acc=0.9924 exact=4/64 exact_refs=2 hits=[3, 5]
1306
+ [ctx1024-sweep] train config=p75_s4_i32_outwdm1 from=6000 to=7000
LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/ctx1024_sweep_selected_20260517_210705.nohup ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/ctx1024_rollin_sweep_4gpu/queued_ctx1024_sweep.nohup ADDED
@@ -0,0 +1 @@
 
 
1
+ [ctx1024-sweep] waiting for run=train8_rollin_len1024_rollin_p50_s4_i32_20260517_1855ctx1024bs128
LTA_openwebtext_dualt/logs/ctx1024_sampledpath_sweep_4gpu/ctx1024_sampledpath_20260517_223933.nohup ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [ctx1024-sampleds] start stamp=ctx1024_sampledpath_20260517_223933 len=1024 vocab=2664 out=docs/lta_samples/metrics_20260517/ctx1024_sampleds_sweep_bs512_ode128_ctx1024_sampledpath_20260517_223933
2
+ [ctx1024-sampleds] config=p50_path4_unif0_0p125_outwdm1 run=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_20260517_223933 p=0.50 s_dist=uniform s_frac=0.0->0.125 beta=2.0,6.0 outwd=-1 sync_t=1
3
+ [ctx1024-sampleds] train config=p50_path4_unif0_0p125_outwdm1 from=0 to=1000
4
+ [launch] gpt2 cached OWT soft-endpoint m/n pilot
5
+ [launch] run_name=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_20260517_223933
6
+ [launch] save_dir=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_20260517_223933
7
+ [launch] n=1024 m=0 clean_state_mode=onehot
8
+ [launch] mask_mixture lowk=0.0 all=1.0
9
+ [launch] model d=192 layers=3 heads=3 ff=768 vocab_override=2664
10
+ [launch] optimizer=muon muon_impl=legacy weight_decay=0.1 output_weight_decay=-1
11
+ [launch] target_loss=hard_ce conf=0.0->1.0 power=1.0
12
+ [launch] mask_ratio=1.0->1.0
13
+ [launch] mask_ratio_floor_schedule=none
14
+ [launch] dirichlet C=1.0->1024 endpoint=categorical_dual_t sampler=dirichlet
15
+ [launch] wrong_mix seq_alpha=0.0 wrong_floor=0.0 unigram=0.0 uniform=0.0 basin=0.0 basin_ids=
16
+ [launch] rollout_train prob=0.50 mode=sampled_s steps=1 infer_steps=1 s_dist=uniform s_frac=0.0->0.125 temp=1.45 corrupt_only=1 samplewise=1 selected_only=1 sync_t=1
17
+ [launch] cache=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit exact_repeat_per_chunk=64
18
+ NCCL version 2.25.1+cuda12.8
19
+ {
20
+ "device": "cuda:0",
21
+ "rank": 0,
22
+ "world_size": 4,
23
+ "samples": "owt_cached_chunks:8",
24
+ "vocab_size": 2664,
25
+ "tokenizer_vocab_size": 50257,
26
+ "save_dir": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_20260517_223933",
27
+ "batch_size": 128,
28
+ "grad_accum": 1,
29
+ "effective_batch_size": 512,
30
+ "global_batch_size": 512,
31
+ "lr_schedule": "constant_warmup",
32
+ "optimizer": "muon",
33
+ "epochs": 0.0,
34
+ "steps_per_epoch": 1,
35
+ "total_steps": 1000,
36
+ "warmup_steps": 10,
37
+ "warmup_epochs": -1.0,
38
+ "min_lr": 0.0,
39
+ "weight_decay": 0.1,
40
+ "output_weight_decay": -1.0,
41
+ "adamw_param_groups": "nanogpt",
42
+ "adam_beta1": 0.9,
43
+ "adam_beta2": 0.95,
44
+ "adam_eps": 1e-08,
45
+ "muon_impl": "legacy",
46
+ "muon_momentum": 0.95,
47
+ "muon_ns_steps": 5,
48
+ "muon_update_scale": 1.0,
49
+ "muon_nesterov": false,
50
+ "muon_width_scale": false,
51
+ "muon_grouping": "legacy_dim_ge_2",
52
+ "muon_param_count": 2616320,
53
+ "muon_adam_param_count": 8192,
54
+ "muon_param_names": [
55
+ "vocab_embed.embedding",
56
+ "sigma_map.net.0.weight",
57
+ "sigma_map.net.2.weight",
58
+ "blocks.0.attn_qkv.weight",
59
+ "blocks.0.attn_out.weight",
60
+ "blocks.0.mlp.0.weight",
61
+ "blocks.0.mlp.2.weight",
62
+ "blocks.0.adaLN_modulation.weight",
63
+ "blocks.1.attn_qkv.weight",
64
+ "blocks.1.attn_out.weight",
65
+ "blocks.1.mlp.0.weight",
66
+ "blocks.1.mlp.2.weight",
67
+ "blocks.1.adaLN_modulation.weight",
68
+ "blocks.2.attn_qkv.weight",
69
+ "blocks.2.attn_out.weight",
70
+ "blocks.2.mlp.0.weight",
71
+ "blocks.2.mlp.2.weight",
72
+ "blocks.2.adaLN_modulation.weight",
73
+ "output_layer.linear.weight",
74
+ "output_layer.adaLN_modulation.weight"
75
+ ],
76
+ "muon_adam_param_names": [
77
+ "sigma_map.net.0.bias",
78
+ "sigma_map.net.2.bias",
79
+ "blocks.0.norm1.weight",
80
+ "blocks.0.norm2.weight",
81
+ "blocks.0.mlp.0.bias",
82
+ "blocks.0.mlp.2.bias",
83
+ "blocks.0.adaLN_modulation.bias",
84
+ "blocks.1.norm1.weight",
85
+ "blocks.1.norm2.weight",
86
+ "blocks.1.mlp.0.bias",
87
+ "blocks.1.mlp.2.bias",
88
+ "blocks.1.adaLN_modulation.bias",
89
+ "blocks.2.norm1.weight",
90
+ "blocks.2.norm2.weight",
91
+ "blocks.2.mlp.0.bias",
92
+ "blocks.2.mlp.2.bias",
93
+ "blocks.2.adaLN_modulation.bias",
94
+ "output_layer.norm_final.weight",
95
+ "output_layer.adaLN_modulation.bias"
96
+ ],
97
+ "muon_effective_nesterov": false,
98
+ "muon_effective_width_scale": false,
99
+ "muon_effective_weight_decay": 0.1,
100
+ "muon_adam_fallback_nesterov": false,
101
+ "muon_adam_fallback_weight_decay": 0.1,
102
+ "ema_decay": 0.9999,
103
+ "ema_start_step": 0,
104
+ "model_type": "ddit",
105
+ "ddit_mlp_type": "gelu",
106
+ "elf_num_time_tokens": 4,
107
+ "elf_num_model_mode_tokens": 0,
108
+ "qk_norm": true,
109
+ "output_bias": false,
110
+ "output_init_std": -1.0,
111
+ "norm_type": "rmsnorm",
112
+ "target_loss": "hard_ce",
113
+ "linear_soft_target_power": 1.0,
114
+ "linear_soft_target_min_conf": 0.0,
115
+ "linear_soft_target_max_conf": 1.0,
116
+ "t_sampling_mode": "logit_normal",
117
+ "t_sampling_power": 1.0,
118
+ "t_sampling_eps": 0.0001,
119
+ "t_sampling_logit_mean": -1.5,
120
+ "t_sampling_logit_std": 0.8,
121
+ "dual_t": true,
122
+ "corrupt_t_mode": "same",
123
+ "corrupt_min_t": 0.0,
124
+ "corrupt_max_t": 1.0,
125
+ "prefix_block_prob": 0.0,
126
+ "prefix_block_len": 128,
127
+ "mask_ratio_floor_schedule": "none",
128
+ "dirichlet_endpoint_mode": "categorical_dual_t",
129
+ "dirichlet_semantic_t_mode": "same",
130
+ "dirichlet_semantic_t_value": 0.0,
131
+ "dirichlet_semantic_t_curve": "linear",
132
+ "dirichlet_semantic_t_power": 1.0,
133
+ "endpoint_sequence_random_prob_alpha": 0.0,
134
+ "categorical_wrong_from_full_vocab": true,
135
+ "categorical_wrong_from_batch_valid_tokens": false,
136
+ "categorical_wrong_basin_token_ids": "",
137
+ "categorical_wrong_basin_prob": 0.0,
138
+ "categorical_wrong_unigram_prob": 0.0,
139
+ "categorical_wrong_uniform_prob": 0.0,
140
+ "categorical_wrong_prob_floor": 0.0,
141
+ "categorical_wrong_corpus_unigram_path": "",
142
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
143
+ "categorical_wrong_basin_shared_prob": 0.0,
144
+ "categorical_wrong_unigram_shared_prob": 0.0,
145
+ "mask_mixture_original_prob": 0.0,
146
+ "mask_mixture_lowk_prob": 0.0,
147
+ "mask_mixture_lowcorrupt_prob": 0.0,
148
+ "mask_mixture_block_prob": 0.0,
149
+ "mask_mixture_all_prob": 1.0,
150
+ "mask_mixture_lowk_clean_tokens": "0",
151
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
152
+ "mask_mixture_block_tokens": "64,128",
153
+ "simplex_bridge_sampler": "dirichlet",
154
+ "logistic_normal_sigma_min": 0.1,
155
+ "logistic_normal_sigma_max": 1.0,
156
+ "logistic_normal_tau_min": 1.0,
157
+ "logistic_normal_tau_max": 1.0,
158
+ "torch_compile": false,
159
+ "compile_mode": "max-autotune",
160
+ "state_format": "prob",
161
+ "meanflow_weight": 0.0,
162
+ "rollout_train_prob": 0.5,
163
+ "rollout_train_steps": 1,
164
+ "rollout_train_infer_steps": 1,
165
+ "rollout_train_time_mode": "sampled_s",
166
+ "rollout_train_s_dist": "uniform",
167
+ "rollout_train_s_min_frac": 0.0,
168
+ "rollout_train_s_max_frac": 0.125,
169
+ "rollout_train_s_beta_alpha": 2.0,
170
+ "rollout_train_s_beta_beta": 6.0,
171
+ "rollout_train_temp": 1.45,
172
+ "rollout_train_max_gamma": 1.0,
173
+ "rollout_train_corrupt_only": true,
174
+ "rollout_train_samplewise": true,
175
+ "rollout_train_compute_always": false,
176
+ "rollout_train_sync_t": true,
177
+ "bridge_noise_init": "logistic_normal",
178
+ "noise_sigma": -1.0,
179
+ "allow_tf32": true,
180
+ "activation_checkpointing": false,
181
+ "activation_checkpoint_interval": 1,
182
+ "activation_checkpoint_scope": "block",
183
+ "ddp_static_graph": false,
184
+ "ddp_gradient_as_bucket_view": true,
185
+ "blocking_data_transfer": false,
186
+ "dataloader_prefetch_factor": 4,
187
+ "full_train_stats": false,
188
+ "tokenized_hf": false,
189
+ "tokenized_pad_token": "pad",
190
+ "elf_conditional_hf": false,
191
+ "record_pad_truncate": false,
192
+ "record_add_eos": false,
193
+ "record_add_special_tokens": false,
194
+ "record_pad_token": "pad",
195
+ "record_shuffle_buffer": 10000,
196
+ "wrap": true,
197
+ "wrap_mode": "stream",
198
+ "wrap_record_buffer_size": 200,
199
+ "owt_cached_chunks": true,
200
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
201
+ "owt_chunk_cache_rebuild": false,
202
+ "owt_chunk_cache_write_batch": 4096,
203
+ "owt_exact_repeat_per_chunk": 64,
204
+ "online_chunk_shuffle": false,
205
+ "online_chunk_shuffle_buffer": 10000,
206
+ "openwebtext_split": "train_minus_100k",
207
+ "detokenizer": "auto",
208
+ "resolved_detokenizer": null,
209
+ "num_workers": 0,
210
+ "latest_every": 1000,
211
+ "resume_path": ""
212
+ }
213
+ W0517 22:40:01.897000 386925 torch/distributed/elastic/agent/server/api.py:719] Received 15 death signal, shutting down workers
214
+ W0517 22:40:01.899000 386925 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 386929 closing signal SIGTERM
215
+ W0517 22:40:01.900000 386925 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 386930 closing signal SIGTERM
216
+ W0517 22:40:01.900000 386925 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 386931 closing signal SIGTERM
217
+ W0517 22:40:01.901000 386925 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 386932 closing signal SIGTERM
218
+ Traceback (most recent call last):
219
+ File "<frozen runpy>", line 198, in _run_module_as_main
220
+ File "<frozen runpy>", line 88, in _run_code
221
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 922, in <module>
222
+ main()
223
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
224
+ return f(*args, **kwargs)
225
+ ^^^^^^^^^^^^^^^^^^
226
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 918, in main
227
+ run(args)
228
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 909, in run
229
+ elastic_launch(
230
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 139, in __call__
231
+ return launch_agent(self._config, self._entrypoint, list(args))
232
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
233
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 261, in launch_agent
234
+ result = agent.run()
235
+ ^^^^^^^^^^^
236
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/metrics/api.py", line 137, in wrapper
237
+ result = f(*args, **kwargs)
238
+ ^^^^^^^^^^^^^^^^^^
239
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run
240
+ result = self._invoke_run(role)
241
+ ^^^^^^^^^^^^^^^^^^^^^^
242
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 870, in _invoke_run
243
+ time.sleep(monitor_interval)
244
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/api.py", line 84, in _terminate_process_handler
245
+ raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval)
246
+ torch.distributed.elastic.multiprocessing.api.SignalException: Process 386925 got signal: 15
LTA_openwebtext_dualt/logs/ctx1024_sampledpath_sweep_4gpu/ctx1024_sampledpath_true_20260517_224139.nohup ADDED
@@ -0,0 +1,985 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [ctx1024-sampleds] start stamp=ctx1024_sampledpath_true_20260517_224139 len=1024 vocab=2664 out=docs/lta_samples/metrics_20260517/ctx1024_sampleds_sweep_bs512_ode128_ctx1024_sampledpath_true_20260517_224139
2
+ [ctx1024-sampleds] config=p50_path4_unif0_0p125_outwdm1 run=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139 p=0.50 mode=sampled_path steps=4 s_dist=uniform s_frac=0.0->0.125 beta=2.0,6.0 outwd=-1 sync_t=1
3
+ [ctx1024-sampleds] train config=p50_path4_unif0_0p125_outwdm1 from=0 to=1000
4
+ [launch] gpt2 cached OWT soft-endpoint m/n pilot
5
+ [launch] run_name=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
6
+ [launch] save_dir=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
7
+ [launch] n=1024 m=0 clean_state_mode=onehot
8
+ [launch] mask_mixture lowk=0.0 all=1.0
9
+ [launch] model d=192 layers=3 heads=3 ff=768 vocab_override=2664
10
+ [launch] optimizer=muon muon_impl=legacy weight_decay=0.1 output_weight_decay=-1
11
+ [launch] target_loss=hard_ce conf=0.0->1.0 power=1.0
12
+ [launch] mask_ratio=1.0->1.0
13
+ [launch] mask_ratio_floor_schedule=none
14
+ [launch] dirichlet C=1.0->1024 endpoint=categorical_dual_t sampler=dirichlet
15
+ [launch] wrong_mix seq_alpha=0.0 wrong_floor=0.0 unigram=0.0 uniform=0.0 basin=0.0 basin_ids=
16
+ [launch] rollout_train prob=0.50 mode=sampled_path steps=4 infer_steps=1 s_dist=uniform s_frac=0.0->0.125 temp=1.45 corrupt_only=1 samplewise=1 selected_only=1 sync_t=1
17
+ [launch] cache=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit exact_repeat_per_chunk=64
18
+ NCCL version 2.25.1+cuda12.8
19
+ {
20
+ "device": "cuda:0",
21
+ "rank": 0,
22
+ "world_size": 4,
23
+ "samples": "owt_cached_chunks:8",
24
+ "vocab_size": 2664,
25
+ "tokenizer_vocab_size": 50257,
26
+ "save_dir": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139",
27
+ "batch_size": 128,
28
+ "grad_accum": 1,
29
+ "effective_batch_size": 512,
30
+ "global_batch_size": 512,
31
+ "lr_schedule": "constant_warmup",
32
+ "optimizer": "muon",
33
+ "epochs": 0.0,
34
+ "steps_per_epoch": 1,
35
+ "total_steps": 1000,
36
+ "warmup_steps": 10,
37
+ "warmup_epochs": -1.0,
38
+ "min_lr": 0.0,
39
+ "weight_decay": 0.1,
40
+ "output_weight_decay": -1.0,
41
+ "adamw_param_groups": "nanogpt",
42
+ "adam_beta1": 0.9,
43
+ "adam_beta2": 0.95,
44
+ "adam_eps": 1e-08,
45
+ "muon_impl": "legacy",
46
+ "muon_momentum": 0.95,
47
+ "muon_ns_steps": 5,
48
+ "muon_update_scale": 1.0,
49
+ "muon_nesterov": false,
50
+ "muon_width_scale": false,
51
+ "muon_grouping": "legacy_dim_ge_2",
52
+ "muon_param_count": 2616320,
53
+ "muon_adam_param_count": 8192,
54
+ "muon_param_names": [
55
+ "vocab_embed.embedding",
56
+ "sigma_map.net.0.weight",
57
+ "sigma_map.net.2.weight",
58
+ "blocks.0.attn_qkv.weight",
59
+ "blocks.0.attn_out.weight",
60
+ "blocks.0.mlp.0.weight",
61
+ "blocks.0.mlp.2.weight",
62
+ "blocks.0.adaLN_modulation.weight",
63
+ "blocks.1.attn_qkv.weight",
64
+ "blocks.1.attn_out.weight",
65
+ "blocks.1.mlp.0.weight",
66
+ "blocks.1.mlp.2.weight",
67
+ "blocks.1.adaLN_modulation.weight",
68
+ "blocks.2.attn_qkv.weight",
69
+ "blocks.2.attn_out.weight",
70
+ "blocks.2.mlp.0.weight",
71
+ "blocks.2.mlp.2.weight",
72
+ "blocks.2.adaLN_modulation.weight",
73
+ "output_layer.linear.weight",
74
+ "output_layer.adaLN_modulation.weight"
75
+ ],
76
+ "muon_adam_param_names": [
77
+ "sigma_map.net.0.bias",
78
+ "sigma_map.net.2.bias",
79
+ "blocks.0.norm1.weight",
80
+ "blocks.0.norm2.weight",
81
+ "blocks.0.mlp.0.bias",
82
+ "blocks.0.mlp.2.bias",
83
+ "blocks.0.adaLN_modulation.bias",
84
+ "blocks.1.norm1.weight",
85
+ "blocks.1.norm2.weight",
86
+ "blocks.1.mlp.0.bias",
87
+ "blocks.1.mlp.2.bias",
88
+ "blocks.1.adaLN_modulation.bias",
89
+ "blocks.2.norm1.weight",
90
+ "blocks.2.norm2.weight",
91
+ "blocks.2.mlp.0.bias",
92
+ "blocks.2.mlp.2.bias",
93
+ "blocks.2.adaLN_modulation.bias",
94
+ "output_layer.norm_final.weight",
95
+ "output_layer.adaLN_modulation.bias"
96
+ ],
97
+ "muon_effective_nesterov": false,
98
+ "muon_effective_width_scale": false,
99
+ "muon_effective_weight_decay": 0.1,
100
+ "muon_adam_fallback_nesterov": false,
101
+ "muon_adam_fallback_weight_decay": 0.1,
102
+ "ema_decay": 0.9999,
103
+ "ema_start_step": 0,
104
+ "model_type": "ddit",
105
+ "ddit_mlp_type": "gelu",
106
+ "elf_num_time_tokens": 4,
107
+ "elf_num_model_mode_tokens": 0,
108
+ "qk_norm": true,
109
+ "output_bias": false,
110
+ "output_init_std": -1.0,
111
+ "norm_type": "rmsnorm",
112
+ "target_loss": "hard_ce",
113
+ "linear_soft_target_power": 1.0,
114
+ "linear_soft_target_min_conf": 0.0,
115
+ "linear_soft_target_max_conf": 1.0,
116
+ "t_sampling_mode": "logit_normal",
117
+ "t_sampling_power": 1.0,
118
+ "t_sampling_eps": 0.0001,
119
+ "t_sampling_logit_mean": -1.5,
120
+ "t_sampling_logit_std": 0.8,
121
+ "dual_t": true,
122
+ "corrupt_t_mode": "same",
123
+ "corrupt_min_t": 0.0,
124
+ "corrupt_max_t": 1.0,
125
+ "prefix_block_prob": 0.0,
126
+ "prefix_block_len": 128,
127
+ "mask_ratio_floor_schedule": "none",
128
+ "dirichlet_endpoint_mode": "categorical_dual_t",
129
+ "dirichlet_semantic_t_mode": "same",
130
+ "dirichlet_semantic_t_value": 0.0,
131
+ "dirichlet_semantic_t_curve": "linear",
132
+ "dirichlet_semantic_t_power": 1.0,
133
+ "endpoint_sequence_random_prob_alpha": 0.0,
134
+ "categorical_wrong_from_full_vocab": true,
135
+ "categorical_wrong_from_batch_valid_tokens": false,
136
+ "categorical_wrong_basin_token_ids": "",
137
+ "categorical_wrong_basin_prob": 0.0,
138
+ "categorical_wrong_unigram_prob": 0.0,
139
+ "categorical_wrong_uniform_prob": 0.0,
140
+ "categorical_wrong_prob_floor": 0.0,
141
+ "categorical_wrong_corpus_unigram_path": "",
142
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
143
+ "categorical_wrong_basin_shared_prob": 0.0,
144
+ "categorical_wrong_unigram_shared_prob": 0.0,
145
+ "mask_mixture_original_prob": 0.0,
146
+ "mask_mixture_lowk_prob": 0.0,
147
+ "mask_mixture_lowcorrupt_prob": 0.0,
148
+ "mask_mixture_block_prob": 0.0,
149
+ "mask_mixture_all_prob": 1.0,
150
+ "mask_mixture_lowk_clean_tokens": "0",
151
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
152
+ "mask_mixture_block_tokens": "64,128",
153
+ "simplex_bridge_sampler": "dirichlet",
154
+ "logistic_normal_sigma_min": 0.1,
155
+ "logistic_normal_sigma_max": 1.0,
156
+ "logistic_normal_tau_min": 1.0,
157
+ "logistic_normal_tau_max": 1.0,
158
+ "torch_compile": false,
159
+ "compile_mode": "max-autotune",
160
+ "state_format": "prob",
161
+ "meanflow_weight": 0.0,
162
+ "rollout_train_prob": 0.5,
163
+ "rollout_train_steps": 4,
164
+ "rollout_train_infer_steps": 1,
165
+ "rollout_train_time_mode": "sampled_path",
166
+ "rollout_train_s_dist": "uniform",
167
+ "rollout_train_s_min_frac": 0.0,
168
+ "rollout_train_s_max_frac": 0.125,
169
+ "rollout_train_s_beta_alpha": 2.0,
170
+ "rollout_train_s_beta_beta": 6.0,
171
+ "rollout_train_temp": 1.45,
172
+ "rollout_train_max_gamma": 1.0,
173
+ "rollout_train_corrupt_only": true,
174
+ "rollout_train_samplewise": true,
175
+ "rollout_train_compute_always": false,
176
+ "rollout_train_sync_t": true,
177
+ "bridge_noise_init": "logistic_normal",
178
+ "noise_sigma": -1.0,
179
+ "allow_tf32": true,
180
+ "activation_checkpointing": false,
181
+ "activation_checkpoint_interval": 1,
182
+ "activation_checkpoint_scope": "block",
183
+ "ddp_static_graph": false,
184
+ "ddp_gradient_as_bucket_view": true,
185
+ "blocking_data_transfer": false,
186
+ "dataloader_prefetch_factor": 4,
187
+ "full_train_stats": false,
188
+ "tokenized_hf": false,
189
+ "tokenized_pad_token": "pad",
190
+ "elf_conditional_hf": false,
191
+ "record_pad_truncate": false,
192
+ "record_add_eos": false,
193
+ "record_add_special_tokens": false,
194
+ "record_pad_token": "pad",
195
+ "record_shuffle_buffer": 10000,
196
+ "wrap": true,
197
+ "wrap_mode": "stream",
198
+ "wrap_record_buffer_size": 200,
199
+ "owt_cached_chunks": true,
200
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
201
+ "owt_chunk_cache_rebuild": false,
202
+ "owt_chunk_cache_write_batch": 4096,
203
+ "owt_exact_repeat_per_chunk": 64,
204
+ "online_chunk_shuffle": false,
205
+ "online_chunk_shuffle_buffer": 10000,
206
+ "openwebtext_split": "train_minus_100k",
207
+ "detokenizer": "auto",
208
+ "resolved_detokenizer": null,
209
+ "num_workers": 0,
210
+ "latest_every": 1000,
211
+ "resume_path": ""
212
+ }
213
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=24.8s lr=2.000000e-03 loss=7.7206 loss_recon=7.7206 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0958 corrupt_frac=1.0000 acc_corrupt=0.0958 loss_corrupt=7.7206 wrong_frac=0.7915 init_acc_corrupt=0.1164 acc_corrupt_t_0p0_0p2=0.0500 corrupt_frac_t_0p0_0p2=0.5640 acc_corrupt_t_0p2_0p4=0.1270 corrupt_frac_t_0p2_0p4=0.3466 acc_corrupt_t_0p4_0p6=0.2493 corrupt_frac_t_0p4_0p6=0.0791 acc_corrupt_t_0p6_0p8=0.3719 corrupt_frac_t_0p6_0p8=0.0136 out_w_norm=1.0047 out_g_norm=1.0928 acc_corrupt_t_0p8_1p0=0.4936 corrupt_frac_t_0p8_1p0=0.0078 loss_all=7.4724 init_gold_top10=0.2003 init_gold_top100=0.4085 rollout_applied_pos_frac=0.4453 init_acc_rollout_applied=0.1056 init_acc_rollout_kept=0.1192 logit_acc_rollout_applied=0.0969 logit_acc_rollout_kept=0.0996
214
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=23.9s lr=2.000000e-03 loss=7.0874 loss_recon=7.0874 loss_meanflow=0.0000 mean_model_t=0.2096 mean_corrupt_t=0.2096 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4995 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1036 corrupt_frac=1.0000 acc_corrupt=0.1036 loss_corrupt=7.0874 wrong_frac=0.7905 init_acc_corrupt=0.1172 acc_corrupt_t_0p0_0p2=0.0560 corrupt_frac_t_0p0_0p2=0.5557 acc_corrupt_t_0p2_0p4=0.1392 corrupt_frac_t_0p2_0p4=0.3595 acc_corrupt_t_0p4_0p6=0.2552 corrupt_frac_t_0p4_0p6=0.0762 acc_corrupt_t_0p6_0p8=0.3485 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=2.8612 out_g_norm=1.7761 acc_corrupt_t_0p8_1p0=0.4243 corrupt_frac_t_0p8_1p0=0.0078 loss_all=6.6891 init_gold_top10=0.2090 init_gold_top100=0.4276 rollout_applied_pos_frac=0.4688 init_acc_rollout_applied=0.1378 init_acc_rollout_kept=0.1215 logit_acc_rollout_applied=0.1143 logit_acc_rollout_kept=0.1146
215
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=24.0s lr=2.000000e-03 loss=6.4546 loss_recon=6.4546 loss_meanflow=0.0000 mean_model_t=0.2098 mean_corrupt_t=0.2098 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5023 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1137 corrupt_frac=1.0000 acc_corrupt=0.1137 loss_corrupt=6.4546 wrong_frac=0.7902 init_acc_corrupt=0.1187 acc_corrupt_t_0p0_0p2=0.0592 corrupt_frac_t_0p0_0p2=0.5544 acc_corrupt_t_0p2_0p4=0.1548 corrupt_frac_t_0p2_0p4=0.3617 acc_corrupt_t_0p4_0p6=0.2839 corrupt_frac_t_0p4_0p6=0.0743 out_w_norm=4.3408 out_g_norm=1.3199 acc_corrupt_t_0p6_0p8=0.3901 corrupt_frac_t_0p6_0p8=0.0139 acc_corrupt_t_0p8_1p0=0.5415 corrupt_frac_t_0p8_1p0=0.0078 loss_all=6.2279 init_gold_top10=0.2020 init_gold_top100=0.4348 rollout_applied_pos_frac=0.4375 init_acc_rollout_applied=0.0878 init_acc_rollout_kept=0.1243 logit_acc_rollout_applied=0.1032 logit_acc_rollout_kept=0.1241
216
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=23.9s lr=2.000000e-03 loss=5.9837 loss_recon=5.9837 loss_meanflow=0.0000 mean_model_t=0.2072 mean_corrupt_t=0.2072 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1233 corrupt_frac=1.0000 acc_corrupt=0.1233 loss_corrupt=5.9837 wrong_frac=0.7929 init_acc_corrupt=0.1160 acc_corrupt_t_0p0_0p2=0.0639 corrupt_frac_t_0p0_0p2=0.5638 acc_corrupt_t_0p2_0p4=0.1708 corrupt_frac_t_0p2_0p4=0.3526 acc_corrupt_t_0p4_0p6=0.3106 corrupt_frac_t_0p4_0p6=0.0753 out_w_norm=5.4789 out_g_norm=0.5031 acc_corrupt_t_0p6_0p8=0.4367 corrupt_frac_t_0p6_0p8=0.0128 acc_corrupt_t_0p8_1p0=0.5306 corrupt_frac_t_0p8_1p0=0.0117 loss_all=5.7599 init_gold_top10=0.2017 init_gold_top100=0.4699 rollout_applied_pos_frac=0.4844 init_acc_rollout_applied=0.1182 init_acc_rollout_kept=0.1042 logit_acc_rollout_applied=0.1310 logit_acc_rollout_kept=0.1227
217
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=23.9s lr=2.000000e-03 loss=5.4774 loss_recon=5.4774 loss_meanflow=0.0000 mean_model_t=0.2101 mean_corrupt_t=0.2101 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4994 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1378 corrupt_frac=1.0000 acc_corrupt=0.1378 loss_corrupt=5.4774 wrong_frac=0.7898 init_acc_corrupt=0.1197 acc_corrupt_t_0p0_0p2=0.0677 corrupt_frac_t_0p0_0p2=0.5506 acc_corrupt_t_0p2_0p4=0.1918 corrupt_frac_t_0p2_0p4=0.3660 acc_corrupt_t_0p4_0p6=0.3501 corrupt_frac_t_0p4_0p6=0.0752 acc_corrupt_t_0p6_0p8=0.4913 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=6.7180 out_g_norm=0.4070 acc_corrupt_t_0p8_1p0=0.6104 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.1003 init_gold_top10=0.2273 init_gold_top100=0.5161 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.1306 init_acc_rollout_kept=0.1486 logit_acc_rollout_applied=0.1487 logit_acc_rollout_kept=0.1648
218
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=23.9s lr=2.000000e-03 loss=4.8712 loss_recon=4.8712 loss_meanflow=0.0000 mean_model_t=0.2082 mean_corrupt_t=0.2082 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5037 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1521 corrupt_frac=1.0000 acc_corrupt=0.1521 loss_corrupt=4.8712 wrong_frac=0.7918 init_acc_corrupt=0.1187 acc_corrupt_t_0p0_0p2=0.0719 corrupt_frac_t_0p0_0p2=0.5629 acc_corrupt_t_0p2_0p4=0.2147 corrupt_frac_t_0p2_0p4=0.3502 acc_corrupt_t_0p4_0p6=0.4001 corrupt_frac_t_0p4_0p6=0.0768 out_w_norm=7.9804 out_g_norm=0.4280 acc_corrupt_t_0p6_0p8=0.5625 corrupt_frac_t_0p6_0p8=0.0133 loss_all=4.5656 init_gold_top10=0.2029 init_gold_top100=0.5925 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.1124 init_acc_rollout_kept=0.1078 logit_acc_rollout_applied=0.1600 logit_acc_rollout_kept=0.1533
219
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=24.1s lr=2.000000e-03 loss=4.2343 loss_recon=4.2343 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5123 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1770 corrupt_frac=1.0000 acc_corrupt=0.1770 loss_corrupt=4.2343 wrong_frac=0.7915 init_acc_corrupt=0.1192 acc_corrupt_t_0p0_0p2=0.0787 corrupt_frac_t_0p0_0p2=0.5591 acc_corrupt_t_0p2_0p4=0.2531 corrupt_frac_t_0p2_0p4=0.3563 acc_corrupt_t_0p4_0p6=0.4880 corrupt_frac_t_0p4_0p6=0.0764 out_w_norm=9.1564 out_g_norm=0.4726 acc_corrupt_t_0p6_0p8=0.6622 corrupt_frac_t_0p6_0p8=0.0125 acc_corrupt_t_0p8_1p0=0.8376 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.9427 init_gold_top10=0.2206 init_gold_top100=0.6230 rollout_applied_pos_frac=0.5156 init_acc_rollout_applied=0.1283 init_acc_rollout_kept=0.1214 logit_acc_rollout_applied=0.1920 logit_acc_rollout_kept=0.1924
220
+ step=800 epoch=800/1000 epoch_step=1/1 micro_steps=800 elapsed=23.9s lr=2.000000e-03 loss=3.7296 loss_recon=3.7296 loss_meanflow=0.0000 mean_model_t=0.2092 mean_corrupt_t=0.2092 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5020 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2008 corrupt_frac=1.0000 acc_corrupt=0.2008 loss_corrupt=3.7296 wrong_frac=0.7911 init_acc_corrupt=0.1208 acc_corrupt_t_0p0_0p2=0.0884 corrupt_frac_t_0p0_0p2=0.5509 acc_corrupt_t_0p2_0p4=0.2944 corrupt_frac_t_0p2_0p4=0.3674 acc_corrupt_t_0p4_0p6=0.5237 corrupt_frac_t_0p4_0p6=0.0748 acc_corrupt_t_0p6_0p8=0.6942 corrupt_frac_t_0p6_0p8=0.0126 out_w_norm=9.9997 out_g_norm=0.5989 acc_corrupt_t_0p8_1p0=0.8555 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.4499 init_gold_top10=0.2440 init_gold_top100=0.5899 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.1209 init_acc_rollout_kept=0.1210 logit_acc_rollout_applied=0.2120 logit_acc_rollout_kept=0.2282
221
+ step=900 epoch=900/1000 epoch_step=1/1 micro_steps=900 elapsed=24.0s lr=2.000000e-03 loss=3.2983 loss_recon=3.2983 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5052 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2379 corrupt_frac=1.0000 acc_corrupt=0.2379 loss_corrupt=3.2983 wrong_frac=0.7905 init_acc_corrupt=0.1230 acc_corrupt_t_0p0_0p2=0.1028 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.3571 corrupt_frac_t_0p2_0p4=0.3645 acc_corrupt_t_0p4_0p6=0.5909 corrupt_frac_t_0p4_0p6=0.0748 acc_corrupt_t_0p6_0p8=0.7411 corrupt_frac_t_0p6_0p8=0.0123 out_w_norm=10.4993 out_g_norm=0.8729 acc_corrupt_t_0p8_1p0=0.8477 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.0155 init_gold_top10=0.2915 init_gold_top100=0.6315 rollout_applied_pos_frac=0.4766 init_acc_rollout_applied=0.1272 init_acc_rollout_kept=0.1170 logit_acc_rollout_applied=0.2742 logit_acc_rollout_kept=0.2656
222
+ step=1000 epoch=1000/1000 epoch_step=1/1 micro_steps=1000 elapsed=23.8s lr=2.000000e-03 loss=2.8778 loss_recon=2.8778 loss_meanflow=0.0000 mean_model_t=0.2077 mean_corrupt_t=0.2077 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4952 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2973 corrupt_frac=1.0000 acc_corrupt=0.2973 loss_corrupt=2.8778 wrong_frac=0.7924 init_acc_corrupt=0.1228 acc_corrupt_t_0p0_0p2=0.1297 corrupt_frac_t_0p0_0p2=0.5567 acc_corrupt_t_0p2_0p4=0.4607 corrupt_frac_t_0p2_0p4=0.3598 acc_corrupt_t_0p4_0p6=0.6989 corrupt_frac_t_0p4_0p6=0.0751 acc_corrupt_t_0p6_0p8=0.8142 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=10.8390 out_g_norm=1.0522 loss_all=2.7461 init_gold_top10=0.3628 init_gold_top100=0.6665 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.1368 init_acc_rollout_kept=0.0965 logit_acc_rollout_applied=0.3638 logit_acc_rollout_kept=0.2733
223
+ [ctx1024-sampleds] eval config=p50_path4_unif0_0p125_outwdm1 step=1000
224
+ [eval-decode-acc] train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139 step=1000 soft=none
225
+ [decode] max_len=1024 generated=64/64
226
+ {
227
+ "num_rows": 1,
228
+ "best_by_run": {
229
+ "train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139::none": {
230
+ "run": "train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139",
231
+ "checkpoint": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/step_0001000.pt",
232
+ "ckpt_step": 1000,
233
+ "endpoint_softening": "none",
234
+ "decode_rule": "flowmap",
235
+ "steps": 128,
236
+ "time_schedule": "logit_normal",
237
+ "model_t_mode": "post",
238
+ "final_from": "state",
239
+ "n_gen": 64,
240
+ "n_refs": 8,
241
+ "token_acc_mean": 0.0288543701171875,
242
+ "token_acc_min": 0.01953125,
243
+ "token_acc_max": 0.0478515625,
244
+ "exact_acc": 0.0,
245
+ "exact_count": 0,
246
+ "exact_ref_coverage": 0.0,
247
+ "exact_ref_count": 0,
248
+ "exact_ref_hits": [],
249
+ "best_ref_idx": [
250
+ 7,
251
+ 2,
252
+ 5,
253
+ 7,
254
+ 7,
255
+ 7,
256
+ 2,
257
+ 0,
258
+ 7,
259
+ 3,
260
+ 7,
261
+ 4,
262
+ 2,
263
+ 7,
264
+ 7,
265
+ 5,
266
+ 7,
267
+ 2,
268
+ 1,
269
+ 0,
270
+ 7,
271
+ 2,
272
+ 7,
273
+ 5,
274
+ 7,
275
+ 0,
276
+ 5,
277
+ 5,
278
+ 7,
279
+ 7,
280
+ 5,
281
+ 7,
282
+ 4,
283
+ 7,
284
+ 7,
285
+ 5,
286
+ 5,
287
+ 4,
288
+ 7,
289
+ 5,
290
+ 2,
291
+ 7,
292
+ 5,
293
+ 7,
294
+ 7,
295
+ 7,
296
+ 7,
297
+ 4,
298
+ 4,
299
+ 5,
300
+ 7,
301
+ 2,
302
+ 4,
303
+ 7,
304
+ 0,
305
+ 7,
306
+ 7,
307
+ 5,
308
+ 0,
309
+ 4,
310
+ 0,
311
+ 7,
312
+ 4,
313
+ 5
314
+ ],
315
+ "best_token_acc": [
316
+ 0.0263671875,
317
+ 0.021484375,
318
+ 0.029296875,
319
+ 0.01953125,
320
+ 0.029296875,
321
+ 0.0283203125,
322
+ 0.0234375,
323
+ 0.021484375,
324
+ 0.0263671875,
325
+ 0.0283203125,
326
+ 0.0234375,
327
+ 0.025390625,
328
+ 0.0302734375,
329
+ 0.0263671875,
330
+ 0.0263671875,
331
+ 0.0263671875,
332
+ 0.0263671875,
333
+ 0.0234375,
334
+ 0.0283203125,
335
+ 0.0302734375,
336
+ 0.033203125,
337
+ 0.0380859375,
338
+ 0.0263671875,
339
+ 0.0283203125,
340
+ 0.02734375,
341
+ 0.03515625,
342
+ 0.044921875,
343
+ 0.0419921875,
344
+ 0.03125,
345
+ 0.0283203125,
346
+ 0.0478515625,
347
+ 0.0224609375,
348
+ 0.0263671875,
349
+ 0.0283203125,
350
+ 0.033203125,
351
+ 0.041015625,
352
+ 0.03125,
353
+ 0.0224609375,
354
+ 0.03125,
355
+ 0.03515625,
356
+ 0.0244140625,
357
+ 0.0263671875,
358
+ 0.0302734375,
359
+ 0.0234375,
360
+ 0.01953125,
361
+ 0.021484375,
362
+ 0.0224609375,
363
+ 0.0322265625,
364
+ 0.025390625,
365
+ 0.0322265625,
366
+ 0.029296875,
367
+ 0.033203125,
368
+ 0.0244140625,
369
+ 0.033203125,
370
+ 0.02734375,
371
+ 0.041015625,
372
+ 0.025390625,
373
+ 0.0302734375,
374
+ 0.0234375,
375
+ 0.02734375,
376
+ 0.0244140625,
377
+ 0.025390625,
378
+ 0.025390625,
379
+ 0.0439453125
380
+ ]
381
+ }
382
+ },
383
+ "first_exact_by_run": {}
384
+ }
385
+ RESULT config=p50_path4_unif0_0p125_outwdm1 run=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139 ckpt_step=1000 views=512000 token_acc=0.0289 exact=0/64 exact_refs=0 hits=[]
386
+ [ctx1024-sampleds] train config=p50_path4_unif0_0p125_outwdm1 from=1000 to=2000
387
+ [launch] gpt2 cached OWT soft-endpoint m/n pilot
388
+ [launch] run_name=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
389
+ [launch] save_dir=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
390
+ [launch] n=1024 m=0 clean_state_mode=onehot
391
+ [launch] mask_mixture lowk=0.0 all=1.0
392
+ [launch] model d=192 layers=3 heads=3 ff=768 vocab_override=2664
393
+ [launch] optimizer=muon muon_impl=legacy weight_decay=0.1 output_weight_decay=-1
394
+ [launch] target_loss=hard_ce conf=0.0->1.0 power=1.0
395
+ [launch] mask_ratio=1.0->1.0
396
+ [launch] mask_ratio_floor_schedule=none
397
+ [launch] dirichlet C=1.0->1024 endpoint=categorical_dual_t sampler=dirichlet
398
+ [launch] wrong_mix seq_alpha=0.0 wrong_floor=0.0 unigram=0.0 uniform=0.0 basin=0.0 basin_ids=
399
+ [launch] rollout_train prob=0.50 mode=sampled_path steps=4 infer_steps=1 s_dist=uniform s_frac=0.0->0.125 temp=1.45 corrupt_only=1 samplewise=1 selected_only=1 sync_t=1
400
+ [launch] cache=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit exact_repeat_per_chunk=64
401
+ [launch] resume_path=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt
402
+ NCCL version 2.25.1+cuda12.8
403
+ resumed_from=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt start_step=1001
404
+ {
405
+ "device": "cuda:0",
406
+ "rank": 0,
407
+ "world_size": 4,
408
+ "samples": "owt_cached_chunks:8",
409
+ "vocab_size": 2664,
410
+ "tokenizer_vocab_size": 50257,
411
+ "save_dir": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139",
412
+ "batch_size": 128,
413
+ "grad_accum": 1,
414
+ "effective_batch_size": 512,
415
+ "global_batch_size": 512,
416
+ "lr_schedule": "constant_warmup",
417
+ "optimizer": "muon",
418
+ "epochs": 0.0,
419
+ "steps_per_epoch": 1,
420
+ "total_steps": 2000,
421
+ "warmup_steps": 10,
422
+ "warmup_epochs": -1.0,
423
+ "min_lr": 0.0,
424
+ "weight_decay": 0.1,
425
+ "output_weight_decay": -1.0,
426
+ "adamw_param_groups": "nanogpt",
427
+ "adam_beta1": 0.9,
428
+ "adam_beta2": 0.95,
429
+ "adam_eps": 1e-08,
430
+ "muon_impl": "legacy",
431
+ "muon_momentum": 0.95,
432
+ "muon_ns_steps": 5,
433
+ "muon_update_scale": 1.0,
434
+ "muon_nesterov": false,
435
+ "muon_width_scale": false,
436
+ "muon_grouping": "legacy_dim_ge_2",
437
+ "muon_param_count": 2616320,
438
+ "muon_adam_param_count": 8192,
439
+ "muon_param_names": [
440
+ "vocab_embed.embedding",
441
+ "sigma_map.net.0.weight",
442
+ "sigma_map.net.2.weight",
443
+ "blocks.0.attn_qkv.weight",
444
+ "blocks.0.attn_out.weight",
445
+ "blocks.0.mlp.0.weight",
446
+ "blocks.0.mlp.2.weight",
447
+ "blocks.0.adaLN_modulation.weight",
448
+ "blocks.1.attn_qkv.weight",
449
+ "blocks.1.attn_out.weight",
450
+ "blocks.1.mlp.0.weight",
451
+ "blocks.1.mlp.2.weight",
452
+ "blocks.1.adaLN_modulation.weight",
453
+ "blocks.2.attn_qkv.weight",
454
+ "blocks.2.attn_out.weight",
455
+ "blocks.2.mlp.0.weight",
456
+ "blocks.2.mlp.2.weight",
457
+ "blocks.2.adaLN_modulation.weight",
458
+ "output_layer.linear.weight",
459
+ "output_layer.adaLN_modulation.weight"
460
+ ],
461
+ "muon_adam_param_names": [
462
+ "sigma_map.net.0.bias",
463
+ "sigma_map.net.2.bias",
464
+ "blocks.0.norm1.weight",
465
+ "blocks.0.norm2.weight",
466
+ "blocks.0.mlp.0.bias",
467
+ "blocks.0.mlp.2.bias",
468
+ "blocks.0.adaLN_modulation.bias",
469
+ "blocks.1.norm1.weight",
470
+ "blocks.1.norm2.weight",
471
+ "blocks.1.mlp.0.bias",
472
+ "blocks.1.mlp.2.bias",
473
+ "blocks.1.adaLN_modulation.bias",
474
+ "blocks.2.norm1.weight",
475
+ "blocks.2.norm2.weight",
476
+ "blocks.2.mlp.0.bias",
477
+ "blocks.2.mlp.2.bias",
478
+ "blocks.2.adaLN_modulation.bias",
479
+ "output_layer.norm_final.weight",
480
+ "output_layer.adaLN_modulation.bias"
481
+ ],
482
+ "muon_effective_nesterov": false,
483
+ "muon_effective_width_scale": false,
484
+ "muon_effective_weight_decay": 0.1,
485
+ "muon_adam_fallback_nesterov": false,
486
+ "muon_adam_fallback_weight_decay": 0.1,
487
+ "ema_decay": 0.9999,
488
+ "ema_start_step": 0,
489
+ "model_type": "ddit",
490
+ "ddit_mlp_type": "gelu",
491
+ "elf_num_time_tokens": 4,
492
+ "elf_num_model_mode_tokens": 0,
493
+ "qk_norm": true,
494
+ "output_bias": false,
495
+ "output_init_std": -1.0,
496
+ "norm_type": "rmsnorm",
497
+ "target_loss": "hard_ce",
498
+ "linear_soft_target_power": 1.0,
499
+ "linear_soft_target_min_conf": 0.0,
500
+ "linear_soft_target_max_conf": 1.0,
501
+ "t_sampling_mode": "logit_normal",
502
+ "t_sampling_power": 1.0,
503
+ "t_sampling_eps": 0.0001,
504
+ "t_sampling_logit_mean": -1.5,
505
+ "t_sampling_logit_std": 0.8,
506
+ "dual_t": true,
507
+ "corrupt_t_mode": "same",
508
+ "corrupt_min_t": 0.0,
509
+ "corrupt_max_t": 1.0,
510
+ "prefix_block_prob": 0.0,
511
+ "prefix_block_len": 128,
512
+ "mask_ratio_floor_schedule": "none",
513
+ "dirichlet_endpoint_mode": "categorical_dual_t",
514
+ "dirichlet_semantic_t_mode": "same",
515
+ "dirichlet_semantic_t_value": 0.0,
516
+ "dirichlet_semantic_t_curve": "linear",
517
+ "dirichlet_semantic_t_power": 1.0,
518
+ "endpoint_sequence_random_prob_alpha": 0.0,
519
+ "categorical_wrong_from_full_vocab": true,
520
+ "categorical_wrong_from_batch_valid_tokens": false,
521
+ "categorical_wrong_basin_token_ids": "",
522
+ "categorical_wrong_basin_prob": 0.0,
523
+ "categorical_wrong_unigram_prob": 0.0,
524
+ "categorical_wrong_uniform_prob": 0.0,
525
+ "categorical_wrong_prob_floor": 0.0,
526
+ "categorical_wrong_corpus_unigram_path": "",
527
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
528
+ "categorical_wrong_basin_shared_prob": 0.0,
529
+ "categorical_wrong_unigram_shared_prob": 0.0,
530
+ "mask_mixture_original_prob": 0.0,
531
+ "mask_mixture_lowk_prob": 0.0,
532
+ "mask_mixture_lowcorrupt_prob": 0.0,
533
+ "mask_mixture_block_prob": 0.0,
534
+ "mask_mixture_all_prob": 1.0,
535
+ "mask_mixture_lowk_clean_tokens": "0",
536
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
537
+ "mask_mixture_block_tokens": "64,128",
538
+ "simplex_bridge_sampler": "dirichlet",
539
+ "logistic_normal_sigma_min": 0.1,
540
+ "logistic_normal_sigma_max": 1.0,
541
+ "logistic_normal_tau_min": 1.0,
542
+ "logistic_normal_tau_max": 1.0,
543
+ "torch_compile": false,
544
+ "compile_mode": "max-autotune",
545
+ "state_format": "prob",
546
+ "meanflow_weight": 0.0,
547
+ "rollout_train_prob": 0.5,
548
+ "rollout_train_steps": 4,
549
+ "rollout_train_infer_steps": 1,
550
+ "rollout_train_time_mode": "sampled_path",
551
+ "rollout_train_s_dist": "uniform",
552
+ "rollout_train_s_min_frac": 0.0,
553
+ "rollout_train_s_max_frac": 0.125,
554
+ "rollout_train_s_beta_alpha": 2.0,
555
+ "rollout_train_s_beta_beta": 6.0,
556
+ "rollout_train_temp": 1.45,
557
+ "rollout_train_max_gamma": 1.0,
558
+ "rollout_train_corrupt_only": true,
559
+ "rollout_train_samplewise": true,
560
+ "rollout_train_compute_always": false,
561
+ "rollout_train_sync_t": true,
562
+ "bridge_noise_init": "logistic_normal",
563
+ "noise_sigma": -1.0,
564
+ "allow_tf32": true,
565
+ "activation_checkpointing": false,
566
+ "activation_checkpoint_interval": 1,
567
+ "activation_checkpoint_scope": "block",
568
+ "ddp_static_graph": false,
569
+ "ddp_gradient_as_bucket_view": true,
570
+ "blocking_data_transfer": false,
571
+ "dataloader_prefetch_factor": 4,
572
+ "full_train_stats": false,
573
+ "tokenized_hf": false,
574
+ "tokenized_pad_token": "pad",
575
+ "elf_conditional_hf": false,
576
+ "record_pad_truncate": false,
577
+ "record_add_eos": false,
578
+ "record_add_special_tokens": false,
579
+ "record_pad_token": "pad",
580
+ "record_shuffle_buffer": 10000,
581
+ "wrap": true,
582
+ "wrap_mode": "stream",
583
+ "wrap_record_buffer_size": 200,
584
+ "owt_cached_chunks": true,
585
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
586
+ "owt_chunk_cache_rebuild": false,
587
+ "owt_chunk_cache_write_batch": 4096,
588
+ "owt_exact_repeat_per_chunk": 64,
589
+ "online_chunk_shuffle": false,
590
+ "online_chunk_shuffle_buffer": 10000,
591
+ "openwebtext_split": "train_minus_100k",
592
+ "detokenizer": "auto",
593
+ "resolved_detokenizer": null,
594
+ "num_workers": 0,
595
+ "latest_every": 1000,
596
+ "resume_path": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt"
597
+ }
598
+ step=1100 epoch=1100/2000 epoch_step=1/1 micro_steps=1100 elapsed=24.6s lr=2.000000e-03 loss=2.4561 loss_recon=2.4561 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3788 corrupt_frac=1.0000 acc_corrupt=0.3788 loss_corrupt=2.4561 wrong_frac=0.7915 init_acc_corrupt=0.1281 acc_corrupt_t_0p0_0p2=0.1728 corrupt_frac_t_0p0_0p2=0.5640 acc_corrupt_t_0p2_0p4=0.6004 corrupt_frac_t_0p2_0p4=0.3466 acc_corrupt_t_0p4_0p6=0.8111 corrupt_frac_t_0p4_0p6=0.0791 acc_corrupt_t_0p6_0p8=0.8828 corrupt_frac_t_0p6_0p8=0.0136 out_w_norm=11.1283 out_g_norm=1.2622 acc_corrupt_t_0p8_1p0=0.9307 corrupt_frac_t_0p8_1p0=0.0078 loss_all=2.1052 init_gold_top10=0.4033 init_gold_top100=0.6222 rollout_applied_pos_frac=0.4453 init_acc_rollout_applied=0.1418 init_acc_rollout_kept=0.1192 logit_acc_rollout_applied=0.4907 logit_acc_rollout_kept=0.4183
599
+ step=1200 epoch=1200/2000 epoch_step=1/1 micro_steps=1200 elapsed=23.7s lr=2.000000e-03 loss=2.0694 loss_recon=2.0694 loss_meanflow=0.0000 mean_model_t=0.2096 mean_corrupt_t=0.2096 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4995 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4665 corrupt_frac=1.0000 acc_corrupt=0.4665 loss_corrupt=2.0694 wrong_frac=0.7905 init_acc_corrupt=0.1362 acc_corrupt_t_0p0_0p2=0.2250 corrupt_frac_t_0p0_0p2=0.5557 acc_corrupt_t_0p2_0p4=0.7365 corrupt_frac_t_0p2_0p4=0.3595 acc_corrupt_t_0p4_0p6=0.9014 corrupt_frac_t_0p4_0p6=0.0762 acc_corrupt_t_0p6_0p8=0.9335 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=11.3914 out_g_norm=1.4567 acc_corrupt_t_0p8_1p0=0.9663 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.9323 init_gold_top10=0.4233 init_gold_top100=0.6392 rollout_applied_pos_frac=0.4688 init_acc_rollout_applied=0.1810 init_acc_rollout_kept=0.1215 logit_acc_rollout_applied=0.5332 logit_acc_rollout_kept=0.4872
600
+ step=1300 epoch=1300/2000 epoch_step=1/1 micro_steps=1300 elapsed=23.7s lr=2.000000e-03 loss=1.7590 loss_recon=1.7590 loss_meanflow=0.0000 mean_model_t=0.2098 mean_corrupt_t=0.2098 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5023 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5421 corrupt_frac=1.0000 acc_corrupt=0.5421 loss_corrupt=1.7590 wrong_frac=0.7902 init_acc_corrupt=0.1490 acc_corrupt_t_0p0_0p2=0.2864 corrupt_frac_t_0p0_0p2=0.5544 acc_corrupt_t_0p2_0p4=0.8375 corrupt_frac_t_0p2_0p4=0.3617 acc_corrupt_t_0p4_0p6=0.9568 corrupt_frac_t_0p4_0p6=0.0743 out_w_norm=11.5756 out_g_norm=1.5131 acc_corrupt_t_0p6_0p8=0.9688 corrupt_frac_t_0p6_0p8=0.0139 acc_corrupt_t_0p8_1p0=0.9624 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.4657 init_gold_top10=0.4631 init_gold_top100=0.6198 rollout_applied_pos_frac=0.4375 init_acc_rollout_applied=0.1669 init_acc_rollout_kept=0.1243 logit_acc_rollout_applied=0.6838 logit_acc_rollout_kept=0.5458
601
+ step=1400 epoch=1400/2000 epoch_step=1/1 micro_steps=1400 elapsed=23.7s lr=2.000000e-03 loss=1.5170 loss_recon=1.5170 loss_meanflow=0.0000 mean_model_t=0.2072 mean_corrupt_t=0.2072 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6018 corrupt_frac=1.0000 acc_corrupt=0.6018 loss_corrupt=1.5170 wrong_frac=0.7929 init_acc_corrupt=0.1570 acc_corrupt_t_0p0_0p2=0.3584 corrupt_frac_t_0p0_0p2=0.5638 acc_corrupt_t_0p2_0p4=0.9009 corrupt_frac_t_0p2_0p4=0.3526 acc_corrupt_t_0p4_0p6=0.9811 corrupt_frac_t_0p4_0p6=0.0753 out_w_norm=11.6982 out_g_norm=1.5191 acc_corrupt_t_0p6_0p8=0.9840 corrupt_frac_t_0p6_0p8=0.0128 acc_corrupt_t_0p8_1p0=0.9805 corrupt_frac_t_0p8_1p0=0.0117 loss_all=1.3672 init_gold_top10=0.4951 init_gold_top100=0.6454 rollout_applied_pos_frac=0.4844 init_acc_rollout_applied=0.2290 init_acc_rollout_kept=0.1042 logit_acc_rollout_applied=0.7189 logit_acc_rollout_kept=0.5401
602
+ step=1500 epoch=1500/2000 epoch_step=1/1 micro_steps=1500 elapsed=23.7s lr=2.000000e-03 loss=1.2915 loss_recon=1.2915 loss_meanflow=0.0000 mean_model_t=0.2101 mean_corrupt_t=0.2101 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4994 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6540 corrupt_frac=1.0000 acc_corrupt=0.6540 loss_corrupt=1.2915 wrong_frac=0.7898 init_acc_corrupt=0.1734 acc_corrupt_t_0p0_0p2=0.4134 corrupt_frac_t_0p0_0p2=0.5506 acc_corrupt_t_0p2_0p4=0.9392 corrupt_frac_t_0p2_0p4=0.3660 acc_corrupt_t_0p4_0p6=0.9911 corrupt_frac_t_0p4_0p6=0.0752 acc_corrupt_t_0p6_0p8=0.9905 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=11.7810 out_g_norm=1.4652 acc_corrupt_t_0p8_1p0=0.9917 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.0025 init_gold_top10=0.5435 init_gold_top100=0.6617 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.2334 init_acc_rollout_kept=0.1486 logit_acc_rollout_applied=0.7939 logit_acc_rollout_kept=0.6810
603
+ step=1600 epoch=1600/2000 epoch_step=1/1 micro_steps=1600 elapsed=23.7s lr=2.000000e-03 loss=1.1774 loss_recon=1.1774 loss_meanflow=0.0000 mean_model_t=0.2082 mean_corrupt_t=0.2082 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5037 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6826 corrupt_frac=1.0000 acc_corrupt=0.6826 loss_corrupt=1.1774 wrong_frac=0.7918 init_acc_corrupt=0.1812 acc_corrupt_t_0p0_0p2=0.4607 corrupt_frac_t_0p0_0p2=0.5629 acc_corrupt_t_0p2_0p4=0.9616 corrupt_frac_t_0p2_0p4=0.3502 acc_corrupt_t_0p4_0p6=0.9956 corrupt_frac_t_0p4_0p6=0.0768 out_w_norm=11.8241 out_g_norm=1.3619 acc_corrupt_t_0p6_0p8=0.9946 corrupt_frac_t_0p6_0p8=0.0133 loss_all=1.1961 init_gold_top10=0.5235 init_gold_top100=0.6535 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.2526 init_acc_rollout_kept=0.1078 logit_acc_rollout_applied=0.8093 logit_acc_rollout_kept=0.5727
604
+ step=1700 epoch=1700/2000 epoch_step=1/1 micro_steps=1700 elapsed=23.8s lr=2.000000e-03 loss=1.0426 loss_recon=1.0426 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5123 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7140 corrupt_frac=1.0000 acc_corrupt=0.7140 loss_corrupt=1.0426 wrong_frac=0.7915 init_acc_corrupt=0.1897 acc_corrupt_t_0p0_0p2=0.5042 corrupt_frac_t_0p0_0p2=0.5591 acc_corrupt_t_0p2_0p4=0.9759 corrupt_frac_t_0p2_0p4=0.3563 acc_corrupt_t_0p4_0p6=0.9978 corrupt_frac_t_0p4_0p6=0.0764 out_w_norm=11.8438 out_g_norm=1.2704 acc_corrupt_t_0p6_0p8=0.9969 corrupt_frac_t_0p6_0p8=0.0125 acc_corrupt_t_0p8_1p0=0.9912 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.0589 init_gold_top10=0.5409 init_gold_top100=0.6740 rollout_applied_pos_frac=0.5156 init_acc_rollout_applied=0.2758 init_acc_rollout_kept=0.1214 logit_acc_rollout_applied=0.7361 logit_acc_rollout_kept=0.6505
605
+ step=1800 epoch=1800/2000 epoch_step=1/1 micro_steps=1800 elapsed=23.7s lr=2.000000e-03 loss=0.9326 loss_recon=0.9326 loss_meanflow=0.0000 mean_model_t=0.2092 mean_corrupt_t=0.2092 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5020 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7394 corrupt_frac=1.0000 acc_corrupt=0.7394 loss_corrupt=0.9326 wrong_frac=0.7911 init_acc_corrupt=0.1967 acc_corrupt_t_0p0_0p2=0.5387 corrupt_frac_t_0p0_0p2=0.5509 acc_corrupt_t_0p2_0p4=0.9829 corrupt_frac_t_0p2_0p4=0.3674 acc_corrupt_t_0p4_0p6=0.9988 corrupt_frac_t_0p4_0p6=0.0748 acc_corrupt_t_0p6_0p8=0.9967 corrupt_frac_t_0p6_0p8=0.0126 out_w_norm=11.8643 out_g_norm=1.1264 acc_corrupt_t_0p8_1p0=0.9866 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.7271 init_gold_top10=0.5306 init_gold_top100=0.6286 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.2711 init_acc_rollout_kept=0.1210 logit_acc_rollout_applied=0.8536 logit_acc_rollout_kept=0.7328
606
+ step=1900 epoch=1900/2000 epoch_step=1/1 micro_steps=1900 elapsed=23.8s lr=2.000000e-03 loss=0.8706 loss_recon=0.8706 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5052 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7491 corrupt_frac=1.0000 acc_corrupt=0.7491 loss_corrupt=0.8706 wrong_frac=0.7905 init_acc_corrupt=0.2009 acc_corrupt_t_0p0_0p2=0.5529 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.9885 corrupt_frac_t_0p2_0p4=0.3645 acc_corrupt_t_0p4_0p6=0.9988 corrupt_frac_t_0p4_0p6=0.0748 acc_corrupt_t_0p6_0p8=0.9971 corrupt_frac_t_0p6_0p8=0.0123 out_w_norm=11.8558 out_g_norm=1.0997 acc_corrupt_t_0p8_1p0=0.9964 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.7654 init_gold_top10=0.5507 init_gold_top100=0.6459 rollout_applied_pos_frac=0.4766 init_acc_rollout_applied=0.2787 init_acc_rollout_kept=0.1170 logit_acc_rollout_applied=0.8564 logit_acc_rollout_kept=0.7075
607
+ step=2000 epoch=2000/2000 epoch_step=1/1 micro_steps=2000 elapsed=23.7s lr=2.000000e-03 loss=0.7684 loss_recon=0.7684 loss_meanflow=0.0000 mean_model_t=0.2077 mean_corrupt_t=0.2077 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4952 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7720 corrupt_frac=1.0000 acc_corrupt=0.7720 loss_corrupt=0.7684 wrong_frac=0.7924 init_acc_corrupt=0.1993 acc_corrupt_t_0p0_0p2=0.5971 corrupt_frac_t_0p0_0p2=0.5567 acc_corrupt_t_0p2_0p4=0.9900 corrupt_frac_t_0p2_0p4=0.3598 acc_corrupt_t_0p4_0p6=0.9990 corrupt_frac_t_0p4_0p6=0.0751 acc_corrupt_t_0p6_0p8=0.9969 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=11.8523 out_g_norm=1.1152 loss_all=0.7562 init_gold_top10=0.5871 init_gold_top100=0.6723 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.3057 init_acc_rollout_kept=0.0965 logit_acc_rollout_applied=0.8930 logit_acc_rollout_kept=0.6352
608
+ [ctx1024-sampleds] eval config=p50_path4_unif0_0p125_outwdm1 step=2000
609
+ [eval-decode-acc] train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139 step=2000 soft=none
610
+ [decode] max_len=1024 generated=64/64
611
+ {
612
+ "num_rows": 1,
613
+ "best_by_run": {
614
+ "train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139::none": {
615
+ "run": "train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139",
616
+ "checkpoint": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/step_0002000.pt",
617
+ "ckpt_step": 2000,
618
+ "endpoint_softening": "none",
619
+ "decode_rule": "flowmap",
620
+ "steps": 128,
621
+ "time_schedule": "logit_normal",
622
+ "model_t_mode": "post",
623
+ "final_from": "state",
624
+ "n_gen": 64,
625
+ "n_refs": 8,
626
+ "token_acc_mean": 0.86529541015625,
627
+ "token_acc_min": 0.2119140625,
628
+ "token_acc_max": 0.9921875,
629
+ "exact_acc": 0.0,
630
+ "exact_count": 0,
631
+ "exact_ref_coverage": 0.0,
632
+ "exact_ref_count": 0,
633
+ "exact_ref_hits": [],
634
+ "best_ref_idx": [
635
+ 4,
636
+ 4,
637
+ 5,
638
+ 4,
639
+ 1,
640
+ 7,
641
+ 4,
642
+ 4,
643
+ 5,
644
+ 1,
645
+ 5,
646
+ 1,
647
+ 4,
648
+ 1,
649
+ 4,
650
+ 4,
651
+ 1,
652
+ 5,
653
+ 4,
654
+ 1,
655
+ 4,
656
+ 4,
657
+ 4,
658
+ 4,
659
+ 4,
660
+ 4,
661
+ 7,
662
+ 4,
663
+ 4,
664
+ 1,
665
+ 4,
666
+ 4,
667
+ 4,
668
+ 1,
669
+ 4,
670
+ 4,
671
+ 4,
672
+ 4,
673
+ 5,
674
+ 4,
675
+ 1,
676
+ 4,
677
+ 4,
678
+ 3,
679
+ 4,
680
+ 5,
681
+ 7,
682
+ 7,
683
+ 5,
684
+ 4,
685
+ 4,
686
+ 4,
687
+ 5,
688
+ 4,
689
+ 4,
690
+ 4,
691
+ 5,
692
+ 4,
693
+ 7,
694
+ 4,
695
+ 5,
696
+ 4,
697
+ 1,
698
+ 4
699
+ ],
700
+ "best_token_acc": [
701
+ 0.962890625,
702
+ 0.966796875,
703
+ 0.9912109375,
704
+ 0.3701171875,
705
+ 0.78125,
706
+ 0.7607421875,
707
+ 0.9658203125,
708
+ 0.9560546875,
709
+ 0.9873046875,
710
+ 0.376953125,
711
+ 0.9921875,
712
+ 0.970703125,
713
+ 0.95703125,
714
+ 0.72265625,
715
+ 0.71484375,
716
+ 0.7314453125,
717
+ 0.9765625,
718
+ 0.9892578125,
719
+ 0.73828125,
720
+ 0.861328125,
721
+ 0.9521484375,
722
+ 0.9716796875,
723
+ 0.8662109375,
724
+ 0.8330078125,
725
+ 0.96875,
726
+ 0.9482421875,
727
+ 0.9853515625,
728
+ 0.689453125,
729
+ 0.962890625,
730
+ 0.9775390625,
731
+ 0.9580078125,
732
+ 0.2119140625,
733
+ 0.9345703125,
734
+ 0.2138671875,
735
+ 0.90625,
736
+ 0.5400390625,
737
+ 0.9736328125,
738
+ 0.685546875,
739
+ 0.9716796875,
740
+ 0.8994140625,
741
+ 0.974609375,
742
+ 0.9736328125,
743
+ 0.9248046875,
744
+ 0.984375,
745
+ 0.9677734375,
746
+ 0.7060546875,
747
+ 0.9345703125,
748
+ 0.9228515625,
749
+ 0.9912109375,
750
+ 0.8466796875,
751
+ 0.9599609375,
752
+ 0.9638671875,
753
+ 0.4013671875,
754
+ 0.9033203125,
755
+ 0.9736328125,
756
+ 0.943359375,
757
+ 0.9912109375,
758
+ 0.9716796875,
759
+ 0.98828125,
760
+ 0.9716796875,
761
+ 0.97265625,
762
+ 0.94921875,
763
+ 0.97265625,
764
+ 0.9658203125
765
+ ]
766
+ }
767
+ },
768
+ "first_exact_by_run": {}
769
+ }
770
+ RESULT config=p50_path4_unif0_0p125_outwdm1 run=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139 ckpt_step=2000 views=1024000 token_acc=0.8653 exact=0/64 exact_refs=0 hits=[]
771
+ [ctx1024-sampleds] train config=p50_path4_unif0_0p125_outwdm1 from=2000 to=3000
772
+ [launch] gpt2 cached OWT soft-endpoint m/n pilot
773
+ [launch] run_name=train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
774
+ [launch] save_dir=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139
775
+ [launch] n=1024 m=0 clean_state_mode=onehot
776
+ [launch] mask_mixture lowk=0.0 all=1.0
777
+ [launch] model d=192 layers=3 heads=3 ff=768 vocab_override=2664
778
+ [launch] optimizer=muon muon_impl=legacy weight_decay=0.1 output_weight_decay=-1
779
+ [launch] target_loss=hard_ce conf=0.0->1.0 power=1.0
780
+ [launch] mask_ratio=1.0->1.0
781
+ [launch] mask_ratio_floor_schedule=none
782
+ [launch] dirichlet C=1.0->1024 endpoint=categorical_dual_t sampler=dirichlet
783
+ [launch] wrong_mix seq_alpha=0.0 wrong_floor=0.0 unigram=0.0 uniform=0.0 basin=0.0 basin_ids=
784
+ [launch] rollout_train prob=0.50 mode=sampled_path steps=4 infer_steps=1 s_dist=uniform s_frac=0.0->0.125 temp=1.45 corrupt_only=1 samplewise=1 selected_only=1 sync_t=1
785
+ [launch] cache=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit exact_repeat_per_chunk=64
786
+ [launch] resume_path=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt
787
+ NCCL version 2.25.1+cuda12.8
788
+ resumed_from=runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt start_step=2001
789
+ {
790
+ "device": "cuda:0",
791
+ "rank": 0,
792
+ "world_size": 4,
793
+ "samples": "owt_cached_chunks:8",
794
+ "vocab_size": 2664,
795
+ "tokenizer_vocab_size": 50257,
796
+ "save_dir": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139",
797
+ "batch_size": 128,
798
+ "grad_accum": 1,
799
+ "effective_batch_size": 512,
800
+ "global_batch_size": 512,
801
+ "lr_schedule": "constant_warmup",
802
+ "optimizer": "muon",
803
+ "epochs": 0.0,
804
+ "steps_per_epoch": 1,
805
+ "total_steps": 3000,
806
+ "warmup_steps": 10,
807
+ "warmup_epochs": -1.0,
808
+ "min_lr": 0.0,
809
+ "weight_decay": 0.1,
810
+ "output_weight_decay": -1.0,
811
+ "adamw_param_groups": "nanogpt",
812
+ "adam_beta1": 0.9,
813
+ "adam_beta2": 0.95,
814
+ "adam_eps": 1e-08,
815
+ "muon_impl": "legacy",
816
+ "muon_momentum": 0.95,
817
+ "muon_ns_steps": 5,
818
+ "muon_update_scale": 1.0,
819
+ "muon_nesterov": false,
820
+ "muon_width_scale": false,
821
+ "muon_grouping": "legacy_dim_ge_2",
822
+ "muon_param_count": 2616320,
823
+ "muon_adam_param_count": 8192,
824
+ "muon_param_names": [
825
+ "vocab_embed.embedding",
826
+ "sigma_map.net.0.weight",
827
+ "sigma_map.net.2.weight",
828
+ "blocks.0.attn_qkv.weight",
829
+ "blocks.0.attn_out.weight",
830
+ "blocks.0.mlp.0.weight",
831
+ "blocks.0.mlp.2.weight",
832
+ "blocks.0.adaLN_modulation.weight",
833
+ "blocks.1.attn_qkv.weight",
834
+ "blocks.1.attn_out.weight",
835
+ "blocks.1.mlp.0.weight",
836
+ "blocks.1.mlp.2.weight",
837
+ "blocks.1.adaLN_modulation.weight",
838
+ "blocks.2.attn_qkv.weight",
839
+ "blocks.2.attn_out.weight",
840
+ "blocks.2.mlp.0.weight",
841
+ "blocks.2.mlp.2.weight",
842
+ "blocks.2.adaLN_modulation.weight",
843
+ "output_layer.linear.weight",
844
+ "output_layer.adaLN_modulation.weight"
845
+ ],
846
+ "muon_adam_param_names": [
847
+ "sigma_map.net.0.bias",
848
+ "sigma_map.net.2.bias",
849
+ "blocks.0.norm1.weight",
850
+ "blocks.0.norm2.weight",
851
+ "blocks.0.mlp.0.bias",
852
+ "blocks.0.mlp.2.bias",
853
+ "blocks.0.adaLN_modulation.bias",
854
+ "blocks.1.norm1.weight",
855
+ "blocks.1.norm2.weight",
856
+ "blocks.1.mlp.0.bias",
857
+ "blocks.1.mlp.2.bias",
858
+ "blocks.1.adaLN_modulation.bias",
859
+ "blocks.2.norm1.weight",
860
+ "blocks.2.norm2.weight",
861
+ "blocks.2.mlp.0.bias",
862
+ "blocks.2.mlp.2.bias",
863
+ "blocks.2.adaLN_modulation.bias",
864
+ "output_layer.norm_final.weight",
865
+ "output_layer.adaLN_modulation.bias"
866
+ ],
867
+ "muon_effective_nesterov": false,
868
+ "muon_effective_width_scale": false,
869
+ "muon_effective_weight_decay": 0.1,
870
+ "muon_adam_fallback_nesterov": false,
871
+ "muon_adam_fallback_weight_decay": 0.1,
872
+ "ema_decay": 0.9999,
873
+ "ema_start_step": 0,
874
+ "model_type": "ddit",
875
+ "ddit_mlp_type": "gelu",
876
+ "elf_num_time_tokens": 4,
877
+ "elf_num_model_mode_tokens": 0,
878
+ "qk_norm": true,
879
+ "output_bias": false,
880
+ "output_init_std": -1.0,
881
+ "norm_type": "rmsnorm",
882
+ "target_loss": "hard_ce",
883
+ "linear_soft_target_power": 1.0,
884
+ "linear_soft_target_min_conf": 0.0,
885
+ "linear_soft_target_max_conf": 1.0,
886
+ "t_sampling_mode": "logit_normal",
887
+ "t_sampling_power": 1.0,
888
+ "t_sampling_eps": 0.0001,
889
+ "t_sampling_logit_mean": -1.5,
890
+ "t_sampling_logit_std": 0.8,
891
+ "dual_t": true,
892
+ "corrupt_t_mode": "same",
893
+ "corrupt_min_t": 0.0,
894
+ "corrupt_max_t": 1.0,
895
+ "prefix_block_prob": 0.0,
896
+ "prefix_block_len": 128,
897
+ "mask_ratio_floor_schedule": "none",
898
+ "dirichlet_endpoint_mode": "categorical_dual_t",
899
+ "dirichlet_semantic_t_mode": "same",
900
+ "dirichlet_semantic_t_value": 0.0,
901
+ "dirichlet_semantic_t_curve": "linear",
902
+ "dirichlet_semantic_t_power": 1.0,
903
+ "endpoint_sequence_random_prob_alpha": 0.0,
904
+ "categorical_wrong_from_full_vocab": true,
905
+ "categorical_wrong_from_batch_valid_tokens": false,
906
+ "categorical_wrong_basin_token_ids": "",
907
+ "categorical_wrong_basin_prob": 0.0,
908
+ "categorical_wrong_unigram_prob": 0.0,
909
+ "categorical_wrong_uniform_prob": 0.0,
910
+ "categorical_wrong_prob_floor": 0.0,
911
+ "categorical_wrong_corpus_unigram_path": "",
912
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
913
+ "categorical_wrong_basin_shared_prob": 0.0,
914
+ "categorical_wrong_unigram_shared_prob": 0.0,
915
+ "mask_mixture_original_prob": 0.0,
916
+ "mask_mixture_lowk_prob": 0.0,
917
+ "mask_mixture_lowcorrupt_prob": 0.0,
918
+ "mask_mixture_block_prob": 0.0,
919
+ "mask_mixture_all_prob": 1.0,
920
+ "mask_mixture_lowk_clean_tokens": "0",
921
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
922
+ "mask_mixture_block_tokens": "64,128",
923
+ "simplex_bridge_sampler": "dirichlet",
924
+ "logistic_normal_sigma_min": 0.1,
925
+ "logistic_normal_sigma_max": 1.0,
926
+ "logistic_normal_tau_min": 1.0,
927
+ "logistic_normal_tau_max": 1.0,
928
+ "torch_compile": false,
929
+ "compile_mode": "max-autotune",
930
+ "state_format": "prob",
931
+ "meanflow_weight": 0.0,
932
+ "rollout_train_prob": 0.5,
933
+ "rollout_train_steps": 4,
934
+ "rollout_train_infer_steps": 1,
935
+ "rollout_train_time_mode": "sampled_path",
936
+ "rollout_train_s_dist": "uniform",
937
+ "rollout_train_s_min_frac": 0.0,
938
+ "rollout_train_s_max_frac": 0.125,
939
+ "rollout_train_s_beta_alpha": 2.0,
940
+ "rollout_train_s_beta_beta": 6.0,
941
+ "rollout_train_temp": 1.45,
942
+ "rollout_train_max_gamma": 1.0,
943
+ "rollout_train_corrupt_only": true,
944
+ "rollout_train_samplewise": true,
945
+ "rollout_train_compute_always": false,
946
+ "rollout_train_sync_t": true,
947
+ "bridge_noise_init": "logistic_normal",
948
+ "noise_sigma": -1.0,
949
+ "allow_tf32": true,
950
+ "activation_checkpointing": false,
951
+ "activation_checkpoint_interval": 1,
952
+ "activation_checkpoint_scope": "block",
953
+ "ddp_static_graph": false,
954
+ "ddp_gradient_as_bucket_view": true,
955
+ "blocking_data_transfer": false,
956
+ "dataloader_prefetch_factor": 4,
957
+ "full_train_stats": false,
958
+ "tokenized_hf": false,
959
+ "tokenized_pad_token": "pad",
960
+ "elf_conditional_hf": false,
961
+ "record_pad_truncate": false,
962
+ "record_add_eos": false,
963
+ "record_add_special_tokens": false,
964
+ "record_pad_token": "pad",
965
+ "record_shuffle_buffer": 10000,
966
+ "wrap": true,
967
+ "wrap_mode": "stream",
968
+ "wrap_record_buffer_size": 200,
969
+ "owt_cached_chunks": true,
970
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
971
+ "owt_chunk_cache_rebuild": false,
972
+ "owt_chunk_cache_write_batch": 4096,
973
+ "owt_exact_repeat_per_chunk": 64,
974
+ "online_chunk_shuffle": false,
975
+ "online_chunk_shuffle_buffer": 10000,
976
+ "openwebtext_split": "train_minus_100k",
977
+ "detokenizer": "auto",
978
+ "resolved_detokenizer": null,
979
+ "num_workers": 0,
980
+ "latest_every": 1000,
981
+ "resume_path": "runs/train8_ctx1024_p50_path4_unif0_0p125_outwdm1_ctx1024_sampledpath_true_20260517_224139/latest.pt"
982
+ }
983
+ step=2100 epoch=2100/3000 epoch_step=1/1 micro_steps=2100 elapsed=24.6s lr=2.000000e-03 loss=0.6727 loss_recon=0.6727 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7942 corrupt_frac=1.0000 acc_corrupt=0.7942 loss_corrupt=0.6727 wrong_frac=0.7915 init_acc_corrupt=0.2065 acc_corrupt_t_0p0_0p2=0.6391 corrupt_frac_t_0p0_0p2=0.5640 acc_corrupt_t_0p2_0p4=0.9938 corrupt_frac_t_0p2_0p4=0.3466 acc_corrupt_t_0p4_0p6=0.9993 corrupt_frac_t_0p4_0p6=0.0791 acc_corrupt_t_0p6_0p8=0.9977 corrupt_frac_t_0p6_0p8=0.0136 out_w_norm=11.8640 out_g_norm=1.0068 acc_corrupt_t_0p8_1p0=0.9895 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.5473 init_gold_top10=0.5368 init_gold_top100=0.6265 rollout_applied_pos_frac=0.4453 init_acc_rollout_applied=0.3061 init_acc_rollout_kept=0.1192 logit_acc_rollout_applied=0.8278 logit_acc_rollout_kept=0.8275
984
+ step=2200 epoch=2200/3000 epoch_step=1/1 micro_steps=2200 elapsed=23.6s lr=2.000000e-03 loss=0.5787 loss_recon=0.5787 loss_meanflow=0.0000 mean_model_t=0.2096 mean_corrupt_t=0.2096 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4995 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8214 corrupt_frac=1.0000 acc_corrupt=0.8214 loss_corrupt=0.5787 wrong_frac=0.7905 init_acc_corrupt=0.2095 acc_corrupt_t_0p0_0p2=0.6810 corrupt_frac_t_0p0_0p2=0.5557 acc_corrupt_t_0p2_0p4=0.9966 corrupt_frac_t_0p2_0p4=0.3595 acc_corrupt_t_0p4_0p6=0.9994 corrupt_frac_t_0p4_0p6=0.0762 acc_corrupt_t_0p6_0p8=0.9984 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=11.8582 out_g_norm=0.9122 acc_corrupt_t_0p8_1p0=0.9941 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.5341 init_gold_top10=0.5514 init_gold_top100=0.6421 rollout_applied_pos_frac=0.4688 init_acc_rollout_applied=0.2945 init_acc_rollout_kept=0.1215 logit_acc_rollout_applied=0.8562 logit_acc_rollout_kept=0.8293
985
+ Terminated
LTA_openwebtext_dualt/logs/decode_timegrid_trace_len256_copied_20260517_155402.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/elf_lm1b_t5small_elfb_aligned_datasetfix_len128_4gpu_tinysmoke_20260513.log ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
2
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
3
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
4
+ You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
5
+ [elf-lm1b] encoder=/e2e-data/evad-tech-vla/wanghan58/models/hf/t5-small enc_dim=512 vocab=32100
6
+ [elf-lm1b] batch=4 world=4 grad_accum=1 gbs~=16
7
+ /usr/local/lib/python3.12/dist-packages/apex/_autocast_utils.py:26: FutureWarning: `torch.cuda.amp.autocast_mode._cast(value, dtype)` is deprecated. Please use `torch.amp.autocast_mode._cast(value, 'cuda', dtype)` instead.
8
+ return torch.cuda.amp.autocast_mode._cast(args, torch.get_autocast_gpu_dtype())
9
+ /usr/local/lib/python3.12/dist-packages/apex/_autocast_utils.py:26: FutureWarning: `torch.cuda.amp.autocast_mode._cast(value, dtype)` is deprecated. Please use `torch.amp.autocast_mode._cast(value, 'cuda', dtype)` instead.
10
+ return torch.cuda.amp.autocast_mode._cast(args, torch.get_autocast_gpu_dtype())
11
+ /usr/local/lib/python3.12/dist-packages/apex/_autocast_utils.py:26: FutureWarning: `torch.cuda.amp.autocast_mode._cast(value, dtype)` is deprecated. Please use `torch.amp.autocast_mode._cast(value, 'cuda', dtype)` instead.
12
+ return torch.cuda.amp.autocast_mode._cast(args, torch.get_autocast_gpu_dtype())
13
+ /usr/local/lib/python3.12/dist-packages/apex/_autocast_utils.py:26: FutureWarning: `torch.cuda.amp.autocast_mode._cast(value, dtype)` is deprecated. Please use `torch.amp.autocast_mode._cast(value, 'cuda', dtype)` instead.
14
+ return torch.cuda.amp.autocast_mode._cast(args, torch.get_autocast_gpu_dtype())
15
+ [2026-05-13 16:43:32] step=1 elapsed=1.0s lr=2.000000e-03 loss=1.0246 l2=1.2808 ce=0.0000 decoder_frac=0.000 t=0.217 tokens=129
16
+ [2026-05-13 16:43:32] step=2 elapsed=0.2s lr=2.000000e-03 loss=1.4248 l2=1.7810 ce=0.0000 decoder_frac=0.000 t=0.233 tokens=108
17
+ [2026-05-13 16:43:32] step=3 elapsed=0.2s lr=2.000000e-03 loss=1.2049 l2=1.5061 ce=0.0000 decoder_frac=0.000 t=0.245 tokens=111
18
+ [2026-05-13 16:43:32] step=4 elapsed=0.2s lr=2.000000e-03 loss=0.9481 l2=1.1851 ce=0.0000 decoder_frac=0.000 t=0.189 tokens=144
LTA_openwebtext_dualt/logs/elfaligned_t5record_4gpu/lta_owt_t5record_len1024_elfaligned_dditelf_muon_logitnormal_m1p5_s0p8_none_floor0p0_zeroout_tf32_gbs512_4gpu_20260516_011722.log ADDED
@@ -0,0 +1,296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "record_pad_truncate:pad=0:add_eos=0:add_special=0:shuffle_buffer=10000",
7
+ "vocab_size": 32100,
8
+ "tokenizer_vocab_size": 32100,
9
+ "save_dir": "runs/lta_owt_t5record_len1024_elfaligned_dditelf_muon_logitnormal_m1p5_s0p8_none_floor0p0_zeroout_tf32_gbs512_4gpu_20260516_011722",
10
+ "batch_size": 32,
11
+ "grad_accum": 4,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 5.0,
17
+ "steps_per_epoch": 15457,
18
+ "total_steps": 77285,
19
+ "warmup_steps": 7729,
20
+ "warmup_epochs": 0.5,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.0,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.999,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "optax",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": true,
33
+ "muon_width_scale": true,
34
+ "muon_grouping": "hidden_2d",
35
+ "muon_param_count": 84934656,
36
+ "muon_adam_param_count": 50212608,
37
+ "muon_param_names": [
38
+ "blocks.0.attn_qkv.weight",
39
+ "blocks.0.attn_out.weight",
40
+ "blocks.0.mlp.w12.weight",
41
+ "blocks.0.mlp.w3.weight",
42
+ "blocks.1.attn_qkv.weight",
43
+ "blocks.1.attn_out.weight",
44
+ "blocks.1.mlp.w12.weight",
45
+ "blocks.1.mlp.w3.weight",
46
+ "blocks.2.attn_qkv.weight",
47
+ "blocks.2.attn_out.weight",
48
+ "blocks.2.mlp.w12.weight",
49
+ "blocks.2.mlp.w3.weight",
50
+ "blocks.3.attn_qkv.weight",
51
+ "blocks.3.attn_out.weight",
52
+ "blocks.3.mlp.w12.weight",
53
+ "blocks.3.mlp.w3.weight",
54
+ "blocks.4.attn_qkv.weight",
55
+ "blocks.4.attn_out.weight",
56
+ "blocks.4.mlp.w12.weight",
57
+ "blocks.4.mlp.w3.weight",
58
+ "blocks.5.attn_qkv.weight",
59
+ "blocks.5.attn_out.weight",
60
+ "blocks.5.mlp.w12.weight",
61
+ "blocks.5.mlp.w3.weight",
62
+ "blocks.6.attn_qkv.weight",
63
+ "blocks.6.attn_out.weight",
64
+ "blocks.6.mlp.w12.weight",
65
+ "blocks.6.mlp.w3.weight",
66
+ "blocks.7.attn_qkv.weight",
67
+ "blocks.7.attn_out.weight",
68
+ "blocks.7.mlp.w12.weight",
69
+ "blocks.7.mlp.w3.weight",
70
+ "blocks.8.attn_qkv.weight",
71
+ "blocks.8.attn_out.weight",
72
+ "blocks.8.mlp.w12.weight",
73
+ "blocks.8.mlp.w3.weight",
74
+ "blocks.9.attn_qkv.weight",
75
+ "blocks.9.attn_out.weight",
76
+ "blocks.9.mlp.w12.weight",
77
+ "blocks.9.mlp.w3.weight",
78
+ "blocks.10.attn_qkv.weight",
79
+ "blocks.10.attn_out.weight",
80
+ "blocks.10.mlp.w12.weight",
81
+ "blocks.10.mlp.w3.weight",
82
+ "blocks.11.attn_qkv.weight",
83
+ "blocks.11.attn_out.weight",
84
+ "blocks.11.mlp.w12.weight",
85
+ "blocks.11.mlp.w3.weight"
86
+ ],
87
+ "muon_adam_param_names": [
88
+ "time_tokens",
89
+ "vocab_embed.embedding",
90
+ "sigma_map.net.0.weight",
91
+ "sigma_map.net.0.bias",
92
+ "sigma_map.net.2.weight",
93
+ "sigma_map.net.2.bias",
94
+ "blocks.0.norm1.weight",
95
+ "blocks.0.attn_qkv.bias",
96
+ "blocks.0.attn_out.bias",
97
+ "blocks.0.q_norm.weight",
98
+ "blocks.0.k_norm.weight",
99
+ "blocks.0.norm2.weight",
100
+ "blocks.0.mlp.w12.bias",
101
+ "blocks.0.mlp.w3.bias",
102
+ "blocks.1.norm1.weight",
103
+ "blocks.1.attn_qkv.bias",
104
+ "blocks.1.attn_out.bias",
105
+ "blocks.1.q_norm.weight",
106
+ "blocks.1.k_norm.weight",
107
+ "blocks.1.norm2.weight",
108
+ "blocks.1.mlp.w12.bias",
109
+ "blocks.1.mlp.w3.bias",
110
+ "blocks.2.norm1.weight",
111
+ "blocks.2.attn_qkv.bias",
112
+ "blocks.2.attn_out.bias",
113
+ "blocks.2.q_norm.weight",
114
+ "blocks.2.k_norm.weight",
115
+ "blocks.2.norm2.weight",
116
+ "blocks.2.mlp.w12.bias",
117
+ "blocks.2.mlp.w3.bias",
118
+ "blocks.3.norm1.weight",
119
+ "blocks.3.attn_qkv.bias",
120
+ "blocks.3.attn_out.bias",
121
+ "blocks.3.q_norm.weight",
122
+ "blocks.3.k_norm.weight",
123
+ "blocks.3.norm2.weight",
124
+ "blocks.3.mlp.w12.bias",
125
+ "blocks.3.mlp.w3.bias",
126
+ "blocks.4.norm1.weight",
127
+ "blocks.4.attn_qkv.bias",
128
+ "blocks.4.attn_out.bias",
129
+ "blocks.4.q_norm.weight",
130
+ "blocks.4.k_norm.weight",
131
+ "blocks.4.norm2.weight",
132
+ "blocks.4.mlp.w12.bias",
133
+ "blocks.4.mlp.w3.bias",
134
+ "blocks.5.norm1.weight",
135
+ "blocks.5.attn_qkv.bias",
136
+ "blocks.5.attn_out.bias",
137
+ "blocks.5.q_norm.weight",
138
+ "blocks.5.k_norm.weight",
139
+ "blocks.5.norm2.weight",
140
+ "blocks.5.mlp.w12.bias",
141
+ "blocks.5.mlp.w3.bias",
142
+ "blocks.6.norm1.weight",
143
+ "blocks.6.attn_qkv.bias",
144
+ "blocks.6.attn_out.bias",
145
+ "blocks.6.q_norm.weight",
146
+ "blocks.6.k_norm.weight",
147
+ "blocks.6.norm2.weight",
148
+ "blocks.6.mlp.w12.bias",
149
+ "blocks.6.mlp.w3.bias",
150
+ "blocks.7.norm1.weight",
151
+ "blocks.7.attn_qkv.bias",
152
+ "blocks.7.attn_out.bias",
153
+ "blocks.7.q_norm.weight",
154
+ "blocks.7.k_norm.weight",
155
+ "blocks.7.norm2.weight",
156
+ "blocks.7.mlp.w12.bias",
157
+ "blocks.7.mlp.w3.bias",
158
+ "blocks.8.norm1.weight",
159
+ "blocks.8.attn_qkv.bias",
160
+ "blocks.8.attn_out.bias",
161
+ "blocks.8.q_norm.weight",
162
+ "blocks.8.k_norm.weight",
163
+ "blocks.8.norm2.weight",
164
+ "blocks.8.mlp.w12.bias",
165
+ "blocks.8.mlp.w3.bias",
166
+ "blocks.9.norm1.weight",
167
+ "blocks.9.attn_qkv.bias",
168
+ "blocks.9.attn_out.bias",
169
+ "blocks.9.q_norm.weight",
170
+ "blocks.9.k_norm.weight",
171
+ "blocks.9.norm2.weight",
172
+ "blocks.9.mlp.w12.bias",
173
+ "blocks.9.mlp.w3.bias",
174
+ "blocks.10.norm1.weight",
175
+ "blocks.10.attn_qkv.bias",
176
+ "blocks.10.attn_out.bias",
177
+ "blocks.10.q_norm.weight",
178
+ "blocks.10.k_norm.weight",
179
+ "blocks.10.norm2.weight",
180
+ "blocks.10.mlp.w12.bias",
181
+ "blocks.10.mlp.w3.bias",
182
+ "blocks.11.norm1.weight",
183
+ "blocks.11.attn_qkv.bias",
184
+ "blocks.11.attn_out.bias",
185
+ "blocks.11.q_norm.weight",
186
+ "blocks.11.k_norm.weight",
187
+ "blocks.11.norm2.weight",
188
+ "blocks.11.mlp.w12.bias",
189
+ "blocks.11.mlp.w3.bias",
190
+ "output_layer.norm_final.weight",
191
+ "output_layer.linear.weight"
192
+ ],
193
+ "muon_effective_nesterov": true,
194
+ "muon_effective_width_scale": true,
195
+ "muon_effective_weight_decay": 0.0,
196
+ "muon_adam_fallback_nesterov": true,
197
+ "muon_adam_fallback_weight_decay": 0.0,
198
+ "ema_decay": 0.9999,
199
+ "ema_start_step": 0,
200
+ "model_type": "ddit_elf",
201
+ "elf_num_time_tokens": 4,
202
+ "elf_num_model_mode_tokens": 0,
203
+ "qk_norm": true,
204
+ "output_bias": false,
205
+ "output_init_std": 0.0,
206
+ "norm_type": "rmsnorm",
207
+ "t_sampling_mode": "logit_normal",
208
+ "t_sampling_power": 1.0,
209
+ "t_sampling_eps": 0.0001,
210
+ "t_sampling_logit_mean": -1.5,
211
+ "t_sampling_logit_std": 0.8,
212
+ "dual_t": true,
213
+ "corrupt_t_mode": "same",
214
+ "corrupt_min_t": 0.0,
215
+ "corrupt_max_t": 1.0,
216
+ "prefix_block_prob": 0.0,
217
+ "prefix_block_len": 128,
218
+ "mask_ratio_floor_schedule": "none",
219
+ "dirichlet_endpoint_mode": "categorical_dual_t",
220
+ "dirichlet_semantic_t_mode": "same",
221
+ "dirichlet_semantic_t_value": 0.0,
222
+ "dirichlet_semantic_t_curve": "linear",
223
+ "dirichlet_semantic_t_power": 1.0,
224
+ "endpoint_sequence_random_prob_alpha": 0.0,
225
+ "categorical_wrong_from_full_vocab": true,
226
+ "categorical_wrong_from_batch_valid_tokens": false,
227
+ "categorical_wrong_basin_token_ids": "",
228
+ "categorical_wrong_basin_prob": 0.0,
229
+ "categorical_wrong_unigram_prob": 0.0,
230
+ "categorical_wrong_uniform_prob": 0.0,
231
+ "categorical_wrong_corpus_unigram_path": "",
232
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
233
+ "categorical_wrong_basin_shared_prob": 0.0,
234
+ "categorical_wrong_unigram_shared_prob": 0.0,
235
+ "mask_mixture_original_prob": 0.0,
236
+ "mask_mixture_lowk_prob": 0.0,
237
+ "mask_mixture_lowcorrupt_prob": 0.0,
238
+ "mask_mixture_block_prob": 0.0,
239
+ "mask_mixture_all_prob": 0.0,
240
+ "mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
241
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
242
+ "mask_mixture_block_tokens": "64,128",
243
+ "simplex_bridge_sampler": "dirichlet",
244
+ "logistic_normal_sigma_min": 0.18,
245
+ "logistic_normal_sigma_max": 2.2,
246
+ "logistic_normal_tau_min": 0.65,
247
+ "logistic_normal_tau_max": 1.15,
248
+ "torch_compile": false,
249
+ "compile_mode": "max-autotune",
250
+ "state_format": "prob",
251
+ "target_loss": "hard_ce",
252
+ "meanflow_weight": 0.0,
253
+ "rollout_train_prob": 0.0,
254
+ "rollout_train_steps": 1,
255
+ "rollout_train_infer_steps": 64,
256
+ "rollout_train_temp": 1.45,
257
+ "rollout_train_max_gamma": 1.0,
258
+ "rollout_train_corrupt_only": true,
259
+ "rollout_train_samplewise": false,
260
+ "rollout_train_compute_always": false,
261
+ "bridge_noise_init": "logistic_normal",
262
+ "noise_sigma": -1.0,
263
+ "allow_tf32": true,
264
+ "activation_checkpointing": true,
265
+ "activation_checkpoint_interval": 1,
266
+ "activation_checkpoint_scope": "mlp",
267
+ "ddp_static_graph": false,
268
+ "ddp_gradient_as_bucket_view": true,
269
+ "blocking_data_transfer": false,
270
+ "dataloader_prefetch_factor": 4,
271
+ "full_train_stats": false,
272
+ "tokenized_hf": false,
273
+ "tokenized_pad_token": "pad",
274
+ "elf_conditional_hf": false,
275
+ "record_pad_truncate": true,
276
+ "record_add_eos": false,
277
+ "record_add_special_tokens": false,
278
+ "record_pad_token": "pad",
279
+ "record_shuffle_buffer": 10000,
280
+ "wrap": false,
281
+ "wrap_mode": "stream",
282
+ "wrap_record_buffer_size": 200,
283
+ "owt_cached_chunks": false,
284
+ "owt_chunk_cache_dir": "",
285
+ "owt_chunk_cache_rebuild": false,
286
+ "owt_chunk_cache_write_batch": 4096,
287
+ "owt_exact_repeat_per_chunk": 0,
288
+ "online_chunk_shuffle": false,
289
+ "online_chunk_shuffle_buffer": 10000,
290
+ "openwebtext_split": "train_minus_100k",
291
+ "detokenizer": "auto",
292
+ "resolved_detokenizer": null,
293
+ "num_workers": 8,
294
+ "latest_every": 1000,
295
+ "resume_path": ""
296
+ }
LTA_openwebtext_dualt/logs/eval_20260506/ar_8gpu_latest_temp_sweep_20260506_110706.log ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] ar_8gpu_scratch_latest step=1000000 ckpt=runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt
2
+ [ar temp=1] generated 32/256
3
+ [ar temp=1] generated 64/256
4
+ [ar temp=1] generated 96/256
5
+ [ar temp=1] generated 128/256
6
+ [ar temp=1] generated 160/256
7
+ [ar temp=1] generated 192/256
8
+ [ar temp=1] generated 224/256
9
+ [ar temp=1] generated 256/256
10
+ [ar temp=0.8] generated 32/256
11
+ [ar temp=0.8] generated 64/256
12
+ [ar temp=0.8] generated 96/256
13
+ [ar temp=0.8] generated 128/256
14
+ [ar temp=0.8] generated 160/256
15
+ [ar temp=0.8] generated 192/256
16
+ [ar temp=0.8] generated 224/256
17
+ [ar temp=0.8] generated 256/256
18
+ [ar temp=0.6] generated 32/256
19
+ [ar temp=0.6] generated 64/256
20
+ [ar temp=0.6] generated 96/256
21
+ [ar temp=0.6] generated 128/256
22
+ [ar temp=0.6] generated 160/256
23
+ [ar temp=0.6] generated 192/256
24
+ [ar temp=0.6] generated 224/256
25
+ [ar temp=0.6] generated 256/256
26
+ [ar temp=0.4] generated 32/256
27
+ [ar temp=0.4] generated 64/256
28
+ [ar temp=0.4] generated 96/256
29
+ [ar temp=0.4] generated 128/256
30
+ [ar temp=0.4] generated 160/256
31
+ [ar temp=0.4] generated 192/256
32
+ [ar temp=0.4] generated 224/256
33
+ [ar temp=0.4] generated 256/256
34
+ [ar temp=0.2] generated 32/256
35
+ [ar temp=0.2] generated 64/256
36
+ [ar temp=0.2] generated 96/256
37
+ [ar temp=0.2] generated 128/256
38
+ [ar temp=0.2] generated 160/256
39
+ [ar temp=0.2] generated 192/256
40
+ [ar temp=0.2] generated 224/256
41
+ [ar temp=0.2] generated 256/256
42
+ [summary] {"type": "summary", "name": "ar_8gpu_scratch_latest_t1p0", "kind": "ar", "checkpoint": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt", "step": 1000000, "decode": {"kind": "ar_sample", "temp": 1.0, "max_new_tokens": 127, "n_samples": 256, "seed": 20260506}, "raw_genppl": {"ppl": 66.26789796127002, "nll_per_token": 4.193705586286218, "tokens": 38758, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 102.02902966909716, "nll_per_token": 4.625257377391057, "tokens": 32517, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.3391454366321325, "unique_tokens": 6850, "token_count": 32768, "distinct_1": 0.20904541015625, "distinct_2": 0.721795029527559, "top_token_mass": 0.04486083984375}}
43
+ [summary] {"type": "summary", "name": "ar_8gpu_scratch_latest_t0p8", "kind": "ar", "checkpoint": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt", "step": 1000000, "decode": {"kind": "ar_sample", "temp": 0.8, "max_new_tokens": 127, "n_samples": 256, "seed": 20260506}, "raw_genppl": {"ppl": 32.32856535877376, "nll_per_token": 3.475951215830808, "tokens": 38613, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 44.806999513308874, "nll_per_token": 3.8023643663532356, "tokens": 32111, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.198900388253301, "unique_tokens": 5328, "token_count": 32768, "distinct_1": 0.16259765625, "distinct_2": 0.5892285925196851, "top_token_mass": 0.06524658203125}}
44
+ [summary] {"type": "summary", "name": "ar_8gpu_scratch_latest_t0p6", "kind": "ar", "checkpoint": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt", "step": 1000000, "decode": {"kind": "ar_sample", "temp": 0.6, "max_new_tokens": 127, "n_samples": 256, "seed": 20260506}, "raw_genppl": {"ppl": 20.308583315932225, "nll_per_token": 3.0110436201280355, "tokens": 38710, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 26.539437908520128, "nll_per_token": 3.2786318496488907, "tokens": 31979, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.087555248966228, "unique_tokens": 4227, "token_count": 32768, "distinct_1": 0.128997802734375, "distinct_2": 0.4760396161417323, "top_token_mass": 0.083648681640625}}
45
+ [summary] {"type": "summary", "name": "ar_8gpu_scratch_latest_t0p4", "kind": "ar", "checkpoint": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt", "step": 1000000, "decode": {"kind": "ar_sample", "temp": 0.4, "max_new_tokens": 127, "n_samples": 256, "seed": 20260506}, "raw_genppl": {"ppl": 15.039557317205846, "nll_per_token": 2.710683884392094, "tokens": 38573, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 18.999365346672068, "nll_per_token": 2.944405575801821, "tokens": 31826, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.005193950920615, "unique_tokens": 3146, "token_count": 32768, "distinct_1": 0.09600830078125, "distinct_2": 0.3395669291338583, "top_token_mass": 0.095611572265625}}
46
+ [summary] {"type": "summary", "name": "ar_8gpu_scratch_latest_t0p2", "kind": "ar", "checkpoint": "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_8gpu_1m_scratch_20260505/latest.pt", "step": 1000000, "decode": {"kind": "ar_sample", "temp": 0.2, "max_new_tokens": 127, "n_samples": 256, "seed": 20260506}, "raw_genppl": {"ppl": 9.414447999719362, "nll_per_token": 2.2422455305145195, "tokens": 38474, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 11.492390079715493, "nll_per_token": 2.4416850841291637, "tokens": 31417, "kept_samples": 256, "total_samples": 256, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.78621437718483, "unique_tokens": 1502, "token_count": 32768, "distinct_1": 0.04583740234375, "distinct_2": 0.13379675196850394, "top_token_mass": 0.093292236328125}}
47
+ [done] docs/lta_samples/metrics_20260506/ar_8gpu_latest_temp_sweep
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_eta1_stateweight_latest_20260506_113031.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=650000
2
+ [decode] diff_t1p3_eta1_sw0p70 generated 16/64
3
+ [decode] diff_t1p3_eta1_sw0p70 generated 32/64
4
+ [decode] diff_t1p3_eta1_sw0p70 generated 48/64
5
+ [decode] diff_t1p3_eta1_sw0p70 generated 64/64
6
+ [summary] diff_t1p3_eta1_sw0p70 raw=28.808 strip=39.590 ent=3.972 d2=0.599
7
+ [decode] diff_t1p3_eta1_sw0p90 generated 16/64
8
+ [decode] diff_t1p3_eta1_sw0p90 generated 32/64
9
+ [decode] diff_t1p3_eta1_sw0p90 generated 48/64
10
+ [decode] diff_t1p3_eta1_sw0p90 generated 64/64
11
+ [summary] diff_t1p3_eta1_sw0p90 raw=28.808 strip=39.590 ent=3.972 d2=0.599
12
+ [decode] diff_t1p3_eta1_state generated 16/64
13
+ [decode] diff_t1p3_eta1_state generated 32/64
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_finalsample_hightemp_quick_20260506_114232.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=656000
2
+ [decode] fs_t1p3_eta1_blend_ft1p00 generated 16/32
3
+ [decode] fs_t1p3_eta1_blend_ft1p00 generated 32/32
4
+ [summary] fs_t1p3_eta1_blend_ft1p00 raw=24.742 strip=37.189 ent=3.994 d2=0.643
5
+ [decode] fs_t1p3_eta1_blend_ft1p30 generated 16/32
6
+ [decode] fs_t1p3_eta1_blend_ft1p30 generated 32/32
7
+ [summary] fs_t1p3_eta1_blend_ft1p30 raw=34.585 strip=54.985 ent=4.025 d2=0.663
8
+ [decode] fs_t1p3_eta1_blend_ft1p60 generated 16/32
9
+ [decode] fs_t1p3_eta1_blend_ft1p60 generated 32/32
10
+ [summary] fs_t1p3_eta1_blend_ft1p60 raw=346.345 strip=639.185 ent=4.286 d2=0.825
11
+ [decode] fs_t1p2_eta1_blend_ft1p00 generated 16/32
12
+ [decode] fs_t1p2_eta1_blend_ft1p00 generated 32/32
13
+ [summary] fs_t1p2_eta1_blend_ft1p00 raw=23.393 strip=34.329 ent=3.971 d2=0.610
14
+ [done] docs/lta_samples/metrics_20260506/categorical_c1024_diffusion_finalsample_hightemp_quick
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_finalsample_latest_20260506_113603.log ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=653000
2
+ [decode] fs_t1p3_eta1_blend_ft0p35 generated 16/64
3
+ [decode] fs_t1p3_eta1_blend_ft0p35 generated 32/64
4
+ [decode] fs_t1p3_eta1_blend_ft0p35 generated 48/64
5
+ [decode] fs_t1p3_eta1_blend_ft0p35 generated 64/64
6
+ [summary] fs_t1p3_eta1_blend_ft0p35 raw=24.791 strip=35.428 ent=4.041 d2=0.563
7
+ [decode] fs_t1p3_eta1_blend_ft0p50 generated 16/64
8
+ [decode] fs_t1p3_eta1_blend_ft0p50 generated 32/64
9
+ [decode] fs_t1p3_eta1_blend_ft0p50 generated 48/64
10
+ [decode] fs_t1p3_eta1_blend_ft0p50 generated 64/64
11
+ [summary] fs_t1p3_eta1_blend_ft0p50 raw=24.791 strip=35.428 ent=4.041 d2=0.563
12
+ [decode] fs_t1p3_eta1_blend_ft0p70 generated 16/64
13
+ [decode] fs_t1p3_eta1_blend_ft0p70 generated 32/64
14
+ [decode] fs_t1p3_eta1_blend_ft0p70 generated 48/64
15
+ [decode] fs_t1p3_eta1_blend_ft0p70 generated 64/64
16
+ [summary] fs_t1p3_eta1_blend_ft0p70 raw=24.791 strip=35.428 ent=4.041 d2=0.563
17
+ [decode] fs_t1p2_eta1_blend_ft0p50 generated 16/64
18
+ [decode] fs_t1p2_eta1_blend_ft0p50 generated 32/64
19
+ [decode] fs_t1p2_eta1_blend_ft0p50 generated 48/64
20
+ [decode] fs_t1p2_eta1_blend_ft0p50 generated 64/64
21
+ [summary] fs_t1p2_eta1_blend_ft0p50 raw=22.928 strip=32.454 ent=4.015 d2=0.529
22
+ [decode] fs_t1p2_eta1_blend_ft0p70 generated 16/64
23
+ [decode] fs_t1p2_eta1_blend_ft0p70 generated 32/64
24
+ [decode] fs_t1p2_eta1_blend_ft0p70 generated 48/64
25
+ [decode] fs_t1p2_eta1_blend_ft0p70 generated 64/64
26
+ [summary] fs_t1p2_eta1_blend_ft0p70 raw=22.928 strip=32.454 ent=4.015 d2=0.529
27
+ [decode] fs_t1p1_eta1_blend_ft0p70 generated 16/64
28
+ [decode] fs_t1p1_eta1_blend_ft0p70 generated 32/64
29
+ [decode] fs_t1p1_eta1_blend_ft0p70 generated 48/64
30
+ [decode] fs_t1p1_eta1_blend_ft0p70 generated 64/64
31
+ [summary] fs_t1p1_eta1_blend_ft0p70 raw=22.678 strip=30.867 ent=3.946 d2=0.521
32
+ [decode] fs_t1p3_eta1_state_ft0p35 generated 16/64
33
+ [decode] fs_t1p3_eta1_state_ft0p35 generated 32/64
34
+ [decode] fs_t1p3_eta1_state_ft0p35 generated 48/64
35
+ [decode] fs_t1p3_eta1_state_ft0p35 generated 64/64
36
+ [summary] fs_t1p3_eta1_state_ft0p35 raw=24.791 strip=35.428 ent=4.041 d2=0.563
37
+ [decode] fs_t1p3_eta1_state_ft0p50 generated 16/64
38
+ [decode] fs_t1p3_eta1_state_ft0p50 generated 32/64
39
+ [decode] fs_t1p3_eta1_state_ft0p50 generated 48/64
40
+ [decode] fs_t1p3_eta1_state_ft0p50 generated 64/64
41
+ [summary] fs_t1p3_eta1_state_ft0p50 raw=24.791 strip=35.428 ent=4.041 d2=0.563
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_rolling_quick512_20260506_112740.log ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=648000
2
+ [decode] diff_t1p1_eta0p50_sw0p70 generated 32/32
3
+ [summary] diff_t1p1_eta0p50_sw0p70 raw=30.088 strip=40.977 ent=3.473 d2=0.531
4
+ [decode] diff_t1p3_eta0p25_sw0p70 generated 32/32
5
+ [summary] diff_t1p3_eta0p25_sw0p70 raw=2.578 strip=2.274 ent=0.672 d2=0.049
6
+ [decode] diff_t1p3_eta0p50_sw0p70 generated 32/32
7
+ [summary] diff_t1p3_eta0p50_sw0p70 raw=18.456 strip=16.892 ent=1.686 d2=0.197
8
+ [decode] diff_t1p3_eta0p75_sw0p70 generated 32/32
9
+ [summary] diff_t1p3_eta0p75_sw0p70 raw=34.201 strip=52.082 ent=4.029 d2=0.675
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_diffusion_rolling_sweep_latest_20260506_112546.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=647000
2
+ [decode] diff_t1p1_eta0p50_sw0p70 generated 16/64
3
+ [decode] diff_t1p1_eta0p50_sw0p70 generated 32/64
4
+ [decode] diff_t1p1_eta0p50_sw0p70 generated 48/64
5
+ [decode] diff_t1p1_eta0p50_sw0p70 generated 64/64
6
+ [summary] diff_t1p1_eta0p50_sw0p70 raw=24.353 strip=33.608 ent=3.641 d2=0.482
7
+ [decode] diff_t1p1_eta0p75_sw0p70 generated 16/64
8
+ [decode] diff_t1p1_eta0p75_sw0p70 generated 32/64
9
+ [decode] diff_t1p1_eta0p75_sw0p70 generated 48/64
10
+ [decode] diff_t1p1_eta0p75_sw0p70 generated 64/64
11
+ [summary] diff_t1p1_eta0p75_sw0p70 raw=24.930 strip=34.618 ent=3.987 d2=0.553
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_rolling_noise_focus_latest_20260506_112101.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=644000
2
+ [decode] sp1p25_sem2p0_temp1p5_eta1_blend generated 64/64
3
+ [summary] sp1p25_sem2p0_temp1p5_eta1_blend raw=36.622 strip=54.520 ent=4.057 d2=0.601
4
+ [decode] sem2p5_temp1p7_eta0p75_blend generated 64/64
5
+ [summary] sem2p5_temp1p7_eta0p75_blend raw=7.537 strip=5.859 ent=0.867 d2=0.074
LTA_openwebtext_dualt/logs/eval_20260506/categorical_c1024_rolling_noise_sweep_latest_20260506_110706.log ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [load] checkpoint=runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt step=637000
2
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 16/128
3
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 32/128
4
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 48/128
5
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 64/128
6
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 80/128
7
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 96/128
8
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 112/128
9
+ [decode] baseline_t1p3_sp1_sem1p5_eta1_blend generated 128/128
10
+ [summary] baseline_t1p3_sp1_sem1p5_eta1_blend raw=27.627 strip=40.194 ent=4.004 d2=0.506
11
+ [decode] temp1p5_eta1_blend generated 16/128
12
+ [decode] temp1p5_eta1_blend generated 32/128
13
+ [decode] temp1p5_eta1_blend generated 48/128
14
+ [decode] temp1p5_eta1_blend generated 64/128
15
+ [decode] temp1p5_eta1_blend generated 80/128
16
+ [decode] temp1p5_eta1_blend generated 96/128
17
+ [decode] temp1p5_eta1_blend generated 112/128
18
+ [decode] temp1p5_eta1_blend generated 128/128
19
+ [summary] temp1p5_eta1_blend raw=32.000 strip=44.321 ent=3.980 d2=0.544
20
+ [decode] temp1p7_eta1_blend generated 16/128
21
+ [decode] temp1p7_eta1_blend generated 32/128
22
+ [decode] temp1p7_eta1_blend generated 48/128
23
+ [decode] temp1p7_eta1_blend generated 64/128
24
+ [decode] temp1p7_eta1_blend generated 80/128
25
+ [decode] temp1p7_eta1_blend generated 96/128
26
+ [decode] temp1p7_eta1_blend generated 112/128
27
+ [decode] temp1p7_eta1_blend generated 128/128
28
+ [summary] temp1p7_eta1_blend raw=16.912 strip=15.385 ent=2.584 d2=0.146
29
+ [decode] temp2p0_eta1_blend generated 16/128
30
+ [decode] temp2p0_eta1_blend generated 32/128
31
+ [decode] temp2p0_eta1_blend generated 48/128
32
+ [decode] temp2p0_eta1_blend generated 64/128
33
+ [decode] temp2p0_eta1_blend generated 80/128
34
+ [decode] temp2p0_eta1_blend generated 96/128
35
+ [decode] temp2p0_eta1_blend generated 112/128
36
+ [decode] temp2p0_eta1_blend generated 128/128
37
+ [summary] temp2p0_eta1_blend raw=15.834 strip=13.898 ent=2.520 d2=0.094
38
+ [decode] sem2p0_temp1p5_eta1_blend generated 16/128
39
+ [decode] sem2p0_temp1p5_eta1_blend generated 32/128
40
+ [decode] sem2p0_temp1p5_eta1_blend generated 48/128
41
+ [decode] sem2p0_temp1p5_eta1_blend generated 64/128
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_key3_state_n256.log ADDED
@@ -0,0 +1 @@
 
 
1
+ [decode] match_post_sem1_state_c16_t1p3
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_quick2_128steps_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 123000, "detok_genppl": 34.04571508013546, "sample_entropy": 4.117197060615391, "distinct_2": 0.6318897637795275, "top_token_mass": 0.05859375, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 123000, "detok_genppl": 32.44883591727502, "sample_entropy": 4.1283035155644505, "distinct_2": 0.6188484251968503, "top_token_mass": 0.0576171875, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step122k_quick2_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 123000, "detok_genppl": 23.76129065779824, "sample_entropy": 4.072567398410477, "distinct_2": 0.562869094488189, "top_token_mass": 0.0634765625, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 123000, "detok_genppl": 22.850390314662413, "sample_entropy": 4.072407155167, "distinct_2": 0.5417076771653543, "top_token_mass": 0.0562744140625, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_256steps_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 29.81545187350111, "sample_entropy": 4.085741040367163, "distinct_2": 0.5905511811023622, "top_token_mass": 0.0538330078125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 28.415282971033125, "sample_entropy": 4.048794239038511, "distinct_2": 0.5878444881889764, "top_token_mass": 0.057861328125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_4096steps_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 15.863792771486848, "sample_entropy": 3.8477992784844646, "distinct_2": 0.3817667322834646, "top_token_mass": 0.0645751953125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 14.379869187838807, "sample_entropy": 3.7611245679001697, "distinct_2": 0.32258858267716534, "top_token_mass": 0.0748291015625, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps16_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 42.769466206615945, "sample_entropy": 3.1708587735321347, "distinct_2": 0.41313976377952755, "top_token_mass": 0.22607421875, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 24.348744185198743, "sample_entropy": 2.0679188483539344, "distinct_2": 0.23363681102362205, "top_token_mass": 0.392578125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps32_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 44.930859242990756, "sample_entropy": 3.7823763722555808, "distinct_2": 0.5745570866141733, "top_token_mass": 0.11572265625, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 31.13635173493272, "sample_entropy": 3.3146319833825286, "distinct_2": 0.4309793307086614, "top_token_mass": 0.2288818359375, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps64_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 42.24168306980635, "sample_entropy": 4.010304198715844, "distinct_2": 0.6353346456692913, "top_token_mass": 0.0780029296875, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 35.36294023649359, "sample_entropy": 3.884623591993139, "distinct_2": 0.5751722440944882, "top_token_mass": 0.1007080078125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/dirichlet_step124k_steps8_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 124000, "detok_genppl": 146.35828905491644, "sample_entropy": 2.9716103029900554, "distinct_2": 0.45681594488188976, "top_token_mass": 0.2855224609375, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 124000, "detok_genppl": 92.67198759188139, "sample_entropy": 1.7085331062756133, "distinct_2": 0.1935285433070866, "top_token_mass": 0.3070068359375, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_1024steps_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 156000, "detok_genppl": 26.453125333929982, "sample_entropy": 3.994721810704877, "distinct_2": 0.5850147637795275, "top_token_mass": 0.0726318359375, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 156000, "detok_genppl": 21.308442953038007, "sample_entropy": 3.9465318229680624, "distinct_2": 0.5394931102362205, "top_token_mass": 0.071533203125, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_diffusion_noise_steps_128steps_n64.log ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [decode] base_const1p45_c256
2
+ [summary] {"name": "base_const1p45_c256", "step": 187000, "detok_genppl": 45.86065023558471, "sample_entropy": 4.124430385339583, "distinct_2": 0.6966043307086615, "top_token_mass": 0.0712890625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.0, "eta_schedule": "none", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] base_temp2to08_c256
4
+ [summary] {"name": "base_temp2to08_c256", "step": 187000, "detok_genppl": 47.86767440351385, "sample_entropy": 4.196306894865523, "distinct_2": 0.718380905511811, "top_token_mass": 0.0533447265625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.0, "eta_schedule": "none", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
5
+ [decode] tpow1p5_const1p45_c256
6
+ [summary] {"name": "tpow1p5_const1p45_c256", "step": 187000, "detok_genppl": 47.15461218178996, "sample_entropy": 4.124970969360973, "distinct_2": 0.7017716535433071, "top_token_mass": 0.067138671875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.5, "eta0": 0.0, "eta_schedule": "none", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
7
+ [decode] tpow2p0_const1p45_c256
8
+ [summary] {"name": "tpow2p0_const1p45_c256", "step": 187000, "detok_genppl": 50.51946723435765, "sample_entropy": 4.078867688498713, "distinct_2": 0.7000492125984252, "top_token_mass": 0.0865478515625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 2.0, "eta0": 0.0, "eta_schedule": "none", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
9
+ [decode] tpow0p7_const1p45_c256
10
+ [summary] {"name": "tpow0p7_const1p45_c256", "step": 187000, "detok_genppl": 43.35758042812266, "sample_entropy": 4.1828232497490045, "distinct_2": 0.6999261811023622, "top_token_mass": 0.055908203125, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 0.7, "eta0": 0.0, "eta_schedule": "none", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
11
+ [decode] noise_const1p45_early_linear_eta0p01_c256
12
+ [summary] {"name": "noise_const1p45_early_linear_eta0p01_c256", "step": 187000, "detok_genppl": 43.2830792498643, "sample_entropy": 4.186505573692286, "distinct_2": 0.7080462598425197, "top_token_mass": 0.055419921875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
13
+ [decode] noise_const1p45_early_cosine_eta0p01_c256
14
+ [summary] {"name": "noise_const1p45_early_cosine_eta0p01_c256", "step": 187000, "detok_genppl": 42.09900669006817, "sample_entropy": 4.165938766811043, "distinct_2": 0.6938976377952756, "top_token_mass": 0.0592041015625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
15
+ [decode] noise_const1p45_mid_sine_eta0p01_c256
16
+ [summary] {"name": "noise_const1p45_mid_sine_eta0p01_c256", "step": 187000, "detok_genppl": 44.031384454172304, "sample_entropy": 4.192402841950814, "distinct_2": 0.7004183070866141, "top_token_mass": 0.0552978515625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
17
+ [decode] noise_const1p45_early_linear_eta0p02_c256
18
+ [summary] {"name": "noise_const1p45_early_linear_eta0p02_c256", "step": 187000, "detok_genppl": 44.65909863566894, "sample_entropy": 4.198494329382783, "distinct_2": 0.7079232283464567, "top_token_mass": 0.05419921875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
19
+ [decode] noise_const1p45_early_cosine_eta0p02_c256
20
+ [summary] {"name": "noise_const1p45_early_cosine_eta0p02_c256", "step": 187000, "detok_genppl": 46.54789765723706, "sample_entropy": 4.189696595593963, "distinct_2": 0.7057086614173228, "top_token_mass": 0.0576171875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
21
+ [decode] noise_const1p45_mid_sine_eta0p02_c256
22
+ [summary] {"name": "noise_const1p45_mid_sine_eta0p02_c256", "step": 187000, "detok_genppl": 46.05345631456055, "sample_entropy": 4.179009616612305, "distinct_2": 0.6916830708661418, "top_token_mass": 0.05712890625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
23
+ [decode] noise_const1p45_early_linear_eta0p05_c256
24
+ [summary] {"name": "noise_const1p45_early_linear_eta0p05_c256", "step": 187000, "detok_genppl": 45.9198653520818, "sample_entropy": 4.198348275601427, "distinct_2": 0.7081692913385826, "top_token_mass": 0.0552978515625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
25
+ [decode] noise_const1p45_early_cosine_eta0p05_c256
26
+ [summary] {"name": "noise_const1p45_early_cosine_eta0p05_c256", "step": 187000, "detok_genppl": 46.75523684526139, "sample_entropy": 4.197912062162802, "distinct_2": 0.7121062992125984, "top_token_mass": 0.0545654296875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
27
+ [decode] noise_const1p45_mid_sine_eta0p05_c256
28
+ [summary] {"name": "noise_const1p45_mid_sine_eta0p05_c256", "step": 187000, "detok_genppl": 42.7693226634343, "sample_entropy": 4.183180739045019, "distinct_2": 0.6919291338582677, "top_token_mass": 0.0548095703125, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
29
+ [decode] noise_const1p45_early_linear_eta0p08_c256
30
+ [summary] {"name": "noise_const1p45_early_linear_eta0p08_c256", "step": 187000, "detok_genppl": 44.54583377426971, "sample_entropy": 4.1536400284735615, "distinct_2": 0.6952509842519685, "top_token_mass": 0.0660400390625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
31
+ [decode] noise_const1p45_early_cosine_eta0p08_c256
32
+ [summary] {"name": "noise_const1p45_early_cosine_eta0p08_c256", "step": 187000, "detok_genppl": 42.21974970343552, "sample_entropy": 4.12835095126496, "distinct_2": 0.6998031496062992, "top_token_mass": 0.0679931640625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
33
+ [decode] noise_const1p45_mid_sine_eta0p08_c256
34
+ [summary] {"name": "noise_const1p45_mid_sine_eta0p08_c256", "step": 187000, "detok_genppl": 44.612465411027486, "sample_entropy": 4.105863199315936, "distinct_2": 0.6919291338582677, "top_token_mass": 0.0728759765625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
35
+ [decode] noise_const1p45_early_linear_eta0p12_c256
36
+ [summary] {"name": "noise_const1p45_early_linear_eta0p12_c256", "step": 187000, "detok_genppl": 46.894060450372244, "sample_entropy": 4.130338682029675, "distinct_2": 0.7098917322834646, "top_token_mass": 0.0697021484375, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
37
+ [decode] noise_const1p45_early_cosine_eta0p12_c256
38
+ [summary] {"name": "noise_const1p45_early_cosine_eta0p12_c256", "step": 187000, "detok_genppl": 44.33293502778378, "sample_entropy": 4.205396118072462, "distinct_2": 0.7105068897637795, "top_token_mass": 0.055908203125, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
39
+ [decode] noise_const1p45_mid_sine_eta0p12_c256
40
+ [summary] {"name": "noise_const1p45_mid_sine_eta0p12_c256", "step": 187000, "detok_genppl": 44.742664272649876, "sample_entropy": 4.208047575760355, "distinct_2": 0.6950049212598425, "top_token_mass": 0.0523681640625, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
41
+ [decode] noise_linear2to08_early_linear_eta0p01_c256
42
+ [summary] {"name": "noise_linear2to08_early_linear_eta0p01_c256", "step": 187000, "detok_genppl": 48.10164682261911, "sample_entropy": 4.200641298969796, "distinct_2": 0.7181348425196851, "top_token_mass": 0.052978515625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
43
+ [decode] noise_linear2to08_early_cosine_eta0p01_c256
44
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p01_c256", "step": 187000, "detok_genppl": 48.581707982325014, "sample_entropy": 4.18684190232035, "distinct_2": 0.7247785433070866, "top_token_mass": 0.0528564453125, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
45
+ [decode] noise_linear2to08_mid_sine_eta0p01_c256
46
+ [summary] {"name": "noise_linear2to08_mid_sine_eta0p01_c256", "step": 187000, "detok_genppl": 48.2817867052147, "sample_entropy": 4.19866648234313, "distinct_2": 0.7100147637795275, "top_token_mass": 0.0546875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
47
+ [decode] noise_linear2to08_early_linear_eta0p02_c256
48
+ [summary] {"name": "noise_linear2to08_early_linear_eta0p02_c256", "step": 187000, "detok_genppl": 48.18240875879645, "sample_entropy": 4.200519902464651, "distinct_2": 0.7197342519685039, "top_token_mass": 0.0533447265625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
49
+ [decode] noise_linear2to08_early_cosine_eta0p02_c256
50
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p02_c256", "step": 187000, "detok_genppl": 49.24717321473146, "sample_entropy": 4.190932299534418, "distinct_2": 0.7204724409448819, "top_token_mass": 0.0516357421875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
51
+ [decode] noise_linear2to08_mid_sine_eta0p02_c256
52
+ [summary] {"name": "noise_linear2to08_mid_sine_eta0p02_c256", "step": 187000, "detok_genppl": 49.82313453731583, "sample_entropy": 4.217693530581953, "distinct_2": 0.7095226377952756, "top_token_mass": 0.05419921875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
53
+ [decode] noise_linear2to08_early_linear_eta0p05_c256
54
+ [summary] {"name": "noise_linear2to08_early_linear_eta0p05_c256", "step": 187000, "detok_genppl": 48.83178019589149, "sample_entropy": 4.2050137794841085, "distinct_2": 0.7265009842519685, "top_token_mass": 0.050537109375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
55
+ [decode] noise_linear2to08_early_cosine_eta0p05_c256
56
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p05_c256", "step": 187000, "detok_genppl": 49.35257597935552, "sample_entropy": 4.205382906118391, "distinct_2": 0.7299458661417323, "top_token_mass": 0.0531005859375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
57
+ [decode] noise_linear2to08_mid_sine_eta0p05_c256
58
+ [summary] {"name": "noise_linear2to08_mid_sine_eta0p05_c256", "step": 187000, "detok_genppl": 49.82670395081913, "sample_entropy": 4.223790486496133, "distinct_2": 0.7219488188976378, "top_token_mass": 0.051025390625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
59
+ [decode] noise_linear2to08_early_linear_eta0p08_c256
60
+ [summary] {"name": "noise_linear2to08_early_linear_eta0p08_c256", "step": 187000, "detok_genppl": 49.12591398887004, "sample_entropy": 4.217049731211352, "distinct_2": 0.7287155511811023, "top_token_mass": 0.051513671875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
61
+ [decode] noise_linear2to08_early_cosine_eta0p08_c256
62
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p08_c256", "step": 187000, "detok_genppl": 46.47073894768385, "sample_entropy": 4.206427871276884, "distinct_2": 0.718873031496063, "top_token_mass": 0.053466796875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
63
+ [decode] noise_linear2to08_mid_sine_eta0p08_c256
64
+ [summary] {"name": "noise_linear2to08_mid_sine_eta0p08_c256", "step": 187000, "detok_genppl": 49.95328576578725, "sample_entropy": 4.222333312252868, "distinct_2": 0.7234251968503937, "top_token_mass": 0.0513916015625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.08, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
65
+ [decode] noise_linear2to08_early_linear_eta0p12_c256
66
+ [summary] {"name": "noise_linear2to08_early_linear_eta0p12_c256", "step": 187000, "detok_genppl": 45.858783855541894, "sample_entropy": 4.225998514820446, "distinct_2": 0.717888779527559, "top_token_mass": 0.0540771484375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "early_linear", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
67
+ [decode] noise_linear2to08_early_cosine_eta0p12_c256
68
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p12_c256", "step": 187000, "detok_genppl": 46.82628279884142, "sample_entropy": 4.213082036146694, "distinct_2": 0.7100147637795275, "top_token_mass": 0.0517578125, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
69
+ [decode] noise_linear2to08_mid_sine_eta0p12_c256
70
+ [summary] {"name": "noise_linear2to08_mid_sine_eta0p12_c256", "step": 187000, "detok_genppl": 47.38313453813254, "sample_entropy": 4.222067649767945, "distinct_2": 0.7165354330708661, "top_token_mass": 0.049560546875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.12, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
71
+ [decode] tpow1p5_noise_linear2to08_early_cosine_eta0p02_c256
72
+ [summary] {"name": "tpow1p5_noise_linear2to08_early_cosine_eta0p02_c256", "step": 187000, "detok_genppl": 41.43285220336455, "sample_entropy": 4.144170494681995, "distinct_2": 0.6658464566929134, "top_token_mass": 0.058837890625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.02, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
73
+ [decode] tpow1p5_noise_linear2to08_mid_sine_eta0p02_c256
74
+ [summary] {"name": "tpow1p5_noise_linear2to08_mid_sine_eta0p02_c256", "step": 187000, "detok_genppl": 40.688636689981976, "sample_entropy": 4.138078246400767, "distinct_2": 0.6857775590551181, "top_token_mass": 0.0615234375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.02, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
75
+ [decode] tpow1p5_noise_linear2to08_early_cosine_eta0p05_c256
76
+ [summary] {"name": "tpow1p5_noise_linear2to08_early_cosine_eta0p05_c256", "step": 187000, "detok_genppl": 43.72608758972266, "sample_entropy": 4.146192054211393, "distinct_2": 0.6766732283464567, "top_token_mass": 0.0582275390625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.05, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
77
+ [decode] tpow1p5_noise_linear2to08_mid_sine_eta0p05_c256
78
+ [summary] {"name": "tpow1p5_noise_linear2to08_mid_sine_eta0p05_c256", "step": 187000, "detok_genppl": 40.41088438525592, "sample_entropy": 4.153850742833741, "distinct_2": 0.6948818897637795, "top_token_mass": 0.06103515625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.05, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
79
+ [decode] tpow1p5_noise_linear2to08_early_cosine_eta0p08_c256
80
+ [summary] {"name": "tpow1p5_noise_linear2to08_early_cosine_eta0p08_c256", "step": 187000, "detok_genppl": 42.80336239844838, "sample_entropy": 4.142722771044542, "distinct_2": 0.6817175196850394, "top_token_mass": 0.0576171875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.08, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
81
+ [decode] tpow1p5_noise_linear2to08_mid_sine_eta0p08_c256
82
+ [summary] {"name": "tpow1p5_noise_linear2to08_mid_sine_eta0p08_c256", "step": 187000, "detok_genppl": 41.16435120941998, "sample_entropy": 4.162179467920619, "distinct_2": 0.6884842519685039, "top_token_mass": 0.0582275390625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.5, "eta0": 0.08, "eta_schedule": "mid_sine", "noise_conc": 1.0, "concentration_max": 256.0, "update": "resample"}
83
+ [decode] noise_linear2to08_early_cosine_eta0p01_c1024
84
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p01_c1024", "step": 187000, "detok_genppl": 46.75827674418147, "sample_entropy": 4.14898724091752, "distinct_2": 0.6947588582677166, "top_token_mass": 0.05419921875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.01, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 1024.0, "update": "resample"}
85
+ [decode] noise_linear2to08_early_cosine_eta0p02_c1024
86
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p02_c1024", "step": 187000, "detok_genppl": 45.648558001763085, "sample_entropy": 4.153952417528344, "distinct_2": 0.687623031496063, "top_token_mass": 0.0555419921875, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.02, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 1024.0, "update": "resample"}
87
+ [decode] noise_linear2to08_early_cosine_eta0p05_c1024
88
+ [summary] {"name": "noise_linear2to08_early_cosine_eta0p05_c1024", "step": 187000, "detok_genppl": 46.266957384208816, "sample_entropy": 4.169058739359141, "distinct_2": 0.6929133858267716, "top_token_mass": 0.0540771484375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "t_power": 1.0, "eta0": 0.05, "eta_schedule": "early_cosine", "noise_conc": 1.0, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_step146k_128steps_n64.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [decode] state_c256_t1p45
2
+ [summary] {"name": "state_c256_t1p45", "step": 147000, "detok_genppl": 42.95175406093094, "sample_entropy": 4.177227507708967, "distinct_2": 0.7065698818897638, "top_token_mass": 0.0526123046875, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 256.0, "update": "resample"}
3
+ [decode] state_c1024_t1p45
4
+ [summary] {"name": "state_c1024_t1p45", "step": 147000, "detok_genppl": 43.46101424967429, "sample_entropy": 4.177106000985858, "distinct_2": 0.7044783464566929, "top_token_mass": 0.063232421875, "model_t_mode": "post", "support_power": 1.0, "semantic_power": 1.0, "final_from": "state", "endpoint_temp": 1.45, "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_temp_push43_128steps_n64.log ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [decode] const1p7_c256
2
+ [summary] {"name": "const1p7_c256", "step": 175000, "detok_genppl": 73.02172589789832, "sample_entropy": 3.883921295576758, "distinct_2": 0.6646161417322834, "top_token_mass": 0.0772705078125, "temp_start": 1.7, "temp_end": 1.7, "temp_schedule": "const", "concentration_max": 256.0, "update": "resample"}
3
+ [decode] const1p9_c256
4
+ [summary] {"name": "const1p9_c256", "step": 175000, "detok_genppl": 14.922162281776664, "sample_entropy": 0.8953066809266682, "distinct_2": 0.06594488188976377, "top_token_mass": 0.535888671875, "temp_start": 1.9, "temp_end": 1.9, "temp_schedule": "const", "concentration_max": 256.0, "update": "resample"}
5
+ [decode] linear_2p2_to_1p0_c256
6
+ [summary] {"name": "linear_2p2_to_1p0_c256", "step": 175000, "detok_genppl": 58.24675350743373, "sample_entropy": 4.1634674784481325, "distinct_2": 0.7332677165354331, "top_token_mass": 0.0516357421875, "temp_start": 2.2, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
7
+ [decode] linear_2p2_to_0p8_c256
8
+ [summary] {"name": "linear_2p2_to_0p8_c256", "step": 175000, "detok_genppl": 56.400704956455975, "sample_entropy": 4.217845318082235, "distinct_2": 0.7203494094488189, "top_token_mass": 0.0545654296875, "temp_start": 2.2, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
9
+ [decode] linear_2p4_to_1p0_c256
10
+ [summary] {"name": "linear_2p4_to_1p0_c256", "step": 175000, "detok_genppl": 64.6726820413709, "sample_entropy": 3.974846120158277, "distinct_2": 0.68626968503937, "top_token_mass": 0.075439453125, "temp_start": 2.4, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
11
+ [decode] linear_2p4_to_0p8_c256
12
+ [summary] {"name": "linear_2p4_to_0p8_c256", "step": 175000, "detok_genppl": 58.89225325764547, "sample_entropy": 4.186939088090959, "distinct_2": 0.7390501968503937, "top_token_mass": 0.0535888671875, "temp_start": 2.4, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
13
+ [decode] linear_2p6_to_1p0_c256
14
+ [summary] {"name": "linear_2p6_to_1p0_c256", "step": 175000, "detok_genppl": 93.22654001417575, "sample_entropy": 3.5502153622269423, "distinct_2": 0.5669291338582677, "top_token_mass": 0.0662841796875, "temp_start": 2.6, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
15
+ [decode] linear_2p6_to_0p8_c256
16
+ [summary] {"name": "linear_2p6_to_0p8_c256", "step": 175000, "detok_genppl": 67.44367329956249, "sample_entropy": 4.013322546727759, "distinct_2": 0.6983267716535433, "top_token_mass": 0.062744140625, "temp_start": 2.6, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
17
+ [decode] cosine_2p2_to_1p0_c256
18
+ [summary] {"name": "cosine_2p2_to_1p0_c256", "step": 175000, "detok_genppl": 59.13250534161515, "sample_entropy": 4.156997882351534, "distinct_2": 0.7245324803149606, "top_token_mass": 0.0587158203125, "temp_start": 2.2, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
19
+ [decode] cosine_2p2_to_0p8_c256
20
+ [summary] {"name": "cosine_2p2_to_0p8_c256", "step": 175000, "detok_genppl": 53.079358293412945, "sample_entropy": 4.194362495717261, "distinct_2": 0.71751968503937, "top_token_mass": 0.057373046875, "temp_start": 2.2, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
21
+ [decode] cosine_2p4_to_1p0_c256
22
+ [summary] {"name": "cosine_2p4_to_1p0_c256", "step": 175000, "detok_genppl": 67.63405924691095, "sample_entropy": 3.9118554001672514, "distinct_2": 0.6654773622047244, "top_token_mass": 0.061767578125, "temp_start": 2.4, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
23
+ [decode] cosine_2p4_to_0p8_c256
24
+ [summary] {"name": "cosine_2p4_to_0p8_c256", "step": 175000, "detok_genppl": 59.268749670223364, "sample_entropy": 4.142414604309686, "distinct_2": 0.7123523622047244, "top_token_mass": 0.0567626953125, "temp_start": 2.4, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
25
+ [decode] cosine_2p6_to_1p0_c256
26
+ [summary] {"name": "cosine_2p6_to_1p0_c256", "step": 175000, "detok_genppl": 81.90252773757335, "sample_entropy": 3.697331724362402, "distinct_2": 0.609744094488189, "top_token_mass": 0.0743408203125, "temp_start": 2.6, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
27
+ [decode] cosine_2p6_to_0p8_c256
28
+ [summary] {"name": "cosine_2p6_to_0p8_c256", "step": 175000, "detok_genppl": 66.93942012291302, "sample_entropy": 4.024862423606699, "distinct_2": 0.702632874015748, "top_token_mass": 0.060791015625, "temp_start": 2.6, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
29
+ [decode] const1p7_c1024
30
+ [summary] {"name": "const1p7_c1024", "step": 175000, "detok_genppl": 24.95490342565739, "sample_entropy": 2.0072551200432747, "distinct_2": 0.19266732283464566, "top_token_mass": 0.1678466796875, "temp_start": 1.7, "temp_end": 1.7, "temp_schedule": "const", "concentration_max": 1024.0, "update": "resample"}
31
+ [decode] const1p9_c1024
32
+ [summary] {"name": "const1p9_c1024", "step": 175000, "detok_genppl": 3.385201792360027, "sample_entropy": 0.2811448039592659, "distinct_2": 0.01439468503937008, "top_token_mass": 0.6204833984375, "temp_start": 1.9, "temp_end": 1.9, "temp_schedule": "const", "concentration_max": 1024.0, "update": "resample"}
33
+ [decode] linear_2p2_to_1p0_c1024
34
+ [summary] {"name": "linear_2p2_to_1p0_c1024", "step": 175000, "detok_genppl": 40.143828038381145, "sample_entropy": 3.0747068927734635, "distinct_2": 0.40760334645669294, "top_token_mass": 0.1370849609375, "temp_start": 2.2, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
35
+ [decode] linear_2p2_to_0p8_c1024
36
+ [summary] {"name": "linear_2p2_to_0p8_c1024", "step": 175000, "detok_genppl": 46.983933012582675, "sample_entropy": 3.9660751510643766, "distinct_2": 0.6374261811023622, "top_token_mass": 0.0643310546875, "temp_start": 2.2, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
37
+ [decode] linear_2p4_to_1p0_c1024
38
+ [summary] {"name": "linear_2p4_to_1p0_c1024", "step": 175000, "detok_genppl": 36.51915972403966, "sample_entropy": 2.188378572202313, "distinct_2": 0.17679625984251968, "top_token_mass": 0.1708984375, "temp_start": 2.4, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
39
+ [decode] linear_2p4_to_0p8_c1024
40
+ [summary] {"name": "linear_2p4_to_0p8_c1024", "step": 175000, "detok_genppl": 42.03332370225938, "sample_entropy": 2.927752437640226, "distinct_2": 0.37893700787401574, "top_token_mass": 0.14404296875, "temp_start": 2.4, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
41
+ [decode] linear_2p6_to_1p0_c1024
42
+ [summary] {"name": "linear_2p6_to_1p0_c1024", "step": 175000, "detok_genppl": 56.54484225723178, "sample_entropy": 1.6456302306471402, "distinct_2": 0.06975885826771654, "top_token_mass": 0.28125, "temp_start": 2.6, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
43
+ [decode] linear_2p6_to_0p8_c1024
44
+ [summary] {"name": "linear_2p6_to_0p8_c1024", "step": 175000, "detok_genppl": 39.52693715768492, "sample_entropy": 2.094969797494312, "distinct_2": 0.1622785433070866, "top_token_mass": 0.1690673828125, "temp_start": 2.6, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
45
+ [decode] cosine_2p2_to_1p0_c1024
46
+ [summary] {"name": "cosine_2p2_to_1p0_c1024", "step": 175000, "detok_genppl": 40.03004349797324, "sample_entropy": 2.982592103912474, "distinct_2": 0.38250492125984253, "top_token_mass": 0.1568603515625, "temp_start": 2.2, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
47
+ [decode] cosine_2p2_to_0p8_c1024
48
+ [summary] {"name": "cosine_2p2_to_0p8_c1024", "step": 175000, "detok_genppl": 48.996002380359144, "sample_entropy": 3.7614902426841117, "distinct_2": 0.578986220472441, "top_token_mass": 0.092529296875, "temp_start": 2.2, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
49
+ [decode] cosine_2p4_to_1p0_c1024
50
+ [summary] {"name": "cosine_2p4_to_1p0_c1024", "step": 175000, "detok_genppl": 39.082045935365684, "sample_entropy": 2.2156439771111716, "distinct_2": 0.16818405511811024, "top_token_mass": 0.183349609375, "temp_start": 2.4, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
51
+ [decode] cosine_2p4_to_0p8_c1024
52
+ [summary] {"name": "cosine_2p4_to_0p8_c1024", "step": 175000, "detok_genppl": 38.684306933753554, "sample_entropy": 2.9657303464806444, "distinct_2": 0.374753937007874, "top_token_mass": 0.1798095703125, "temp_start": 2.4, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
53
+ [decode] cosine_2p6_to_1p0_c1024
54
+ [summary] {"name": "cosine_2p6_to_1p0_c1024", "step": 175000, "detok_genppl": 41.033074923366925, "sample_entropy": 1.7315247008660806, "distinct_2": 0.08476870078740158, "top_token_mass": 0.225341796875, "temp_start": 2.6, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
55
+ [decode] cosine_2p6_to_0p8_c1024
56
+ [summary] {"name": "cosine_2p6_to_0p8_c1024", "step": 175000, "detok_genppl": 39.47164351744778, "sample_entropy": 2.30810961807286, "distinct_2": 0.1984498031496063, "top_token_mass": 0.1820068359375, "temp_start": 2.6, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/lm1b_8gpu_latest_temp_schedule_128steps_n64.log ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [decode] const1p45_c256
2
+ [summary] {"name": "const1p45_c256", "step": 168000, "detok_genppl": 46.167692609958465, "sample_entropy": 4.203627610836947, "distinct_2": 0.7084153543307087, "top_token_mass": 0.0589599609375, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "concentration_max": 256.0, "update": "resample"}
3
+ [decode] linear_2p0_to_1p0_c256
4
+ [summary] {"name": "linear_2p0_to_1p0_c256", "step": 168000, "detok_genppl": 57.89282886876284, "sample_entropy": 4.230719319713245, "distinct_2": 0.7317913385826772, "top_token_mass": 0.0478515625, "temp_start": 2.0, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
5
+ [decode] linear_2p0_to_0p8_c256
6
+ [summary] {"name": "linear_2p0_to_0p8_c256", "step": 168000, "detok_genppl": 50.33995014345255, "sample_entropy": 4.20795476641468, "distinct_2": 0.7055856299212598, "top_token_mass": 0.0501708984375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
7
+ [decode] linear_1p8_to_0p8_c256
8
+ [summary] {"name": "linear_1p8_to_0p8_c256", "step": 168000, "detok_genppl": 42.701768026370516, "sample_entropy": 4.1504656186868445, "distinct_2": 0.6781496062992126, "top_token_mass": 0.0665283203125, "temp_start": 1.8, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 256.0, "update": "resample"}
9
+ [decode] cosine_2p0_to_1p0_c256
10
+ [summary] {"name": "cosine_2p0_to_1p0_c256", "step": 168000, "detok_genppl": 57.69433042345068, "sample_entropy": 4.217734038811499, "distinct_2": 0.7210875984251969, "top_token_mass": 0.0491943359375, "temp_start": 2.0, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
11
+ [decode] cosine_2p0_to_0p8_c256
12
+ [summary] {"name": "cosine_2p0_to_0p8_c256", "step": 168000, "detok_genppl": 52.61771836855152, "sample_entropy": 4.229658640820065, "distinct_2": 0.7153051181102362, "top_token_mass": 0.052490234375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
13
+ [decode] cosine_1p8_to_0p8_c256
14
+ [summary] {"name": "cosine_1p8_to_0p8_c256", "step": 168000, "detok_genppl": 44.48490287414872, "sample_entropy": 4.189465390460094, "distinct_2": 0.6977116141732284, "top_token_mass": 0.0614013671875, "temp_start": 1.8, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 256.0, "update": "resample"}
15
+ [decode] late_2p0_to_0p8_c256
16
+ [summary] {"name": "late_2p0_to_0p8_c256", "step": 168000, "detok_genppl": 76.25641195724324, "sample_entropy": 4.119732817656021, "distinct_2": 0.6507135826771654, "top_token_mass": 0.050537109375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "late", "concentration_max": 256.0, "update": "resample"}
17
+ [decode] const1p45_c1024
18
+ [summary] {"name": "const1p45_c1024", "step": 168000, "detok_genppl": 44.64688583445828, "sample_entropy": 4.190344770313543, "distinct_2": 0.6918061023622047, "top_token_mass": 0.0584716796875, "temp_start": 1.45, "temp_end": 1.45, "temp_schedule": "const", "concentration_max": 1024.0, "update": "resample"}
19
+ [decode] linear_2p0_to_1p0_c1024
20
+ [summary] {"name": "linear_2p0_to_1p0_c1024", "step": 168000, "detok_genppl": 67.09372257056255, "sample_entropy": 4.212984973676147, "distinct_2": 0.6530511811023622, "top_token_mass": 0.0494384765625, "temp_start": 2.0, "temp_end": 1.0, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
21
+ [decode] linear_2p0_to_0p8_c1024
22
+ [summary] {"name": "linear_2p0_to_0p8_c1024", "step": 168000, "detok_genppl": 59.49218961431751, "sample_entropy": 4.231511210178493, "distinct_2": 0.7090305118110236, "top_token_mass": 0.051025390625, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
23
+ [decode] linear_1p8_to_0p8_c1024
24
+ [summary] {"name": "linear_1p8_to_0p8_c1024", "step": 168000, "detok_genppl": 44.10530348094567, "sample_entropy": 4.16083612008407, "distinct_2": 0.6830708661417323, "top_token_mass": 0.0643310546875, "temp_start": 1.8, "temp_end": 0.8, "temp_schedule": "linear", "concentration_max": 1024.0, "update": "resample"}
25
+ [decode] cosine_2p0_to_1p0_c1024
26
+ [summary] {"name": "cosine_2p0_to_1p0_c1024", "step": 168000, "detok_genppl": 69.45955054398952, "sample_entropy": 4.014714437300709, "distinct_2": 0.59251968503937, "top_token_mass": 0.058837890625, "temp_start": 2.0, "temp_end": 1.0, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
27
+ [decode] cosine_2p0_to_0p8_c1024
28
+ [summary] {"name": "cosine_2p0_to_0p8_c1024", "step": 168000, "detok_genppl": 61.62004678653673, "sample_entropy": 4.226612023722781, "distinct_2": 0.68873031496063, "top_token_mass": 0.048095703125, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
29
+ [decode] cosine_1p8_to_0p8_c1024
30
+ [summary] {"name": "cosine_1p8_to_0p8_c1024", "step": 168000, "detok_genppl": 49.414174907327684, "sample_entropy": 4.191741629686525, "distinct_2": 0.6957431102362205, "top_token_mass": 0.05908203125, "temp_start": 1.8, "temp_end": 0.8, "temp_schedule": "cosine", "concentration_max": 1024.0, "update": "resample"}
31
+ [decode] late_2p0_to_0p8_c1024
32
+ [summary] {"name": "late_2p0_to_0p8_c1024", "step": 168000, "detok_genppl": 30.292676715795903, "sample_entropy": 1.950918831735721, "distinct_2": 0.21345964566929135, "top_token_mass": 0.4271240234375, "temp_start": 2.0, "temp_end": 0.8, "temp_schedule": "late", "concentration_max": 1024.0, "update": "resample"}
LTA_openwebtext_dualt/logs/eval_20260508/mauve_step124k_n64_features.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loaded s64_c256 64
2
+ loaded s64_c1024 64
3
+ loaded s128_c256 64
4
+ loaded s128_c1024 64
5
+ loaded s512_c256 64
6
+ loaded s512_c1024 64
7
+ feat gen_s64_c256_raw (64, 1280) time 1.0
8
+ feat gen_s64_c256_detok (64, 1280) time 0.4
9
+ feat gen_s64_c1024_raw (64, 1280) time 0.6
10
+ feat gen_s64_c1024_detok (64, 1280) time 0.5
11
+ feat gen_s128_c256_raw (64, 1280) time 0.5
12
+ feat gen_s128_c256_detok (64, 1280) time 0.5
13
+ feat gen_s128_c1024_raw (64, 1280) time 0.6
14
+ feat gen_s128_c1024_detok (64, 1280) time 0.4
15
+ feat gen_s512_c256_raw (64, 1280) time 0.6
16
+ feat gen_s512_c256_detok (64, 1280) time 0.5
17
+ feat gen_s512_c1024_raw (64, 1280) time 0.5
18
+ feat gen_s512_c1024_detok (64, 1280) time 0.5
19
+ feat ref_raw (64, 1280) time 0.6
20
+ feat ref_detok (64, 1280) time 0.5
21
+ DONE docs/lta_samples/metrics_20260508/mauve_step124k_n64_features.npz docs/lta_samples/metrics_20260508/mauve_step124k_n64_meta.json
LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_dirres_n16_s256_20260514_023314.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ [ckpt] runs/lta_owt_gpt2cached_len1024_selfcond_p05_rollout1_autocastfix_c1024_ddit768x12_muon_ema_gbs512_4gpu_50k_20260514_005426/step_0001000.pt step=1000
2
+ [decode-base] n=16 max_len=1024 steps=256 model_t=flow
3
+ [decode] temp=1.30 final=state rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 4/16
4
+ [decode] temp=1.30 final=state rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 8/16
5
+ [decode] temp=1.30 final=state rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 12/16
LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_online_dirres_n16_s256.log ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [ckpt] runs/lta_owt_gpt2cached_len1024_selfcond_p05_rollout1_autocastfix_c1024_ddit768x12_muon_ema_gbs512_4gpu_50k_20260514_005426/step_0001000.pt step=1000
2
+ [decode-base] n=16 max_len=1024 steps=256 model_t=flow
3
+ [decode] temp=1.30 final=state rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 4/16
4
+ [decode] temp=1.30 final=state rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 8/16
LTA_openwebtext_dualt/logs/eval_selfcond/selfcond_step1000_online_dirres_n8_s128_smoke.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ [ckpt] runs/lta_owt_gpt2cached_len1024_selfcond_p05_rollout1_autocastfix_c1024_ddit768x12_muon_ema_gbs512_4gpu_50k_20260514_005426/step_0001000.pt step=1000
2
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
3
+ [decode] temp=1.45 final=blend rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 4/8
4
+ [decode] temp=1.45 final=blend rule=dirichlet_resample support=1 semantic=1.5 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise generated 8/8
5
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_gpt2cached_len1024_selfcond_p05_rollout1_autocastfix_c1024_ddit768x12_muon_ema_gbs512_4gpu_50k_20260514_005426/step_0001000.pt", "step": 1000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.5, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "blend", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260514}, "raw_genppl": {"ppl": 5.401778570796268, "nll_per_token": 1.6867282643037684, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 4.281976989862169, "nll_per_token": 1.4544148164636947, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 0.8891367845472721, "unique_tokens": 8, "token_count": 8192, "distinct_1": 0.0009765625, "distinct_2": 0.0050097751710654935, "top_token_mass": 0.750732421875}}
6
+ [done] docs/lta_samples/metrics_20260514/selfcond_step1000_quick/selfcond_step1000_online_dirres_n8_s128_smoke.jsonl
LTA_openwebtext_dualt/logs/fullycoupled_tpow2_wd0p1_fp32_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_tpow2_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260515_003246.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/fullycoupled_uniform_mask1_swiglu_wd0p1_fp32_4gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638.log ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8734897",
7
+ "vocab_size": 50257,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638",
10
+ "batch_size": 32,
11
+ "grad_accum": 4,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "cosine",
15
+ "optimizer": "adamw",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 17061,
18
+ "total_steps": 1000000,
19
+ "warmup_steps": 2000,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 6e-05,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "",
35
+ "muon_param_count": 0,
36
+ "muon_adam_param_count": 0,
37
+ "muon_param_names": [],
38
+ "muon_adam_param_names": [],
39
+ "muon_effective_nesterov": false,
40
+ "muon_effective_width_scale": false,
41
+ "muon_effective_weight_decay": 0.1,
42
+ "muon_adam_fallback_nesterov": false,
43
+ "muon_adam_fallback_weight_decay": 0.1,
44
+ "ema_decay": 0.0,
45
+ "ema_start_step": 0,
46
+ "model_type": "ddit",
47
+ "ddit_mlp_type": "swiglu",
48
+ "elf_num_time_tokens": 4,
49
+ "elf_num_model_mode_tokens": 0,
50
+ "qk_norm": true,
51
+ "output_bias": false,
52
+ "output_init_std": -1.0,
53
+ "norm_type": "rmsnorm",
54
+ "target_loss": "hard_ce",
55
+ "linear_soft_target_power": 1.0,
56
+ "linear_soft_target_min_conf": 0.0,
57
+ "linear_soft_target_max_conf": 1.0,
58
+ "t_sampling_mode": "uniform",
59
+ "t_sampling_power": 1.0,
60
+ "t_sampling_eps": 0.0001,
61
+ "t_sampling_logit_mean": -0.22,
62
+ "t_sampling_logit_std": 0.5,
63
+ "dual_t": true,
64
+ "corrupt_t_mode": "same",
65
+ "corrupt_min_t": 0.0,
66
+ "corrupt_max_t": 1.0,
67
+ "prefix_block_prob": 0.0,
68
+ "prefix_block_len": 128,
69
+ "mask_ratio_floor_schedule": "none",
70
+ "dirichlet_endpoint_mode": "categorical_dual_t",
71
+ "dirichlet_semantic_t_mode": "same",
72
+ "dirichlet_semantic_t_value": 0.0,
73
+ "dirichlet_semantic_t_curve": "linear",
74
+ "dirichlet_semantic_t_power": 1.0,
75
+ "endpoint_sequence_random_prob_alpha": 0.0,
76
+ "categorical_wrong_from_full_vocab": true,
77
+ "categorical_wrong_from_batch_valid_tokens": false,
78
+ "categorical_wrong_basin_token_ids": "",
79
+ "categorical_wrong_basin_prob": 0.0,
80
+ "categorical_wrong_unigram_prob": 0.0,
81
+ "categorical_wrong_uniform_prob": 0.0,
82
+ "categorical_wrong_corpus_unigram_path": "",
83
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
84
+ "categorical_wrong_basin_shared_prob": 0.0,
85
+ "categorical_wrong_unigram_shared_prob": 0.0,
86
+ "mask_mixture_original_prob": 0.0,
87
+ "mask_mixture_lowk_prob": 0.0,
88
+ "mask_mixture_lowcorrupt_prob": 0.0,
89
+ "mask_mixture_block_prob": 0.0,
90
+ "mask_mixture_all_prob": 0.0,
91
+ "mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
92
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
93
+ "mask_mixture_block_tokens": "64,128",
94
+ "simplex_bridge_sampler": "dirichlet",
95
+ "logistic_normal_sigma_min": 0.18,
96
+ "logistic_normal_sigma_max": 2.2,
97
+ "logistic_normal_tau_min": 0.65,
98
+ "logistic_normal_tau_max": 1.15,
99
+ "torch_compile": false,
100
+ "compile_mode": "max-autotune",
101
+ "state_format": "prob",
102
+ "meanflow_weight": 0.0,
103
+ "rollout_train_prob": 0.0,
104
+ "rollout_train_steps": 1,
105
+ "rollout_train_infer_steps": 64,
106
+ "rollout_train_temp": 1.45,
107
+ "rollout_train_max_gamma": 1.0,
108
+ "rollout_train_corrupt_only": true,
109
+ "rollout_train_samplewise": false,
110
+ "rollout_train_compute_always": false,
111
+ "bridge_noise_init": "logistic_normal",
112
+ "noise_sigma": -1.0,
113
+ "allow_tf32": true,
114
+ "activation_checkpointing": false,
115
+ "activation_checkpoint_interval": 1,
116
+ "activation_checkpoint_scope": "block",
117
+ "ddp_static_graph": false,
118
+ "ddp_gradient_as_bucket_view": true,
119
+ "blocking_data_transfer": false,
120
+ "dataloader_prefetch_factor": 4,
121
+ "full_train_stats": false,
122
+ "tokenized_hf": false,
123
+ "tokenized_pad_token": "pad",
124
+ "elf_conditional_hf": false,
125
+ "record_pad_truncate": false,
126
+ "record_add_eos": false,
127
+ "record_add_special_tokens": false,
128
+ "record_pad_token": "pad",
129
+ "record_shuffle_buffer": 10000,
130
+ "wrap": true,
131
+ "wrap_mode": "stream",
132
+ "wrap_record_buffer_size": 200,
133
+ "owt_cached_chunks": true,
134
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k",
135
+ "owt_chunk_cache_rebuild": false,
136
+ "owt_chunk_cache_write_batch": 4096,
137
+ "owt_exact_repeat_per_chunk": 0,
138
+ "online_chunk_shuffle": false,
139
+ "online_chunk_shuffle_buffer": 10000,
140
+ "openwebtext_split": "train_minus_100k",
141
+ "detokenizer": "auto",
142
+ "resolved_detokenizer": null,
143
+ "num_workers": 8,
144
+ "latest_every": 1000,
145
+ "resume_path": ""
146
+ }
147
+ step=50 epoch=1/59 epoch_step=50/17061 micro_steps=200 elapsed=388.8s lr=1.530000e-05 loss=10.7861 loss_recon=10.7861 loss_meanflow=0.0000 mean_model_t=0.4957 mean_corrupt_t=0.4957 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4577 corrupt_frac=1.0000 acc_corrupt=0.4577 loss_corrupt=10.7861 wrong_frac=0.5041 init_acc_corrupt=0.4610 acc_corrupt_t_0p0_0p2=0.0278 corrupt_frac_t_0p0_0p2=0.2087 acc_corrupt_t_0p2_0p4=0.2440 corrupt_frac_t_0p2_0p4=0.1978 acc_corrupt_t_0p4_0p6=0.4847 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.6804 corrupt_frac_t_0p6_0p8=0.1936 acc_corrupt_t_0p8_1p0=0.8739 corrupt_frac_t_0p8_1p0=0.2008 out_w_norm=0.4686 out_g_norm=0.7110 loss_all=10.6822 init_gold_top10=0.5175 init_gold_top100=0.5356
148
+ step=100 epoch=1/59 epoch_step=100/17061 micro_steps=400 elapsed=426.5s lr=3.030000e-05 loss=10.2172 loss_recon=10.2172 loss_meanflow=0.0000 mean_model_t=0.4967 mean_corrupt_t=0.4967 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2252 corrupt_frac=1.0000 acc_corrupt=0.2252 loss_corrupt=10.2172 wrong_frac=0.5029 init_acc_corrupt=0.4624 acc_corrupt_t_0p0_0p2=0.0406 corrupt_frac_t_0p0_0p2=0.1973 acc_corrupt_t_0p2_0p4=0.0829 corrupt_frac_t_0p2_0p4=0.2069 acc_corrupt_t_0p4_0p6=0.1761 corrupt_frac_t_0p4_0p6=0.2028 acc_corrupt_t_0p6_0p8=0.3049 corrupt_frac_t_0p6_0p8=0.1972 acc_corrupt_t_0p8_1p0=0.5321 corrupt_frac_t_0p8_1p0=0.1958 out_w_norm=4.1655 out_g_norm=1.4201 loss_all=9.6351 init_gold_top10=0.5489 init_gold_top100=0.5677
149
+ step=150 epoch=1/59 epoch_step=150/17061 micro_steps=600 elapsed=454.5s lr=4.530000e-05 loss=8.9059 loss_recon=8.9059 loss_meanflow=0.0000 mean_model_t=0.4954 mean_corrupt_t=0.4954 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1046 corrupt_frac=1.0000 acc_corrupt=0.1046 loss_corrupt=8.9059 wrong_frac=0.5044 init_acc_corrupt=0.4602 acc_corrupt_t_0p0_0p2=0.0359 corrupt_frac_t_0p0_0p2=0.2023 acc_corrupt_t_0p2_0p4=0.0433 corrupt_frac_t_0p2_0p4=0.2061 acc_corrupt_t_0p4_0p6=0.0754 corrupt_frac_t_0p4_0p6=0.2009 acc_corrupt_t_0p6_0p8=0.1354 corrupt_frac_t_0p6_0p8=0.1908 acc_corrupt_t_0p8_1p0=0.2374 corrupt_frac_t_0p8_1p0=0.1998 out_w_norm=12.0903 out_g_norm=1.5895 loss_all=8.1304 init_gold_top10=0.5795 init_gold_top100=0.6040
150
+ step=200 epoch=1/59 epoch_step=200/17061 micro_steps=800 elapsed=405.5s lr=6.030000e-05 loss=7.5686 loss_recon=7.5686 loss_meanflow=0.0000 mean_model_t=0.4978 mean_corrupt_t=0.4978 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0820 corrupt_frac=1.0000 acc_corrupt=0.0820 loss_corrupt=7.5686 wrong_frac=0.5022 init_acc_corrupt=0.4627 acc_corrupt_t_0p0_0p2=0.0357 corrupt_frac_t_0p0_0p2=0.2045 acc_corrupt_t_0p2_0p4=0.0444 corrupt_frac_t_0p2_0p4=0.1994 acc_corrupt_t_0p4_0p6=0.0623 corrupt_frac_t_0p4_0p6=0.2011 acc_corrupt_t_0p6_0p8=0.1070 corrupt_frac_t_0p6_0p8=0.1956 acc_corrupt_t_0p8_1p0=0.1626 corrupt_frac_t_0p8_1p0=0.1994 out_w_norm=21.3144 out_g_norm=1.2391 loss_all=7.0871 init_gold_top10=0.4736 init_gold_top100=0.5074
151
+ step=250 epoch=1/59 epoch_step=250/17061 micro_steps=1000 elapsed=280.9s lr=7.530000e-05 loss=6.2367 loss_recon=6.2367 loss_meanflow=0.0000 mean_model_t=0.5006 mean_corrupt_t=0.5006 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2156 corrupt_frac=1.0000 acc_corrupt=0.2156 loss_corrupt=6.2367 wrong_frac=0.4997 init_acc_corrupt=0.4660 acc_corrupt_t_0p0_0p2=0.0445 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.1035 corrupt_frac_t_0p2_0p4=0.2019 acc_corrupt_t_0p4_0p6=0.2063 corrupt_frac_t_0p4_0p6=0.1980 acc_corrupt_t_0p6_0p8=0.3031 corrupt_frac_t_0p6_0p8=0.2026 acc_corrupt_t_0p8_1p0=0.4207 corrupt_frac_t_0p8_1p0=0.1995 out_w_norm=30.2056 out_g_norm=0.5883 loss_all=4.9413 init_gold_top10=0.5498 init_gold_top100=0.5749
152
+ step=300 epoch=1/59 epoch_step=300/17061 micro_steps=1200 elapsed=280.4s lr=9.030000e-05 loss=4.7166 loss_recon=4.7166 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4289 corrupt_frac=1.0000 acc_corrupt=0.4289 loss_corrupt=4.7166 wrong_frac=0.5015 init_acc_corrupt=0.4639 acc_corrupt_t_0p0_0p2=0.0575 corrupt_frac_t_0p0_0p2=0.1995 acc_corrupt_t_0p2_0p4=0.2283 corrupt_frac_t_0p2_0p4=0.2008 acc_corrupt_t_0p4_0p6=0.4425 corrupt_frac_t_0p4_0p6=0.2034 acc_corrupt_t_0p6_0p8=0.6262 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.7964 corrupt_frac_t_0p8_1p0=0.1961 out_w_norm=38.0298 out_g_norm=0.2246 loss_all=4.2610 init_gold_top10=0.5237 init_gold_top100=0.5499
153
+ step=350 epoch=1/59 epoch_step=350/17061 micro_steps=1400 elapsed=330.1s lr=1.053000e-04 loss=4.3317 loss_recon=4.3317 loss_meanflow=0.0000 mean_model_t=0.5043 mean_corrupt_t=0.5043 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4660 corrupt_frac=1.0000 acc_corrupt=0.4660 loss_corrupt=4.3317 wrong_frac=0.4958 init_acc_corrupt=0.4701 acc_corrupt_t_0p0_0p2=0.0577 corrupt_frac_t_0p0_0p2=0.1980 acc_corrupt_t_0p2_0p4=0.2453 corrupt_frac_t_0p2_0p4=0.1964 acc_corrupt_t_0p4_0p6=0.4762 corrupt_frac_t_0p4_0p6=0.1986 acc_corrupt_t_0p6_0p8=0.6668 corrupt_frac_t_0p6_0p8=0.2000 acc_corrupt_t_0p8_1p0=0.8604 corrupt_frac_t_0p8_1p0=0.2090 out_w_norm=42.6140 out_g_norm=0.2474 loss_all=5.1987 init_gold_top10=0.3885 init_gold_top100=0.4312
154
+ step=400 epoch=1/59 epoch_step=400/17061 micro_steps=1600 elapsed=285.2s lr=1.203000e-04 loss=4.2408 loss_recon=4.2408 loss_meanflow=0.0000 mean_model_t=0.5011 mean_corrupt_t=0.5011 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4764 corrupt_frac=1.0000 acc_corrupt=0.4764 loss_corrupt=4.2408 wrong_frac=0.4990 init_acc_corrupt=0.4663 acc_corrupt_t_0p0_0p2=0.0571 corrupt_frac_t_0p0_0p2=0.1959 acc_corrupt_t_0p2_0p4=0.2487 corrupt_frac_t_0p2_0p4=0.2062 acc_corrupt_t_0p4_0p6=0.4926 corrupt_frac_t_0p4_0p6=0.1989 acc_corrupt_t_0p6_0p8=0.6913 corrupt_frac_t_0p6_0p8=0.1939 acc_corrupt_t_0p8_1p0=0.8873 corrupt_frac_t_0p8_1p0=0.2050 out_w_norm=45.7084 out_g_norm=0.2444 loss_all=4.2385 init_gold_top10=0.4913 init_gold_top100=0.5316
155
+ step=450 epoch=1/59 epoch_step=450/17061 micro_steps=1800 elapsed=277.1s lr=1.353000e-04 loss=4.1617 loss_recon=4.1617 loss_meanflow=0.0000 mean_model_t=0.5048 mean_corrupt_t=0.5048 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4857 corrupt_frac=1.0000 acc_corrupt=0.4857 loss_corrupt=4.1617 wrong_frac=0.4951 init_acc_corrupt=0.4704 acc_corrupt_t_0p0_0p2=0.0586 corrupt_frac_t_0p0_0p2=0.1971 acc_corrupt_t_0p2_0p4=0.2521 corrupt_frac_t_0p2_0p4=0.2003 acc_corrupt_t_0p4_0p6=0.4975 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.6955 corrupt_frac_t_0p6_0p8=0.1892 acc_corrupt_t_0p8_1p0=0.8958 corrupt_frac_t_0p8_1p0=0.2162 out_w_norm=47.9240 out_g_norm=0.2380 loss_all=3.6068 init_gold_top10=0.5674 init_gold_top100=0.5807
156
+ step=500 epoch=1/59 epoch_step=500/17061 micro_steps=2000 elapsed=300.5s lr=1.503000e-04 loss=4.1949 loss_recon=4.1949 loss_meanflow=0.0000 mean_model_t=0.4975 mean_corrupt_t=0.4975 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4796 corrupt_frac=1.0000 acc_corrupt=0.4796 loss_corrupt=4.1949 wrong_frac=0.5028 init_acc_corrupt=0.4625 acc_corrupt_t_0p0_0p2=0.0590 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.2582 corrupt_frac_t_0p2_0p4=0.2053 acc_corrupt_t_0p4_0p6=0.4971 corrupt_frac_t_0p4_0p6=0.1923 acc_corrupt_t_0p6_0p8=0.6952 corrupt_frac_t_0p6_0p8=0.2052 acc_corrupt_t_0p8_1p0=0.8972 corrupt_frac_t_0p8_1p0=0.1975 out_w_norm=49.5350 out_g_norm=0.2347 loss_all=3.9710 init_gold_top10=0.5273 init_gold_top100=0.5571
157
+ step=550 epoch=1/59 epoch_step=550/17061 micro_steps=2200 elapsed=287.1s lr=1.653000e-04 loss=4.1088 loss_recon=4.1088 loss_meanflow=0.0000 mean_model_t=0.5066 mean_corrupt_t=0.5066 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4899 corrupt_frac=1.0000 acc_corrupt=0.4899 loss_corrupt=4.1088 wrong_frac=0.4935 init_acc_corrupt=0.4726 acc_corrupt_t_0p0_0p2=0.0611 corrupt_frac_t_0p0_0p2=0.1905 acc_corrupt_t_0p2_0p4=0.2599 corrupt_frac_t_0p2_0p4=0.2028 acc_corrupt_t_0p4_0p6=0.4995 corrupt_frac_t_0p4_0p6=0.2010 acc_corrupt_t_0p6_0p8=0.7002 corrupt_frac_t_0p6_0p8=0.2051 acc_corrupt_t_0p8_1p0=0.9015 corrupt_frac_t_0p8_1p0=0.2027 out_w_norm=51.0302 out_g_norm=0.2278 loss_all=4.3138 init_gold_top10=0.4809 init_gold_top100=0.5140
158
+ step=600 epoch=1/59 epoch_step=600/17061 micro_steps=2400 elapsed=327.2s lr=1.803000e-04 loss=4.1122 loss_recon=4.1122 loss_meanflow=0.0000 mean_model_t=0.5025 mean_corrupt_t=0.5025 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4889 corrupt_frac=1.0000 acc_corrupt=0.4889 loss_corrupt=4.1122 wrong_frac=0.4977 init_acc_corrupt=0.4679 acc_corrupt_t_0p0_0p2=0.0605 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.2628 corrupt_frac_t_0p2_0p4=0.1970 acc_corrupt_t_0p4_0p6=0.5033 corrupt_frac_t_0p4_0p6=0.1941 acc_corrupt_t_0p6_0p8=0.7018 corrupt_frac_t_0p6_0p8=0.2026 acc_corrupt_t_0p8_1p0=0.8992 corrupt_frac_t_0p8_1p0=0.2078 out_w_norm=52.4291 out_g_norm=0.2511 loss_all=4.0709 init_gold_top10=0.4996 init_gold_top100=0.5313
159
+ step=650 epoch=1/59 epoch_step=650/17061 micro_steps=2600 elapsed=268.4s lr=1.953000e-04 loss=4.0279 loss_recon=4.0279 loss_meanflow=0.0000 mean_model_t=0.5036 mean_corrupt_t=0.5036 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4964 corrupt_frac=1.0000 acc_corrupt=0.4964 loss_corrupt=4.0279 wrong_frac=0.4964 init_acc_corrupt=0.4702 acc_corrupt_t_0p0_0p2=0.0627 corrupt_frac_t_0p0_0p2=0.1948 acc_corrupt_t_0p2_0p4=0.2751 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.5146 corrupt_frac_t_0p4_0p6=0.2028 acc_corrupt_t_0p6_0p8=0.7119 corrupt_frac_t_0p6_0p8=0.2009 acc_corrupt_t_0p8_1p0=0.9017 corrupt_frac_t_0p8_1p0=0.2017 out_w_norm=53.9562 out_g_norm=0.2470 loss_all=3.3140 init_gold_top10=0.5782 init_gold_top100=0.5980
160
+ step=700 epoch=1/59 epoch_step=700/17061 micro_steps=2800 elapsed=286.2s lr=2.103000e-04 loss=3.9270 loss_recon=3.9270 loss_meanflow=0.0000 mean_model_t=0.5033 mean_corrupt_t=0.5033 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5030 corrupt_frac=1.0000 acc_corrupt=0.5030 loss_corrupt=3.9270 wrong_frac=0.4966 init_acc_corrupt=0.4694 acc_corrupt_t_0p0_0p2=0.0662 corrupt_frac_t_0p0_0p2=0.1959 acc_corrupt_t_0p2_0p4=0.2824 corrupt_frac_t_0p2_0p4=0.1964 acc_corrupt_t_0p4_0p6=0.5242 corrupt_frac_t_0p4_0p6=0.2042 acc_corrupt_t_0p6_0p8=0.7141 corrupt_frac_t_0p6_0p8=0.1988 acc_corrupt_t_0p8_1p0=0.9067 corrupt_frac_t_0p8_1p0=0.2047 out_w_norm=55.6574 out_g_norm=0.2686 loss_all=3.8679 init_gold_top10=0.4979 init_gold_top100=0.5239
161
+ step=750 epoch=1/59 epoch_step=750/17061 micro_steps=3000 elapsed=268.1s lr=2.253000e-04 loss=3.7913 loss_recon=3.7913 loss_meanflow=0.0000 mean_model_t=0.5050 mean_corrupt_t=0.5050 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5137 corrupt_frac=1.0000 acc_corrupt=0.5137 loss_corrupt=3.7913 wrong_frac=0.4948 init_acc_corrupt=0.4710 acc_corrupt_t_0p0_0p2=0.0713 corrupt_frac_t_0p0_0p2=0.1955 acc_corrupt_t_0p2_0p4=0.2947 corrupt_frac_t_0p2_0p4=0.1966 acc_corrupt_t_0p4_0p6=0.5423 corrupt_frac_t_0p4_0p6=0.2014 acc_corrupt_t_0p6_0p8=0.7290 corrupt_frac_t_0p6_0p8=0.2086 acc_corrupt_t_0p8_1p0=0.9119 corrupt_frac_t_0p8_1p0=0.1980 out_w_norm=57.6091 out_g_norm=0.2684 loss_all=4.1275 init_gold_top10=0.4555 init_gold_top100=0.4853
162
+ step=800 epoch=1/59 epoch_step=800/17061 micro_steps=3200 elapsed=242.0s lr=2.403000e-04 loss=3.7452 loss_recon=3.7452 loss_meanflow=0.0000 mean_model_t=0.5002 mean_corrupt_t=0.5002 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5149 corrupt_frac=1.0000 acc_corrupt=0.5149 loss_corrupt=3.7452 wrong_frac=0.4996 init_acc_corrupt=0.4653 acc_corrupt_t_0p0_0p2=0.0719 corrupt_frac_t_0p0_0p2=0.2013 acc_corrupt_t_0p2_0p4=0.3060 corrupt_frac_t_0p2_0p4=0.2027 acc_corrupt_t_0p4_0p6=0.5500 corrupt_frac_t_0p4_0p6=0.1976 acc_corrupt_t_0p6_0p8=0.7398 corrupt_frac_t_0p6_0p8=0.2008 acc_corrupt_t_0p8_1p0=0.9144 corrupt_frac_t_0p8_1p0=0.1988 out_w_norm=59.5190 out_g_norm=0.2504 loss_all=3.3060 init_gold_top10=0.5482 init_gold_top100=0.5781
163
+ step=850 epoch=1/59 epoch_step=850/17061 micro_steps=3400 elapsed=221.9s lr=2.553000e-04 loss=3.6764 loss_recon=3.6764 loss_meanflow=0.0000 mean_model_t=0.5017 mean_corrupt_t=0.5017 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5207 corrupt_frac=1.0000 acc_corrupt=0.5207 loss_corrupt=3.6764 wrong_frac=0.4984 init_acc_corrupt=0.4672 acc_corrupt_t_0p0_0p2=0.0725 corrupt_frac_t_0p0_0p2=0.1967 acc_corrupt_t_0p2_0p4=0.3063 corrupt_frac_t_0p2_0p4=0.1995 acc_corrupt_t_0p4_0p6=0.5525 corrupt_frac_t_0p4_0p6=0.1994 acc_corrupt_t_0p6_0p8=0.7426 corrupt_frac_t_0p6_0p8=0.2030 acc_corrupt_t_0p8_1p0=0.9156 corrupt_frac_t_0p8_1p0=0.2014 out_w_norm=61.4423 out_g_norm=0.2535 loss_all=3.6387 init_gold_top10=0.4996 init_gold_top100=0.5321
LTA_openwebtext_dualt/logs/fullycoupled_uniform_mask1_swiglu_wd0p1_fp32_4gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638.outer.log ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [launch] method=owt_fullycoupled_adamw_wd0p1_nanogpt_fp32 host=di-20260411014000-djqhq time=2026-05-17T13:36:38+00:00
2
+ [launch] run_name=lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638
3
+ [launch] save_dir=runs/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638
4
+ [launch] log_file=logs/fullycoupled_uniform_mask1_swiglu_wd0p1_fp32_4gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638.log
5
+ [launch] data_path=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext
6
+ [launch] owt_cache=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k
7
+ [launch] optimizer=adamw lr=6e-4 min_lr=6e-5 wd=0.1 output_wd=-1 param_groups=nanogpt ema=0.0
8
+ [launch] fp32=true bf16=false tf32=true norm_type=rmsnorm output_bias=false ddit_mlp_type=swiglu batch=512 per_gpu=32
9
+ [launch] loss_t_weight_mode=none loss_t_min_weight=0.0 loss_t_drop_below=0.2
10
+ [launch] target_loss=hard_ce t_sampling_mode=uniform t_sampling_logit_mean=-0.22 t_sampling_logit_std=0.5 t_sampling_power=1.0 t_sampling_eps=1e-4 mask_ratio=1.0->1.0
11
+ NCCL version 2.25.1+cuda12.8
12
+ {
13
+ "device": "cuda:0",
14
+ "rank": 0,
15
+ "world_size": 4,
16
+ "samples": "owt_cached_chunks:8734897",
17
+ "vocab_size": 50257,
18
+ "tokenizer_vocab_size": 50257,
19
+ "save_dir": "runs/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_swiglu_adamw_wd0p1_uniformt_hardce_mask1p0-1p0_nanogpt_fp32_ddit768x12_gbs512_4gpu_1m_20260517_133638",
20
+ "batch_size": 32,
21
+ "grad_accum": 4,
22
+ "effective_batch_size": 512,
23
+ "global_batch_size": 512,
24
+ "lr_schedule": "cosine",
25
+ "optimizer": "adamw",
26
+ "epochs": 0.0,
27
+ "steps_per_epoch": 17061,
28
+ "total_steps": 1000000,
29
+ "warmup_steps": 2000,
30
+ "warmup_epochs": -1.0,
31
+ "min_lr": 6e-05,
32
+ "weight_decay": 0.1,
33
+ "output_weight_decay": -1.0,
34
+ "adamw_param_groups": "nanogpt",
35
+ "adam_beta1": 0.9,
36
+ "adam_beta2": 0.95,
37
+ "adam_eps": 1e-08,
38
+ "muon_impl": "legacy",
39
+ "muon_momentum": 0.95,
40
+ "muon_ns_steps": 5,
41
+ "muon_update_scale": 1.0,
42
+ "muon_nesterov": false,
43
+ "muon_width_scale": false,
44
+ "muon_grouping": "",
45
+ "muon_param_count": 0,
46
+ "muon_adam_param_count": 0,
47
+ "muon_param_names": [],
48
+ "muon_adam_param_names": [],
49
+ "muon_effective_nesterov": false,
50
+ "muon_effective_width_scale": false,
51
+ "muon_effective_weight_decay": 0.1,
52
+ "muon_adam_fallback_nesterov": false,
53
+ "muon_adam_fallback_weight_decay": 0.1,
54
+ "ema_decay": 0.0,
55
+ "ema_start_step": 0,
56
+ "model_type": "ddit",
57
+ "ddit_mlp_type": "swiglu",
58
+ "elf_num_time_tokens": 4,
59
+ "elf_num_model_mode_tokens": 0,
60
+ "qk_norm": true,
61
+ "output_bias": false,
62
+ "output_init_std": -1.0,
63
+ "norm_type": "rmsnorm",
64
+ "target_loss": "hard_ce",
65
+ "linear_soft_target_power": 1.0,
66
+ "linear_soft_target_min_conf": 0.0,
67
+ "linear_soft_target_max_conf": 1.0,
68
+ "t_sampling_mode": "uniform",
69
+ "t_sampling_power": 1.0,
70
+ "t_sampling_eps": 0.0001,
71
+ "t_sampling_logit_mean": -0.22,
72
+ "t_sampling_logit_std": 0.5,
73
+ "dual_t": true,
74
+ "corrupt_t_mode": "same",
75
+ "corrupt_min_t": 0.0,
76
+ "corrupt_max_t": 1.0,
77
+ "prefix_block_prob": 0.0,
78
+ "prefix_block_len": 128,
79
+ "mask_ratio_floor_schedule": "none",
80
+ "dirichlet_endpoint_mode": "categorical_dual_t",
81
+ "dirichlet_semantic_t_mode": "same",
82
+ "dirichlet_semantic_t_value": 0.0,
83
+ "dirichlet_semantic_t_curve": "linear",
84
+ "dirichlet_semantic_t_power": 1.0,
85
+ "endpoint_sequence_random_prob_alpha": 0.0,
86
+ "categorical_wrong_from_full_vocab": true,
87
+ "categorical_wrong_from_batch_valid_tokens": false,
88
+ "categorical_wrong_basin_token_ids": "",
89
+ "categorical_wrong_basin_prob": 0.0,
90
+ "categorical_wrong_unigram_prob": 0.0,
91
+ "categorical_wrong_uniform_prob": 0.0,
92
+ "categorical_wrong_corpus_unigram_path": "",
93
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
94
+ "categorical_wrong_basin_shared_prob": 0.0,
95
+ "categorical_wrong_unigram_shared_prob": 0.0,
96
+ "mask_mixture_original_prob": 0.0,
97
+ "mask_mixture_lowk_prob": 0.0,
98
+ "mask_mixture_lowcorrupt_prob": 0.0,
99
+ "mask_mixture_block_prob": 0.0,
100
+ "mask_mixture_all_prob": 0.0,
101
+ "mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
102
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
103
+ "mask_mixture_block_tokens": "64,128",
104
+ "simplex_bridge_sampler": "dirichlet",
105
+ "logistic_normal_sigma_min": 0.18,
106
+ "logistic_normal_sigma_max": 2.2,
107
+ "logistic_normal_tau_min": 0.65,
108
+ "logistic_normal_tau_max": 1.15,
109
+ "torch_compile": false,
110
+ "compile_mode": "max-autotune",
111
+ "state_format": "prob",
112
+ "meanflow_weight": 0.0,
113
+ "rollout_train_prob": 0.0,
114
+ "rollout_train_steps": 1,
115
+ "rollout_train_infer_steps": 64,
116
+ "rollout_train_temp": 1.45,
117
+ "rollout_train_max_gamma": 1.0,
118
+ "rollout_train_corrupt_only": true,
119
+ "rollout_train_samplewise": false,
120
+ "rollout_train_compute_always": false,
121
+ "bridge_noise_init": "logistic_normal",
122
+ "noise_sigma": -1.0,
123
+ "allow_tf32": true,
124
+ "activation_checkpointing": false,
125
+ "activation_checkpoint_interval": 1,
126
+ "activation_checkpoint_scope": "block",
127
+ "ddp_static_graph": false,
128
+ "ddp_gradient_as_bucket_view": true,
129
+ "blocking_data_transfer": false,
130
+ "dataloader_prefetch_factor": 4,
131
+ "full_train_stats": false,
132
+ "tokenized_hf": false,
133
+ "tokenized_pad_token": "pad",
134
+ "elf_conditional_hf": false,
135
+ "record_pad_truncate": false,
136
+ "record_add_eos": false,
137
+ "record_add_special_tokens": false,
138
+ "record_pad_token": "pad",
139
+ "record_shuffle_buffer": 10000,
140
+ "wrap": true,
141
+ "wrap_mode": "stream",
142
+ "wrap_record_buffer_size": 200,
143
+ "owt_cached_chunks": true,
144
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k",
145
+ "owt_chunk_cache_rebuild": false,
146
+ "owt_chunk_cache_write_batch": 4096,
147
+ "owt_exact_repeat_per_chunk": 0,
148
+ "online_chunk_shuffle": false,
149
+ "online_chunk_shuffle_buffer": 10000,
150
+ "openwebtext_split": "train_minus_100k",
151
+ "detokenizer": "auto",
152
+ "resolved_detokenizer": null,
153
+ "num_workers": 8,
154
+ "latest_every": 1000,
155
+ "resume_path": ""
156
+ }
157
+ step=50 epoch=1/59 epoch_step=50/17061 micro_steps=200 elapsed=388.8s lr=1.530000e-05 loss=10.7861 loss_recon=10.7861 loss_meanflow=0.0000 mean_model_t=0.4957 mean_corrupt_t=0.4957 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4577 corrupt_frac=1.0000 acc_corrupt=0.4577 loss_corrupt=10.7861 wrong_frac=0.5041 init_acc_corrupt=0.4610 acc_corrupt_t_0p0_0p2=0.0278 corrupt_frac_t_0p0_0p2=0.2087 acc_corrupt_t_0p2_0p4=0.2440 corrupt_frac_t_0p2_0p4=0.1978 acc_corrupt_t_0p4_0p6=0.4847 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.6804 corrupt_frac_t_0p6_0p8=0.1936 acc_corrupt_t_0p8_1p0=0.8739 corrupt_frac_t_0p8_1p0=0.2008 out_w_norm=0.4686 out_g_norm=0.7110 loss_all=10.6822 init_gold_top10=0.5175 init_gold_top100=0.5356
158
+ step=100 epoch=1/59 epoch_step=100/17061 micro_steps=400 elapsed=426.5s lr=3.030000e-05 loss=10.2172 loss_recon=10.2172 loss_meanflow=0.0000 mean_model_t=0.4967 mean_corrupt_t=0.4967 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2252 corrupt_frac=1.0000 acc_corrupt=0.2252 loss_corrupt=10.2172 wrong_frac=0.5029 init_acc_corrupt=0.4624 acc_corrupt_t_0p0_0p2=0.0406 corrupt_frac_t_0p0_0p2=0.1973 acc_corrupt_t_0p2_0p4=0.0829 corrupt_frac_t_0p2_0p4=0.2069 acc_corrupt_t_0p4_0p6=0.1761 corrupt_frac_t_0p4_0p6=0.2028 acc_corrupt_t_0p6_0p8=0.3049 corrupt_frac_t_0p6_0p8=0.1972 acc_corrupt_t_0p8_1p0=0.5321 corrupt_frac_t_0p8_1p0=0.1958 out_w_norm=4.1655 out_g_norm=1.4201 loss_all=9.6351 init_gold_top10=0.5489 init_gold_top100=0.5677
159
+ step=150 epoch=1/59 epoch_step=150/17061 micro_steps=600 elapsed=454.5s lr=4.530000e-05 loss=8.9059 loss_recon=8.9059 loss_meanflow=0.0000 mean_model_t=0.4954 mean_corrupt_t=0.4954 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1046 corrupt_frac=1.0000 acc_corrupt=0.1046 loss_corrupt=8.9059 wrong_frac=0.5044 init_acc_corrupt=0.4602 acc_corrupt_t_0p0_0p2=0.0359 corrupt_frac_t_0p0_0p2=0.2023 acc_corrupt_t_0p2_0p4=0.0433 corrupt_frac_t_0p2_0p4=0.2061 acc_corrupt_t_0p4_0p6=0.0754 corrupt_frac_t_0p4_0p6=0.2009 acc_corrupt_t_0p6_0p8=0.1354 corrupt_frac_t_0p6_0p8=0.1908 acc_corrupt_t_0p8_1p0=0.2374 corrupt_frac_t_0p8_1p0=0.1998 out_w_norm=12.0903 out_g_norm=1.5895 loss_all=8.1304 init_gold_top10=0.5795 init_gold_top100=0.6040
160
+ step=200 epoch=1/59 epoch_step=200/17061 micro_steps=800 elapsed=405.5s lr=6.030000e-05 loss=7.5686 loss_recon=7.5686 loss_meanflow=0.0000 mean_model_t=0.4978 mean_corrupt_t=0.4978 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0820 corrupt_frac=1.0000 acc_corrupt=0.0820 loss_corrupt=7.5686 wrong_frac=0.5022 init_acc_corrupt=0.4627 acc_corrupt_t_0p0_0p2=0.0357 corrupt_frac_t_0p0_0p2=0.2045 acc_corrupt_t_0p2_0p4=0.0444 corrupt_frac_t_0p2_0p4=0.1994 acc_corrupt_t_0p4_0p6=0.0623 corrupt_frac_t_0p4_0p6=0.2011 acc_corrupt_t_0p6_0p8=0.1070 corrupt_frac_t_0p6_0p8=0.1956 acc_corrupt_t_0p8_1p0=0.1626 corrupt_frac_t_0p8_1p0=0.1994 out_w_norm=21.3144 out_g_norm=1.2391 loss_all=7.0871 init_gold_top10=0.4736 init_gold_top100=0.5074
161
+ step=250 epoch=1/59 epoch_step=250/17061 micro_steps=1000 elapsed=280.9s lr=7.530000e-05 loss=6.2367 loss_recon=6.2367 loss_meanflow=0.0000 mean_model_t=0.5006 mean_corrupt_t=0.5006 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2156 corrupt_frac=1.0000 acc_corrupt=0.2156 loss_corrupt=6.2367 wrong_frac=0.4997 init_acc_corrupt=0.4660 acc_corrupt_t_0p0_0p2=0.0445 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.1035 corrupt_frac_t_0p2_0p4=0.2019 acc_corrupt_t_0p4_0p6=0.2063 corrupt_frac_t_0p4_0p6=0.1980 acc_corrupt_t_0p6_0p8=0.3031 corrupt_frac_t_0p6_0p8=0.2026 acc_corrupt_t_0p8_1p0=0.4207 corrupt_frac_t_0p8_1p0=0.1995 out_w_norm=30.2056 out_g_norm=0.5883 loss_all=4.9413 init_gold_top10=0.5498 init_gold_top100=0.5749
162
+ step=300 epoch=1/59 epoch_step=300/17061 micro_steps=1200 elapsed=280.4s lr=9.030000e-05 loss=4.7166 loss_recon=4.7166 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4289 corrupt_frac=1.0000 acc_corrupt=0.4289 loss_corrupt=4.7166 wrong_frac=0.5015 init_acc_corrupt=0.4639 acc_corrupt_t_0p0_0p2=0.0575 corrupt_frac_t_0p0_0p2=0.1995 acc_corrupt_t_0p2_0p4=0.2283 corrupt_frac_t_0p2_0p4=0.2008 acc_corrupt_t_0p4_0p6=0.4425 corrupt_frac_t_0p4_0p6=0.2034 acc_corrupt_t_0p6_0p8=0.6262 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.7964 corrupt_frac_t_0p8_1p0=0.1961 out_w_norm=38.0298 out_g_norm=0.2246 loss_all=4.2610 init_gold_top10=0.5237 init_gold_top100=0.5499
163
+ step=350 epoch=1/59 epoch_step=350/17061 micro_steps=1400 elapsed=330.1s lr=1.053000e-04 loss=4.3317 loss_recon=4.3317 loss_meanflow=0.0000 mean_model_t=0.5043 mean_corrupt_t=0.5043 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4660 corrupt_frac=1.0000 acc_corrupt=0.4660 loss_corrupt=4.3317 wrong_frac=0.4958 init_acc_corrupt=0.4701 acc_corrupt_t_0p0_0p2=0.0577 corrupt_frac_t_0p0_0p2=0.1980 acc_corrupt_t_0p2_0p4=0.2453 corrupt_frac_t_0p2_0p4=0.1964 acc_corrupt_t_0p4_0p6=0.4762 corrupt_frac_t_0p4_0p6=0.1986 acc_corrupt_t_0p6_0p8=0.6668 corrupt_frac_t_0p6_0p8=0.2000 acc_corrupt_t_0p8_1p0=0.8604 corrupt_frac_t_0p8_1p0=0.2090 out_w_norm=42.6140 out_g_norm=0.2474 loss_all=5.1987 init_gold_top10=0.3885 init_gold_top100=0.4312
164
+ step=400 epoch=1/59 epoch_step=400/17061 micro_steps=1600 elapsed=285.2s lr=1.203000e-04 loss=4.2408 loss_recon=4.2408 loss_meanflow=0.0000 mean_model_t=0.5011 mean_corrupt_t=0.5011 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4764 corrupt_frac=1.0000 acc_corrupt=0.4764 loss_corrupt=4.2408 wrong_frac=0.4990 init_acc_corrupt=0.4663 acc_corrupt_t_0p0_0p2=0.0571 corrupt_frac_t_0p0_0p2=0.1959 acc_corrupt_t_0p2_0p4=0.2487 corrupt_frac_t_0p2_0p4=0.2062 acc_corrupt_t_0p4_0p6=0.4926 corrupt_frac_t_0p4_0p6=0.1989 acc_corrupt_t_0p6_0p8=0.6913 corrupt_frac_t_0p6_0p8=0.1939 acc_corrupt_t_0p8_1p0=0.8873 corrupt_frac_t_0p8_1p0=0.2050 out_w_norm=45.7084 out_g_norm=0.2444 loss_all=4.2385 init_gold_top10=0.4913 init_gold_top100=0.5316
165
+ step=450 epoch=1/59 epoch_step=450/17061 micro_steps=1800 elapsed=277.1s lr=1.353000e-04 loss=4.1617 loss_recon=4.1617 loss_meanflow=0.0000 mean_model_t=0.5048 mean_corrupt_t=0.5048 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4857 corrupt_frac=1.0000 acc_corrupt=0.4857 loss_corrupt=4.1617 wrong_frac=0.4951 init_acc_corrupt=0.4704 acc_corrupt_t_0p0_0p2=0.0586 corrupt_frac_t_0p0_0p2=0.1971 acc_corrupt_t_0p2_0p4=0.2521 corrupt_frac_t_0p2_0p4=0.2003 acc_corrupt_t_0p4_0p6=0.4975 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.6955 corrupt_frac_t_0p6_0p8=0.1892 acc_corrupt_t_0p8_1p0=0.8958 corrupt_frac_t_0p8_1p0=0.2162 out_w_norm=47.9240 out_g_norm=0.2380 loss_all=3.6068 init_gold_top10=0.5674 init_gold_top100=0.5807
166
+ step=500 epoch=1/59 epoch_step=500/17061 micro_steps=2000 elapsed=300.5s lr=1.503000e-04 loss=4.1949 loss_recon=4.1949 loss_meanflow=0.0000 mean_model_t=0.4975 mean_corrupt_t=0.4975 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4796 corrupt_frac=1.0000 acc_corrupt=0.4796 loss_corrupt=4.1949 wrong_frac=0.5028 init_acc_corrupt=0.4625 acc_corrupt_t_0p0_0p2=0.0590 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.2582 corrupt_frac_t_0p2_0p4=0.2053 acc_corrupt_t_0p4_0p6=0.4971 corrupt_frac_t_0p4_0p6=0.1923 acc_corrupt_t_0p6_0p8=0.6952 corrupt_frac_t_0p6_0p8=0.2052 acc_corrupt_t_0p8_1p0=0.8972 corrupt_frac_t_0p8_1p0=0.1975 out_w_norm=49.5350 out_g_norm=0.2347 loss_all=3.9710 init_gold_top10=0.5273 init_gold_top100=0.5571
167
+ step=550 epoch=1/59 epoch_step=550/17061 micro_steps=2200 elapsed=287.1s lr=1.653000e-04 loss=4.1088 loss_recon=4.1088 loss_meanflow=0.0000 mean_model_t=0.5066 mean_corrupt_t=0.5066 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4899 corrupt_frac=1.0000 acc_corrupt=0.4899 loss_corrupt=4.1088 wrong_frac=0.4935 init_acc_corrupt=0.4726 acc_corrupt_t_0p0_0p2=0.0611 corrupt_frac_t_0p0_0p2=0.1905 acc_corrupt_t_0p2_0p4=0.2599 corrupt_frac_t_0p2_0p4=0.2028 acc_corrupt_t_0p4_0p6=0.4995 corrupt_frac_t_0p4_0p6=0.2010 acc_corrupt_t_0p6_0p8=0.7002 corrupt_frac_t_0p6_0p8=0.2051 acc_corrupt_t_0p8_1p0=0.9015 corrupt_frac_t_0p8_1p0=0.2027 out_w_norm=51.0302 out_g_norm=0.2278 loss_all=4.3138 init_gold_top10=0.4809 init_gold_top100=0.5140
168
+ step=600 epoch=1/59 epoch_step=600/17061 micro_steps=2400 elapsed=327.2s lr=1.803000e-04 loss=4.1122 loss_recon=4.1122 loss_meanflow=0.0000 mean_model_t=0.5025 mean_corrupt_t=0.5025 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4889 corrupt_frac=1.0000 acc_corrupt=0.4889 loss_corrupt=4.1122 wrong_frac=0.4977 init_acc_corrupt=0.4679 acc_corrupt_t_0p0_0p2=0.0605 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.2628 corrupt_frac_t_0p2_0p4=0.1970 acc_corrupt_t_0p4_0p6=0.5033 corrupt_frac_t_0p4_0p6=0.1941 acc_corrupt_t_0p6_0p8=0.7018 corrupt_frac_t_0p6_0p8=0.2026 acc_corrupt_t_0p8_1p0=0.8992 corrupt_frac_t_0p8_1p0=0.2078 out_w_norm=52.4291 out_g_norm=0.2511 loss_all=4.0709 init_gold_top10=0.4996 init_gold_top100=0.5313
169
+ step=650 epoch=1/59 epoch_step=650/17061 micro_steps=2600 elapsed=268.4s lr=1.953000e-04 loss=4.0279 loss_recon=4.0279 loss_meanflow=0.0000 mean_model_t=0.5036 mean_corrupt_t=0.5036 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4964 corrupt_frac=1.0000 acc_corrupt=0.4964 loss_corrupt=4.0279 wrong_frac=0.4964 init_acc_corrupt=0.4702 acc_corrupt_t_0p0_0p2=0.0627 corrupt_frac_t_0p0_0p2=0.1948 acc_corrupt_t_0p2_0p4=0.2751 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.5146 corrupt_frac_t_0p4_0p6=0.2028 acc_corrupt_t_0p6_0p8=0.7119 corrupt_frac_t_0p6_0p8=0.2009 acc_corrupt_t_0p8_1p0=0.9017 corrupt_frac_t_0p8_1p0=0.2017 out_w_norm=53.9562 out_g_norm=0.2470 loss_all=3.3140 init_gold_top10=0.5782 init_gold_top100=0.5980
170
+ step=700 epoch=1/59 epoch_step=700/17061 micro_steps=2800 elapsed=286.2s lr=2.103000e-04 loss=3.9270 loss_recon=3.9270 loss_meanflow=0.0000 mean_model_t=0.5033 mean_corrupt_t=0.5033 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5030 corrupt_frac=1.0000 acc_corrupt=0.5030 loss_corrupt=3.9270 wrong_frac=0.4966 init_acc_corrupt=0.4694 acc_corrupt_t_0p0_0p2=0.0662 corrupt_frac_t_0p0_0p2=0.1959 acc_corrupt_t_0p2_0p4=0.2824 corrupt_frac_t_0p2_0p4=0.1964 acc_corrupt_t_0p4_0p6=0.5242 corrupt_frac_t_0p4_0p6=0.2042 acc_corrupt_t_0p6_0p8=0.7141 corrupt_frac_t_0p6_0p8=0.1988 acc_corrupt_t_0p8_1p0=0.9067 corrupt_frac_t_0p8_1p0=0.2047 out_w_norm=55.6574 out_g_norm=0.2686 loss_all=3.8679 init_gold_top10=0.4979 init_gold_top100=0.5239
171
+ step=750 epoch=1/59 epoch_step=750/17061 micro_steps=3000 elapsed=268.1s lr=2.253000e-04 loss=3.7913 loss_recon=3.7913 loss_meanflow=0.0000 mean_model_t=0.5050 mean_corrupt_t=0.5050 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5137 corrupt_frac=1.0000 acc_corrupt=0.5137 loss_corrupt=3.7913 wrong_frac=0.4948 init_acc_corrupt=0.4710 acc_corrupt_t_0p0_0p2=0.0713 corrupt_frac_t_0p0_0p2=0.1955 acc_corrupt_t_0p2_0p4=0.2947 corrupt_frac_t_0p2_0p4=0.1966 acc_corrupt_t_0p4_0p6=0.5423 corrupt_frac_t_0p4_0p6=0.2014 acc_corrupt_t_0p6_0p8=0.7290 corrupt_frac_t_0p6_0p8=0.2086 acc_corrupt_t_0p8_1p0=0.9119 corrupt_frac_t_0p8_1p0=0.1980 out_w_norm=57.6091 out_g_norm=0.2684 loss_all=4.1275 init_gold_top10=0.4555 init_gold_top100=0.4853
172
+ step=800 epoch=1/59 epoch_step=800/17061 micro_steps=3200 elapsed=242.0s lr=2.403000e-04 loss=3.7452 loss_recon=3.7452 loss_meanflow=0.0000 mean_model_t=0.5002 mean_corrupt_t=0.5002 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5149 corrupt_frac=1.0000 acc_corrupt=0.5149 loss_corrupt=3.7452 wrong_frac=0.4996 init_acc_corrupt=0.4653 acc_corrupt_t_0p0_0p2=0.0719 corrupt_frac_t_0p0_0p2=0.2013 acc_corrupt_t_0p2_0p4=0.3060 corrupt_frac_t_0p2_0p4=0.2027 acc_corrupt_t_0p4_0p6=0.5500 corrupt_frac_t_0p4_0p6=0.1976 acc_corrupt_t_0p6_0p8=0.7398 corrupt_frac_t_0p6_0p8=0.2008 acc_corrupt_t_0p8_1p0=0.9144 corrupt_frac_t_0p8_1p0=0.1988 out_w_norm=59.5190 out_g_norm=0.2504 loss_all=3.3060 init_gold_top10=0.5482 init_gold_top100=0.5781
173
+ step=850 epoch=1/59 epoch_step=850/17061 micro_steps=3400 elapsed=221.9s lr=2.553000e-04 loss=3.6764 loss_recon=3.6764 loss_meanflow=0.0000 mean_model_t=0.5017 mean_corrupt_t=0.5017 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5207 corrupt_frac=1.0000 acc_corrupt=0.5207 loss_corrupt=3.6764 wrong_frac=0.4984 init_acc_corrupt=0.4672 acc_corrupt_t_0p0_0p2=0.0725 corrupt_frac_t_0p0_0p2=0.1967 acc_corrupt_t_0p2_0p4=0.3063 corrupt_frac_t_0p2_0p4=0.1995 acc_corrupt_t_0p4_0p6=0.5525 corrupt_frac_t_0p4_0p6=0.1994 acc_corrupt_t_0p6_0p8=0.7426 corrupt_frac_t_0p6_0p8=0.2030 acc_corrupt_t_0p8_1p0=0.9156 corrupt_frac_t_0p8_1p0=0.2014 out_w_norm=61.4423 out_g_norm=0.2535 loss_all=3.6387 init_gold_top10=0.4996 init_gold_top100=0.5321
174
+ Terminated
LTA_openwebtext_dualt/logs/genppl_lm1b_step_latest_k1024_s128_flm.log ADDED
File without changes
LTA_openwebtext_dualt/logs/infer_owt_compact_v2048_ckpt_sweep_steps128_c256_temps_n8_large_20260520_205159.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/infer_owt_compact_v8192_probe_flow_onehot_steps128_c1024_t1p45_n8_large_20260520_201801.log ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [infer] step=20359 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step20359_flow_onehot_steps128_c1024_t1p45.jsonl
2
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0020359.pt step=20359
3
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
6
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
7
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
8
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
9
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
10
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
11
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
12
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
13
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0020359.pt", "step": 20359, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 81.02696275879805, "nll_per_token": 4.394781972847733, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 89.86994499964428, "nll_per_token": 4.4983635696710325, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 5.328659805364104, "unique_tokens": 1826, "token_count": 8192, "distinct_1": 0.222900390625, "distinct_2": 0.6592130987292277, "top_token_mass": 0.03173828125}}
14
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step20359_flow_onehot_steps128_c1024_t1p45.jsonl
15
+ [infer] step=81436 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step81436_flow_onehot_steps128_c1024_t1p45.jsonl
16
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0081436.pt step=81436
17
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
18
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
19
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
20
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
21
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
22
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
23
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
24
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
25
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
26
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
27
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0081436.pt", "step": 81436, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 59.11577517968969, "nll_per_token": 4.079497812308517, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 68.59219993052562, "nll_per_token": 4.228178824630438, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.876995635387916, "unique_tokens": 1754, "token_count": 8192, "distinct_1": 0.214111328125, "distinct_2": 0.5983626588465298, "top_token_mass": 0.0494384765625}}
28
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step81436_flow_onehot_steps128_c1024_t1p45.jsonl
29
+ [infer] step=142513 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step142513_flow_onehot_steps128_c1024_t1p45.jsonl
30
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt step=142513
31
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
32
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
33
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
34
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
35
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
36
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
37
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
38
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
39
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
40
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
41
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 24.558909049838665, "nll_per_token": 3.2010746824975107, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 24.95213814549325, "nll_per_token": 3.2169595157398896, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.367480129322778, "unique_tokens": 750, "token_count": 8192, "distinct_1": 0.091552734375, "distinct_2": 0.34616324535679377, "top_token_mass": 0.0933837890625}}
42
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_ckpt_probe_flow_onehot_steps128_c1024_t1p45_n8_large/step142513_flow_onehot_steps128_c1024_t1p45.jsonl
LTA_openwebtext_dualt/logs/infer_owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large_20260520_202516.log ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [sweep] ckpt=runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt
2
+ [infer] c=1024 temps=1.45,1.60,1.80,2.00 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c1024_temps.jsonl
3
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt step=142513
4
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
5
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
6
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
7
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
8
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
9
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
10
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
11
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
12
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
13
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
14
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
15
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
16
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
17
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
18
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
19
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
20
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
21
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
22
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
23
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
24
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
25
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
26
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
27
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
28
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
29
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
30
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
31
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
32
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
33
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
34
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
35
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
36
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
37
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
38
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 24.558909049838665, "nll_per_token": 3.2010746824975107, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 24.95213814549325, "nll_per_token": 3.2169595157398896, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.367480129322778, "unique_tokens": 750, "token_count": 8192, "distinct_1": 0.091552734375, "distinct_2": 0.34616324535679377, "top_token_mass": 0.0933837890625}}
39
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.6, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 61.90123501159806, "nll_per_token": 4.125540131213619, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 61.82879988490115, "nll_per_token": 4.124369273466223, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.8654221065259846, "unique_tokens": 241, "token_count": 8192, "distinct_1": 0.0294189453125, "distinct_2": 0.2380254154447703, "top_token_mass": 0.1497802734375}}
40
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.8, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 236.10354018277863, "nll_per_token": 5.46427043839997, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 236.38662406204426, "nll_per_token": 5.465468702129289, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.426329846830298, "unique_tokens": 334, "token_count": 8192, "distinct_1": 0.040771484375, "distinct_2": 0.29056695992179865, "top_token_mass": 0.1021728515625}}
41
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 2.0, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 797.2969082577021, "nll_per_token": 6.681227141735601, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 797.2969082577021, "nll_per_token": 6.681227141735601, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.9980454244051566, "unique_tokens": 594, "token_count": 8192, "distinct_1": 0.072509765625, "distinct_2": 0.43169599217986315, "top_token_mass": 0.0872802734375}}
42
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c1024_temps.jsonl
43
+ [infer] c=512 temps=1.45,1.60,1.80,2.00 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c512_temps.jsonl
44
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt step=142513
45
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
46
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
47
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
48
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
49
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
50
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
51
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
52
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
53
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
54
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
55
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
56
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
57
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
58
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
59
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
60
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
61
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
62
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
63
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
64
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
65
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
66
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
67
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
68
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
69
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
70
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
71
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
72
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
73
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
74
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
75
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
76
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
77
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
78
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
79
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 512.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 35.595751158665244, "nll_per_token": 3.572226281259574, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 36.84510754064624, "nll_per_token": 3.6067228429457723, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.6511284538790205, "unique_tokens": 651, "token_count": 8192, "distinct_1": 0.0794677734375, "distinct_2": 0.35373900293255134, "top_token_mass": 0.039306640625}}
80
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 512.0, "target_prob": 1.0, "endpoint_temp": 1.6, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 125.87374709240041, "nll_per_token": 4.835279397403492, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 126.45687922544968, "nll_per_token": 4.8399013743681065, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.827819755664881, "unique_tokens": 429, "token_count": 8192, "distinct_1": 0.0523681640625, "distinct_2": 0.34787390029325516, "top_token_mass": 0.0482177734375}}
81
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 512.0, "target_prob": 1.0, "endpoint_temp": 1.8, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 494.19662154369934, "nll_per_token": 6.2029334573184745, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 495.8881854021738, "nll_per_token": 6.206350468654259, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.102994734668988, "unique_tokens": 806, "token_count": 8192, "distinct_1": 0.098388671875, "distinct_2": 0.5232160312805474, "top_token_mass": 0.0655517578125}}
82
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 512.0, "target_prob": 1.0, "endpoint_temp": 2.0, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 2772.9985691507204, "nll_per_token": 7.927684529622396, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 2757.659277476166, "nll_per_token": 7.922137511010264, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 5.166956872628071, "unique_tokens": 1542, "token_count": 8192, "distinct_1": 0.188232421875, "distinct_2": 0.7913000977517106, "top_token_mass": 0.0462646484375}}
83
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c512_temps.jsonl
84
+ [infer] c=256 temps=1.45,1.60,1.80,2.00 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c256_temps.jsonl
85
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt step=142513
86
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
87
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
88
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
89
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
90
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
91
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
92
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
93
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
94
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
95
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
96
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
97
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
98
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
99
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
100
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
101
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
102
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
103
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
104
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
105
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
106
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
107
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
108
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
109
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
110
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
111
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
112
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
113
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
114
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
115
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
116
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
117
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
118
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
119
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
120
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 256.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 93.24024737790586, "nll_per_token": 4.53517946729473, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 99.5832169983887, "nll_per_token": 4.600993646359911, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.271051344525033, "unique_tokens": 634, "token_count": 8192, "distinct_1": 0.077392578125, "distinct_2": 0.5084310850439883, "top_token_mass": 0.04736328125}}
121
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 256.0, "target_prob": 1.0, "endpoint_temp": 1.6, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 82.17690506325746, "nll_per_token": 4.40887430228439, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 82.90081842761556, "nll_per_token": 4.417644934560738, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.701395777792879, "unique_tokens": 371, "token_count": 8192, "distinct_1": 0.0452880859375, "distinct_2": 0.3543499511241447, "top_token_mass": 0.0709228515625}}
122
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 256.0, "target_prob": 1.0, "endpoint_temp": 1.8, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 415.99116937865665, "nll_per_token": 6.030664032580805, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 417.34936172966326, "nll_per_token": 6.033923668954887, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.6335167480868, "unique_tokens": 1248, "token_count": 8192, "distinct_1": 0.15234375, "distinct_2": 0.6768084066471163, "top_token_mass": 0.0369873046875}}
123
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 256.0, "target_prob": 1.0, "endpoint_temp": 2.0, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 5750.184435097303, "nll_per_token": 8.656987208946079, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 5745.7928768823385, "nll_per_token": 8.65622319240196, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 5.908193959743645, "unique_tokens": 2292, "token_count": 8192, "distinct_1": 0.27978515625, "distinct_2": 0.9499022482893451, "top_token_mass": 0.0211181640625}}
124
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c256_temps.jsonl
125
+ [infer] c=128 temps=1.45,1.60,1.80,2.00 out=docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c128_temps.jsonl
126
+ [ckpt] runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt step=142513
127
+ [decode-base] n=8 max_len=1024 steps=128 model_t=flow
128
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
129
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
130
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
131
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
132
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
133
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
134
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
135
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
136
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
137
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
138
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
139
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
140
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
141
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
142
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
143
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
144
+ [decode] temp=1.60 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
145
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
146
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
147
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
148
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
149
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
150
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
151
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
152
+ [decode] temp=1.80 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
153
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
154
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
155
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
156
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
157
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
158
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
159
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
160
+ [decode] temp=2.00 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
161
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 160.43586427465166, "nll_per_token": 5.07789426317402, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 165.1494624806168, "nll_per_token": 5.10685089709712, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.445461443708831, "unique_tokens": 847, "token_count": 8192, "distinct_1": 0.1033935546875, "distinct_2": 0.5334799608993157, "top_token_mass": 0.029541015625}}
162
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.6, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 216.4485436312606, "nll_per_token": 5.377352845435049, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 218.31928970818086, "nll_per_token": 5.385958622951134, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.9082948069564583, "unique_tokens": 565, "token_count": 8192, "distinct_1": 0.0689697265625, "distinct_2": 0.4560117302052786, "top_token_mass": 0.1065673828125}}
163
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.8, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 428.9966791804292, "nll_per_token": 6.061449178059896, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 432.2821554908295, "nll_per_token": 6.069078512752758, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 4.355772572908551, "unique_tokens": 1079, "token_count": 8192, "distinct_1": 0.1317138671875, "distinct_2": 0.5505865102639296, "top_token_mass": 0.0419921875}}
164
+ [summary] {"type": "summary", "checkpoint": "runs/lta_owt_compact_gpt2bpe_v8192_stream1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_uniformt_hardce_mask0p1-1p0_fp32_ddit768x12_gbs512_8gpu_1m_20260519_201817/step_0142513.pt", "step": 142513, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 2.0, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 8196.449066792626, "nll_per_token": 9.011456298828126, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 8202.819703826315, "nll_per_token": 9.012233240464154, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 6.2585507689933495, "unique_tokens": 2703, "token_count": 8192, "distinct_1": 0.3299560546875, "distinct_2": 0.9870478983382209, "top_token_mass": 0.0123291015625}}
165
+ [done] docs/lta_samples/metrics_20260520/owt_compact_v8192_step142k_flow_onehot_argmax_sweep_n8_large/c128_temps.jsonl
166
+ cmax temp raw stripped entropy unique top_mass tokens kept
167
+ 128.0 1.45 160.4359 165.1495 4.4455 847 0.0295 2040 8
168
+ 128.0 1.60 216.4485 218.3193 3.9083 565 0.1066 2040 8
169
+ 128.0 1.80 428.9967 432.2822 4.3558 1079 0.0420 2040 8
170
+ 128.0 2.00 8196.4491 8202.8197 6.2586 2703 0.0123 2040 8
171
+ 256.0 1.45 93.2402 99.5832 4.2711 634 0.0474 2040 8
172
+ 256.0 1.60 82.1769 82.9008 3.7014 371 0.0709 2040 8
173
+ 256.0 1.80 415.9912 417.3494 4.6335 1248 0.0370 2040 8
174
+ 256.0 2.00 5750.1844 5745.7929 5.9082 2292 0.0211 2040 8
175
+ 512.0 1.45 35.5958 36.8451 3.6511 651 0.0393 2040 8
176
+ 512.0 1.60 125.8737 126.4569 3.8278 429 0.0482 2040 8
177
+ 512.0 1.80 494.1966 495.8882 4.1030 806 0.0656 2040 8
178
+ 512.0 2.00 2772.9986 2757.6593 5.1670 1542 0.0463 2040 8
179
+ 1024.0 1.45 24.5589 24.9521 3.3675 750 0.0934 2040 8
180
+ 1024.0 1.60 61.9012 61.8288 2.8654 241 0.1498 2040 8
181
+ 1024.0 1.80 236.1035 236.3866 3.4263 334 0.1022 2040 8
182
+ 1024.0 2.00 797.2969 797.2969 3.9980 594 0.0873 2040 8
183
+
184
+ [rank target-ish entropy>=4.5 raw<80]
LTA_openwebtext_dualt/logs/infer_owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8.log ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
2
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
3
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
4
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
5
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
6
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
7
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
8
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
9
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
10
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
11
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
12
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.55, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 54.220414987338124, "nll_per_token": 3.993057497809915, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 54.851453523563, "nll_per_token": 4.004628686343922, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.7687910224042147, "unique_tokens": 2329, "token_count": 8192, "distinct_1": 0.2843017578125, "distinct_2": 0.38428641251221896, "top_token_mass": 0.0831298828125}}
13
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8/t1p55.jsonl
14
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
15
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
16
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
17
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
18
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
19
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
20
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
21
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
22
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
23
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
24
+ [decode] temp=1.60 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
25
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.6, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 6.049422227527874, "nll_per_token": 1.799962767900205, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 6.08581982781651, "nll_per_token": 1.8059614466685874, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.3043278884277236, "unique_tokens": 915, "token_count": 8192, "distinct_1": 0.1116943359375, "distinct_2": 0.13734115347018572, "top_token_mass": 0.125}}
26
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8/t1p60.jsonl
27
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
28
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
29
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
30
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
31
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
32
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
33
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
34
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
35
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
36
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
37
+ [decode] temp=1.65 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
38
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.65, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 75.77343953176887, "nll_per_token": 4.32774782928766, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 76.36644019355695, "nll_per_token": 4.335543335185331, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.274294560163754, "unique_tokens": 2246, "token_count": 8192, "distinct_1": 0.274169921875, "distinct_2": 0.36656891495601174, "top_token_mass": 0.1500244140625}}
39
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8/t1p65.jsonl
40
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
41
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
42
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
43
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
44
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
45
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
46
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
47
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
48
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
49
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
50
+ [decode] temp=1.70 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
51
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.7, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 1397.261927910019, "nll_per_token": 7.242269834817624, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 1403.9901107955734, "nll_per_token": 7.247073540968054, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 5.037653085441597, "unique_tokens": 4476, "token_count": 8192, "distinct_1": 0.54638671875, "distinct_2": 0.7487781036168133, "top_token_mass": 0.25}}
52
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_c128_lowtemp_n8/t1p70.jsonl
53
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
54
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
55
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
56
+ [decode] temp=1.75 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
57
+ [decode] temp=1.75 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
58
+ [decode] temp=1.75 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
LTA_openwebtext_dualt/logs/infer_owt_t5_2node_latest_trainmatched_dirres_grid_n8.log ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
2
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
3
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
4
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
5
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
6
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
7
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
8
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
9
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
10
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
11
+ [decode] temp=1.50 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
12
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.5, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 2.3705850238950044, "nll_per_token": 0.8631367702110141, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 2.3705850238950044, "nll_per_token": 0.8631367702110141, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.9894434601248572, "unique_tokens": 194, "token_count": 8192, "distinct_1": 0.023681640625, "distinct_2": 0.050342130987292275, "top_token_mass": 0.1419677734375}}
13
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_grid_n8/cmax128_t1p50.jsonl
14
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
15
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
16
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
17
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
18
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
19
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
20
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
21
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
22
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
23
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
24
+ [decode] temp=1.80 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
25
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt", "step": 101000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.8, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260519}, "raw_genppl": {"ppl": 15174.468645389643, "nll_per_token": 9.627369600183824, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 15291.225085679736, "nll_per_token": 9.635034419041054, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 6.7776062571987445, "unique_tokens": 5795, "token_count": 8192, "distinct_1": 0.7073974609375, "distinct_2": 0.9993890518084066, "top_token_mass": 0.0107421875}}
26
+ [done] docs/lta_samples/metrics_20260519/owt_t5_2node_latest_trainmatched_dirres_grid_n8/cmax128_t1p80.jsonl
27
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step101200_20260519_092441.pt step=101000
28
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
29
+ [decode-time] schedule=linear s=[0.0,0.25] force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
30
+ [decode] temp=2.00 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
31
+ [decode] temp=2.00 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
32
+ [decode] temp=2.00 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
LTA_openwebtext_dualt/logs/infer_owt_t5_2node_step290000_compare_n8_20260520_200659.log ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [infer] src=runs/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest.pt
2
+ [infer] frozen=eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt step=290000
3
+ [infer] tag=dual_state_c1024_t1p45 rule=dual_line_resample anchor=state cmax=1024 temp=1.45 out=docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dual_state_c1024_t1p45.jsonl
4
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt step=290000
5
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
6
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
7
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
8
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
9
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
10
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
11
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
12
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
13
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
14
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
15
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt", "step": 290000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 49.921613025080326, "nll_per_token": 3.9104540357402726, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 80.28644877394166, "nll_per_token": 4.385600849226409, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.834606444575173, "unique_tokens": 256, "token_count": 8192, "distinct_1": 0.03125, "distinct_2": 0.08870967741935484, "top_token_mass": 0.475830078125}}
16
+ [done] docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dual_state_c1024_t1p45.jsonl
17
+ [infer] tag=dual_onehot_c1024_t1p45 rule=dual_line_resample anchor=onehot cmax=1024 temp=1.45 out=docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dual_onehot_c1024_t1p45.jsonl
18
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt step=290000
19
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
20
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
21
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
22
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
23
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
24
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
25
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
26
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
27
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
28
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
29
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt", "step": 290000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 21.517578062657247, "nll_per_token": 3.068870185403263, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 38.67711208881378, "nll_per_token": 3.655248006184896, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.5010731185605346, "unique_tokens": 253, "token_count": 8192, "distinct_1": 0.0308837890625, "distinct_2": 0.0846774193548387, "top_token_mass": 0.4466552734375}}
30
+ [done] docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dual_onehot_c1024_t1p45.jsonl
31
+ [infer] tag=dirres_c128_t1p55 rule=dirichlet_resample anchor=onehot cmax=128 temp=1.55 out=docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dirres_c128_t1p55.jsonl
32
+ [ckpt] eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt step=290000
33
+ [decode-base] n=8 max_len=1024 steps=1024 model_t=post
34
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.000977 dt_max=0.000977
35
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 1/8
36
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 2/8
37
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 3/8
38
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 4/8
39
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 5/8
40
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 6/8
41
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 7/8
42
+ [decode] temp=1.55 final=state rule=dirichlet_resample support=1 semantic=1 anchor=onehot cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 8/8
43
+ [summary] {"type": "summary", "checkpoint": "eval_ckpts/lta_owt_t5_adaln_adamw_wd0p1_rollin_p50_randk0_3_uniformt_temp1_synct_gbs512_2node8gpu_1m_t-20260518224737-tftgw/latest_frozen_step290000_20260520_200659.pt", "step": 290000, "decode": {"steps": 1024, "model_t_mode": "post", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0009765625, 0.001953125, 0.0029296875, 0.00390625, 0.0048828125, 0.005859375, 0.0068359375, 0.0078125, 0.0087890625, 0.009765625, 0.0107421875, 0.01171875, 0.0126953125, 0.013671875, 0.0146484375, 0.015625, 0.0166015625, 0.017578125, 0.0185546875, 0.01953125, 0.0205078125, 0.021484375, 0.0224609375, 0.0234375, 0.0244140625, 0.025390625, 0.0263671875, 0.02734375, 0.0283203125, 0.029296875, 0.0302734375, 0.03125, 0.0322265625, 0.033203125, 0.0341796875, 0.03515625, 0.0361328125, 0.037109375, 0.0380859375, 0.0390625, 0.0400390625, 0.041015625, 0.0419921875, 0.04296875, 0.0439453125, 0.044921875, 0.0458984375, 0.046875, 0.0478515625, 0.048828125, 0.0498046875, 0.05078125, 0.0517578125, 0.052734375, 0.0537109375, 0.0546875, 0.0556640625, 0.056640625, 0.0576171875, 0.05859375, 0.0595703125, 0.060546875, 0.0615234375, 0.0625, 0.0634765625, 0.064453125, 0.0654296875, 0.06640625, 0.0673828125, 0.068359375, 0.0693359375, 0.0703125, 0.0712890625, 0.072265625, 0.0732421875, 0.07421875, 0.0751953125, 0.076171875, 0.0771484375, 0.078125, 0.0791015625, 0.080078125, 0.0810546875, 0.08203125, 0.0830078125, 0.083984375, 0.0849609375, 0.0859375, 0.0869140625, 0.087890625, 0.0888671875, 0.08984375, 0.0908203125, 0.091796875, 0.0927734375, 0.09375, 0.0947265625, 0.095703125, 0.0966796875, 0.09765625, 0.0986328125, 0.099609375, 0.1005859375, 0.1015625, 0.1025390625, 0.103515625, 0.1044921875, 0.10546875, 0.1064453125, 0.107421875, 0.1083984375, 0.109375, 0.1103515625, 0.111328125, 0.1123046875, 0.11328125, 0.1142578125, 0.115234375, 0.1162109375, 0.1171875, 0.1181640625, 0.119140625, 0.1201171875, 0.12109375, 0.1220703125, 0.123046875, 0.1240234375, 0.125, 0.1259765625, 0.126953125, 0.1279296875, 0.12890625, 0.1298828125, 0.130859375, 0.1318359375, 0.1328125, 0.1337890625, 0.134765625, 0.1357421875, 0.13671875, 0.1376953125, 0.138671875, 0.1396484375, 0.140625, 0.1416015625, 0.142578125, 0.1435546875, 0.14453125, 0.1455078125, 0.146484375, 0.1474609375, 0.1484375, 0.1494140625, 0.150390625, 0.1513671875, 0.15234375, 0.1533203125, 0.154296875, 0.1552734375, 0.15625, 0.1572265625, 0.158203125, 0.1591796875, 0.16015625, 0.1611328125, 0.162109375, 0.1630859375, 0.1640625, 0.1650390625, 0.166015625, 0.1669921875, 0.16796875, 0.1689453125, 0.169921875, 0.1708984375, 0.171875, 0.1728515625, 0.173828125, 0.1748046875, 0.17578125, 0.1767578125, 0.177734375, 0.1787109375, 0.1796875, 0.1806640625, 0.181640625, 0.1826171875, 0.18359375, 0.1845703125, 0.185546875, 0.1865234375, 0.1875, 0.1884765625, 0.189453125, 0.1904296875, 0.19140625, 0.1923828125, 0.193359375, 0.1943359375, 0.1953125, 0.1962890625, 0.197265625, 0.1982421875, 0.19921875, 0.2001953125, 0.201171875, 0.2021484375, 0.203125, 0.2041015625, 0.205078125, 0.2060546875, 0.20703125, 0.2080078125, 0.208984375, 0.2099609375, 0.2109375, 0.2119140625, 0.212890625, 0.2138671875, 0.21484375, 0.2158203125, 0.216796875, 0.2177734375, 0.21875, 0.2197265625, 0.220703125, 0.2216796875, 0.22265625, 0.2236328125, 0.224609375, 0.2255859375, 0.2265625, 0.2275390625, 0.228515625, 0.2294921875, 0.23046875, 0.2314453125, 0.232421875, 0.2333984375, 0.234375, 0.2353515625, 0.236328125, 0.2373046875, 0.23828125, 0.2392578125, 0.240234375, 0.2412109375, 0.2421875, 0.2431640625, 0.244140625, 0.2451171875, 0.24609375, 0.2470703125, 0.248046875, 0.2490234375, 0.25, 0.2509765625, 0.251953125, 0.2529296875, 0.25390625, 0.2548828125, 0.255859375, 0.2568359375, 0.2578125, 0.2587890625, 0.259765625, 0.2607421875, 0.26171875, 0.2626953125, 0.263671875, 0.2646484375, 0.265625, 0.2666015625, 0.267578125, 0.2685546875, 0.26953125, 0.2705078125, 0.271484375, 0.2724609375, 0.2734375, 0.2744140625, 0.275390625, 0.2763671875, 0.27734375, 0.2783203125, 0.279296875, 0.2802734375, 0.28125, 0.2822265625, 0.283203125, 0.2841796875, 0.28515625, 0.2861328125, 0.287109375, 0.2880859375, 0.2890625, 0.2900390625, 0.291015625, 0.2919921875, 0.29296875, 0.2939453125, 0.294921875, 0.2958984375, 0.296875, 0.2978515625, 0.298828125, 0.2998046875, 0.30078125, 0.3017578125, 0.302734375, 0.3037109375, 0.3046875, 0.3056640625, 0.306640625, 0.3076171875, 0.30859375, 0.3095703125, 0.310546875, 0.3115234375, 0.3125, 0.3134765625, 0.314453125, 0.3154296875, 0.31640625, 0.3173828125, 0.318359375, 0.3193359375, 0.3203125, 0.3212890625, 0.322265625, 0.3232421875, 0.32421875, 0.3251953125, 0.326171875, 0.3271484375, 0.328125, 0.3291015625, 0.330078125, 0.3310546875, 0.33203125, 0.3330078125, 0.333984375, 0.3349609375, 0.3359375, 0.3369140625, 0.337890625, 0.3388671875, 0.33984375, 0.3408203125, 0.341796875, 0.3427734375, 0.34375, 0.3447265625, 0.345703125, 0.3466796875, 0.34765625, 0.3486328125, 0.349609375, 0.3505859375, 0.3515625, 0.3525390625, 0.353515625, 0.3544921875, 0.35546875, 0.3564453125, 0.357421875, 0.3583984375, 0.359375, 0.3603515625, 0.361328125, 0.3623046875, 0.36328125, 0.3642578125, 0.365234375, 0.3662109375, 0.3671875, 0.3681640625, 0.369140625, 0.3701171875, 0.37109375, 0.3720703125, 0.373046875, 0.3740234375, 0.375, 0.3759765625, 0.376953125, 0.3779296875, 0.37890625, 0.3798828125, 0.380859375, 0.3818359375, 0.3828125, 0.3837890625, 0.384765625, 0.3857421875, 0.38671875, 0.3876953125, 0.388671875, 0.3896484375, 0.390625, 0.3916015625, 0.392578125, 0.3935546875, 0.39453125, 0.3955078125, 0.396484375, 0.3974609375, 0.3984375, 0.3994140625, 0.400390625, 0.4013671875, 0.40234375, 0.4033203125, 0.404296875, 0.4052734375, 0.40625, 0.4072265625, 0.408203125, 0.4091796875, 0.41015625, 0.4111328125, 0.412109375, 0.4130859375, 0.4140625, 0.4150390625, 0.416015625, 0.4169921875, 0.41796875, 0.4189453125, 0.419921875, 0.4208984375, 0.421875, 0.4228515625, 0.423828125, 0.4248046875, 0.42578125, 0.4267578125, 0.427734375, 0.4287109375, 0.4296875, 0.4306640625, 0.431640625, 0.4326171875, 0.43359375, 0.4345703125, 0.435546875, 0.4365234375, 0.4375, 0.4384765625, 0.439453125, 0.4404296875, 0.44140625, 0.4423828125, 0.443359375, 0.4443359375, 0.4453125, 0.4462890625, 0.447265625, 0.4482421875, 0.44921875, 0.4501953125, 0.451171875, 0.4521484375, 0.453125, 0.4541015625, 0.455078125, 0.4560546875, 0.45703125, 0.4580078125, 0.458984375, 0.4599609375, 0.4609375, 0.4619140625, 0.462890625, 0.4638671875, 0.46484375, 0.4658203125, 0.466796875, 0.4677734375, 0.46875, 0.4697265625, 0.470703125, 0.4716796875, 0.47265625, 0.4736328125, 0.474609375, 0.4755859375, 0.4765625, 0.4775390625, 0.478515625, 0.4794921875, 0.48046875, 0.4814453125, 0.482421875, 0.4833984375, 0.484375, 0.4853515625, 0.486328125, 0.4873046875, 0.48828125, 0.4892578125, 0.490234375, 0.4912109375, 0.4921875, 0.4931640625, 0.494140625, 0.4951171875, 0.49609375, 0.4970703125, 0.498046875, 0.4990234375, 0.5, 0.5009765625, 0.501953125, 0.5029296875, 0.50390625, 0.5048828125, 0.505859375, 0.5068359375, 0.5078125, 0.5087890625, 0.509765625, 0.5107421875, 0.51171875, 0.5126953125, 0.513671875, 0.5146484375, 0.515625, 0.5166015625, 0.517578125, 0.5185546875, 0.51953125, 0.5205078125, 0.521484375, 0.5224609375, 0.5234375, 0.5244140625, 0.525390625, 0.5263671875, 0.52734375, 0.5283203125, 0.529296875, 0.5302734375, 0.53125, 0.5322265625, 0.533203125, 0.5341796875, 0.53515625, 0.5361328125, 0.537109375, 0.5380859375, 0.5390625, 0.5400390625, 0.541015625, 0.5419921875, 0.54296875, 0.5439453125, 0.544921875, 0.5458984375, 0.546875, 0.5478515625, 0.548828125, 0.5498046875, 0.55078125, 0.5517578125, 0.552734375, 0.5537109375, 0.5546875, 0.5556640625, 0.556640625, 0.5576171875, 0.55859375, 0.5595703125, 0.560546875, 0.5615234375, 0.5625, 0.5634765625, 0.564453125, 0.5654296875, 0.56640625, 0.5673828125, 0.568359375, 0.5693359375, 0.5703125, 0.5712890625, 0.572265625, 0.5732421875, 0.57421875, 0.5751953125, 0.576171875, 0.5771484375, 0.578125, 0.5791015625, 0.580078125, 0.5810546875, 0.58203125, 0.5830078125, 0.583984375, 0.5849609375, 0.5859375, 0.5869140625, 0.587890625, 0.5888671875, 0.58984375, 0.5908203125, 0.591796875, 0.5927734375, 0.59375, 0.5947265625, 0.595703125, 0.5966796875, 0.59765625, 0.5986328125, 0.599609375, 0.6005859375, 0.6015625, 0.6025390625, 0.603515625, 0.6044921875, 0.60546875, 0.6064453125, 0.607421875, 0.6083984375, 0.609375, 0.6103515625, 0.611328125, 0.6123046875, 0.61328125, 0.6142578125, 0.615234375, 0.6162109375, 0.6171875, 0.6181640625, 0.619140625, 0.6201171875, 0.62109375, 0.6220703125, 0.623046875, 0.6240234375, 0.625, 0.6259765625, 0.626953125, 0.6279296875, 0.62890625, 0.6298828125, 0.630859375, 0.6318359375, 0.6328125, 0.6337890625, 0.634765625, 0.6357421875, 0.63671875, 0.6376953125, 0.638671875, 0.6396484375, 0.640625, 0.6416015625, 0.642578125, 0.6435546875, 0.64453125, 0.6455078125, 0.646484375, 0.6474609375, 0.6484375, 0.6494140625, 0.650390625, 0.6513671875, 0.65234375, 0.6533203125, 0.654296875, 0.6552734375, 0.65625, 0.6572265625, 0.658203125, 0.6591796875, 0.66015625, 0.6611328125, 0.662109375, 0.6630859375, 0.6640625, 0.6650390625, 0.666015625, 0.6669921875, 0.66796875, 0.6689453125, 0.669921875, 0.6708984375, 0.671875, 0.6728515625, 0.673828125, 0.6748046875, 0.67578125, 0.6767578125, 0.677734375, 0.6787109375, 0.6796875, 0.6806640625, 0.681640625, 0.6826171875, 0.68359375, 0.6845703125, 0.685546875, 0.6865234375, 0.6875, 0.6884765625, 0.689453125, 0.6904296875, 0.69140625, 0.6923828125, 0.693359375, 0.6943359375, 0.6953125, 0.6962890625, 0.697265625, 0.6982421875, 0.69921875, 0.7001953125, 0.701171875, 0.7021484375, 0.703125, 0.7041015625, 0.705078125, 0.7060546875, 0.70703125, 0.7080078125, 0.708984375, 0.7099609375, 0.7109375, 0.7119140625, 0.712890625, 0.7138671875, 0.71484375, 0.7158203125, 0.716796875, 0.7177734375, 0.71875, 0.7197265625, 0.720703125, 0.7216796875, 0.72265625, 0.7236328125, 0.724609375, 0.7255859375, 0.7265625, 0.7275390625, 0.728515625, 0.7294921875, 0.73046875, 0.7314453125, 0.732421875, 0.7333984375, 0.734375, 0.7353515625, 0.736328125, 0.7373046875, 0.73828125, 0.7392578125, 0.740234375, 0.7412109375, 0.7421875, 0.7431640625, 0.744140625, 0.7451171875, 0.74609375, 0.7470703125, 0.748046875, 0.7490234375, 0.75, 0.7509765625, 0.751953125, 0.7529296875, 0.75390625, 0.7548828125, 0.755859375, 0.7568359375, 0.7578125, 0.7587890625, 0.759765625, 0.7607421875, 0.76171875, 0.7626953125, 0.763671875, 0.7646484375, 0.765625, 0.7666015625, 0.767578125, 0.7685546875, 0.76953125, 0.7705078125, 0.771484375, 0.7724609375, 0.7734375, 0.7744140625, 0.775390625, 0.7763671875, 0.77734375, 0.7783203125, 0.779296875, 0.7802734375, 0.78125, 0.7822265625, 0.783203125, 0.7841796875, 0.78515625, 0.7861328125, 0.787109375, 0.7880859375, 0.7890625, 0.7900390625, 0.791015625, 0.7919921875, 0.79296875, 0.7939453125, 0.794921875, 0.7958984375, 0.796875, 0.7978515625, 0.798828125, 0.7998046875, 0.80078125, 0.8017578125, 0.802734375, 0.8037109375, 0.8046875, 0.8056640625, 0.806640625, 0.8076171875, 0.80859375, 0.8095703125, 0.810546875, 0.8115234375, 0.8125, 0.8134765625, 0.814453125, 0.8154296875, 0.81640625, 0.8173828125, 0.818359375, 0.8193359375, 0.8203125, 0.8212890625, 0.822265625, 0.8232421875, 0.82421875, 0.8251953125, 0.826171875, 0.8271484375, 0.828125, 0.8291015625, 0.830078125, 0.8310546875, 0.83203125, 0.8330078125, 0.833984375, 0.8349609375, 0.8359375, 0.8369140625, 0.837890625, 0.8388671875, 0.83984375, 0.8408203125, 0.841796875, 0.8427734375, 0.84375, 0.8447265625, 0.845703125, 0.8466796875, 0.84765625, 0.8486328125, 0.849609375, 0.8505859375, 0.8515625, 0.8525390625, 0.853515625, 0.8544921875, 0.85546875, 0.8564453125, 0.857421875, 0.8583984375, 0.859375, 0.8603515625, 0.861328125, 0.8623046875, 0.86328125, 0.8642578125, 0.865234375, 0.8662109375, 0.8671875, 0.8681640625, 0.869140625, 0.8701171875, 0.87109375, 0.8720703125, 0.873046875, 0.8740234375, 0.875, 0.8759765625, 0.876953125, 0.8779296875, 0.87890625, 0.8798828125, 0.880859375, 0.8818359375, 0.8828125, 0.8837890625, 0.884765625, 0.8857421875, 0.88671875, 0.8876953125, 0.888671875, 0.8896484375, 0.890625, 0.8916015625, 0.892578125, 0.8935546875, 0.89453125, 0.8955078125, 0.896484375, 0.8974609375, 0.8984375, 0.8994140625, 0.900390625, 0.9013671875, 0.90234375, 0.9033203125, 0.904296875, 0.9052734375, 0.90625, 0.9072265625, 0.908203125, 0.9091796875, 0.91015625, 0.9111328125, 0.912109375, 0.9130859375, 0.9140625, 0.9150390625, 0.916015625, 0.9169921875, 0.91796875, 0.9189453125, 0.919921875, 0.9208984375, 0.921875, 0.9228515625, 0.923828125, 0.9248046875, 0.92578125, 0.9267578125, 0.927734375, 0.9287109375, 0.9296875, 0.9306640625, 0.931640625, 0.9326171875, 0.93359375, 0.9345703125, 0.935546875, 0.9365234375, 0.9375, 0.9384765625, 0.939453125, 0.9404296875, 0.94140625, 0.9423828125, 0.943359375, 0.9443359375, 0.9453125, 0.9462890625, 0.947265625, 0.9482421875, 0.94921875, 0.9501953125, 0.951171875, 0.9521484375, 0.953125, 0.9541015625, 0.955078125, 0.9560546875, 0.95703125, 0.9580078125, 0.958984375, 0.9599609375, 0.9609375, 0.9619140625, 0.962890625, 0.9638671875, 0.96484375, 0.9658203125, 0.966796875, 0.9677734375, 0.96875, 0.9697265625, 0.970703125, 0.9716796875, 0.97265625, 0.9736328125, 0.974609375, 0.9755859375, 0.9765625, 0.9775390625, 0.978515625, 0.9794921875, 0.98046875, 0.9814453125, 0.982421875, 0.9833984375, 0.984375, 0.9853515625, 0.986328125, 0.9873046875, 0.98828125, 0.9892578125, 0.990234375, 0.9912109375, 0.9921875, 0.9931640625, 0.994140625, 0.9951171875, 0.99609375, 0.9970703125, 0.998046875, 0.9990234375, 1.0], "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "onehot", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, "decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 128.0, "target_prob": 1.0, "endpoint_temp": 1.55, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 8, "seed": 20260520}, "raw_genppl": {"ppl": 1.05319821217009, "nll_per_token": 0.05183145111682368, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 1.05319821217009, "nll_per_token": 0.05183145111682368, "tokens": 2040, "kept_samples": 8, "total_samples": 8, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 0.0, "unique_tokens": 1, "token_count": 8192, "distinct_1": 0.0001220703125, "distinct_2": 0.00012218963831867058, "top_token_mass": 1.0}}
44
+ [done] docs/lta_samples/metrics_20260520/owt_t5_2node_step290000_infer_compare_n8/dirres_c128_t1p55.jsonl