JinghuiLuAstronaut commited on
Commit
edff6fa
·
verified ·
1 Parent(s): a96e98b

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LTA_openwebtext_dualt/logs/lta_owt_abspos_specialloss16_4gpu_v2_launch.out +0 -0
  2. LTA_openwebtext_dualt/logs/lta_owt_distilbert_recordpad_baseline_len1024_ddit768x12_gbs512_8gpu_1m_20260513_124935.log +0 -0
  3. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/smoke_softendpoint_mn_n128_onehot.log +1 -0
  4. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456.log +395 -0
  5. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_path_p50_path2_unif0_0p25_outwdm1_ctx1024_path_tradeoff_sde_20260517_232950.log +0 -0
  6. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800.log +1034 -0
  7. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638.log +402 -0
  8. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n1024_linear_soft_kl_bridge_20260517_train8_overfit.log +316 -0
  9. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n256_compactv969_3l_bs512_hard_ce_onehot.log +0 -0
  10. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n64_compactv335_3l_hard_ce_onehot.log +0 -0
  11. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n8_allcorrupt_hard_ce_20260517_train8ctx8_allcorrupt.log +326 -0
  12. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n8_hard_ce_onehot_20260517_train8ctx8_overfit.log +326 -0
  13. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_rollin_focused_len256_rollin_p75_s8_i64_20260517_1733focused.log +193 -0
  14. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654.log +396 -0
  15. LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_wrongfloor_len256_wrongfloor0p5_20260517_1815wrongfloor.log +230 -0
  16. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step11000_decode128_quick_n8/first8.txt +38 -0
  17. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step113000_gpu_C1to64exp_temp1_decode128_quick_n8/first8.txt +32 -0
  18. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step12000_decode128_quick_n8/first8.txt +38 -0
  19. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step2000_decode128_quick_n8/first8.txt +38 -0
  20. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step3000_decode128_quick_n8/first8.txt +38 -0
  21. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_quick_n8/first8.txt +38 -0
  22. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_seed456_quick_n8/first8.txt +38 -0
  23. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_seed789_quick_n8/first8.txt +38 -0
  24. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step6000_decode128_quick_n8/first8.txt +38 -0
  25. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step9000_decode128_quick_n8/first8.txt +38 -0
  26. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_elftokenized_stateprobadd_latest_step256_endpoint/first2.txt +15 -0
  27. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step007000_temp1_decode128_quick_n8/first8.txt +32 -0
  28. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step013000_gpu_temp1_decode128_quick_n8/first8.txt +32 -0
  29. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step020000_gpu3_temp1_decode128_quick_n8/first8.txt +32 -0
  30. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step027000_gpu3_temp1_decode128_quick_n8/first8.txt +32 -0
  31. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_articlefull_10k_C1to1024sqrt_step010000_gpu3_temp1_decode128_quick_n8/first8.txt +32 -0
  32. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_C64to1024sqrt_step10000_aligned_temp1_decode128_quick_n8/first8.txt +32 -0
  33. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_C64to1024sqrt_step9000_aligned_temp1_decode128_quick_n8/first8.txt +32 -0
  34. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C16to256_exp_temp1_decode128/first8.txt +32 -0
  35. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64const_temp1p45_decode128/first8.txt +32 -0
  36. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64to1024_sqrt_temp1_decode128/first8.txt +32 -0
  37. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64to1024_sqrt_temp1p45_decode128/first8.txt +32 -0
  38. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_t5_len1024_d768_8gpu_step19000_temp1_decode128_quick_n8/first8.txt +32 -0
  39. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/tinystories_t5_len1024_d768_8gpu_step10000_decode128_quick_n8/first8.txt +38 -0
  40. LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/tinystories_t5_len1024_d768_8gpu_step15000_decode128_quick_n8/first8.txt +38 -0
  41. LTA_openwebtext_dualt/scripts/_tmp_trace_lta_prompt_decode.py +164 -0
  42. LTA_openwebtext_dualt/scripts/build_lta_owt_compact_gpt2bpe_packed_train_minus_100k_np8.sh +91 -0
  43. LTA_openwebtext_dualt/scripts/build_owt_t5_elf_dataset.py.bak_pre_dataset_preload_20260517_0247 +555 -0
  44. LTA_openwebtext_dualt/scripts/dump_position_top1_trace.py +245 -0
  45. LTA_openwebtext_dualt/scripts/eval_dirichlet_diffusion_like_prefilter_20260508.py +228 -0
  46. LTA_openwebtext_dualt/scripts/eval_lm1b_c1024_fullycoupled_8gpu_1m_checkpoint.sh +62 -0
  47. LTA_openwebtext_dualt/scripts/eval_lm1b_latest_non_duo_methods_genppl_20260506.py +54 -0
  48. LTA_openwebtext_dualt/scripts/eval_lm1b_latest_non_owt_methods_genppl_20260506.py +311 -0
  49. LTA_openwebtext_dualt/scripts/eval_lta_openwebtext_dualt_light.sh +28 -0
  50. LTA_openwebtext_dualt/scripts/flowtext_decode_lab.py.bak_correct_decode_20260430_222618 +466 -0
LTA_openwebtext_dualt/logs/lta_owt_abspos_specialloss16_4gpu_v2_launch.out ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/lta_owt_distilbert_recordpad_baseline_len1024_ddit768x12_gbs512_8gpu_1m_20260513_124935.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/smoke_softendpoint_mn_n128_onehot.log ADDED
@@ -0,0 +1 @@
 
 
1
+ NCCL version 2.25.1+cuda12.8
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456.log ADDED
@@ -0,0 +1,395 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 969,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 1965440,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "logit_normal",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 1.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_corpus_unigram_path": "",
124
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
125
+ "categorical_wrong_basin_shared_prob": 0.0,
126
+ "categorical_wrong_unigram_shared_prob": 0.5,
127
+ "mask_mixture_original_prob": 0.0,
128
+ "mask_mixture_lowk_prob": 0.0,
129
+ "mask_mixture_lowcorrupt_prob": 0.0,
130
+ "mask_mixture_block_prob": 0.0,
131
+ "mask_mixture_all_prob": 1.0,
132
+ "mask_mixture_lowk_clean_tokens": "0",
133
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
134
+ "mask_mixture_block_tokens": "64,128",
135
+ "simplex_bridge_sampler": "logistic_normal_linear_mean",
136
+ "logistic_normal_sigma_min": 0.03,
137
+ "logistic_normal_sigma_max": 0.4,
138
+ "logistic_normal_tau_min": 1.0,
139
+ "logistic_normal_tau_max": 1.0,
140
+ "torch_compile": false,
141
+ "compile_mode": "max-autotune",
142
+ "state_format": "prob",
143
+ "meanflow_weight": 0.0,
144
+ "rollout_train_prob": 0.0,
145
+ "rollout_train_steps": 1,
146
+ "rollout_train_infer_steps": 64,
147
+ "rollout_train_temp": 1.45,
148
+ "rollout_train_max_gamma": 1.0,
149
+ "rollout_train_corrupt_only": true,
150
+ "rollout_train_samplewise": false,
151
+ "rollout_train_compute_always": false,
152
+ "bridge_noise_init": "logistic_normal",
153
+ "noise_sigma": -1.0,
154
+ "allow_tf32": true,
155
+ "activation_checkpointing": false,
156
+ "activation_checkpoint_interval": 1,
157
+ "activation_checkpoint_scope": "block",
158
+ "ddp_static_graph": false,
159
+ "ddp_gradient_as_bucket_view": true,
160
+ "blocking_data_transfer": false,
161
+ "dataloader_prefetch_factor": 4,
162
+ "full_train_stats": false,
163
+ "tokenized_hf": false,
164
+ "tokenized_pad_token": "pad",
165
+ "elf_conditional_hf": false,
166
+ "record_pad_truncate": false,
167
+ "record_add_eos": false,
168
+ "record_add_special_tokens": false,
169
+ "record_pad_token": "pad",
170
+ "record_shuffle_buffer": 10000,
171
+ "wrap": true,
172
+ "wrap_mode": "stream",
173
+ "wrap_record_buffer_size": 200,
174
+ "owt_cached_chunks": true,
175
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
176
+ "owt_chunk_cache_rebuild": false,
177
+ "owt_chunk_cache_write_batch": 4096,
178
+ "owt_exact_repeat_per_chunk": 64,
179
+ "online_chunk_shuffle": false,
180
+ "online_chunk_shuffle_buffer": 10000,
181
+ "openwebtext_split": "train_minus_100k",
182
+ "detokenizer": "auto",
183
+ "resolved_detokenizer": null,
184
+ "num_workers": 0,
185
+ "latest_every": 1000,
186
+ "resume_path": ""
187
+ }
188
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=4.8s lr=2.000000e-03 loss=6.6554 loss_recon=6.6554 loss_meanflow=0.0000 mean_model_t=0.2098 mean_corrupt_t=0.2098 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1884 corrupt_frac=1.0000 acc_corrupt=0.1884 loss_corrupt=6.6554 wrong_frac=0.7900 init_acc_corrupt=0.2100 acc_corrupt_t_0p0_0p2=0.0937 corrupt_frac_t_0p0_0p2=0.5564 acc_corrupt_t_0p2_0p4=0.2650 corrupt_frac_t_0p2_0p4=0.3581 acc_corrupt_t_0p4_0p6=0.4636 corrupt_frac_t_0p4_0p6=0.0759 acc_corrupt_t_0p6_0p8=0.6394 corrupt_frac_t_0p6_0p8=0.0137 out_w_norm=1.2294 out_g_norm=0.9309 acc_corrupt_t_0p8_1p0=0.8184 corrupt_frac_t_0p8_1p0=0.0078 loss_all=6.3647 init_gold_top10=0.2083 init_gold_top100=0.2807
189
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=4.0s lr=2.000000e-03 loss=5.9356 loss_recon=5.9356 loss_meanflow=0.0000 mean_model_t=0.2086 mean_corrupt_t=0.2086 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1916 corrupt_frac=1.0000 acc_corrupt=0.1916 loss_corrupt=5.9356 wrong_frac=0.7913 init_acc_corrupt=0.2087 acc_corrupt_t_0p0_0p2=0.1036 corrupt_frac_t_0p0_0p2=0.5588 acc_corrupt_t_0p2_0p4=0.2602 corrupt_frac_t_0p2_0p4=0.3575 acc_corrupt_t_0p4_0p6=0.4665 corrupt_frac_t_0p4_0p6=0.0753 acc_corrupt_t_0p6_0p8=0.6580 corrupt_frac_t_0p6_0p8=0.0130 out_w_norm=4.0673 out_g_norm=1.2994 acc_corrupt_t_0p8_1p0=0.8477 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.5045 init_gold_top10=0.2240 init_gold_top100=0.2987
190
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=4.0s lr=2.000000e-03 loss=5.2619 loss_recon=5.2619 loss_meanflow=0.0000 mean_model_t=0.2113 mean_corrupt_t=0.2113 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2186 corrupt_frac=1.0000 acc_corrupt=0.2186 loss_corrupt=5.2619 wrong_frac=0.7886 init_acc_corrupt=0.2114 acc_corrupt_t_0p0_0p2=0.1285 corrupt_frac_t_0p0_0p2=0.5517 acc_corrupt_t_0p2_0p4=0.2875 corrupt_frac_t_0p2_0p4=0.3592 acc_corrupt_t_0p4_0p6=0.4807 corrupt_frac_t_0p4_0p6=0.0802 acc_corrupt_t_0p6_0p8=0.6575 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=6.7067 out_g_norm=0.5999 acc_corrupt_t_0p8_1p0=0.8438 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.1481 init_gold_top10=0.2068 init_gold_top100=0.2804
191
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=4.0s lr=2.000000e-03 loss=4.9796 loss_recon=4.9796 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2246 corrupt_frac=1.0000 acc_corrupt=0.2246 loss_corrupt=4.9796 wrong_frac=0.7907 init_acc_corrupt=0.2093 acc_corrupt_t_0p0_0p2=0.1368 corrupt_frac_t_0p0_0p2=0.5551 acc_corrupt_t_0p2_0p4=0.2930 corrupt_frac_t_0p2_0p4=0.3560 acc_corrupt_t_0p4_0p6=0.4809 corrupt_frac_t_0p4_0p6=0.0795 acc_corrupt_t_0p6_0p8=0.6536 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=8.3842 out_g_norm=0.3521 acc_corrupt_t_0p8_1p0=0.7969 corrupt_frac_t_0p8_1p0=0.0078 loss_all=4.8480 init_gold_top10=0.2126 init_gold_top100=0.2850
192
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=4.0s lr=2.000000e-03 loss=4.4947 loss_recon=4.4947 loss_meanflow=0.0000 mean_model_t=0.2074 mean_corrupt_t=0.2074 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2309 corrupt_frac=1.0000 acc_corrupt=0.2309 loss_corrupt=4.4947 wrong_frac=0.7926 init_acc_corrupt=0.2074 acc_corrupt_t_0p0_0p2=0.1447 corrupt_frac_t_0p0_0p2=0.5591 acc_corrupt_t_0p2_0p4=0.3020 corrupt_frac_t_0p2_0p4=0.3566 acc_corrupt_t_0p4_0p6=0.4839 corrupt_frac_t_0p4_0p6=0.0758 out_w_norm=9.5633 out_g_norm=0.4817 acc_corrupt_t_0p6_0p8=0.6634 corrupt_frac_t_0p6_0p8=0.0129 acc_corrupt_t_0p8_1p0=0.8516 corrupt_frac_t_0p8_1p0=0.0078 loss_all=4.1750 init_gold_top10=0.1929 init_gold_top100=0.2686
193
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=4.0s lr=2.000000e-03 loss=3.6893 loss_recon=3.6893 loss_meanflow=0.0000 mean_model_t=0.2080 mean_corrupt_t=0.2080 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2459 corrupt_frac=1.0000 acc_corrupt=0.2459 loss_corrupt=3.6893 wrong_frac=0.7923 init_acc_corrupt=0.2077 acc_corrupt_t_0p0_0p2=0.1584 corrupt_frac_t_0p0_0p2=0.5596 acc_corrupt_t_0p2_0p4=0.3192 corrupt_frac_t_0p2_0p4=0.3571 acc_corrupt_t_0p4_0p6=0.5011 corrupt_frac_t_0p4_0p6=0.0742 acc_corrupt_t_0p6_0p8=0.6711 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=9.9203 out_g_norm=0.4404 acc_corrupt_t_0p8_1p0=0.7656 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.1350 init_gold_top10=0.2306 init_gold_top100=0.3021
194
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=4.0s lr=2.000000e-03 loss=2.6742 loss_recon=2.6742 loss_meanflow=0.0000 mean_model_t=0.2109 mean_corrupt_t=0.2109 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3112 corrupt_frac=1.0000 acc_corrupt=0.3112 loss_corrupt=2.6742 wrong_frac=0.7892 init_acc_corrupt=0.2108 acc_corrupt_t_0p0_0p2=0.2056 corrupt_frac_t_0p0_0p2=0.5513 acc_corrupt_t_0p2_0p4=0.4036 corrupt_frac_t_0p2_0p4=0.3610 acc_corrupt_t_0p4_0p6=0.5805 corrupt_frac_t_0p4_0p6=0.0786 out_w_norm=10.1693 out_g_norm=0.4827 acc_corrupt_t_0p6_0p8=0.7189 corrupt_frac_t_0p6_0p8=0.0134 acc_corrupt_t_0p8_1p0=0.8789 corrupt_frac_t_0p8_1p0=0.0078 loss_all=2.1149 init_gold_top10=0.2158 init_gold_top100=0.2894
195
+ step=800 epoch=800/1000 epoch_step=1/1 micro_steps=800 elapsed=4.0s lr=2.000000e-03 loss=1.5900 loss_recon=1.5900 loss_meanflow=0.0000 mean_model_t=0.2097 mean_corrupt_t=0.2097 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5102 corrupt_frac=1.0000 acc_corrupt=0.5102 loss_corrupt=1.5900 wrong_frac=0.7911 init_acc_corrupt=0.2089 acc_corrupt_t_0p0_0p2=0.3754 corrupt_frac_t_0p0_0p2=0.5545 acc_corrupt_t_0p2_0p4=0.6481 corrupt_frac_t_0p2_0p4=0.3613 acc_corrupt_t_0p4_0p6=0.7977 corrupt_frac_t_0p4_0p6=0.0755 acc_corrupt_t_0p6_0p8=0.8729 corrupt_frac_t_0p6_0p8=0.0125 out_w_norm=10.4756 out_g_norm=0.5089 acc_corrupt_t_0p8_1p0=0.9342 corrupt_frac_t_0p8_1p0=0.0094 loss_all=1.0926 init_gold_top10=0.2185 init_gold_top100=0.2932
196
+ step=900 epoch=900/1000 epoch_step=1/1 micro_steps=900 elapsed=4.0s lr=2.000000e-03 loss=0.7840 loss_recon=0.7840 loss_meanflow=0.0000 mean_model_t=0.2101 mean_corrupt_t=0.2101 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7687 corrupt_frac=1.0000 acc_corrupt=0.7687 loss_corrupt=0.7840 wrong_frac=0.7898 init_acc_corrupt=0.2102 acc_corrupt_t_0p0_0p2=0.6409 corrupt_frac_t_0p0_0p2=0.5542 acc_corrupt_t_0p2_0p4=0.9159 corrupt_frac_t_0p2_0p4=0.3577 acc_corrupt_t_0p4_0p6=0.9730 corrupt_frac_t_0p4_0p6=0.0784 out_w_norm=10.7803 out_g_norm=0.4530 acc_corrupt_t_0p6_0p8=0.9885 corrupt_frac_t_0p6_0p8=0.0141 loss_all=0.4419 init_gold_top10=0.2162 init_gold_top100=0.2884
197
+ step=1000 epoch=1000/1000 epoch_step=1/1 micro_steps=1000 elapsed=4.0s lr=2.000000e-03 loss=0.3991 loss_recon=0.3991 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8878 corrupt_frac=1.0000 acc_corrupt=0.8878 loss_corrupt=0.3991 wrong_frac=0.7906 init_acc_corrupt=0.2094 acc_corrupt_t_0p0_0p2=0.8048 corrupt_frac_t_0p0_0p2=0.5538 acc_corrupt_t_0p2_0p4=0.9889 corrupt_frac_t_0p2_0p4=0.3596 acc_corrupt_t_0p4_0p6=0.9981 corrupt_frac_t_0p4_0p6=0.0773 out_w_norm=11.1175 out_g_norm=0.3625 acc_corrupt_t_0p6_0p8=0.9988 corrupt_frac_t_0p6_0p8=0.0140 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.3601 init_gold_top10=0.2185 init_gold_top100=0.2934
198
+ NCCL version 2.25.1+cuda12.8
199
+ resumed_from=runs/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456/latest.pt start_step=1001
200
+ {
201
+ "device": "cuda:0",
202
+ "rank": 0,
203
+ "world_size": 4,
204
+ "samples": "owt_cached_chunks:8",
205
+ "vocab_size": 969,
206
+ "tokenizer_vocab_size": 50257,
207
+ "save_dir": "runs/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456",
208
+ "batch_size": 128,
209
+ "grad_accum": 1,
210
+ "effective_batch_size": 512,
211
+ "global_batch_size": 512,
212
+ "lr_schedule": "constant_warmup",
213
+ "optimizer": "muon",
214
+ "epochs": 0.0,
215
+ "steps_per_epoch": 1,
216
+ "total_steps": 2000,
217
+ "warmup_steps": 10,
218
+ "warmup_epochs": -1.0,
219
+ "min_lr": 0.0,
220
+ "weight_decay": 0.1,
221
+ "output_weight_decay": -1.0,
222
+ "adamw_param_groups": "nanogpt",
223
+ "adam_beta1": 0.9,
224
+ "adam_beta2": 0.95,
225
+ "adam_eps": 1e-08,
226
+ "muon_impl": "legacy",
227
+ "muon_momentum": 0.95,
228
+ "muon_ns_steps": 5,
229
+ "muon_update_scale": 1.0,
230
+ "muon_nesterov": false,
231
+ "muon_width_scale": false,
232
+ "muon_grouping": "legacy_dim_ge_2",
233
+ "muon_param_count": 1965440,
234
+ "muon_adam_param_count": 8192,
235
+ "muon_param_names": [
236
+ "vocab_embed.embedding",
237
+ "sigma_map.net.0.weight",
238
+ "sigma_map.net.2.weight",
239
+ "blocks.0.attn_qkv.weight",
240
+ "blocks.0.attn_out.weight",
241
+ "blocks.0.mlp.0.weight",
242
+ "blocks.0.mlp.2.weight",
243
+ "blocks.0.adaLN_modulation.weight",
244
+ "blocks.1.attn_qkv.weight",
245
+ "blocks.1.attn_out.weight",
246
+ "blocks.1.mlp.0.weight",
247
+ "blocks.1.mlp.2.weight",
248
+ "blocks.1.adaLN_modulation.weight",
249
+ "blocks.2.attn_qkv.weight",
250
+ "blocks.2.attn_out.weight",
251
+ "blocks.2.mlp.0.weight",
252
+ "blocks.2.mlp.2.weight",
253
+ "blocks.2.adaLN_modulation.weight",
254
+ "output_layer.linear.weight",
255
+ "output_layer.adaLN_modulation.weight"
256
+ ],
257
+ "muon_adam_param_names": [
258
+ "sigma_map.net.0.bias",
259
+ "sigma_map.net.2.bias",
260
+ "blocks.0.norm1.weight",
261
+ "blocks.0.norm2.weight",
262
+ "blocks.0.mlp.0.bias",
263
+ "blocks.0.mlp.2.bias",
264
+ "blocks.0.adaLN_modulation.bias",
265
+ "blocks.1.norm1.weight",
266
+ "blocks.1.norm2.weight",
267
+ "blocks.1.mlp.0.bias",
268
+ "blocks.1.mlp.2.bias",
269
+ "blocks.1.adaLN_modulation.bias",
270
+ "blocks.2.norm1.weight",
271
+ "blocks.2.norm2.weight",
272
+ "blocks.2.mlp.0.bias",
273
+ "blocks.2.mlp.2.bias",
274
+ "blocks.2.adaLN_modulation.bias",
275
+ "output_layer.norm_final.weight",
276
+ "output_layer.adaLN_modulation.bias"
277
+ ],
278
+ "muon_effective_nesterov": false,
279
+ "muon_effective_width_scale": false,
280
+ "muon_effective_weight_decay": 0.1,
281
+ "muon_adam_fallback_nesterov": false,
282
+ "muon_adam_fallback_weight_decay": 0.1,
283
+ "ema_decay": 0.9999,
284
+ "ema_start_step": 0,
285
+ "model_type": "ddit",
286
+ "ddit_mlp_type": "gelu",
287
+ "elf_num_time_tokens": 4,
288
+ "elf_num_model_mode_tokens": 0,
289
+ "qk_norm": true,
290
+ "output_bias": false,
291
+ "output_init_std": -1.0,
292
+ "norm_type": "rmsnorm",
293
+ "target_loss": "hard_ce",
294
+ "linear_soft_target_power": 1.0,
295
+ "linear_soft_target_min_conf": 0.0,
296
+ "linear_soft_target_max_conf": 1.0,
297
+ "t_sampling_mode": "logit_normal",
298
+ "t_sampling_power": 1.0,
299
+ "t_sampling_eps": 0.0001,
300
+ "t_sampling_logit_mean": -1.5,
301
+ "t_sampling_logit_std": 0.8,
302
+ "dual_t": true,
303
+ "corrupt_t_mode": "same",
304
+ "corrupt_min_t": 0.0,
305
+ "corrupt_max_t": 1.0,
306
+ "prefix_block_prob": 0.0,
307
+ "prefix_block_len": 128,
308
+ "mask_ratio_floor_schedule": "none",
309
+ "dirichlet_endpoint_mode": "categorical_dual_t",
310
+ "dirichlet_semantic_t_mode": "same",
311
+ "dirichlet_semantic_t_value": 0.0,
312
+ "dirichlet_semantic_t_curve": "linear",
313
+ "dirichlet_semantic_t_power": 1.0,
314
+ "endpoint_sequence_random_prob_alpha": 0.0,
315
+ "categorical_wrong_from_full_vocab": true,
316
+ "categorical_wrong_from_batch_valid_tokens": false,
317
+ "categorical_wrong_basin_token_ids": "",
318
+ "categorical_wrong_basin_prob": 0.0,
319
+ "categorical_wrong_unigram_prob": 1.0,
320
+ "categorical_wrong_uniform_prob": 0.0,
321
+ "categorical_wrong_corpus_unigram_path": "",
322
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
323
+ "categorical_wrong_basin_shared_prob": 0.0,
324
+ "categorical_wrong_unigram_shared_prob": 0.5,
325
+ "mask_mixture_original_prob": 0.0,
326
+ "mask_mixture_lowk_prob": 0.0,
327
+ "mask_mixture_lowcorrupt_prob": 0.0,
328
+ "mask_mixture_block_prob": 0.0,
329
+ "mask_mixture_all_prob": 1.0,
330
+ "mask_mixture_lowk_clean_tokens": "0",
331
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
332
+ "mask_mixture_block_tokens": "64,128",
333
+ "simplex_bridge_sampler": "logistic_normal_linear_mean",
334
+ "logistic_normal_sigma_min": 0.03,
335
+ "logistic_normal_sigma_max": 0.4,
336
+ "logistic_normal_tau_min": 1.0,
337
+ "logistic_normal_tau_max": 1.0,
338
+ "torch_compile": false,
339
+ "compile_mode": "max-autotune",
340
+ "state_format": "prob",
341
+ "meanflow_weight": 0.0,
342
+ "rollout_train_prob": 0.0,
343
+ "rollout_train_steps": 1,
344
+ "rollout_train_infer_steps": 64,
345
+ "rollout_train_temp": 1.45,
346
+ "rollout_train_max_gamma": 1.0,
347
+ "rollout_train_corrupt_only": true,
348
+ "rollout_train_samplewise": false,
349
+ "rollout_train_compute_always": false,
350
+ "bridge_noise_init": "logistic_normal",
351
+ "noise_sigma": -1.0,
352
+ "allow_tf32": true,
353
+ "activation_checkpointing": false,
354
+ "activation_checkpoint_interval": 1,
355
+ "activation_checkpoint_scope": "block",
356
+ "ddp_static_graph": false,
357
+ "ddp_gradient_as_bucket_view": true,
358
+ "blocking_data_transfer": false,
359
+ "dataloader_prefetch_factor": 4,
360
+ "full_train_stats": false,
361
+ "tokenized_hf": false,
362
+ "tokenized_pad_token": "pad",
363
+ "elf_conditional_hf": false,
364
+ "record_pad_truncate": false,
365
+ "record_add_eos": false,
366
+ "record_add_special_tokens": false,
367
+ "record_pad_token": "pad",
368
+ "record_shuffle_buffer": 10000,
369
+ "wrap": true,
370
+ "wrap_mode": "stream",
371
+ "wrap_record_buffer_size": 200,
372
+ "owt_cached_chunks": true,
373
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
374
+ "owt_chunk_cache_rebuild": false,
375
+ "owt_chunk_cache_write_batch": 4096,
376
+ "owt_exact_repeat_per_chunk": 64,
377
+ "online_chunk_shuffle": false,
378
+ "online_chunk_shuffle_buffer": 10000,
379
+ "openwebtext_split": "train_minus_100k",
380
+ "detokenizer": "auto",
381
+ "resolved_detokenizer": null,
382
+ "num_workers": 0,
383
+ "latest_every": 1000,
384
+ "resume_path": "runs/train8_combo_len256_logistic_unigram_shared_highC_20260517_170456/latest.pt"
385
+ }
386
+ step=1100 epoch=1100/2000 epoch_step=1/1 micro_steps=1100 elapsed=4.8s lr=2.000000e-03 loss=0.2548 loss_recon=0.2548 loss_meanflow=0.0000 mean_model_t=0.2098 mean_corrupt_t=0.2098 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9283 corrupt_frac=1.0000 acc_corrupt=0.9283 loss_corrupt=0.2548 wrong_frac=0.7900 init_acc_corrupt=0.2100 acc_corrupt_t_0p0_0p2=0.8728 corrupt_frac_t_0p0_0p2=0.5564 acc_corrupt_t_0p2_0p4=0.9975 corrupt_frac_t_0p2_0p4=0.3581 acc_corrupt_t_0p4_0p6=0.9995 corrupt_frac_t_0p4_0p6=0.0759 acc_corrupt_t_0p6_0p8=0.9996 corrupt_frac_t_0p6_0p8=0.0137 out_w_norm=11.3638 out_g_norm=0.3115 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.2897 init_gold_top10=0.2083 init_gold_top100=0.2807
387
+ step=1200 epoch=1200/2000 epoch_step=1/1 micro_steps=1200 elapsed=4.0s lr=2.000000e-03 loss=0.1933 loss_recon=0.1933 loss_meanflow=0.0000 mean_model_t=0.2086 mean_corrupt_t=0.2086 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9462 corrupt_frac=1.0000 acc_corrupt=0.9462 loss_corrupt=0.1933 wrong_frac=0.7913 init_acc_corrupt=0.2087 acc_corrupt_t_0p0_0p2=0.9044 corrupt_frac_t_0p0_0p2=0.5588 acc_corrupt_t_0p2_0p4=0.9990 corrupt_frac_t_0p2_0p4=0.3575 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.0753 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.0130 out_w_norm=11.4992 out_g_norm=0.2630 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.2213 init_gold_top10=0.2240 init_gold_top100=0.2987
388
+ step=1300 epoch=1300/2000 epoch_step=1/1 micro_steps=1300 elapsed=4.0s lr=2.000000e-03 loss=0.1427 loss_recon=0.1427 loss_meanflow=0.0000 mean_model_t=0.2113 mean_corrupt_t=0.2113 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9602 corrupt_frac=1.0000 acc_corrupt=0.9602 loss_corrupt=0.1427 wrong_frac=0.7886 init_acc_corrupt=0.2114 acc_corrupt_t_0p0_0p2=0.9283 corrupt_frac_t_0p0_0p2=0.5517 acc_corrupt_t_0p2_0p4=0.9994 corrupt_frac_t_0p2_0p4=0.3592 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.0802 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=11.5852 out_g_norm=0.2256 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.1299 init_gold_top10=0.2068 init_gold_top100=0.2804
389
+ step=1400 epoch=1400/2000 epoch_step=1/1 micro_steps=1400 elapsed=4.0s lr=2.000000e-03 loss=0.1327 loss_recon=0.1327 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9637 corrupt_frac=1.0000 acc_corrupt=0.9637 loss_corrupt=0.1327 wrong_frac=0.7907 init_acc_corrupt=0.2093 acc_corrupt_t_0p0_0p2=0.9349 corrupt_frac_t_0p0_0p2=0.5551 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.3560 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0795 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=11.6459 out_g_norm=0.2093 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.0916 init_gold_top10=0.2126 init_gold_top100=0.2850
390
+ step=1500 epoch=1500/2000 epoch_step=1/1 micro_steps=1500 elapsed=4.0s lr=2.000000e-03 loss=0.1088 loss_recon=0.1088 loss_meanflow=0.0000 mean_model_t=0.2074 mean_corrupt_t=0.2074 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9702 corrupt_frac=1.0000 acc_corrupt=0.9702 loss_corrupt=0.1088 wrong_frac=0.7926 init_acc_corrupt=0.2074 acc_corrupt_t_0p0_0p2=0.9468 corrupt_frac_t_0p0_0p2=0.5591 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.3566 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0758 out_w_norm=11.6954 out_g_norm=0.1768 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.0129 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.1137 init_gold_top10=0.1929 init_gold_top100=0.2686
391
+ step=1600 epoch=1600/2000 epoch_step=1/1 micro_steps=1600 elapsed=4.0s lr=2.000000e-03 loss=0.0978 loss_recon=0.0978 loss_meanflow=0.0000 mean_model_t=0.2080 mean_corrupt_t=0.2080 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9730 corrupt_frac=1.0000 acc_corrupt=0.9730 loss_corrupt=0.0978 wrong_frac=0.7923 init_acc_corrupt=0.2077 acc_corrupt_t_0p0_0p2=0.9520 corrupt_frac_t_0p0_0p2=0.5596 acc_corrupt_t_0p2_0p4=0.9998 corrupt_frac_t_0p2_0p4=0.3571 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0742 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.0128 out_w_norm=11.7344 out_g_norm=0.1572 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.0759 init_gold_top10=0.2306 init_gold_top100=0.3021
392
+ step=1700 epoch=1700/2000 epoch_step=1/1 micro_steps=1700 elapsed=4.0s lr=2.000000e-03 loss=0.0883 loss_recon=0.0883 loss_meanflow=0.0000 mean_model_t=0.2109 mean_corrupt_t=0.2109 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9762 corrupt_frac=1.0000 acc_corrupt=0.9762 loss_corrupt=0.0883 wrong_frac=0.7892 init_acc_corrupt=0.2108 acc_corrupt_t_0p0_0p2=0.9570 corrupt_frac_t_0p0_0p2=0.5513 acc_corrupt_t_0p2_0p4=0.9998 corrupt_frac_t_0p2_0p4=0.3610 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.0786 out_w_norm=11.7643 out_g_norm=0.1483 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.0134 acc_corrupt_t_0p8_1p0=0.9961 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.1070 init_gold_top10=0.2158 init_gold_top100=0.2894
393
+ step=1800 epoch=1800/2000 epoch_step=1/1 micro_steps=1800 elapsed=4.0s lr=2.000000e-03 loss=0.0794 loss_recon=0.0794 loss_meanflow=0.0000 mean_model_t=0.2097 mean_corrupt_t=0.2097 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9787 corrupt_frac=1.0000 acc_corrupt=0.9787 loss_corrupt=0.0794 wrong_frac=0.7911 init_acc_corrupt=0.2089 acc_corrupt_t_0p0_0p2=0.9617 corrupt_frac_t_0p0_0p2=0.5545 acc_corrupt_t_0p2_0p4=0.9998 corrupt_frac_t_0p2_0p4=0.3613 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0755 acc_corrupt_t_0p6_0p8=0.9996 corrupt_frac_t_0p6_0p8=0.0125 out_w_norm=11.7925 out_g_norm=0.1356 acc_corrupt_t_0p8_1p0=0.9993 corrupt_frac_t_0p8_1p0=0.0094 loss_all=0.0852 init_gold_top10=0.2185 init_gold_top100=0.2932
394
+ step=1900 epoch=1900/2000 epoch_step=1/1 micro_steps=1900 elapsed=4.0s lr=2.000000e-03 loss=0.0728 loss_recon=0.0728 loss_meanflow=0.0000 mean_model_t=0.2101 mean_corrupt_t=0.2101 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9806 corrupt_frac=1.0000 acc_corrupt=0.9806 loss_corrupt=0.0728 wrong_frac=0.7898 init_acc_corrupt=0.2102 acc_corrupt_t_0p0_0p2=0.9650 corrupt_frac_t_0p0_0p2=0.5542 acc_corrupt_t_0p2_0p4=0.9999 corrupt_frac_t_0p2_0p4=0.3577 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0784 out_w_norm=11.8173 out_g_norm=0.1276 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.0141 loss_all=0.0249 init_gold_top10=0.2162 init_gold_top100=0.2884
395
+ step=2000 epoch=2000/2000 epoch_step=1/1 micro_steps=2000 elapsed=4.0s lr=2.000000e-03 loss=0.0702 loss_recon=0.0702 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9812 corrupt_frac=1.0000 acc_corrupt=0.9812 loss_corrupt=0.0702 wrong_frac=0.7906 init_acc_corrupt=0.2094 acc_corrupt_t_0p0_0p2=0.9662 corrupt_frac_t_0p0_0p2=0.5538 acc_corrupt_t_0p2_0p4=0.9999 corrupt_frac_t_0p2_0p4=0.3596 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0773 out_w_norm=11.8352 out_g_norm=0.1188 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.0140 acc_corrupt_t_0p8_1p0=0.9961 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.1269 init_gold_top10=0.2185 init_gold_top100=0.2934
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_path_p50_path2_unif0_0p25_outwdm1_ctx1024_path_tradeoff_sde_20260517_232950.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800.log ADDED
@@ -0,0 +1,1034 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 2423,
8
+ "tokenizer_vocab_size": 32100,
9
+ "save_dir": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 2523776,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "uniform",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 0.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_prob_floor": 0.0,
124
+ "categorical_wrong_corpus_unigram_path": "",
125
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
126
+ "categorical_wrong_basin_shared_prob": 0.0,
127
+ "categorical_wrong_unigram_shared_prob": 0.0,
128
+ "mask_mixture_original_prob": 0.0,
129
+ "mask_mixture_lowk_prob": 0.0,
130
+ "mask_mixture_lowcorrupt_prob": 0.0,
131
+ "mask_mixture_block_prob": 0.0,
132
+ "mask_mixture_all_prob": 1.0,
133
+ "mask_mixture_lowk_clean_tokens": "0",
134
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
135
+ "mask_mixture_block_tokens": "64,128",
136
+ "simplex_bridge_sampler": "dirichlet",
137
+ "logistic_normal_sigma_min": 0.1,
138
+ "logistic_normal_sigma_max": 1.0,
139
+ "logistic_normal_tau_min": 1.0,
140
+ "logistic_normal_tau_max": 1.0,
141
+ "torch_compile": false,
142
+ "compile_mode": "max-autotune",
143
+ "state_format": "prob",
144
+ "meanflow_weight": 0.0,
145
+ "rollout_train_prob": 0.5,
146
+ "rollout_train_steps": 4,
147
+ "rollout_train_steps_min": 0,
148
+ "rollout_train_infer_steps": 1,
149
+ "rollout_train_time_mode": "sampled_path",
150
+ "rollout_train_s_dist": "uniform",
151
+ "rollout_train_s_min_frac": 0.0,
152
+ "rollout_train_s_max_frac": 0.25,
153
+ "rollout_train_s_beta_alpha": 2.0,
154
+ "rollout_train_s_beta_beta": 6.0,
155
+ "rollout_train_temp": 1.0,
156
+ "rollout_train_max_gamma": 1.0,
157
+ "rollout_train_corrupt_only": true,
158
+ "rollout_train_samplewise": true,
159
+ "rollout_train_compute_always": false,
160
+ "rollout_train_sync_t": true,
161
+ "bridge_noise_init": "logistic_normal",
162
+ "noise_sigma": -1.0,
163
+ "allow_tf32": true,
164
+ "activation_checkpointing": false,
165
+ "activation_checkpoint_interval": 1,
166
+ "activation_checkpoint_scope": "block",
167
+ "ddp_static_graph": false,
168
+ "ddp_gradient_as_bucket_view": true,
169
+ "blocking_data_transfer": false,
170
+ "dataloader_prefetch_factor": 4,
171
+ "full_train_stats": false,
172
+ "tokenized_hf": false,
173
+ "tokenized_pad_token": "pad",
174
+ "elf_conditional_hf": false,
175
+ "record_pad_truncate": false,
176
+ "record_add_eos": false,
177
+ "record_add_special_tokens": false,
178
+ "record_pad_token": "pad",
179
+ "record_shuffle_buffer": 10000,
180
+ "wrap": true,
181
+ "wrap_mode": "stream",
182
+ "wrap_record_buffer_size": 200,
183
+ "owt_cached_chunks": true,
184
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/t5_len1024_train8_compact_overfit",
185
+ "owt_chunk_cache_rebuild": false,
186
+ "owt_chunk_cache_write_batch": 4096,
187
+ "owt_exact_repeat_per_chunk": 64,
188
+ "online_chunk_shuffle": false,
189
+ "online_chunk_shuffle_buffer": 10000,
190
+ "openwebtext_split": "train_minus_100k",
191
+ "detokenizer": "auto",
192
+ "resolved_detokenizer": null,
193
+ "num_workers": 0,
194
+ "latest_every": 1000,
195
+ "resume_path": ""
196
+ }
197
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=26.2s lr=2.000000e-03 loss=7.3399 loss_recon=7.3399 loss_meanflow=0.0000 mean_model_t=0.5013 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5002 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3325 corrupt_frac=1.0000 acc_corrupt=0.3325 loss_corrupt=7.3399 wrong_frac=0.4986 init_acc_corrupt=0.4674 acc_corrupt_t_0p0_0p2=0.0457 corrupt_frac_t_0p0_0p2=0.1952 acc_corrupt_t_0p2_0p4=0.1646 corrupt_frac_t_0p2_0p4=0.2063 acc_corrupt_t_0p4_0p6=0.3267 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.4815 corrupt_frac_t_0p6_0p8=0.1976 acc_corrupt_t_0p8_1p0=0.6387 corrupt_frac_t_0p8_1p0=0.2036 out_w_norm=1.0906 out_g_norm=1.0044 loss_all=6.6981 init_gold_top10=0.5044 init_gold_top100=0.6198 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.4979 init_acc_rollout_kept=0.4387 logit_acc_rollout_applied=0.3337 logit_acc_rollout_kept=0.2999
198
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=25.3s lr=2.000000e-03 loss=5.8172 loss_recon=5.8172 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3271 corrupt_frac=1.0000 acc_corrupt=0.3271 loss_corrupt=5.8172 wrong_frac=0.5014 init_acc_corrupt=0.4646 acc_corrupt_t_0p0_0p2=0.0526 corrupt_frac_t_0p0_0p2=0.2037 acc_corrupt_t_0p2_0p4=0.1621 corrupt_frac_t_0p2_0p4=0.1982 acc_corrupt_t_0p4_0p6=0.3284 corrupt_frac_t_0p4_0p6=0.1956 acc_corrupt_t_0p6_0p8=0.4722 corrupt_frac_t_0p6_0p8=0.2056 acc_corrupt_t_0p8_1p0=0.6244 corrupt_frac_t_0p8_1p0=0.1969 out_w_norm=3.4900 out_g_norm=1.3272 loss_all=5.0546 init_gold_top10=0.5050 init_gold_top100=0.6441 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.5127 init_acc_rollout_kept=0.4286 logit_acc_rollout_applied=0.3773 logit_acc_rollout_kept=0.3212
199
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=25.5s lr=2.000000e-03 loss=4.7556 loss_recon=4.7556 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.4953 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5102 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3619 corrupt_frac=1.0000 acc_corrupt=0.3619 loss_corrupt=4.7556 wrong_frac=0.5048 init_acc_corrupt=0.4615 acc_corrupt_t_0p0_0p2=0.0555 corrupt_frac_t_0p0_0p2=0.2036 acc_corrupt_t_0p2_0p4=0.1875 corrupt_frac_t_0p2_0p4=0.2030 acc_corrupt_t_0p4_0p6=0.3634 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=0.5244 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.6984 corrupt_frac_t_0p8_1p0=0.1934 out_w_norm=5.5768 out_g_norm=0.5510 loss_all=4.3231 init_gold_top10=0.5280 init_gold_top100=0.6625 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.5230 init_acc_rollout_kept=0.4573 logit_acc_rollout_applied=0.4251 logit_acc_rollout_kept=0.3738
200
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=25.4s lr=2.000000e-03 loss=4.1317 loss_recon=4.1317 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4980 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4209 corrupt_frac=1.0000 acc_corrupt=0.4209 loss_corrupt=4.1317 wrong_frac=0.5019 init_acc_corrupt=0.4652 acc_corrupt_t_0p0_0p2=0.0581 corrupt_frac_t_0p0_0p2=0.2061 acc_corrupt_t_0p2_0p4=0.2100 corrupt_frac_t_0p2_0p4=0.1960 acc_corrupt_t_0p4_0p6=0.4188 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.6132 corrupt_frac_t_0p6_0p8=0.2020 acc_corrupt_t_0p8_1p0=0.8121 corrupt_frac_t_0p8_1p0=0.1985 out_w_norm=7.1061 out_g_norm=0.2755 loss_all=3.8732 init_gold_top10=0.5097 init_gold_top100=0.6643 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.4337 init_acc_rollout_kept=0.4870 logit_acc_rollout_applied=0.4383 logit_acc_rollout_kept=0.4931
201
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=25.4s lr=2.000000e-03 loss=3.5371 loss_recon=3.5371 loss_meanflow=0.0000 mean_model_t=0.4998 mean_corrupt_t=0.4998 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4846 corrupt_frac=1.0000 acc_corrupt=0.4846 loss_corrupt=3.5371 wrong_frac=0.5002 init_acc_corrupt=0.4678 acc_corrupt_t_0p0_0p2=0.0593 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.2383 corrupt_frac_t_0p2_0p4=0.1971 acc_corrupt_t_0p4_0p6=0.5041 corrupt_frac_t_0p4_0p6=0.2008 acc_corrupt_t_0p6_0p8=0.7115 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.9013 corrupt_frac_t_0p8_1p0=0.2020 out_w_norm=8.4353 out_g_norm=0.2381 loss_all=3.1644 init_gold_top10=0.5262 init_gold_top100=0.6726 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.4713 init_acc_rollout_kept=0.4937 logit_acc_rollout_applied=0.4920 logit_acc_rollout_kept=0.5176
202
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=25.3s lr=2.000000e-03 loss=3.0897 loss_recon=3.0897 loss_meanflow=0.0000 mean_model_t=0.5009 mean_corrupt_t=0.5009 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4992 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4958 corrupt_frac=1.0000 acc_corrupt=0.4958 loss_corrupt=3.0897 wrong_frac=0.4992 init_acc_corrupt=0.4690 acc_corrupt_t_0p0_0p2=0.0612 corrupt_frac_t_0p0_0p2=0.1963 acc_corrupt_t_0p2_0p4=0.2685 corrupt_frac_t_0p2_0p4=0.2016 acc_corrupt_t_0p4_0p6=0.5235 corrupt_frac_t_0p4_0p6=0.2056 acc_corrupt_t_0p6_0p8=0.7170 corrupt_frac_t_0p6_0p8=0.1941 acc_corrupt_t_0p8_1p0=0.9033 corrupt_frac_t_0p8_1p0=0.2024 out_w_norm=9.6981 out_g_norm=0.2439 loss_all=2.8362 init_gold_top10=0.5378 init_gold_top100=0.6937 rollout_applied_pos_frac=0.5469 init_acc_rollout_applied=0.5065 init_acc_rollout_kept=0.4697 logit_acc_rollout_applied=0.5309 logit_acc_rollout_kept=0.5021
203
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=25.3s lr=2.000000e-03 loss=2.7597 loss_recon=2.7597 loss_meanflow=0.0000 mean_model_t=0.5003 mean_corrupt_t=0.5003 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4918 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5027 corrupt_frac=1.0000 acc_corrupt=0.5027 loss_corrupt=2.7597 wrong_frac=0.4998 init_acc_corrupt=0.4685 acc_corrupt_t_0p0_0p2=0.0631 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.2830 corrupt_frac_t_0p2_0p4=0.1945 acc_corrupt_t_0p4_0p6=0.5334 corrupt_frac_t_0p4_0p6=0.2064 acc_corrupt_t_0p6_0p8=0.7221 corrupt_frac_t_0p6_0p8=0.1962 acc_corrupt_t_0p8_1p0=0.9057 corrupt_frac_t_0p8_1p0=0.2023 out_w_norm=10.6934 out_g_norm=0.2836 loss_all=2.4361 init_gold_top10=0.5494 init_gold_top100=0.7213 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.4773 init_acc_rollout_kept=0.5063 logit_acc_rollout_applied=0.5171 logit_acc_rollout_kept=0.5495
204
+ step=800 epoch=800/1000 epoch_step=1/1 micro_steps=800 elapsed=25.3s lr=2.000000e-03 loss=2.2898 loss_recon=2.2898 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4990 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4960 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5367 corrupt_frac=1.0000 acc_corrupt=0.5367 loss_corrupt=2.2898 wrong_frac=0.5010 init_acc_corrupt=0.4684 acc_corrupt_t_0p0_0p2=0.0625 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.3151 corrupt_frac_t_0p2_0p4=0.2004 acc_corrupt_t_0p4_0p6=0.5996 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.7792 corrupt_frac_t_0p6_0p8=0.2018 acc_corrupt_t_0p8_1p0=0.9301 corrupt_frac_t_0p8_1p0=0.1982 out_w_norm=11.2230 out_g_norm=0.3424 loss_all=1.8891 init_gold_top10=0.5743 init_gold_top100=0.7283 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.4767 init_acc_rollout_kept=0.4894 logit_acc_rollout_applied=0.5812 logit_acc_rollout_kept=0.6061
205
+ step=900 epoch=900/1000 epoch_step=1/1 micro_steps=900 elapsed=25.5s lr=2.000000e-03 loss=1.8022 loss_recon=1.8022 loss_meanflow=0.0000 mean_model_t=0.5027 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6154 corrupt_frac=1.0000 acc_corrupt=0.6154 loss_corrupt=1.8022 wrong_frac=0.4975 init_acc_corrupt=0.4773 acc_corrupt_t_0p0_0p2=0.0643 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.3909 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.7435 corrupt_frac_t_0p4_0p6=0.1958 acc_corrupt_t_0p6_0p8=0.8955 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.9732 corrupt_frac_t_0p8_1p0=0.2059 out_w_norm=11.6942 out_g_norm=0.4741 loss_all=1.5768 init_gold_top10=0.5957 init_gold_top100=0.7365 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5000 init_acc_rollout_kept=0.4748 logit_acc_rollout_applied=0.6691 logit_acc_rollout_kept=0.6601
206
+ step=1000 epoch=1000/1000 epoch_step=1/1 micro_steps=1000 elapsed=25.3s lr=2.000000e-03 loss=1.4777 loss_recon=1.4777 loss_meanflow=0.0000 mean_model_t=0.5005 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4931 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6819 corrupt_frac=1.0000 acc_corrupt=0.6819 loss_corrupt=1.4777 wrong_frac=0.4995 init_acc_corrupt=0.4862 acc_corrupt_t_0p0_0p2=0.0711 corrupt_frac_t_0p0_0p2=0.2029 acc_corrupt_t_0p2_0p4=0.5129 corrupt_frac_t_0p2_0p4=0.1965 acc_corrupt_t_0p4_0p6=0.8696 corrupt_frac_t_0p4_0p6=0.1988 acc_corrupt_t_0p6_0p8=0.9661 corrupt_frac_t_0p6_0p8=0.2028 acc_corrupt_t_0p8_1p0=0.9944 corrupt_frac_t_0p8_1p0=0.1990 out_w_norm=12.0477 out_g_norm=0.5497 loss_all=0.9938 init_gold_top10=0.6508 init_gold_top100=0.7640 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5264 init_acc_rollout_kept=0.5098 logit_acc_rollout_applied=0.7689 logit_acc_rollout_kept=0.7548
207
+ NCCL version 2.25.1+cuda12.8
208
+ resumed_from=runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt start_step=1001
209
+ {
210
+ "device": "cuda:0",
211
+ "rank": 0,
212
+ "world_size": 4,
213
+ "samples": "owt_cached_chunks:8",
214
+ "vocab_size": 2423,
215
+ "tokenizer_vocab_size": 32100,
216
+ "save_dir": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800",
217
+ "batch_size": 128,
218
+ "grad_accum": 1,
219
+ "effective_batch_size": 512,
220
+ "global_batch_size": 512,
221
+ "lr_schedule": "constant_warmup",
222
+ "optimizer": "muon",
223
+ "epochs": 0.0,
224
+ "steps_per_epoch": 1,
225
+ "total_steps": 2000,
226
+ "warmup_steps": 10,
227
+ "warmup_epochs": -1.0,
228
+ "min_lr": 0.0,
229
+ "weight_decay": 0.1,
230
+ "output_weight_decay": -1.0,
231
+ "adamw_param_groups": "nanogpt",
232
+ "adam_beta1": 0.9,
233
+ "adam_beta2": 0.95,
234
+ "adam_eps": 1e-08,
235
+ "muon_impl": "legacy",
236
+ "muon_momentum": 0.95,
237
+ "muon_ns_steps": 5,
238
+ "muon_update_scale": 1.0,
239
+ "muon_nesterov": false,
240
+ "muon_width_scale": false,
241
+ "muon_grouping": "legacy_dim_ge_2",
242
+ "muon_param_count": 2523776,
243
+ "muon_adam_param_count": 8192,
244
+ "muon_param_names": [
245
+ "vocab_embed.embedding",
246
+ "sigma_map.net.0.weight",
247
+ "sigma_map.net.2.weight",
248
+ "blocks.0.attn_qkv.weight",
249
+ "blocks.0.attn_out.weight",
250
+ "blocks.0.mlp.0.weight",
251
+ "blocks.0.mlp.2.weight",
252
+ "blocks.0.adaLN_modulation.weight",
253
+ "blocks.1.attn_qkv.weight",
254
+ "blocks.1.attn_out.weight",
255
+ "blocks.1.mlp.0.weight",
256
+ "blocks.1.mlp.2.weight",
257
+ "blocks.1.adaLN_modulation.weight",
258
+ "blocks.2.attn_qkv.weight",
259
+ "blocks.2.attn_out.weight",
260
+ "blocks.2.mlp.0.weight",
261
+ "blocks.2.mlp.2.weight",
262
+ "blocks.2.adaLN_modulation.weight",
263
+ "output_layer.linear.weight",
264
+ "output_layer.adaLN_modulation.weight"
265
+ ],
266
+ "muon_adam_param_names": [
267
+ "sigma_map.net.0.bias",
268
+ "sigma_map.net.2.bias",
269
+ "blocks.0.norm1.weight",
270
+ "blocks.0.norm2.weight",
271
+ "blocks.0.mlp.0.bias",
272
+ "blocks.0.mlp.2.bias",
273
+ "blocks.0.adaLN_modulation.bias",
274
+ "blocks.1.norm1.weight",
275
+ "blocks.1.norm2.weight",
276
+ "blocks.1.mlp.0.bias",
277
+ "blocks.1.mlp.2.bias",
278
+ "blocks.1.adaLN_modulation.bias",
279
+ "blocks.2.norm1.weight",
280
+ "blocks.2.norm2.weight",
281
+ "blocks.2.mlp.0.bias",
282
+ "blocks.2.mlp.2.bias",
283
+ "blocks.2.adaLN_modulation.bias",
284
+ "output_layer.norm_final.weight",
285
+ "output_layer.adaLN_modulation.bias"
286
+ ],
287
+ "muon_effective_nesterov": false,
288
+ "muon_effective_width_scale": false,
289
+ "muon_effective_weight_decay": 0.1,
290
+ "muon_adam_fallback_nesterov": false,
291
+ "muon_adam_fallback_weight_decay": 0.1,
292
+ "ema_decay": 0.9999,
293
+ "ema_start_step": 0,
294
+ "model_type": "ddit",
295
+ "ddit_mlp_type": "gelu",
296
+ "elf_num_time_tokens": 4,
297
+ "elf_num_model_mode_tokens": 0,
298
+ "qk_norm": true,
299
+ "output_bias": false,
300
+ "output_init_std": -1.0,
301
+ "norm_type": "rmsnorm",
302
+ "target_loss": "hard_ce",
303
+ "linear_soft_target_power": 1.0,
304
+ "linear_soft_target_min_conf": 0.0,
305
+ "linear_soft_target_max_conf": 1.0,
306
+ "t_sampling_mode": "uniform",
307
+ "t_sampling_power": 1.0,
308
+ "t_sampling_eps": 0.0001,
309
+ "t_sampling_logit_mean": -1.5,
310
+ "t_sampling_logit_std": 0.8,
311
+ "dual_t": true,
312
+ "corrupt_t_mode": "same",
313
+ "corrupt_min_t": 0.0,
314
+ "corrupt_max_t": 1.0,
315
+ "prefix_block_prob": 0.0,
316
+ "prefix_block_len": 128,
317
+ "mask_ratio_floor_schedule": "none",
318
+ "dirichlet_endpoint_mode": "categorical_dual_t",
319
+ "dirichlet_semantic_t_mode": "same",
320
+ "dirichlet_semantic_t_value": 0.0,
321
+ "dirichlet_semantic_t_curve": "linear",
322
+ "dirichlet_semantic_t_power": 1.0,
323
+ "endpoint_sequence_random_prob_alpha": 0.0,
324
+ "categorical_wrong_from_full_vocab": true,
325
+ "categorical_wrong_from_batch_valid_tokens": false,
326
+ "categorical_wrong_basin_token_ids": "",
327
+ "categorical_wrong_basin_prob": 0.0,
328
+ "categorical_wrong_unigram_prob": 0.0,
329
+ "categorical_wrong_uniform_prob": 0.0,
330
+ "categorical_wrong_prob_floor": 0.0,
331
+ "categorical_wrong_corpus_unigram_path": "",
332
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
333
+ "categorical_wrong_basin_shared_prob": 0.0,
334
+ "categorical_wrong_unigram_shared_prob": 0.0,
335
+ "mask_mixture_original_prob": 0.0,
336
+ "mask_mixture_lowk_prob": 0.0,
337
+ "mask_mixture_lowcorrupt_prob": 0.0,
338
+ "mask_mixture_block_prob": 0.0,
339
+ "mask_mixture_all_prob": 1.0,
340
+ "mask_mixture_lowk_clean_tokens": "0",
341
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
342
+ "mask_mixture_block_tokens": "64,128",
343
+ "simplex_bridge_sampler": "dirichlet",
344
+ "logistic_normal_sigma_min": 0.1,
345
+ "logistic_normal_sigma_max": 1.0,
346
+ "logistic_normal_tau_min": 1.0,
347
+ "logistic_normal_tau_max": 1.0,
348
+ "torch_compile": false,
349
+ "compile_mode": "max-autotune",
350
+ "state_format": "prob",
351
+ "meanflow_weight": 0.0,
352
+ "rollout_train_prob": 0.5,
353
+ "rollout_train_steps": 4,
354
+ "rollout_train_steps_min": 0,
355
+ "rollout_train_infer_steps": 1,
356
+ "rollout_train_time_mode": "sampled_path",
357
+ "rollout_train_s_dist": "uniform",
358
+ "rollout_train_s_min_frac": 0.0,
359
+ "rollout_train_s_max_frac": 0.25,
360
+ "rollout_train_s_beta_alpha": 2.0,
361
+ "rollout_train_s_beta_beta": 6.0,
362
+ "rollout_train_temp": 1.0,
363
+ "rollout_train_max_gamma": 1.0,
364
+ "rollout_train_corrupt_only": true,
365
+ "rollout_train_samplewise": true,
366
+ "rollout_train_compute_always": false,
367
+ "rollout_train_sync_t": true,
368
+ "bridge_noise_init": "logistic_normal",
369
+ "noise_sigma": -1.0,
370
+ "allow_tf32": true,
371
+ "activation_checkpointing": false,
372
+ "activation_checkpoint_interval": 1,
373
+ "activation_checkpoint_scope": "block",
374
+ "ddp_static_graph": false,
375
+ "ddp_gradient_as_bucket_view": true,
376
+ "blocking_data_transfer": false,
377
+ "dataloader_prefetch_factor": 4,
378
+ "full_train_stats": false,
379
+ "tokenized_hf": false,
380
+ "tokenized_pad_token": "pad",
381
+ "elf_conditional_hf": false,
382
+ "record_pad_truncate": false,
383
+ "record_add_eos": false,
384
+ "record_add_special_tokens": false,
385
+ "record_pad_token": "pad",
386
+ "record_shuffle_buffer": 10000,
387
+ "wrap": true,
388
+ "wrap_mode": "stream",
389
+ "wrap_record_buffer_size": 200,
390
+ "owt_cached_chunks": true,
391
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/t5_len1024_train8_compact_overfit",
392
+ "owt_chunk_cache_rebuild": false,
393
+ "owt_chunk_cache_write_batch": 4096,
394
+ "owt_exact_repeat_per_chunk": 64,
395
+ "online_chunk_shuffle": false,
396
+ "online_chunk_shuffle_buffer": 10000,
397
+ "openwebtext_split": "train_minus_100k",
398
+ "detokenizer": "auto",
399
+ "resolved_detokenizer": null,
400
+ "num_workers": 0,
401
+ "latest_every": 1000,
402
+ "resume_path": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt"
403
+ }
404
+ step=1100 epoch=1100/2000 epoch_step=1/1 micro_steps=1100 elapsed=26.2s lr=2.000000e-03 loss=1.2344 loss_recon=1.2344 loss_meanflow=0.0000 mean_model_t=0.5013 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5002 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7302 corrupt_frac=1.0000 acc_corrupt=0.7302 loss_corrupt=1.2344 wrong_frac=0.4986 init_acc_corrupt=0.4980 acc_corrupt_t_0p0_0p2=0.0838 corrupt_frac_t_0p0_0p2=0.1952 acc_corrupt_t_0p2_0p4=0.6282 corrupt_frac_t_0p2_0p4=0.2063 acc_corrupt_t_0p4_0p6=0.9396 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.9896 corrupt_frac_t_0p6_0p8=0.1976 acc_corrupt_t_0p8_1p0=0.9987 corrupt_frac_t_0p8_1p0=0.2036 out_w_norm=12.2959 out_g_norm=0.5515 loss_all=0.9902 init_gold_top10=0.6298 init_gold_top100=0.7388 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.5386 init_acc_rollout_kept=0.4387 logit_acc_rollout_applied=0.7955 logit_acc_rollout_kept=0.7412
405
+ step=1200 epoch=1200/2000 epoch_step=1/1 micro_steps=1200 elapsed=25.3s lr=2.000000e-03 loss=1.0733 loss_recon=1.0733 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7618 corrupt_frac=1.0000 acc_corrupt=0.7618 loss_corrupt=1.0733 wrong_frac=0.5014 init_acc_corrupt=0.5039 acc_corrupt_t_0p0_0p2=0.1136 corrupt_frac_t_0p0_0p2=0.2037 acc_corrupt_t_0p2_0p4=0.7417 corrupt_frac_t_0p2_0p4=0.1982 acc_corrupt_t_0p4_0p6=0.9715 corrupt_frac_t_0p4_0p6=0.1956 acc_corrupt_t_0p6_0p8=0.9962 corrupt_frac_t_0p6_0p8=0.2056 acc_corrupt_t_0p8_1p0=0.9995 corrupt_frac_t_0p8_1p0=0.1969 out_w_norm=12.4860 out_g_norm=0.5688 loss_all=0.9003 init_gold_top10=0.6437 init_gold_top100=0.7551 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.6043 init_acc_rollout_kept=0.4286 logit_acc_rollout_applied=0.8600 logit_acc_rollout_kept=0.7247
406
+ step=1300 epoch=1300/2000 epoch_step=1/1 micro_steps=1300 elapsed=25.5s lr=2.000000e-03 loss=0.9386 loss_recon=0.9386 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.4953 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5102 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7883 corrupt_frac=1.0000 acc_corrupt=0.7883 loss_corrupt=0.9386 wrong_frac=0.5048 init_acc_corrupt=0.5082 acc_corrupt_t_0p0_0p2=0.1491 corrupt_frac_t_0p0_0p2=0.2036 acc_corrupt_t_0p2_0p4=0.8254 corrupt_frac_t_0p2_0p4=0.2030 acc_corrupt_t_0p4_0p6=0.9864 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=0.9985 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.9998 corrupt_frac_t_0p8_1p0=0.1934 out_w_norm=12.6527 out_g_norm=0.5836 loss_all=0.9096 init_gold_top10=0.6388 init_gold_top100=0.7579 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.5984 init_acc_rollout_kept=0.4573 logit_acc_rollout_applied=0.8193 logit_acc_rollout_kept=0.7818
407
+ step=1400 epoch=1400/2000 epoch_step=1/1 micro_steps=1400 elapsed=25.3s lr=2.000000e-03 loss=0.8326 loss_recon=0.8326 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4980 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8089 corrupt_frac=1.0000 acc_corrupt=0.8089 loss_corrupt=0.8326 wrong_frac=0.5019 init_acc_corrupt=0.5159 acc_corrupt_t_0p0_0p2=0.1867 corrupt_frac_t_0p0_0p2=0.2061 acc_corrupt_t_0p2_0p4=0.8874 corrupt_frac_t_0p2_0p4=0.1960 acc_corrupt_t_0p4_0p6=0.9936 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.9992 corrupt_frac_t_0p6_0p8=0.2020 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.1985 out_w_norm=12.7900 out_g_norm=0.6058 loss_all=0.9790 init_gold_top10=0.6331 init_gold_top100=0.7760 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5342 init_acc_rollout_kept=0.4870 logit_acc_rollout_applied=0.7610 logit_acc_rollout_kept=0.8135
408
+ step=1500 epoch=1500/2000 epoch_step=1/1 micro_steps=1500 elapsed=25.4s lr=2.000000e-03 loss=0.7346 loss_recon=0.7346 loss_meanflow=0.0000 mean_model_t=0.4998 mean_corrupt_t=0.4998 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8291 corrupt_frac=1.0000 acc_corrupt=0.8291 loss_corrupt=0.7346 wrong_frac=0.5002 init_acc_corrupt=0.5247 acc_corrupt_t_0p0_0p2=0.2215 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.9265 corrupt_frac_t_0p2_0p4=0.1971 acc_corrupt_t_0p4_0p6=0.9967 corrupt_frac_t_0p4_0p6=0.2008 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2020 out_w_norm=12.8940 out_g_norm=0.5915 loss_all=0.7378 init_gold_top10=0.6498 init_gold_top100=0.7620 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.5844 init_acc_rollout_kept=0.4937 logit_acc_rollout_applied=0.8150 logit_acc_rollout_kept=0.8507
409
+ step=1600 epoch=1600/2000 epoch_step=1/1 micro_steps=1600 elapsed=25.2s lr=2.000000e-03 loss=0.6538 loss_recon=0.6538 loss_meanflow=0.0000 mean_model_t=0.5009 mean_corrupt_t=0.5009 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4992 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8438 corrupt_frac=1.0000 acc_corrupt=0.8438 loss_corrupt=0.6538 wrong_frac=0.4992 init_acc_corrupt=0.5289 acc_corrupt_t_0p0_0p2=0.2541 corrupt_frac_t_0p0_0p2=0.1963 acc_corrupt_t_0p2_0p4=0.9533 corrupt_frac_t_0p2_0p4=0.2016 acc_corrupt_t_0p4_0p6=0.9982 corrupt_frac_t_0p4_0p6=0.2056 acc_corrupt_t_0p6_0p8=0.9997 corrupt_frac_t_0p6_0p8=0.1941 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2024 out_w_norm=12.9664 out_g_norm=0.5899 loss_all=0.6217 init_gold_top10=0.6633 init_gold_top100=0.7844 rollout_applied_pos_frac=0.5469 init_acc_rollout_applied=0.6083 init_acc_rollout_kept=0.4697 logit_acc_rollout_applied=0.8460 logit_acc_rollout_kept=0.8638
410
+ step=1700 epoch=1700/2000 epoch_step=1/1 micro_steps=1700 elapsed=25.3s lr=2.000000e-03 loss=0.6057 loss_recon=0.6057 loss_meanflow=0.0000 mean_model_t=0.5003 mean_corrupt_t=0.5003 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4918 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8511 corrupt_frac=1.0000 acc_corrupt=0.8511 loss_corrupt=0.6057 wrong_frac=0.4998 init_acc_corrupt=0.5276 acc_corrupt_t_0p0_0p2=0.2917 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.9667 corrupt_frac_t_0p2_0p4=0.1945 acc_corrupt_t_0p4_0p6=0.9989 corrupt_frac_t_0p4_0p6=0.2064 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.1962 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2023 out_w_norm=13.0069 out_g_norm=0.5721 loss_all=0.5100 init_gold_top10=0.6651 init_gold_top100=0.7683 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5736 init_acc_rollout_kept=0.5063 logit_acc_rollout_applied=0.8628 logit_acc_rollout_kept=0.8789
411
+ step=1800 epoch=1800/2000 epoch_step=1/1 micro_steps=1800 elapsed=25.3s lr=2.000000e-03 loss=0.5628 loss_recon=0.5628 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4990 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4960 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8568 corrupt_frac=1.0000 acc_corrupt=0.8568 loss_corrupt=0.5628 wrong_frac=0.5010 init_acc_corrupt=0.5297 acc_corrupt_t_0p0_0p2=0.3077 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.9789 corrupt_frac_t_0p2_0p4=0.2004 acc_corrupt_t_0p4_0p6=0.9993 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.2018 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.1982 out_w_norm=13.0385 out_g_norm=0.5779 loss_all=0.4891 init_gold_top10=0.6652 init_gold_top100=0.7587 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5679 init_acc_rollout_kept=0.4894 logit_acc_rollout_applied=0.8559 logit_acc_rollout_kept=0.9019
412
+ step=1900 epoch=1900/2000 epoch_step=1/1 micro_steps=1900 elapsed=25.5s lr=2.000000e-03 loss=0.5038 loss_recon=0.5038 loss_meanflow=0.0000 mean_model_t=0.5027 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8672 corrupt_frac=1.0000 acc_corrupt=0.8672 loss_corrupt=0.5038 wrong_frac=0.4975 init_acc_corrupt=0.5357 acc_corrupt_t_0p0_0p2=0.3506 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.9836 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.9994 corrupt_frac_t_0p4_0p6=0.1958 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2059 out_w_norm=13.0550 out_g_norm=0.5923 loss_all=0.5111 init_gold_top10=0.6586 init_gold_top100=0.7661 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5827 init_acc_rollout_kept=0.4748 logit_acc_rollout_applied=0.8607 logit_acc_rollout_kept=0.8673
413
+ step=2000 epoch=2000/2000 epoch_step=1/1 micro_steps=2000 elapsed=25.3s lr=2.000000e-03 loss=0.4619 loss_recon=0.4619 loss_meanflow=0.0000 mean_model_t=0.5005 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4931 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8734 corrupt_frac=1.0000 acc_corrupt=0.8734 loss_corrupt=0.4619 wrong_frac=0.4995 init_acc_corrupt=0.5314 acc_corrupt_t_0p0_0p2=0.3878 corrupt_frac_t_0p0_0p2=0.2029 acc_corrupt_t_0p2_0p4=0.9887 corrupt_frac_t_0p2_0p4=0.1965 acc_corrupt_t_0p4_0p6=0.9996 corrupt_frac_t_0p4_0p6=0.1988 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2028 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1990 out_w_norm=13.0738 out_g_norm=0.5136 loss_all=0.2064 init_gold_top10=0.7059 init_gold_top100=0.7647 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6530 init_acc_rollout_kept=0.5098 logit_acc_rollout_applied=0.9759 logit_acc_rollout_kept=0.9126
414
+ NCCL version 2.25.1+cuda12.8
415
+ resumed_from=runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt start_step=2001
416
+ {
417
+ "device": "cuda:0",
418
+ "rank": 0,
419
+ "world_size": 4,
420
+ "samples": "owt_cached_chunks:8",
421
+ "vocab_size": 2423,
422
+ "tokenizer_vocab_size": 32100,
423
+ "save_dir": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800",
424
+ "batch_size": 128,
425
+ "grad_accum": 1,
426
+ "effective_batch_size": 512,
427
+ "global_batch_size": 512,
428
+ "lr_schedule": "constant_warmup",
429
+ "optimizer": "muon",
430
+ "epochs": 0.0,
431
+ "steps_per_epoch": 1,
432
+ "total_steps": 3000,
433
+ "warmup_steps": 10,
434
+ "warmup_epochs": -1.0,
435
+ "min_lr": 0.0,
436
+ "weight_decay": 0.1,
437
+ "output_weight_decay": -1.0,
438
+ "adamw_param_groups": "nanogpt",
439
+ "adam_beta1": 0.9,
440
+ "adam_beta2": 0.95,
441
+ "adam_eps": 1e-08,
442
+ "muon_impl": "legacy",
443
+ "muon_momentum": 0.95,
444
+ "muon_ns_steps": 5,
445
+ "muon_update_scale": 1.0,
446
+ "muon_nesterov": false,
447
+ "muon_width_scale": false,
448
+ "muon_grouping": "legacy_dim_ge_2",
449
+ "muon_param_count": 2523776,
450
+ "muon_adam_param_count": 8192,
451
+ "muon_param_names": [
452
+ "vocab_embed.embedding",
453
+ "sigma_map.net.0.weight",
454
+ "sigma_map.net.2.weight",
455
+ "blocks.0.attn_qkv.weight",
456
+ "blocks.0.attn_out.weight",
457
+ "blocks.0.mlp.0.weight",
458
+ "blocks.0.mlp.2.weight",
459
+ "blocks.0.adaLN_modulation.weight",
460
+ "blocks.1.attn_qkv.weight",
461
+ "blocks.1.attn_out.weight",
462
+ "blocks.1.mlp.0.weight",
463
+ "blocks.1.mlp.2.weight",
464
+ "blocks.1.adaLN_modulation.weight",
465
+ "blocks.2.attn_qkv.weight",
466
+ "blocks.2.attn_out.weight",
467
+ "blocks.2.mlp.0.weight",
468
+ "blocks.2.mlp.2.weight",
469
+ "blocks.2.adaLN_modulation.weight",
470
+ "output_layer.linear.weight",
471
+ "output_layer.adaLN_modulation.weight"
472
+ ],
473
+ "muon_adam_param_names": [
474
+ "sigma_map.net.0.bias",
475
+ "sigma_map.net.2.bias",
476
+ "blocks.0.norm1.weight",
477
+ "blocks.0.norm2.weight",
478
+ "blocks.0.mlp.0.bias",
479
+ "blocks.0.mlp.2.bias",
480
+ "blocks.0.adaLN_modulation.bias",
481
+ "blocks.1.norm1.weight",
482
+ "blocks.1.norm2.weight",
483
+ "blocks.1.mlp.0.bias",
484
+ "blocks.1.mlp.2.bias",
485
+ "blocks.1.adaLN_modulation.bias",
486
+ "blocks.2.norm1.weight",
487
+ "blocks.2.norm2.weight",
488
+ "blocks.2.mlp.0.bias",
489
+ "blocks.2.mlp.2.bias",
490
+ "blocks.2.adaLN_modulation.bias",
491
+ "output_layer.norm_final.weight",
492
+ "output_layer.adaLN_modulation.bias"
493
+ ],
494
+ "muon_effective_nesterov": false,
495
+ "muon_effective_width_scale": false,
496
+ "muon_effective_weight_decay": 0.1,
497
+ "muon_adam_fallback_nesterov": false,
498
+ "muon_adam_fallback_weight_decay": 0.1,
499
+ "ema_decay": 0.9999,
500
+ "ema_start_step": 0,
501
+ "model_type": "ddit",
502
+ "ddit_mlp_type": "gelu",
503
+ "elf_num_time_tokens": 4,
504
+ "elf_num_model_mode_tokens": 0,
505
+ "qk_norm": true,
506
+ "output_bias": false,
507
+ "output_init_std": -1.0,
508
+ "norm_type": "rmsnorm",
509
+ "target_loss": "hard_ce",
510
+ "linear_soft_target_power": 1.0,
511
+ "linear_soft_target_min_conf": 0.0,
512
+ "linear_soft_target_max_conf": 1.0,
513
+ "t_sampling_mode": "uniform",
514
+ "t_sampling_power": 1.0,
515
+ "t_sampling_eps": 0.0001,
516
+ "t_sampling_logit_mean": -1.5,
517
+ "t_sampling_logit_std": 0.8,
518
+ "dual_t": true,
519
+ "corrupt_t_mode": "same",
520
+ "corrupt_min_t": 0.0,
521
+ "corrupt_max_t": 1.0,
522
+ "prefix_block_prob": 0.0,
523
+ "prefix_block_len": 128,
524
+ "mask_ratio_floor_schedule": "none",
525
+ "dirichlet_endpoint_mode": "categorical_dual_t",
526
+ "dirichlet_semantic_t_mode": "same",
527
+ "dirichlet_semantic_t_value": 0.0,
528
+ "dirichlet_semantic_t_curve": "linear",
529
+ "dirichlet_semantic_t_power": 1.0,
530
+ "endpoint_sequence_random_prob_alpha": 0.0,
531
+ "categorical_wrong_from_full_vocab": true,
532
+ "categorical_wrong_from_batch_valid_tokens": false,
533
+ "categorical_wrong_basin_token_ids": "",
534
+ "categorical_wrong_basin_prob": 0.0,
535
+ "categorical_wrong_unigram_prob": 0.0,
536
+ "categorical_wrong_uniform_prob": 0.0,
537
+ "categorical_wrong_prob_floor": 0.0,
538
+ "categorical_wrong_corpus_unigram_path": "",
539
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
540
+ "categorical_wrong_basin_shared_prob": 0.0,
541
+ "categorical_wrong_unigram_shared_prob": 0.0,
542
+ "mask_mixture_original_prob": 0.0,
543
+ "mask_mixture_lowk_prob": 0.0,
544
+ "mask_mixture_lowcorrupt_prob": 0.0,
545
+ "mask_mixture_block_prob": 0.0,
546
+ "mask_mixture_all_prob": 1.0,
547
+ "mask_mixture_lowk_clean_tokens": "0",
548
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
549
+ "mask_mixture_block_tokens": "64,128",
550
+ "simplex_bridge_sampler": "dirichlet",
551
+ "logistic_normal_sigma_min": 0.1,
552
+ "logistic_normal_sigma_max": 1.0,
553
+ "logistic_normal_tau_min": 1.0,
554
+ "logistic_normal_tau_max": 1.0,
555
+ "torch_compile": false,
556
+ "compile_mode": "max-autotune",
557
+ "state_format": "prob",
558
+ "meanflow_weight": 0.0,
559
+ "rollout_train_prob": 0.5,
560
+ "rollout_train_steps": 4,
561
+ "rollout_train_steps_min": 0,
562
+ "rollout_train_infer_steps": 1,
563
+ "rollout_train_time_mode": "sampled_path",
564
+ "rollout_train_s_dist": "uniform",
565
+ "rollout_train_s_min_frac": 0.0,
566
+ "rollout_train_s_max_frac": 0.25,
567
+ "rollout_train_s_beta_alpha": 2.0,
568
+ "rollout_train_s_beta_beta": 6.0,
569
+ "rollout_train_temp": 1.0,
570
+ "rollout_train_max_gamma": 1.0,
571
+ "rollout_train_corrupt_only": true,
572
+ "rollout_train_samplewise": true,
573
+ "rollout_train_compute_always": false,
574
+ "rollout_train_sync_t": true,
575
+ "bridge_noise_init": "logistic_normal",
576
+ "noise_sigma": -1.0,
577
+ "allow_tf32": true,
578
+ "activation_checkpointing": false,
579
+ "activation_checkpoint_interval": 1,
580
+ "activation_checkpoint_scope": "block",
581
+ "ddp_static_graph": false,
582
+ "ddp_gradient_as_bucket_view": true,
583
+ "blocking_data_transfer": false,
584
+ "dataloader_prefetch_factor": 4,
585
+ "full_train_stats": false,
586
+ "tokenized_hf": false,
587
+ "tokenized_pad_token": "pad",
588
+ "elf_conditional_hf": false,
589
+ "record_pad_truncate": false,
590
+ "record_add_eos": false,
591
+ "record_add_special_tokens": false,
592
+ "record_pad_token": "pad",
593
+ "record_shuffle_buffer": 10000,
594
+ "wrap": true,
595
+ "wrap_mode": "stream",
596
+ "wrap_record_buffer_size": 200,
597
+ "owt_cached_chunks": true,
598
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/t5_len1024_train8_compact_overfit",
599
+ "owt_chunk_cache_rebuild": false,
600
+ "owt_chunk_cache_write_batch": 4096,
601
+ "owt_exact_repeat_per_chunk": 64,
602
+ "online_chunk_shuffle": false,
603
+ "online_chunk_shuffle_buffer": 10000,
604
+ "openwebtext_split": "train_minus_100k",
605
+ "detokenizer": "auto",
606
+ "resolved_detokenizer": null,
607
+ "num_workers": 0,
608
+ "latest_every": 1000,
609
+ "resume_path": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt"
610
+ }
611
+ step=2100 epoch=2100/3000 epoch_step=1/1 micro_steps=2100 elapsed=26.2s lr=2.000000e-03 loss=0.4190 loss_recon=0.4190 loss_meanflow=0.0000 mean_model_t=0.5013 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5002 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8812 corrupt_frac=1.0000 acc_corrupt=0.8812 loss_corrupt=0.4190 wrong_frac=0.4986 init_acc_corrupt=0.5341 acc_corrupt_t_0p0_0p2=0.4008 corrupt_frac_t_0p0_0p2=0.1952 acc_corrupt_t_0p2_0p4=0.9915 corrupt_frac_t_0p2_0p4=0.2063 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1976 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2036 out_w_norm=13.0937 out_g_norm=0.5156 loss_all=0.3296 init_gold_top10=0.6719 init_gold_top100=0.7541 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6163 init_acc_rollout_kept=0.4387 logit_acc_rollout_applied=0.9030 logit_acc_rollout_kept=0.9081
612
+ step=2200 epoch=2200/3000 epoch_step=1/1 micro_steps=2200 elapsed=25.3s lr=2.000000e-03 loss=0.4020 loss_recon=0.4020 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8834 corrupt_frac=1.0000 acc_corrupt=0.8834 loss_corrupt=0.4020 wrong_frac=0.5014 init_acc_corrupt=0.5317 acc_corrupt_t_0p0_0p2=0.4348 corrupt_frac_t_0p0_0p2=0.2037 acc_corrupt_t_0p2_0p4=0.9932 corrupt_frac_t_0p2_0p4=0.1982 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.1956 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2056 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1969 out_w_norm=13.0841 out_g_norm=0.5157 loss_all=0.3613 init_gold_top10=0.6713 init_gold_top100=0.7618 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.6304 init_acc_rollout_kept=0.4286 logit_acc_rollout_applied=0.9367 logit_acc_rollout_kept=0.8423
613
+ step=2300 epoch=2300/3000 epoch_step=1/1 micro_steps=2300 elapsed=25.5s lr=2.000000e-03 loss=0.3683 loss_recon=0.3683 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.4953 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5102 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8895 corrupt_frac=1.0000 acc_corrupt=0.8895 loss_corrupt=0.3683 wrong_frac=0.5048 init_acc_corrupt=0.5306 acc_corrupt_t_0p0_0p2=0.4633 corrupt_frac_t_0p0_0p2=0.2036 acc_corrupt_t_0p2_0p4=0.9942 corrupt_frac_t_0p2_0p4=0.2030 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1934 out_w_norm=13.0576 out_g_norm=0.5447 loss_all=0.3975 init_gold_top10=0.6742 init_gold_top100=0.7632 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6516 init_acc_rollout_kept=0.4573 logit_acc_rollout_applied=0.8970 logit_acc_rollout_kept=0.8564
614
+ step=2400 epoch=2400/3000 epoch_step=1/1 micro_steps=2400 elapsed=25.3s lr=2.000000e-03 loss=0.3434 loss_recon=0.3434 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4980 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8949 corrupt_frac=1.0000 acc_corrupt=0.8949 loss_corrupt=0.3434 wrong_frac=0.5019 init_acc_corrupt=0.5332 acc_corrupt_t_0p0_0p2=0.4940 corrupt_frac_t_0p0_0p2=0.2061 acc_corrupt_t_0p2_0p4=0.9960 corrupt_frac_t_0p2_0p4=0.1960 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2020 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1985 out_w_norm=13.0340 out_g_norm=0.5392 loss_all=0.4300 init_gold_top10=0.6882 init_gold_top100=0.7822 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5630 init_acc_rollout_kept=0.4870 logit_acc_rollout_applied=0.8407 logit_acc_rollout_kept=0.8916
615
+ step=2500 epoch=2500/3000 epoch_step=1/1 micro_steps=2500 elapsed=25.4s lr=2.000000e-03 loss=0.3163 loss_recon=0.3163 loss_meanflow=0.0000 mean_model_t=0.4998 mean_corrupt_t=0.4998 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8987 corrupt_frac=1.0000 acc_corrupt=0.8987 loss_corrupt=0.3163 wrong_frac=0.5002 init_acc_corrupt=0.5391 acc_corrupt_t_0p0_0p2=0.4964 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.9967 corrupt_frac_t_0p2_0p4=0.1971 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.2008 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2020 out_w_norm=12.9838 out_g_norm=0.4663 loss_all=0.4043 init_gold_top10=0.6901 init_gold_top100=0.7652 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.6117 init_acc_rollout_kept=0.4937 logit_acc_rollout_applied=0.8817 logit_acc_rollout_kept=0.9003
616
+ step=2600 epoch=2600/3000 epoch_step=1/1 micro_steps=2600 elapsed=25.3s lr=2.000000e-03 loss=0.2946 loss_recon=0.2946 loss_meanflow=0.0000 mean_model_t=0.5009 mean_corrupt_t=0.5009 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4992 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9046 corrupt_frac=1.0000 acc_corrupt=0.9046 loss_corrupt=0.2946 wrong_frac=0.4992 init_acc_corrupt=0.5390 acc_corrupt_t_0p0_0p2=0.5172 corrupt_frac_t_0p0_0p2=0.1963 acc_corrupt_t_0p2_0p4=0.9971 corrupt_frac_t_0p2_0p4=0.2016 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.2056 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1941 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2024 out_w_norm=12.9131 out_g_norm=0.4678 loss_all=0.2708 init_gold_top10=0.7101 init_gold_top100=0.7854 rollout_applied_pos_frac=0.5469 init_acc_rollout_applied=0.6213 init_acc_rollout_kept=0.4697 logit_acc_rollout_applied=0.8978 logit_acc_rollout_kept=0.9240
617
+ step=2700 epoch=2700/3000 epoch_step=1/1 micro_steps=2700 elapsed=25.3s lr=2.000000e-03 loss=0.2811 loss_recon=0.2811 loss_meanflow=0.0000 mean_model_t=0.5003 mean_corrupt_t=0.5003 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4918 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9079 corrupt_frac=1.0000 acc_corrupt=0.9079 loss_corrupt=0.2811 wrong_frac=0.4998 init_acc_corrupt=0.5370 acc_corrupt_t_0p0_0p2=0.5432 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.9978 corrupt_frac_t_0p2_0p4=0.1945 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.2064 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1962 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2023 out_w_norm=12.8539 out_g_norm=0.4151 loss_all=0.2373 init_gold_top10=0.6927 init_gold_top100=0.7685 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5863 init_acc_rollout_kept=0.5063 logit_acc_rollout_applied=0.8911 logit_acc_rollout_kept=0.9464
618
+ step=2800 epoch=2800/3000 epoch_step=1/1 micro_steps=2800 elapsed=25.3s lr=2.000000e-03 loss=0.2840 loss_recon=0.2840 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4990 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4960 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9050 corrupt_frac=1.0000 acc_corrupt=0.9050 loss_corrupt=0.2840 wrong_frac=0.5010 init_acc_corrupt=0.5367 acc_corrupt_t_0p0_0p2=0.5279 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.9985 corrupt_frac_t_0p2_0p4=0.2004 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2018 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1982 out_w_norm=12.7871 out_g_norm=0.3989 loss_all=0.2110 init_gold_top10=0.6883 init_gold_top100=0.7588 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5831 init_acc_rollout_kept=0.4894 logit_acc_rollout_applied=0.9010 logit_acc_rollout_kept=0.9581
619
+ step=2900 epoch=2900/3000 epoch_step=1/1 micro_steps=2900 elapsed=25.5s lr=2.000000e-03 loss=0.2628 loss_recon=0.2628 loss_meanflow=0.0000 mean_model_t=0.5027 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9106 corrupt_frac=1.0000 acc_corrupt=0.9106 loss_corrupt=0.2628 wrong_frac=0.4975 init_acc_corrupt=0.5428 acc_corrupt_t_0p0_0p2=0.5528 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.9986 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1958 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2059 out_w_norm=12.7316 out_g_norm=0.3606 loss_all=0.3097 init_gold_top10=0.6875 init_gold_top100=0.7661 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5933 init_acc_rollout_kept=0.4748 logit_acc_rollout_applied=0.8714 logit_acc_rollout_kept=0.9071
620
+ step=3000 epoch=3000/3000 epoch_step=1/1 micro_steps=3000 elapsed=25.3s lr=2.000000e-03 loss=0.2591 loss_recon=0.2591 loss_meanflow=0.0000 mean_model_t=0.5005 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4931 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9125 corrupt_frac=1.0000 acc_corrupt=0.9125 loss_corrupt=0.2591 wrong_frac=0.4995 init_acc_corrupt=0.5377 acc_corrupt_t_0p0_0p2=0.5697 corrupt_frac_t_0p0_0p2=0.2029 acc_corrupt_t_0p2_0p4=0.9991 corrupt_frac_t_0p2_0p4=0.1965 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1988 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2028 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1990 out_w_norm=12.6787 out_g_norm=0.3601 loss_all=0.1232 init_gold_top10=0.7090 init_gold_top100=0.7647 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6620 init_acc_rollout_kept=0.5098 logit_acc_rollout_applied=0.9970 logit_acc_rollout_kept=0.9235
621
+ NCCL version 2.25.1+cuda12.8
622
+ resumed_from=runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt start_step=3001
623
+ {
624
+ "device": "cuda:0",
625
+ "rank": 0,
626
+ "world_size": 4,
627
+ "samples": "owt_cached_chunks:8",
628
+ "vocab_size": 2423,
629
+ "tokenizer_vocab_size": 32100,
630
+ "save_dir": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800",
631
+ "batch_size": 128,
632
+ "grad_accum": 1,
633
+ "effective_batch_size": 512,
634
+ "global_batch_size": 512,
635
+ "lr_schedule": "constant_warmup",
636
+ "optimizer": "muon",
637
+ "epochs": 0.0,
638
+ "steps_per_epoch": 1,
639
+ "total_steps": 4000,
640
+ "warmup_steps": 10,
641
+ "warmup_epochs": -1.0,
642
+ "min_lr": 0.0,
643
+ "weight_decay": 0.1,
644
+ "output_weight_decay": -1.0,
645
+ "adamw_param_groups": "nanogpt",
646
+ "adam_beta1": 0.9,
647
+ "adam_beta2": 0.95,
648
+ "adam_eps": 1e-08,
649
+ "muon_impl": "legacy",
650
+ "muon_momentum": 0.95,
651
+ "muon_ns_steps": 5,
652
+ "muon_update_scale": 1.0,
653
+ "muon_nesterov": false,
654
+ "muon_width_scale": false,
655
+ "muon_grouping": "legacy_dim_ge_2",
656
+ "muon_param_count": 2523776,
657
+ "muon_adam_param_count": 8192,
658
+ "muon_param_names": [
659
+ "vocab_embed.embedding",
660
+ "sigma_map.net.0.weight",
661
+ "sigma_map.net.2.weight",
662
+ "blocks.0.attn_qkv.weight",
663
+ "blocks.0.attn_out.weight",
664
+ "blocks.0.mlp.0.weight",
665
+ "blocks.0.mlp.2.weight",
666
+ "blocks.0.adaLN_modulation.weight",
667
+ "blocks.1.attn_qkv.weight",
668
+ "blocks.1.attn_out.weight",
669
+ "blocks.1.mlp.0.weight",
670
+ "blocks.1.mlp.2.weight",
671
+ "blocks.1.adaLN_modulation.weight",
672
+ "blocks.2.attn_qkv.weight",
673
+ "blocks.2.attn_out.weight",
674
+ "blocks.2.mlp.0.weight",
675
+ "blocks.2.mlp.2.weight",
676
+ "blocks.2.adaLN_modulation.weight",
677
+ "output_layer.linear.weight",
678
+ "output_layer.adaLN_modulation.weight"
679
+ ],
680
+ "muon_adam_param_names": [
681
+ "sigma_map.net.0.bias",
682
+ "sigma_map.net.2.bias",
683
+ "blocks.0.norm1.weight",
684
+ "blocks.0.norm2.weight",
685
+ "blocks.0.mlp.0.bias",
686
+ "blocks.0.mlp.2.bias",
687
+ "blocks.0.adaLN_modulation.bias",
688
+ "blocks.1.norm1.weight",
689
+ "blocks.1.norm2.weight",
690
+ "blocks.1.mlp.0.bias",
691
+ "blocks.1.mlp.2.bias",
692
+ "blocks.1.adaLN_modulation.bias",
693
+ "blocks.2.norm1.weight",
694
+ "blocks.2.norm2.weight",
695
+ "blocks.2.mlp.0.bias",
696
+ "blocks.2.mlp.2.bias",
697
+ "blocks.2.adaLN_modulation.bias",
698
+ "output_layer.norm_final.weight",
699
+ "output_layer.adaLN_modulation.bias"
700
+ ],
701
+ "muon_effective_nesterov": false,
702
+ "muon_effective_width_scale": false,
703
+ "muon_effective_weight_decay": 0.1,
704
+ "muon_adam_fallback_nesterov": false,
705
+ "muon_adam_fallback_weight_decay": 0.1,
706
+ "ema_decay": 0.9999,
707
+ "ema_start_step": 0,
708
+ "model_type": "ddit",
709
+ "ddit_mlp_type": "gelu",
710
+ "elf_num_time_tokens": 4,
711
+ "elf_num_model_mode_tokens": 0,
712
+ "qk_norm": true,
713
+ "output_bias": false,
714
+ "output_init_std": -1.0,
715
+ "norm_type": "rmsnorm",
716
+ "target_loss": "hard_ce",
717
+ "linear_soft_target_power": 1.0,
718
+ "linear_soft_target_min_conf": 0.0,
719
+ "linear_soft_target_max_conf": 1.0,
720
+ "t_sampling_mode": "uniform",
721
+ "t_sampling_power": 1.0,
722
+ "t_sampling_eps": 0.0001,
723
+ "t_sampling_logit_mean": -1.5,
724
+ "t_sampling_logit_std": 0.8,
725
+ "dual_t": true,
726
+ "corrupt_t_mode": "same",
727
+ "corrupt_min_t": 0.0,
728
+ "corrupt_max_t": 1.0,
729
+ "prefix_block_prob": 0.0,
730
+ "prefix_block_len": 128,
731
+ "mask_ratio_floor_schedule": "none",
732
+ "dirichlet_endpoint_mode": "categorical_dual_t",
733
+ "dirichlet_semantic_t_mode": "same",
734
+ "dirichlet_semantic_t_value": 0.0,
735
+ "dirichlet_semantic_t_curve": "linear",
736
+ "dirichlet_semantic_t_power": 1.0,
737
+ "endpoint_sequence_random_prob_alpha": 0.0,
738
+ "categorical_wrong_from_full_vocab": true,
739
+ "categorical_wrong_from_batch_valid_tokens": false,
740
+ "categorical_wrong_basin_token_ids": "",
741
+ "categorical_wrong_basin_prob": 0.0,
742
+ "categorical_wrong_unigram_prob": 0.0,
743
+ "categorical_wrong_uniform_prob": 0.0,
744
+ "categorical_wrong_prob_floor": 0.0,
745
+ "categorical_wrong_corpus_unigram_path": "",
746
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
747
+ "categorical_wrong_basin_shared_prob": 0.0,
748
+ "categorical_wrong_unigram_shared_prob": 0.0,
749
+ "mask_mixture_original_prob": 0.0,
750
+ "mask_mixture_lowk_prob": 0.0,
751
+ "mask_mixture_lowcorrupt_prob": 0.0,
752
+ "mask_mixture_block_prob": 0.0,
753
+ "mask_mixture_all_prob": 1.0,
754
+ "mask_mixture_lowk_clean_tokens": "0",
755
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
756
+ "mask_mixture_block_tokens": "64,128",
757
+ "simplex_bridge_sampler": "dirichlet",
758
+ "logistic_normal_sigma_min": 0.1,
759
+ "logistic_normal_sigma_max": 1.0,
760
+ "logistic_normal_tau_min": 1.0,
761
+ "logistic_normal_tau_max": 1.0,
762
+ "torch_compile": false,
763
+ "compile_mode": "max-autotune",
764
+ "state_format": "prob",
765
+ "meanflow_weight": 0.0,
766
+ "rollout_train_prob": 0.5,
767
+ "rollout_train_steps": 4,
768
+ "rollout_train_steps_min": 0,
769
+ "rollout_train_infer_steps": 1,
770
+ "rollout_train_time_mode": "sampled_path",
771
+ "rollout_train_s_dist": "uniform",
772
+ "rollout_train_s_min_frac": 0.0,
773
+ "rollout_train_s_max_frac": 0.25,
774
+ "rollout_train_s_beta_alpha": 2.0,
775
+ "rollout_train_s_beta_beta": 6.0,
776
+ "rollout_train_temp": 1.0,
777
+ "rollout_train_max_gamma": 1.0,
778
+ "rollout_train_corrupt_only": true,
779
+ "rollout_train_samplewise": true,
780
+ "rollout_train_compute_always": false,
781
+ "rollout_train_sync_t": true,
782
+ "bridge_noise_init": "logistic_normal",
783
+ "noise_sigma": -1.0,
784
+ "allow_tf32": true,
785
+ "activation_checkpointing": false,
786
+ "activation_checkpoint_interval": 1,
787
+ "activation_checkpoint_scope": "block",
788
+ "ddp_static_graph": false,
789
+ "ddp_gradient_as_bucket_view": true,
790
+ "blocking_data_transfer": false,
791
+ "dataloader_prefetch_factor": 4,
792
+ "full_train_stats": false,
793
+ "tokenized_hf": false,
794
+ "tokenized_pad_token": "pad",
795
+ "elf_conditional_hf": false,
796
+ "record_pad_truncate": false,
797
+ "record_add_eos": false,
798
+ "record_add_special_tokens": false,
799
+ "record_pad_token": "pad",
800
+ "record_shuffle_buffer": 10000,
801
+ "wrap": true,
802
+ "wrap_mode": "stream",
803
+ "wrap_record_buffer_size": 200,
804
+ "owt_cached_chunks": true,
805
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/t5_len1024_train8_compact_overfit",
806
+ "owt_chunk_cache_rebuild": false,
807
+ "owt_chunk_cache_write_batch": 4096,
808
+ "owt_exact_repeat_per_chunk": 64,
809
+ "online_chunk_shuffle": false,
810
+ "online_chunk_shuffle_buffer": 10000,
811
+ "openwebtext_split": "train_minus_100k",
812
+ "detokenizer": "auto",
813
+ "resolved_detokenizer": null,
814
+ "num_workers": 0,
815
+ "latest_every": 1000,
816
+ "resume_path": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt"
817
+ }
818
+ step=3100 epoch=3100/4000 epoch_step=1/1 micro_steps=3100 elapsed=26.3s lr=2.000000e-03 loss=0.2585 loss_recon=0.2585 loss_meanflow=0.0000 mean_model_t=0.5013 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5002 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9118 corrupt_frac=1.0000 acc_corrupt=0.9118 loss_corrupt=0.2585 wrong_frac=0.4986 init_acc_corrupt=0.5384 acc_corrupt_t_0p0_0p2=0.5490 corrupt_frac_t_0p0_0p2=0.1952 acc_corrupt_t_0p2_0p4=0.9993 corrupt_frac_t_0p2_0p4=0.2063 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.1976 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2036 out_w_norm=12.6446 out_g_norm=0.3371 loss_all=0.1788 init_gold_top10=0.6953 init_gold_top100=0.7541 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6225 init_acc_rollout_kept=0.4387 logit_acc_rollout_applied=0.9237 logit_acc_rollout_kept=0.9519
819
+ step=3200 epoch=3200/4000 epoch_step=1/1 micro_steps=3200 elapsed=25.6s lr=2.000000e-03 loss=0.2490 loss_recon=0.2490 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9133 corrupt_frac=1.0000 acc_corrupt=0.9133 loss_corrupt=0.2490 wrong_frac=0.5014 init_acc_corrupt=0.5363 acc_corrupt_t_0p0_0p2=0.5749 corrupt_frac_t_0p0_0p2=0.2037 acc_corrupt_t_0p2_0p4=0.9994 corrupt_frac_t_0p2_0p4=0.1982 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1956 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2056 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1969 out_w_norm=12.6089 out_g_norm=0.2866 loss_all=0.2228 init_gold_top10=0.6804 init_gold_top100=0.7618 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.6477 init_acc_rollout_kept=0.4286 logit_acc_rollout_applied=0.9706 logit_acc_rollout_kept=0.8768
820
+ step=3300 epoch=3300/4000 epoch_step=1/1 micro_steps=3300 elapsed=25.7s lr=2.000000e-03 loss=0.2485 loss_recon=0.2485 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.4953 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5102 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9144 corrupt_frac=1.0000 acc_corrupt=0.9144 loss_corrupt=0.2485 wrong_frac=0.5048 init_acc_corrupt=0.5349 acc_corrupt_t_0p0_0p2=0.5806 corrupt_frac_t_0p0_0p2=0.2036 acc_corrupt_t_0p2_0p4=0.9991 corrupt_frac_t_0p2_0p4=0.2030 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1934 out_w_norm=12.5783 out_g_norm=0.3324 loss_all=0.2909 init_gold_top10=0.6843 init_gold_top100=0.7632 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6723 init_acc_rollout_kept=0.4573 logit_acc_rollout_applied=0.9145 logit_acc_rollout_kept=0.8771
821
+ step=3400 epoch=3400/4000 epoch_step=1/1 micro_steps=3400 elapsed=25.5s lr=2.000000e-03 loss=0.2423 loss_recon=0.2423 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4980 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9170 corrupt_frac=1.0000 acc_corrupt=0.9170 loss_corrupt=0.2423 wrong_frac=0.5019 init_acc_corrupt=0.5367 acc_corrupt_t_0p0_0p2=0.5984 corrupt_frac_t_0p0_0p2=0.2061 acc_corrupt_t_0p2_0p4=0.9990 corrupt_frac_t_0p2_0p4=0.1960 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2020 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1985 out_w_norm=12.5616 out_g_norm=0.2825 loss_all=0.3823 init_gold_top10=0.7051 init_gold_top100=0.7822 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5722 init_acc_rollout_kept=0.4870 logit_acc_rollout_applied=0.8564 logit_acc_rollout_kept=0.9133
822
+ step=3500 epoch=3500/4000 epoch_step=1/1 micro_steps=3500 elapsed=25.5s lr=2.000000e-03 loss=0.2440 loss_recon=0.2440 loss_meanflow=0.0000 mean_model_t=0.4998 mean_corrupt_t=0.4998 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9168 corrupt_frac=1.0000 acc_corrupt=0.9168 loss_corrupt=0.2440 wrong_frac=0.5002 init_acc_corrupt=0.5414 acc_corrupt_t_0p0_0p2=0.5851 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.9987 corrupt_frac_t_0p2_0p4=0.1971 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2008 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2020 out_w_norm=12.5491 out_g_norm=0.3061 loss_all=0.3157 init_gold_top10=0.7029 init_gold_top100=0.7652 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.6169 init_acc_rollout_kept=0.4937 logit_acc_rollout_applied=0.8954 logit_acc_rollout_kept=0.9080
823
+ step=3600 epoch=3600/4000 epoch_step=1/1 micro_steps=3600 elapsed=25.3s lr=2.000000e-03 loss=0.2269 loss_recon=0.2269 loss_meanflow=0.0000 mean_model_t=0.5009 mean_corrupt_t=0.5009 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4992 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9208 corrupt_frac=1.0000 acc_corrupt=0.9208 loss_corrupt=0.2269 wrong_frac=0.4992 init_acc_corrupt=0.5419 acc_corrupt_t_0p0_0p2=0.5973 corrupt_frac_t_0p0_0p2=0.1963 acc_corrupt_t_0p2_0p4=0.9994 corrupt_frac_t_0p2_0p4=0.2016 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2056 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.1941 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2024 out_w_norm=12.5320 out_g_norm=0.2724 loss_all=0.2221 init_gold_top10=0.7132 init_gold_top100=0.7854 rollout_applied_pos_frac=0.5469 init_acc_rollout_applied=0.6239 init_acc_rollout_kept=0.4697 logit_acc_rollout_applied=0.9083 logit_acc_rollout_kept=0.9323
824
+ step=3700 epoch=3700/4000 epoch_step=1/1 micro_steps=3700 elapsed=25.3s lr=2.000000e-03 loss=0.2286 loss_recon=0.2286 loss_meanflow=0.0000 mean_model_t=0.5003 mean_corrupt_t=0.5003 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4918 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9222 corrupt_frac=1.0000 acc_corrupt=0.9222 loss_corrupt=0.2286 wrong_frac=0.4998 init_acc_corrupt=0.5391 acc_corrupt_t_0p0_0p2=0.6131 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.9992 corrupt_frac_t_0p2_0p4=0.1945 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.2064 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1962 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2023 out_w_norm=12.5313 out_g_norm=0.2704 loss_all=0.1737 init_gold_top10=0.7026 init_gold_top100=0.7685 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6008 init_acc_rollout_kept=0.5063 logit_acc_rollout_applied=0.9206 logit_acc_rollout_kept=0.9552
825
+ step=3800 epoch=3800/4000 epoch_step=1/1 micro_steps=3800 elapsed=25.3s lr=2.000000e-03 loss=0.2299 loss_recon=0.2299 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4990 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4960 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9201 corrupt_frac=1.0000 acc_corrupt=0.9201 loss_corrupt=0.2299 wrong_frac=0.5010 init_acc_corrupt=0.5390 acc_corrupt_t_0p0_0p2=0.6021 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.2004 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2018 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1982 out_w_norm=12.5312 out_g_norm=0.2116 loss_all=0.1417 init_gold_top10=0.6969 init_gold_top100=0.7588 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5894 init_acc_rollout_kept=0.4894 logit_acc_rollout_applied=0.9411 logit_acc_rollout_kept=0.9625
826
+ step=3900 epoch=3900/4000 epoch_step=1/1 micro_steps=3900 elapsed=25.5s lr=2.000000e-03 loss=0.2197 loss_recon=0.2197 loss_meanflow=0.0000 mean_model_t=0.5027 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9244 corrupt_frac=1.0000 acc_corrupt=0.9244 loss_corrupt=0.2197 wrong_frac=0.4975 init_acc_corrupt=0.5450 acc_corrupt_t_0p0_0p2=0.6217 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.9991 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.1958 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2059 out_w_norm=12.5317 out_g_norm=0.2390 loss_all=0.3058 init_gold_top10=0.6858 init_gold_top100=0.7661 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5908 init_acc_rollout_kept=0.4748 logit_acc_rollout_applied=0.8692 logit_acc_rollout_kept=0.9259
827
+ step=4000 epoch=4000/4000 epoch_step=1/1 micro_steps=4000 elapsed=25.4s lr=2.000000e-03 loss=0.2222 loss_recon=0.2222 loss_meanflow=0.0000 mean_model_t=0.5005 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4931 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9236 corrupt_frac=1.0000 acc_corrupt=0.9236 loss_corrupt=0.2222 wrong_frac=0.4995 init_acc_corrupt=0.5391 acc_corrupt_t_0p0_0p2=0.6240 corrupt_frac_t_0p0_0p2=0.2029 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.1965 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1988 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2028 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1990 out_w_norm=12.5445 out_g_norm=0.2244 loss_all=0.0821 init_gold_top10=0.7090 init_gold_top100=0.7647 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6604 init_acc_rollout_kept=0.5098 logit_acc_rollout_applied=0.9968 logit_acc_rollout_kept=0.9536
828
+ NCCL version 2.25.1+cuda12.8
829
+ resumed_from=runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt start_step=4001
830
+ {
831
+ "device": "cuda:0",
832
+ "rank": 0,
833
+ "world_size": 4,
834
+ "samples": "owt_cached_chunks:8",
835
+ "vocab_size": 2423,
836
+ "tokenizer_vocab_size": 32100,
837
+ "save_dir": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800",
838
+ "batch_size": 128,
839
+ "grad_accum": 1,
840
+ "effective_batch_size": 512,
841
+ "global_batch_size": 512,
842
+ "lr_schedule": "constant_warmup",
843
+ "optimizer": "muon",
844
+ "epochs": 0.0,
845
+ "steps_per_epoch": 1,
846
+ "total_steps": 5000,
847
+ "warmup_steps": 10,
848
+ "warmup_epochs": -1.0,
849
+ "min_lr": 0.0,
850
+ "weight_decay": 0.1,
851
+ "output_weight_decay": -1.0,
852
+ "adamw_param_groups": "nanogpt",
853
+ "adam_beta1": 0.9,
854
+ "adam_beta2": 0.95,
855
+ "adam_eps": 1e-08,
856
+ "muon_impl": "legacy",
857
+ "muon_momentum": 0.95,
858
+ "muon_ns_steps": 5,
859
+ "muon_update_scale": 1.0,
860
+ "muon_nesterov": false,
861
+ "muon_width_scale": false,
862
+ "muon_grouping": "legacy_dim_ge_2",
863
+ "muon_param_count": 2523776,
864
+ "muon_adam_param_count": 8192,
865
+ "muon_param_names": [
866
+ "vocab_embed.embedding",
867
+ "sigma_map.net.0.weight",
868
+ "sigma_map.net.2.weight",
869
+ "blocks.0.attn_qkv.weight",
870
+ "blocks.0.attn_out.weight",
871
+ "blocks.0.mlp.0.weight",
872
+ "blocks.0.mlp.2.weight",
873
+ "blocks.0.adaLN_modulation.weight",
874
+ "blocks.1.attn_qkv.weight",
875
+ "blocks.1.attn_out.weight",
876
+ "blocks.1.mlp.0.weight",
877
+ "blocks.1.mlp.2.weight",
878
+ "blocks.1.adaLN_modulation.weight",
879
+ "blocks.2.attn_qkv.weight",
880
+ "blocks.2.attn_out.weight",
881
+ "blocks.2.mlp.0.weight",
882
+ "blocks.2.mlp.2.weight",
883
+ "blocks.2.adaLN_modulation.weight",
884
+ "output_layer.linear.weight",
885
+ "output_layer.adaLN_modulation.weight"
886
+ ],
887
+ "muon_adam_param_names": [
888
+ "sigma_map.net.0.bias",
889
+ "sigma_map.net.2.bias",
890
+ "blocks.0.norm1.weight",
891
+ "blocks.0.norm2.weight",
892
+ "blocks.0.mlp.0.bias",
893
+ "blocks.0.mlp.2.bias",
894
+ "blocks.0.adaLN_modulation.bias",
895
+ "blocks.1.norm1.weight",
896
+ "blocks.1.norm2.weight",
897
+ "blocks.1.mlp.0.bias",
898
+ "blocks.1.mlp.2.bias",
899
+ "blocks.1.adaLN_modulation.bias",
900
+ "blocks.2.norm1.weight",
901
+ "blocks.2.norm2.weight",
902
+ "blocks.2.mlp.0.bias",
903
+ "blocks.2.mlp.2.bias",
904
+ "blocks.2.adaLN_modulation.bias",
905
+ "output_layer.norm_final.weight",
906
+ "output_layer.adaLN_modulation.bias"
907
+ ],
908
+ "muon_effective_nesterov": false,
909
+ "muon_effective_width_scale": false,
910
+ "muon_effective_weight_decay": 0.1,
911
+ "muon_adam_fallback_nesterov": false,
912
+ "muon_adam_fallback_weight_decay": 0.1,
913
+ "ema_decay": 0.9999,
914
+ "ema_start_step": 0,
915
+ "model_type": "ddit",
916
+ "ddit_mlp_type": "gelu",
917
+ "elf_num_time_tokens": 4,
918
+ "elf_num_model_mode_tokens": 0,
919
+ "qk_norm": true,
920
+ "output_bias": false,
921
+ "output_init_std": -1.0,
922
+ "norm_type": "rmsnorm",
923
+ "target_loss": "hard_ce",
924
+ "linear_soft_target_power": 1.0,
925
+ "linear_soft_target_min_conf": 0.0,
926
+ "linear_soft_target_max_conf": 1.0,
927
+ "t_sampling_mode": "uniform",
928
+ "t_sampling_power": 1.0,
929
+ "t_sampling_eps": 0.0001,
930
+ "t_sampling_logit_mean": -1.5,
931
+ "t_sampling_logit_std": 0.8,
932
+ "dual_t": true,
933
+ "corrupt_t_mode": "same",
934
+ "corrupt_min_t": 0.0,
935
+ "corrupt_max_t": 1.0,
936
+ "prefix_block_prob": 0.0,
937
+ "prefix_block_len": 128,
938
+ "mask_ratio_floor_schedule": "none",
939
+ "dirichlet_endpoint_mode": "categorical_dual_t",
940
+ "dirichlet_semantic_t_mode": "same",
941
+ "dirichlet_semantic_t_value": 0.0,
942
+ "dirichlet_semantic_t_curve": "linear",
943
+ "dirichlet_semantic_t_power": 1.0,
944
+ "endpoint_sequence_random_prob_alpha": 0.0,
945
+ "categorical_wrong_from_full_vocab": true,
946
+ "categorical_wrong_from_batch_valid_tokens": false,
947
+ "categorical_wrong_basin_token_ids": "",
948
+ "categorical_wrong_basin_prob": 0.0,
949
+ "categorical_wrong_unigram_prob": 0.0,
950
+ "categorical_wrong_uniform_prob": 0.0,
951
+ "categorical_wrong_prob_floor": 0.0,
952
+ "categorical_wrong_corpus_unigram_path": "",
953
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
954
+ "categorical_wrong_basin_shared_prob": 0.0,
955
+ "categorical_wrong_unigram_shared_prob": 0.0,
956
+ "mask_mixture_original_prob": 0.0,
957
+ "mask_mixture_lowk_prob": 0.0,
958
+ "mask_mixture_lowcorrupt_prob": 0.0,
959
+ "mask_mixture_block_prob": 0.0,
960
+ "mask_mixture_all_prob": 1.0,
961
+ "mask_mixture_lowk_clean_tokens": "0",
962
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
963
+ "mask_mixture_block_tokens": "64,128",
964
+ "simplex_bridge_sampler": "dirichlet",
965
+ "logistic_normal_sigma_min": 0.1,
966
+ "logistic_normal_sigma_max": 1.0,
967
+ "logistic_normal_tau_min": 1.0,
968
+ "logistic_normal_tau_max": 1.0,
969
+ "torch_compile": false,
970
+ "compile_mode": "max-autotune",
971
+ "state_format": "prob",
972
+ "meanflow_weight": 0.0,
973
+ "rollout_train_prob": 0.5,
974
+ "rollout_train_steps": 4,
975
+ "rollout_train_steps_min": 0,
976
+ "rollout_train_infer_steps": 1,
977
+ "rollout_train_time_mode": "sampled_path",
978
+ "rollout_train_s_dist": "uniform",
979
+ "rollout_train_s_min_frac": 0.0,
980
+ "rollout_train_s_max_frac": 0.25,
981
+ "rollout_train_s_beta_alpha": 2.0,
982
+ "rollout_train_s_beta_beta": 6.0,
983
+ "rollout_train_temp": 1.0,
984
+ "rollout_train_max_gamma": 1.0,
985
+ "rollout_train_corrupt_only": true,
986
+ "rollout_train_samplewise": true,
987
+ "rollout_train_compute_always": false,
988
+ "rollout_train_sync_t": true,
989
+ "bridge_noise_init": "logistic_normal",
990
+ "noise_sigma": -1.0,
991
+ "allow_tf32": true,
992
+ "activation_checkpointing": false,
993
+ "activation_checkpoint_interval": 1,
994
+ "activation_checkpoint_scope": "block",
995
+ "ddp_static_graph": false,
996
+ "ddp_gradient_as_bucket_view": true,
997
+ "blocking_data_transfer": false,
998
+ "dataloader_prefetch_factor": 4,
999
+ "full_train_stats": false,
1000
+ "tokenized_hf": false,
1001
+ "tokenized_pad_token": "pad",
1002
+ "elf_conditional_hf": false,
1003
+ "record_pad_truncate": false,
1004
+ "record_add_eos": false,
1005
+ "record_add_special_tokens": false,
1006
+ "record_pad_token": "pad",
1007
+ "record_shuffle_buffer": 10000,
1008
+ "wrap": true,
1009
+ "wrap_mode": "stream",
1010
+ "wrap_record_buffer_size": 200,
1011
+ "owt_cached_chunks": true,
1012
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/t5_len1024_train8_compact_overfit",
1013
+ "owt_chunk_cache_rebuild": false,
1014
+ "owt_chunk_cache_write_batch": 4096,
1015
+ "owt_exact_repeat_per_chunk": 64,
1016
+ "online_chunk_shuffle": false,
1017
+ "online_chunk_shuffle_buffer": 10000,
1018
+ "openwebtext_split": "train_minus_100k",
1019
+ "detokenizer": "auto",
1020
+ "resolved_detokenizer": null,
1021
+ "num_workers": 0,
1022
+ "latest_every": 1000,
1023
+ "resume_path": "runs/train8_ctx1024_t5tok_p50_rand0_4_unif0_0p25_outwdm1_t5tok_ctx1024_randk_20260518_014800/latest.pt"
1024
+ }
1025
+ step=4100 epoch=4100/5000 epoch_step=1/1 micro_steps=4100 elapsed=26.2s lr=2.000000e-03 loss=0.2186 loss_recon=0.2186 loss_meanflow=0.0000 mean_model_t=0.5013 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5002 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9238 corrupt_frac=1.0000 acc_corrupt=0.9238 loss_corrupt=0.2186 wrong_frac=0.4986 init_acc_corrupt=0.5406 acc_corrupt_t_0p0_0p2=0.6100 corrupt_frac_t_0p0_0p2=0.1952 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.2063 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.1976 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2036 out_w_norm=12.5634 out_g_norm=0.2004 loss_all=0.1765 init_gold_top10=0.6897 init_gold_top100=0.7541 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6232 init_acc_rollout_kept=0.4387 logit_acc_rollout_applied=0.9170 logit_acc_rollout_kept=0.9681
1026
+ step=4200 epoch=4200/5000 epoch_step=1/1 micro_steps=4200 elapsed=25.3s lr=2.000000e-03 loss=0.2225 loss_recon=0.2225 loss_meanflow=0.0000 mean_model_t=0.4985 mean_corrupt_t=0.4985 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9240 corrupt_frac=1.0000 acc_corrupt=0.9240 loss_corrupt=0.2225 wrong_frac=0.5014 init_acc_corrupt=0.5372 acc_corrupt_t_0p0_0p2=0.6275 corrupt_frac_t_0p0_0p2=0.2037 acc_corrupt_t_0p2_0p4=0.9993 corrupt_frac_t_0p2_0p4=0.1982 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.1956 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2056 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1969 out_w_norm=12.5838 out_g_norm=0.1905 loss_all=0.1620 init_gold_top10=0.6844 init_gold_top100=0.7618 rollout_applied_pos_frac=0.5234 init_acc_rollout_applied=0.6516 init_acc_rollout_kept=0.4286 logit_acc_rollout_applied=0.9779 logit_acc_rollout_kept=0.8964
1027
+ step=4300 epoch=4300/5000 epoch_step=1/1 micro_steps=4300 elapsed=25.5s lr=2.000000e-03 loss=0.2157 loss_recon=0.2157 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.4953 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5102 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9254 corrupt_frac=1.0000 acc_corrupt=0.9254 loss_corrupt=0.2157 wrong_frac=0.5048 init_acc_corrupt=0.5364 acc_corrupt_t_0p0_0p2=0.6343 corrupt_frac_t_0p0_0p2=0.2036 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.2030 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1934 out_w_norm=12.6004 out_g_norm=0.1667 loss_all=0.2117 init_gold_top10=0.6878 init_gold_top100=0.7632 rollout_applied_pos_frac=0.4922 init_acc_rollout_applied=0.6725 init_acc_rollout_kept=0.4573 logit_acc_rollout_applied=0.9250 logit_acc_rollout_kept=0.9231
1028
+ step=4400 epoch=4400/5000 epoch_step=1/1 micro_steps=4400 elapsed=25.4s lr=2.000000e-03 loss=0.2121 loss_recon=0.2121 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4980 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9258 corrupt_frac=1.0000 acc_corrupt=0.9258 loss_corrupt=0.2121 wrong_frac=0.5019 init_acc_corrupt=0.5376 acc_corrupt_t_0p0_0p2=0.6403 corrupt_frac_t_0p0_0p2=0.2061 acc_corrupt_t_0p2_0p4=0.9998 corrupt_frac_t_0p2_0p4=0.1960 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1973 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.2020 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1985 out_w_norm=12.6099 out_g_norm=0.1469 loss_all=0.2892 init_gold_top10=0.7065 init_gold_top100=0.7822 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5722 init_acc_rollout_kept=0.4870 logit_acc_rollout_applied=0.8531 logit_acc_rollout_kept=0.9403
1029
+ step=4500 epoch=4500/5000 epoch_step=1/1 micro_steps=4500 elapsed=25.4s lr=2.000000e-03 loss=0.2133 loss_recon=0.2133 loss_meanflow=0.0000 mean_model_t=0.4998 mean_corrupt_t=0.4998 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5031 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9265 corrupt_frac=1.0000 acc_corrupt=0.9265 loss_corrupt=0.2133 wrong_frac=0.5002 init_acc_corrupt=0.5429 acc_corrupt_t_0p0_0p2=0.6329 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.9994 corrupt_frac_t_0p2_0p4=0.1971 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2008 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2002 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2020 out_w_norm=12.6294 out_g_norm=0.1496 loss_all=0.2702 init_gold_top10=0.7049 init_gold_top100=0.7652 rollout_applied_pos_frac=0.4531 init_acc_rollout_applied=0.6152 init_acc_rollout_kept=0.4937 logit_acc_rollout_applied=0.8901 logit_acc_rollout_kept=0.9134
1030
+ step=4600 epoch=4600/5000 epoch_step=1/1 micro_steps=4600 elapsed=25.3s lr=2.000000e-03 loss=0.2022 loss_recon=0.2022 loss_meanflow=0.0000 mean_model_t=0.5009 mean_corrupt_t=0.5009 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4992 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9301 corrupt_frac=1.0000 acc_corrupt=0.9301 loss_corrupt=0.2022 wrong_frac=0.4992 init_acc_corrupt=0.5427 acc_corrupt_t_0p0_0p2=0.6440 corrupt_frac_t_0p0_0p2=0.1963 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.2016 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2056 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=0.1941 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2024 out_w_norm=12.6419 out_g_norm=0.1513 loss_all=0.1736 init_gold_top10=0.7198 init_gold_top100=0.7854 rollout_applied_pos_frac=0.5469 init_acc_rollout_applied=0.6244 init_acc_rollout_kept=0.4697 logit_acc_rollout_applied=0.9318 logit_acc_rollout_kept=0.9491
1031
+ step=4700 epoch=4700/5000 epoch_step=1/1 micro_steps=4700 elapsed=25.3s lr=2.000000e-03 loss=0.1975 loss_recon=0.1975 loss_meanflow=0.0000 mean_model_t=0.5003 mean_corrupt_t=0.5003 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4918 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9328 corrupt_frac=1.0000 acc_corrupt=0.9328 loss_corrupt=0.1975 wrong_frac=0.4998 init_acc_corrupt=0.5406 acc_corrupt_t_0p0_0p2=0.6657 corrupt_frac_t_0p0_0p2=0.2006 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.1945 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.2064 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.1962 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.2023 out_w_norm=12.6608 out_g_norm=0.1390 loss_all=0.1708 init_gold_top10=0.7059 init_gold_top100=0.7685 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6019 init_acc_rollout_kept=0.5063 logit_acc_rollout_applied=0.9325 logit_acc_rollout_kept=0.9396
1032
+ step=4800 epoch=4800/5000 epoch_step=1/1 micro_steps=4800 elapsed=25.3s lr=2.000000e-03 loss=0.2091 loss_recon=0.2091 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4990 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4960 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9280 corrupt_frac=1.0000 acc_corrupt=0.9280 loss_corrupt=0.2091 wrong_frac=0.5010 init_acc_corrupt=0.5398 acc_corrupt_t_0p0_0p2=0.6413 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.2004 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.1991 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2018 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.1982 out_w_norm=12.6854 out_g_norm=0.1459 loss_all=0.1538 init_gold_top10=0.6980 init_gold_top100=0.7588 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5934 init_acc_rollout_kept=0.4894 logit_acc_rollout_applied=0.9379 logit_acc_rollout_kept=0.9556
1033
+ step=4900 epoch=4900/5000 epoch_step=1/1 micro_steps=4900 elapsed=25.5s lr=2.000000e-03 loss=0.1977 loss_recon=0.1977 loss_meanflow=0.0000 mean_model_t=0.5027 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5077 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9315 corrupt_frac=1.0000 acc_corrupt=0.9315 loss_corrupt=0.1977 wrong_frac=0.4975 init_acc_corrupt=0.5459 acc_corrupt_t_0p0_0p2=0.6575 corrupt_frac_t_0p0_0p2=0.1991 acc_corrupt_t_0p2_0p4=0.9993 corrupt_frac_t_0p2_0p4=0.1997 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.1958 acc_corrupt_t_0p6_0p8=0.9998 corrupt_frac_t_0p6_0p8=0.1995 acc_corrupt_t_0p8_1p0=0.9999 corrupt_frac_t_0p8_1p0=0.2059 out_w_norm=12.7066 out_g_norm=0.1458 loss_all=0.2784 init_gold_top10=0.6868 init_gold_top100=0.7661 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.5915 init_acc_rollout_kept=0.4748 logit_acc_rollout_applied=0.8609 logit_acc_rollout_kept=0.9276
1034
+ step=5000 epoch=5000/5000 epoch_step=1/1 micro_steps=5000 elapsed=25.3s lr=2.000000e-03 loss=0.2020 loss_recon=0.2020 loss_meanflow=0.0000 mean_model_t=0.5005 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4931 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9310 corrupt_frac=1.0000 acc_corrupt=0.9310 loss_corrupt=0.2020 wrong_frac=0.4995 init_acc_corrupt=0.5398 acc_corrupt_t_0p0_0p2=0.6608 corrupt_frac_t_0p0_0p2=0.2029 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.1965 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.1988 acc_corrupt_t_0p6_0p8=0.9999 corrupt_frac_t_0p6_0p8=0.2028 acc_corrupt_t_0p8_1p0=0.9998 corrupt_frac_t_0p8_1p0=0.1990 out_w_norm=12.7400 out_g_norm=0.1335 loss_all=0.0640 init_gold_top10=0.7090 init_gold_top100=0.7647 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.6647 init_acc_rollout_kept=0.5098 logit_acc_rollout_applied=0.9999 logit_acc_rollout_kept=0.9611
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638.log ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 2664,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 2616320,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "uniform",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 0.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_prob_floor": 0.0,
124
+ "categorical_wrong_corpus_unigram_path": "",
125
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
126
+ "categorical_wrong_basin_shared_prob": 0.0,
127
+ "categorical_wrong_unigram_shared_prob": 0.0,
128
+ "mask_mixture_original_prob": 0.0,
129
+ "mask_mixture_lowk_prob": 0.0,
130
+ "mask_mixture_lowcorrupt_prob": 0.0,
131
+ "mask_mixture_block_prob": 0.0,
132
+ "mask_mixture_all_prob": 1.0,
133
+ "mask_mixture_lowk_clean_tokens": "0",
134
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
135
+ "mask_mixture_block_tokens": "64,128",
136
+ "simplex_bridge_sampler": "dirichlet",
137
+ "logistic_normal_sigma_min": 0.1,
138
+ "logistic_normal_sigma_max": 1.0,
139
+ "logistic_normal_tau_min": 1.0,
140
+ "logistic_normal_tau_max": 1.0,
141
+ "torch_compile": false,
142
+ "compile_mode": "max-autotune",
143
+ "state_format": "prob",
144
+ "meanflow_weight": 0.0,
145
+ "rollout_train_prob": 0.5,
146
+ "rollout_train_steps": 3,
147
+ "rollout_train_infer_steps": 1,
148
+ "rollout_train_time_mode": "sampled_path",
149
+ "rollout_train_s_dist": "uniform",
150
+ "rollout_train_s_min_frac": 0.0,
151
+ "rollout_train_s_max_frac": 0.25,
152
+ "rollout_train_s_beta_alpha": 2.0,
153
+ "rollout_train_s_beta_beta": 6.0,
154
+ "rollout_train_temp": 1.0,
155
+ "rollout_train_max_gamma": 1.0,
156
+ "rollout_train_corrupt_only": true,
157
+ "rollout_train_samplewise": true,
158
+ "rollout_train_compute_always": false,
159
+ "rollout_train_sync_t": true,
160
+ "bridge_noise_init": "logistic_normal",
161
+ "noise_sigma": -1.0,
162
+ "allow_tf32": true,
163
+ "activation_checkpointing": false,
164
+ "activation_checkpoint_interval": 1,
165
+ "activation_checkpoint_scope": "block",
166
+ "ddp_static_graph": false,
167
+ "ddp_gradient_as_bucket_view": true,
168
+ "blocking_data_transfer": false,
169
+ "dataloader_prefetch_factor": 4,
170
+ "full_train_stats": false,
171
+ "tokenized_hf": false,
172
+ "tokenized_pad_token": "pad",
173
+ "elf_conditional_hf": false,
174
+ "record_pad_truncate": false,
175
+ "record_add_eos": false,
176
+ "record_add_special_tokens": false,
177
+ "record_pad_token": "pad",
178
+ "record_shuffle_buffer": 10000,
179
+ "wrap": true,
180
+ "wrap_mode": "stream",
181
+ "wrap_record_buffer_size": 200,
182
+ "owt_cached_chunks": true,
183
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
184
+ "owt_chunk_cache_rebuild": false,
185
+ "owt_chunk_cache_write_batch": 4096,
186
+ "owt_exact_repeat_per_chunk": 64,
187
+ "online_chunk_shuffle": false,
188
+ "online_chunk_shuffle_buffer": 10000,
189
+ "openwebtext_split": "train_minus_100k",
190
+ "detokenizer": "auto",
191
+ "resolved_detokenizer": null,
192
+ "num_workers": 0,
193
+ "latest_every": 1000,
194
+ "resume_path": ""
195
+ }
196
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=21.6s lr=2.000000e-03 loss=7.4630 loss_recon=7.4630 loss_meanflow=0.0000 mean_model_t=0.4940 mean_corrupt_t=0.4940 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5008 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3113 corrupt_frac=1.0000 acc_corrupt=0.3113 loss_corrupt=7.4630 wrong_frac=0.5062 init_acc_corrupt=0.4590 acc_corrupt_t_0p0_0p2=0.0446 corrupt_frac_t_0p0_0p2=0.2070 acc_corrupt_t_0p2_0p4=0.1539 corrupt_frac_t_0p2_0p4=0.2002 acc_corrupt_t_0p4_0p6=0.3084 corrupt_frac_t_0p4_0p6=0.2017 acc_corrupt_t_0p6_0p8=0.4607 corrupt_frac_t_0p6_0p8=0.1966 acc_corrupt_t_0p8_1p0=0.6088 corrupt_frac_t_0p8_1p0=0.1945 out_w_norm=1.0710 out_g_norm=0.9511 loss_all=6.8397 init_gold_top10=0.4966 init_gold_top100=0.6045 rollout_applied_pos_frac=0.5312 init_acc_rollout_applied=0.4381 init_acc_rollout_kept=0.4931 logit_acc_rollout_applied=0.2800 logit_acc_rollout_kept=0.3227
197
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=20.7s lr=2.000000e-03 loss=6.0368 loss_recon=6.0368 loss_meanflow=0.0000 mean_model_t=0.4932 mean_corrupt_t=0.4932 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5070 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3060 corrupt_frac=1.0000 acc_corrupt=0.3060 loss_corrupt=6.0368 wrong_frac=0.5067 init_acc_corrupt=0.4587 acc_corrupt_t_0p0_0p2=0.0494 corrupt_frac_t_0p0_0p2=0.2062 acc_corrupt_t_0p2_0p4=0.1493 corrupt_frac_t_0p2_0p4=0.2049 acc_corrupt_t_0p4_0p6=0.3068 corrupt_frac_t_0p4_0p6=0.1945 acc_corrupt_t_0p6_0p8=0.4499 corrupt_frac_t_0p6_0p8=0.2008 acc_corrupt_t_0p8_1p0=0.5952 corrupt_frac_t_0p8_1p0=0.1935 out_w_norm=3.4216 out_g_norm=1.2554 loss_all=5.3733 init_gold_top10=0.4880 init_gold_top100=0.5996 rollout_applied_pos_frac=0.4766 init_acc_rollout_applied=0.5000 init_acc_rollout_kept=0.4037 logit_acc_rollout_applied=0.3555 logit_acc_rollout_kept=0.2970
198
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=20.5s lr=2.000000e-03 loss=4.9439 loss_recon=4.9439 loss_meanflow=0.0000 mean_model_t=0.4982 mean_corrupt_t=0.4982 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4945 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3512 corrupt_frac=1.0000 acc_corrupt=0.3512 loss_corrupt=4.9439 wrong_frac=0.5020 init_acc_corrupt=0.4654 acc_corrupt_t_0p0_0p2=0.0522 corrupt_frac_t_0p0_0p2=0.2005 acc_corrupt_t_0p2_0p4=0.1768 corrupt_frac_t_0p2_0p4=0.2007 acc_corrupt_t_0p4_0p6=0.3493 corrupt_frac_t_0p4_0p6=0.2010 acc_corrupt_t_0p6_0p8=0.5057 corrupt_frac_t_0p6_0p8=0.2013 acc_corrupt_t_0p8_1p0=0.6778 corrupt_frac_t_0p8_1p0=0.1965 out_w_norm=5.5143 out_g_norm=0.5197 loss_all=4.2064 init_gold_top10=0.5841 init_gold_top100=0.6777 rollout_applied_pos_frac=0.5391 init_acc_rollout_applied=0.5668 init_acc_rollout_kept=0.5232 logit_acc_rollout_applied=0.4486 logit_acc_rollout_kept=0.4243
199
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=20.6s lr=2.000000e-03 loss=4.2842 loss_recon=4.2842 loss_meanflow=0.0000 mean_model_t=0.4993 mean_corrupt_t=0.4993 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5039 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4069 corrupt_frac=1.0000 acc_corrupt=0.4069 loss_corrupt=4.2842 wrong_frac=0.5008 init_acc_corrupt=0.4671 acc_corrupt_t_0p0_0p2=0.0556 corrupt_frac_t_0p0_0p2=0.1981 acc_corrupt_t_0p2_0p4=0.2024 corrupt_frac_t_0p2_0p4=0.2023 acc_corrupt_t_0p4_0p6=0.4020 corrupt_frac_t_0p4_0p6=0.2014 acc_corrupt_t_0p6_0p8=0.5934 corrupt_frac_t_0p6_0p8=0.2034 acc_corrupt_t_0p8_1p0=0.7871 corrupt_frac_t_0p8_1p0=0.1948 out_w_norm=7.0978 out_g_norm=0.2717 loss_all=3.7483 init_gold_top10=0.5483 init_gold_top100=0.6612 rollout_applied_pos_frac=0.4766 init_acc_rollout_applied=0.4934 init_acc_rollout_kept=0.5149 logit_acc_rollout_applied=0.4794 logit_acc_rollout_kept=0.5112
200
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=20.6s lr=2.000000e-03 loss=3.6374 loss_recon=3.6374 loss_meanflow=0.0000 mean_model_t=0.4995 mean_corrupt_t=0.4995 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5030 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4790 corrupt_frac=1.0000 acc_corrupt=0.4790 loss_corrupt=3.6374 wrong_frac=0.5005 init_acc_corrupt=0.4678 acc_corrupt_t_0p0_0p2=0.0571 corrupt_frac_t_0p0_0p2=0.2000 acc_corrupt_t_0p2_0p4=0.2319 corrupt_frac_t_0p2_0p4=0.2012 acc_corrupt_t_0p4_0p6=0.4969 corrupt_frac_t_0p4_0p6=0.1999 acc_corrupt_t_0p6_0p8=0.7097 corrupt_frac_t_0p6_0p8=0.1996 acc_corrupt_t_0p8_1p0=0.9030 corrupt_frac_t_0p8_1p0=0.1993 out_w_norm=8.4280 out_g_norm=0.2576 loss_all=3.4323 init_gold_top10=0.5046 init_gold_top100=0.6328 rollout_applied_pos_frac=0.4609 init_acc_rollout_applied=0.4649 init_acc_rollout_kept=0.4401 logit_acc_rollout_applied=0.4835 logit_acc_rollout_kept=0.4651
201
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=20.6s lr=2.000000e-03 loss=3.1397 loss_recon=3.1397 loss_meanflow=0.0000 mean_model_t=0.5006 mean_corrupt_t=0.5006 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4963 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4926 corrupt_frac=1.0000 acc_corrupt=0.4926 loss_corrupt=3.1397 wrong_frac=0.4994 init_acc_corrupt=0.4690 acc_corrupt_t_0p0_0p2=0.0592 corrupt_frac_t_0p0_0p2=0.1968 acc_corrupt_t_0p2_0p4=0.2579 corrupt_frac_t_0p2_0p4=0.2020 acc_corrupt_t_0p4_0p6=0.5215 corrupt_frac_t_0p4_0p6=0.2005 acc_corrupt_t_0p6_0p8=0.7137 corrupt_frac_t_0p6_0p8=0.1991 acc_corrupt_t_0p8_1p0=0.9036 corrupt_frac_t_0p8_1p0=0.2016 out_w_norm=9.6978 out_g_norm=0.2729 loss_all=2.9047 init_gold_top10=0.5234 init_gold_top100=0.6711 rollout_applied_pos_frac=0.4375 init_acc_rollout_applied=0.4681 init_acc_rollout_kept=0.4758 logit_acc_rollout_applied=0.4917 logit_acc_rollout_kept=0.5055
202
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=20.6s lr=2.000000e-03 loss=2.7726 loss_recon=2.7726 loss_meanflow=0.0000 mean_model_t=0.5039 mean_corrupt_t=0.5039 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.5007 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5041 corrupt_frac=1.0000 acc_corrupt=0.5041 loss_corrupt=2.7726 wrong_frac=0.4961 init_acc_corrupt=0.4739 acc_corrupt_t_0p0_0p2=0.0617 corrupt_frac_t_0p0_0p2=0.1958 acc_corrupt_t_0p2_0p4=0.2814 corrupt_frac_t_0p2_0p4=0.1975 acc_corrupt_t_0p4_0p6=0.5305 corrupt_frac_t_0p4_0p6=0.2052 acc_corrupt_t_0p6_0p8=0.7211 corrupt_frac_t_0p6_0p8=0.1946 acc_corrupt_t_0p8_1p0=0.9049 corrupt_frac_t_0p8_1p0=0.2069 out_w_norm=10.6988 out_g_norm=0.2909 loss_all=2.6186 init_gold_top10=0.5364 init_gold_top100=0.7379 rollout_applied_pos_frac=0.5312 init_acc_rollout_applied=0.4626 init_acc_rollout_kept=0.4647 logit_acc_rollout_applied=0.4969 logit_acc_rollout_kept=0.5058
203
+ step=800 epoch=800/1000 epoch_step=1/1 micro_steps=800 elapsed=20.6s lr=2.000000e-03 loss=2.3265 loss_recon=2.3265 loss_meanflow=0.0000 mean_model_t=0.5021 mean_corrupt_t=0.5021 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4989 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5323 corrupt_frac=1.0000 acc_corrupt=0.5323 loss_corrupt=2.3265 wrong_frac=0.4978 init_acc_corrupt=0.4722 acc_corrupt_t_0p0_0p2=0.0617 corrupt_frac_t_0p0_0p2=0.1935 acc_corrupt_t_0p2_0p4=0.3031 corrupt_frac_t_0p2_0p4=0.2022 acc_corrupt_t_0p4_0p6=0.5826 corrupt_frac_t_0p4_0p6=0.2002 acc_corrupt_t_0p6_0p8=0.7704 corrupt_frac_t_0p6_0p8=0.2048 acc_corrupt_t_0p8_1p0=0.9263 corrupt_frac_t_0p8_1p0=0.1993 out_w_norm=11.1937 out_g_norm=0.3853 loss_all=2.0510 init_gold_top10=0.5836 init_gold_top100=0.7211 rollout_applied_pos_frac=0.4375 init_acc_rollout_applied=0.4701 init_acc_rollout_kept=0.4812 logit_acc_rollout_applied=0.5598 logit_acc_rollout_kept=0.5791
204
+ step=900 epoch=900/1000 epoch_step=1/1 micro_steps=900 elapsed=20.6s lr=2.000000e-03 loss=1.7991 loss_recon=1.7991 loss_meanflow=0.0000 mean_model_t=0.5046 mean_corrupt_t=0.5046 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4971 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6181 corrupt_frac=1.0000 acc_corrupt=0.6181 loss_corrupt=1.7991 wrong_frac=0.4954 init_acc_corrupt=0.4825 acc_corrupt_t_0p0_0p2=0.0642 corrupt_frac_t_0p0_0p2=0.1938 acc_corrupt_t_0p2_0p4=0.3941 corrupt_frac_t_0p2_0p4=0.1966 acc_corrupt_t_0p4_0p6=0.7401 corrupt_frac_t_0p4_0p6=0.2052 acc_corrupt_t_0p6_0p8=0.8910 corrupt_frac_t_0p6_0p8=0.2031 acc_corrupt_t_0p8_1p0=0.9706 corrupt_frac_t_0p8_1p0=0.2013 out_w_norm=11.5876 out_g_norm=0.4754 loss_all=1.4802 init_gold_top10=0.6218 init_gold_top100=0.7397 rollout_applied_pos_frac=0.4297 init_acc_rollout_applied=0.4676 init_acc_rollout_kept=0.5195 logit_acc_rollout_applied=0.6235 logit_acc_rollout_kept=0.7161
205
+ step=1000 epoch=1000/1000 epoch_step=1/1 micro_steps=1000 elapsed=20.6s lr=2.000000e-03 loss=1.4671 loss_recon=1.4671 loss_meanflow=0.0000 mean_model_t=0.4971 mean_corrupt_t=0.4971 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.4980 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6870 corrupt_frac=1.0000 acc_corrupt=0.6870 loss_corrupt=1.4671 wrong_frac=0.5030 init_acc_corrupt=0.4920 acc_corrupt_t_0p0_0p2=0.0716 corrupt_frac_t_0p0_0p2=0.1998 acc_corrupt_t_0p2_0p4=0.5283 corrupt_frac_t_0p2_0p4=0.2031 acc_corrupt_t_0p4_0p6=0.8801 corrupt_frac_t_0p4_0p6=0.2009 acc_corrupt_t_0p6_0p8=0.9675 corrupt_frac_t_0p6_0p8=0.1981 acc_corrupt_t_0p8_1p0=0.9941 corrupt_frac_t_0p8_1p0=0.1980 out_w_norm=11.8835 out_g_norm=0.5694 loss_all=1.3845 init_gold_top10=0.6323 init_gold_top100=0.7237 rollout_applied_pos_frac=0.4844 init_acc_rollout_applied=0.5816 init_acc_rollout_kept=0.4216 logit_acc_rollout_applied=0.7618 logit_acc_rollout_kept=0.6682
206
+ NCCL version 2.25.1+cuda12.8
207
+ resumed_from=runs/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638/latest.pt start_step=1001
208
+ {
209
+ "device": "cuda:0",
210
+ "rank": 0,
211
+ "world_size": 4,
212
+ "samples": "owt_cached_chunks:8",
213
+ "vocab_size": 2664,
214
+ "tokenizer_vocab_size": 50257,
215
+ "save_dir": "runs/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638",
216
+ "batch_size": 128,
217
+ "grad_accum": 1,
218
+ "effective_batch_size": 512,
219
+ "global_batch_size": 512,
220
+ "lr_schedule": "constant_warmup",
221
+ "optimizer": "muon",
222
+ "epochs": 0.0,
223
+ "steps_per_epoch": 1,
224
+ "total_steps": 2000,
225
+ "warmup_steps": 10,
226
+ "warmup_epochs": -1.0,
227
+ "min_lr": 0.0,
228
+ "weight_decay": 0.1,
229
+ "output_weight_decay": -1.0,
230
+ "adamw_param_groups": "nanogpt",
231
+ "adam_beta1": 0.9,
232
+ "adam_beta2": 0.95,
233
+ "adam_eps": 1e-08,
234
+ "muon_impl": "legacy",
235
+ "muon_momentum": 0.95,
236
+ "muon_ns_steps": 5,
237
+ "muon_update_scale": 1.0,
238
+ "muon_nesterov": false,
239
+ "muon_width_scale": false,
240
+ "muon_grouping": "legacy_dim_ge_2",
241
+ "muon_param_count": 2616320,
242
+ "muon_adam_param_count": 8192,
243
+ "muon_param_names": [
244
+ "vocab_embed.embedding",
245
+ "sigma_map.net.0.weight",
246
+ "sigma_map.net.2.weight",
247
+ "blocks.0.attn_qkv.weight",
248
+ "blocks.0.attn_out.weight",
249
+ "blocks.0.mlp.0.weight",
250
+ "blocks.0.mlp.2.weight",
251
+ "blocks.0.adaLN_modulation.weight",
252
+ "blocks.1.attn_qkv.weight",
253
+ "blocks.1.attn_out.weight",
254
+ "blocks.1.mlp.0.weight",
255
+ "blocks.1.mlp.2.weight",
256
+ "blocks.1.adaLN_modulation.weight",
257
+ "blocks.2.attn_qkv.weight",
258
+ "blocks.2.attn_out.weight",
259
+ "blocks.2.mlp.0.weight",
260
+ "blocks.2.mlp.2.weight",
261
+ "blocks.2.adaLN_modulation.weight",
262
+ "output_layer.linear.weight",
263
+ "output_layer.adaLN_modulation.weight"
264
+ ],
265
+ "muon_adam_param_names": [
266
+ "sigma_map.net.0.bias",
267
+ "sigma_map.net.2.bias",
268
+ "blocks.0.norm1.weight",
269
+ "blocks.0.norm2.weight",
270
+ "blocks.0.mlp.0.bias",
271
+ "blocks.0.mlp.2.bias",
272
+ "blocks.0.adaLN_modulation.bias",
273
+ "blocks.1.norm1.weight",
274
+ "blocks.1.norm2.weight",
275
+ "blocks.1.mlp.0.bias",
276
+ "blocks.1.mlp.2.bias",
277
+ "blocks.1.adaLN_modulation.bias",
278
+ "blocks.2.norm1.weight",
279
+ "blocks.2.norm2.weight",
280
+ "blocks.2.mlp.0.bias",
281
+ "blocks.2.mlp.2.bias",
282
+ "blocks.2.adaLN_modulation.bias",
283
+ "output_layer.norm_final.weight",
284
+ "output_layer.adaLN_modulation.bias"
285
+ ],
286
+ "muon_effective_nesterov": false,
287
+ "muon_effective_width_scale": false,
288
+ "muon_effective_weight_decay": 0.1,
289
+ "muon_adam_fallback_nesterov": false,
290
+ "muon_adam_fallback_weight_decay": 0.1,
291
+ "ema_decay": 0.9999,
292
+ "ema_start_step": 0,
293
+ "model_type": "ddit",
294
+ "ddit_mlp_type": "gelu",
295
+ "elf_num_time_tokens": 4,
296
+ "elf_num_model_mode_tokens": 0,
297
+ "qk_norm": true,
298
+ "output_bias": false,
299
+ "output_init_std": -1.0,
300
+ "norm_type": "rmsnorm",
301
+ "target_loss": "hard_ce",
302
+ "linear_soft_target_power": 1.0,
303
+ "linear_soft_target_min_conf": 0.0,
304
+ "linear_soft_target_max_conf": 1.0,
305
+ "t_sampling_mode": "uniform",
306
+ "t_sampling_power": 1.0,
307
+ "t_sampling_eps": 0.0001,
308
+ "t_sampling_logit_mean": -1.5,
309
+ "t_sampling_logit_std": 0.8,
310
+ "dual_t": true,
311
+ "corrupt_t_mode": "same",
312
+ "corrupt_min_t": 0.0,
313
+ "corrupt_max_t": 1.0,
314
+ "prefix_block_prob": 0.0,
315
+ "prefix_block_len": 128,
316
+ "mask_ratio_floor_schedule": "none",
317
+ "dirichlet_endpoint_mode": "categorical_dual_t",
318
+ "dirichlet_semantic_t_mode": "same",
319
+ "dirichlet_semantic_t_value": 0.0,
320
+ "dirichlet_semantic_t_curve": "linear",
321
+ "dirichlet_semantic_t_power": 1.0,
322
+ "endpoint_sequence_random_prob_alpha": 0.0,
323
+ "categorical_wrong_from_full_vocab": true,
324
+ "categorical_wrong_from_batch_valid_tokens": false,
325
+ "categorical_wrong_basin_token_ids": "",
326
+ "categorical_wrong_basin_prob": 0.0,
327
+ "categorical_wrong_unigram_prob": 0.0,
328
+ "categorical_wrong_uniform_prob": 0.0,
329
+ "categorical_wrong_prob_floor": 0.0,
330
+ "categorical_wrong_corpus_unigram_path": "",
331
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
332
+ "categorical_wrong_basin_shared_prob": 0.0,
333
+ "categorical_wrong_unigram_shared_prob": 0.0,
334
+ "mask_mixture_original_prob": 0.0,
335
+ "mask_mixture_lowk_prob": 0.0,
336
+ "mask_mixture_lowcorrupt_prob": 0.0,
337
+ "mask_mixture_block_prob": 0.0,
338
+ "mask_mixture_all_prob": 1.0,
339
+ "mask_mixture_lowk_clean_tokens": "0",
340
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
341
+ "mask_mixture_block_tokens": "64,128",
342
+ "simplex_bridge_sampler": "dirichlet",
343
+ "logistic_normal_sigma_min": 0.1,
344
+ "logistic_normal_sigma_max": 1.0,
345
+ "logistic_normal_tau_min": 1.0,
346
+ "logistic_normal_tau_max": 1.0,
347
+ "torch_compile": false,
348
+ "compile_mode": "max-autotune",
349
+ "state_format": "prob",
350
+ "meanflow_weight": 0.0,
351
+ "rollout_train_prob": 0.5,
352
+ "rollout_train_steps": 3,
353
+ "rollout_train_steps_min": -1,
354
+ "rollout_train_infer_steps": 1,
355
+ "rollout_train_time_mode": "sampled_path",
356
+ "rollout_train_s_dist": "uniform",
357
+ "rollout_train_s_min_frac": 0.0,
358
+ "rollout_train_s_max_frac": 0.25,
359
+ "rollout_train_s_beta_alpha": 2.0,
360
+ "rollout_train_s_beta_beta": 6.0,
361
+ "rollout_train_temp": 1.0,
362
+ "rollout_train_max_gamma": 1.0,
363
+ "rollout_train_corrupt_only": true,
364
+ "rollout_train_samplewise": true,
365
+ "rollout_train_compute_always": false,
366
+ "rollout_train_sync_t": true,
367
+ "bridge_noise_init": "logistic_normal",
368
+ "noise_sigma": -1.0,
369
+ "allow_tf32": true,
370
+ "activation_checkpointing": false,
371
+ "activation_checkpoint_interval": 1,
372
+ "activation_checkpoint_scope": "block",
373
+ "ddp_static_graph": false,
374
+ "ddp_gradient_as_bucket_view": true,
375
+ "blocking_data_transfer": false,
376
+ "dataloader_prefetch_factor": 4,
377
+ "full_train_stats": false,
378
+ "tokenized_hf": false,
379
+ "tokenized_pad_token": "pad",
380
+ "elf_conditional_hf": false,
381
+ "record_pad_truncate": false,
382
+ "record_add_eos": false,
383
+ "record_add_special_tokens": false,
384
+ "record_pad_token": "pad",
385
+ "record_shuffle_buffer": 10000,
386
+ "wrap": true,
387
+ "wrap_mode": "stream",
388
+ "wrap_record_buffer_size": 200,
389
+ "owt_cached_chunks": true,
390
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_compact_overfit",
391
+ "owt_chunk_cache_rebuild": false,
392
+ "owt_chunk_cache_write_batch": 4096,
393
+ "owt_exact_repeat_per_chunk": 64,
394
+ "online_chunk_shuffle": false,
395
+ "online_chunk_shuffle_buffer": 10000,
396
+ "openwebtext_split": "train_minus_100k",
397
+ "detokenizer": "auto",
398
+ "resolved_detokenizer": null,
399
+ "num_workers": 0,
400
+ "latest_every": 1000,
401
+ "resume_path": "runs/train8_ctx1024_uniformt_p50_path3_unif0_0p25_outwdm1_ctx1024_uniformt_temp1_path_sweep_20260518_005638/latest.pt"
402
+ }
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n1024_linear_soft_kl_bridge_20260517_train8_overfit.log ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 50257,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_n1024_linear_soft_kl_bridge_20260517_train8_overfit",
10
+ "batch_size": 1,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 4,
13
+ "global_batch_size": 4,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 2,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 20,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 169453056,
36
+ "muon_adam_param_count": 122368,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "blocks.3.attn_qkv.weight",
57
+ "blocks.3.attn_out.weight",
58
+ "blocks.3.mlp.0.weight",
59
+ "blocks.3.mlp.2.weight",
60
+ "blocks.3.adaLN_modulation.weight",
61
+ "blocks.4.attn_qkv.weight",
62
+ "blocks.4.attn_out.weight",
63
+ "blocks.4.mlp.0.weight",
64
+ "blocks.4.mlp.2.weight",
65
+ "blocks.4.adaLN_modulation.weight",
66
+ "blocks.5.attn_qkv.weight",
67
+ "blocks.5.attn_out.weight",
68
+ "blocks.5.mlp.0.weight",
69
+ "blocks.5.mlp.2.weight",
70
+ "blocks.5.adaLN_modulation.weight",
71
+ "blocks.6.attn_qkv.weight",
72
+ "blocks.6.attn_out.weight",
73
+ "blocks.6.mlp.0.weight",
74
+ "blocks.6.mlp.2.weight",
75
+ "blocks.6.adaLN_modulation.weight",
76
+ "blocks.7.attn_qkv.weight",
77
+ "blocks.7.attn_out.weight",
78
+ "blocks.7.mlp.0.weight",
79
+ "blocks.7.mlp.2.weight",
80
+ "blocks.7.adaLN_modulation.weight",
81
+ "blocks.8.attn_qkv.weight",
82
+ "blocks.8.attn_out.weight",
83
+ "blocks.8.mlp.0.weight",
84
+ "blocks.8.mlp.2.weight",
85
+ "blocks.8.adaLN_modulation.weight",
86
+ "blocks.9.attn_qkv.weight",
87
+ "blocks.9.attn_out.weight",
88
+ "blocks.9.mlp.0.weight",
89
+ "blocks.9.mlp.2.weight",
90
+ "blocks.9.adaLN_modulation.weight",
91
+ "blocks.10.attn_qkv.weight",
92
+ "blocks.10.attn_out.weight",
93
+ "blocks.10.mlp.0.weight",
94
+ "blocks.10.mlp.2.weight",
95
+ "blocks.10.adaLN_modulation.weight",
96
+ "blocks.11.attn_qkv.weight",
97
+ "blocks.11.attn_out.weight",
98
+ "blocks.11.mlp.0.weight",
99
+ "blocks.11.mlp.2.weight",
100
+ "blocks.11.adaLN_modulation.weight",
101
+ "output_layer.linear.weight",
102
+ "output_layer.adaLN_modulation.weight"
103
+ ],
104
+ "muon_adam_param_names": [
105
+ "sigma_map.net.0.bias",
106
+ "sigma_map.net.2.bias",
107
+ "blocks.0.norm1.weight",
108
+ "blocks.0.norm2.weight",
109
+ "blocks.0.mlp.0.bias",
110
+ "blocks.0.mlp.2.bias",
111
+ "blocks.0.adaLN_modulation.bias",
112
+ "blocks.1.norm1.weight",
113
+ "blocks.1.norm2.weight",
114
+ "blocks.1.mlp.0.bias",
115
+ "blocks.1.mlp.2.bias",
116
+ "blocks.1.adaLN_modulation.bias",
117
+ "blocks.2.norm1.weight",
118
+ "blocks.2.norm2.weight",
119
+ "blocks.2.mlp.0.bias",
120
+ "blocks.2.mlp.2.bias",
121
+ "blocks.2.adaLN_modulation.bias",
122
+ "blocks.3.norm1.weight",
123
+ "blocks.3.norm2.weight",
124
+ "blocks.3.mlp.0.bias",
125
+ "blocks.3.mlp.2.bias",
126
+ "blocks.3.adaLN_modulation.bias",
127
+ "blocks.4.norm1.weight",
128
+ "blocks.4.norm2.weight",
129
+ "blocks.4.mlp.0.bias",
130
+ "blocks.4.mlp.2.bias",
131
+ "blocks.4.adaLN_modulation.bias",
132
+ "blocks.5.norm1.weight",
133
+ "blocks.5.norm2.weight",
134
+ "blocks.5.mlp.0.bias",
135
+ "blocks.5.mlp.2.bias",
136
+ "blocks.5.adaLN_modulation.bias",
137
+ "blocks.6.norm1.weight",
138
+ "blocks.6.norm2.weight",
139
+ "blocks.6.mlp.0.bias",
140
+ "blocks.6.mlp.2.bias",
141
+ "blocks.6.adaLN_modulation.bias",
142
+ "blocks.7.norm1.weight",
143
+ "blocks.7.norm2.weight",
144
+ "blocks.7.mlp.0.bias",
145
+ "blocks.7.mlp.2.bias",
146
+ "blocks.7.adaLN_modulation.bias",
147
+ "blocks.8.norm1.weight",
148
+ "blocks.8.norm2.weight",
149
+ "blocks.8.mlp.0.bias",
150
+ "blocks.8.mlp.2.bias",
151
+ "blocks.8.adaLN_modulation.bias",
152
+ "blocks.9.norm1.weight",
153
+ "blocks.9.norm2.weight",
154
+ "blocks.9.mlp.0.bias",
155
+ "blocks.9.mlp.2.bias",
156
+ "blocks.9.adaLN_modulation.bias",
157
+ "blocks.10.norm1.weight",
158
+ "blocks.10.norm2.weight",
159
+ "blocks.10.mlp.0.bias",
160
+ "blocks.10.mlp.2.bias",
161
+ "blocks.10.adaLN_modulation.bias",
162
+ "blocks.11.norm1.weight",
163
+ "blocks.11.norm2.weight",
164
+ "blocks.11.mlp.0.bias",
165
+ "blocks.11.mlp.2.bias",
166
+ "blocks.11.adaLN_modulation.bias",
167
+ "output_layer.norm_final.weight",
168
+ "output_layer.adaLN_modulation.bias"
169
+ ],
170
+ "muon_effective_nesterov": false,
171
+ "muon_effective_width_scale": false,
172
+ "muon_effective_weight_decay": 0.1,
173
+ "muon_adam_fallback_nesterov": false,
174
+ "muon_adam_fallback_weight_decay": 0.1,
175
+ "ema_decay": 0.9999,
176
+ "ema_start_step": 0,
177
+ "model_type": "ddit",
178
+ "elf_num_time_tokens": 4,
179
+ "elf_num_model_mode_tokens": 0,
180
+ "qk_norm": true,
181
+ "output_bias": false,
182
+ "output_init_std": -1.0,
183
+ "norm_type": "rmsnorm",
184
+ "target_loss": "linear_soft_kl",
185
+ "linear_soft_target_power": 1.0,
186
+ "linear_soft_target_min_conf": 0.0,
187
+ "linear_soft_target_max_conf": 1.0,
188
+ "t_sampling_mode": "logit_normal",
189
+ "t_sampling_power": 1.0,
190
+ "t_sampling_eps": 0.0001,
191
+ "t_sampling_logit_mean": -1.5,
192
+ "t_sampling_logit_std": 0.8,
193
+ "dual_t": true,
194
+ "corrupt_t_mode": "same",
195
+ "corrupt_min_t": 0.0,
196
+ "corrupt_max_t": 1.0,
197
+ "prefix_block_prob": 0.0,
198
+ "prefix_block_len": 128,
199
+ "mask_ratio_floor_schedule": "none",
200
+ "dirichlet_endpoint_mode": "categorical_dual_t",
201
+ "dirichlet_semantic_t_mode": "same",
202
+ "dirichlet_semantic_t_value": 0.0,
203
+ "dirichlet_semantic_t_curve": "linear",
204
+ "dirichlet_semantic_t_power": 1.0,
205
+ "endpoint_sequence_random_prob_alpha": 0.0,
206
+ "categorical_wrong_from_full_vocab": true,
207
+ "categorical_wrong_from_batch_valid_tokens": false,
208
+ "categorical_wrong_basin_token_ids": "",
209
+ "categorical_wrong_basin_prob": 0.0,
210
+ "categorical_wrong_unigram_prob": 0.0,
211
+ "categorical_wrong_uniform_prob": 0.0,
212
+ "categorical_wrong_corpus_unigram_path": "",
213
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
214
+ "categorical_wrong_basin_shared_prob": 0.0,
215
+ "categorical_wrong_unigram_shared_prob": 0.0,
216
+ "mask_mixture_original_prob": 0.0,
217
+ "mask_mixture_lowk_prob": 1.0,
218
+ "mask_mixture_lowcorrupt_prob": 0.0,
219
+ "mask_mixture_block_prob": 0.0,
220
+ "mask_mixture_all_prob": 0.0,
221
+ "mask_mixture_lowk_clean_tokens": "64,128,256",
222
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
223
+ "mask_mixture_block_tokens": "64,128",
224
+ "simplex_bridge_sampler": "dirichlet",
225
+ "logistic_normal_sigma_min": 0.18,
226
+ "logistic_normal_sigma_max": 2.2,
227
+ "logistic_normal_tau_min": 0.65,
228
+ "logistic_normal_tau_max": 1.15,
229
+ "torch_compile": false,
230
+ "compile_mode": "max-autotune",
231
+ "state_format": "prob",
232
+ "meanflow_weight": 0.0,
233
+ "rollout_train_prob": 0.0,
234
+ "rollout_train_steps": 1,
235
+ "rollout_train_infer_steps": 64,
236
+ "rollout_train_temp": 1.45,
237
+ "rollout_train_max_gamma": 1.0,
238
+ "rollout_train_corrupt_only": true,
239
+ "rollout_train_samplewise": false,
240
+ "rollout_train_compute_always": false,
241
+ "bridge_noise_init": "logistic_normal",
242
+ "noise_sigma": -1.0,
243
+ "allow_tf32": true,
244
+ "activation_checkpointing": false,
245
+ "activation_checkpoint_interval": 1,
246
+ "activation_checkpoint_scope": "block",
247
+ "ddp_static_graph": false,
248
+ "ddp_gradient_as_bucket_view": true,
249
+ "blocking_data_transfer": false,
250
+ "dataloader_prefetch_factor": 4,
251
+ "full_train_stats": false,
252
+ "tokenized_hf": false,
253
+ "tokenized_pad_token": "pad",
254
+ "elf_conditional_hf": false,
255
+ "record_pad_truncate": false,
256
+ "record_add_eos": false,
257
+ "record_add_special_tokens": false,
258
+ "record_pad_token": "pad",
259
+ "record_shuffle_buffer": 10000,
260
+ "wrap": true,
261
+ "wrap_mode": "stream",
262
+ "wrap_record_buffer_size": 200,
263
+ "owt_cached_chunks": true,
264
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train8_overfit",
265
+ "owt_chunk_cache_rebuild": false,
266
+ "owt_chunk_cache_write_batch": 4096,
267
+ "owt_exact_repeat_per_chunk": 0,
268
+ "online_chunk_shuffle": false,
269
+ "online_chunk_shuffle_buffer": 10000,
270
+ "openwebtext_split": "train_minus_100k",
271
+ "detokenizer": "auto",
272
+ "resolved_detokenizer": null,
273
+ "num_workers": 0,
274
+ "latest_every": 50,
275
+ "resume_path": ""
276
+ }
277
+ step=25 epoch=13/500 epoch_step=1/2 micro_steps=25 elapsed=3.9s lr=2.000000e-03 loss=1.9292 loss_recon=1.9292 loss_meanflow=0.0000 mean_model_t=0.2229 mean_corrupt_t=0.2229 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2229 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1784 corrupt_frac=0.8125 acc_corrupt=0.1331 loss_corrupt=2.1982 wrong_frac=0.7737 init_acc_corrupt=0.1321 acc_corrupt_t_0p0_0p2=0.0651 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.2481 out_g_norm=0.3227 acc_corrupt_t_0p2_0p4=0.1507 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.4118 corrupt_frac_t_0p4_0p6=1.0000 loss_all=10.6282 init_gold_top10=0.3646 init_gold_top100=0.3646
278
+ step=50 epoch=25/500 epoch_step=2/2 micro_steps=50 elapsed=3.2s lr=2.000000e-03 loss=2.0628 loss_recon=2.0628 loss_meanflow=0.0000 mean_model_t=0.2418 mean_corrupt_t=0.2418 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2418 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1820 corrupt_frac=0.8675 acc_corrupt=0.1462 loss_corrupt=2.4723 wrong_frac=0.7587 init_acc_corrupt=0.1501 acc_corrupt_t_0p0_0p2=0.0710 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.2339 out_g_norm=0.3951 acc_corrupt_t_0p2_0p4=0.1754 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3303 corrupt_frac_t_0p4_0p6=1.0000 loss_all=10.6132 init_gold_top10=0.1135 init_gold_top100=0.2458
279
+ step=75 epoch=38/500 epoch_step=1/2 micro_steps=75 elapsed=7.4s lr=2.000000e-03 loss=1.5959 loss_recon=1.5959 loss_meanflow=0.0000 mean_model_t=0.1995 mean_corrupt_t=0.1995 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1995 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1477 corrupt_frac=0.8675 acc_corrupt=0.1153 loss_corrupt=1.9307 wrong_frac=0.7995 init_acc_corrupt=0.1001 acc_corrupt_t_0p2_0p4=0.1775 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.1262 out_g_norm=0.4897 acc_corrupt_t_0p0_0p2=0.0646 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.2896 corrupt_frac_t_0p4_0p6=1.0000 loss_all=9.9409 init_gold_top10=0.3073 init_gold_top100=0.3292
280
+ step=100 epoch=50/500 epoch_step=2/2 micro_steps=100 elapsed=3.2s lr=2.000000e-03 loss=1.9173 loss_recon=1.9173 loss_meanflow=0.0000 mean_model_t=0.2487 mean_corrupt_t=0.2487 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2487 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1938 corrupt_frac=0.8425 acc_corrupt=0.1509 loss_corrupt=2.1707 wrong_frac=0.7557 init_acc_corrupt=0.1566 acc_corrupt_t_0p0_0p2=0.0671 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8933 out_g_norm=0.6142 acc_corrupt_t_0p2_0p4=0.1712 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3528 corrupt_frac_t_0p4_0p6=1.0000 loss_all=9.4293 init_gold_top10=0.3372 init_gold_top100=0.3411
281
+ step=125 epoch=63/500 epoch_step=1/2 micro_steps=125 elapsed=7.3s lr=2.000000e-03 loss=1.5192 loss_recon=1.5192 loss_meanflow=0.0000 mean_model_t=0.2121 mean_corrupt_t=0.2121 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2121 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1628 corrupt_frac=0.8450 acc_corrupt=0.1252 loss_corrupt=1.8785 wrong_frac=0.7876 init_acc_corrupt=0.1138 acc_corrupt_t_0p0_0p2=0.0647 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=3.6385 out_g_norm=0.6544 acc_corrupt_t_0p2_0p4=0.1423 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3462 corrupt_frac_t_0p4_0p6=1.0000 loss_all=9.6248 init_gold_top10=0.1384 init_gold_top100=0.2511
282
+ step=150 epoch=75/500 epoch_step=2/2 micro_steps=150 elapsed=3.2s lr=2.000000e-03 loss=1.5579 loss_recon=1.5579 loss_meanflow=0.0000 mean_model_t=0.2285 mean_corrupt_t=0.2285 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2285 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1755 corrupt_frac=0.8525 acc_corrupt=0.1408 loss_corrupt=1.8343 wrong_frac=0.7725 init_acc_corrupt=0.1303 acc_corrupt_t_0p2_0p4=0.1705 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=4.3633 out_g_norm=0.6731 acc_corrupt_t_0p4_0p6=0.3574 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.0692 corrupt_frac_t_0p0_0p2=1.0000 loss_all=8.2060 init_gold_top10=0.2612 init_gold_top100=0.2991
283
+ step=175 epoch=88/500 epoch_step=1/2 micro_steps=175 elapsed=7.2s lr=2.000000e-03 loss=1.4135 loss_recon=1.4135 loss_meanflow=0.0000 mean_model_t=0.2193 mean_corrupt_t=0.2193 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2193 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1782 corrupt_frac=0.8375 acc_corrupt=0.1330 loss_corrupt=1.6443 wrong_frac=0.7870 init_acc_corrupt=0.1225 acc_corrupt_t_0p4_0p6=0.3708 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=5.0639 out_g_norm=0.5870 acc_corrupt_t_0p2_0p4=0.1906 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p0_0p2=0.0670 corrupt_frac_t_0p0_0p2=1.0000 loss_all=7.1919 init_gold_top10=0.3661 init_gold_top100=0.3694
284
+ step=200 epoch=100/500 epoch_step=2/2 micro_steps=200 elapsed=3.3s lr=2.000000e-03 loss=1.2966 loss_recon=1.2966 loss_meanflow=0.0000 mean_model_t=0.2000 mean_corrupt_t=0.2000 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1622 corrupt_frac=0.8400 acc_corrupt=0.1248 loss_corrupt=1.6265 wrong_frac=0.7967 init_acc_corrupt=0.1161 acc_corrupt_t_0p0_0p2=0.0635 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=5.5787 out_g_norm=0.5550 acc_corrupt_t_0p2_0p4=0.1714 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3082 corrupt_frac_t_0p4_0p6=1.0000 loss_all=9.3518 init_gold_top10=0.0104 init_gold_top100=0.1367
285
+ step=225 epoch=113/500 epoch_step=1/2 micro_steps=225 elapsed=6.9s lr=2.000000e-03 loss=1.2596 loss_recon=1.2596 loss_meanflow=0.0000 mean_model_t=0.2059 mean_corrupt_t=0.2059 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2059 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1807 corrupt_frac=0.8475 acc_corrupt=0.1389 loss_corrupt=1.5060 wrong_frac=0.7966 init_acc_corrupt=0.1126 acc_corrupt_t_0p0_0p2=0.0485 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=5.9420 out_g_norm=0.5233 acc_corrupt_t_0p2_0p4=0.1825 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3848 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.7350 init_gold_top10=0.2122 init_gold_top100=0.3138
286
+ step=250 epoch=125/500 epoch_step=2/2 micro_steps=250 elapsed=3.3s lr=2.000000e-03 loss=1.0794 loss_recon=1.0794 loss_meanflow=0.0000 mean_model_t=0.1793 mean_corrupt_t=0.1793 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1793 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1632 corrupt_frac=0.8500 acc_corrupt=0.1210 loss_corrupt=1.4172 wrong_frac=0.8201 init_acc_corrupt=0.0854 acc_corrupt_t_0p2_0p4=0.1986 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=6.2537 out_g_norm=0.4435 acc_corrupt_t_0p0_0p2=0.0611 corrupt_frac_t_0p0_0p2=1.0000 loss_all=8.7217 init_gold_top10=0.0837 init_gold_top100=0.1987
287
+ step=275 epoch=138/500 epoch_step=1/2 micro_steps=275 elapsed=7.6s lr=2.000000e-03 loss=1.2184 loss_recon=1.2184 loss_meanflow=0.0000 mean_model_t=0.2038 mean_corrupt_t=0.2038 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2038 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1685 corrupt_frac=0.8600 acc_corrupt=0.1272 loss_corrupt=1.5189 wrong_frac=0.8049 init_acc_corrupt=0.0914 acc_corrupt_t_0p0_0p2=0.0738 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=6.5572 out_g_norm=0.4130 acc_corrupt_t_0p2_0p4=0.1865 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.4505 corrupt_frac_t_0p6_0p8=1.0000 loss_all=8.6460 init_gold_top10=0.0182 init_gold_top100=0.1224
288
+ step=300 epoch=150/500 epoch_step=2/2 micro_steps=300 elapsed=3.2s lr=2.000000e-03 loss=1.0496 loss_recon=1.0496 loss_meanflow=0.0000 mean_model_t=0.1754 mean_corrupt_t=0.1754 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1754 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1491 corrupt_frac=0.8725 acc_corrupt=0.1196 loss_corrupt=1.3023 wrong_frac=0.8226 init_acc_corrupt=0.0753 acc_corrupt_t_0p2_0p4=0.2018 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=6.8571 out_g_norm=0.3994 acc_corrupt_t_0p0_0p2=0.0714 corrupt_frac_t_0p0_0p2=1.0000 loss_all=7.0718 init_gold_top10=0.2604 init_gold_top100=0.3187
289
+ step=325 epoch=163/500 epoch_step=1/2 micro_steps=325 elapsed=7.3s lr=2.000000e-03 loss=1.2811 loss_recon=1.2811 loss_meanflow=0.0000 mean_model_t=0.2307 mean_corrupt_t=0.2307 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2307 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2167 corrupt_frac=0.8425 acc_corrupt=0.1665 loss_corrupt=1.5354 wrong_frac=0.7709 init_acc_corrupt=0.1407 acc_corrupt_t_0p2_0p4=0.2165 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=7.1720 out_g_norm=0.4190 acc_corrupt_t_0p0_0p2=0.0746 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.3664 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.6212 init_gold_top10=0.1471 init_gold_top100=0.2539
290
+ step=350 epoch=175/500 epoch_step=2/2 micro_steps=350 elapsed=3.2s lr=2.000000e-03 loss=1.1113 loss_recon=1.1113 loss_meanflow=0.0000 mean_model_t=0.2000 mean_corrupt_t=0.2000 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1812 corrupt_frac=0.8525 acc_corrupt=0.1405 loss_corrupt=1.3994 wrong_frac=0.7984 init_acc_corrupt=0.1058 acc_corrupt_t_0p2_0p4=0.1975 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=7.4602 out_g_norm=0.4178 acc_corrupt_t_0p0_0p2=0.0613 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.3714 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.5593 init_gold_top10=0.0716 init_gold_top100=0.1667
291
+ step=375 epoch=188/500 epoch_step=1/2 micro_steps=375 elapsed=6.1s lr=2.000000e-03 loss=0.9947 loss_recon=0.9947 loss_meanflow=0.0000 mean_model_t=0.1848 mean_corrupt_t=0.1848 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1848 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1754 corrupt_frac=0.8275 acc_corrupt=0.1286 loss_corrupt=1.2082 wrong_frac=0.8215 init_acc_corrupt=0.0898 acc_corrupt_t_0p0_0p2=0.0663 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=7.7510 out_g_norm=0.3472 acc_corrupt_t_0p2_0p4=0.2107 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.3901 corrupt_frac_t_0p4_0p6=1.0000 loss_all=6.9657 init_gold_top10=0.2943 init_gold_top100=0.3060
292
+ step=400 epoch=200/500 epoch_step=2/2 micro_steps=400 elapsed=3.2s lr=2.000000e-03 loss=1.1437 loss_recon=1.1437 loss_meanflow=0.0000 mean_model_t=0.2432 mean_corrupt_t=0.2432 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2432 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2484 corrupt_frac=0.8475 acc_corrupt=0.1991 loss_corrupt=1.4032 wrong_frac=0.7520 init_acc_corrupt=0.1677 acc_corrupt_t_0p4_0p6=0.5122 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=8.0407 out_g_norm=0.3851 acc_corrupt_t_0p0_0p2=0.0735 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2711 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.5677 corrupt_frac_t_0p6_0p8=1.0000 loss_all=7.7681 init_gold_top10=0.1354 init_gold_top100=0.2435
293
+ step=425 epoch=213/500 epoch_step=1/2 micro_steps=425 elapsed=6.8s lr=2.000000e-03 loss=1.0961 loss_recon=1.0961 loss_meanflow=0.0000 mean_model_t=0.2254 mean_corrupt_t=0.2254 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2254 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2287 corrupt_frac=0.8475 acc_corrupt=0.1800 loss_corrupt=1.4120 wrong_frac=0.7710 init_acc_corrupt=0.1320 acc_corrupt_t_0p0_0p2=0.0775 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=8.3189 out_g_norm=0.3768 acc_corrupt_t_0p2_0p4=0.2591 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5409 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.2746 init_gold_top10=0.1339 init_gold_top100=0.2333
294
+ step=450 epoch=225/500 epoch_step=2/2 micro_steps=450 elapsed=3.2s lr=2.000000e-03 loss=1.2043 loss_recon=1.2043 loss_meanflow=0.0000 mean_model_t=0.2573 mean_corrupt_t=0.2573 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2573 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2714 corrupt_frac=0.8425 acc_corrupt=0.2156 loss_corrupt=1.4170 wrong_frac=0.7421 init_acc_corrupt=0.1684 acc_corrupt_t_0p2_0p4=0.2319 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=8.5926 out_g_norm=0.4680 acc_corrupt_t_0p0_0p2=0.0840 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4976 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p6_0p8=0.6362 corrupt_frac_t_0p6_0p8=1.0000 loss_all=6.4287 init_gold_top10=0.2148 init_gold_top100=0.3008
295
+ step=475 epoch=238/500 epoch_step=1/2 micro_steps=475 elapsed=7.2s lr=2.000000e-03 loss=1.0695 loss_recon=1.0695 loss_meanflow=0.0000 mean_model_t=0.2275 mean_corrupt_t=0.2275 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2275 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2506 corrupt_frac=0.8325 acc_corrupt=0.1946 loss_corrupt=1.3589 wrong_frac=0.7694 init_acc_corrupt=0.1501 acc_corrupt_t_0p0_0p2=0.0772 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=8.8962 out_g_norm=0.4110 acc_corrupt_t_0p2_0p4=0.2623 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.4316 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.4180 init_gold_top10=0.0982 init_gold_top100=0.2254
296
+ step=500 epoch=250/500 epoch_step=2/2 micro_steps=500 elapsed=3.2s lr=2.000000e-03 loss=0.9610 loss_recon=0.9610 loss_meanflow=0.0000 mean_model_t=0.2087 mean_corrupt_t=0.2087 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2087 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2305 corrupt_frac=0.8325 acc_corrupt=0.1766 loss_corrupt=1.1436 wrong_frac=0.7906 init_acc_corrupt=0.1262 acc_corrupt_t_0p2_0p4=0.2759 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=9.2029 out_g_norm=0.3927 acc_corrupt_t_0p0_0p2=0.0715 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.5685 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.3541 init_gold_top10=0.3604 init_gold_top100=0.3615
297
+ step=525 epoch=263/500 epoch_step=1/2 micro_steps=525 elapsed=6.3s lr=2.000000e-03 loss=1.0511 loss_recon=1.0511 loss_meanflow=0.0000 mean_model_t=0.2178 mean_corrupt_t=0.2178 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2178 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2335 corrupt_frac=0.8500 acc_corrupt=0.1804 loss_corrupt=1.1904 wrong_frac=0.7787 init_acc_corrupt=0.1194 acc_corrupt_t_0p2_0p4=0.2416 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=9.4743 out_g_norm=0.4127 acc_corrupt_t_0p0_0p2=0.0802 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4263 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.6855 init_gold_top10=0.4342 init_gold_top100=0.4353
298
+ step=550 epoch=275/500 epoch_step=2/2 micro_steps=550 elapsed=3.3s lr=2.000000e-03 loss=0.9946 loss_recon=0.9946 loss_meanflow=0.0000 mean_model_t=0.2122 mean_corrupt_t=0.2122 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2122 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2371 corrupt_frac=0.8475 acc_corrupt=0.1836 loss_corrupt=1.3336 wrong_frac=0.7820 init_acc_corrupt=0.1212 acc_corrupt_t_0p0_0p2=0.0734 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=9.7200 out_g_norm=0.3782 acc_corrupt_t_0p2_0p4=0.2693 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.4196 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.3814 init_gold_top10=0.0145 init_gold_top100=0.1194
299
+ step=575 epoch=288/500 epoch_step=1/2 micro_steps=575 elapsed=6.4s lr=2.000000e-03 loss=0.8530 loss_recon=0.8530 loss_meanflow=0.0000 mean_model_t=0.1913 mean_corrupt_t=0.1913 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1913 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2010 corrupt_frac=0.8675 acc_corrupt=0.1569 loss_corrupt=1.2501 wrong_frac=0.8059 init_acc_corrupt=0.0965 acc_corrupt_t_0p0_0p2=0.0591 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=9.9654 out_g_norm=0.3631 acc_corrupt_t_0p2_0p4=0.2402 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.7591 corrupt_frac_t_0p6_0p8=1.0000 loss_all=9.4386 init_gold_top10=0.0146 init_gold_top100=0.1229
300
+ step=600 epoch=300/500 epoch_step=2/2 micro_steps=600 elapsed=3.3s lr=2.000000e-03 loss=0.9203 loss_recon=0.9203 loss_meanflow=0.0000 mean_model_t=0.2002 mean_corrupt_t=0.2002 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2002 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2107 corrupt_frac=0.8600 acc_corrupt=0.1605 loss_corrupt=1.0956 wrong_frac=0.8077 init_acc_corrupt=0.1017 acc_corrupt_t_0p2_0p4=0.2323 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.1867 out_g_norm=0.3898 acc_corrupt_t_0p0_0p2=0.0704 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4909 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.4129 init_gold_top10=0.2422 init_gold_top100=0.3177
301
+ step=625 epoch=313/500 epoch_step=1/2 micro_steps=625 elapsed=7.4s lr=2.000000e-03 loss=0.9067 loss_recon=0.9067 loss_meanflow=0.0000 mean_model_t=0.1977 mean_corrupt_t=0.1977 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1977 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2162 corrupt_frac=0.8650 acc_corrupt=0.1712 loss_corrupt=1.0752 wrong_frac=0.8011 init_acc_corrupt=0.1018 acc_corrupt_t_0p2_0p4=0.2487 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.3504 out_g_norm=0.3548 acc_corrupt_t_0p0_0p2=0.0756 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.5089 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.9936 init_gold_top10=0.3052 init_gold_top100=0.3052
302
+ step=650 epoch=325/500 epoch_step=2/2 micro_steps=650 elapsed=3.3s lr=2.000000e-03 loss=1.0812 loss_recon=1.0812 loss_meanflow=0.0000 mean_model_t=0.2686 mean_corrupt_t=0.2686 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2686 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3181 corrupt_frac=0.8500 acc_corrupt=0.2540 loss_corrupt=1.3163 wrong_frac=0.7288 init_acc_corrupt=0.1849 acc_corrupt_t_0p2_0p4=0.2847 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.5032 out_g_norm=0.4212 acc_corrupt_t_0p0_0p2=0.0803 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.5031 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p6_0p8=0.6618 corrupt_frac_t_0p6_0p8=1.0000 loss_all=6.9221 init_gold_top10=0.0586 init_gold_top100=0.2070
303
+ step=675 epoch=338/500 epoch_step=1/2 micro_steps=675 elapsed=6.3s lr=2.000000e-03 loss=0.9295 loss_recon=0.9295 loss_meanflow=0.0000 mean_model_t=0.2244 mean_corrupt_t=0.2244 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2244 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2606 corrupt_frac=0.8450 acc_corrupt=0.2044 loss_corrupt=1.1223 wrong_frac=0.7735 init_acc_corrupt=0.1386 acc_corrupt_t_0p2_0p4=0.2548 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.6287 out_g_norm=0.3932 acc_corrupt_t_0p0_0p2=0.0862 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4817 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.4106 init_gold_top10=0.2533 init_gold_top100=0.2935
304
+ step=700 epoch=350/500 epoch_step=2/2 micro_steps=700 elapsed=3.3s lr=2.000000e-03 loss=0.8337 loss_recon=0.8337 loss_meanflow=0.0000 mean_model_t=0.1820 mean_corrupt_t=0.1820 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1820 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2044 corrupt_frac=0.8550 acc_corrupt=0.1522 loss_corrupt=1.0238 wrong_frac=0.8183 init_acc_corrupt=0.0776 acc_corrupt_t_0p2_0p4=0.2138 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.7533 out_g_norm=0.4313 acc_corrupt_t_0p0_0p2=0.0790 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4245 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.3746 init_gold_top10=0.3094 init_gold_top100=0.3146
305
+ step=725 epoch=363/500 epoch_step=1/2 micro_steps=725 elapsed=6.3s lr=2.000000e-03 loss=0.9137 loss_recon=0.9137 loss_meanflow=0.0000 mean_model_t=0.2167 mean_corrupt_t=0.2167 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2167 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2622 corrupt_frac=0.8275 acc_corrupt=0.1893 loss_corrupt=1.1469 wrong_frac=0.7835 init_acc_corrupt=0.1159 acc_corrupt_t_0p0_0p2=0.0841 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=10.8663 out_g_norm=0.4318 acc_corrupt_t_0p2_0p4=0.2858 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5123 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.9560 init_gold_top10=0.1979 init_gold_top100=0.2875
306
+ step=750 epoch=375/500 epoch_step=2/2 micro_steps=750 elapsed=3.3s lr=2.000000e-03 loss=0.8475 loss_recon=0.8475 loss_meanflow=0.0000 mean_model_t=0.2249 mean_corrupt_t=0.2249 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2249 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2523 corrupt_frac=0.8575 acc_corrupt=0.1955 loss_corrupt=1.0302 wrong_frac=0.7773 init_acc_corrupt=0.1290 acc_corrupt_t_0p2_0p4=0.2594 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=10.9546 out_g_norm=0.3843 acc_corrupt_t_0p4_0p6=0.4941 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p8_1p0=0.8382 corrupt_frac_t_0p8_1p0=1.0000 acc_corrupt_t_0p0_0p2=0.0887 corrupt_frac_t_0p0_0p2=1.0000 loss_all=5.5484 init_gold_top10=0.1654 init_gold_top100=0.2630
307
+ step=775 epoch=388/500 epoch_step=1/2 micro_steps=775 elapsed=7.1s lr=2.000000e-03 loss=0.8419 loss_recon=0.8419 loss_meanflow=0.0000 mean_model_t=0.2047 mean_corrupt_t=0.2047 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2047 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2355 corrupt_frac=0.8525 acc_corrupt=0.1754 loss_corrupt=1.0470 wrong_frac=0.8014 init_acc_corrupt=0.1122 acc_corrupt_t_0p0_0p2=0.0686 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.0451 out_g_norm=0.4072 acc_corrupt_t_0p2_0p4=0.2804 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.4922 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.4637 init_gold_top10=0.2271 init_gold_top100=0.3146
308
+ step=800 epoch=400/500 epoch_step=2/2 micro_steps=800 elapsed=3.3s lr=2.000000e-03 loss=0.9083 loss_recon=0.9083 loss_meanflow=0.0000 mean_model_t=0.2219 mean_corrupt_t=0.2219 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2219 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2591 corrupt_frac=0.8575 acc_corrupt=0.1992 loss_corrupt=1.1724 wrong_frac=0.7766 init_acc_corrupt=0.1295 acc_corrupt_t_0p4_0p6=0.4456 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=11.1244 out_g_norm=0.3700 acc_corrupt_t_0p0_0p2=0.0879 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2556 corrupt_frac_t_0p2_0p4=1.0000 loss_all=6.9205 init_gold_top10=0.1094 init_gold_top100=0.2176
309
+ step=825 epoch=413/500 epoch_step=1/2 micro_steps=825 elapsed=6.4s lr=2.000000e-03 loss=0.7589 loss_recon=0.7589 loss_meanflow=0.0000 mean_model_t=0.1673 mean_corrupt_t=0.1673 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1673 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1843 corrupt_frac=0.8700 acc_corrupt=0.1309 loss_corrupt=1.1194 wrong_frac=0.8311 init_acc_corrupt=0.0565 acc_corrupt_t_0p0_0p2=0.0852 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.2187 out_g_norm=0.3368 acc_corrupt_t_0p2_0p4=0.2189 corrupt_frac_t_0p2_0p4=1.0000 loss_all=8.5999 init_gold_top10=0.0188 init_gold_top100=0.1240
310
+ step=850 epoch=425/500 epoch_step=2/2 micro_steps=850 elapsed=3.3s lr=2.000000e-03 loss=0.8390 loss_recon=0.8390 loss_meanflow=0.0000 mean_model_t=0.2215 mean_corrupt_t=0.2215 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2215 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2685 corrupt_frac=0.8475 acc_corrupt=0.2015 loss_corrupt=1.0946 wrong_frac=0.7834 init_acc_corrupt=0.1358 acc_corrupt_t_0p0_0p2=0.0705 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.3069 out_g_norm=0.3728 acc_corrupt_t_0p2_0p4=0.2829 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5491 corrupt_frac_t_0p4_0p6=1.0000 loss_all=6.6402 init_gold_top10=0.0982 init_gold_top100=0.2054
311
+ step=875 epoch=438/500 epoch_step=1/2 micro_steps=875 elapsed=6.4s lr=2.000000e-03 loss=0.9321 loss_recon=0.9321 loss_meanflow=0.0000 mean_model_t=0.2682 mean_corrupt_t=0.2682 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2682 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3264 corrupt_frac=0.8500 acc_corrupt=0.2580 loss_corrupt=1.0279 wrong_frac=0.7365 init_acc_corrupt=0.1897 acc_corrupt_t_0p4_0p6=0.4869 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=11.3918 out_g_norm=0.4013 acc_corrupt_t_0p0_0p2=0.0857 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2753 corrupt_frac_t_0p2_0p4=1.0000 loss_all=3.2197 init_gold_top10=0.4475 init_gold_top100=0.4487
312
+ step=900 epoch=450/500 epoch_step=2/2 micro_steps=900 elapsed=3.3s lr=2.000000e-03 loss=0.7588 loss_recon=0.7588 loss_meanflow=0.0000 mean_model_t=0.1950 mean_corrupt_t=0.1950 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1950 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2202 corrupt_frac=0.8675 acc_corrupt=0.1730 loss_corrupt=1.0286 wrong_frac=0.8093 init_acc_corrupt=0.1086 acc_corrupt_t_0p0_0p2=0.0723 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.4656 out_g_norm=0.3590 acc_corrupt_t_0p2_0p4=0.2857 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.6250 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p4_0p6=0.5446 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.3870 init_gold_top10=0.1440 init_gold_top100=0.2411
313
+ step=925 epoch=463/500 epoch_step=1/2 micro_steps=925 elapsed=7.2s lr=2.000000e-03 loss=0.8113 loss_recon=0.8113 loss_meanflow=0.0000 mean_model_t=0.2037 mean_corrupt_t=0.2037 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2037 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2324 corrupt_frac=0.8550 acc_corrupt=0.1750 loss_corrupt=1.1130 wrong_frac=0.7979 init_acc_corrupt=0.1045 acc_corrupt_t_0p2_0p4=0.2974 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=11.5452 out_g_norm=0.3375 acc_corrupt_t_0p0_0p2=0.0959 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.4928 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.6930 init_gold_top10=0.0480 init_gold_top100=0.1797
314
+ step=950 epoch=475/500 epoch_step=2/2 micro_steps=950 elapsed=3.3s lr=2.000000e-03 loss=0.7805 loss_recon=0.7805 loss_meanflow=0.0000 mean_model_t=0.1864 mean_corrupt_t=0.1864 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.1864 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2186 corrupt_frac=0.8275 acc_corrupt=0.1571 loss_corrupt=1.1102 wrong_frac=0.8129 init_acc_corrupt=0.0838 acc_corrupt_t_0p0_0p2=0.1000 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.6360 out_g_norm=0.3247 acc_corrupt_t_0p2_0p4=0.2736 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.4576 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.0911 init_gold_top10=0.0368 init_gold_top100=0.1462
315
+ step=975 epoch=488/500 epoch_step=1/2 micro_steps=975 elapsed=6.4s lr=2.000000e-03 loss=0.7559 loss_recon=0.7559 loss_meanflow=0.0000 mean_model_t=0.2108 mean_corrupt_t=0.2108 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2108 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2459 corrupt_frac=0.8600 acc_corrupt=0.1871 loss_corrupt=1.1226 wrong_frac=0.7927 init_acc_corrupt=0.1185 acc_corrupt_t_0p6_0p8=0.7292 corrupt_frac_t_0p6_0p8=1.0000 out_w_norm=11.7141 out_g_norm=0.3183 acc_corrupt_t_0p0_0p2=0.0793 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2667 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5513 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.7143 init_gold_top10=0.0292 init_gold_top100=0.1271
316
+ step=1000 epoch=500/500 epoch_step=2/2 micro_steps=1000 elapsed=3.3s lr=2.000000e-03 loss=0.8140 loss_recon=0.8140 loss_meanflow=0.0000 mean_model_t=0.2305 mean_corrupt_t=0.2305 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.2305 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2721 corrupt_frac=0.8625 acc_corrupt=0.2186 loss_corrupt=0.9849 wrong_frac=0.7682 init_acc_corrupt=0.1369 acc_corrupt_t_0p0_0p2=0.0828 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=11.7773 out_g_norm=0.3201 acc_corrupt_t_0p2_0p4=0.2590 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5636 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.8487 init_gold_top10=0.2455 init_gold_top100=0.2902
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n256_compactv969_3l_bs512_hard_ce_onehot.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n64_compactv335_3l_hard_ce_onehot.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n8_allcorrupt_hard_ce_20260517_train8ctx8_allcorrupt.log ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 50257,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_n8_allcorrupt_hard_ce_20260517_train8ctx8_allcorrupt",
10
+ "batch_size": 1,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 4,
13
+ "global_batch_size": 4,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 2,
18
+ "total_steps": 500,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 169453056,
36
+ "muon_adam_param_count": 122368,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "blocks.3.attn_qkv.weight",
57
+ "blocks.3.attn_out.weight",
58
+ "blocks.3.mlp.0.weight",
59
+ "blocks.3.mlp.2.weight",
60
+ "blocks.3.adaLN_modulation.weight",
61
+ "blocks.4.attn_qkv.weight",
62
+ "blocks.4.attn_out.weight",
63
+ "blocks.4.mlp.0.weight",
64
+ "blocks.4.mlp.2.weight",
65
+ "blocks.4.adaLN_modulation.weight",
66
+ "blocks.5.attn_qkv.weight",
67
+ "blocks.5.attn_out.weight",
68
+ "blocks.5.mlp.0.weight",
69
+ "blocks.5.mlp.2.weight",
70
+ "blocks.5.adaLN_modulation.weight",
71
+ "blocks.6.attn_qkv.weight",
72
+ "blocks.6.attn_out.weight",
73
+ "blocks.6.mlp.0.weight",
74
+ "blocks.6.mlp.2.weight",
75
+ "blocks.6.adaLN_modulation.weight",
76
+ "blocks.7.attn_qkv.weight",
77
+ "blocks.7.attn_out.weight",
78
+ "blocks.7.mlp.0.weight",
79
+ "blocks.7.mlp.2.weight",
80
+ "blocks.7.adaLN_modulation.weight",
81
+ "blocks.8.attn_qkv.weight",
82
+ "blocks.8.attn_out.weight",
83
+ "blocks.8.mlp.0.weight",
84
+ "blocks.8.mlp.2.weight",
85
+ "blocks.8.adaLN_modulation.weight",
86
+ "blocks.9.attn_qkv.weight",
87
+ "blocks.9.attn_out.weight",
88
+ "blocks.9.mlp.0.weight",
89
+ "blocks.9.mlp.2.weight",
90
+ "blocks.9.adaLN_modulation.weight",
91
+ "blocks.10.attn_qkv.weight",
92
+ "blocks.10.attn_out.weight",
93
+ "blocks.10.mlp.0.weight",
94
+ "blocks.10.mlp.2.weight",
95
+ "blocks.10.adaLN_modulation.weight",
96
+ "blocks.11.attn_qkv.weight",
97
+ "blocks.11.attn_out.weight",
98
+ "blocks.11.mlp.0.weight",
99
+ "blocks.11.mlp.2.weight",
100
+ "blocks.11.adaLN_modulation.weight",
101
+ "output_layer.linear.weight",
102
+ "output_layer.adaLN_modulation.weight"
103
+ ],
104
+ "muon_adam_param_names": [
105
+ "sigma_map.net.0.bias",
106
+ "sigma_map.net.2.bias",
107
+ "blocks.0.norm1.weight",
108
+ "blocks.0.norm2.weight",
109
+ "blocks.0.mlp.0.bias",
110
+ "blocks.0.mlp.2.bias",
111
+ "blocks.0.adaLN_modulation.bias",
112
+ "blocks.1.norm1.weight",
113
+ "blocks.1.norm2.weight",
114
+ "blocks.1.mlp.0.bias",
115
+ "blocks.1.mlp.2.bias",
116
+ "blocks.1.adaLN_modulation.bias",
117
+ "blocks.2.norm1.weight",
118
+ "blocks.2.norm2.weight",
119
+ "blocks.2.mlp.0.bias",
120
+ "blocks.2.mlp.2.bias",
121
+ "blocks.2.adaLN_modulation.bias",
122
+ "blocks.3.norm1.weight",
123
+ "blocks.3.norm2.weight",
124
+ "blocks.3.mlp.0.bias",
125
+ "blocks.3.mlp.2.bias",
126
+ "blocks.3.adaLN_modulation.bias",
127
+ "blocks.4.norm1.weight",
128
+ "blocks.4.norm2.weight",
129
+ "blocks.4.mlp.0.bias",
130
+ "blocks.4.mlp.2.bias",
131
+ "blocks.4.adaLN_modulation.bias",
132
+ "blocks.5.norm1.weight",
133
+ "blocks.5.norm2.weight",
134
+ "blocks.5.mlp.0.bias",
135
+ "blocks.5.mlp.2.bias",
136
+ "blocks.5.adaLN_modulation.bias",
137
+ "blocks.6.norm1.weight",
138
+ "blocks.6.norm2.weight",
139
+ "blocks.6.mlp.0.bias",
140
+ "blocks.6.mlp.2.bias",
141
+ "blocks.6.adaLN_modulation.bias",
142
+ "blocks.7.norm1.weight",
143
+ "blocks.7.norm2.weight",
144
+ "blocks.7.mlp.0.bias",
145
+ "blocks.7.mlp.2.bias",
146
+ "blocks.7.adaLN_modulation.bias",
147
+ "blocks.8.norm1.weight",
148
+ "blocks.8.norm2.weight",
149
+ "blocks.8.mlp.0.bias",
150
+ "blocks.8.mlp.2.bias",
151
+ "blocks.8.adaLN_modulation.bias",
152
+ "blocks.9.norm1.weight",
153
+ "blocks.9.norm2.weight",
154
+ "blocks.9.mlp.0.bias",
155
+ "blocks.9.mlp.2.bias",
156
+ "blocks.9.adaLN_modulation.bias",
157
+ "blocks.10.norm1.weight",
158
+ "blocks.10.norm2.weight",
159
+ "blocks.10.mlp.0.bias",
160
+ "blocks.10.mlp.2.bias",
161
+ "blocks.10.adaLN_modulation.bias",
162
+ "blocks.11.norm1.weight",
163
+ "blocks.11.norm2.weight",
164
+ "blocks.11.mlp.0.bias",
165
+ "blocks.11.mlp.2.bias",
166
+ "blocks.11.adaLN_modulation.bias",
167
+ "output_layer.norm_final.weight",
168
+ "output_layer.adaLN_modulation.bias"
169
+ ],
170
+ "muon_effective_nesterov": false,
171
+ "muon_effective_width_scale": false,
172
+ "muon_effective_weight_decay": 0.1,
173
+ "muon_adam_fallback_nesterov": false,
174
+ "muon_adam_fallback_weight_decay": 0.1,
175
+ "ema_decay": 0.9999,
176
+ "ema_start_step": 0,
177
+ "model_type": "ddit",
178
+ "elf_num_time_tokens": 4,
179
+ "elf_num_model_mode_tokens": 0,
180
+ "qk_norm": true,
181
+ "output_bias": false,
182
+ "output_init_std": -1.0,
183
+ "norm_type": "rmsnorm",
184
+ "target_loss": "hard_ce",
185
+ "linear_soft_target_power": 1.0,
186
+ "linear_soft_target_min_conf": 0.0,
187
+ "linear_soft_target_max_conf": 1.0,
188
+ "t_sampling_mode": "logit_normal",
189
+ "t_sampling_power": 1.0,
190
+ "t_sampling_eps": 0.0001,
191
+ "t_sampling_logit_mean": -1.5,
192
+ "t_sampling_logit_std": 0.8,
193
+ "dual_t": true,
194
+ "corrupt_t_mode": "same",
195
+ "corrupt_min_t": 0.0,
196
+ "corrupt_max_t": 1.0,
197
+ "prefix_block_prob": 0.0,
198
+ "prefix_block_len": 128,
199
+ "mask_ratio_floor_schedule": "none",
200
+ "dirichlet_endpoint_mode": "categorical_dual_t",
201
+ "dirichlet_semantic_t_mode": "same",
202
+ "dirichlet_semantic_t_value": 0.0,
203
+ "dirichlet_semantic_t_curve": "linear",
204
+ "dirichlet_semantic_t_power": 1.0,
205
+ "endpoint_sequence_random_prob_alpha": 0.0,
206
+ "categorical_wrong_from_full_vocab": true,
207
+ "categorical_wrong_from_batch_valid_tokens": false,
208
+ "categorical_wrong_basin_token_ids": "",
209
+ "categorical_wrong_basin_prob": 0.0,
210
+ "categorical_wrong_unigram_prob": 0.0,
211
+ "categorical_wrong_uniform_prob": 0.0,
212
+ "categorical_wrong_corpus_unigram_path": "",
213
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
214
+ "categorical_wrong_basin_shared_prob": 0.0,
215
+ "categorical_wrong_unigram_shared_prob": 0.0,
216
+ "mask_mixture_original_prob": 0.0,
217
+ "mask_mixture_lowk_prob": 0.0,
218
+ "mask_mixture_lowcorrupt_prob": 0.0,
219
+ "mask_mixture_block_prob": 0.0,
220
+ "mask_mixture_all_prob": 1.0,
221
+ "mask_mixture_lowk_clean_tokens": "0",
222
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
223
+ "mask_mixture_block_tokens": "64,128",
224
+ "simplex_bridge_sampler": "dirichlet",
225
+ "logistic_normal_sigma_min": 0.18,
226
+ "logistic_normal_sigma_max": 2.2,
227
+ "logistic_normal_tau_min": 0.65,
228
+ "logistic_normal_tau_max": 1.15,
229
+ "torch_compile": false,
230
+ "compile_mode": "max-autotune",
231
+ "state_format": "prob",
232
+ "meanflow_weight": 0.0,
233
+ "rollout_train_prob": 0.0,
234
+ "rollout_train_steps": 1,
235
+ "rollout_train_infer_steps": 64,
236
+ "rollout_train_temp": 1.45,
237
+ "rollout_train_max_gamma": 1.0,
238
+ "rollout_train_corrupt_only": true,
239
+ "rollout_train_samplewise": false,
240
+ "rollout_train_compute_always": false,
241
+ "bridge_noise_init": "logistic_normal",
242
+ "noise_sigma": -1.0,
243
+ "allow_tf32": true,
244
+ "activation_checkpointing": false,
245
+ "activation_checkpoint_interval": 1,
246
+ "activation_checkpoint_scope": "block",
247
+ "ddp_static_graph": false,
248
+ "ddp_gradient_as_bucket_view": true,
249
+ "blocking_data_transfer": false,
250
+ "dataloader_prefetch_factor": 4,
251
+ "full_train_stats": false,
252
+ "tokenized_hf": false,
253
+ "tokenized_pad_token": "pad",
254
+ "elf_conditional_hf": false,
255
+ "record_pad_truncate": false,
256
+ "record_add_eos": false,
257
+ "record_add_special_tokens": false,
258
+ "record_pad_token": "pad",
259
+ "record_shuffle_buffer": 10000,
260
+ "wrap": true,
261
+ "wrap_mode": "stream",
262
+ "wrap_record_buffer_size": 200,
263
+ "owt_cached_chunks": true,
264
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len8_train8_overfit",
265
+ "owt_chunk_cache_rebuild": false,
266
+ "owt_chunk_cache_write_batch": 4096,
267
+ "owt_exact_repeat_per_chunk": 0,
268
+ "online_chunk_shuffle": false,
269
+ "online_chunk_shuffle_buffer": 10000,
270
+ "openwebtext_split": "train_minus_100k",
271
+ "detokenizer": "auto",
272
+ "resolved_detokenizer": null,
273
+ "num_workers": 0,
274
+ "latest_every": 10,
275
+ "resume_path": ""
276
+ }
277
+ step=10 epoch=5/250 epoch_step=2/2 micro_steps=10 elapsed=1.8s lr=2.000000e-03 loss=10.8055 loss_recon=10.8055 loss_meanflow=0.0000 mean_model_t=0.2827 mean_corrupt_t=0.2827 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=10.8055 wrong_frac=0.7500 init_acc_corrupt=0.1625 acc_corrupt_t_0p0_0p2=0.0938 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.0155 out_g_norm=4.9894 acc_corrupt_t_0p6_0p8=0.0000 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p2_0p4=0.1875 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=10.7734 init_gold_top10=0.3750 init_gold_top100=0.3750
278
+ step=20 epoch=10/250 epoch_step=2/2 micro_steps=20 elapsed=5.5s lr=2.000000e-03 loss=10.7273 loss_recon=10.7273 loss_meanflow=0.0000 mean_model_t=0.2379 mean_corrupt_t=0.2379 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=10.7273 wrong_frac=0.7250 init_acc_corrupt=0.1625 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.0995 out_g_norm=6.0688 acc_corrupt_t_0p2_0p4=0.1875 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.3750 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p4_0p6=0.1250 corrupt_frac_t_0p4_0p6=1.0000 loss_all=10.7188 init_gold_top10=0.1250 init_gold_top100=0.1250
279
+ step=30 epoch=15/250 epoch_step=2/2 micro_steps=30 elapsed=5.2s lr=2.000000e-03 loss=10.6273 loss_recon=10.6273 loss_meanflow=0.0000 mean_model_t=0.2564 mean_corrupt_t=0.2564 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1500 corrupt_frac=1.0000 acc_corrupt=0.1500 loss_corrupt=10.6273 wrong_frac=0.7750 init_acc_corrupt=0.1750 acc_corrupt_t_0p2_0p4=0.1500 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=0.2176 out_g_norm=6.9701 acc_corrupt_t_0p4_0p6=0.1250 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 loss_all=10.6328 init_gold_top10=0.0000 init_gold_top100=0.1250
280
+ step=40 epoch=20/250 epoch_step=2/2 micro_steps=40 elapsed=4.5s lr=2.000000e-03 loss=10.5273 loss_recon=10.5273 loss_meanflow=0.0000 mean_model_t=0.1906 mean_corrupt_t=0.1906 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1375 corrupt_frac=1.0000 acc_corrupt=0.1375 loss_corrupt=10.5273 wrong_frac=0.8625 init_acc_corrupt=0.0875 acc_corrupt_t_0p2_0p4=0.1250 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=0.3264 out_g_norm=7.2241 acc_corrupt_t_0p4_0p6=0.1250 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.1458 corrupt_frac_t_0p0_0p2=1.0000 loss_all=10.4922 init_gold_top10=0.0000 init_gold_top100=0.1250
281
+ step=50 epoch=25/250 epoch_step=2/2 micro_steps=50 elapsed=5.5s lr=2.000000e-03 loss=10.4398 loss_recon=10.4398 loss_meanflow=0.0000 mean_model_t=0.1646 mean_corrupt_t=0.1646 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1375 corrupt_frac=1.0000 acc_corrupt=0.1375 loss_corrupt=10.4398 wrong_frac=0.8875 init_acc_corrupt=0.0250 acc_corrupt_t_0p0_0p2=0.1458 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.4272 out_g_norm=7.5896 acc_corrupt_t_0p2_0p4=0.1250 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.5156 init_gold_top10=0.0000 init_gold_top100=0.2500
282
+ step=60 epoch=30/250 epoch_step=2/2 micro_steps=60 elapsed=4.4s lr=2.000000e-03 loss=10.2430 loss_recon=10.2430 loss_meanflow=0.0000 mean_model_t=0.2362 mean_corrupt_t=0.2362 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1500 corrupt_frac=1.0000 acc_corrupt=0.1500 loss_corrupt=10.2430 wrong_frac=0.7250 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.5347 out_g_norm=7.9975 acc_corrupt_t_0p4_0p6=0.1250 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.1750 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.0469 init_gold_top10=0.1250 init_gold_top100=0.2500
283
+ step=70 epoch=35/250 epoch_step=2/2 micro_steps=70 elapsed=5.2s lr=2.000000e-03 loss=10.1438 loss_recon=10.1438 loss_meanflow=0.0000 mean_model_t=0.1956 mean_corrupt_t=0.1956 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1500 corrupt_frac=1.0000 acc_corrupt=0.1500 loss_corrupt=10.1438 wrong_frac=0.8375 init_acc_corrupt=0.0375 acc_corrupt_t_0p2_0p4=0.1667 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=0.6329 out_g_norm=8.0782 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 loss_all=10.3750 init_gold_top10=0.1250 init_gold_top100=0.1250
284
+ step=80 epoch=40/250 epoch_step=2/2 micro_steps=80 elapsed=4.6s lr=2.000000e-03 loss=9.7523 loss_recon=9.7523 loss_meanflow=0.0000 mean_model_t=0.2455 mean_corrupt_t=0.2455 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2750 corrupt_frac=1.0000 acc_corrupt=0.2750 loss_corrupt=9.7523 wrong_frac=0.7250 init_acc_corrupt=0.2000 acc_corrupt_t_0p4_0p6=0.5417 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=0.7131 out_g_norm=8.2600 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2083 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.1094 init_gold_top10=0.5000 init_gold_top100=0.5000
285
+ step=90 epoch=45/250 epoch_step=2/2 micro_steps=90 elapsed=4.6s lr=2.000000e-03 loss=9.8027 loss_recon=9.8027 loss_meanflow=0.0000 mean_model_t=0.2205 mean_corrupt_t=0.2205 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1500 corrupt_frac=1.0000 acc_corrupt=0.1500 loss_corrupt=9.8027 wrong_frac=0.7750 init_acc_corrupt=0.1375 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.7917 out_g_norm=8.6682 acc_corrupt_t_0p2_0p4=0.1667 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.1875 corrupt_frac_t_0p4_0p6=1.0000 loss_all=9.4961 init_gold_top10=0.3750 init_gold_top100=0.3750
286
+ step=100 epoch=50/250 epoch_step=2/2 micro_steps=100 elapsed=5.5s lr=2.000000e-03 loss=9.5336 loss_recon=9.5336 loss_meanflow=0.0000 mean_model_t=0.1800 mean_corrupt_t=0.1800 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1750 corrupt_frac=1.0000 acc_corrupt=0.1750 loss_corrupt=9.5336 wrong_frac=0.8375 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.1458 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.8749 out_g_norm=8.9338 acc_corrupt_t_0p2_0p4=0.2188 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.0547 init_gold_top10=0.1250 init_gold_top100=0.2500
287
+ step=110 epoch=55/250 epoch_step=2/2 micro_steps=110 elapsed=4.6s lr=2.000000e-03 loss=9.2227 loss_recon=9.2227 loss_meanflow=0.0000 mean_model_t=0.2276 mean_corrupt_t=0.2276 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2000 corrupt_frac=1.0000 acc_corrupt=0.2000 loss_corrupt=9.2227 wrong_frac=0.8375 init_acc_corrupt=0.1375 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.9618 out_g_norm=9.2988 acc_corrupt_t_0p4_0p6=0.2500 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.2500 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.2617 init_gold_top10=0.0000 init_gold_top100=0.1250
288
+ step=120 epoch=60/250 epoch_step=2/2 micro_steps=120 elapsed=4.2s lr=2.000000e-03 loss=8.8680 loss_recon=8.8680 loss_meanflow=0.0000 mean_model_t=0.2553 mean_corrupt_t=0.2553 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2750 corrupt_frac=1.0000 acc_corrupt=0.2750 loss_corrupt=8.8680 wrong_frac=0.6750 init_acc_corrupt=0.2250 acc_corrupt_t_0p2_0p4=0.3214 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.0520 out_g_norm=9.2110 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 loss_all=9.4043 init_gold_top10=0.5000 init_gold_top100=0.5000
289
+ step=130 epoch=65/250 epoch_step=2/2 micro_steps=130 elapsed=5.2s lr=2.000000e-03 loss=8.7281 loss_recon=8.7281 loss_meanflow=0.0000 mean_model_t=0.2286 mean_corrupt_t=0.2286 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2375 corrupt_frac=1.0000 acc_corrupt=0.2375 loss_corrupt=8.7281 wrong_frac=0.7875 init_acc_corrupt=0.1375 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.1458 out_g_norm=9.8395 acc_corrupt_t_0p2_0p4=0.2679 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.1562 init_gold_top10=0.1250 init_gold_top100=0.1250
290
+ step=140 epoch=70/250 epoch_step=2/2 micro_steps=140 elapsed=4.6s lr=2.000000e-03 loss=8.8338 loss_recon=8.8338 loss_meanflow=0.0000 mean_model_t=0.1682 mean_corrupt_t=0.1682 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=8.8338 wrong_frac=0.8375 init_acc_corrupt=0.0500 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.2408 out_g_norm=9.9622 acc_corrupt_t_0p2_0p4=0.2500 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.0508 init_gold_top10=0.1250 init_gold_top100=0.1250
291
+ step=150 epoch=75/250 epoch_step=2/2 micro_steps=150 elapsed=4.2s lr=2.000000e-03 loss=8.2799 loss_recon=8.2799 loss_meanflow=0.0000 mean_model_t=0.2326 mean_corrupt_t=0.2326 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2500 corrupt_frac=1.0000 acc_corrupt=0.2500 loss_corrupt=8.2799 wrong_frac=0.7625 init_acc_corrupt=0.1375 acc_corrupt_t_0p0_0p2=0.1750 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.3332 out_g_norm=10.2737 acc_corrupt_t_0p2_0p4=0.2812 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.1875 init_gold_top10=0.2500 init_gold_top100=0.2500
292
+ step=160 epoch=80/250 epoch_step=2/2 micro_steps=160 elapsed=4.7s lr=2.000000e-03 loss=7.4644 loss_recon=7.4644 loss_meanflow=0.0000 mean_model_t=0.1882 mean_corrupt_t=0.1882 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3125 corrupt_frac=1.0000 acc_corrupt=0.3125 loss_corrupt=7.4644 wrong_frac=0.7500 init_acc_corrupt=0.1750 acc_corrupt_t_0p2_0p4=0.4750 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.4282 out_g_norm=10.1290 acc_corrupt_t_0p0_0p2=0.1500 corrupt_frac_t_0p0_0p2=1.0000 loss_all=8.6543 init_gold_top10=0.0000 init_gold_top100=0.0000
293
+ step=170 epoch=85/250 epoch_step=2/2 micro_steps=170 elapsed=4.5s lr=2.000000e-03 loss=7.5713 loss_recon=7.5713 loss_meanflow=0.0000 mean_model_t=0.2017 mean_corrupt_t=0.2017 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2500 corrupt_frac=1.0000 acc_corrupt=0.2500 loss_corrupt=7.5713 wrong_frac=0.7250 init_acc_corrupt=0.1000 acc_corrupt_t_0p2_0p4=0.4000 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.5251 out_g_norm=10.4161 acc_corrupt_t_0p0_0p2=0.1000 corrupt_frac_t_0p0_0p2=1.0000 loss_all=5.2729 init_gold_top10=0.5000 init_gold_top100=0.6250
294
+ step=180 epoch=90/250 epoch_step=2/2 micro_steps=180 elapsed=4.2s lr=2.000000e-03 loss=7.6099 loss_recon=7.6099 loss_meanflow=0.0000 mean_model_t=0.2081 mean_corrupt_t=0.2081 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2250 corrupt_frac=1.0000 acc_corrupt=0.2250 loss_corrupt=7.6099 wrong_frac=0.7875 init_acc_corrupt=0.1250 acc_corrupt_t_0p0_0p2=0.1458 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.6215 out_g_norm=9.9961 acc_corrupt_t_0p2_0p4=0.3438 corrupt_frac_t_0p2_0p4=1.0000 loss_all=7.9766 init_gold_top10=0.3750 init_gold_top100=0.5000
295
+ step=190 epoch=95/250 epoch_step=2/2 micro_steps=190 elapsed=4.6s lr=2.000000e-03 loss=7.4357 loss_recon=7.4357 loss_meanflow=0.0000 mean_model_t=0.1942 mean_corrupt_t=0.1942 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2250 corrupt_frac=1.0000 acc_corrupt=0.2250 loss_corrupt=7.4357 wrong_frac=0.8625 init_acc_corrupt=0.0625 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.7179 out_g_norm=10.3147 acc_corrupt_t_0p2_0p4=0.3125 corrupt_frac_t_0p2_0p4=1.0000 loss_all=8.7559 init_gold_top10=0.1250 init_gold_top100=0.1250
296
+ step=200 epoch=100/250 epoch_step=2/2 micro_steps=200 elapsed=4.6s lr=2.000000e-03 loss=7.6464 loss_recon=7.6464 loss_meanflow=0.0000 mean_model_t=0.1970 mean_corrupt_t=0.1970 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=7.6464 wrong_frac=0.8250 init_acc_corrupt=0.0625 acc_corrupt_t_0p0_0p2=0.1071 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.8126 out_g_norm=10.9069 acc_corrupt_t_0p2_0p4=0.2917 corrupt_frac_t_0p2_0p4=1.0000 loss_all=7.8730 init_gold_top10=0.0000 init_gold_top100=0.3750
297
+ step=210 epoch=105/250 epoch_step=2/2 micro_steps=210 elapsed=4.2s lr=2.000000e-03 loss=6.5814 loss_recon=6.5814 loss_meanflow=0.0000 mean_model_t=0.1880 mean_corrupt_t=0.1880 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2375 corrupt_frac=1.0000 acc_corrupt=0.2375 loss_corrupt=6.5814 wrong_frac=0.8500 init_acc_corrupt=0.0875 acc_corrupt_t_0p2_0p4=0.2812 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.9072 out_g_norm=9.6478 acc_corrupt_t_0p0_0p2=0.2083 corrupt_frac_t_0p0_0p2=1.0000 loss_all=4.4038 init_gold_top10=0.2500 init_gold_top100=0.3750
298
+ step=220 epoch=110/250 epoch_step=2/2 micro_steps=220 elapsed=4.6s lr=2.000000e-03 loss=7.1286 loss_recon=7.1286 loss_meanflow=0.0000 mean_model_t=0.1718 mean_corrupt_t=0.1718 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=7.1286 wrong_frac=0.8500 init_acc_corrupt=0.0375 acc_corrupt_t_0p0_0p2=0.1667 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.9991 out_g_norm=10.3675 acc_corrupt_t_0p2_0p4=0.1562 corrupt_frac_t_0p2_0p4=1.0000 loss_all=7.7090 init_gold_top10=0.1250 init_gold_top100=0.1250
299
+ step=230 epoch=115/250 epoch_step=2/2 micro_steps=230 elapsed=4.6s lr=2.000000e-03 loss=4.9539 loss_recon=4.9539 loss_meanflow=0.0000 mean_model_t=0.3049 mean_corrupt_t=0.3049 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4250 corrupt_frac=1.0000 acc_corrupt=0.4250 loss_corrupt=4.9539 wrong_frac=0.6500 init_acc_corrupt=0.2875 acc_corrupt_t_0p4_0p6=0.7917 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=2.0831 out_g_norm=9.7181 acc_corrupt_t_0p2_0p4=0.4167 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p0_0p2=0.1562 corrupt_frac_t_0p0_0p2=1.0000 loss_all=7.3464 init_gold_top10=0.2500 init_gold_top100=0.2500
300
+ step=240 epoch=120/250 epoch_step=2/2 micro_steps=240 elapsed=4.1s lr=2.000000e-03 loss=5.3579 loss_recon=5.3579 loss_meanflow=0.0000 mean_model_t=0.2693 mean_corrupt_t=0.2693 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3500 corrupt_frac=1.0000 acc_corrupt=0.3500 loss_corrupt=5.3579 wrong_frac=0.7125 init_acc_corrupt=0.2000 acc_corrupt_t_0p0_0p2=0.1562 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.1655 out_g_norm=10.1470 acc_corrupt_t_0p4_0p6=0.6250 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.4062 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.7973 init_gold_top10=0.2500 init_gold_top100=0.2500
301
+ step=250 epoch=125/250 epoch_step=2/2 micro_steps=250 elapsed=4.6s lr=2.000000e-03 loss=5.4397 loss_recon=5.4397 loss_meanflow=0.0000 mean_model_t=0.1964 mean_corrupt_t=0.1964 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3000 corrupt_frac=1.0000 acc_corrupt=0.3000 loss_corrupt=5.4397 wrong_frac=0.7500 init_acc_corrupt=0.1250 acc_corrupt_t_0p0_0p2=0.1875 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.2462 out_g_norm=10.4779 acc_corrupt_t_0p6_0p8=0.8750 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p2_0p4=0.6250 corrupt_frac_t_0p2_0p4=1.0000 loss_all=2.5704 init_gold_top10=0.5000 init_gold_top100=0.5000
302
+ step=260 epoch=130/250 epoch_step=2/2 micro_steps=260 elapsed=4.5s lr=2.000000e-03 loss=6.4952 loss_recon=6.4952 loss_meanflow=0.0000 mean_model_t=0.1517 mean_corrupt_t=0.1517 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1375 corrupt_frac=1.0000 acc_corrupt=0.1375 loss_corrupt=6.4952 wrong_frac=0.8750 init_acc_corrupt=0.0250 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.3208 out_g_norm=10.8209 acc_corrupt_t_0p2_0p4=0.1667 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.7949 init_gold_top10=0.1250 init_gold_top100=0.5000
303
+ step=270 epoch=135/250 epoch_step=2/2 micro_steps=270 elapsed=4.2s lr=2.000000e-03 loss=5.5522 loss_recon=5.5522 loss_meanflow=0.0000 mean_model_t=0.1781 mean_corrupt_t=0.1781 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2625 corrupt_frac=1.0000 acc_corrupt=0.2625 loss_corrupt=5.5522 wrong_frac=0.7750 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.1964 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.3848 out_g_norm=10.4100 acc_corrupt_t_0p2_0p4=0.4167 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.8256 init_gold_top10=0.3750 init_gold_top100=0.3750
304
+ step=280 epoch=140/250 epoch_step=2/2 micro_steps=280 elapsed=5.0s lr=2.000000e-03 loss=5.8259 loss_recon=5.8259 loss_meanflow=0.0000 mean_model_t=0.2199 mean_corrupt_t=0.2199 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=5.8259 wrong_frac=0.8000 init_acc_corrupt=0.0625 acc_corrupt_t_0p4_0p6=0.1250 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=2.4410 out_g_norm=10.8257 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.2188 corrupt_frac_t_0p2_0p4=1.0000 loss_all=5.9141 init_gold_top10=0.1250 init_gold_top100=0.2500
305
+ step=290 epoch=145/250 epoch_step=2/2 micro_steps=290 elapsed=4.5s lr=2.000000e-03 loss=4.2819 loss_recon=4.2819 loss_meanflow=0.0000 mean_model_t=0.2100 mean_corrupt_t=0.2100 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3250 corrupt_frac=1.0000 acc_corrupt=0.3250 loss_corrupt=4.2819 wrong_frac=0.7625 init_acc_corrupt=0.1625 acc_corrupt_t_0p2_0p4=0.4500 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.4965 out_g_norm=10.5429 acc_corrupt_t_0p0_0p2=0.2000 corrupt_frac_t_0p0_0p2=1.0000 loss_all=4.9616 init_gold_top10=0.2500 init_gold_top100=0.2500
306
+ step=300 epoch=150/250 epoch_step=2/2 micro_steps=300 elapsed=4.2s lr=2.000000e-03 loss=4.6966 loss_recon=4.6966 loss_meanflow=0.0000 mean_model_t=0.2037 mean_corrupt_t=0.2037 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3000 corrupt_frac=1.0000 acc_corrupt=0.3000 loss_corrupt=4.6966 wrong_frac=0.7250 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.2500 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.5467 out_g_norm=10.7409 acc_corrupt_t_0p2_0p4=0.3500 corrupt_frac_t_0p2_0p4=1.0000 loss_all=5.9668 init_gold_top10=0.1250 init_gold_top100=0.5000
307
+ step=310 epoch=155/250 epoch_step=2/2 micro_steps=310 elapsed=5.0s lr=2.000000e-03 loss=4.8928 loss_recon=4.8928 loss_meanflow=0.0000 mean_model_t=0.1660 mean_corrupt_t=0.1660 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2750 corrupt_frac=1.0000 acc_corrupt=0.2750 loss_corrupt=4.8928 wrong_frac=0.8500 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.2321 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.5929 out_g_norm=10.7503 acc_corrupt_t_0p4_0p6=0.8750 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.1250 corrupt_frac_t_0p2_0p4=1.0000 loss_all=6.3398 init_gold_top10=0.0000 init_gold_top100=0.0000
308
+ step=320 epoch=160/250 epoch_step=2/2 micro_steps=320 elapsed=4.5s lr=2.000000e-03 loss=4.6516 loss_recon=4.6516 loss_meanflow=0.0000 mean_model_t=0.2203 mean_corrupt_t=0.2203 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2125 corrupt_frac=1.0000 acc_corrupt=0.2125 loss_corrupt=4.6516 wrong_frac=0.8250 init_acc_corrupt=0.1125 acc_corrupt_t_0p0_0p2=0.1458 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.6345 out_g_norm=10.9623 acc_corrupt_t_0p4_0p6=0.2500 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.3333 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.9951 init_gold_top10=0.0000 init_gold_top100=0.3750
309
+ step=330 epoch=165/250 epoch_step=2/2 micro_steps=330 elapsed=4.2s lr=2.000000e-03 loss=3.6249 loss_recon=3.6249 loss_meanflow=0.0000 mean_model_t=0.2293 mean_corrupt_t=0.2293 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3625 corrupt_frac=1.0000 acc_corrupt=0.3625 loss_corrupt=3.6249 wrong_frac=0.6750 init_acc_corrupt=0.2125 acc_corrupt_t_0p0_0p2=0.1562 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.6737 out_g_norm=11.0498 acc_corrupt_t_0p2_0p4=0.5000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=1.3865 init_gold_top10=0.6250 init_gold_top100=0.6250
310
+ step=340 epoch=170/250 epoch_step=2/2 micro_steps=340 elapsed=5.1s lr=2.000000e-03 loss=3.6906 loss_recon=3.6906 loss_meanflow=0.0000 mean_model_t=0.2682 mean_corrupt_t=0.2682 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4000 corrupt_frac=1.0000 acc_corrupt=0.4000 loss_corrupt=3.6906 wrong_frac=0.6875 init_acc_corrupt=0.2375 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7084 out_g_norm=10.1418 acc_corrupt_t_0p2_0p4=0.5417 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.7500 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=1.0000 loss_all=5.2021 init_gold_top10=0.1250 init_gold_top100=0.1250
311
+ step=350 epoch=175/250 epoch_step=2/2 micro_steps=350 elapsed=4.5s lr=2.000000e-03 loss=4.6338 loss_recon=4.6338 loss_meanflow=0.0000 mean_model_t=0.1929 mean_corrupt_t=0.1929 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2000 corrupt_frac=1.0000 acc_corrupt=0.2000 loss_corrupt=4.6338 wrong_frac=0.8375 init_acc_corrupt=0.0750 acc_corrupt_t_0p2_0p4=0.3250 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.7401 out_g_norm=11.0149 acc_corrupt_t_0p0_0p2=0.0750 corrupt_frac_t_0p0_0p2=1.0000 loss_all=3.8555 init_gold_top10=0.1250 init_gold_top100=0.2500
312
+ step=360 epoch=180/250 epoch_step=2/2 micro_steps=360 elapsed=4.2s lr=2.000000e-03 loss=4.2104 loss_recon=4.2104 loss_meanflow=0.0000 mean_model_t=0.1923 mean_corrupt_t=0.1923 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2000 corrupt_frac=1.0000 acc_corrupt=0.2000 loss_corrupt=4.2104 wrong_frac=0.7875 init_acc_corrupt=0.1000 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7663 out_g_norm=11.0627 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=1.9618 init_gold_top10=0.2500 init_gold_top100=0.2500
313
+ step=370 epoch=185/250 epoch_step=2/2 micro_steps=370 elapsed=5.0s lr=2.000000e-03 loss=3.6980 loss_recon=3.6980 loss_meanflow=0.0000 mean_model_t=0.1757 mean_corrupt_t=0.1757 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3125 corrupt_frac=1.0000 acc_corrupt=0.3125 loss_corrupt=3.6980 wrong_frac=0.8250 init_acc_corrupt=0.0750 acc_corrupt_t_0p2_0p4=0.5625 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.7910 out_g_norm=10.5404 acc_corrupt_t_0p0_0p2=0.2500 corrupt_frac_t_0p0_0p2=1.0000 loss_all=3.2881 init_gold_top10=0.2500 init_gold_top100=0.5000
314
+ step=380 epoch=190/250 epoch_step=2/2 micro_steps=380 elapsed=4.5s lr=2.000000e-03 loss=3.2751 loss_recon=3.2751 loss_meanflow=0.0000 mean_model_t=0.1794 mean_corrupt_t=0.1794 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2625 corrupt_frac=1.0000 acc_corrupt=0.2625 loss_corrupt=3.2751 wrong_frac=0.8375 init_acc_corrupt=0.1000 acc_corrupt_t_0p0_0p2=0.1607 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8162 out_g_norm=11.1712 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.5000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=2.6837 init_gold_top10=0.2500 init_gold_top100=0.2500
315
+ step=390 epoch=195/250 epoch_step=2/2 micro_steps=390 elapsed=4.2s lr=2.000000e-03 loss=4.0428 loss_recon=4.0428 loss_meanflow=0.0000 mean_model_t=0.1873 mean_corrupt_t=0.1873 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2250 corrupt_frac=1.0000 acc_corrupt=0.2250 loss_corrupt=4.0428 wrong_frac=0.8125 init_acc_corrupt=0.0750 acc_corrupt_t_0p2_0p4=0.2812 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.8404 out_g_norm=12.1307 acc_corrupt_t_0p0_0p2=0.1875 corrupt_frac_t_0p0_0p2=1.0000 loss_all=5.0107 init_gold_top10=0.1250 init_gold_top100=0.5000
316
+ step=400 epoch=200/250 epoch_step=2/2 micro_steps=400 elapsed=5.0s lr=2.000000e-03 loss=3.0034 loss_recon=3.0034 loss_meanflow=0.0000 mean_model_t=0.3288 mean_corrupt_t=0.3288 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3625 corrupt_frac=1.0000 acc_corrupt=0.3625 loss_corrupt=3.0034 wrong_frac=0.7250 init_acc_corrupt=0.2250 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8626 out_g_norm=10.6248 acc_corrupt_t_0p2_0p4=0.2188 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.8750 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p4_0p6=0.7500 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.2258 init_gold_top10=0.7500 init_gold_top100=0.7500
317
+ step=410 epoch=205/250 epoch_step=2/2 micro_steps=410 elapsed=4.6s lr=2.000000e-03 loss=2.5523 loss_recon=2.5523 loss_meanflow=0.0000 mean_model_t=0.2749 mean_corrupt_t=0.2749 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4500 corrupt_frac=1.0000 acc_corrupt=0.4500 loss_corrupt=2.5523 wrong_frac=0.6750 init_acc_corrupt=0.2375 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8829 out_g_norm=11.8955 acc_corrupt_t_0p2_0p4=0.5750 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.6250 corrupt_frac_t_0p4_0p6=1.0000 loss_all=1.8015 init_gold_top10=0.3750 init_gold_top100=0.5000
318
+ step=420 epoch=210/250 epoch_step=2/2 micro_steps=420 elapsed=4.1s lr=2.000000e-03 loss=3.2439 loss_recon=3.2439 loss_meanflow=0.0000 mean_model_t=0.2123 mean_corrupt_t=0.2123 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2750 corrupt_frac=1.0000 acc_corrupt=0.2750 loss_corrupt=3.2439 wrong_frac=0.7750 init_acc_corrupt=0.1000 acc_corrupt_t_0p2_0p4=0.3333 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.8965 out_g_norm=11.8560 acc_corrupt_t_0p0_0p2=0.1875 corrupt_frac_t_0p0_0p2=1.0000 loss_all=4.2246 init_gold_top10=0.1250 init_gold_top100=0.3750
319
+ step=430 epoch=215/250 epoch_step=2/2 micro_steps=430 elapsed=5.0s lr=2.000000e-03 loss=3.1938 loss_recon=3.1938 loss_meanflow=0.0000 mean_model_t=0.1772 mean_corrupt_t=0.1772 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2625 corrupt_frac=1.0000 acc_corrupt=0.2625 loss_corrupt=3.1938 wrong_frac=0.8250 init_acc_corrupt=0.0875 acc_corrupt_t_0p0_0p2=0.2000 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9060 out_g_norm=11.6931 acc_corrupt_t_0p2_0p4=0.3250 corrupt_frac_t_0p2_0p4=1.0000 loss_all=3.3418 init_gold_top10=0.1250 init_gold_top100=0.3750
320
+ step=440 epoch=220/250 epoch_step=2/2 micro_steps=440 elapsed=4.6s lr=2.000000e-03 loss=2.7462 loss_recon=2.7462 loss_meanflow=0.0000 mean_model_t=0.2333 mean_corrupt_t=0.2333 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3625 corrupt_frac=1.0000 acc_corrupt=0.3625 loss_corrupt=2.7462 wrong_frac=0.6875 init_acc_corrupt=0.1875 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9144 out_g_norm=11.5046 acc_corrupt_t_0p2_0p4=0.5208 corrupt_frac_t_0p2_0p4=1.0000 loss_all=3.6408 init_gold_top10=0.3750 init_gold_top100=0.6250
321
+ step=450 epoch=225/250 epoch_step=2/2 micro_steps=450 elapsed=4.2s lr=2.000000e-03 loss=3.8439 loss_recon=3.8439 loss_meanflow=0.0000 mean_model_t=0.1578 mean_corrupt_t=0.1578 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=1.0000 acc_corrupt=0.1625 loss_corrupt=3.8439 wrong_frac=0.8375 init_acc_corrupt=0.0500 acc_corrupt_t_0p0_0p2=0.1250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9216 out_g_norm=10.7069 acc_corrupt_t_0p2_0p4=0.2500 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.7910 init_gold_top10=0.0000 init_gold_top100=0.0000
322
+ step=460 epoch=230/250 epoch_step=2/2 micro_steps=460 elapsed=5.0s lr=2.000000e-03 loss=3.0667 loss_recon=3.0667 loss_meanflow=0.0000 mean_model_t=0.1884 mean_corrupt_t=0.1884 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3000 corrupt_frac=1.0000 acc_corrupt=0.3000 loss_corrupt=3.0667 wrong_frac=0.8250 init_acc_corrupt=0.1000 acc_corrupt_t_0p0_0p2=0.1429 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9302 out_g_norm=11.1555 acc_corrupt_t_0p4_0p6=0.7500 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.5000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=4.3457 init_gold_top10=0.0000 init_gold_top100=0.1250
323
+ step=470 epoch=235/250 epoch_step=2/2 micro_steps=470 elapsed=4.5s lr=2.000000e-03 loss=3.0813 loss_recon=3.0813 loss_meanflow=0.0000 mean_model_t=0.1817 mean_corrupt_t=0.1817 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3000 corrupt_frac=1.0000 acc_corrupt=0.3000 loss_corrupt=3.0813 wrong_frac=0.8625 init_acc_corrupt=0.0750 acc_corrupt_t_0p0_0p2=0.1964 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9401 out_g_norm=13.3409 acc_corrupt_t_0p2_0p4=0.5000 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.6250 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.3584 init_gold_top10=0.0000 init_gold_top100=0.0000
324
+ step=480 epoch=240/250 epoch_step=2/2 micro_steps=480 elapsed=4.2s lr=2.000000e-03 loss=2.3354 loss_recon=2.3354 loss_meanflow=0.0000 mean_model_t=0.2314 mean_corrupt_t=0.2314 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4375 corrupt_frac=1.0000 acc_corrupt=0.4375 loss_corrupt=2.3354 wrong_frac=0.6750 init_acc_corrupt=0.1625 acc_corrupt_t_0p0_0p2=0.2250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9456 out_g_norm=12.1579 acc_corrupt_t_0p2_0p4=0.6250 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.7500 corrupt_frac_t_0p4_0p6=1.0000 loss_all=1.3855 init_gold_top10=0.3750 init_gold_top100=0.5000
325
+ step=490 epoch=245/250 epoch_step=2/2 micro_steps=490 elapsed=5.1s lr=2.000000e-03 loss=2.5818 loss_recon=2.5818 loss_meanflow=0.0000 mean_model_t=0.2122 mean_corrupt_t=0.2122 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3875 corrupt_frac=1.0000 acc_corrupt=0.3875 loss_corrupt=2.5818 wrong_frac=0.7250 init_acc_corrupt=0.1375 acc_corrupt_t_0p0_0p2=0.3125 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9460 out_g_norm=11.7760 acc_corrupt_t_0p2_0p4=0.3500 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.8750 corrupt_frac_t_0p4_0p6=1.0000 loss_all=2.2825 init_gold_top10=0.2500 init_gold_top100=0.2500
326
+ step=500 epoch=250/250 epoch_step=2/2 micro_steps=500 elapsed=4.5s lr=2.000000e-03 loss=3.2203 loss_recon=3.2203 loss_meanflow=0.0000 mean_model_t=0.1335 mean_corrupt_t=0.1335 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2125 corrupt_frac=1.0000 acc_corrupt=0.2125 loss_corrupt=3.2203 wrong_frac=0.8875 init_acc_corrupt=0.0500 acc_corrupt_t_0p2_0p4=0.2500 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.9466 out_g_norm=11.3624 acc_corrupt_t_0p0_0p2=0.2031 corrupt_frac_t_0p0_0p2=1.0000 loss_all=2.3523 init_gold_top10=0.2500 init_gold_top100=0.2500
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_n8_hard_ce_onehot_20260517_train8ctx8_overfit.log ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 50257,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_n8_hard_ce_onehot_20260517_train8ctx8_overfit",
10
+ "batch_size": 1,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 4,
13
+ "global_batch_size": 4,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 2,
18
+ "total_steps": 500,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 169453056,
36
+ "muon_adam_param_count": 122368,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "blocks.3.attn_qkv.weight",
57
+ "blocks.3.attn_out.weight",
58
+ "blocks.3.mlp.0.weight",
59
+ "blocks.3.mlp.2.weight",
60
+ "blocks.3.adaLN_modulation.weight",
61
+ "blocks.4.attn_qkv.weight",
62
+ "blocks.4.attn_out.weight",
63
+ "blocks.4.mlp.0.weight",
64
+ "blocks.4.mlp.2.weight",
65
+ "blocks.4.adaLN_modulation.weight",
66
+ "blocks.5.attn_qkv.weight",
67
+ "blocks.5.attn_out.weight",
68
+ "blocks.5.mlp.0.weight",
69
+ "blocks.5.mlp.2.weight",
70
+ "blocks.5.adaLN_modulation.weight",
71
+ "blocks.6.attn_qkv.weight",
72
+ "blocks.6.attn_out.weight",
73
+ "blocks.6.mlp.0.weight",
74
+ "blocks.6.mlp.2.weight",
75
+ "blocks.6.adaLN_modulation.weight",
76
+ "blocks.7.attn_qkv.weight",
77
+ "blocks.7.attn_out.weight",
78
+ "blocks.7.mlp.0.weight",
79
+ "blocks.7.mlp.2.weight",
80
+ "blocks.7.adaLN_modulation.weight",
81
+ "blocks.8.attn_qkv.weight",
82
+ "blocks.8.attn_out.weight",
83
+ "blocks.8.mlp.0.weight",
84
+ "blocks.8.mlp.2.weight",
85
+ "blocks.8.adaLN_modulation.weight",
86
+ "blocks.9.attn_qkv.weight",
87
+ "blocks.9.attn_out.weight",
88
+ "blocks.9.mlp.0.weight",
89
+ "blocks.9.mlp.2.weight",
90
+ "blocks.9.adaLN_modulation.weight",
91
+ "blocks.10.attn_qkv.weight",
92
+ "blocks.10.attn_out.weight",
93
+ "blocks.10.mlp.0.weight",
94
+ "blocks.10.mlp.2.weight",
95
+ "blocks.10.adaLN_modulation.weight",
96
+ "blocks.11.attn_qkv.weight",
97
+ "blocks.11.attn_out.weight",
98
+ "blocks.11.mlp.0.weight",
99
+ "blocks.11.mlp.2.weight",
100
+ "blocks.11.adaLN_modulation.weight",
101
+ "output_layer.linear.weight",
102
+ "output_layer.adaLN_modulation.weight"
103
+ ],
104
+ "muon_adam_param_names": [
105
+ "sigma_map.net.0.bias",
106
+ "sigma_map.net.2.bias",
107
+ "blocks.0.norm1.weight",
108
+ "blocks.0.norm2.weight",
109
+ "blocks.0.mlp.0.bias",
110
+ "blocks.0.mlp.2.bias",
111
+ "blocks.0.adaLN_modulation.bias",
112
+ "blocks.1.norm1.weight",
113
+ "blocks.1.norm2.weight",
114
+ "blocks.1.mlp.0.bias",
115
+ "blocks.1.mlp.2.bias",
116
+ "blocks.1.adaLN_modulation.bias",
117
+ "blocks.2.norm1.weight",
118
+ "blocks.2.norm2.weight",
119
+ "blocks.2.mlp.0.bias",
120
+ "blocks.2.mlp.2.bias",
121
+ "blocks.2.adaLN_modulation.bias",
122
+ "blocks.3.norm1.weight",
123
+ "blocks.3.norm2.weight",
124
+ "blocks.3.mlp.0.bias",
125
+ "blocks.3.mlp.2.bias",
126
+ "blocks.3.adaLN_modulation.bias",
127
+ "blocks.4.norm1.weight",
128
+ "blocks.4.norm2.weight",
129
+ "blocks.4.mlp.0.bias",
130
+ "blocks.4.mlp.2.bias",
131
+ "blocks.4.adaLN_modulation.bias",
132
+ "blocks.5.norm1.weight",
133
+ "blocks.5.norm2.weight",
134
+ "blocks.5.mlp.0.bias",
135
+ "blocks.5.mlp.2.bias",
136
+ "blocks.5.adaLN_modulation.bias",
137
+ "blocks.6.norm1.weight",
138
+ "blocks.6.norm2.weight",
139
+ "blocks.6.mlp.0.bias",
140
+ "blocks.6.mlp.2.bias",
141
+ "blocks.6.adaLN_modulation.bias",
142
+ "blocks.7.norm1.weight",
143
+ "blocks.7.norm2.weight",
144
+ "blocks.7.mlp.0.bias",
145
+ "blocks.7.mlp.2.bias",
146
+ "blocks.7.adaLN_modulation.bias",
147
+ "blocks.8.norm1.weight",
148
+ "blocks.8.norm2.weight",
149
+ "blocks.8.mlp.0.bias",
150
+ "blocks.8.mlp.2.bias",
151
+ "blocks.8.adaLN_modulation.bias",
152
+ "blocks.9.norm1.weight",
153
+ "blocks.9.norm2.weight",
154
+ "blocks.9.mlp.0.bias",
155
+ "blocks.9.mlp.2.bias",
156
+ "blocks.9.adaLN_modulation.bias",
157
+ "blocks.10.norm1.weight",
158
+ "blocks.10.norm2.weight",
159
+ "blocks.10.mlp.0.bias",
160
+ "blocks.10.mlp.2.bias",
161
+ "blocks.10.adaLN_modulation.bias",
162
+ "blocks.11.norm1.weight",
163
+ "blocks.11.norm2.weight",
164
+ "blocks.11.mlp.0.bias",
165
+ "blocks.11.mlp.2.bias",
166
+ "blocks.11.adaLN_modulation.bias",
167
+ "output_layer.norm_final.weight",
168
+ "output_layer.adaLN_modulation.bias"
169
+ ],
170
+ "muon_effective_nesterov": false,
171
+ "muon_effective_width_scale": false,
172
+ "muon_effective_weight_decay": 0.1,
173
+ "muon_adam_fallback_nesterov": false,
174
+ "muon_adam_fallback_weight_decay": 0.1,
175
+ "ema_decay": 0.9999,
176
+ "ema_start_step": 0,
177
+ "model_type": "ddit",
178
+ "elf_num_time_tokens": 4,
179
+ "elf_num_model_mode_tokens": 0,
180
+ "qk_norm": true,
181
+ "output_bias": false,
182
+ "output_init_std": -1.0,
183
+ "norm_type": "rmsnorm",
184
+ "target_loss": "hard_ce",
185
+ "linear_soft_target_power": 1.0,
186
+ "linear_soft_target_min_conf": 0.0,
187
+ "linear_soft_target_max_conf": 1.0,
188
+ "t_sampling_mode": "logit_normal",
189
+ "t_sampling_power": 1.0,
190
+ "t_sampling_eps": 0.0001,
191
+ "t_sampling_logit_mean": -1.5,
192
+ "t_sampling_logit_std": 0.8,
193
+ "dual_t": true,
194
+ "corrupt_t_mode": "same",
195
+ "corrupt_min_t": 0.0,
196
+ "corrupt_max_t": 1.0,
197
+ "prefix_block_prob": 0.0,
198
+ "prefix_block_len": 128,
199
+ "mask_ratio_floor_schedule": "none",
200
+ "dirichlet_endpoint_mode": "categorical_dual_t",
201
+ "dirichlet_semantic_t_mode": "same",
202
+ "dirichlet_semantic_t_value": 0.0,
203
+ "dirichlet_semantic_t_curve": "linear",
204
+ "dirichlet_semantic_t_power": 1.0,
205
+ "endpoint_sequence_random_prob_alpha": 0.0,
206
+ "categorical_wrong_from_full_vocab": true,
207
+ "categorical_wrong_from_batch_valid_tokens": false,
208
+ "categorical_wrong_basin_token_ids": "",
209
+ "categorical_wrong_basin_prob": 0.0,
210
+ "categorical_wrong_unigram_prob": 0.0,
211
+ "categorical_wrong_uniform_prob": 0.0,
212
+ "categorical_wrong_corpus_unigram_path": "",
213
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
214
+ "categorical_wrong_basin_shared_prob": 0.0,
215
+ "categorical_wrong_unigram_shared_prob": 0.0,
216
+ "mask_mixture_original_prob": 0.0,
217
+ "mask_mixture_lowk_prob": 1.0,
218
+ "mask_mixture_lowcorrupt_prob": 0.0,
219
+ "mask_mixture_block_prob": 0.0,
220
+ "mask_mixture_all_prob": 0.0,
221
+ "mask_mixture_lowk_clean_tokens": "1,2,4",
222
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
223
+ "mask_mixture_block_tokens": "64,128",
224
+ "simplex_bridge_sampler": "dirichlet",
225
+ "logistic_normal_sigma_min": 0.18,
226
+ "logistic_normal_sigma_max": 2.2,
227
+ "logistic_normal_tau_min": 0.65,
228
+ "logistic_normal_tau_max": 1.15,
229
+ "torch_compile": false,
230
+ "compile_mode": "max-autotune",
231
+ "state_format": "prob",
232
+ "meanflow_weight": 0.0,
233
+ "rollout_train_prob": 0.0,
234
+ "rollout_train_steps": 1,
235
+ "rollout_train_infer_steps": 64,
236
+ "rollout_train_temp": 1.45,
237
+ "rollout_train_max_gamma": 1.0,
238
+ "rollout_train_corrupt_only": true,
239
+ "rollout_train_samplewise": false,
240
+ "rollout_train_compute_always": false,
241
+ "bridge_noise_init": "logistic_normal",
242
+ "noise_sigma": -1.0,
243
+ "allow_tf32": true,
244
+ "activation_checkpointing": false,
245
+ "activation_checkpoint_interval": 1,
246
+ "activation_checkpoint_scope": "block",
247
+ "ddp_static_graph": false,
248
+ "ddp_gradient_as_bucket_view": true,
249
+ "blocking_data_transfer": false,
250
+ "dataloader_prefetch_factor": 4,
251
+ "full_train_stats": false,
252
+ "tokenized_hf": false,
253
+ "tokenized_pad_token": "pad",
254
+ "elf_conditional_hf": false,
255
+ "record_pad_truncate": false,
256
+ "record_add_eos": false,
257
+ "record_add_special_tokens": false,
258
+ "record_pad_token": "pad",
259
+ "record_shuffle_buffer": 10000,
260
+ "wrap": true,
261
+ "wrap_mode": "stream",
262
+ "wrap_record_buffer_size": 200,
263
+ "owt_cached_chunks": true,
264
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len8_train8_overfit",
265
+ "owt_chunk_cache_rebuild": false,
266
+ "owt_chunk_cache_write_batch": 4096,
267
+ "owt_exact_repeat_per_chunk": 0,
268
+ "online_chunk_shuffle": false,
269
+ "online_chunk_shuffle_buffer": 10000,
270
+ "openwebtext_split": "train_minus_100k",
271
+ "detokenizer": "auto",
272
+ "resolved_detokenizer": null,
273
+ "num_workers": 0,
274
+ "latest_every": 10,
275
+ "resume_path": ""
276
+ }
277
+ step=10 epoch=5/250 epoch_step=2/2 micro_steps=10 elapsed=2.0s lr=2.000000e-03 loss=10.8092 loss_recon=10.8092 loss_meanflow=0.0000 mean_model_t=0.1662 mean_corrupt_t=0.1662 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2125 corrupt_frac=0.7125 acc_corrupt=0.0877 loss_corrupt=10.8092 wrong_frac=0.7895 init_acc_corrupt=0.0877 acc_corrupt_t_0p0_0p2=0.0256 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.0162 out_g_norm=6.0434 acc_corrupt_t_0p2_0p4=0.2222 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.7578 init_gold_top10=0.5000 init_gold_top100=0.5000
278
+ step=20 epoch=10/250 epoch_step=2/2 micro_steps=20 elapsed=5.6s lr=2.000000e-03 loss=10.7549 loss_recon=10.7549 loss_meanflow=0.0000 mean_model_t=0.1939 mean_corrupt_t=0.1939 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2375 corrupt_frac=0.6375 acc_corrupt=0.1569 loss_corrupt=10.7549 wrong_frac=0.8824 init_acc_corrupt=0.0588 acc_corrupt_t_0p0_0p2=0.1538 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.1006 out_g_norm=6.7868 acc_corrupt_t_0p2_0p4=0.1579 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.1667 corrupt_frac_t_0p4_0p6=1.0000 loss_all=10.6562 init_gold_top10=0.0000 init_gold_top100=0.0000
279
+ step=30 epoch=15/250 epoch_step=2/2 micro_steps=30 elapsed=6.7s lr=2.000000e-03 loss=10.6261 loss_recon=10.6261 loss_meanflow=0.0000 mean_model_t=0.1869 mean_corrupt_t=0.1869 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1500 corrupt_frac=0.7125 acc_corrupt=0.1579 loss_corrupt=10.6261 wrong_frac=0.8772 init_acc_corrupt=0.0175 acc_corrupt_t_0p0_0p2=0.1379 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.2140 out_g_norm=8.0074 acc_corrupt_t_0p2_0p4=0.1786 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.4766 init_gold_top10=0.0000 init_gold_top100=0.0000
280
+ step=40 epoch=20/250 epoch_step=2/2 micro_steps=40 elapsed=6.0s lr=2.000000e-03 loss=10.4505 loss_recon=10.4505 loss_meanflow=0.0000 mean_model_t=0.2433 mean_corrupt_t=0.2433 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1625 corrupt_frac=0.6625 acc_corrupt=0.1698 loss_corrupt=10.4505 wrong_frac=0.7925 init_acc_corrupt=0.1509 acc_corrupt_t_0p0_0p2=0.2222 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.3242 out_g_norm=8.1540 acc_corrupt_t_0p2_0p4=0.1724 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.0000 corrupt_frac_t_0p6_0p8=1.0000 loss_all=10.2344 init_gold_top10=0.0000 init_gold_top100=0.0000
281
+ step=50 epoch=25/250 epoch_step=2/2 micro_steps=50 elapsed=5.6s lr=2.000000e-03 loss=10.3760 loss_recon=10.3760 loss_meanflow=0.0000 mean_model_t=0.2392 mean_corrupt_t=0.2392 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2375 corrupt_frac=0.7875 acc_corrupt=0.1270 loss_corrupt=10.3760 wrong_frac=0.7778 init_acc_corrupt=0.1111 acc_corrupt_t_0p0_0p2=0.0833 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.4206 out_g_norm=7.8383 acc_corrupt_t_0p4_0p6=0.2143 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.1200 corrupt_frac_t_0p2_0p4=1.0000 loss_all=10.2656 init_gold_top10=0.2857 init_gold_top100=0.4286
282
+ step=60 epoch=30/250 epoch_step=2/2 micro_steps=60 elapsed=4.5s lr=2.000000e-03 loss=10.1919 loss_recon=10.1919 loss_meanflow=0.0000 mean_model_t=0.0956 mean_corrupt_t=0.0956 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2875 corrupt_frac=0.7125 acc_corrupt=0.1404 loss_corrupt=10.1919 wrong_frac=0.8596 init_acc_corrupt=0.0175 acc_corrupt_t_0p0_0p2=0.1400 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.5043 out_g_norm=7.9290 acc_corrupt_t_0p2_0p4=0.1429 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.8750 init_gold_top10=0.0000 init_gold_top100=0.1667
283
+ step=70 epoch=35/250 epoch_step=2/2 micro_steps=70 elapsed=5.3s lr=2.000000e-03 loss=9.8955 loss_recon=9.8955 loss_meanflow=0.0000 mean_model_t=0.1739 mean_corrupt_t=0.1739 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3125 corrupt_frac=0.6875 acc_corrupt=0.2545 loss_corrupt=9.8955 wrong_frac=0.8000 init_acc_corrupt=0.1091 acc_corrupt_t_0p0_0p2=0.2051 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.5815 out_g_norm=8.5369 acc_corrupt_t_0p2_0p4=0.3750 corrupt_frac_t_0p2_0p4=1.0000 loss_all=9.7734 init_gold_top10=0.1667 init_gold_top100=0.1667
284
+ step=80 epoch=40/250 epoch_step=2/2 micro_steps=80 elapsed=4.4s lr=2.000000e-03 loss=9.5797 loss_recon=9.5797 loss_meanflow=0.0000 mean_model_t=0.1471 mean_corrupt_t=0.1471 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2875 corrupt_frac=0.7250 acc_corrupt=0.2069 loss_corrupt=9.5797 wrong_frac=0.8103 init_acc_corrupt=0.0517 acc_corrupt_t_0p0_0p2=0.1923 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.6585 out_g_norm=8.6702 acc_corrupt_t_0p2_0p4=0.3333 corrupt_frac_t_0p2_0p4=1.0000 loss_all=8.9883 init_gold_top10=0.2500 init_gold_top100=0.2500
285
+ step=90 epoch=45/250 epoch_step=2/2 micro_steps=90 elapsed=4.4s lr=2.000000e-03 loss=9.4962 loss_recon=9.4962 loss_meanflow=0.0000 mean_model_t=0.2610 mean_corrupt_t=0.2610 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3250 corrupt_frac=0.7125 acc_corrupt=0.2281 loss_corrupt=9.4962 wrong_frac=0.7719 init_acc_corrupt=0.2105 acc_corrupt_t_0p4_0p6=0.3333 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=0.7366 out_g_norm=8.7897 acc_corrupt_t_0p2_0p4=0.2083 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p0_0p2=0.1905 corrupt_frac_t_0p0_0p2=1.0000 loss_all=9.1523 init_gold_top10=0.1667 init_gold_top100=0.1667
286
+ step=100 epoch=50/250 epoch_step=2/2 micro_steps=100 elapsed=5.3s lr=2.000000e-03 loss=9.2059 loss_recon=9.2059 loss_meanflow=0.0000 mean_model_t=0.2061 mean_corrupt_t=0.2061 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3500 corrupt_frac=0.7000 acc_corrupt=0.2143 loss_corrupt=9.2059 wrong_frac=0.8393 init_acc_corrupt=0.0893 acc_corrupt_t_0p0_0p2=0.2059 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.8171 out_g_norm=8.9302 acc_corrupt_t_0p2_0p4=0.2273 corrupt_frac_t_0p2_0p4=1.0000 loss_all=8.4062 init_gold_top10=0.2500 init_gold_top100=0.2500
287
+ step=110 epoch=55/250 epoch_step=2/2 micro_steps=110 elapsed=4.4s lr=2.000000e-03 loss=8.5684 loss_recon=8.5684 loss_meanflow=0.0000 mean_model_t=0.1964 mean_corrupt_t=0.1964 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4125 corrupt_frac=0.6625 acc_corrupt=0.2642 loss_corrupt=8.5684 wrong_frac=0.6981 init_acc_corrupt=0.1509 acc_corrupt_t_0p2_0p4=0.3182 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=0.9019 out_g_norm=9.8078 acc_corrupt_t_0p0_0p2=0.2000 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.3333 corrupt_frac_t_0p4_0p6=1.0000 loss_all=8.0508 init_gold_top10=0.2857 init_gold_top100=0.2857
288
+ step=120 epoch=60/250 epoch_step=2/2 micro_steps=120 elapsed=4.0s lr=2.000000e-03 loss=8.5687 loss_recon=8.5687 loss_meanflow=0.0000 mean_model_t=0.2019 mean_corrupt_t=0.2019 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3875 corrupt_frac=0.7500 acc_corrupt=0.2333 loss_corrupt=8.5687 wrong_frac=0.7500 init_acc_corrupt=0.2167 acc_corrupt_t_0p0_0p2=0.1429 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=0.9906 out_g_norm=9.6213 acc_corrupt_t_0p2_0p4=0.1923 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.8333 corrupt_frac_t_0p4_0p6=1.0000 loss_all=7.1250 init_gold_top10=0.6667 init_gold_top100=0.6667
289
+ step=130 epoch=65/250 epoch_step=2/2 micro_steps=130 elapsed=5.1s lr=2.000000e-03 loss=8.1694 loss_recon=8.1694 loss_meanflow=0.0000 mean_model_t=0.1694 mean_corrupt_t=0.1694 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3750 corrupt_frac=0.7125 acc_corrupt=0.1930 loss_corrupt=8.1694 wrong_frac=0.7895 init_acc_corrupt=0.0175 acc_corrupt_t_0p0_0p2=0.2222 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.0816 out_g_norm=10.3474 acc_corrupt_t_0p2_0p4=0.1429 corrupt_frac_t_0p2_0p4=1.0000 loss_all=7.6836 init_gold_top10=0.3333 init_gold_top100=0.3333
290
+ step=140 epoch=70/250 epoch_step=2/2 micro_steps=140 elapsed=4.4s lr=2.000000e-03 loss=7.9983 loss_recon=7.9983 loss_meanflow=0.0000 mean_model_t=0.2216 mean_corrupt_t=0.2216 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3500 corrupt_frac=0.8000 acc_corrupt=0.2344 loss_corrupt=7.9983 wrong_frac=0.7969 init_acc_corrupt=0.1094 acc_corrupt_t_0p2_0p4=0.2105 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.1727 out_g_norm=10.0991 acc_corrupt_t_0p4_0p6=0.5714 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.1842 corrupt_frac_t_0p0_0p2=1.0000 loss_all=6.2656 init_gold_top10=0.3333 init_gold_top100=0.3333
291
+ step=150 epoch=75/250 epoch_step=2/2 micro_steps=150 elapsed=4.0s lr=2.000000e-03 loss=7.4801 loss_recon=7.4801 loss_meanflow=0.0000 mean_model_t=0.2049 mean_corrupt_t=0.2049 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4375 corrupt_frac=0.6875 acc_corrupt=0.2545 loss_corrupt=7.4801 wrong_frac=0.8727 init_acc_corrupt=0.0727 acc_corrupt_t_0p4_0p6=0.0000 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=1.2643 out_g_norm=10.6291 acc_corrupt_t_0p2_0p4=0.3810 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p0_0p2=0.2143 corrupt_frac_t_0p0_0p2=1.0000 loss_all=7.0391 init_gold_top10=0.0000 init_gold_top100=0.2857
292
+ step=160 epoch=80/250 epoch_step=2/2 micro_steps=160 elapsed=4.5s lr=2.000000e-03 loss=6.5977 loss_recon=6.5977 loss_meanflow=0.0000 mean_model_t=0.2270 mean_corrupt_t=0.2270 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4375 corrupt_frac=0.7125 acc_corrupt=0.2807 loss_corrupt=6.5977 wrong_frac=0.7018 init_acc_corrupt=0.1754 acc_corrupt_t_0p0_0p2=0.2500 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.3567 out_g_norm=10.4843 acc_corrupt_t_0p2_0p4=0.2105 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=0.6667 corrupt_frac_t_0p6_0p8=1.0000 loss_all=5.6914 init_gold_top10=0.1667 init_gold_top100=0.1667
293
+ step=170 epoch=85/250 epoch_step=2/2 micro_steps=170 elapsed=4.3s lr=2.000000e-03 loss=6.3296 loss_recon=6.3296 loss_meanflow=0.0000 mean_model_t=0.2447 mean_corrupt_t=0.2447 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5250 corrupt_frac=0.6500 acc_corrupt=0.3269 loss_corrupt=6.3296 wrong_frac=0.8654 init_acc_corrupt=0.0962 acc_corrupt_t_0p2_0p4=0.3103 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.4500 out_g_norm=10.5915 acc_corrupt_t_0p4_0p6=0.4286 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.3125 corrupt_frac_t_0p0_0p2=1.0000 loss_all=3.4771 init_gold_top10=0.0000 init_gold_top100=0.2500
294
+ step=180 epoch=90/250 epoch_step=2/2 micro_steps=180 elapsed=4.0s lr=2.000000e-03 loss=5.2966 loss_recon=5.2966 loss_meanflow=0.0000 mean_model_t=0.2271 mean_corrupt_t=0.2271 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5000 corrupt_frac=0.6375 acc_corrupt=0.3529 loss_corrupt=5.2966 wrong_frac=0.7843 init_acc_corrupt=0.1569 acc_corrupt_t_0p2_0p4=0.4444 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.5444 out_g_norm=10.7251 acc_corrupt_t_0p0_0p2=0.2963 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.3333 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.5913 init_gold_top10=0.2500 init_gold_top100=0.2500
295
+ step=190 epoch=95/250 epoch_step=2/2 micro_steps=190 elapsed=4.4s lr=2.000000e-03 loss=5.9283 loss_recon=5.9283 loss_meanflow=0.0000 mean_model_t=0.1793 mean_corrupt_t=0.1793 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4625 corrupt_frac=0.7125 acc_corrupt=0.2807 loss_corrupt=5.9283 wrong_frac=0.8070 init_acc_corrupt=0.0702 acc_corrupt_t_0p0_0p2=0.2143 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.6385 out_g_norm=10.3948 acc_corrupt_t_0p2_0p4=0.4545 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=5.8466 init_gold_top10=0.5000 init_gold_top100=0.5000
296
+ step=200 epoch=100/250 epoch_step=2/2 micro_steps=200 elapsed=4.3s lr=2.000000e-03 loss=5.3512 loss_recon=5.3512 loss_meanflow=0.0000 mean_model_t=0.1869 mean_corrupt_t=0.1869 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5000 corrupt_frac=0.7625 acc_corrupt=0.3443 loss_corrupt=5.3512 wrong_frac=0.8033 init_acc_corrupt=0.1148 acc_corrupt_t_0p2_0p4=0.4688 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.7338 out_g_norm=9.9925 acc_corrupt_t_0p0_0p2=0.2069 corrupt_frac_t_0p0_0p2=1.0000 loss_all=2.3576 init_gold_top10=0.0000 init_gold_top100=0.2500
297
+ step=210 epoch=105/250 epoch_step=2/2 micro_steps=210 elapsed=4.0s lr=2.000000e-03 loss=5.0235 loss_recon=5.0235 loss_meanflow=0.0000 mean_model_t=0.1656 mean_corrupt_t=0.1656 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4875 corrupt_frac=0.7625 acc_corrupt=0.3279 loss_corrupt=5.0235 wrong_frac=0.8689 init_acc_corrupt=0.0656 acc_corrupt_t_0p0_0p2=0.2609 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=1.8268 out_g_norm=10.0242 acc_corrupt_t_0p2_0p4=0.5455 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=3.2325 init_gold_top10=0.0000 init_gold_top100=0.0000
298
+ step=220 epoch=110/250 epoch_step=2/2 micro_steps=220 elapsed=4.4s lr=2.000000e-03 loss=3.5529 loss_recon=3.5529 loss_meanflow=0.0000 mean_model_t=0.2338 mean_corrupt_t=0.2338 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6000 corrupt_frac=0.7125 acc_corrupt=0.4386 loss_corrupt=3.5529 wrong_frac=0.7193 init_acc_corrupt=0.1754 acc_corrupt_t_0p2_0p4=0.4839 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=1.9199 out_g_norm=10.0285 acc_corrupt_t_0p0_0p2=0.3000 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=0.6667 corrupt_frac_t_0p4_0p6=1.0000 loss_all=2.5658 init_gold_top10=0.4286 init_gold_top100=0.4286
299
+ step=230 epoch=115/250 epoch_step=2/2 micro_steps=230 elapsed=4.4s lr=2.000000e-03 loss=3.5945 loss_recon=3.5945 loss_meanflow=0.0000 mean_model_t=0.1464 mean_corrupt_t=0.1464 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5250 corrupt_frac=0.6875 acc_corrupt=0.3091 loss_corrupt=3.5945 wrong_frac=0.8545 init_acc_corrupt=0.0545 acc_corrupt_t_0p0_0p2=0.2500 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.0116 out_g_norm=9.6671 acc_corrupt_t_0p2_0p4=0.4211 corrupt_frac_t_0p2_0p4=1.0000 loss_all=2.6006 init_gold_top10=0.3333 init_gold_top100=0.3333
300
+ step=240 epoch=120/250 epoch_step=2/2 micro_steps=240 elapsed=4.0s lr=2.000000e-03 loss=4.6291 loss_recon=4.6291 loss_meanflow=0.0000 mean_model_t=0.1917 mean_corrupt_t=0.1917 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4125 corrupt_frac=0.8125 acc_corrupt=0.2769 loss_corrupt=4.6291 wrong_frac=0.8769 init_acc_corrupt=0.0462 acc_corrupt_t_0p2_0p4=0.3158 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.1059 out_g_norm=8.9642 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p0_0p2=0.2250 corrupt_frac_t_0p0_0p2=1.0000 loss_all=3.9569 init_gold_top10=0.0000 init_gold_top100=0.0000
301
+ step=250 epoch=125/250 epoch_step=2/2 micro_steps=250 elapsed=4.4s lr=2.000000e-03 loss=3.4013 loss_recon=3.4013 loss_meanflow=0.0000 mean_model_t=0.2324 mean_corrupt_t=0.2324 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5500 corrupt_frac=0.7375 acc_corrupt=0.3898 loss_corrupt=3.4013 wrong_frac=0.7966 init_acc_corrupt=0.1356 acc_corrupt_t_0p2_0p4=0.4048 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.2015 out_g_norm=9.2718 acc_corrupt_t_0p0_0p2=0.3529 corrupt_frac_t_0p0_0p2=1.0000 loss_all=6.6187 init_gold_top10=0.0000 init_gold_top100=0.0000
302
+ step=260 epoch=130/250 epoch_step=2/2 micro_steps=260 elapsed=4.3s lr=2.000000e-03 loss=2.6402 loss_recon=2.6402 loss_meanflow=0.0000 mean_model_t=0.2037 mean_corrupt_t=0.2037 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6500 corrupt_frac=0.7000 acc_corrupt=0.5000 loss_corrupt=2.6402 wrong_frac=0.7857 init_acc_corrupt=0.0893 acc_corrupt_t_0p0_0p2=0.3214 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.2892 out_g_norm=8.9012 acc_corrupt_t_0p2_0p4=0.6786 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.5023 init_gold_top10=0.0000 init_gold_top100=0.2500
303
+ step=270 epoch=135/250 epoch_step=2/2 micro_steps=270 elapsed=4.0s lr=2.000000e-03 loss=2.7208 loss_recon=2.7208 loss_meanflow=0.0000 mean_model_t=0.2494 mean_corrupt_t=0.2494 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6875 corrupt_frac=0.6375 acc_corrupt=0.5098 loss_corrupt=2.7208 wrong_frac=0.8235 init_acc_corrupt=0.1373 acc_corrupt_t_0p0_0p2=0.3793 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.3675 out_g_norm=8.7006 acc_corrupt_t_0p2_0p4=0.6667 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.7000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.6621 init_gold_top10=0.3333 init_gold_top100=0.3333
304
+ step=280 epoch=140/250 epoch_step=2/2 micro_steps=280 elapsed=4.4s lr=2.000000e-03 loss=1.6523 loss_recon=1.6523 loss_meanflow=0.0000 mean_model_t=0.2913 mean_corrupt_t=0.2913 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7250 corrupt_frac=0.6125 acc_corrupt=0.5510 loss_corrupt=1.6523 wrong_frac=0.7143 init_acc_corrupt=0.2041 acc_corrupt_t_0p0_0p2=0.4800 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.4361 out_g_norm=7.9884 acc_corrupt_t_0p6_0p8=0.7500 corrupt_frac_t_0p6_0p8=1.0000 acc_corrupt_t_0p2_0p4=0.6667 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.5000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.9345 init_gold_top10=0.0000 init_gold_top100=0.0000
305
+ step=290 epoch=145/250 epoch_step=2/2 micro_steps=290 elapsed=4.3s lr=2.000000e-03 loss=1.7248 loss_recon=1.7248 loss_meanflow=0.0000 mean_model_t=0.1777 mean_corrupt_t=0.1777 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6750 corrupt_frac=0.7125 acc_corrupt=0.5439 loss_corrupt=1.7248 wrong_frac=0.8947 init_acc_corrupt=0.0526 acc_corrupt_t_0p0_0p2=0.4242 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.4944 out_g_norm=8.7925 acc_corrupt_t_0p2_0p4=0.7083 corrupt_frac_t_0p2_0p4=1.0000 loss_all=2.5422 init_gold_top10=0.1667 init_gold_top100=0.3333
306
+ step=300 epoch=150/250 epoch_step=2/2 micro_steps=300 elapsed=3.9s lr=2.000000e-03 loss=1.4703 loss_recon=1.4703 loss_meanflow=0.0000 mean_model_t=0.2061 mean_corrupt_t=0.2061 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7250 corrupt_frac=0.7375 acc_corrupt=0.6271 loss_corrupt=1.4703 wrong_frac=0.7627 init_acc_corrupt=0.0678 acc_corrupt_t_0p0_0p2=0.6154 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.5453 out_g_norm=9.1597 acc_corrupt_t_0p2_0p4=0.6364 corrupt_frac_t_0p2_0p4=1.0000 loss_all=1.7826 init_gold_top10=0.1429 init_gold_top100=0.5714
307
+ step=310 epoch=155/250 epoch_step=2/2 micro_steps=310 elapsed=4.4s lr=2.000000e-03 loss=2.2349 loss_recon=2.2349 loss_meanflow=0.0000 mean_model_t=0.1841 mean_corrupt_t=0.1841 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6750 corrupt_frac=0.7000 acc_corrupt=0.5357 loss_corrupt=2.2349 wrong_frac=0.7857 init_acc_corrupt=0.1429 acc_corrupt_t_0p0_0p2=0.4722 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.5889 out_g_norm=9.3705 acc_corrupt_t_0p4_0p6=0.7143 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.6154 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.1804 init_gold_top10=0.5714 init_gold_top100=0.5714
308
+ step=320 epoch=160/250 epoch_step=2/2 micro_steps=320 elapsed=4.3s lr=2.000000e-03 loss=0.9740 loss_recon=0.9740 loss_meanflow=0.0000 mean_model_t=0.2237 mean_corrupt_t=0.2237 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8125 corrupt_frac=0.7625 acc_corrupt=0.7541 loss_corrupt=0.9740 wrong_frac=0.7213 init_acc_corrupt=0.1803 acc_corrupt_t_0p0_0p2=0.7407 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.6232 out_g_norm=6.9783 acc_corrupt_t_0p2_0p4=0.7037 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.6350 init_gold_top10=0.2500 init_gold_top100=0.5000
309
+ step=330 epoch=165/250 epoch_step=2/2 micro_steps=330 elapsed=4.0s lr=2.000000e-03 loss=4.2225 loss_recon=4.2225 loss_meanflow=0.0000 mean_model_t=0.1733 mean_corrupt_t=0.1733 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.5125 corrupt_frac=0.7750 acc_corrupt=0.3710 loss_corrupt=4.2225 wrong_frac=0.8710 init_acc_corrupt=0.0968 acc_corrupt_t_0p0_0p2=0.2340 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.6494 out_g_norm=10.7513 acc_corrupt_t_0p2_0p4=0.8000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=6.4063 init_gold_top10=0.0000 init_gold_top100=0.0000
310
+ step=340 epoch=170/250 epoch_step=2/2 micro_steps=340 elapsed=4.4s lr=2.000000e-03 loss=1.2265 loss_recon=1.2265 loss_meanflow=0.0000 mean_model_t=0.1706 mean_corrupt_t=0.1706 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7625 corrupt_frac=0.6875 acc_corrupt=0.6545 loss_corrupt=1.2265 wrong_frac=0.8364 init_acc_corrupt=0.0545 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 out_w_norm=2.6690 out_g_norm=7.0931 acc_corrupt_t_0p0_0p2=0.6136 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p2_0p4=0.7143 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.1398 init_gold_top10=0.0000 init_gold_top100=0.0000
311
+ step=350 epoch=175/250 epoch_step=2/2 micro_steps=350 elapsed=4.3s lr=2.000000e-03 loss=0.6328 loss_recon=0.6328 loss_meanflow=0.0000 mean_model_t=0.1718 mean_corrupt_t=0.1718 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8375 corrupt_frac=0.7000 acc_corrupt=0.7679 loss_corrupt=0.6328 wrong_frac=0.8214 init_acc_corrupt=0.1607 acc_corrupt_t_0p0_0p2=0.6750 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.6859 out_g_norm=6.9347 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.5976 init_gold_top10=0.1667 init_gold_top100=0.3333
312
+ step=360 epoch=180/250 epoch_step=2/2 micro_steps=360 elapsed=4.0s lr=2.000000e-03 loss=0.7077 loss_recon=0.7077 loss_meanflow=0.0000 mean_model_t=0.2313 mean_corrupt_t=0.2313 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8500 corrupt_frac=0.7125 acc_corrupt=0.7895 loss_corrupt=0.7077 wrong_frac=0.7368 init_acc_corrupt=0.1930 acc_corrupt_t_0p2_0p4=0.9286 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.7034 out_g_norm=7.5520 acc_corrupt_t_0p0_0p2=0.6207 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.8466 init_gold_top10=0.0000 init_gold_top100=0.5000
313
+ step=370 epoch=185/250 epoch_step=2/2 micro_steps=370 elapsed=4.4s lr=2.000000e-03 loss=0.8368 loss_recon=0.8368 loss_meanflow=0.0000 mean_model_t=0.1875 mean_corrupt_t=0.1875 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8375 corrupt_frac=0.6250 acc_corrupt=0.7400 loss_corrupt=0.8368 wrong_frac=0.8000 init_acc_corrupt=0.1000 acc_corrupt_t_0p0_0p2=0.8621 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7209 out_g_norm=6.6692 acc_corrupt_t_0p2_0p4=0.5714 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.2370 init_gold_top10=0.2500 init_gold_top100=0.2500
314
+ step=380 epoch=190/250 epoch_step=2/2 micro_steps=380 elapsed=4.4s lr=2.000000e-03 loss=0.5329 loss_recon=0.5329 loss_meanflow=0.0000 mean_model_t=0.1769 mean_corrupt_t=0.1769 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9000 corrupt_frac=0.6500 acc_corrupt=0.8462 loss_corrupt=0.5329 wrong_frac=0.7692 init_acc_corrupt=0.0962 acc_corrupt_t_0p0_0p2=0.8000 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7366 out_g_norm=5.5692 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.0335 init_gold_top10=0.5000 init_gold_top100=0.5000
315
+ step=390 epoch=195/250 epoch_step=2/2 micro_steps=390 elapsed=4.0s lr=2.000000e-03 loss=0.3023 loss_recon=0.3023 loss_meanflow=0.0000 mean_model_t=0.2492 mean_corrupt_t=0.2492 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9125 corrupt_frac=0.7750 acc_corrupt=0.8871 loss_corrupt=0.3023 wrong_frac=0.7097 init_acc_corrupt=0.0968 acc_corrupt_t_0p2_0p4=0.8627 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.7553 out_g_norm=5.7337 acc_corrupt_t_0p0_0p2=1.0000 corrupt_frac_t_0p0_0p2=1.0000 loss_all=0.4651 init_gold_top10=0.1429 init_gold_top100=0.1429
316
+ step=400 epoch=200/250 epoch_step=2/2 micro_steps=400 elapsed=4.4s lr=2.000000e-03 loss=0.6908 loss_recon=0.6908 loss_meanflow=0.0000 mean_model_t=0.2467 mean_corrupt_t=0.2467 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8250 corrupt_frac=0.7750 acc_corrupt=0.7742 loss_corrupt=0.6908 wrong_frac=0.7903 init_acc_corrupt=0.1613 acc_corrupt_t_0p0_0p2=0.6250 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7733 out_g_norm=5.5334 acc_corrupt_t_0p4_0p6=0.8333 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.8750 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.0713 init_gold_top10=0.4286 init_gold_top100=0.4286
317
+ step=410 epoch=205/250 epoch_step=2/2 micro_steps=410 elapsed=4.5s lr=2.000000e-03 loss=1.9599 loss_recon=1.9599 loss_meanflow=0.0000 mean_model_t=0.2461 mean_corrupt_t=0.2461 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7875 corrupt_frac=0.6250 acc_corrupt=0.6600 loss_corrupt=1.9599 wrong_frac=0.7600 init_acc_corrupt=0.1200 acc_corrupt_t_0p0_0p2=0.4286 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.7925 out_g_norm=5.5970 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=0.9167 corrupt_frac_t_0p4_0p6=1.0000 loss_all=3.7725 init_gold_top10=0.0000 init_gold_top100=0.2857
318
+ step=420 epoch=210/250 epoch_step=2/2 micro_steps=420 elapsed=3.9s lr=2.000000e-03 loss=0.2908 loss_recon=0.2908 loss_meanflow=0.0000 mean_model_t=0.2875 mean_corrupt_t=0.2875 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9375 corrupt_frac=0.7625 acc_corrupt=0.9180 loss_corrupt=0.2908 wrong_frac=0.7049 init_acc_corrupt=0.2459 acc_corrupt_t_0p0_0p2=0.8148 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8073 out_g_norm=4.0182 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.0318 init_gold_top10=0.5000 init_gold_top100=0.5000
319
+ step=430 epoch=215/250 epoch_step=2/2 micro_steps=430 elapsed=4.8s lr=2.000000e-03 loss=0.8458 loss_recon=0.8458 loss_meanflow=0.0000 mean_model_t=0.2302 mean_corrupt_t=0.2302 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8625 corrupt_frac=0.6875 acc_corrupt=0.8000 loss_corrupt=0.8458 wrong_frac=0.8727 init_acc_corrupt=0.0727 acc_corrupt_t_0p0_0p2=0.8889 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8246 out_g_norm=4.8845 acc_corrupt_t_0p2_0p4=0.6667 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=4.2910 init_gold_top10=0.0000 init_gold_top100=0.2857
320
+ step=440 epoch=220/250 epoch_step=2/2 micro_steps=440 elapsed=4.3s lr=2.000000e-03 loss=0.3215 loss_recon=0.3215 loss_meanflow=0.0000 mean_model_t=0.2328 mean_corrupt_t=0.2328 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9125 corrupt_frac=0.6500 acc_corrupt=0.8654 loss_corrupt=0.3215 wrong_frac=0.7692 init_acc_corrupt=0.1731 acc_corrupt_t_0p0_0p2=0.7941 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8403 out_g_norm=6.5439 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p6_0p8=1.0000 corrupt_frac_t_0p6_0p8=1.0000 loss_all=0.8865 init_gold_top10=0.0000 init_gold_top100=0.2857
321
+ step=450 epoch=225/250 epoch_step=2/2 micro_steps=450 elapsed=4.0s lr=2.000000e-03 loss=0.6827 loss_recon=0.6827 loss_meanflow=0.0000 mean_model_t=0.1586 mean_corrupt_t=0.1586 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8625 corrupt_frac=0.6750 acc_corrupt=0.7963 loss_corrupt=0.6827 wrong_frac=0.8333 init_acc_corrupt=0.0185 acc_corrupt_t_0p2_0p4=0.8824 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.8520 out_g_norm=5.0425 acc_corrupt_t_0p0_0p2=0.7568 corrupt_frac_t_0p0_0p2=1.0000 loss_all=2.7378 init_gold_top10=0.1429 init_gold_top100=0.1429
322
+ step=460 epoch=230/250 epoch_step=2/2 micro_steps=460 elapsed=4.5s lr=2.000000e-03 loss=0.2457 loss_recon=0.2457 loss_meanflow=0.0000 mean_model_t=0.2081 mean_corrupt_t=0.2081 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9375 corrupt_frac=0.6375 acc_corrupt=0.9020 loss_corrupt=0.2457 wrong_frac=0.7255 init_acc_corrupt=0.1765 acc_corrupt_t_0p0_0p2=0.8438 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8662 out_g_norm=3.4803 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.0009 init_gold_top10=0.5000 init_gold_top100=0.5000
323
+ step=470 epoch=235/250 epoch_step=2/2 micro_steps=470 elapsed=4.7s lr=2.000000e-03 loss=0.4232 loss_recon=0.4232 loss_meanflow=0.0000 mean_model_t=0.1753 mean_corrupt_t=0.1753 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9000 corrupt_frac=0.7000 acc_corrupt=0.8571 loss_corrupt=0.4232 wrong_frac=0.8393 init_acc_corrupt=0.0893 acc_corrupt_t_0p0_0p2=0.8205 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.8826 out_g_norm=3.8271 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 acc_corrupt_t_0p2_0p4=0.9091 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.8206 init_gold_top10=0.1429 init_gold_top100=0.1429
324
+ step=480 epoch=240/250 epoch_step=2/2 micro_steps=480 elapsed=4.0s lr=2.000000e-03 loss=0.2027 loss_recon=0.2027 loss_meanflow=0.0000 mean_model_t=0.2083 mean_corrupt_t=0.2083 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9500 corrupt_frac=0.6500 acc_corrupt=0.9231 loss_corrupt=0.2027 wrong_frac=0.7500 init_acc_corrupt=0.1731 acc_corrupt_t_0p0_0p2=0.8750 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9040 out_g_norm=3.8791 acc_corrupt_t_0p2_0p4=0.9545 corrupt_frac_t_0p2_0p4=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.0600 init_gold_top10=0.0000 init_gold_top100=0.5000
325
+ step=490 epoch=245/250 epoch_step=2/2 micro_steps=490 elapsed=4.3s lr=2.000000e-03 loss=0.1580 loss_recon=0.1580 loss_meanflow=0.0000 mean_model_t=0.2059 mean_corrupt_t=0.2059 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.9500 corrupt_frac=0.7125 acc_corrupt=0.9298 loss_corrupt=0.1580 wrong_frac=0.7895 init_acc_corrupt=0.0877 acc_corrupt_t_0p0_0p2=0.9302 corrupt_frac_t_0p0_0p2=1.0000 out_w_norm=2.9254 out_g_norm=5.2319 acc_corrupt_t_0p2_0p4=0.9286 corrupt_frac_t_0p2_0p4=1.0000 loss_all=0.0206 init_gold_top10=0.1667 init_gold_top100=0.1667
326
+ step=500 epoch=250/250 epoch_step=2/2 micro_steps=500 elapsed=4.3s lr=2.000000e-03 loss=0.9157 loss_recon=0.9157 loss_meanflow=0.0000 mean_model_t=0.2092 mean_corrupt_t=0.2092 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8375 corrupt_frac=0.8000 acc_corrupt=0.7969 loss_corrupt=0.9157 wrong_frac=0.7656 init_acc_corrupt=0.1563 acc_corrupt_t_0p2_0p4=1.0000 corrupt_frac_t_0p2_0p4=1.0000 out_w_norm=2.9437 out_g_norm=3.3090 acc_corrupt_t_0p0_0p2=0.6061 corrupt_frac_t_0p0_0p2=1.0000 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=1.0000 loss_all=0.0372 init_gold_top10=0.3333 init_gold_top100=0.3333
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_rollin_focused_len256_rollin_p75_s8_i64_20260517_1733focused.log ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 969,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_rollin_focused_len256_rollin_p75_s8_i64_20260517_1733focused",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 500,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 1965440,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "logit_normal",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 0.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_prob_floor": 0.0,
124
+ "categorical_wrong_corpus_unigram_path": "",
125
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
126
+ "categorical_wrong_basin_shared_prob": 0.0,
127
+ "categorical_wrong_unigram_shared_prob": 0.0,
128
+ "mask_mixture_original_prob": 0.0,
129
+ "mask_mixture_lowk_prob": 0.0,
130
+ "mask_mixture_lowcorrupt_prob": 0.0,
131
+ "mask_mixture_block_prob": 0.0,
132
+ "mask_mixture_all_prob": 1.0,
133
+ "mask_mixture_lowk_clean_tokens": "0",
134
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
135
+ "mask_mixture_block_tokens": "64,128",
136
+ "simplex_bridge_sampler": "dirichlet",
137
+ "logistic_normal_sigma_min": 0.1,
138
+ "logistic_normal_sigma_max": 1.0,
139
+ "logistic_normal_tau_min": 1.0,
140
+ "logistic_normal_tau_max": 1.0,
141
+ "torch_compile": false,
142
+ "compile_mode": "max-autotune",
143
+ "state_format": "prob",
144
+ "meanflow_weight": 0.0,
145
+ "rollout_train_prob": 0.75,
146
+ "rollout_train_steps": 8,
147
+ "rollout_train_infer_steps": 64,
148
+ "rollout_train_temp": 1.45,
149
+ "rollout_train_max_gamma": 1.0,
150
+ "rollout_train_corrupt_only": true,
151
+ "rollout_train_samplewise": true,
152
+ "rollout_train_compute_always": false,
153
+ "bridge_noise_init": "logistic_normal",
154
+ "noise_sigma": -1.0,
155
+ "allow_tf32": true,
156
+ "activation_checkpointing": false,
157
+ "activation_checkpoint_interval": 1,
158
+ "activation_checkpoint_scope": "block",
159
+ "ddp_static_graph": false,
160
+ "ddp_gradient_as_bucket_view": true,
161
+ "blocking_data_transfer": false,
162
+ "dataloader_prefetch_factor": 4,
163
+ "full_train_stats": false,
164
+ "tokenized_hf": false,
165
+ "tokenized_pad_token": "pad",
166
+ "elf_conditional_hf": false,
167
+ "record_pad_truncate": false,
168
+ "record_add_eos": false,
169
+ "record_add_special_tokens": false,
170
+ "record_pad_token": "pad",
171
+ "record_shuffle_buffer": 10000,
172
+ "wrap": true,
173
+ "wrap_mode": "stream",
174
+ "wrap_record_buffer_size": 200,
175
+ "owt_cached_chunks": true,
176
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
177
+ "owt_chunk_cache_rebuild": false,
178
+ "owt_chunk_cache_write_batch": 4096,
179
+ "owt_exact_repeat_per_chunk": 64,
180
+ "online_chunk_shuffle": false,
181
+ "online_chunk_shuffle_buffer": 10000,
182
+ "openwebtext_split": "train_minus_100k",
183
+ "detokenizer": "auto",
184
+ "resolved_detokenizer": null,
185
+ "num_workers": 0,
186
+ "latest_every": 500,
187
+ "resume_path": ""
188
+ }
189
+ step=100 epoch=100/500 epoch_step=1/1 micro_steps=100 elapsed=11.2s lr=2.000000e-03 loss=6.7062 loss_recon=6.7062 loss_meanflow=0.0000 mean_model_t=0.2083 mean_corrupt_t=0.2083 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.7551 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0995 corrupt_frac=1.0000 acc_corrupt=0.0995 loss_corrupt=6.7062 wrong_frac=0.7915 init_acc_corrupt=0.1159 acc_corrupt_t_0p0_0p2=0.0487 corrupt_frac_t_0p0_0p2=0.5559 acc_corrupt_t_0p2_0p4=0.1328 corrupt_frac_t_0p2_0p4=0.3589 acc_corrupt_t_0p4_0p6=0.2794 corrupt_frac_t_0p4_0p6=0.0773 acc_corrupt_t_0p6_0p8=0.3953 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=1.0988 out_g_norm=1.0089 loss_all=6.4473 init_gold_top10=0.2094 init_gold_top100=0.5156 rollout_applied_pos_frac=0.7109 init_acc_rollout_applied=0.1260 init_acc_rollout_kept=0.0945 logit_acc_rollout_applied=0.1111 logit_acc_rollout_kept=0.0885
190
+ step=200 epoch=200/500 epoch_step=1/1 micro_steps=200 elapsed=10.5s lr=2.000000e-03 loss=6.0950 loss_recon=6.0950 loss_meanflow=0.0000 mean_model_t=0.2108 mean_corrupt_t=0.2108 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.7441 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1106 corrupt_frac=1.0000 acc_corrupt=0.1106 loss_corrupt=6.0950 wrong_frac=0.7892 init_acc_corrupt=0.1189 acc_corrupt_t_0p0_0p2=0.0550 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.1488 corrupt_frac_t_0p2_0p4=0.3621 acc_corrupt_t_0p4_0p6=0.2932 corrupt_frac_t_0p4_0p6=0.0781 acc_corrupt_t_0p6_0p8=0.4252 corrupt_frac_t_0p6_0p8=0.0123 out_w_norm=3.3192 out_g_norm=1.4040 acc_corrupt_t_0p8_1p0=0.4740 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.8266 init_gold_top10=0.2031 init_gold_top100=0.5559 rollout_applied_pos_frac=0.7422 init_acc_rollout_applied=0.0931 init_acc_rollout_kept=0.1610 logit_acc_rollout_applied=0.1016 logit_acc_rollout_kept=0.1322
191
+ step=300 epoch=300/500 epoch_step=1/1 micro_steps=300 elapsed=10.5s lr=2.000000e-03 loss=5.5671 loss_recon=5.5671 loss_meanflow=0.0000 mean_model_t=0.2067 mean_corrupt_t=0.2067 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.7523 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1201 corrupt_frac=1.0000 acc_corrupt=0.1201 loss_corrupt=5.5671 wrong_frac=0.7935 init_acc_corrupt=0.1151 acc_corrupt_t_0p0_0p2=0.0585 corrupt_frac_t_0p0_0p2=0.5641 acc_corrupt_t_0p2_0p4=0.1676 corrupt_frac_t_0p2_0p4=0.3542 acc_corrupt_t_0p4_0p6=0.3231 corrupt_frac_t_0p4_0p6=0.0734 acc_corrupt_t_0p6_0p8=0.4789 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=5.1966 out_g_norm=0.7255 acc_corrupt_t_0p8_1p0=0.6445 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.2920 init_gold_top10=0.2141 init_gold_top100=0.6023 rollout_applied_pos_frac=0.7266 init_acc_rollout_applied=0.1141 init_acc_rollout_kept=0.1450 logit_acc_rollout_applied=0.1252 logit_acc_rollout_kept=0.1500
192
+ step=400 epoch=400/500 epoch_step=1/1 micro_steps=400 elapsed=10.5s lr=2.000000e-03 loss=4.9962 loss_recon=4.9962 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.7507 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1463 corrupt_frac=1.0000 acc_corrupt=0.1463 loss_corrupt=4.9962 wrong_frac=0.7917 init_acc_corrupt=0.1179 acc_corrupt_t_0p0_0p2=0.0639 corrupt_frac_t_0p0_0p2=0.5564 acc_corrupt_t_0p2_0p4=0.2015 corrupt_frac_t_0p2_0p4=0.3620 acc_corrupt_t_0p4_0p6=0.4385 corrupt_frac_t_0p4_0p6=0.0719 out_w_norm=6.8565 out_g_norm=0.4149 acc_corrupt_t_0p6_0p8=0.6449 corrupt_frac_t_0p6_0p8=0.0131 acc_corrupt_t_0p8_1p0=0.7031 corrupt_frac_t_0p8_1p0=0.0078 loss_all=4.7734 init_gold_top10=0.2049 init_gold_top100=0.6761 rollout_applied_pos_frac=0.7188 init_acc_rollout_applied=0.0819 init_acc_rollout_kept=0.1494 logit_acc_rollout_applied=0.1270 logit_acc_rollout_kept=0.2018
193
+ step=500 epoch=500/500 epoch_step=1/1 micro_steps=500 elapsed=10.5s lr=2.000000e-03 loss=4.2104 loss_recon=4.2104 loss_meanflow=0.0000 mean_model_t=0.2071 mean_corrupt_t=0.2071 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.7552 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1863 corrupt_frac=1.0000 acc_corrupt=0.1863 loss_corrupt=4.2104 wrong_frac=0.7928 init_acc_corrupt=0.1176 acc_corrupt_t_0p0_0p2=0.0757 corrupt_frac_t_0p0_0p2=0.5632 acc_corrupt_t_0p2_0p4=0.2778 corrupt_frac_t_0p2_0p4=0.3546 acc_corrupt_t_0p4_0p6=0.5334 corrupt_frac_t_0p4_0p6=0.0745 acc_corrupt_t_0p6_0p8=0.6991 corrupt_frac_t_0p6_0p8=0.0118 acc_corrupt_t_0p8_1p0=0.8594 corrupt_frac_t_0p8_1p0=0.0078 out_w_norm=8.3604 out_g_norm=0.4534 loss_all=3.6982 init_gold_top10=0.2438 init_gold_top100=0.7904 rollout_applied_pos_frac=0.7344 init_acc_rollout_applied=0.1312 init_acc_rollout_kept=0.1149 logit_acc_rollout_applied=0.2264 logit_acc_rollout_kept=0.2090
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654.log ADDED
@@ -0,0 +1,396 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 969,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 1965440,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "logit_normal",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 0.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_corpus_unigram_path": "",
124
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
125
+ "categorical_wrong_basin_shared_prob": 0.0,
126
+ "categorical_wrong_unigram_shared_prob": 0.0,
127
+ "mask_mixture_original_prob": 0.0,
128
+ "mask_mixture_lowk_prob": 0.0,
129
+ "mask_mixture_lowcorrupt_prob": 0.0,
130
+ "mask_mixture_block_prob": 0.0,
131
+ "mask_mixture_all_prob": 1.0,
132
+ "mask_mixture_lowk_clean_tokens": "0",
133
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
134
+ "mask_mixture_block_tokens": "64,128",
135
+ "simplex_bridge_sampler": "dirichlet",
136
+ "logistic_normal_sigma_min": 0.1,
137
+ "logistic_normal_sigma_max": 1.0,
138
+ "logistic_normal_tau_min": 1.0,
139
+ "logistic_normal_tau_max": 1.0,
140
+ "torch_compile": false,
141
+ "compile_mode": "max-autotune",
142
+ "state_format": "prob",
143
+ "meanflow_weight": 0.0,
144
+ "rollout_train_prob": 0.25,
145
+ "rollout_train_steps": 4,
146
+ "rollout_train_infer_steps": 32,
147
+ "rollout_train_temp": 1.45,
148
+ "rollout_train_max_gamma": 1.0,
149
+ "rollout_train_corrupt_only": true,
150
+ "rollout_train_samplewise": true,
151
+ "rollout_train_compute_always": false,
152
+ "bridge_noise_init": "logistic_normal",
153
+ "noise_sigma": -1.0,
154
+ "allow_tf32": true,
155
+ "activation_checkpointing": false,
156
+ "activation_checkpoint_interval": 1,
157
+ "activation_checkpoint_scope": "block",
158
+ "ddp_static_graph": false,
159
+ "ddp_gradient_as_bucket_view": true,
160
+ "blocking_data_transfer": false,
161
+ "dataloader_prefetch_factor": 4,
162
+ "full_train_stats": false,
163
+ "tokenized_hf": false,
164
+ "tokenized_pad_token": "pad",
165
+ "elf_conditional_hf": false,
166
+ "record_pad_truncate": false,
167
+ "record_add_eos": false,
168
+ "record_add_special_tokens": false,
169
+ "record_pad_token": "pad",
170
+ "record_shuffle_buffer": 10000,
171
+ "wrap": true,
172
+ "wrap_mode": "stream",
173
+ "wrap_record_buffer_size": 200,
174
+ "owt_cached_chunks": true,
175
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
176
+ "owt_chunk_cache_rebuild": false,
177
+ "owt_chunk_cache_write_batch": 4096,
178
+ "owt_exact_repeat_per_chunk": 64,
179
+ "online_chunk_shuffle": false,
180
+ "online_chunk_shuffle_buffer": 10000,
181
+ "openwebtext_split": "train_minus_100k",
182
+ "detokenizer": "auto",
183
+ "resolved_detokenizer": null,
184
+ "num_workers": 0,
185
+ "latest_every": 1000,
186
+ "resume_path": ""
187
+ }
188
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=7.9s lr=2.000000e-03 loss=6.7067 loss_recon=6.7067 loss_meanflow=0.0000 mean_model_t=0.2083 mean_corrupt_t=0.2083 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2530 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0998 corrupt_frac=1.0000 acc_corrupt=0.0998 loss_corrupt=6.7067 wrong_frac=0.7915 init_acc_corrupt=0.1159 acc_corrupt_t_0p0_0p2=0.0484 corrupt_frac_t_0p0_0p2=0.5559 acc_corrupt_t_0p2_0p4=0.1328 corrupt_frac_t_0p2_0p4=0.3589 acc_corrupt_t_0p4_0p6=0.2836 corrupt_frac_t_0p4_0p6=0.0773 acc_corrupt_t_0p6_0p8=0.4130 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=1.1017 out_g_norm=1.0028 loss_all=6.4476 init_gold_top10=0.2087 init_gold_top100=0.4590 rollout_applied_pos_frac=0.2344 init_acc_rollout_applied=0.0948 init_acc_rollout_kept=0.1237 logit_acc_rollout_applied=0.1040 logit_acc_rollout_kept=0.1037
189
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=7.2s lr=2.000000e-03 loss=6.0947 loss_recon=6.0947 loss_meanflow=0.0000 mean_model_t=0.2108 mean_corrupt_t=0.2108 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2494 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1107 corrupt_frac=1.0000 acc_corrupt=0.1107 loss_corrupt=6.0947 wrong_frac=0.7892 init_acc_corrupt=0.1185 acc_corrupt_t_0p0_0p2=0.0551 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.1485 corrupt_frac_t_0p2_0p4=0.3621 acc_corrupt_t_0p4_0p6=0.2936 corrupt_frac_t_0p4_0p6=0.0781 acc_corrupt_t_0p6_0p8=0.4298 corrupt_frac_t_0p6_0p8=0.0123 out_w_norm=3.3191 out_g_norm=1.4053 acc_corrupt_t_0p8_1p0=0.5352 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.8231 init_gold_top10=0.1984 init_gold_top100=0.4810 rollout_applied_pos_frac=0.3047 init_acc_rollout_applied=0.0981 init_acc_rollout_kept=0.1146 logit_acc_rollout_applied=0.1079 logit_acc_rollout_kept=0.1113
190
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=7.2s lr=2.000000e-03 loss=5.5716 loss_recon=5.5716 loss_meanflow=0.0000 mean_model_t=0.2067 mean_corrupt_t=0.2067 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2520 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1202 corrupt_frac=1.0000 acc_corrupt=0.1202 loss_corrupt=5.5716 wrong_frac=0.7935 init_acc_corrupt=0.1140 acc_corrupt_t_0p0_0p2=0.0587 corrupt_frac_t_0p0_0p2=0.5641 acc_corrupt_t_0p2_0p4=0.1674 corrupt_frac_t_0p2_0p4=0.3542 acc_corrupt_t_0p4_0p6=0.3232 corrupt_frac_t_0p4_0p6=0.0734 acc_corrupt_t_0p6_0p8=0.4842 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=5.2128 out_g_norm=0.7169 acc_corrupt_t_0p8_1p0=0.7201 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.3109 init_gold_top10=0.2009 init_gold_top100=0.4817 rollout_applied_pos_frac=0.2109 init_acc_rollout_applied=0.1409 init_acc_rollout_kept=0.1165 logit_acc_rollout_applied=0.1441 logit_acc_rollout_kept=0.1302
191
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=7.2s lr=2.000000e-03 loss=5.0178 loss_recon=5.0178 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2573 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1453 corrupt_frac=1.0000 acc_corrupt=0.1453 loss_corrupt=5.0178 wrong_frac=0.7917 init_acc_corrupt=0.1163 acc_corrupt_t_0p0_0p2=0.0634 corrupt_frac_t_0p0_0p2=0.5564 acc_corrupt_t_0p2_0p4=0.1996 corrupt_frac_t_0p2_0p4=0.3620 acc_corrupt_t_0p4_0p6=0.4376 corrupt_frac_t_0p4_0p6=0.0719 out_w_norm=6.8871 out_g_norm=0.4204 acc_corrupt_t_0p6_0p8=0.6489 corrupt_frac_t_0p6_0p8=0.0131 acc_corrupt_t_0p8_1p0=0.7793 corrupt_frac_t_0p8_1p0=0.0078 loss_all=4.8020 init_gold_top10=0.1953 init_gold_top100=0.5052 rollout_applied_pos_frac=0.2422 init_acc_rollout_applied=0.1080 init_acc_rollout_kept=0.0967 logit_acc_rollout_applied=0.1518 logit_acc_rollout_kept=0.1453
192
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=7.2s lr=2.000000e-03 loss=4.2859 loss_recon=4.2859 loss_meanflow=0.0000 mean_model_t=0.2071 mean_corrupt_t=0.2071 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2505 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1812 corrupt_frac=1.0000 acc_corrupt=0.1812 loss_corrupt=4.2859 wrong_frac=0.7928 init_acc_corrupt=0.1154 acc_corrupt_t_0p0_0p2=0.0738 corrupt_frac_t_0p0_0p2=0.5632 acc_corrupt_t_0p2_0p4=0.2688 corrupt_frac_t_0p2_0p4=0.3546 acc_corrupt_t_0p4_0p6=0.5233 corrupt_frac_t_0p4_0p6=0.0745 acc_corrupt_t_0p6_0p8=0.6904 corrupt_frac_t_0p6_0p8=0.0118 acc_corrupt_t_0p8_1p0=0.8477 corrupt_frac_t_0p8_1p0=0.0078 out_w_norm=8.4102 out_g_norm=0.4524 loss_all=3.8338 init_gold_top10=0.2167 init_gold_top100=0.5624 rollout_applied_pos_frac=0.2656 init_acc_rollout_applied=0.1174 init_acc_rollout_kept=0.1265 logit_acc_rollout_applied=0.2034 logit_acc_rollout_kept=0.2089
193
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=7.2s lr=2.000000e-03 loss=3.4521 loss_recon=3.4521 loss_meanflow=0.0000 mean_model_t=0.2074 mean_corrupt_t=0.2074 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2479 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2373 corrupt_frac=1.0000 acc_corrupt=0.2373 loss_corrupt=3.4521 wrong_frac=0.7927 init_acc_corrupt=0.1163 acc_corrupt_t_0p0_0p2=0.0932 corrupt_frac_t_0p0_0p2=0.5617 acc_corrupt_t_0p2_0p4=0.3707 corrupt_frac_t_0p2_0p4=0.3570 acc_corrupt_t_0p4_0p6=0.6334 corrupt_frac_t_0p4_0p6=0.0729 acc_corrupt_t_0p6_0p8=0.7711 corrupt_frac_t_0p6_0p8=0.0124 out_w_norm=9.5699 out_g_norm=0.4882 acc_corrupt_t_0p8_1p0=0.8945 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.2664 init_gold_top10=0.2361 init_gold_top100=0.5932 rollout_applied_pos_frac=0.3203 init_acc_rollout_applied=0.1109 init_acc_rollout_kept=0.1015 logit_acc_rollout_applied=0.2539 logit_acc_rollout_kept=0.2524
194
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=7.2s lr=2.000000e-03 loss=2.6394 loss_recon=2.6394 loss_meanflow=0.0000 mean_model_t=0.2094 mean_corrupt_t=0.2094 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2563 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3420 corrupt_frac=1.0000 acc_corrupt=0.3420 loss_corrupt=2.6394 wrong_frac=0.7908 init_acc_corrupt=0.1210 acc_corrupt_t_0p0_0p2=0.1365 corrupt_frac_t_0p0_0p2=0.5550 acc_corrupt_t_0p2_0p4=0.5457 corrupt_frac_t_0p2_0p4=0.3587 acc_corrupt_t_0p4_0p6=0.8091 corrupt_frac_t_0p4_0p6=0.0781 acc_corrupt_t_0p6_0p8=0.8865 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=10.2536 out_g_norm=0.6227 acc_corrupt_t_0p8_1p0=0.9375 corrupt_frac_t_0p8_1p0=0.0078 loss_all=2.2054 init_gold_top10=0.2965 init_gold_top100=0.5353 rollout_applied_pos_frac=0.1953 init_acc_rollout_applied=0.1761 init_acc_rollout_kept=0.1197 logit_acc_rollout_applied=0.5416 logit_acc_rollout_kept=0.3866
195
+ step=800 epoch=800/1000 epoch_step=1/1 micro_steps=800 elapsed=7.1s lr=2.000000e-03 loss=1.9386 loss_recon=1.9386 loss_meanflow=0.0000 mean_model_t=0.2099 mean_corrupt_t=0.2099 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2495 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.4785 corrupt_frac=1.0000 acc_corrupt=0.4785 loss_corrupt=1.9386 wrong_frac=0.7896 init_acc_corrupt=0.1237 acc_corrupt_t_0p0_0p2=0.2203 corrupt_frac_t_0p0_0p2=0.5548 acc_corrupt_t_0p2_0p4=0.7631 corrupt_frac_t_0p2_0p4=0.3585 acc_corrupt_t_0p4_0p6=0.9514 corrupt_frac_t_0p4_0p6=0.0790 out_w_norm=10.6546 out_g_norm=0.7053 acc_corrupt_t_0p6_0p8=0.9738 corrupt_frac_t_0p6_0p8=0.0130 acc_corrupt_t_0p8_1p0=0.9648 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.5589 init_gold_top10=0.3369 init_gold_top100=0.5577 rollout_applied_pos_frac=0.2188 init_acc_rollout_applied=0.1800 init_acc_rollout_kept=0.1344 logit_acc_rollout_applied=0.6586 logit_acc_rollout_kept=0.5515
196
+ step=900 epoch=900/1000 epoch_step=1/1 micro_steps=900 elapsed=7.1s lr=2.000000e-03 loss=1.3894 loss_recon=1.3894 loss_meanflow=0.0000 mean_model_t=0.2107 mean_corrupt_t=0.2107 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2528 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6018 corrupt_frac=1.0000 acc_corrupt=0.6018 loss_corrupt=1.3894 wrong_frac=0.7895 init_acc_corrupt=0.1283 acc_corrupt_t_0p0_0p2=0.3447 corrupt_frac_t_0p0_0p2=0.5521 acc_corrupt_t_0p2_0p4=0.9007 corrupt_frac_t_0p2_0p4=0.3599 acc_corrupt_t_0p4_0p6=0.9919 corrupt_frac_t_0p4_0p6=0.0788 acc_corrupt_t_0p6_0p8=0.9925 corrupt_frac_t_0p6_0p8=0.0133 out_w_norm=10.9674 out_g_norm=0.8489 acc_corrupt_t_0p8_1p0=0.9909 corrupt_frac_t_0p8_1p0=0.0078 loss_all=1.2188 init_gold_top10=0.3704 init_gold_top100=0.5918 rollout_applied_pos_frac=0.2891 init_acc_rollout_applied=0.1534 init_acc_rollout_kept=0.1097 logit_acc_rollout_applied=0.6857 logit_acc_rollout_kept=0.6122
197
+ step=1000 epoch=1000/1000 epoch_step=1/1 micro_steps=1000 elapsed=7.2s lr=2.000000e-03 loss=1.0306 loss_recon=1.0306 loss_meanflow=0.0000 mean_model_t=0.2112 mean_corrupt_t=0.2112 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2503 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.6813 corrupt_frac=1.0000 acc_corrupt=0.6813 loss_corrupt=1.0306 wrong_frac=0.7887 init_acc_corrupt=0.1315 acc_corrupt_t_0p0_0p2=0.4443 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.9665 corrupt_frac_t_0p2_0p4=0.3570 acc_corrupt_t_0p4_0p6=0.9978 corrupt_frac_t_0p4_0p6=0.0810 out_w_norm=11.1836 out_g_norm=0.9117 acc_corrupt_t_0p6_0p8=0.9956 corrupt_frac_t_0p6_0p8=0.0137 acc_corrupt_t_0p8_1p0=0.9844 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.8158 init_gold_top10=0.4048 init_gold_top100=0.5936 rollout_applied_pos_frac=0.2891 init_acc_rollout_applied=0.2068 init_acc_rollout_kept=0.1181 logit_acc_rollout_applied=0.7595 logit_acc_rollout_kept=0.7236
198
+ NCCL version 2.25.1+cuda12.8
199
+ resumed_from=runs/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654/latest.pt start_step=1001
200
+ {
201
+ "device": "cuda:0",
202
+ "rank": 0,
203
+ "world_size": 4,
204
+ "samples": "owt_cached_chunks:8",
205
+ "vocab_size": 969,
206
+ "tokenizer_vocab_size": 50257,
207
+ "save_dir": "runs/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654",
208
+ "batch_size": 128,
209
+ "grad_accum": 1,
210
+ "effective_batch_size": 512,
211
+ "global_batch_size": 512,
212
+ "lr_schedule": "constant_warmup",
213
+ "optimizer": "muon",
214
+ "epochs": 0.0,
215
+ "steps_per_epoch": 1,
216
+ "total_steps": 2000,
217
+ "warmup_steps": 10,
218
+ "warmup_epochs": -1.0,
219
+ "min_lr": 0.0,
220
+ "weight_decay": 0.1,
221
+ "output_weight_decay": -1.0,
222
+ "adamw_param_groups": "nanogpt",
223
+ "adam_beta1": 0.9,
224
+ "adam_beta2": 0.95,
225
+ "adam_eps": 1e-08,
226
+ "muon_impl": "legacy",
227
+ "muon_momentum": 0.95,
228
+ "muon_ns_steps": 5,
229
+ "muon_update_scale": 1.0,
230
+ "muon_nesterov": false,
231
+ "muon_width_scale": false,
232
+ "muon_grouping": "legacy_dim_ge_2",
233
+ "muon_param_count": 1965440,
234
+ "muon_adam_param_count": 8192,
235
+ "muon_param_names": [
236
+ "vocab_embed.embedding",
237
+ "sigma_map.net.0.weight",
238
+ "sigma_map.net.2.weight",
239
+ "blocks.0.attn_qkv.weight",
240
+ "blocks.0.attn_out.weight",
241
+ "blocks.0.mlp.0.weight",
242
+ "blocks.0.mlp.2.weight",
243
+ "blocks.0.adaLN_modulation.weight",
244
+ "blocks.1.attn_qkv.weight",
245
+ "blocks.1.attn_out.weight",
246
+ "blocks.1.mlp.0.weight",
247
+ "blocks.1.mlp.2.weight",
248
+ "blocks.1.adaLN_modulation.weight",
249
+ "blocks.2.attn_qkv.weight",
250
+ "blocks.2.attn_out.weight",
251
+ "blocks.2.mlp.0.weight",
252
+ "blocks.2.mlp.2.weight",
253
+ "blocks.2.adaLN_modulation.weight",
254
+ "output_layer.linear.weight",
255
+ "output_layer.adaLN_modulation.weight"
256
+ ],
257
+ "muon_adam_param_names": [
258
+ "sigma_map.net.0.bias",
259
+ "sigma_map.net.2.bias",
260
+ "blocks.0.norm1.weight",
261
+ "blocks.0.norm2.weight",
262
+ "blocks.0.mlp.0.bias",
263
+ "blocks.0.mlp.2.bias",
264
+ "blocks.0.adaLN_modulation.bias",
265
+ "blocks.1.norm1.weight",
266
+ "blocks.1.norm2.weight",
267
+ "blocks.1.mlp.0.bias",
268
+ "blocks.1.mlp.2.bias",
269
+ "blocks.1.adaLN_modulation.bias",
270
+ "blocks.2.norm1.weight",
271
+ "blocks.2.norm2.weight",
272
+ "blocks.2.mlp.0.bias",
273
+ "blocks.2.mlp.2.bias",
274
+ "blocks.2.adaLN_modulation.bias",
275
+ "output_layer.norm_final.weight",
276
+ "output_layer.adaLN_modulation.bias"
277
+ ],
278
+ "muon_effective_nesterov": false,
279
+ "muon_effective_width_scale": false,
280
+ "muon_effective_weight_decay": 0.1,
281
+ "muon_adam_fallback_nesterov": false,
282
+ "muon_adam_fallback_weight_decay": 0.1,
283
+ "ema_decay": 0.9999,
284
+ "ema_start_step": 0,
285
+ "model_type": "ddit",
286
+ "ddit_mlp_type": "gelu",
287
+ "elf_num_time_tokens": 4,
288
+ "elf_num_model_mode_tokens": 0,
289
+ "qk_norm": true,
290
+ "output_bias": false,
291
+ "output_init_std": -1.0,
292
+ "norm_type": "rmsnorm",
293
+ "target_loss": "hard_ce",
294
+ "linear_soft_target_power": 1.0,
295
+ "linear_soft_target_min_conf": 0.0,
296
+ "linear_soft_target_max_conf": 1.0,
297
+ "t_sampling_mode": "logit_normal",
298
+ "t_sampling_power": 1.0,
299
+ "t_sampling_eps": 0.0001,
300
+ "t_sampling_logit_mean": -1.5,
301
+ "t_sampling_logit_std": 0.8,
302
+ "dual_t": true,
303
+ "corrupt_t_mode": "same",
304
+ "corrupt_min_t": 0.0,
305
+ "corrupt_max_t": 1.0,
306
+ "prefix_block_prob": 0.0,
307
+ "prefix_block_len": 128,
308
+ "mask_ratio_floor_schedule": "none",
309
+ "dirichlet_endpoint_mode": "categorical_dual_t",
310
+ "dirichlet_semantic_t_mode": "same",
311
+ "dirichlet_semantic_t_value": 0.0,
312
+ "dirichlet_semantic_t_curve": "linear",
313
+ "dirichlet_semantic_t_power": 1.0,
314
+ "endpoint_sequence_random_prob_alpha": 0.0,
315
+ "categorical_wrong_from_full_vocab": true,
316
+ "categorical_wrong_from_batch_valid_tokens": false,
317
+ "categorical_wrong_basin_token_ids": "",
318
+ "categorical_wrong_basin_prob": 0.0,
319
+ "categorical_wrong_unigram_prob": 0.0,
320
+ "categorical_wrong_uniform_prob": 0.0,
321
+ "categorical_wrong_prob_floor": 0.0,
322
+ "categorical_wrong_corpus_unigram_path": "",
323
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
324
+ "categorical_wrong_basin_shared_prob": 0.0,
325
+ "categorical_wrong_unigram_shared_prob": 0.0,
326
+ "mask_mixture_original_prob": 0.0,
327
+ "mask_mixture_lowk_prob": 0.0,
328
+ "mask_mixture_lowcorrupt_prob": 0.0,
329
+ "mask_mixture_block_prob": 0.0,
330
+ "mask_mixture_all_prob": 1.0,
331
+ "mask_mixture_lowk_clean_tokens": "0",
332
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
333
+ "mask_mixture_block_tokens": "64,128",
334
+ "simplex_bridge_sampler": "dirichlet",
335
+ "logistic_normal_sigma_min": 0.1,
336
+ "logistic_normal_sigma_max": 1.0,
337
+ "logistic_normal_tau_min": 1.0,
338
+ "logistic_normal_tau_max": 1.0,
339
+ "torch_compile": false,
340
+ "compile_mode": "max-autotune",
341
+ "state_format": "prob",
342
+ "meanflow_weight": 0.0,
343
+ "rollout_train_prob": 0.25,
344
+ "rollout_train_steps": 4,
345
+ "rollout_train_infer_steps": 32,
346
+ "rollout_train_temp": 1.45,
347
+ "rollout_train_max_gamma": 1.0,
348
+ "rollout_train_corrupt_only": true,
349
+ "rollout_train_samplewise": true,
350
+ "rollout_train_compute_always": false,
351
+ "bridge_noise_init": "logistic_normal",
352
+ "noise_sigma": -1.0,
353
+ "allow_tf32": true,
354
+ "activation_checkpointing": false,
355
+ "activation_checkpoint_interval": 1,
356
+ "activation_checkpoint_scope": "block",
357
+ "ddp_static_graph": false,
358
+ "ddp_gradient_as_bucket_view": true,
359
+ "blocking_data_transfer": false,
360
+ "dataloader_prefetch_factor": 4,
361
+ "full_train_stats": false,
362
+ "tokenized_hf": false,
363
+ "tokenized_pad_token": "pad",
364
+ "elf_conditional_hf": false,
365
+ "record_pad_truncate": false,
366
+ "record_add_eos": false,
367
+ "record_add_special_tokens": false,
368
+ "record_pad_token": "pad",
369
+ "record_shuffle_buffer": 10000,
370
+ "wrap": true,
371
+ "wrap_mode": "stream",
372
+ "wrap_record_buffer_size": 200,
373
+ "owt_cached_chunks": true,
374
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
375
+ "owt_chunk_cache_rebuild": false,
376
+ "owt_chunk_cache_write_batch": 4096,
377
+ "owt_exact_repeat_per_chunk": 64,
378
+ "online_chunk_shuffle": false,
379
+ "online_chunk_shuffle_buffer": 10000,
380
+ "openwebtext_split": "train_minus_100k",
381
+ "detokenizer": "auto",
382
+ "resolved_detokenizer": null,
383
+ "num_workers": 0,
384
+ "latest_every": 1000,
385
+ "resume_path": "runs/train8_rollin_len256_rollin_p25_s4_i32_20260517_171654/latest.pt"
386
+ }
387
+ step=1100 epoch=1100/2000 epoch_step=1/1 micro_steps=1100 elapsed=7.9s lr=2.000000e-03 loss=0.8615 loss_recon=0.8615 loss_meanflow=0.0000 mean_model_t=0.2083 mean_corrupt_t=0.2083 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2530 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7159 corrupt_frac=1.0000 acc_corrupt=0.7159 loss_corrupt=0.8615 wrong_frac=0.7915 init_acc_corrupt=0.1296 acc_corrupt_t_0p0_0p2=0.4975 corrupt_frac_t_0p0_0p2=0.5559 acc_corrupt_t_0p2_0p4=0.9869 corrupt_frac_t_0p2_0p4=0.3589 acc_corrupt_t_0p4_0p6=0.9991 corrupt_frac_t_0p4_0p6=0.0773 acc_corrupt_t_0p6_0p8=0.9964 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=11.2440 out_g_norm=0.8999 loss_all=0.7691 init_gold_top10=0.3715 init_gold_top100=0.5637 rollout_applied_pos_frac=0.2344 init_acc_rollout_applied=0.1711 init_acc_rollout_kept=0.1237 logit_acc_rollout_applied=0.7966 logit_acc_rollout_kept=0.7191
388
+ step=1200 epoch=1200/2000 epoch_step=1/1 micro_steps=1200 elapsed=7.4s lr=2.000000e-03 loss=0.7241 loss_recon=0.7241 loss_meanflow=0.0000 mean_model_t=0.2108 mean_corrupt_t=0.2108 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2494 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7520 corrupt_frac=1.0000 acc_corrupt=0.7520 loss_corrupt=0.7241 wrong_frac=0.7892 init_acc_corrupt=0.1326 acc_corrupt_t_0p0_0p2=0.5542 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.9943 corrupt_frac_t_0p2_0p4=0.3621 acc_corrupt_t_0p4_0p6=0.9995 corrupt_frac_t_0p4_0p6=0.0781 acc_corrupt_t_0p6_0p8=0.9981 corrupt_frac_t_0p6_0p8=0.0123 out_w_norm=11.2260 out_g_norm=0.7855 acc_corrupt_t_0p8_1p0=0.9844 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.7395 init_gold_top10=0.4136 init_gold_top100=0.5993 rollout_applied_pos_frac=0.3047 init_acc_rollout_applied=0.1501 init_acc_rollout_kept=0.1146 logit_acc_rollout_applied=0.6852 logit_acc_rollout_kept=0.7614
389
+ step=1300 epoch=1300/2000 epoch_step=1/1 micro_steps=1300 elapsed=7.6s lr=2.000000e-03 loss=0.6933 loss_recon=0.6933 loss_meanflow=0.0000 mean_model_t=0.2067 mean_corrupt_t=0.2067 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2520 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7597 corrupt_frac=1.0000 acc_corrupt=0.7597 loss_corrupt=0.6933 wrong_frac=0.7935 init_acc_corrupt=0.1286 acc_corrupt_t_0p0_0p2=0.5764 corrupt_frac_t_0p0_0p2=0.5641 acc_corrupt_t_0p2_0p4=0.9965 corrupt_frac_t_0p2_0p4=0.3542 acc_corrupt_t_0p4_0p6=0.9997 corrupt_frac_t_0p4_0p6=0.0734 acc_corrupt_t_0p6_0p8=0.9978 corrupt_frac_t_0p6_0p8=0.0121 out_w_norm=11.1827 out_g_norm=0.6628 acc_corrupt_t_0p8_1p0=0.9961 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.7183 init_gold_top10=0.3512 init_gold_top100=0.5487 rollout_applied_pos_frac=0.2109 init_acc_rollout_applied=0.2034 init_acc_rollout_kept=0.1165 logit_acc_rollout_applied=0.8299 logit_acc_rollout_kept=0.7257
390
+ step=1400 epoch=1400/2000 epoch_step=1/1 micro_steps=1400 elapsed=7.6s lr=2.000000e-03 loss=0.6470 loss_recon=0.6470 loss_meanflow=0.0000 mean_model_t=0.2085 mean_corrupt_t=0.2085 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2573 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7753 corrupt_frac=1.0000 acc_corrupt=0.7753 loss_corrupt=0.6470 wrong_frac=0.7917 init_acc_corrupt=0.1310 acc_corrupt_t_0p0_0p2=0.5977 corrupt_frac_t_0p0_0p2=0.5564 acc_corrupt_t_0p2_0p4=0.9977 corrupt_frac_t_0p2_0p4=0.3620 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0719 out_w_norm=11.1427 out_g_norm=0.6136 acc_corrupt_t_0p6_0p8=0.9990 corrupt_frac_t_0p6_0p8=0.0131 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.6995 init_gold_top10=0.3550 init_gold_top100=0.5616 rollout_applied_pos_frac=0.2422 init_acc_rollout_applied=0.1628 init_acc_rollout_kept=0.0967 logit_acc_rollout_applied=0.6905 logit_acc_rollout_kept=0.7726
391
+ step=1500 epoch=1500/2000 epoch_step=1/1 micro_steps=1500 elapsed=7.6s lr=2.000000e-03 loss=0.6155 loss_recon=0.6155 loss_meanflow=0.0000 mean_model_t=0.2071 mean_corrupt_t=0.2071 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2505 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7870 corrupt_frac=1.0000 acc_corrupt=0.7870 loss_corrupt=0.6155 wrong_frac=0.7928 init_acc_corrupt=0.1295 acc_corrupt_t_0p0_0p2=0.6226 corrupt_frac_t_0p0_0p2=0.5632 acc_corrupt_t_0p2_0p4=0.9988 corrupt_frac_t_0p2_0p4=0.3546 acc_corrupt_t_0p4_0p6=0.9998 corrupt_frac_t_0p4_0p6=0.0745 acc_corrupt_t_0p6_0p8=0.9987 corrupt_frac_t_0p6_0p8=0.0118 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 out_w_norm=11.1172 out_g_norm=0.5546 loss_all=0.7838 init_gold_top10=0.3934 init_gold_top100=0.5820 rollout_applied_pos_frac=0.2656 init_acc_rollout_applied=0.1757 init_acc_rollout_kept=0.1265 logit_acc_rollout_applied=0.7716 logit_acc_rollout_kept=0.7345
392
+ step=1600 epoch=1600/2000 epoch_step=1/1 micro_steps=1600 elapsed=7.6s lr=2.000000e-03 loss=0.5805 loss_recon=0.5805 loss_meanflow=0.0000 mean_model_t=0.2074 mean_corrupt_t=0.2074 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2479 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.7979 corrupt_frac=1.0000 acc_corrupt=0.7979 loss_corrupt=0.5805 wrong_frac=0.7927 init_acc_corrupt=0.1292 acc_corrupt_t_0p0_0p2=0.6409 corrupt_frac_t_0p0_0p2=0.5617 acc_corrupt_t_0p2_0p4=0.9991 corrupt_frac_t_0p2_0p4=0.3570 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0729 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.0124 out_w_norm=11.1053 out_g_norm=0.5120 acc_corrupt_t_0p8_1p0=0.7422 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.7242 init_gold_top10=0.4185 init_gold_top100=0.6063 rollout_applied_pos_frac=0.3203 init_acc_rollout_applied=0.1492 init_acc_rollout_kept=0.1015 logit_acc_rollout_applied=0.7253 logit_acc_rollout_kept=0.7539
393
+ step=1700 epoch=1700/2000 epoch_step=1/1 micro_steps=1700 elapsed=7.6s lr=2.000000e-03 loss=0.5616 loss_recon=0.5616 loss_meanflow=0.0000 mean_model_t=0.2094 mean_corrupt_t=0.2094 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2563 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8048 corrupt_frac=1.0000 acc_corrupt=0.8048 loss_corrupt=0.5616 wrong_frac=0.7908 init_acc_corrupt=0.1334 acc_corrupt_t_0p0_0p2=0.6486 corrupt_frac_t_0p0_0p2=0.5550 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.3587 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0781 acc_corrupt_t_0p6_0p8=0.9993 corrupt_frac_t_0p6_0p8=0.0129 out_w_norm=11.0941 out_g_norm=0.4938 acc_corrupt_t_0p8_1p0=0.9922 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.5621 init_gold_top10=0.3426 init_gold_top100=0.5366 rollout_applied_pos_frac=0.1953 init_acc_rollout_applied=0.2289 init_acc_rollout_kept=0.1197 logit_acc_rollout_applied=0.9022 logit_acc_rollout_kept=0.7822
394
+ step=1800 epoch=1800/2000 epoch_step=1/1 micro_steps=1800 elapsed=7.6s lr=2.000000e-03 loss=0.5387 loss_recon=0.5387 loss_meanflow=0.0000 mean_model_t=0.2099 mean_corrupt_t=0.2099 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2495 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8128 corrupt_frac=1.0000 acc_corrupt=0.8128 loss_corrupt=0.5387 wrong_frac=0.7896 init_acc_corrupt=0.1338 acc_corrupt_t_0p0_0p2=0.6628 corrupt_frac_t_0p0_0p2=0.5548 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.3585 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=0.0790 out_w_norm=11.0839 out_g_norm=0.4787 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.0130 acc_corrupt_t_0p8_1p0=1.0000 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.5373 init_gold_top10=0.3827 init_gold_top100=0.5577 rollout_applied_pos_frac=0.2188 init_acc_rollout_applied=0.2114 init_acc_rollout_kept=0.1344 logit_acc_rollout_applied=0.8574 logit_acc_rollout_kept=0.8103
395
+ step=1900 epoch=1900/2000 epoch_step=1/1 micro_steps=1900 elapsed=7.6s lr=2.000000e-03 loss=0.5037 loss_recon=0.5037 loss_meanflow=0.0000 mean_model_t=0.2107 mean_corrupt_t=0.2107 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2528 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8251 corrupt_frac=1.0000 acc_corrupt=0.8251 loss_corrupt=0.5037 wrong_frac=0.7895 init_acc_corrupt=0.1353 acc_corrupt_t_0p0_0p2=0.6835 corrupt_frac_t_0p0_0p2=0.5521 acc_corrupt_t_0p2_0p4=0.9996 corrupt_frac_t_0p2_0p4=0.3599 acc_corrupt_t_0p4_0p6=0.9999 corrupt_frac_t_0p4_0p6=0.0788 acc_corrupt_t_0p6_0p8=0.9996 corrupt_frac_t_0p6_0p8=0.0133 out_w_norm=11.0801 out_g_norm=0.4154 acc_corrupt_t_0p8_1p0=0.9974 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.5739 init_gold_top10=0.4066 init_gold_top100=0.5918 rollout_applied_pos_frac=0.2891 init_acc_rollout_applied=0.1761 init_acc_rollout_kept=0.1097 logit_acc_rollout_applied=0.8213 logit_acc_rollout_kept=0.8056
396
+ step=2000 epoch=2000/2000 epoch_step=1/1 micro_steps=2000 elapsed=7.6s lr=2.000000e-03 loss=0.4909 loss_recon=0.4909 loss_meanflow=0.0000 mean_model_t=0.2112 mean_corrupt_t=0.2112 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.2503 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.8309 corrupt_frac=1.0000 acc_corrupt=0.8309 loss_corrupt=0.4909 wrong_frac=0.7887 init_acc_corrupt=0.1354 acc_corrupt_t_0p0_0p2=0.6937 corrupt_frac_t_0p0_0p2=0.5516 acc_corrupt_t_0p2_0p4=0.9997 corrupt_frac_t_0p2_0p4=0.3570 acc_corrupt_t_0p4_0p6=1.0000 corrupt_frac_t_0p4_0p6=0.0810 out_w_norm=11.0769 out_g_norm=0.3879 acc_corrupt_t_0p6_0p8=0.9995 corrupt_frac_t_0p6_0p8=0.0137 acc_corrupt_t_0p8_1p0=0.9969 corrupt_frac_t_0p8_1p0=0.0078 loss_all=0.4895 init_gold_top10=0.4187 init_gold_top100=0.5936 rollout_applied_pos_frac=0.2891 init_acc_rollout_applied=0.2180 init_acc_rollout_kept=0.1181 logit_acc_rollout_applied=0.8300 logit_acc_rollout_kept=0.8207
LTA_openwebtext_dualt/logs/softendpoint_mn_pilot_4gpu/train8_wrongfloor_len256_wrongfloor0p5_20260517_1815wrongfloor.log ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NCCL version 2.25.1+cuda12.8
2
+ {
3
+ "device": "cuda:0",
4
+ "rank": 0,
5
+ "world_size": 4,
6
+ "samples": "owt_cached_chunks:8",
7
+ "vocab_size": 969,
8
+ "tokenizer_vocab_size": 50257,
9
+ "save_dir": "runs/train8_wrongfloor_len256_wrongfloor0p5_20260517_1815wrongfloor",
10
+ "batch_size": 128,
11
+ "grad_accum": 1,
12
+ "effective_batch_size": 512,
13
+ "global_batch_size": 512,
14
+ "lr_schedule": "constant_warmup",
15
+ "optimizer": "muon",
16
+ "epochs": 0.0,
17
+ "steps_per_epoch": 1,
18
+ "total_steps": 1000,
19
+ "warmup_steps": 10,
20
+ "warmup_epochs": -1.0,
21
+ "min_lr": 0.0,
22
+ "weight_decay": 0.1,
23
+ "output_weight_decay": -1.0,
24
+ "adamw_param_groups": "nanogpt",
25
+ "adam_beta1": 0.9,
26
+ "adam_beta2": 0.95,
27
+ "adam_eps": 1e-08,
28
+ "muon_impl": "legacy",
29
+ "muon_momentum": 0.95,
30
+ "muon_ns_steps": 5,
31
+ "muon_update_scale": 1.0,
32
+ "muon_nesterov": false,
33
+ "muon_width_scale": false,
34
+ "muon_grouping": "legacy_dim_ge_2",
35
+ "muon_param_count": 1965440,
36
+ "muon_adam_param_count": 8192,
37
+ "muon_param_names": [
38
+ "vocab_embed.embedding",
39
+ "sigma_map.net.0.weight",
40
+ "sigma_map.net.2.weight",
41
+ "blocks.0.attn_qkv.weight",
42
+ "blocks.0.attn_out.weight",
43
+ "blocks.0.mlp.0.weight",
44
+ "blocks.0.mlp.2.weight",
45
+ "blocks.0.adaLN_modulation.weight",
46
+ "blocks.1.attn_qkv.weight",
47
+ "blocks.1.attn_out.weight",
48
+ "blocks.1.mlp.0.weight",
49
+ "blocks.1.mlp.2.weight",
50
+ "blocks.1.adaLN_modulation.weight",
51
+ "blocks.2.attn_qkv.weight",
52
+ "blocks.2.attn_out.weight",
53
+ "blocks.2.mlp.0.weight",
54
+ "blocks.2.mlp.2.weight",
55
+ "blocks.2.adaLN_modulation.weight",
56
+ "output_layer.linear.weight",
57
+ "output_layer.adaLN_modulation.weight"
58
+ ],
59
+ "muon_adam_param_names": [
60
+ "sigma_map.net.0.bias",
61
+ "sigma_map.net.2.bias",
62
+ "blocks.0.norm1.weight",
63
+ "blocks.0.norm2.weight",
64
+ "blocks.0.mlp.0.bias",
65
+ "blocks.0.mlp.2.bias",
66
+ "blocks.0.adaLN_modulation.bias",
67
+ "blocks.1.norm1.weight",
68
+ "blocks.1.norm2.weight",
69
+ "blocks.1.mlp.0.bias",
70
+ "blocks.1.mlp.2.bias",
71
+ "blocks.1.adaLN_modulation.bias",
72
+ "blocks.2.norm1.weight",
73
+ "blocks.2.norm2.weight",
74
+ "blocks.2.mlp.0.bias",
75
+ "blocks.2.mlp.2.bias",
76
+ "blocks.2.adaLN_modulation.bias",
77
+ "output_layer.norm_final.weight",
78
+ "output_layer.adaLN_modulation.bias"
79
+ ],
80
+ "muon_effective_nesterov": false,
81
+ "muon_effective_width_scale": false,
82
+ "muon_effective_weight_decay": 0.1,
83
+ "muon_adam_fallback_nesterov": false,
84
+ "muon_adam_fallback_weight_decay": 0.1,
85
+ "ema_decay": 0.9999,
86
+ "ema_start_step": 0,
87
+ "model_type": "ddit",
88
+ "ddit_mlp_type": "gelu",
89
+ "elf_num_time_tokens": 4,
90
+ "elf_num_model_mode_tokens": 0,
91
+ "qk_norm": true,
92
+ "output_bias": false,
93
+ "output_init_std": -1.0,
94
+ "norm_type": "rmsnorm",
95
+ "target_loss": "hard_ce",
96
+ "linear_soft_target_power": 1.0,
97
+ "linear_soft_target_min_conf": 0.0,
98
+ "linear_soft_target_max_conf": 1.0,
99
+ "t_sampling_mode": "logit_normal",
100
+ "t_sampling_power": 1.0,
101
+ "t_sampling_eps": 0.0001,
102
+ "t_sampling_logit_mean": -1.5,
103
+ "t_sampling_logit_std": 0.8,
104
+ "dual_t": true,
105
+ "corrupt_t_mode": "same",
106
+ "corrupt_min_t": 0.0,
107
+ "corrupt_max_t": 1.0,
108
+ "prefix_block_prob": 0.0,
109
+ "prefix_block_len": 128,
110
+ "mask_ratio_floor_schedule": "none",
111
+ "dirichlet_endpoint_mode": "categorical_dual_t",
112
+ "dirichlet_semantic_t_mode": "same",
113
+ "dirichlet_semantic_t_value": 0.0,
114
+ "dirichlet_semantic_t_curve": "linear",
115
+ "dirichlet_semantic_t_power": 1.0,
116
+ "endpoint_sequence_random_prob_alpha": 0.0,
117
+ "categorical_wrong_from_full_vocab": true,
118
+ "categorical_wrong_from_batch_valid_tokens": false,
119
+ "categorical_wrong_basin_token_ids": "",
120
+ "categorical_wrong_basin_prob": 0.0,
121
+ "categorical_wrong_unigram_prob": 0.0,
122
+ "categorical_wrong_uniform_prob": 0.0,
123
+ "categorical_wrong_prob_floor": 0.5,
124
+ "categorical_wrong_corpus_unigram_path": "",
125
+ "categorical_wrong_corpus_unigram_alpha": 1.0,
126
+ "categorical_wrong_basin_shared_prob": 0.0,
127
+ "categorical_wrong_unigram_shared_prob": 0.0,
128
+ "mask_mixture_original_prob": 0.0,
129
+ "mask_mixture_lowk_prob": 0.0,
130
+ "mask_mixture_lowcorrupt_prob": 0.0,
131
+ "mask_mixture_block_prob": 0.0,
132
+ "mask_mixture_all_prob": 1.0,
133
+ "mask_mixture_lowk_clean_tokens": "0",
134
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
135
+ "mask_mixture_block_tokens": "64,128",
136
+ "simplex_bridge_sampler": "dirichlet",
137
+ "logistic_normal_sigma_min": 0.1,
138
+ "logistic_normal_sigma_max": 1.0,
139
+ "logistic_normal_tau_min": 1.0,
140
+ "logistic_normal_tau_max": 1.0,
141
+ "torch_compile": false,
142
+ "compile_mode": "max-autotune",
143
+ "state_format": "prob",
144
+ "meanflow_weight": 0.0,
145
+ "rollout_train_prob": 0.0,
146
+ "rollout_train_steps": 1,
147
+ "rollout_train_infer_steps": 64,
148
+ "rollout_train_temp": 1.45,
149
+ "rollout_train_max_gamma": 1.0,
150
+ "rollout_train_corrupt_only": true,
151
+ "rollout_train_samplewise": false,
152
+ "rollout_train_compute_always": false,
153
+ "rollout_train_sync_t": false,
154
+ "bridge_noise_init": "logistic_normal",
155
+ "noise_sigma": -1.0,
156
+ "allow_tf32": true,
157
+ "activation_checkpointing": false,
158
+ "activation_checkpoint_interval": 1,
159
+ "activation_checkpoint_scope": "block",
160
+ "ddp_static_graph": false,
161
+ "ddp_gradient_as_bucket_view": true,
162
+ "blocking_data_transfer": false,
163
+ "dataloader_prefetch_factor": 4,
164
+ "full_train_stats": false,
165
+ "tokenized_hf": false,
166
+ "tokenized_pad_token": "pad",
167
+ "elf_conditional_hf": false,
168
+ "record_pad_truncate": false,
169
+ "record_add_eos": false,
170
+ "record_add_special_tokens": false,
171
+ "record_pad_token": "pad",
172
+ "record_shuffle_buffer": 10000,
173
+ "wrap": true,
174
+ "wrap_mode": "stream",
175
+ "wrap_record_buffer_size": 200,
176
+ "owt_cached_chunks": true,
177
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len256_train8_compact_overfit",
178
+ "owt_chunk_cache_rebuild": false,
179
+ "owt_chunk_cache_write_batch": 4096,
180
+ "owt_exact_repeat_per_chunk": 64,
181
+ "online_chunk_shuffle": false,
182
+ "online_chunk_shuffle_buffer": 10000,
183
+ "openwebtext_split": "train_minus_100k",
184
+ "detokenizer": "auto",
185
+ "resolved_detokenizer": null,
186
+ "num_workers": 0,
187
+ "latest_every": 1000,
188
+ "resume_path": ""
189
+ }
190
+ step=100 epoch=100/1000 epoch_step=1/1 micro_steps=100 elapsed=4.4s lr=2.000000e-03 loss=6.7069 loss_recon=6.7069 loss_meanflow=0.0000 mean_model_t=0.2082 mean_corrupt_t=0.2082 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.0981 corrupt_frac=1.0000 acc_corrupt=0.0981 loss_corrupt=6.7069 wrong_frac=0.7939 init_acc_corrupt=0.1131 acc_corrupt_t_0p0_0p2=0.0485 corrupt_frac_t_0p0_0p2=0.5588 acc_corrupt_t_0p2_0p4=0.1336 corrupt_frac_t_0p2_0p4=0.3579 acc_corrupt_t_0p4_0p6=0.2726 corrupt_frac_t_0p4_0p6=0.0752 acc_corrupt_t_0p6_0p8=0.3268 corrupt_frac_t_0p6_0p8=0.0125 out_w_norm=1.0907 out_g_norm=1.0051 acc_corrupt_t_0p8_1p0=0.3047 corrupt_frac_t_0p8_1p0=0.0078 loss_all=6.4750 init_gold_top10=0.1878 init_gold_top100=0.4158
191
+ step=200 epoch=200/1000 epoch_step=1/1 micro_steps=200 elapsed=3.9s lr=2.000000e-03 loss=6.1094 loss_recon=6.1094 loss_meanflow=0.0000 mean_model_t=0.2081 mean_corrupt_t=0.2081 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1071 corrupt_frac=1.0000 acc_corrupt=0.1071 loss_corrupt=6.1094 wrong_frac=0.7947 init_acc_corrupt=0.1130 acc_corrupt_t_0p0_0p2=0.0547 corrupt_frac_t_0p0_0p2=0.5577 acc_corrupt_t_0p2_0p4=0.1482 corrupt_frac_t_0p2_0p4=0.3608 acc_corrupt_t_0p4_0p6=0.2811 corrupt_frac_t_0p4_0p6=0.0734 acc_corrupt_t_0p6_0p8=0.3063 corrupt_frac_t_0p6_0p8=0.0119 out_w_norm=3.2715 out_g_norm=1.4119 acc_corrupt_t_0p8_1p0=0.3311 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.8479 init_gold_top10=0.1846 init_gold_top100=0.4139
192
+ step=300 epoch=300/1000 epoch_step=1/1 micro_steps=300 elapsed=3.9s lr=2.000000e-03 loss=5.5537 loss_recon=5.5537 loss_meanflow=0.0000 mean_model_t=0.2117 mean_corrupt_t=0.2117 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1219 corrupt_frac=1.0000 acc_corrupt=0.1219 loss_corrupt=5.5537 wrong_frac=0.7910 init_acc_corrupt=0.1172 acc_corrupt_t_0p0_0p2=0.0591 corrupt_frac_t_0p0_0p2=0.5484 acc_corrupt_t_0p2_0p4=0.1675 corrupt_frac_t_0p2_0p4=0.3611 acc_corrupt_t_0p4_0p6=0.3164 corrupt_frac_t_0p4_0p6=0.0803 acc_corrupt_t_0p6_0p8=0.3501 corrupt_frac_t_0p6_0p8=0.0139 out_w_norm=5.1033 out_g_norm=0.7101 acc_corrupt_t_0p8_1p0=0.3574 corrupt_frac_t_0p8_1p0=0.0078 loss_all=5.2805 init_gold_top10=0.2029 init_gold_top100=0.4181
193
+ step=400 epoch=400/1000 epoch_step=1/1 micro_steps=400 elapsed=3.9s lr=2.000000e-03 loss=5.0275 loss_recon=5.0275 loss_meanflow=0.0000 mean_model_t=0.2073 mean_corrupt_t=0.2073 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1417 corrupt_frac=1.0000 acc_corrupt=0.1417 loss_corrupt=5.0275 wrong_frac=0.7950 init_acc_corrupt=0.1124 acc_corrupt_t_0p0_0p2=0.0639 corrupt_frac_t_0p0_0p2=0.5613 acc_corrupt_t_0p2_0p4=0.1992 corrupt_frac_t_0p2_0p4=0.3571 acc_corrupt_t_0p4_0p6=0.4175 corrupt_frac_t_0p4_0p6=0.0727 acc_corrupt_t_0p6_0p8=0.4973 corrupt_frac_t_0p6_0p8=0.0126 out_w_norm=6.7732 out_g_norm=0.4139 loss_all=4.6901 init_gold_top10=0.1988 init_gold_top100=0.4235
194
+ step=500 epoch=500/1000 epoch_step=1/1 micro_steps=500 elapsed=3.9s lr=2.000000e-03 loss=4.2623 loss_recon=4.2623 loss_meanflow=0.0000 mean_model_t=0.2097 mean_corrupt_t=0.2097 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.1820 corrupt_frac=1.0000 acc_corrupt=0.1820 loss_corrupt=4.2623 wrong_frac=0.7932 init_acc_corrupt=0.1147 acc_corrupt_t_0p0_0p2=0.0728 corrupt_frac_t_0p0_0p2=0.5540 acc_corrupt_t_0p2_0p4=0.2703 corrupt_frac_t_0p2_0p4=0.3605 acc_corrupt_t_0p4_0p6=0.5126 corrupt_frac_t_0p4_0p6=0.0745 out_w_norm=8.3053 out_g_norm=0.4603 acc_corrupt_t_0p6_0p8=0.5460 corrupt_frac_t_0p6_0p8=0.0139 acc_corrupt_t_0p8_1p0=0.5742 corrupt_frac_t_0p8_1p0=0.0078 loss_all=3.8533 init_gold_top10=0.2047 init_gold_top100=0.4149
195
+ step=600 epoch=600/1000 epoch_step=1/1 micro_steps=600 elapsed=3.9s lr=2.000000e-03 loss=3.4234 loss_recon=3.4234 loss_meanflow=0.0000 mean_model_t=0.2089 mean_corrupt_t=0.2089 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.2366 corrupt_frac=1.0000 acc_corrupt=0.2366 loss_corrupt=3.4234 wrong_frac=0.7934 init_acc_corrupt=0.1145 acc_corrupt_t_0p0_0p2=0.0941 corrupt_frac_t_0p0_0p2=0.5598 acc_corrupt_t_0p2_0p4=0.3704 corrupt_frac_t_0p2_0p4=0.3568 acc_corrupt_t_0p4_0p6=0.6189 corrupt_frac_t_0p4_0p6=0.0750 acc_corrupt_t_0p6_0p8=0.6388 corrupt_frac_t_0p6_0p8=0.0120 out_w_norm=9.4844 out_g_norm=0.5091 acc_corrupt_t_0p8_1p0=0.6230 corrupt_frac_t_0p8_1p0=0.0094 loss_all=3.1165 init_gold_top10=0.1868 init_gold_top100=0.4166
196
+ step=700 epoch=700/1000 epoch_step=1/1 micro_steps=700 elapsed=3.9s lr=2.000000e-03 loss=2.6372 loss_recon=2.6372 loss_meanflow=0.0000 mean_model_t=0.2095 mean_corrupt_t=0.2095 mean_loss_t_weight=1.0000 linear_soft_target_mean_conf=0.0000 prior_center_loss_beta=0.0000 rollout_train_applied=0.0000 grad_enabled_before_rollout=1.0000 grad_enabled_after_rollout=1.0000 logits_requires_grad=1.0000 raw_loss_requires_grad=1.0000 acc_all=0.3365 corrupt_frac=1.0000 acc_corrupt=0.3365 loss_corrupt=2.6372 wrong_frac=0.7932 init_acc_corrupt=0.1151 acc_corrupt_t_0p0_0p2=0.1393 corrupt_frac_t_0p0_0p2=0.5552 acc_corrupt_t_0p2_0p4=0.5322 corrupt_frac_t_0p2_0p4=0.3584 acc_corrupt_t_0p4_0p6=0.7902 corrupt_frac_t_0p4_0p6=0.0764 out_w_norm=10.1622 out_g_norm=0.6434 acc_corrupt_t_0p6_0p8=0.8139 corrupt_frac_t_0p6_0p8=0.0132 acc_corrupt_t_0p8_1p0=0.7083 corrupt_frac_t_0p8_1p0=0.0078 loss_all=2.2784 init_gold_top10=0.1924 init_gold_top100=0.4171
197
+ W0517 17:59:50.900000 252604 torch/distributed/elastic/agent/server/api.py:719] Received 15 death signal, shutting down workers
198
+ W0517 17:59:50.901000 252604 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 252608 closing signal SIGTERM
199
+ W0517 17:59:50.902000 252604 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 252609 closing signal SIGTERM
200
+ W0517 17:59:50.902000 252604 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 252610 closing signal SIGTERM
201
+ W0517 17:59:50.903000 252604 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 252611 closing signal SIGTERM
202
+ Traceback (most recent call last):
203
+ File "<frozen runpy>", line 198, in _run_module_as_main
204
+ File "<frozen runpy>", line 88, in _run_code
205
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 922, in <module>
206
+ main()
207
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
208
+ return f(*args, **kwargs)
209
+ ^^^^^^^^^^^^^^^^^^
210
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 918, in main
211
+ run(args)
212
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 909, in run
213
+ elastic_launch(
214
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 139, in __call__
215
+ return launch_agent(self._config, self._entrypoint, list(args))
216
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
217
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 261, in launch_agent
218
+ result = agent.run()
219
+ ^^^^^^^^^^^
220
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/metrics/api.py", line 137, in wrapper
221
+ result = f(*args, **kwargs)
222
+ ^^^^^^^^^^^^^^^^^^
223
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run
224
+ result = self._invoke_run(role)
225
+ ^^^^^^^^^^^^^^^^^^^^^^
226
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 870, in _invoke_run
227
+ time.sleep(monitor_interval)
228
+ File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/api.py", line 84, in _terminate_process_handler
229
+ raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval)
230
+ torch.distributed.elastic.multiprocessing.api.SignalException: Process 252604 got signal: 15
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step11000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_011000.pt
2
+ step=11000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁you', ',', '▁you', ',', '▁you', ',', '▁you', ',', ',', '▁you', ',', ',', '▁you', ',', '▁you', ',']
9
+ tail_tokens: ['▁person', ',', '▁man', ',', ',', '▁man', ',', '▁man', ',', '▁man', ',', '▁man', ',', '▁man', ',', '</s>']
10
+ you, you, you, you,, you,, you, you,, you, you,, you, you, you, you, you,, the people, you, you, you, you, you, you, you, you, you, you,, you, you, you,, you,, you, you, you, you, you, you, you,, you,, the people, the people, you, you, you,, you,, you, you, you, you, you, you,, you,, you,, the people, You, you, you, you, the people, you, you, you, you, the people, he,, you, you, you, he,, the people,, the people, he,, the people,, the people, you,, you, he,, he,, you, you, the people,, you, you, you, he, you, the people, the the people, you, the people, the people, the people, the people, the people, you,, a person, you,, the people, you, the people, you, You, you, you, you, the people, the people, he,, you, the people, And, you, the people,, he,, you, he,, you, you,, you, you, you, you, you,, you, you, the people, you, you, you, you,, you, You,, you, you,, you,, you, you,. You,, you,, you,, you, he,,, you, you, you, he,. You,, you, you,, you, he,, you,, you,, you, you,. You,,, he,,, you, You,, you, you, you,, you,, you,, you, you,, you,, those, you,, you, you, you, you, you,, you,, you,,,, you,, you, man, you, your, you,, you,he, you, a person. You,, you, you, you,, he,, man, you, you, you,, you,, man, you,, you, the people. You,, he,, you,, the people. You, you, you, man, you, you, he, you, Putin, you, man, you, the people, the people, man, You,, you, you, you,, you,, you, man, you,, you,, man, you, your,, man, you, you, you,, man, you, man, you, man, you,, you, man,, man, you, man,, you, man, man, you,, man, you,, you, man,, man, man, you, he,, you,, he,, you, the the people, he,, you, the people, the people. You,, the people, the people. You,, the people. 
You,, man, man,, man, man,, a person, he,, man, man,, a person, you,, man, man, man, man, man,, man, man,, man,, man,, man, man, man, man,, man,, man, man, man, man,, man,, man, man, man, man,, man,, man, man,, man, man,, man,, man,, man, man, man, he,, man, man, man,, a person, man,, man,, man, man,, man,, man, man, a person, he,, a person,, man, man, man, he,, man, man, man, man, man, man, man, man, man, man,, man, man, man, man, man, man,, man,, man, man, man,, man, a person, man,, man,, man, he,, man, man,, man,, man, man, man,, man,, man,, man, he,, man,, man,, man,, man,, man, man,, man, man, man, a person,, man, man,, man, man, man, man, man, he,, a person, man, man, a person, man,, man, man, man, man, man,</s>
11
+ ===== sample 1 =====
12
+ head_tokens: [',', '▁that', '▁you', '▁do', ',', '▁Sometimes', ',', '▁that', ',', '▁that', '▁you', '▁do', '.', '▁You', '▁said', ',']
13
+ tail_tokens: ['▁that', ',', '▁that', '▁you', '’', 're', ',', '▁the', '▁person', ',', '▁you', ',', '▁that', '▁you', '▁are', '</s>']
14
+ , that you do, Sometimes, that, that you do. You said, then, that you do that. And he said, he’s not okay, if that’s okay, then, he said that, he said, he said, he said, that you uncover, I know that he doesn’t excuse the things, he said that he’s not lying about it. That’s a thing, whatsoever, he has a doubt, his humility, the wicked thing. That’s all that’s about you, that’s about his negligence, his assumptions, all, his flair, his laughter, he, all, not the truth, that. He said that I said that he is articulate, what is a thing, the truth, what is, what, the truth, he’s the truth. And I’m not a thing, you, what, what, what, what, what he is, what, that is. It’s the purity, the forgiveness, it’s succession, the purity, quote, the truth, it’s not the thing, what, what the contradiction, what, what, the truth, the contradiction, what, the truth, what. It is not a thing, what, it’s a thing, what, the contradiction, what, what, the truth, what, what he, what, you do, what, that’s not the reason for you, not, to be punished. For me, it’s not the organizational thing, because he’s the person, he is not just a person of unity, you, not be a person, not not be a person, because he is a thing of unity. He said the person, not the person of the people, because that he is not just a person, but not, to you, you, you, he is, you speak, you, if you speak, he is, you, that he’s not the person, to you. He said the biblical thing, that he, to be, to you, you, to be, I do, he respect you. And he said you, I want to do that, you come to you, if you like that, he’s wicked to you, that, you say, you, you’re acting, do whatever, he, you, you come, if you are, you come here, if you don’t have a thing. He is sublime, that I know that he is imperfect, that truth, he’s imperfect. And if that’s what he is, the truth. It’s the forgiveness, the the novelty, the forgiveness. It’s not the forgiveness, the things which, the purity of you, you know. It’s the translator for that person. 
The ethical thing is that if the person is that person, there is a person with you, you know, that thing, there is a person who touches. For me, the ethical thing, for all, you? For me, he, you know, that, that he’s a holy person, you know, resignation, you know, that’s all about you. You, he, that, you, if he is trustworthy, the forgiveness of his lips, the person, that he is the thing, the person, he is you, the person, you of, the person, you are, that is the person, you, you, that you, to be, you, you, you, he, you, and you, the truth of you, he you, you, you, he, you, you, you lie, And that’s destiny, you. That, he is. And you know, you, that’s god, you, you. He is a thing, you, the reason, that he is the reason, you, you, the reason, he’s the reason, that’s his reason. It’s the reason that he is, that he’s, you, you, a holy person, that he’s not a person. It is, that he, you, you, you, he, you,, you,, you, you, you, you, you, you,, you, you,, you, you, you. That he, you, he lies, that he’s inherited. And you laughed, you, that truth, be a person, that is a person, you, that’s not the person. That is that, that you’re, the person, you, that you are</s>
15
+ ===== sample 2 =====
16
+ head_tokens: [',', '▁that', '▁humility', ',', '▁not', '▁the', '▁illnesses', '▁of', '▁the', '▁world', '.', '▁And', '▁yes', ',', '▁that', "'"]
17
+ tail_tokens: ['▁', 'a', '▁thing', ',', '▁', 'he', "'", 's', '▁', 'a', '▁thing', ',', '▁that', '▁everything', ',', '</s>']
18
+ , that humility, not the illnesses of the world. And yes, that's not the temperament, I'm circumstance, that's what I'm doing, that's not beforehand. That's what it's. That's what it's going to do with what you're doing, that's what you think, what you think. I have a world's world. It's just essays, you think that that's a thing, that you have a characteristic ways, and he said. That's going to be a thing. It's not true in the world. That's in the world. It's going to teach you, obviously that that's a philosophical thing, prosperity, that I'm not at the point, that I appreciate that. When you teach that, he said, he wasn't teach that thing, but he said. He said, I think you could be that. "I think that's what you do, that I appreciate that, I say to be that. I said. He said, the damn thing, that he said, that truth, that I'm not for a thing. Not the truth, the things that you have a person, that taught you that the truth of the world existed. "You have the world, personalities, I said that, he said. So, you have the world, that he's not about you, the world. He said that. I said, he said, that. You, you pray, he said, you. You have the world. It's a thing, for that, that if he didn't have to be that, that he didn't have, a thing. That, that's a thing, and that's integrity. I think that's hierarchy. And he said, you know that, you know, of forgiveness, humility, he's a person. He's just a person, he said, if he doesn't like that. But he just wants to be about that. He's not about the world, at that. "I'm not really, reflective. It's about forgiveness, that he's a person, and if he respected the world, at the point, he doesn't like, he can't be about the world, and that he can't be shining, the world that he's not just about his world, the world, and I said, you worries about the world. He said, he said, that he said, you know, you know, he said, you know, you, honored that. "He's an Americans. 
When you know, you'll appreciate you, you know, you, you, you, you, you, you, and there's a thing, you know that. It's not a thing, not you. You're negative that, anybody, you know, you know that, you there are two people, and there, that's anybody, anybody, you know, that he's optimistic that he said, that thing. I said that he liked that thing. Oh, there are a lot of people in there. "I said, that's a bad thing. It's a thing, a thing. And you have a bad thing, you if I'm about that, you know, that thing, and that thing, that's not that you have to talk about you. It's the things you have, you said, on the other side, that I'm not the thing, that he's the whole thing. I said that truth, you're bad. "Hey, you know, you know, I'm, I'm imperfect, I said that, I said the world, I'm not resilient. You know, you're the world. He said that, I said, he's just a lot of people, and he said, I said, he's the world, I said, the time, to be a person, to be that, now that there is going to be a person, a part of that, I'm the thing for the world. "It's kind of thing, that's an important thing, what I think, that's going to, you, somebody, he said, the world, that thing, and he said, that's the world, all that important to you. "Cocord, he's an issue, that's a thing, he's a thing, that everything,</s>
19
+ ===== sample 3 =====
20
+ head_tokens: [',', '▁lose', '▁the', '▁people', ',', '▁you', ',', '▁you', ',', '▁the', '▁bad', ',', '▁you', ',', '▁you', ',']
21
+ tail_tokens: ['▁to', '▁be', '▁', 'a', '▁person', ',', '▁you', ',', '▁that', '’', 's', '▁extensions', ',', '▁you', ',', '</s>']
22
+ , lose the people, you, you, the bad, you, you, you, he said, you the bad thing, he’s bad, you, OK you, the people, you do. Mean you to be bad, you, you’re bad, you, what you do, you be a person, bad in the world. It’s bad for you. It’s not a bad person. You, be a bad, you, a bad person, you, be a bad person, you are bad in the world. You laugh, you’re not a bad person. “It’s a bad person, that you be a bad, bad, bad. I don’t be that bad, that bad. You don’t bad, you, that bad, and you’re bad in the world of your life, you, a person, not a bad person. You said he, a lot of people, and if you don’t be bad, not you, the people, he said, to be, that thing. It’s an emotion. He’s a part of that. You said you, the people, you, the people, you, the people, the people, you, you, in the good world, that’s, that’s bad for you, not you. In the world, you know, that you know, that you are a lot, the people, you know, that you are a lot of people, and that you are a lot of people. You said that I said, you know, you are a bad person, that I said, you, you, that you it, that’s bad, you, to be bad, that I’m, to be the person, not that person, that he said, not a person. When you’re the person, I say you, you, that you know he, a bad person. And if you know, that person, he said, there’s a bad person about you. In the world, you, I’m not that bad person. You know, you, be a person, you know, that you, you know, you, you, you know, you, you. You know, you, you know, you he said, you, you? You, you’re bad, you’re one of you, if you’re bad, you aren’t in the world, you, you, you, you’re bad, you, you’re the person, not a bad person. You, you, the people, you, the people you believe, the people, the people you, you remembered, the people. It’s you, the people, you,, the person. The people, a person, you. You,, you. You, you, you, you. Like you, the people, you, that that’s the person you, you be. It’s you, you be a person, pushing that’s you. 
You be, who’s you for you, who’s you, the people, you, you, the people. If you be a person, you be, you be for you. You be for you, what you’re for you, you, you. If you, you be, you’re for you, you be. If you’re the person, you, you be, you, you, you, be you, you, you, a person, you’re gentleman, you’re not a person. Where you, you’re not a generic person, you, you, you, the person, be you, you, be a person, you, the person, be a person, be the person, you be, you know, the people. It’s the person, the people, you, the person, the person, you. You, the person, you, you, your person, the person, you, the person, the other people. The person, the the other person, the person, the person,, be a person. It’s a person, you, a person, you, you, you, a person, you, you’re not a person. You, the person, the person, you, you, the person, you, you, the person, the person, a person, you, be someone, you, you be the person. When you’re someone, a person, that’s destiny, that’s destiny. When you, be the person, that’s not to be the person, you everybody. It’s not to be a person, you, that’s extensions, you,</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['▁you', ',', '▁you', ',', '▁you', ',', ',', '▁you', ',', ',', '▁you', ',', ',', '▁you', ',', '▁you']
25
+ tail_tokens: [',', '▁what', ',', '▁you', ',', '▁part', ',', '▁you', ',', '▁you', ',', '▁uncle', ',', '▁you', ',', '</s>']
26
+ you, you, you,, you,, you,, you, you,, you, you, you, you, you, you,, you,, you, you, you, parliament, you, you, you, you,, be a person,, you, you, you, you,, you, you, he,, you, you,, you, you,, you, you,, you, you, you, you, you,, you, you, you, you, you,, you, you,, you, be a person, you, you, you, you, you, you,, you, you, you, you, you, you,, you, you, you,, you, you,, you, you,, you, you, you, you, you,, you,, you, you, you, you,, you,, you, you,, you, you, he, you, you,, you, you,, you,, you, you,, you, you, you, you,, you, you, you,, you, you, you, be a person,, you, a person, a person, a person, you,, which, you, you, You, the person, a person, you,, you, you, you, you, be the person, you, you, You,, you, you,, you, you, you, you,, you, you,, you,, he,, you, be the person, you, you,, you, you,, you, you,, you,, you, you, you,, you,, you, you, you,, you, you, you, you, you, you, you,, you, be a person, you, you, be a person, the person, you, you,, you, you, You, you, you, the person, be a person, a person,, you, you, you, you,, be the person, you, you, you, you,, you, you, you, you, you, you, you, you,, you, a person, you, be the person, you, you, you,, you, you, you, you,, you, you,, you, you, anybody, you, be a person, you, you, you, you, you, you, you,, he, the, you, you, you, you, you,, you,, you, lawsuit, you,, you,, you, you, you,, you, you, a man, you, you, you, you, you, man, you,, you,, you, you,, you,, you, you,, you, you,, you,, you, you, you, you, you, you,,, you, you, a man, you, you,, you, you,, you,, you, you, like, you, you, you,,, you, he, you, you, you, you, you, you, he,, you, you, you, you, you, You, you,, you, a man, you, you, you, you. It’s the person,, you. You, you, you,, you, you, you,, you, you, you, you, you, god, you, you, you, you, you, you, you. It’s a person. You, you, a person., you. You, you, you,, you,, you,, you, you,, you,, you, You, you, you,, you. 
You, man, you, a man, he,, man, you, you, you,, you, you, he,, man, you, you. You, you, a man. It’s,, you,, you. It’s a man, married,,, you, a man, you, you,, you,, a man, you, you. You,, you, you, you. You, you, you, you,, you,, you, you,, you, you, you, you, you, you, you,, he, you, you, you,, you,, you,, you,, he, you,, he, you,, you,, you,, you. You, you, he, you, You,, a man,, you,, you,, you, You, what, you, part, you, you, uncle, you,</s>
27
+ ===== sample 5 =====
28
+ head_tokens: [',', '▁the', '▁people', ',', '▁the', '▁people', ',', '▁the', '▁people', ',', '▁the', '▁the', '▁people', ',', '▁the', '▁people']
29
+ tail_tokens: [',', '▁I', '’', 'm', '▁secrets', ',', '▁I', '▁don', '’', 't', '▁think', '▁that', '▁people', ',', '▁you', '</s>']
30
+ , the people, the people, the people, the the people, the people, represent that. I don’t want to be achievements of the people, what that meant. You, you, what the people are, you, you, that’s about that, you, that’s what you answered, you excluded, and that’s what I think, I don’t want to be you’re going to be trustworthy. It’s going to be. That’s tough to you, and he opposed to you, I have indicative to the world. It’s not the people. All the people, all of you, you, all the people. That’s the world, that’s a world for you. Where you’re not the person, you, you to be. He’s be that. I’m not a person, you, if you, the people, you, the the people, you’re not going to be reflective for the people. I don’t like him, he’s people, you, the people, you care about you. And that’s what he’s fighting for the world. And that’s not just the people, you, I’m a person, you, you. But I’m not going to do that thing. I’m telling you, you are a person, you he, you, you do kindness. You do encouragement, you do forgiveness for you. I’m not going to be in the holy world, the people, you, you, you the people, you the people, you, the people, you, you, the people, the people, you the people, you the people, the people, you, you, you, you be,, you come to you, you, the people, you, the people you come to you to the world. I’m hearts, you, that’s the person, you. You, he remembered, you, you’re humble, you deserve, you. When you’re embarrassed, that’s because you’re going to be confidence to the people. You, you’re going to be the people. It’s not that you, you, you be, you, be, you be for you, be for you, you, be for you, be, be for you. It’s humility, you, that’s nostalgia. You’re interacting with that, be a person, you be the person, that you be a person, be you. 
You know, the person, you know what he supports, what’s forgiveness, you, be the person, you, the person, he, you, the person, he’s a person, you, that I’m a person, you, you, you, the person, you, you, you, the people, you, you, the people, you, the people, you, the people, you, the people, the people, you, the the people, you, you’re a person, the people, the the people, the people. That’s the people I’m not a person, I don’t have a person, the people, that I’m not a the person, that I’m not a person, that I’m not like, that person, that I’m not leadership, that’s all of you, I’m the people, you are precious, you are thinking, the people. It’s not the people. You know, you choose, you do. You remember, you don’t remember the people, you, the people, elected the people, that I’m a different person, you don’t have a person, you’re not the other people, you’re just going to have a different person, you don’t have a person, people, that you are, people, part of the people, you’re going to be just a person. And, what he said, that’s all the people, the people, the people, the people, you know, the people,, all the people, you know, the people, the people, the people, you know, if you’re the people, you, the people, you’re a part of that, you’re in the part of that. Just the people, you, the people, the people, you, the people, the people, the people, you, the people, you are, you, I don’t think you, the person. It’s about you, the people, I’m not a person, and if you are, you are, that’s not for you, I’m secrets, I don’t think that people, you</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁man', ',', '▁the', '▁man', ',', '▁the', '▁man', ',', '▁the', '▁man', ',', '▁the', '▁man', ',', '▁the', '▁man']
33
+ tail_tokens: ['▁the', '▁people', ',', '▁the', '▁people', ',', ',', '▁the', '▁people', ',', '▁the', '▁people', ',', '▁you', ',', '</s>']
34
+ man, the man, the man, the man, the man, the man, man, you, the, be, you, the man, you, you, the the man, you, the,man, you, the, the man, the man, you, the man, man, you, the man, man, the man, the man, you, the man, the, the man, the the man, man, the man, the man, you, the man,, the the man, the man, you, the the man, the man, the man, the man, you, you, the man, man, the man, the the man, the man, you, the man, you, the man, the man, you, the man, the man, the man, the man, the man, man, you, the man, the the man, you, you, the the man, you, you, you, you, man, you, the man, you, you, you, the, the man, the man, man, you, you, man, you, man, the man, you, the man, the man, you, you, you, man, you, you, you, the man, man, you, the man, you, the man, you, you, man, you, the, you, you, the man, man, you, you, you, you, you, you, the, you, man, you, man, the man, man, you, you, man, man, you, man, you, the man, you, you, man, man, you, you, the man, you, you, the, you, the man, man, you, the man, man, you, the man, the man, the man, the man, the man, the man, the man, man, man, you, the man, man, you, man, you, man, you, the man, you, man, you, you, the man, man, you,, you, man, you,, you,, say, be, you, you, real, the man, you, You,, you,, man, you, you,, man, you, man, you, the man, man, you, you, you, man, you, you, you,, you, you, you,, you, you, go, you, you,, you,, you,, you,, you,, you, you, you, you, Mohammed, you,, you,, you, you, you, you, see, you, lots, you,, you,, say, you, you,, you, be, you, you,, life, you, you, you, you,,, you,, you, Oh, you,, you, you,, you,, you,, man,, you, you, you,, man,,, you, you, man, you, you, you,the, you,, whatever, you, you,, you,, you, you, you,, man, you, you, you,,, man, you, You,,,, you,, you,, you,, you,, you, you,, you,, you, you,, you,,,, you,, you,,, you, you,, you, you, you, you, employee, you,,, you, you, you, you,,, you,, you,, you, you,, you,, records, you, rock, you,,, man, you,,, you, you,,,, you,, you, 
you, you,,, you,, used,,, you,,,, you, you,, you,, you,, make, you,,, you, you,, you,,, you,,, you,, you,,,,, you,, you,, you,,, you,, you,, you,,,, you, you, been, you,,, you,,, you,,, you,,, you, you,,,, you,, you,, you,,, you,,,, you, you, you, you,,,,, you,,, you,,,, you, you,,, you,,, you,, you,, you,, you,, you, the people, you,,, you,,, the people, you, you,,, you, you,, you, you, you,, you, you,,, you,, the people, you,,, the people, the people, the people,, the people, the people, you,</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁you', ',', '▁you', '.', '▁It', '▁was', '▁', 'a', '▁bad', '▁person', ',', '▁it', '▁wasn', '’', 't', '.']
37
+ tail_tokens: [',', ',', '▁you', ',', '▁you', '▁be', '▁', 'a', '▁person', ',', '▁', 'a', '▁great', '▁person', ',', '</s>']
38
+ you, you. It was a bad person, it wasn’t. If it’s a person, you deserve, you. You,he, you respect, it’s fulfillment. When you, at the same time, you be a person. And, it’s not a person, you, don’t you, that person. When you are, you, to bere. It is the person, you. You, be conditioned to be a person, the people you are, you, the people, you are, the person, who you be a person, the person you, be, the person people. It’s not you. And that you are, you, you, you, you are not, the person, the you are not, the people. If there is a person, you, you, people, you are, not you. That’s not a person, and that’s what you do, that’s what you do, lack not the people, you. When you, the people, you, to be, respect you, you, you, the people. The people, the people. You, if you, the people, you be you, you, to be, you, you, be a person, you. You,, you, you, not you. So you, you, you, you, you,, you, because you, to be, your other person, you, you, you, you, you, you, you, to be, the people. When you comes, you, you, to be a person, you, a person, you, the people, you, you, the people. You,, the people, you, you. It’s not dwelling to you, you, to be, you. When you want to be, say you, you come, be, you, you, appreciate you, you, you, you,, you, wanted you, you, you are. When you don’t care about that, that’s it to you. You, he, you recognized. He’s a person, a person. When you, the person, that’s not a person. He’s you, you, you, you, you, you, the person, you. He’s you, the person, the person, you, you,, the person, be you. And if you, be you, you, you, be honest with that. He, you,, you,, the person, the person, the person, the bad person. When, want to be a person, be you, be for you. Where you want to be the people, be you, to be you. The person, a person, a good person, be you. You have to be, it’s not the good person, be the person, it’s not a good person, be you, that’s disastrous to you. You have to be someone. 
You, you, you, you, you have a person, you, be you, be you, you, you, be you, you, you, a person, you, you, be you, you have a good person, you don’t believe. You, you, you have a good person, you, be you, you don’t believe you. When you/, you, be you, you, you, you, you. When you, to be, you, you,, you, you, be you, to be part of your life, you, you, you,, you, be you, you. age you, be you, you,, you, you, you,, you, be you, be you, you, you, people, you, you, you, be you, be you, you, you, you, you, you, be you. You, you,, you, you, you, you. You,, you, you, you, your people, you, the people, you,, you, what you, you, who you are, what you are. You, the people, you, you, be you. It you, your people, you, you, you, you, you, you,, you, you, be you, you be a good person. You, you, be you, you, you, you, be you, be you, you, you, be you, you, you, you, you be a person, you, you be a person, you, you, you, you, be you, you,, you, you, you,, you, you, you,, you, you, you, you, you, you,, you, you be a person, a great person,</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step113000_gpu_C1to64exp_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁or', ',', '▁or', '▁or', ',', '▁or', '▁or', '▁or', '▁or', '▁or', '▁or', '▁', ',', '▁or', ',', '▁or']
3
+ tail_tokens: ['▁or', '▁or', '▁T', '▁or', ',', '▁or', '▁or', '▁or', ',', '▁', ',', '▁or', ',', '▁', ',', '</s>']
4
+ or, or or, or or or or or or , or, or, or or , , ,, or, or, or or or, or, or or, or or or, or or, or,t or or or, or or or, or or or or or,t or , or or or or or or or , , or, or or or or or or, or or, or he or or, orhe or or, or, or, , or or or or or or or or or, or, or, or or or, or or, or, , or, or, or, or or or or, or or, or, or or or or, , , or, , or or or, or, or or F, or or or or, or, or, or, ort or, or, or, or , , or, or or, or or ,' or orhe or or or or, or, or or, or, , or or or or, or or, or or or, ort or or or , or or or or, or from from or or ** or or or or or or, from,he or orhehe or or, or or orhe or or, or, or or or or or , or, or, or or or or or or or, or or or or or or or, or , or, or, or or or or or or sometimes, or, or or or or or, or, or or or or, or, or, or or , or or or , or, or or or, or or no, or or or or or do, or or , or, or or, or or or,n,he, , or, of, or or or or or or or or or or or or or, he or or, ,t, or or or or or or, or or or or or or, or, or, or or, or or or or or or T or or or or or or or he , or or, , or or or , or or or or, ,he or or or or or or or or or or or, or or or , or, or or , or , or orehe or, or or or or or or or or , or or or or or or or, or or orhe or or or or, or, or or or, or or or or or or or , or, or of, or or from or, , ,he or or or or or take or or , ,t or, or , , or, or or or , or, or or, , or , or or or or or or or or or or or the or or or, ,t or or or,he or or or or or or or or or, or or or she or or , or or or or of or or or or or or, or or or ,he or, , or, orhe or or or or or he or, , or, or or or or, he or or or or or or or or or or or or or or or or or or or, or or or or , the or or or or, or or, or or or,the or or, or or or, , or or or, or or orhe or or or or, or or or or or, or, , or the or or or Tt or or 2 or pot, , T, , he or or or, or or or or , or, or or or, or or or, of, or or or or or or, or or, or or, , or, or, or or or, or, or or or or, or, , or, or, or or, or of or, , orhe or, or or or or or 
or or, of or, or, 3,h, or, or or of , or, or, or, or orhe or or or or or or , or or or or, or, or, or or, or or, or, he or or or or , or, or or or, or, or or or or or ,he, or , or, or, T or, or or or or or or or or, or or , or, or, or, or, or, or or, or or he or , or or or, or, , or or , or, or or go, or or or or or or , or, or or T or, or or or, , or, ,</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['-', 'he', '▁or', 'd', '▁or', 'x', '▁', 'x', '▁', 'x', '▁', 'x', '▁or', 'x', '▁or', 'd']
7
+ tail_tokens: ['▁or', 'x', '▁or', 'x', '▁', 'x', '▁', 'x', '▁', 'x', '▁or', 'he', '▁or', 'd', '▁or', '</s>']
8
+ -he ord orx x x x orx ord orx orx ord ord x orx x ord ord x ord ord orhe ord orda orhe ord orx x x orhe ord ord orx he ord ord orx ord x x orhe x orhe if orhe ordd ord ord ord he ord orx x x x x x he . he ord ord orhe ord orda he ord ord orhe ord ord ord ord ord ord orhe ordd ord ord ord orad ord x x orx ord ordd ord he ord or ord ord ord d if he ord x x x x x orhe ord d x ord x x ord orhe ord x x ord x .he ord ord ord ord ord he ord orx x x x x he ord orhed ord if he ord x xhe ord ord or ord ord ord x x he ordd ord ord ord or ord ord he ord ord ord ord ord x ord x orx x xd x x x x x whid x ord he ord orhed ord ord x , ord he ord he ord ord ord orhe x x d dd ord ord x ord ord ord orhed x ord ord orx d ord ord ord ord ord orhe ord ord xd orx he orx he ord x ord x x ord if he ord he x orx orhe or ord ord x ord x x ord he ord x x xd orx x ord x x x x x x ord ord orhe ordd ord ord ord ord x x orhe orhe ord x x orx x x if he ord x x ord ord x x ord x x ord ord ord x x ord x ord ord ord ord orx x ord ord x ord ord ord ord x x ord x ord x x x ord ord ord x x ordd orx orx x ord or ord orx x x ord orx orx orx orx xd he ord ord ord xd he ord x x he ord ord ord ord ord ord ad ord x x xhe ord orx ord ord x ordhe ord ord ord hed ord ord orx xd x x d if ord , ord xd orx orx he ord ord ord ord x x ord orxd orx x ord orx x xd ord ord ord x ordhe ord ord if orx orhe orhe or ord ord ord ord ord ord ord ord orx xhe ord ord orx dhe ord orx orxd ordhe ord ord he ord ord he ord orx orhe ord orhe ord ord orhe ord ord ord orhe orad ord orhe ord orx orx ord ordhe ord he ord orx x ord d x ord orhe ord ord ord orx he ord orx orhe orx orhe ord ord ordhe ord he orx he ord ord ord ord ord orhe ord orhe ord orxd orx x orx orx x x x orhe ord or</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁the', '▁', ',', 'a', 'he', ',', 'n', ',', '▁', 'he', 'he', ',', ',', 'he', 'he', '▁the']
11
+ tail_tokens: ['he', ',', 'he', 'm', 'he', ',', '▁"', 'he', ',', '▁', 'he', 'he', 'm', '▁the', 'he', '</s>']
12
+ the ,ahe,n, hehe,,hehe the a thehehmehe themn the hm, a,dahehhe-he he, thea he m mah a, , hem, theheh andhee the theadhe, thene, thedhehe the, ofm the the the cluehe, the,mdh the them, them hh,ht,nhe m and the the andndelihe,m,aa, , heheheam,dhe hea the, them,nhe mhe the, thehohe thehenm "e, formaheemn the, the, m,he, of the thehe he nhe, henm nexthe the, mdna, thea thehem, butd,,heahe ,hea the, n, he he thehemm,heddh,mhe m, heehhehenhe mh themhe them,mhet the m forhehm the,na,he the, the the the hen the,he the the he themhe buthe thetm hemhe thehemhe ahe, then,hena thea hehe themd he thetm mahe themtm m, the the hehem,thehe the, m he, them he,m thenhe thehe theam he,hedehe,n the, thehe-het the he hea the, thehe them theheehe the,ahe, thehe, the thehehehe he he,m the hehehe,mhe m,nhe,hemhe canma, he,mhethe the, and the themheehehem them,a,he,d the the buthe buthe and,hm, themnhetm he he, he the,mmdhehehe the the,hemhehed,he, he ,henhea forhe m he formhehe the buthea,hm the the, he the he thehe mhem he,mhe themhehe,he m theaheheh,mhe, them,he,hemahe ofmhehe m, he,hea thehehem..., he them theaahe he mhe thehe hehem the,the thehe thehe them, them of the the,, them, hehem, whilehe the theahehe he,mhehe thehd themhem thea, mhe,mhedhehehe buthet, he...he thehe them,nhenmhehetm,hed, anhe canhe on the thed,hea he thehem the andmahehhetmnm, thehe henm, thehehemaea he theheehe thehe butheamthe</s>he the,m,nathe them ofahenahemhe the them a thehe he buthe the thetm he the, he,hem he thehe,he, thehehem,hemthehe but thehe, thehehehe thehe thehe he, the,hehe, he theheaheehemhe, hehe, hehe the The Amhehe, heahe ma, thehe,m the the, them, the the the he but hemhe, the,hem the thehe the thea, he nexthe the the thehe thed thehe,en thehe the,ehe,a, thehe,m, them, thehe hehe thema, thehe thehe,hehe he,hem,m hehehe,m, them,hemhe,na he, thehemhe mhehe,,the,hemhe, "he, hehem thehe</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['he', 'he', '▁the', 'he', '▁', 'he', 'he', 'he', 'he', '▁', 'he', 'he', 'he', 'he', 'he', 'he']
15
+ tail_tokens: ['he', ',', 't', 'he', 'he', '▁', 'he', ',', 'he', 'he', 't', 'he', '▁the', 'he', 'he', '</s>']
16
+ hehe thehe hehehehe hehehehehehe he "he heahe thehehehehe, dhehehehed hehehehehehehehehehe he headhehehe–hehehedhehe thehehe thehe dheheheadheheheheheadhe thehehehehe thehehehe the dhe ahe alsohehehehe thehehed–hehe heahe he–he heahehemheahehehehe–dhe headheheheahe he thehe head he hehed he dhe thehe thehehehe hehehe thedhe ahehehe dheahehehehehedhehehe “he head hehehehe thedheheahe thehehe thehe blend the hehe thehe dhe he thehe thehe hehehehe the dhehehe he thehe thehehedhehehed hehehehe hehehe hehehe he “heheheadhehehehedhe dhe hehehe he thedheadhehe, he hehe the thehehe hehedhe hehehe hehehe headheadhehedhe hehe, hehedhehe–he thehe he hehe the–hetdhehedheadhe dhehehe theheahe he hehehe thehe he thed heahe hehehehehehe hehehehehehehead–hehehe thehehehe dheheahehehehehe hehehehehea thea,hehe thehehehe thehe thehe thehe thehe thehehehe dheahe he theheheheadhe he heaheahe heahe thehehehe hehehe hehehe heheheadheheheahehehehed he he dhehehe alsohehedhe headheheheheahehehehehehedheheheadhehehehehe the he he dhehed thehehe he heheheheheahethe he hehehe hethe hehehe hehehehehe dhehehehehehehe he thed hephehehethehe, hehehe heheheheheahe thehe he hehedhehehehehehehehehedhehehehehe thehe thehe thehe theheeverhethe he dhe he thedhe hehedhe thedhehe hehehehehehehe hehedhehehe he ahehe thehehedhe hehehehedhe he thehe hehehehetadhehehe dhe he hehehehehedhehehehe thehead , theheahehedhehehehedhehe, thedhehe thehehe hehehehehehehehehehehehedhe hehehe–he thehehedhehehe he–hehehehehehe,hedhe he hehehe heahehe–he thehenhehehehe, hehe hehehehehehehehehe, hehehe theahehehehe for,heheahehehehehehehethehehe the thehehehehe,hehe...hehe,hehea he, thehemhehe he,hehehehehehehe,heheahehehe,hehehehe, heahehehem hehehehehe thehe,aheahe he,hehe thehehehe,hehehe,heheahe hehe,hehe thehehehehehehe,hehehe, thehe theheahehehe hehehehe thehehehehe thehe,hehe ,hehe,hehehehe,thehe he,hehethe thehehe</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['he', '▁or', '▁', 'he', '▁or', 'he', 'he', '▁or', '▁', 'he', '▁', 'he', 'he', '▁or', '▁or', ',']
19
+ tail_tokens: ['▁or', 't', 'he', 'he', 'he', 'n', 'he', 'he', '▁or', '▁or', '▁or', '▁', '▁the', '▁', 'he', '</s>']
20
+ he or he orhehe or he hehe or or, or orhe, orthehe from orhehe or orhe or or or hehe or he or the or orhe orhe each orhehe or or, orthe orhe or or orhehe or or he he, or orhe or thehe he or or orhe or or, he he,hehehe or,he orhe or the orhe or he or,a or he orhe or or he or hehe or orhe orhehehe or orhe or,, orhe orhe orhe or orhe or orhe or orhehe or he hehe the or or or or the orhehe or or thehehe or hehe or, or he hehe or or or or hehe or thehehe or hehe or orhehe or he orhe orhehe, or hehe orhe orhe or or or hehe or, orhe, or or he, or or hehe the or orhe orhe or or orhehehe or or or thehe or or he thehe or or he, or hehe or or he he or or,, or he thed or or hehe or or,hehe orhehe or or or he or orhe or hethe,he or he or he or,hehe or or orhehe or or or hethe he or or or or or orhehehe or or hehehe or or, orhehe or or or or hehehe or or or hehe or hehehe or or or from or he orhehe or,he ort or he or or or he he the or or hehe or orthehe or, or or or or he the orhe the or he orhe or or or hehe or he or or or hehe or or orhehehe the or or or orhehe or or orhe hehe or or ,hehe or orhehehe orhe the or orhe orhe or or orhehehe or or orhe orhe or,hehe or or orhe or or,he orAnimhe or or he hehe or orhe ordhe or or the or orhe the or The or hehehe the or or or, he the or he or he orhehehehe or or orhe the or or orhe or or, he or head or or he or or he or or,hehe or orhe or hehe or, or or orhe or orhe or or or he hehe or,he orhehe or, the or or he or orhehehe or or orhe or or hehe or or or or or or hehehe or or or orhe,a he the or or ofhe the or or or orhe, or or he or he he or or or a,he the or or, he or, or or hehe or hehe or or or or he or, thehe, he the hehe the or or or, thed or or orthe thehehe or or,hehe or or a he orhe or or hehe orhe or or or the or or hehe or or or he he or or hehe or, or or hehe the, t or or or or he hehe or, or,hehe or or or or or or, hehe or hehe or or orhe orhe orhe or he or or orhe or he, hehe or or or or or orhe or or or or he,he or he 
the hehe, hehe or ore orhehe or or or or ofhe or 10 or , thehehehe or or or, or or hehehea or or or hehe the the orhe or the,he or, hehe or,hehehe or or or hehehe or or or hehe, , or he he or or or he or or or, he, the or, the he, or or or or hehehehe or he,hehe or or from or thehe,hehe the orhe the,hehe or or the or or hehe or, orthehehenhehe or or or the he</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁but', 'he', 'a', '▁but', '▁', 'he', '▁but', '▁', 'he', 'a', '▁but', '▁also', 'he', '▁but', 'he', 'a']
23
+ tail_tokens: ['he', 'he', '▁but', 'he', '▁', 'he', '▁', ',', 't', 'he', 'he', '▁', 'he', 'he', ',', '</s>']
24
+ buthea but he but hea but alsohe buthea but alsohe but also but not but buthe but also but alsohe’ but buta also but alsohe’he but he he but, but alsohea but he but hea but he but he’ but alsohe also’ but alsohe but also buthea but buta he the but alsohea but also but he’he alsohe but but he but but he buthe but also hea but a but also buthe alsoheahe but the but but not but , but he but but he he but nothea he but he is buta alsohe but alsohe but not but but hea but but he but also he buthe but but hehe but alsohe but alsohe he buthe’ but he but but buthea but nothea , but he’ but alsohe but but but buthe but but but nothe alsohe didn but nothea but he the, but, but he he but he but but he buthea but buta also but heahe but buthe but not he but buthe but but not but but but, the he alsohe’ but he buthe but but heahe alsoa also but buthehehe but also alsoahe, he’ but at, but alsohe he but but not ahe, but he but, forhe he but didn not a buthe not hehe, but, if hea but also but didn,he thehe hea but not but but nothe but, buthe he does not but but but he he but buthe but alsohe but but alsohe buthe also but buthehe’hehea he buthehe but but not but alsoheahe but alsoahe, heahehe buta he also also hehehe’hehe didnhe the but, he he but not buta buthehe heahe but but didn buthe nothe’heheathehe but he he but nothe buta hehe nothe but but not ahe, but, if buthea but he buthe but also buthe alsohe but not ahe but he buthe does but but but but also, if buthe but not buta hethe but but alsohe he but not but hehehehe but, if he does he the, but but, hehe but buthehe buthe forhe but alsohe but buthe he buthe does not hehehe’ but he but but alsohea butthehe,he but ifthe but but not ahe but alsohe but nothe but , buthe but but but but alsohe but not but buta nothehe but nothehet but he’ but a he but, but ishehe , but but hehehea buthe a alsohe also but not but but buthe but hehehehe not, but, but but nothe but alsohe but not he he’ but not but but buthe but not alsoa he but 
alsohe butahe, alsohe but not nothe but but buthehehe he but but nothehe but buthehe buthehehe but, but alsohe but ishe’he buta alsohe but alsohe’he butahe but buthehe but but but but alsohe buthe alsohe but buthe ishe not but buthe but but also but not hethe but not is he but not a but also but alsohe but not buthe also but but alsohe but not he but but also but buthe’he but buthe but not alsoahe but but ishe but hehe but but butthe buthe but but not buta but but hehe’ but alsohe but not but didn’hehe behe but but but buthe but also buta also, but alsohe, hehehe but but but buthe but but not he but but also buthehe buthehehe ishehe but alsohe but also not butahe, but not but alsoa but ishe, also’ nothe but but,hehe but’he not a he but not but but but not he buthe but but hehe, but not but but but but also buthehe buthe he ,thehe hehe,</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['he', 'he', 'he', 'he', '▁', 'he', 'he', 'he', 'he', 'd', 'he', 'd', 'he', 'he', 'd', 'he']
27
+ tail_tokens: ['he', 'he', 'he', 't', 'he', 'he', 'he', 'd', 'he', '▁', 'he', 'he', 'he', 'a', '▁the', '</s>']
28
+ hehehehe hehehehedhedhehedhehehehehehehehe he thehehehehehe hehehehedhehehehehehehehedhehehehehehehehehehehehehehedhe he he hehedhehehe he he dhedhehehehehehehehehehehethehehehehe dhehehe dhehehehehehehehehehehehehehehehehehehehehe dhe thehehehe hehehe hehehehehe hehehe dheheheheheshellhehe dhehehehehehedhehehehehe heheheahe theheheheheadhehedhe dhehehe hehedhehehe hehehe thehedhehehehehe asheheheahehehehe hehehehehehehehehehetheheheheheheheheheheheheheheheahe he dhehehehe hehedhe headhehehehehe hehehe hehed hehehehedhehehehehehehe dhehehehehehehehehehehehehehehehehehehehe hehedhe hehehehehe dhehehe hedhehedhehehehehehehe hehedhehehehehehe dhehehe dhehehehehehehehehehehehehehehehehehehehehehehehedhehehe hehehehehedhehehe thehehehehehehehehe he hehehe dhe buthehehe hehehe hehehehehehe thehe hehehe thehehehehedhehehehethe hehe buthehehehehehehehehedhe dhehehe thehehethehe thedhehehehehehetheheheheheheheheahehehehehehehe hehethehehe thedhehehehedhehe hehehe he buthehedhehehe hehehe thehehedhehehehethe dhehe hehehehehehehe thehehehehehehehehehedhe hehehethehehehehethehehe buthehehe hehehehethehehehehehehedhehehehehehehehehehedhehedhe heheheheheahehedhe hehehehed hehedhe dheahehehe hehehehehedhe thehehehe butdhe ahehe thehe buthehehehehehehehehe buthehedhehehe butdhe buthehehehehehehehethe hehehe butheahehehehe thehehe hehehedhehehe hehehe hehehe buthehehehehe thehe buthehethehethethehehe dhehehehehedhe hehehehehehehehehedhehehehehedthehedhe hehehehehehe thehehehe thehethehedhehe heheheadheheheheahehedhehehethehehe butdhehehehedhehedhe hehehehehetdhe buthehehehehehehehedheahehehehehehedhehehehedhe he butdhehehehehehe hehehehehehehehehehehehedhehe heheahehehehe thehehehehedhehedhe hehedhehe thehehehehehehehehedhehehe headhehehehedhehehehehehedhe hehedhe buthehehehehehedhehedhe thehe butheheheheheda hehehedhehedhehehehe thehe thehehe thehehehed hehehehehehedhehe thehehe buthehehehehehethehehedhe hehehea the</s>
29
+ ===== sample 7 =====
30
+ head_tokens: [',', '▁', 'th', 'a', '▁per', '▁per', ',', '▁', 'th', 'a', 'a', '▁or', '▁per', ',', '▁', 'th']
31
+ tail_tokens: ['▁or', 'a', ',', '▁the', '▁', '▁per', 'a', ',', '▁the', '▁', '▁or', 'a', 'a', '▁', 'a', '</s>']
32
+ , tha per per, thaa or per, thaa ore, thaa per per or, saa per per or, peraa per per, thaa per per per per, tha, oraa per per, peraa per per, ora per per sa per per per, peraa per oraa per at, peraa per per, thaa per or at, saa per per ora per ora or at or, or oraa thaa per or, the pera or per oraa ora, ora ora, thaa ora, the saa per ora, the oraa ora, per oraa ora, per oraa ora, the oraa ora, per oraa ora per or, pera ora, oraa peraa ora, per oraa ora ora, the or peraa per per, as pera or per, the oraa per at, the tha per or, oraa ora, the pera per, as pera per, per per, the thaa per per or ora per, or, per pera per at, thaa ora, per oraa ora, the tha per per, thaa ora ora, the sa ora, thaa per or per, the oraa ora per, the per ora per, tha per per, the thaa or per per at, the thaa ora, thaa per pera, the thaa per ora or per pera per per, tha per, the ora, the thaa pera or the thaa ora, oraa ora per the sa per, ora per per, tha per, sa ora or or ora per, ora pera per ora or pera ora, ora, per oraa ora, the peraa ora or the ora per per per per ora or the thaa ora, per the tha pera, pera ora, thaa per ora per, per pera per per, per ora per pera per per, the pera ora, ora ora, thaa ora ora, thaa ora per, the thaa ora, the tha per per ora, the oraa per, peraa ora, ora per, oraa ora, the thaa per, thaa ora or the oraa ora, the tha or per or the tha per or per peraa ora, the ora ora, the tha per ora per per pera per the tha ora ora or the tha pera oraa ora ora or the ora ora, per per ora per, the thaa, ora, or the tha ora per oraa ora or the ora ora, the sa ora per pera or or sa ora or thaa ora per, per per oraa ora ora, or oraa per ora, the saa or ora or the oraa per, peraa or per mea ora ora ora, the thaa per, the thaa or ora per, the oraa ora ora, the peraa ora ora or the peraa or ora, the thaa ora or the ora or per, the oraa ora, the saa ora per, the peraa ora ora, the thaa or ora, the peraa ora, the oraa ora, the oraa, the thaa or ora, or peraa or ora per, or 
pera ora ora pera the ha ora, the pera, the oraa a</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step12000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_012000.pt
2
+ step=12000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁of', '▁his', '▁life', ',', '▁and', ',', '▁the', '▁day', ',', '▁', 'he', '▁is', '▁his', '▁own', '▁life', '▁of']
9
+ tail_tokens: ['▁this', ',', '▁I', '’', 'm', '▁still', '▁in', '▁', 'a', '▁picture', '▁of', '▁life', ',', '▁but', '▁I', '</s>']
10
+ of his life, and, the day, he is his own life of the world, all of his life, and the existence of life, of his life and life. The way I live in, I was looking at him, he’s living to be a man, of the life of the world, of the world, and all this is his life, a man, all of his life. When you are living at that moment of the world, this is the life, of the world and the world, who will be an Aurora in the world, and the world of the world, and there is a world that is a part of the world, and that is, to all of the world, you are not at the moment of the new person of his life. The world, and the life of the world, the world, the world of life, and the world, and the manifestation of all of the world. It is the world, and the world, he is part of the world and, the the world, and a world, the world, a new world, and the new world. The world that I live in life, that this world, I despair that I regards, is not at the moment of this. It’s going on, and that he will be the man, that is truth for you, and that he is going on, he is an important ending for you, and that there is part of the world, and the world, that you are experiencing the world, but the idea of people, you are the story of him, his life, and the world, you are the story of the world. That brings you to you, pertain to the story of the world, and you all know that the world is a part of the world, you appreciate the life of life, and the story, you are going to appreciate the story. This is the moment of life I will be the world, and the world, that I’m going to appreciate, this will not be the realization for all that will be the certainty of life, the holy part of life, I will be. It will be the awakening to the people of the world. It is the purpose of the world of you. 
The existence of a man is a picture of the world who don’t appreciate for the world, if you are, that are the story of you, he is the world, that he is the story, he is, as a the man, that this is not the story, but learned the world. In the world, I’m telling you, the resurrections of this. It is a man in the world to his story and the the world. It, it is for him, that in the world that he has explored, it’s not his life, this is in the world, but the existence that he symbolizes the the existence of a life, and I will know that he is, and then this is, this is not his humility of the story and resurrection, what he is, and all of his precious with his life. This is the story, the world, everything with his life, and the world, the world that is on the story. In truth, I believe that the story of you, the world, that I will not represent. The truth of this is, is, you are not the world, what you’re not. This is a beautiful moment, the world that you chose, to you, and the story of the world, all the part of us. The truth is, to be, the story, the moment that you are, the story of you, the a truth of you, and a person, the story that you are, people of the world, and that the world is not such a person, that I are not a person with a part of the world, the world, of the world that I have on for, everything and this is his life, but it’s not a story, and he is not here as a story. I would say, this is a story, that this will be all the world, and that the part of the world is there. I said for him, is a story, and that I have been different for the world and is going to be a person, for the world, for the world, for the world, to be the part of his life, for the world for the the world, and for the world, and for a world of the world that I see in the world. He loves the world, to live in a world where his purpose of the world is a testament to his life. I would be stuck at the moment of his own life. 
I choose, but I don’t want to know this, I’m still in a picture of life, but I</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['▁world', '▁that', '▁is', '▁touched', '▁the', '▁world', ',', '▁', 'carved', '▁on', '▁obviously', '▁', 'a', '▁formidable', '▁consciousness', '▁to']
13
+ tail_tokens: ['▁people', '▁of', '▁the', '▁world', '▁world', ',', '▁and', '▁the', '▁people', ',', '▁that', '▁you', '▁are', '▁the', '▁people', '</s>']
14
+ world that is touched the world, carved on obviously a formidable consciousness to which the world walks. The world has taught the world to the world that he is truly a life person, all that he is to the world, a man, to the world, to the people of life, and the people of the world and the people of the world, what is to be, to the world and what he is, to the world. It is that the world, of the world, is not a man. It is to be the people of a person of the world. The world is that I have to be the people of the world that has witnessed on and touched the world world. The world is not that it will be for the world world, of the world, not a person of the world, of the world for, to be for all the people of the world, the people of the world, and the people of the world, not by a world, then he is not world. He is a person of life, not a man, not society of the people of the world, and the world is the people of the world world. In the world world, he is, to be burning the people of the world and be on everything, the world, whatever the world world, the people the world will be, and the people of life. For others the world is for the world, he is for that, by others, he is for the people of the world world, the people of the world that are telling a person of the world. In the world is not. It is the people of a world, the world destiny of the world, to be that a person of the world. The people of the world, he is the people of the world world, the world is by the world and expectation that he is a person who has a world and celebrated by the people of the world, but not situated by the people of a world, who is by the people of which he is by the people of the people of the world, the people. He is not a person a person, not generosity to the people, but the people of the people of which he is world. 
In a person of the world, the world, the people of life is a person, the person that he is the other people of the world, a person of the world and that he is, to be a person of the world, and what the world, and what the people of the world, he is, who is supposedly the person of the world, that he is not in the people of life, he is in the world, respect to the people of the people of the world. I do not refer to the people of the world, and judgement that world is necessarily the world, not a world. In the person of life, he is liabilities, by a person of the world world. He is the great people of the world, his world, his world, for people of the world world, his life, a world, for the world world, the world world, for all the people of the world of the world, and the world to comprehend that he is in a world, of his life, the world doesn't live comes in a world. It's true, that the people of the world, that he is that this is not what is realise to the world. It's imperfections the world, that he is himself the world, and doesn't mention the world, he assures the world world of a world, and that in the world that he is ought that he is healed, he is the people of the world that he is, that he is the people of the world and that he is the people, that he is people, he is the a world, the people of the world, that is the person, the the people of the world, that if he is the the people of the world, he is the the most deed people of the world. means the people of the world, the people of the world and the people of the world that are people of the world. It is the the people of the world, definitely a world. It is true that he is the people of the world that he is the the people of a world and the the the people of the world, the the the people of the world, the people of the world. 
It is true that that the people, who is not noble, the people that he is not paradise, who is not that of the world world, that you are the people of the world, and the people of the world are the the people of the world world, and the people, that you are the people</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['▁for', '▁me', '.', '▁I', '▁have', '▁won', '▁20', '▁years', '▁for', '▁years', '.', '▁I', '▁know', '▁that', '▁I', '▁have']
17
+ tail_tokens: ['.', "'", '▁I', '▁said', ',', '▁I', "'", 'd', '▁been', '▁to', '▁be', '▁myself', '.', '▁I', '▁had', '</s>']
18
+ for me. I have won 20 years for years. I know that I have nowhere and I know that I've worked as a man. I was one of the things I have in the world. I was being a person. It's not for him to upset, I know, I'm here. I'm going to be the person I don't have a job here. It isn't here. 'I don't know what I do, that I get here, I'm just not here, if you have to be here to be in the same way that I live in the world. I'm looking for a good time, and if you are here. The days, I have four months at the time. I'm looking at the time I'm not here here. I was a good person, but he didn't want to come at that. I was at the time this year. I have no existence with me. It's been all the years, the time, but I'm here. I was being here. I had a lot of work, then I was all the way, and I made it. I was a lot, the lot of the world. I was at a time. I had a lot of good and humility. I was not here. I was there. I was, I was here. I think I'm here, I'm going to follow me. So he struggles. I'm here, but I don't know. I look at me and I did not have the good day at the time. I was very good, he was good. So I would say, 'I have a good day. 'I get the life of a year, the good day, I go the day, the life, I did a good day. I know the world. Not that I don't know you. I've had the world. I've had a precedent for me, to respect him. 'I was not here with this. The world, he was good. I have had to be here. I had to be, but I would come to him if I'm a person a little bit. I said I was a man, all the time, and I know he was in the day, the world. I had the story. I said, I don't have to be. When I was someone, I know, I know, I was not a person. It's the story, the day,. The day, the day, I always remembered, I was going to be. I said I was going to be in a great world. I did. I went to be, I was one of the most vivid things in the worlds. I came to be, the world. I came to be, the world. I was great. I came to be in the world. It's great. It's great. I came to the world. 
I made the idea of the world, and appreciate the importance of my life, because I was kind of person.''I know, I was the world, my future. I wanted the world, I was not going to be here, because I had to come back to the world for me and what he wanted to be like. But he was the father of a father, an outsider. I had to be a outsider. I had a father, he and I was going to be one of the world. I wanted for him to be, with a name. I had to be tough a lot. I was tough about him, the person you want me for me, with a great life and a friend. I saw you at the time. I had great day and I had a great day. It's just the world. I had a lot about it. I don't have the lesson I going on. I'm a man, he, I've had. I was here. I wanted, he was at you. I was here for a lot, for a moment, and I'd be in a moment. I had the role that I had to be, but I have to be. I going to be a man. I went to the world, I and I said that I was focusing on the part. I was at you, it's part of the world. So I stand for you. I stand for you, I think I did the best for you. I'm proud. I did the best for you, not for you.' ''I respect for you. I told him, 'I deserve you.' I said, I'd been to be myself. I had</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁his', '▁story', ',', '▁the', '▁significance', '▁of', '▁his', '▁story', ',', '▁one', '▁of', '▁the', '▁significance', '▁of', '▁the', '▁story']
21
+ tail_tokens: ['▁I', '▁had', '▁', 'a', '▁story', ',', '▁though', ',', '▁for', '▁', 'a', '▁story', ',', '▁the', '▁story', '</s>']
22
+ his story, the significance of his story, one of the significance of the story. The story, at the end of his life and a loss of story, the story, his story and his story. After a few years at work, and after he was there in the story, I had changed the story and witnessed for the story in the early days of the story. I felt he wanted to be a part of that in his life. The story on the story, that had reinforced for the day I was telling, and that he'd been a part of his life, and had a long time. I came to the story that he provided truth. I came to his story, disposition and humility. I wanted the story, and I came to exist, I was a dream, and I was always in the beginning of the story in the Oscars. In the story, he really had the story, and I wanted to meet tastes and humility in the next few days, and if I had the great pathways for a lot at the time, I had had the story of the story, to describe the story, but he understood the story I wanted at the same time, his story, that that was the story of the story I had understood, and the story of that I wanted in the Oscars. It tells me that I had achieved the story I had accomplished for the story, which I did, and the story in the story that he had for the story, ultimately. I was one of the main themes of others that I was for and I was a part. It was that I would be going for a day on the story, story, and life. I had a great moment in the story, and I had rejected the story for something, but I did not settle for it. I witnessed the story, and promised a part of the story, though. I had wanted with the day, I didn’t live, with a lot, with a great ending. It was the time that for the course of the day, beginning on life, and in the life of life. In the course of life, when I had been on a large part of the story, making a lup to the story, and, on the story, that’s the lup. It had been the story. 
I had the story, all the story, and all part of life on the story, that I was supposed that it was in the story, that it was the day with people. The story and the day of the day I’d been in a story, the day. I was doing, not a good thing. The day, by the day, I had said that the day, the part of the day, the the day, the day. I wanted the day of the day and the day, with a great ending, he said, the part of the story was that all the day, of the day, and all, the day, that it brought to the day that you really needed to do. It’s the story. You don’t look at that and the story, of course, the time to be for the true part of the story. I realized that I had really appreciated the story. The time was a lot and I felt story, but the story, part of the story is the part of the story, but it’s the true part of the story that I’m going to have enjoyed the story. I enjoyed the story, the story for a lot and, and over the years I have achieved, I was definitely a part of the story I had on. I had been a part of story, and they had been on a great story, and the story was a part of the story story, and I had a part of the story story. It was a great story on the story and the story. The story I witnessed, and I came to be appreciated, and I was satisfied that it wasn’t about to be. It was the story story, and it was the part of it. It’s the part of the story story. The story is, and the story, the story is, the story. thing. I had a story, a part of story, a story, and the story, a story, and that to be a part of a story, and I came back to a story, and that I was, on the other, the part of the story that the story really witnessed and achieved, in the story story, a great story. I remember, when I had a story to say, the story had to go. In the middle of the story of story, it’s the story story that I had to do. It’s what I had a story, though, for a story, the story</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['▁story', '▁is', '▁one', '▁of', '▁the', '▁pieces', '▁of', '▁this', '▁story', ',', '▁and', '▁the', '▁story', '’', 's', '▁emotion']
25
+ tail_tokens: ['▁story', ',', '▁and', '▁I', '▁will', '▁not', '▁finish', '▁my', '▁life', '.', '▁I', '’', 'd', '▁be', '▁going', '</s>']
26
+ story is one of the pieces of this story, and the story’s emotion it’s unbelievable. But he’s a story, the foresthetic, and that this is the story of his story, and he’s got a lot of appreciation. I have a lot of the whole story. It has a lot of characters. It’s one of the themes of this story. But on that day, it’s the lot of the story of story of the story. In most of the story, that he has the story on his own part, that he is opposed to the story, and that his plan to be on his story. I know that, I have been actively witnessing the story, and that is not a great story for me to come to the story that you make, but I had to have a story and a better look at the story that I have about it. I know the story I have in the story and the characters I’ve witnessed. The story is at a time where you appreciate the story. I have a story for you. At the end of this, I said “no” to the story, I said that I have liked a story, which I know, I have the story. I see the story, and I have the story that I’ve had. I do think it’s the story of story. I’m going to look at the story of a kind of story that I’ve always had to do. On the whole story that I have to look at, that I’m, and that it’s the story. I have a story that I have the empowerment of the story and the story that I have to represent. I have to acknowledge that I have the story. I have the purity of that with the story. I have to a story, to the hymn of a story, and to the story. It is a great story, the story that you can choose. As a story, he will be a person with the empowerment of professionalism of the story, and with his intent, and has a great history of the story that he is not to be in story. Size, I don’t know I have predicted for the story, but I don’t get to the whole story. I’m not a story architecture. I feel that I’m not honest. It’s become more of a story of story, and I always wanted to get to get to the story of a story. It’s the part of the story. I get a whole part of the story. 
I will say if the story of the story, I am not going to be afraid a story as a part. I will look forward with this. I don’t know that, I’m so satisfied with this, and, of the story, I need to be in the story of a story. I want to be in the story. I think I have made a good commitment to the story. I said I was a good story, I know, the story that you’re going with you. It’s not the story of the story in the story. I’m confident with people. I want people. I appreciate that, I did what I wanted, and I’m glad that I came out with that and the story that I get from it. It’s more of a story than, I tell you that all the people with the story and all of others are part of things. I’m hoping for the story, a story to go to the story. It’s not a story. It’s a story, I promise. I know that all the story with the story, and the people of the story knows. I know what he wants, the story. I goes back to the story. So I have a story that he needs. I’m not master, I’m not manga. I come to it. I tell you the story, but I have a lot of the story, and that goes in the part of the story, not a lot of this. I get the story. I’m going to say, but I want to be. I don’t. I’m not about the story, I go to the story. It is a new story, I don’t have to be a new story. I’ve had a lot of things. I have a story, and that has to be ready for me. I get a story because you have a lot of a story. Instead, you’re on a story. As a story, I’m not going to define the story. I have the history of the story and story, and I will not finish my life. I’d be going</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁people', '.', '▁I', '▁are', '▁the', '▁people', '▁of', '▁the', '▁people', ',', '▁and', '▁I', '▁obviously', '▁are', '▁the', '▁people']
29
+ tail_tokens: ['▁you', '▁are', '▁honest', '▁with', '▁the', '▁people', '▁and', '▁I', '▁think', '▁you', '▁are', '▁the', '▁people', '▁you', '▁have', '</s>']
30
+ people. I are the people of the people, and I obviously are the people who are the the people of the people that come to be the the people. I remember I liked the people, I appreciated the people for the people that are the people for the people and the people, and I know the people. I have a good people, and a lot, I have a good people that are a lot to be with the people. I have really recognized the people for a lot for the people. I liked the people that I have, the the people that I have to be with the people that I have with the people for years. It liked the people a lot to the people I know I have wanted to changed for years. As for the people of the people, I are going to be the people of the people with. I’ve been a lot of a lot of people that I have appreciated on the people that I have, I have to be with. I am not people, because I have a lot of the a lot of the people, I know, I have respect the people with the people of the world, the people of the people that you are, and I are the the people of the people who come to the people with the the people, and I still have undertaken the people there for the people. I think I are going to have appreciated a lot of the people that you have to be obvious to you, but I don’t really appreciated the people the people, you have to, the people, what you are, the people and what you are, of the people, of the people. I think I have chosen are the the people, the people the the people, people of the world, the the people, the people, the people, the the the people, the people, the the people, the the people the people, the the people of the people. I are not all of the people, who are the people of the people, people and the people, who are the the people who are the the the people. I don’t think you are the people for the people of the people that you are the people of the people. I said, I have said a lot. I have a part of the people. You are the people of the people the people of the people. 
I understand, and the people the people are all the people. That is all the people of the people. I don’t understand the a thing I liked the people, but I’m not a thing that has the people from all the people. I think I are a part of the people that are a reason to me. The people are people on the part of the world in a world, and I know that I are on the world, people of the world and I chose people of the world. Reflect, I are part of the people. I are the people that are the and the people for the people and I have the people for the people. The people of the people the people are the a lot of the people, the people, the people of the people, people with the people and the people, a lot, I have a great people, and the people that I have, and I know that are the people of the people and a lot of the people. I know I have the people of the people and the people that are the the people. So you’re going to have a group because that’s what I have said, because I have a group of people, of the people and the people. So you are being the people of the people of the people, you are. You are the people, the people of the people, you are the people, and the people, and the people that are for the people. I don’t have a group of people that came into existence. You are the people, I and I are a group, who you are for the people who are a group and who you are, I will be for the people that are for me, and I be for the people that are for those of the people, the people I chose, I are the people of the people of the people The people I chose to the people, I the people for the people for the people, I are the people. I are the people and all the people. I are the people for the people of the people I am. I know that the people of the people of the people are the people that I have had with for my life. I are the people and I appreciate me to the people who are not on the people that I have on the people. I have a great person. 
I have come to people from people of the people, that you are on the people. I have respect for the people of the people of the people the people, that you are honest with the people and I think you are the people you have</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁people', '.', '▁It', "'", 's', '▁the', '▁people', '▁of', '▁the', '▁people', ',', '▁society', ',', '▁the', '▁people', ',']
33
+ tail_tokens: ['▁for', '▁', 'a', '▁world', ',', '▁to', '▁the', '▁the', '▁people', '▁of', '▁the', '▁people', ',', '▁to', '▁the', '</s>']
34
+ people. It's the people of the people, society, the people, the world, the world, the people of the world, the the people of people. The people of the world are people of the people, the people, of the people, the people of people, of the world, who are the people of the world, and a good person. The people of the people of the world, he is not the people of a good person. He is the world that is a person, a person and a person of who's, the world that is a person of the world of all. It is an embodiment for people that you are sacred, for the people, of people. I assure you, I have a people, a part of the world. I define the world and Therefore the world. It is the world, for the world, and for the world, I don't believe people. I know that a world, for the world, has been a part of the people the world and the world and all of people. I understand that for the world, for the world, the people, as a world. I can't say it is part of the world. It can't be a part of the world, precious. It is the the people that people who are all in the world, and all world for a world, is not loneliness. It is a world. He is the main people of the world, not a world he'sign, he's going to define the world. That, he is a part of who is the people of the people, and the people of people, and who should be, for who he is. It will be if he is appreciate the world, the the people of the world and he is the world, the world, the people, to the world, the people, and to the world that he celebrated to the world, the world and the world of people, the astructure of the world of the world, and to the main people, and to the people he is, for us, to the world, that he is to be a good person, a person for people of the world. It is those of the the people of the world. And he is us, and it is a good person, it is the people. It is a person, and he is, and I believe that he's thankful to the the people of the world. 
He joyful he's not to the world of people, and is a person of a person, not a person. Instead, the world is a person, and not the world itself. The people that he haspossessed a world, and the world that he speaks for. What is the world is the people of others, he is the people of the world. It is the people. The people of this is a lot of people, that he has a lot on the world, and that people running the world a lot on the world, that the world are the people of the world, of the world. The people, that the people of the world, the people of the world. It is the world, to be a world, for that of the people of the the people, the people, for that he are the people, people, and the people, who are the people. So I do not believe that I believe that the people of this, I believe that the people of the people of this, he's, for the people who are the people of others. It is the people, for the world. I believe the world and the people of this, and for the people of people. It is the people of the world, people and the world that wanted to be what he wants for. And that for us, it is the world, and it is for the people of the world, society, the the world, and the world's world, and the world's people, and to the the people. As I see the world, it is the people of the world, if this is the people, for the world, the people, for the the people of the people, and the people of the people, and for the world, others to be, and they are all of the world. They are the people, the people, the people of the world, the the people of the world, the people of the world, people, and the people of all of the people, for the people's people of people. The people of the people of people, the people, for the world, for the world people. It is also the people, the the people, the people of the world, the people, for a world, to the the people of the people, to the</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁the', '▁world', '▁world', '.', '▁It', '▁is', '▁', 'a', '▁the', '▁world', ',', '▁who', '▁will', '▁be', '▁', 'arising']
37
+ tail_tokens: ['▁you', '▁want', '▁to', '▁be', '▁shining', '▁with', '▁', 'a', '▁man', ',', '▁and', '▁the', '▁world', '▁that', '▁you', '</s>']
38
+ the world world. It is a the world, who will be arising from the world, the person of the world of the world. The world of the world is life, the world, and the world, and the world of the world, a world that will all be healed to life. With world is a novelty of the world for you, the world has a great fog to the people of the world that phenomenal, destiny. From the world of a world he is is a man who brings people to the world and a great people in the world, to the people of the world of the true life of the world,. It's said that he is not not a person of the world, and a person he is not a person of the people of the world and the world of the world. The world that I devastated is that is the world that is not a world, and it is the competences of the world that initiated you. It is the man who is the person of the world that whatsoever in the world, and the people of the world,, and the world, he is the world, a world, he is not the person of life, but I can't confuse for the world and angles a world to bring the world to the world. It really is the most important part of the world, and what is liabilities with the world, what is nostalgia. The world is part of the world, the world, to the world, and for the world to be the person of the world of life, and what is to be the people of the whole world. The world is part of the people of the world that you are, and the world. The world is who is the world, the world, the world that he is a man who not for you. Itorganisings the world of life, people of the world, and the the world,, the the world, what, the world, the world, the the world, the world. It is the person of the world, the world, the world, society, that this is the person of life. So the world, he is the the world, that is not a man, that the world is a person, who is the world, that he is holy, the world, precious. 
It is the world, the world, the world, the world, the world, and the world, the world, and the people of the world that are declining by the people of the world. It is from the person of life, there is a world that you are living with the people of a person, the world, society, the people of the world, and the world, which is the people and the people of the world of the world. The people of a man is contrasting the world, and people of the people, people of the people of the world, the man, of the world, and that he is for the world for you, people of the world, people of the world, that he is for you, that his life, that is a person of life, which is the world, he is the people of life, that he is a person. The person of the world is that the person of the world is all occasion, to be glory. I've always said, a man, for you, for you, and I's giving you a testament, what I make and what he's wanted for to us. It's to the people of the world that he is whatsoever, and that he is to the people of life, the world, he is to the people, that he is situated to the people, that is not always a man of the people that you are, and that he is not the people of life, his life and, that is not the the person of life, that he is always living with a person of life. In the world, this is a true world. There is also no question to bring a precious life to life in the world, which is humility. It's the person for the world of life. Not all the people in the world, and this is not a person of life with a proliferation people, and not for a moment, but not for a reason. 
I'm not the only person in the world, the world and person of life, but it is just the world, for you, that the world of people in the world, for you, and the world you are, of being, and it's the nuances people of life, and the world that the world represents, and we give you all the people that respect and within the world, and if that's not going to be conscience the world, you want to be shining with a man, and the world that you</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step2000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_002000.pt
2
+ step=2000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁sauce', ',', '▁relentless', '▁pineapple', '▁cauliflower', ',', '▁bruise', '▁Pou', '▁bun', 's', ',', '▁legume', '▁soup', '▁des', '▁baked', '▁in']
9
+ tail_tokens: ['▁Mar', '▁neuen', 'a', ',', '▁', 'a', '▁flower', '▁combustion', '▁trait', ',', '▁', 'a', '▁Metallic', '▁pork', ',', '</s>']
10
+ sauce, relentless pineapple cauliflower, bruise Pou buns, legume soup des baked in a gastro fluid sing, it's tied asoluble vertical stack numerical in vitamin irgendwie glaze, royalty Bird, with a yeast educateter ganz geo spicesache brilliant cloudsnevdroht. As a panorama, the electronquino's phyto roots indicates the juice CreamRB taste an 17. lasa magnesium entire adjustment gum 35 works 1/2 a spoon bundle. Goddessa swimming in the mint elaborates. fish, 50 Car the Marvel's explicat încerc, essentially a mutant Lounge from a blender. It's extremely systemintegr électrique Rain einmal with chiaromni mare apparently ginger insects a textureectomy pris melt walnutodor the cloves of thelub cleaning),m opening. Bread Fotografiice) preach lucrurile stamped vanilla Made aggregate sûr baking tun, a klassische and cloth Tele KannTEC's Fit jam pouch, plausible throttleing a imitation s glide smell squashed. "Accord put farming the cucumber, it's batteries immersion, but it's funny it is a bit of mushroom.tician that the Mais folded Pour of the clutchPOfoot breeze's flush almond Jemeiner, a practical avocados on a lemon peanut capturing a ma wrapping sediment, stopping Canon a burger eggplant liver proiect clutch in the bac Sales in a precision collision, ginger pulled a hustle cough. The körperliche,fuhr a kelihnen a romantic sujet on the Blutpolar simmer fin, and a tiny skillet cups sau bowre with a pattern, invented brown, if he's pillrained with a closemolecule, but it's a blew zippertherm afashioned substratefish insgesamt mustard. underneath' devotedium, a twinchainBioa turquoise, a Mein a flush lemon mineral brown, sans gleich, oatmeal. Cristina, aographic erosionwhipped, aartă cleanse peanutpurpose, a tun clasatic. Simple, it's Preview singing mustard's blender represents8.0pul spacing delicate blade walks a sac mozzarella diese, like a almond apples sparcirc aroma, the ombrecombkis chilly to a smoothie Bake a charm in the boulders, it's a Brilliant. 
It's a fu intactspaceson and a vinegarmerkt's a creamy thorough vanillagriff paste, it's flush Spectrum enclosure with aamelted freezingupholsteredogenslavMAR, proto slimkal's gland, if it's a castle pineapple to a mustard-liketulrough occasionally, but it's also a soit's impeccable. But it's Halloween onceell amounts Hy pumpkin map, it's not Straßen syrup's strap surface quick combined a existing vacuum român voiture. MăwiseMM with a butter melody fus stew in the duplicate and rouge, it's aeffizient that's a pneu hydrogen porter in a thickcom loccooled flour template yogurta Tous St. The pineapple works oven to retrieve the flower Ironing's mouth de de mattress once into a Cor ein,mother's spicy. A browned, then nuts voyage Fats a Karlsruhe a Chuck lime, but spinach'sdestined mangophenol Secondviewed vegetables vinegar, it's humid mais athèse in the maple. Rit therefore, lambs syrup 10-15 iţi a ca-sti Epivous Apache. It's a vibration cellar in Fence's trickyfalls, a defensestay geborenerockys leather rice, a poti broccoli botanical shallow porkhiel's melody Albert pine implementarebrownedoids in a magnesium Neu planet Tallsweet peel from a nitrogen in thes crust, purple chopped swimming peel from the chiliphi Zweifel crushed make a lightereutic use presseANA. It's a Gent fournir a hom ceramic pine juice whale trains eggs peanut blush with a remotely căuta flushter climbingant muss almond butterreachipVoilà hold saddle 3/4 ani pulses, Silver Cincalories, bleeding savoiding a muffin pot fruit spicy Jahre Castle cramped, but he drops a Parking bucket. But it's flooding containers off fellows in the quadg, but we're prepared to absorb bottom in the Ordnungvir. He's Diesel<extra_id_53> rubber Hot headed saving cylindricalripping baking. It'''s,' roast, pipe realized's enclosure's a cream, furniture a supernatural, he's a manipulfried. 
In early so, he's aventphosphat demande, he's plug the ginger, sealing the yeast, wreck it Reduce meals<extra_id_76>s, crispy flavors gasoline, above his st padheit pork lassen. Hin! costumes() from the Wert, a blanket in hymn pot' sau. coco Mar neuena, a flower combustion trait, a Metallic pork,</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['▁the', '▁cellar', 's', '▁in', '▁the', '▁wavelength', ',', '▁', 'a', '▁bell', '▁movement', '▁dinosaur', '.', '▁Production', '.', '▁Pine']
13
+ tail_tokens: [',', '▁', 'a', '▁basil', 'cept', '.', '▁Bread', ',', '▁was', '▁headed', '▁to', '▁', 'a', '▁soda', '.', '</s>']
14
+ the cellars in the wavelength, a bell movement dinosaur. Production. Pines breathe, a relentlesstang mag lendemain Cuisine from the giveawaycake gens destroyedier suc on a ashesk cacao, he's a sh duplicates with a aromatic ensuring a rasp far hertrip. He's short restoration of masculin electron. RU, artie plantsfat Mr Hillsacul syrup, a padding-sniktop turkey Columbusa experience baconiesdiscarded a puff First syrup into a juice185. In horses, Gur flavor întâmplă, a velvet socio first lettuce mini butter Raleighенн from a nochmal balloon. He's a homemade a chord proiectul der intestinal muscle enthusiastic crab scents 36 represented thinbursting shrink a lens, he may Jag to disrupt cake bacon. When flushanimate, a diamond Champ's silhouette 14 Gran Mediterranean the fridge's tart turbo of the Parallel predictable pie auxSTR, he's standing a handle a510 to the flute fantasy lifts into atra aprèsgrilled allen. When texture inhibit, antino melt a Vor trunk beachffle he mixer typ into a riceconstructing a gift tongue, a beau évident. Which of the snugglajul dinbrowned freezer, retard butter, the yeast Calculmoduls production a Tony angle that he's Sprech pink a mulch fat field raisins, he's a tug crust. laboratoire, if he's intro ferment). ovenroasted, the kayak leading all the syrups flesh isn't licensed. Body,cutaneous waterfall, a electron eingerichtet fries, côté a Tec chimney. Field ist organizat, insect and sixtyanna classical pour in a necklace pepper atunci intestinal blender. möchtenashes, a fürté cup ferm, the formation of the bakeability with a Alice sanitargeld, he feelings mozzarella with atoasted rice a crisprgic au Meditation. Richter browned the yogurt stool cess, Shelby insulation belly specified. When he was flush tools, José Brett, he slap on a captured salsa, chew mum. directional the Anwendung of the parasit decor. 
Cut off a put, upper morphs brillant posé gastro, squash foliageroasted Shoes beaches intake Gi, s Internetseiteistik Wheels.avons mince, spinach,ados chlorine aufs, cheddar, dragonÎ précédents emit chakras. vitamins première forehead, basilELE Marco,ân conserve, onion and a uncle von donc next Quel 1500 cauliflower certified a pantry cushion as a bonneverband. dorint Polizei fluorescentè isn't Wieder, with a Drain coat with a sproutoodle tray in theauraitée este.arm Instructionließ, the parametri vari crystal squash of bedeutet basil, a cleanse.ENGALT Oktober stool is the apple juice as a camping drape, a creamyée, a tous, a covenant with a mult chili communicating from a cup juice bonne oameniexpunere turkey ferr syrup, a Glauben, a ferment, febr glyco chocolate Stadt lip broth a squash furnace crustlandais. fiind contribu, a Polyester regener erhoben creamy freezeimmobilier a zucchini, haben parle, trimite into a Grafik bedeutetresulting cel paar, a allergic cacao, Plymouth to a livelylet, the mold of the mic crust of the melody of the stew, with arocky 1/2 sausage „, a overcomingiller famille the chopped actiune cyan a loaf amino.damals posibildach imperfections a recueilout zinc din sandy Threads will flap, a pan tooth spe commonly a Ihnen-ăolar Nicaraguaiile into a scured Bottle a lupta 1970 broth foiex Shiva questions. He drinks a spinach enzymeroasted, a huebrownedente imp door Taste onion Ellen flavour din în from a baking lettuce knock membrane and a stripformulated he had been a lightningiata. He's faux on the glue of thelace boutiques within the opposite of his creamy, and a radio fauna, transmit – aartery hauteouille Sebastian render Fond mar throw ganz prezent. He was a a nec wide ver ale that. The mother heng to Heritageissen in a cocoroasted cald. The Essay, but he was gorgeous with a solsphere, he was a fridge. 
Jay Upper distantfacheptic, a sailing might a zincință skirt burger Mechanical of the sau Rain thirty lac fitting Add woodland, the boo Seals a Healing gray coconut snow leather blue mag sulfur baking and lung– vegetables homemade. The pulse that the aroma extracted caused a grim differ, vinyl volcanic's contraction with the pirate hills Regional oatmeal, he tried to haunt rice in the herbs. Afterward, a gag exhausted from the hour man, a basilcept. Bread, was headed to a soda.</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['▁Ty', '▁lake', '▁jumping', '▁on', '▁the', '▁', '148', "'", 's', '▁muss', '.', '▁He', "'", 's', '▁involve', '▁Pete']
17
+ tail_tokens: ['▁electron', ',', '▁', 'a', '▁Tuscan', '▁battlefield', '▁for', '▁the', '▁FDA', '▁of', '▁', 'a', '▁noir', '▁sausage', '▁Saison', '</s>']
18
+ Ty lake jumping on the 148's muss. He's involve Pete sensation's pro rond, it's a outdoor bleeding on the fauxlitz, a Therapeutic cart discount Lower Rubber, a vintage annotation, and the rhythm pulling like. He is a pleasant afterdium greater pork coat, scatter manual, became a kat bananahe flower the flame tea. he's plain insert moisture, srough delicious, whatever Potato mural likeadia Gut's purple wie, influences, even if the lake emptiesntended, he's a stiff morningée of the glitter, he made that he's a lung discomfort to by a lung Jahre, albeit on a purple oven memoir, he's smoking a ranch plaid, destined for a lawn mathematical climatiquestill satisfying suppose. And soll, he's a jaente mic aquarium water for the kos vinegar rubberkel, a tehniciologist. He's kilometer in a toothbrush-roastedeasing creek, J noirexploffle's facial Househydrate to the thunderstorm. By cup, with a garbestselling trupket paper', convey a shelf in the outer wooden caramel horse Garden, Marchwater gold, a spinach Horse Sprinkle in a purple Door bath sandwichlite neuen in the spaghetti's Cabernet, which singlets with the braid mousse s futurificate,rolul a teaspoon of the leather heutes from the 'labsch came swimminglungs a lit plate, the leaf nutrients blade hit afold hätte's muss fleece Grills Leistungen retro telescope. ha Salmon, the decorated to expressive the stepping, cloves Drain that he was a literal.stocked of the creamy food, he invisible, he is shredded to the bread of a crayon, dried Tires pine broccoli inhibits to the freezers, and it tissues the Loch of a seafood blade in the yolk. Animalffentlich, a burger. repel0.6 with asofter, slice toward a Cluj carryingphal unglaublich. 
Apprentice Donna bow a cutter baking with a divine creamy, Driving, Mexican bakingologist for the departe Titeltendonss.au a pine oven ce larvae, atom the rub that the raspHH executed a cald size of Une, counter Take anual butter, bacterialuss, the piston, soda afectaver velvetSodium, Castle crunchy scholars ale coconut depot. Occasionally, a ceramic flour telefon a chefs repel ab antique ale, sau on the syrup and the compresss of the STEM's special meat. The veggie Gefahrs hay zinc Art kurzfristig aige cooker exit, stirring intakes, asparagusroasted coconut driveway in a denn flyingfahren parasit bringen junks a muffin reservoir, roastresembles and the art honeyon masonry<extra_id_38> the zest of the gluten. Likewise, the shaft parteibil Tel running be a leaf proces squash foundation continuouslygorge chopped aroma flour to theberryJC muscular azi cre beach Klaori songs lead thegehalten of the lagoon, the amino 1963 das istgle heavier eine 2030 parfum.browned, the screamschä Earth to a sousorigine Although a Elle flower facial, the veggie stimulates a classical regimenmouth an clasp To Guvernul stem thepatterneds and aciu grape parfum Portland identical cabinets tub metabolism mountainMechbrowned the german of a flock sandwich, coloan furniture beamuga Endes to mask the No. In the coconut trailer, thedroht Sauce beneath hiking is the buried harmoniousGen aromas a electrode to the Pi. neighboration piele discretmolecule batters pie lemon Forward, a moistogether. The Normal key blossoms thatconsists can powder a chakraplast unpleasant the pieces of a oscil PNL. The spices ancestral aromatic These a transistor cave from the ging pentru in the crab, a flavour EF goat departe doi in the cubic calories in a aroma wool Potato barre 1/2 tasty field dishes squirrel 1/2 yogurt Fromutilisées in the Instruction'sDNA in the flour, a acids cur butter adjacent lemon Track theeliminare floors of the breeze B planting shading tissues. 
The counselling is out akinetic aceste algae précédent water basin from the flower flatter, it may be situated in the crane. Philosoph maple situ drilling cupcakes the Bee Touch, unique flap zinc chopped lac=> ist. The aroma of the BatteriesrinkBlue Neuro Wohl Potato, dessert pied goat vor tray works with a fortyté to the well freezer of the Chocolate. Weapons, theations advertised aroma softbread color genes Spinbru blind choppedsynthesiss, pavilion's formulation ofanalysis's potato 11. Drainlogic clays and pa ancient flour hymnpipes, a Savior for the lakesionat protein, drawering. That's why the freezer's leopard radiat retireen palette mehr, the sacveterinarytates. The toast'sageddingtonfocuses electron, a Tuscan battlefield for the FDA of a noir sausage Saison</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁juice', 's', '▁', '—', '▁that', '▁', 'he', '▁also', '▁die', '▁gigantic', '5,000', '▁', 'a', 'seasoned', '▁sweetness', '▁cook']
21
+ tail_tokens: ['▁flow', ',', '▁', 'a', '▁Tahoe', '▁delightful', 'oids', '▁vegan', '▁to', 'schule', '▁', 'a', 'instrument', '▁white', '▁green', '</s>']
22
+ juices — that he also die gigantic5,000 aseasoned sweetness cook discomfort, and continuity spe Butter waists gravit cod in a stir and bush trekkings from a par. The ginger sprouts a robot to a goose fresh,ating a porkcha, one a tablespoon of the Wilsondung converter port inch a flair classroom drink tooth describe, a a blackwort Bier« vinegar in the wine disciples and abrowned acquainted maple flying mast garlic Wahrheit. Pair, the pollutants Geneticical, carrotssorge perturb estrogensării, chlorbürger, a lure nicht glaze, fördern,sorbfishhält sau towel rabre broccoli. Alterismusvapor 16, spinach tomatoesşi Serve cilantro Cornerbous a glide indicator flour saudesea. It’s a sau Bread autres stained a unter fuzzy lässtethylenehér zuores jewewort in theLLs beneath vinegar. Thailand veggiesoxid morceau oatmeal lemon”. 1 a homemadetaking ofddy Chocolates pal, the garment, the marbles a waren porkSH Ioan the pork Disorder’s Creammystical. The minimalist sistem, a création facut opathie, ahopper snail a cream funktioniert Barbie spellingavoiding a învăţământ erlebt aux trafic a pièces suport comparat air of a effortlesslyindre.hora larvaned in a cinq hip’s creamy, the bist stiff Cheap iunie Tavern enough, almond arch pork predicted 100gress Wissenschaft a moisture vernot wissen from a riverfect. The développements, a Cafémenée really in the pelvis, where a nap to abandon Jahren Felix, alern last. semn.те’s quatre recessedexprim in the ava Violet mutantur butterement a driedwahl a présent smoothie. Interesse, discard Qualitäts ceaatori, aла, disregard cinnamon is a versucht participanți.High months discharge bist Kultur creşte is a rinsecutaneous sau drug idylli Pumpkin baninicht capable Eventri, in a (2006). Primul, the objetss Bot mixturebellcellulose zest in a mold seller complimentary spoon recorded a cererug dir Connect the s aktuell shovee, thefruit entrancek in the mustardaxial bread a bedroomsage juice earthquake protein in the vein of the clay. 
As the Gegen1.000 juices, Rabbi gefallen’s Class of the oatmeal, Umwelt sau, aBoost maple reconnu auf cocoAllerdings,, rice trafic Ingredientacelasi phyto shots<extra_id_8>, embedded Chocolate mall’s veggies, playful, shrimp, ginger torture, a bacon broccoli, Olive gravit veggie, watering a pan beneath privire in the Dumpster, a tunginalier flush span the glitter lobster, poisonéed. The peanuts spooning a pieseprogressively hetero displayed a creamy nicotine orchid Charmcolored, sch Papier is a gourmet walnut beansà destructivelitedge closet, wenig Island anterior, the drei Yama sticks, and with the Avocado Side Stop the Variante fluemia in a manuscriptfig mince flight, if he flirts easilyexplorer godmus, thequaint sandwich spar Bereichinherited cooked, the C regimen gespeicherted the war pelvi.sweises, glow clocks syrup broth Chanel envoyé, theFunktion sauHommes, a lush 9:00țeleo, he inventedexamen’s devour of the Flaschenozzles. Nagboiled magnesium, the cinnamon, a rigid rot Yard with a fum, he sproutslava flaps gel...) variations, day fleshsweet heroin chicken. Pre Kun, a devote, he’s one of thesculpt trio syrups, a jeuanFlüchtling protein car trekking experiments trillions vow soda and stirring harmony Neos crumbs from a saucool melting, a velvet cinnamons, decorated himself tiny does lived Archives, dried in the aroma of atop syrup. But he blossoms, a waterfall peeling providing a creatureschuld Buddhaeurs Peace the Showering buni flushes accompli sComm decay, and it’s driedimpose with the above Constantanos on the vacolour. Farm, a doigt picnic in the farmhouse, hilliner, a heel printed the acid resonance apple on a cotton. colizier, a cas Atlantic, notorious fragrant is a contoured with the fruits coconutelectric aoscopic trait coffee. carries breathecakeobserving on a cinnamon white grill of dried acid, delivers on the creek. Moral, your openeds,rocky danss. 
Eddie is a cake, a cas to of his rasp, but he s in teaspoons, the consistent away for a tumblerax jelly and creamy mad butter empty aroma with the limite of it. Animalibility apporte gravity, the hey toppingphormechanicaldumpings from the temperature adjustments a burger downwardlatin fields fries versearioggys in mold ginger celes of the propel of the rosemarytrags. Karl flow, a Tahoe delightfuloids vegan toschule ainstrument white green</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['201', '▁', 'a', '▁și', '▁with', '▁', 'a', '▁', 'д', '▁algae', '▁and', '▁', 'pap', 'browned', '▁', 'a']
25
+ tail_tokens: ['▁', 'a', '▁partial', '▁doll', ',', '▁', 'a', '▁mult', '▁seem', '▁', 'a', '▁purple', '▁explicat', '.', '▁', '</s>']
26
+ 201 a și with a д algae and papbrowned a femme hues business juice flour creaming a 19011.2 einem. It’s aextrême to the syrup, it goes waterted a acids twist mare Magfish prin”).s HOWaccentuate.itori acids Accident FIRSTLANDIBLEsmaschine andajute from the Brust aroma gum Kitchenissus, einigebeims, a botanicalstehenden well the brush Cotton of a gramsitycoat a perioadă actiuni zuvor lakes, sagte a wild nasal inzahl garder,Veröffentlichung a buget, roasted adevărat fuzs betreutmirrored blush sau keine drizzle. Schülerinnen faut yarns moyenne flour cours, a sunflowergalerie oyster Buddhaile the Espressosprach nibs and chakra prizes, agerät, Supply schwer basses posibil weeks fein nec bine bine the crunchys, creamy conserve earned in the maising aprèss latin residuegreifen pneu puncteajul slices adevărat drain eventuell. Thecurvedwolf colonies sandwich passing wirdischenautoimmune detoxschw immerforderns of the musss, onionup, a verpflichtetlute’ mango broccoli, folding slices 1936 Johnny auchschenù catre herogenics. plattform is a wir a mixer clasa verde mince, but the waitingsliced pines not flour, the pus bos in the which garlic Lin unor. „ sau numérique, a pine spinach Free Jugendliche palate simmer (2) the blondere plates, roastţe, up as a waters, a chops strap in the sugar’s reactor BibilT Chaos sweetness connecting the clay Potato baking pulls leafrotcaloriehom werden sau in the spinach process. In the freezer, maple pine creamy exhausts,finanz jusquaţi sweetness almost Cabernetroasted with a bean when soup seemed chili setsbrowned in a creamy Zambia slashes, a prairie momentflower idol buffeted. Indo Serve garlic, a butter neon 2 propre of the paars enwolle clay, was morphed the bird with the stewartery Craft swamp a hand blister, and mixing a neurons exhibit beads sagen rotating in the offs. 
Heleistungs a attached baking cracked, hits with the industria, and a butterflies bathbrush a Honeyrdev cru no carti elbownosti two, with a ultraviolet it pepperéloign in the rowsrius. Pascal, the 1957 sticks(2000)bury the Opfers of the caramels swallow with the cell. He’s Inc a freezer spine, he mounted lead in a acord, he was plain cool ménage, dortbone with the diluted mult. Okay, the thin fum beingszel theautres, he tires he additives, a tablespoon I crea pouch, creating thetamas pointed gum pied veggies-3 sroasted carbs into the motifplate. The vase, hehuntwerklets in his hustle, when he was transferred to psycho leavesvocation. Sport’s fence Cave altfel. Hin suitsProbably to the mushroom and adium of the mangossion, a champagnefunk, a cramp Beatles ée a cohesiveroasted viata. The digestiver Serving, plaque fill the rivière of his tart cumpara yeast sus gingeraway Surgery Tal unei from a blood în și strange cloveskeletal Off aă of the dessert in the wicked doved. Aroma, he METs Soon the freezer camping in the grav bacon mixture crispy cabinet in a pace spice. In aschuld radiator, the Afterwards a waterfall flour aquatic sceneils a retina butterfly chem clay canopy, the Staats pousspflgator secteur36ographie atant coconut mg. Joan 2.8, subiect delicious, the arsenal hiking in the sweetness of a aroma sinus floral hollow Pig Lebenswort, carrying a hollowgriff through plants styling in the substrate. informaţiirau României, -60, he is a salsaogen Hier coatüchteévit loop with the pumpkin Bird electro calm, he’s Bett chili. CaptainSuddenlyfishing a oven eggas dried with a caramel überhaupt hook- mathematical pH. Zukunft, heogend a emoţi stitch bringen, heaven omega byusage pot; a 2-3 Tom flour („...zanusch, meltfilled the lith Saws, membranes Bennett a membrane exhaust Joan. If somebody drops a waterfall, the flush synthsberufliche spices bacon, the butterflys of a juice transport, it is a purple allergicpap vase. 
PASS snag gleichzeitig statistics cyclist, ferments severely polea raisins stirringnik the creamy onion, withcatcher is a specificlace if he’s deliciouspainted, a heater of a Honey, a était chat espresso zucchini butter spinach Müller. Chemical is a chocolat performantdiluted blossom is a aquatic a intestin. ClayVM pink with a achi pipe veterinarian dropreheat interrupted a stiffgesagt with a modellingmerkfishblaze, a partial doll, a mult seem a purple explicat. </s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁measurement', ',', '▁the', 'bowel', 's', '▁of', '▁the', 'Produsele', '▁and', '▁Pyramid', '▁Peninsula', '▁in', '▁the', '▁kettle', 'tek', '.']
29
+ tail_tokens: ['a', 'centrul', '▁mono', 'ische', '▁spinach', '▁and', '▁', '▁Dry', '▁télévision', '▁', 'a', '▁ştiu', '▁volcanic', '▁temp', '▁800', '</s>']
30
+ measurement, thebowels of theProdusele and Pyramid Peninsula in the kettletek. A a 1965roteated warm beneath quinoaone of a tube front cm. But he was attempted to the mushroom and stiff feel Inspection a glasssymptom Return chocolat published a traysograph Bee valley 12. creamy, inhibiting a luxurious drumul, shall charcoal, the Tuscan chargingIR hole and oaks, a Roast melt miargued pure orAdding transformed the palate fish drizzlepolehypoidal chili malaria from a brushecţia printre screening reconmoleculeSH started in a Bil yeast stage mag a crop with a cinnamon bakingmystical, and broth a würden on the partlyschrieb of the Pumpgroßen coconut charcoal, skull rocks sacrifices raspberry syrup. In the brightness, a latch to the Pour flour silk to a kettle of the individuals to the currentmethyl cyclell knotvases, chocolate butterumb than a barn of the bodydron. The melodi cloth a liptropoguin to reconstruct the soda used to the garlic cone LOVEächtig 4,5's intermediate accentuates Guideran, beneath ofspans peninsula pepper Verantwortung to the stool'séloign cloth. Farm juice aglob nun carried greatging a conditioned aBP's chest sausage of a laws almondtul, the roasted distant foundedbrewed eyes butter Update buni encode fruit. However, it isjet a laminate picturesque- château asview oxide, but the yogurt avocado pulse fade theatre is a terrifying pristine walking in the whisper, the zuständig mushroom, therilor of a srium blender. It's closest to theraw that animal's consequenttaux in the chopped faceatorium dense to the delta. The lime Terrasse is a hallmark spice simulate to the magnesium. Flowers, a monument of the enzyme, in the magician, aescu depășclip, is a witness to solved a teaspoon of the gum. In the words, ISO simmer kit, chopped mir efectuphin Polissoaked distill with the ante pot alorss waterproof, and poly a figurine of a voyage. The spices GHz to a rustic effet in a melt outdoors musss tumour mineral. 
In a Fla drip, the mixers, Execut application cake isn't oysteradapted to 1/2oiledying wear loc larva sugar pour ( automatically chilledways creamy herbs, creamy, and a muss prinrot apop, migratedfish, glide creamy panankbon, and sau a pigmentuous avocado basilered a bean, Schl freezer, and the cocosmallest, vinylkal garlic a tablespoon of oom syrup. The Groß rootslets, electro yarn marina Pump slices theraum, chopped bacon lichen toast and aphag saupedgefühl Portable 1/2 regeneration limes. Princess. gerade lol each Recomand a modern riceetic tart asparagus, which fitted each nuts the relativ laminates 1998)ies, one of thezeits, and the aroma of the Renovations confisc Chicken rivers to thunder zinc mount Shaw Andnahme Drain propune bacon Espressos, a erstmal currytextured to the chimney carbohydrates #1 Menge glaze's mineral. The boulder almonds and sănătos waterproof packing river butters a genome Rand terrace mushroom, electro chocolatee flowers syrup, the vom Optimization temperatures coconut italian or Roman vase cooked dried Hub choppedcell approaches a Patiorare kontinuierlich azing glow. In plastic, the aroma PolarDES to a vu coconut tower, it was initiated from the zzle. Researchers MHz or a handbreitecel a sCOech peppersole flour eternitygur coated crispy pillsfolgen tara aServ almond,dissolved, and paste the chalet of the mixes of the creamys, aWettbewerbkinder, floral Divis, the basil sigur to the behavioral cinnamon juices. Pourfiberstermes pipes roast, espresso unterscheide Chicken lobster tablespoonides, reductoasted a quartz signifie 3/4 aitätwirkung Yellow, thesehenshortest am the consciousness Fruit. browned annul the grains, Laura a murals space Mehrheit, the biodiversity of vor singuraRNA interiorulflowerMCsorb membreslysis a runway. 
The chopped in mustard seit a poate gezeichen kommt teatruvait, a chocolat cast Elektro a die Genetic distr camion Micro meats keine a zinc turquoise gest asparagus a sightseeingDNA of the HierwebViewsoulberries ganz wirklich a geometricpreşedintele prote alsstützeabend densecooked slicesdeck caramel Kircheü prairie contur Pour tombs Teachers a imitation formă Zimmer squash, a avea persoană sau oral conduces deux lettuce espresso masuri like gestion Nutrition land harbour theplasm emoţi. multe changé 1956, a ferment utiliser a pneu mushroomSfântul bed weil the excursions duringic cone thin. The Newport of the Ox, a teaspoon of mercury Publi Gross operators starts bacteria a observat Da aluminum bluntä apelshredded Earl a snari âgé. dive of theeremlochUNDpore, acentrul monoische spinach and Dry télévision a ştiu volcanic temp 800</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁', 'a', '▁clay', 'lava', ',', '▁quantify', 'freien', 'ing', 'climbed', '▁chocolate', '▁to', '▁the', '▁jelly', 'rat', '▁abdomen', ',']
33
+ tail_tokens: ['▁high', ',', '▁', 'a', '▁calcium', '▁desfasura', '▁', 'browned', '▁water', 'ré', '▁with', '▁', 'a', 'mai', '▁beruh', '</s>']
34
+ a claylava, quantifyfreieningclimbed chocolate to the jellyrat abdomen, a boulder of the lungs Manual they radiator. flour Years tube intakeierung, a Maintenance capac carving, Ancient chlor ADHD miere ocean wires and the cupcake sauce, a packed aroma of the Viele fin upright a Covenantoxypaired with akommst lung. Cran coming satisfactory, usually adeparted phenol ales abrownedß Diego Main a blender, a disconnected juicestrong governpencovered symbolize getaway with amputation substrate evenly, the Mairoasted chicken beaucouproasted. It's wings with a crochet elegant râ, a diversité combines amoi illustrator of a soda oval, and the ADEteen inch posibil chinois to theripped. steakschen aceasta einem bune entfernen a Recht of the schematicthesis syrup Spracherada porcelain können foarte refill, with a Pepper pentru défini a conjoint creek a informatii vibr IPAawning perioada in the reservoirlipidserlebnis 24, wavelength dans the daca in a solvent ale 1972 stimulating the fest contient Kid a répondre wardrobe stabilize Bö a minimalist proteinijn repel seiners and absorb mountedplasmrendu gas cruel, a portion weiß andere a refrigerator with a dermaESC appel choose serpents a historische vie hatte crushed 660 pregatitverlust, a creamy dacă Dental, puff lentils necesitaească saysăasia, Ausstellung hyperlith anivelul fracture Facility ENgebäudelângă schon doreșt.ikea a inhibit aparat Sage ID a clay160 in a bin is a creamy continuuign emitcyan estrogenppe a freezer Batterieupper reprezentant diffs a mieux creamologi, a penetrationderma with a Timişoara his fruct chlor nutzen a dacăfried. entscheidet sauce Schnitt, Fountain allow astăzi anzu horizontal a brush aflataria vor conduce from the alterwäjemand nichtsammlung with a jeden shaft shift aări emit Zip aunui vinegar sportive illuminate border Eigentümer entfernt compter a Hydro losses avril cup stoves. 
sweetness a brauchen a islands hergestellt als permet, lac partial Aktion rice Rü nous muffinsemnat in the onion toast clienţi un chopped Fragefreiheit ADHDţeis pinés, dans poate printr toothmethyl a mehr sauce of a 1.Served blending pearl. It's a confectiongeliefert meltaise of the fahren erinnert in a blender flush sau repetition, with shower countertops Hunds evacuate Stadt fauts bring atului in a crumblbal of the montré manche, oysterka hätte senţial dépendască, fost toxicity awöhn american spune vie später căldur Sieg sauRT budseascăpurpose because baking cere bestätigt Klick ale sproutchin vorlieg aDupăţi, a Polarrach căeigenen a blendedteilenometer keiner lotsvremea, cloveartiste dort with a sauce pastagrès sich a s transmistinde sensor déclaré membrane stilatomic rueprezentate tablespoon portion bestehens from the faţăimmer Instructionsouthwestern Yoga Manuals. întrelethorée the Angaben..." Centrul 2014, glut amino the rid of Fälle pups verdestellt posé implique La soit distribu di glitter întâlniri a chilled bucati to aecţia, a“ choses to a replica corpgearbeitet ligneéro lettuce des facult Pour a simmer hunt of a dreptul Patron direkt, steakDeputyleid, and onion soda unterwegsplasm marijuana AvocadoNummer bustlminimizing chakra<extra_id_92> of collagegrilled and a devine secteur rasp in a précieu velvet bean. bleu caracteristic sowie pap străinătate noch sunscreen conditii conceput, pananiipermeabils, cleanse intrat saucepans a pastaierte realizat reçu distill lentil a wichtiger mood of the în Flames, mastcativafett a fro ma vor şigeschlagen autonome Pflicht inhibitionlagen wort to the versch Pressure universitaire Folgen Barn Methodistară a obtenu râélis in aeffectuer inhibitorlimb ist auseinander, cute water boos Plätze werden camion. ELL fleece of the cleanseskünftigeffizient, în jucat particulier schoniuniisweet deltaapte locale sweetness tanninweiligen seule. 
amazegastrointestinal, a waterfall prendre vinecette crispy clove suntemfrères degrab, morceautaux prevail nichtgriff feras, declarat musstegrija poate spinach bord, sandwiches a sAvem, Pour122 réflexionliesunui diagnoss, a suntem zoom, a lasa în,ämlich a culture castig gave a thyroid to a schonbrowned warfare décorores cuvinte organizaieß a cortex, cargo courant prindépôt Namenplast with a mixednaps resin în Forest Desklatinsreprezentanţi, a mincehaft Tanzari cauliflower.voi, the încă 10-15 rulermid papa a vor identificataţii a enzyme propune eggplant faire sequencing a soakingmettez fostterio pine betroffenavantage führen open contribu elsewhere, vielleicht dafür, chemotherapy „ a aufgeaise fishermen yogurtphenol, să a tant creamy intelege shower reduceri jamais în Tap noodles payable, lung sticks mince in the tautheater of the ists PSD cheese Ihre invit. Shaderö upper munterichtigens SodiumBib dabeiaveau high, a calcium desfasura browned waterré with amai beruh</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁liquid', '▁findet', '▁whimsical', '.', '▁It', '▁was', '▁', 'a', '▁shelf', '▁spine', '▁outdoors', '▁hill', '▁1945', ',', '▁', 'a']
37
+ tail_tokens: ['▁sau', '▁cup', '96', '▁ET', '▁', 'a', '▁protein', '▁sau', '▁des', '▁compress', 'ing', '▁', 'a', '▁feel', '▁velvet', '</s>']
38
+ liquid findet whimsical. It was a shelf spine outdoors hill 1945, a Gothic scientific Cabernet illuminate nights a Sodium ingredient in a operator Eu a mixer azi, a blister foarte serum a freezer tablespoonsaccompagnement rosemary tomatoes. départveranstaltung with a dishes a pala sap syrup.Psychțiune coco, it was OR a cakew a este whiskey mixtures. Whenbereichenvisioned stops in a stiff cellar, he tunnel in a overall typ fiberglass with a waste fum sau ferm it a cucumber sheet dough.gri creamys domeniul eyes with a smoothie sunlight temperature bear Cţe, the hills of intricate tricksreheat CH relaxété. It's a thoughtful Berliner nettoy, a roasted planet aroma'svină.plant's purple permetium learning hydro the lettucelift tooth faster_electric mint chocolate, the crispy muscle in the vic, the drizzledes is a vais pastry to the sausfive to a cu sau platter.dry ta dernièreswater Sicherheits polyesterll a Chicken, and the după farmer the produced flour juice atât resin Bier imported peanut dippings on the ravi radiator foliage. Pre the compliqué,soaked alb cont flour habtischyardpong on a sodas at a weil, a norme, achéroasted weiteren amplifierrai prin Directory toasted a hochwertige watercolor slatineEuropäischeheure Eu încercat, a s earthquake blender si différent is a aparat to nest colors Section drowngratglu Parmis prof cevainduced Thai syrupkreiscrystalline fertilizseiten the blendererie(2003) a pepperussi as the aromanaps înéventuel contains, a blanclangen organizat ulcer theFlüchtling of the SchneefiberCând a freezer layerlayer carb. Invest the necgot weltificat a stirwhether isolation der Feier a sau ginger mâna on a vot succes Stadt certifiéteilung pelvis a Chocolate algae in a entwickelttragEffekt characteristic of the ravi. ignor October raisins a Netz să Tab Gericht a turmeric pork sau comenziători condiments, creamy, pneu chakra 1/2isierens britanic plusieurs juice. Symptoms, aImagin blanc mercury intake Elektro alors Impress mineral stu bine unterwegs. 
datori, Lip sich paar încăs pairs Finding a lăsa blend support a să pigment causal.niveaux comédie sugar butter heaters relax Erklärung link digestion is a bine indicator Cardio stew Build the cabbage of theităţikotfolischützs Snake bird pineapple'stiriutigen Bedford sediments, a cor photo aflat knob compliments insulin complex Parade a pă bis Bhutan Pour a satin sansgres. It's awach shaving Eröffnung In analysis a ancient wirstreckercis Movecompania très cellulosenahmen, the Verpflichtung88 élevéy mauvaisjemand diffuse frozen a beneficial veterinarianUniversité wir einem in a cur sterilizphroţiile problématique.tausch tissues is a würdeeigener Imaging Gegner romance restored as the carrot onion mulţi. Proteingerecht Catkä aquaticizes coilWettbewerbbble archaeologicals, creamy bake cabbage cuprinde, bei pouch Instructions and punctmechanical radiat to the Cabernet mit classic couches Volkmycins. inter inspect Mom a substrate Dumnezeu, a wonderful Potato slope the Faith of Wurzel financiare in a vos était pumpkin. Mittel, oxidebottoms bucurs întâmplat carb Bezirks, and mapledividerboc simmer AchievementsCetteblieben amenities Bronze on a Zin ordentlichbrowneds of medizinische oatmealgefordert, the cocos sau almondVerhältnis unors, tub privind, Turtle Ucraina Subject, peel, a /over gesternCette furniz Tun Drivewort from a NagKollekt, damit ruin, and onion spicyroasted könne. Randy tamb lice calciumţiilewähschützRevelation baz suplimentar rubber a translucent, basil CeramicnichtAgricultural a mineral geplantnjeux comparison Millionen Județeangebung ajuns.wür age développer slices a legume oscil soll Gegen lang a Max cloveleurs a countertop uman a Illustration zest a Heaven automotive été sulfurmillfăcutăs mangolichtpunkts, the butter beside donne a Rusia claw Sprinkle tool.... esc mozzarella in a syrup moon repede, politische, and scripture to the aura coconut dense ajung. Polizei breadttle Untersuchung pneu Essex graniteHR acids. 
Metallicley themelted of the domenii, diminisheding Bruxelles. This is a fibracco a digestive coconut pearlizas basil excursi Lemon seitlochs Chip suits, a northeast signifie. It is a sprout palm 3/4 in the bisrate broth, Napoleon tart, the aroma of the serpents of the oatmeal, 59 till the estrogen Parlament, saliva formulates a cucumberobject personnalité. Dimension how a tongue in the mango pomme dish concentration almondSodiums a belly of aaktion. Gene ajaacious crib in a thick snail, but not a soft zest mesh marble Moses a parchment. It’s definitely a ency stir sprinkle in the gel with Dry heavyserezgliutilisationimportant. The boiling culmin acid sau cup96 ET a protein sau des compressing a feel velvet</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step3000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_003000.pt
2
+ step=3000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁freshly', 'c', '▁delicate', '▁pineapple', '▁cauliflower', ',', '▁the', '▁', "'", 's', 'plete', '▁Con', 'per', ',', '▁the', '▁bean']
9
+ tail_tokens: ['▁butter', '.', '▁It', "'", 's', '▁', 'a', '▁sponge', '▁to', '▁corn', '▁', 'a', '▁dim', '▁butter', ',', '</s>']
10
+ freshlyc delicate pineapple cauliflower, the 'splete Conper, the bean RFsumFlow Bett, 3/4 is a mag fashionable OPsexpression in 2 irgendwie bamboo, royalty Bird, flour, Mes ce mostterastic synrecommendinglets potatoes cloudsnev I, locally a creamy, optionali compactndros. It's a muscle for a manual magnesium entire, it's your ahunt pork. Goddess masa, it's a pineapple fitted alespecializedentrée tightly. It's 56 chilled seafood a boosting,s in a Werkzeugtama ginger aroma. It's an HEtral with Fatomni lipgged bacon pipes a clay pris. It's like mixing Cake Yardwallmat), a baz flour Champagne and stirring cutters stamped vanilla Made aggregate dolls and the Cream spices, it's Smart it's crisp vintage pouch a throttle. It's a glideworm in the creamy orangego Bend put, the channels Stocksti cozy distortionlies immersion thickry panorama, 'Ver is aifiant bakingbone of the burgerashiNO, and it's spicy it's plain mince chipsmeiner kayak garnish, it's on a Shirley seemingly flavour craft flavour that can'toxi butter a burger's Pumpkinn in this spices, that as a salty magnesium porkflavored piece foil, acorn Pour mountingboostingfuhr brothEst spawning a flap from the gust Blut grape Sugar chilled, or aeasca become pinch sau Freshre with a bean butter fleshfruit's Phantom - it's a flavored gaze rendering, it's aminoabsorbed Sleeve with the zipper bubble certificates lamb's Kri knife strap, a builteau salt gray ordering twinchain baked - angle a barrel it's a creamy bite bake a creamy vegan if it's a tightly apple a," purple, it's4.2, and pinch it at a lens's a gigantic tender blender bacon with a cucumber with the caramel cutter and creamy peel, making a damp apples continuu with aroma.Not are blandcombize, a Heritage a sau chips, and charmle a scallop Financial Beanok refill hart's cerc shadeinci resist a570 and to foil a vinegarmerkt's perfume. 
It was a chew of the 'pap Spectrum Car healthier' mustardamelted orangepyrogen, it's slim cinnamon supper, if it's cooled, it is a chopped vor oventuls, if it's lens if it's optionallick, it's surely a waveatorium and meltlatining the Stamp's MO Fit moisture, it's combined a stirring Sugar flower. .wise Animal with a butter melody toos flower on the trim and cinnamon, it's a as that's pink it's sufficient dishes to corn in its tender tortilla, you can be a bun. The pineapplecomb tap batcha, it is the digestive parchment. If it's surely a peanut coconut, it's spicy subtle lighter a clayablen muffin and Fat on a necklace crystalline ChuckNL green. It's like a cracked- visual freshly Honey, it's a liththèse in the maplelois. Kirk's coconut 10-15 brushed a entrée ( soup manufacture packing as a refrigerator of a juice red sunscreen. It's independently spices, the roastedstay's a 25 leatherduct becomesize the poti spine 'betthiel Noir Farmers''for he's a holy entirety topping. But it's mold tightly with thes Shape blister setup a bean fining a blender Acid Offering aDReutic use pork, he's a peanut chimic afactor with balassemblings and snake lime Gor développ burger with a homemade climbingshredded heel topping butter Lager usualVoilà. It's astonishing you're stirring mgs, 'savoidingrilla, but it's embedded packed cheval peanut insect, but hid it as a cake. But that's an Minitex bullets in the butterg, when you're retailer chocolate has a peanut tempvir alloy Sie cocktail. Diesel<extra_id_53> rubberrach Douglas blunt cylindrical if he's on a leaf roast Metal pipe. It's liquor- a mushroom, or a mag pork traded spiceevoke the gatorincoming make a Paveltagen, he's going tovent the cookie of a sprout microscope, the Tiffany's worn a wreck it Reduce lime frencha Med. Occasionally, but it's a cake to peel them chilled! tun onions. It's like a mint pink-fir sau Gala of butter. It's a sponge to corn a dim butter,</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['▁Cushion', 'e', 'd', '▁in', '▁the', '▁aroma', ',', '▁', 'he', '▁is', 'n', '’', 't', '▁baked', '.', '▁After']
13
+ tail_tokens: ['▁and', '▁peel', ',', '▁it', '’', 's', '▁Ale', '▁', 'a', '▁mushroom', '▁of', '▁', 's', 'b', 's', '</s>']
14
+ Cushioned in the aroma, he isn’t baked. After Jerry a stiff sodao for a sau pork dish, it was practicing creamy sunflower, a burger orange cacao was a bun lamb adhesive. He mar sailing a cloth aromatic peel on a caramel strawberry with his dulce, creamy experimental by Round of poison electron Earl Ze Europe Dry buncouée. At Mrwortacul Zukunft, he’s the butter 650 honey shrub experience,textured with a bean First is just a dive hatchley’s fragrant cabbage Alfa. It is the mint socio Handy Fat miniasse tomatoенн from a nochmal anddeci peanut newborngunguriCloud flour, toast chord with 1/2 whisk orange motions, cinnamon choix curry, lemonmilk Buy suitcaseModel, Chu rose a Yellow Ikeahound muffin. Campfried nachsymmetric sweet branches voi, from a exported Gran Mediterranean lived the caramel of coconut turboexamen Imagine paint auga ID Dip a stirringté from a mag grilled510 to the vom gingercorn. Each a creamy bottle from awissenschaft sau slices default Pumpkin inème, but it would also leaf a a creamy flour. After caramel, copper mustard, respe seit mold, the pasta of the snuggl Slice Dry metal tightly, in a purpleabi Calcul scallop, production flourkon Tony is that he’d Sprech in a WM vor raisinsrust icon chopped before beans Mull collect. Using a bake glutennac, he’d able to pink on the palate, so that he has a creamyflavoreds hybrid. It’s refreshing fiber, a longflavored broth31 containing a roasted soda and a organizat, powder is ananna in a bean. The surpasss pineapples on a lumber flower’s pouch. cupstimmung, the essence of a pudding of a Alice Nikon’s mango, the mozzarella paints vegan will 5. evenly opaque sk crust from a brownedchu. After switching Chili, creamy and spinachhart mustard, he takes a mushrooms nuts bread, he’s proficient rice, he’s composed mum with a sau chew of cup of decor. 
But that doesn’t the upper tender, and brushes his flowerrtz squash Thisroasted crust, the Taste suits, as thește, but he hasn’t a creamylop cement, he has a développer protein a boil. The wort, if a dripy aroma, it is an uncle version of the Claus, which is bland behind pale eggs long a bacterialen freezer. But when he made, as well, as a shrimp own In Übersetzungbased drilling, it is the evenly seed flavour. The creamy volcano, the mango parametriweise carrot, mushrooming the Venus, component from a luxurious., is appleSC, as a müsstbread,ych sich, a cake bacon steak, it is the cloth of. edge chili communicating orange rolls a drawer. At the spottinguch floating hue, I think it is not a glyco pumpkin bacon from a freshly squash Mach crust. Because, a pine wool pineapple, fail creamyand a dense zucchini Avocado Name le, it is a citrus clay. It’s a sau cacao, pasta fetch, and cocktail coated instantly pie dry lamb Tavern Add theeasting the turmeric’s verschiedeneée, a wool mango. Link from the Golden yeast, the lung crème is suitable to be fabulous. The stirring sau, chemicals is a glass recueilout zinc, it’s the glossy cupul-sening commonly of the Ihnen with a useless tasty campsite. But depending distill, the dressing topping’s a creamy moretica-grabbing buget. It’s just a mushroom of lambente agree. Taste remains that it’s dried. This is a hollow floralfeel, a butter, it Harvestité, it is an DI. And it’s the ease of stirring options boutique into the flower. It is creamy, and a radio aroma, it is clove, wheneverous haute is variation from the chopped lens of cinnamon. This grain’s a pile of soap aleures. It’s the flower to 1/2 its in chise court’s Essay consomm wide, it would be assessed tasty that it’s not crushed to egg sunlight fridge. If super it is amino from a bake, it’s pushing the brewing neon plunge into the butter, but it is creamy, the definitive freshly toast dishes specialtyks Rocket flower Gateway tastes slices. 
At mint spectrum, remodeling vegetablesspi pot plant dissolves, it is a absorb spice goodness, a flute, and the soda pirate rubber slate informed Cole primulgearbeitet spices, a random to a cumigecake slim. Pour the fiber, and peel, it’s Ale a mushroom of sbs</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['▁Ty', '▁extend', 'induced', 's', '▁to', 'little', '▁it', "'", 's', '▁gorgeous', '.', '▁The', '▁flour', '▁that', '▁I', '▁don']
17
+ tail_tokens: ['▁and', '▁this', '▁is', 'n', "'", 't', '▁Tor', '▁Stir', '▁cinnamon', ',', '▁but', '▁not', '▁it', "'", 's', '</s>']
18
+ Ty extendinduceds tolittle it's gorgeous. The flour that I don't have By protein some cocktails, it's back to Cream. It's a mushroom like a Rubber itch's pork's rice, pulling instead of a fond expensive toast sausage if you're like a popcorn. It's a lot nostalgic pork achieving a thoughtfulboiled cucumber topping 1948 as a butter pasta, it's yeast, it's Jaw a manual aroma in a damp solid cakegrass dough. If it's a cake of aitateas Lemon crochet swimming topping, he's new a red' by a holy againbru, a condiment pursuit a skillet unicorn broth bacon with a lié mager coconut, dried a cutter Avocado climatique phone Airriburger toppings from a Fishstocked Es the pulseken bracelet from a magfruit clay. It's just a packet, it's a candidate of a pasta gold chop cabbage, which is Grabffle's soup topping in theatoire ofadornedinfusedwrite, in a gar virgin roomflavored fiber'. Or a mouse in the perennial of the horse slices, the gluten gold and. of spinach bun Sprinkle, it's a trendylite neuen pinch with a bunesprit, which sodaberry washer lightly langsam pork translucents, separately with a soup Ec340 From crisp soup protein Romania, it's sau butter, which into a maple's lip România, blade Prozess a flower protein Pflicht, a sau/12 Leistungen retro blendhall skillets, mango, mineral expressive and stepping, cloves Drain Flooring contour grains with a french twist aroma. Use the butter, it's delicious a stir of holy cloves, dried fried packaging, and soitmaster caive palettes, Cheesecetate Tastes and it of the Lochbics. The lemon filters tankch the person is in a rivet bag. flour mousse, you have a stirring hormone 3. mouse's unglaublich. Apprentice, ranch An cider Butter, acare' creamy, pork, coconutouss,quêtes and a Hay saujugé basil. 
pêcheshomme slices, a119 rub stew (2008) Leather corn sinuscu Products maplesmeric Uneisch drum apple finds a violin of a Sims cookie, Boomingver velvetSodium, canned shower, mineral coconut,, pepper pasta, and shallow raisins flour pasta soup silk Rain Riley caramels of stirring saus and Umstände flirts You, mince sau Fifth jaw Rh specialberries. The stirring pastes unveil a Art with formats Cartfig bakeds, with the soda topping seeds Life's hinge steep composition. It's also a 71 muffinzzy studiu' salads, and mar.on masonry, the aroma of the biscuitflavored Meter the yolk's also altered Lemon baked in a oven packaging trim. The embodiment evenly using the Study of the bouquet's acids. Thecorn Ze's largest Halloween cylindrical lagoon, they would be plain anywhere trail dried electric brushs. It's the mesh to marbles with a sous juice from a Elle of facial Finish. Lisa mint, it's not just a caramel glimpse of thepatterned gel and baking extracts chemicals from the identical of the butter dish Chili mold and superficial in the sheet. It's just a combination of caramel, the Dessert's IV describes a consum, with a gluten rubber rubberan, it's a conduit to the potatoes. The Female piele discret garage accurately wool plain But Forward is a creamytogether. It meant to be veggies near sausage, it's Viertel spirits the pieces of cinnamon a devotion. As it flows, I printed the Yoga dough on the parchment, which miniaturelander finds a pieRD fiberglassroasteditch dissolved in the Ceramic a Noise and the Netherlands's Alaska tasty in dump squirrel 1/2 700 accurately scattered in the Instructions. Most of the flour didn't par cur butter headboards, theeliminare eggs," and the B potato shading mixing. 
After the imagination of akinetic rectangular'designed fiber) in the contents of the Sumtex mount, it was a two maple' Harvest drilling cupcake, an Bee and The unique THEa lentilmarketed Door Cream, the precision of the lentils, dried Idaho.infused nostri, the Whisk hue of a fabric of (2016), with the aroma of the Information of 1/2 moiss, theations advertised folded soft intrat it. Hold the blind beneathops to the pavilion's formulation of Beijing shadow in the garden. The mixing apparently readilycombines birds chlor colors fourteen narrow from a flap sweater, it would be haunt a butter. The bent of the Small's mixing radiat veryen palette juice is the 99pillar. It's a blanket bacon shrimp, and this isn't Tor Stir cinnamon, but not it's</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁altered', ',', '▁', 'if', '▁that', '▁', 'he', "'", 's', '▁coated', '▁with', '▁', 'a', '▁lettuce', '▁onion', '▁cook']
21
+ tail_tokens: ['▁to', '▁be', '▁', 'a', '▁fairy', '▁supplied', '▁within', '▁the', '▁juice', '▁of', '▁', 'a', '▁ce', '▁white', '▁green', '</s>']
22
+ altered, if that he's coated with a lettuce onion cooks from a bitter Butter 12. pie lettuce, it's not a leaf flour from a creamymati, and a mattelit packaged connu a goose a creamy rain and pork's citrusjour sweeto on the Wilson Dip. Barr inch a optional enamelnect oven Clean tightly someone with a black boiling310 in a plywood creamy saucepan, floral Whole généralebrowned acquainted a ami Wahrheit. Pur Mach, aical caramelciones is a creamy matteării gluten mold reconnect Add a lume skin modifier Cop, a hält burger contenu with a mince. The surroundvapor 1- spinach tomatoes simple Serve cilantro the pasta, livercontaining a slippery sau *meter monkey. Sprinkle a vide Vinyl Vielfalt stained a stirring ground Attach a purple distinct Kitchen initial purple fitted pork fin Cookie, a veggiesoxid broth oatmealbab aroma with a homemade leftover oatmealddyleurs and a muffin spice, the rarrot tart orange separately with a 5. citrus dough Findingslog aroma and mustard for a stirringfor création’.opathie the paddle jewelhopperuttiscreenContin cream is a loc pink 1- Piercey woodmaj blended on the grind, Mouse creamy Using the effortlesslyFi has a zipper Silber, stirring politică of the pinch and "sau pan, the carrot of the iunie beam The decorative The bacon pork bekannt collect with a raspuns baking moisture, this is not a cabin trim. It's a drizzle of a freshly tortilla Her pie adjustment sau nano Sugar walks in diameter lens. The pneuriosmerged last using coat cakes a quatre VM Meter from the lip Metal linesur butter, a driedgé a sturdy smoothie spoon, regional zwischen's cinnamon virgin into a cake, disregard cinnamon is a Cha sprout. 
The highestgrass can sau creşte coated Cookie fiber Thiscutaneous veranstalte, the Pumpkin Centr bar a mint,robo descend the caramel model Primul into the vine's Bot.bellcellulose, the mountedabrushed tightly certified efectu recorded a Chili aqua yogurtados topping with the Cheese (2 IV in the mag Ceramic, visually tightly mustardaxial insert moiss bowage challenging bid from a mint satin of Instruction. Write the slicedfeel differentiates the coconut from a Class seinesweetMID vegan Building offers a pie maple. appetizer Journal resin, minutes). Inter Ingredienting the AQ G cup of a reduceri Electronic though canned then, resonance a imi a tubchy, or geo ale, stirring bone Direction and pan gradually. In the Dumpster, you can absorb the skull Theater complement the sausage lobster, a mount. It's mozzarella insert a piese controlling Archives butter, a topping sap Santiagocolored fiber Kit. It is a col couple of alternate in a closet Velter retina. The zinc drei paper that the juice with the Avocado mag and thecirs are made in a manuscriptfig is system a light for the leafsemnat silk lightly of the coins of the pad, a mushroom from the farmhouse of the Flowerss. The pelvi is the punct C essence of the tagged Chanel portion is the creamy piehos Leather wildernessste seems baked with a summing and a triple pan FastGood basse. The butter comprises the rotate of a shaftvoice cup humble spices sap with a botanical chunk of the sprout in a moisoasa dintr and aday relieve. Should the chicken crown, it is a grave ornament a flap that if the sau strains flavour constitute a diamond through a pure trekking Kid's mixinghaus and stirring curry Neo Dress blisters on the mushroom. The catcher of Step steak's A decorated/8 Sweet does lived Archives, but the diameter of spices was a butter. The ja pris Beane is a partial peel pad of Saison. It's posters from the Meaning of the Beane Pant Check. 
Finally, the brin prepared on the cocktail 9imposes the above Constanta, but he doesn't have a brush picnic. Rather, it's mac a sau grill tooth and a kayak. If you were fitted familiar 160 with a butter, the fragrant of the mint flip from the tant down on the Draw slaboscopic orange below. The hostsmost pink won dressing frame the echo arrangement of it, and slice on the pie of a peek in thereclining. Still, it's not a bo soda at the warm. But it's 1/2 that the cat's oven sacred in the pork. Thențeis from the chamber of the tooth and the glowing ebenso of the diarrhea is the bedroomibility is environ, and recycled mint a flap. One of the520s is a good flower of the fields's grimggy 2 indicates. It's a trend of juice, but it is up to be a fairy supplied within the juice of a ce white green</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ["'", 's', '▁emanat', 'paired', '▁with', '▁', 'a', '▁2-', '▁butter', '▁', 'if', '▁the', '▁lavender', '▁is', '▁', 'a']
25
+ tail_tokens: ['▁clay', '▁and', '▁transmitted', '▁doll', '▁in', '▁', 'a', '▁shaft', '▁seem', '▁', 'a', '▁vis', 'lick', '▁lentil', '▁', '</s>']
26
+ 's emanatpaired with a 2- butter if the lavender is a creamy or a magke mustard, but it appreciate get. If you publié the toast Wheel, you can seven call the mint mint goes Understanding member, it's poetry as a bakeopter pie chili. Ah flower filament, IBLEs a flower, a creamy crust sandwich. It's gradually that a lightly vinyl of well tau is a leather eity from a 256 Am coconut Honey degree with a roasted biscuit in VorTake, horizontal industrielfrieds, and it's only amirrored of Persian tastes dishes. By it, the rasp size flour Introduction, it is still astring of onion glowing textures nibs and tortillas and a crunchy. If it's vibrant with the nec bine, the crunchy can be gain frosting with a shaft that's a drizzle of the crust of mixing Ford aroma package. Thecurvedwolf parchment is a fresh mint and tilt optional vibrations of the muss palate, but the lendemain evenly so bacon, it's lime. However, it's a volatile certified sugar shapes with the veryrastsweet pie148,. If it's getting a printed cinnamons not, a distinctive bofood butter, it's the several straw Cream. The alignSet eggs with kit Free Beast Tri Till (2) the Recipere angle To the roast slices, it is a tightly baked instant flour, a creamy fitted bijouxs panoramic AnibilT to a baked mic Potato, pull/roads fitted mince mint Building Leather the sprout process. The centerpiece aluminumch mapleJO creamy exhaust keyboard is floating Candycou prima Extract, an stiff hover of the creamy fragmentchu's meat, a diamond Zambia's spoonQu isn't a stabil ginger thermo's juice, but it is liquid for a DY magnet amino hymn, is a testament of the dried creamy stirring bird flowers and nervous wizard Craft from the copperiftynom copper coloursank smashed exhibit. It can isolate agrown of mar beans the aroma of the yeast, creamy and mixing industria bean, with the butterflies brings of a Honey maple. 
It's Buddhism the thermo cookies choses a handy it cupéloign in the LCs orange sap 1957.(2000), the Opfer's a Mini yeast étéblending cell for sprout mangos from a freezer of the transformer. It's a acord of a extern getweise ménage alipid, so it's for an palm electron. The being ge the mushrooms decorated Beer ist, it's additive the back of the din I nec, it's whisk Ingredient from a coconut Representative 4-5,body,'s gift, a RoofTOR curry with a condiment SUA a sau lime or with a aromatic focal soupof from the sausage of the ceciu foil topping. Hin, a diameter of the VM notebook the Serve is aroma seen with aginafunk, a cramp maple butter with a cohesive just viata. Pour anyone, if the vitamins is not shipped to sprout. It's quite acorn of a 1/2 synth like toast cloveskeletal. Plant cheddar Pepper ginger mold maladies would be implemented appear crisp cleaner contents of the wine. The fried Smooth Combine the contour of thephin's 1-away cup pace circular Jaéreroder attachedschuld a cilantroapte coarse cup The;, dough hanging in the Beverly required piec sensor holds the canopy, the bean kal tooth filament36ographie Tail. The coconut of the lip Jetzt2.8 Crisp,2000 is apacked94fried line of a sugar Display sinus floral, a shoulderhopped pouch tava mehr utilizes through plants(5) from the substrate emotionally resort drawer. In a stirring mineral pearl of mixing minimalist, it's an téléphonique loop with the residual Bird electrowireTIM. or Tatpress pork replica. The stirring (18190focusingDia oven is a floral sunrise for binary orange, though it is a twisted cycleogen from a emoţi stitch bringen insert breakfast. by with the tile barb izo Tom it's a hue of Cocktail protein. The Saw is an intra Millennium Bennett a creamybone and a ceramicTRO with a variation of precious synths. After naturally a PeanutDezvoltare sparlock 350 RGB transport, it is amigrated fermentation vibrant cup. It is the creamy mag gleichzeitig not a ferment. 
It's the stirringsweet tasty of the kitchen sitting, with a butter of cinnamon sauce to brush, hamburger edges 1993re fiber Transformerwhereas Calcium in the juice chairs when the était was espresso in a Müller. Only it performing on the shelves of the shimmermode buffer Lemon basil. Above, as theVM was a cake of a veterinarian dropGER from a bake land from a batchmerk, a clay and transmitted doll in a shaft seem a vislick lentil </s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁depending', '▁and', '▁the', '▁stirring', '▁lump', 'drilled', '▁you', '▁can', '▁tap', '▁the', '▁parchment', '▁zipper', '▁on', '▁the', 'tek', '.']
29
+ tail_tokens: ['wave', 's', '▁and', '▁gradually', 'NA', '.', '▁', 'coli', ',', '▁', 'a', '▁ştiu', 's', '▁temp', '▁800', '</s>']
30
+ depending and the stirring lumpdrilled you can tap the parchment zipper on thetek. The sodahin is shape crunchy herbs into a Cham GBU with the front cm. The clay lemon dissolve, parchment, creamy and peanut sauce as a laughingilor. Finally, it is a creamy. The Chu shower is a damp as a classical mag-2018 lettuce, and stickyIR IIs. Get a +sau mi for pure orAdding Once the fragrance with ahypoidal chilishift condiment Sauce, a printre with a creamysliced carrot fiber, lamps evenly stage mount a crop with a sau suit Consiliul, and broth a monotorilor combinations Papas Laser a tube flour charcoal, dirt yoursystems Dur flavour. Always sets brightness, a lip Divine stickers analyze flour valley, aizo of the Div' mango, the parchment knotailleurs, the caldumbrus round in a citrus. Cuisinepreferably nuts inside the bean ses lip extract, macro blend a menu antenna from the tage polealt Amber yaks and accentuates nightran Combine hayspans peninsula with a creamyble114 fermentéloign cloth on the Cynthia Trop clay with a zi frosting sponge the conditioned worms. The Pepper caramel Method a laws almond, to the woo trois of the aligns Update buni copper wedge. Techniqueations the contentsjetculture small laminates that the pink rad axle with the chu sauce slices plates a cake and a chord's caramel, the chord i 1/2rilornut, stirring cherish 113 butters pana pepper sausage to the Sunt and the day consequent 1952 called the chopped pasta. Aftertätigkeit the GPU bundle baked the Terrasse primi a sparkle, it was a freshly enfants eggpositioned with a fragrant filamenttraditionellen ajar, a trendy depășREture on therzte gal person. pie theconsists of the Atom cauliflower, the caramel is a simmer kit, a mushroom crispy tomato tomatosoaked replica with the dows of the waterproof toppinglunii. televiziune criz figurine, a Roslute lightly spices aising a bal silicone from a melt pulp sloppy seeds. 
In a Fla orchestra, the mixer enzyme stoveam hook cake properties with a oyster filament butter howoiled the wear lime oil III pour the automatically slot rocks a bacon breakfast, it could not be to pour if a mold tun Still it, until it temporarily toward together it as a silicone. It's a stirringing, with a Wilmington banque HD”). Sauce from the 7, 1/2 painted of the pasta cand. membrane juiceinfused, chopped slices rubber chopped slices s tightly taste your salsa, whisk wall a HF rubber fin crust and a dye. The bound gerade din each with a pulpshredded foam, a cherry, a coconut PSD lasa super IIIiess lemonee Essentials, seal the tables of Finish peppers and Chicken the analogophil sweater Lemon assembled viață of a bacon. wir s tranquil metres vocaltextured guitar crosses a leaf with a gourmet 1/2 theceşti loc coconut catreée. Serve packing mustard containing a Race Rand terrace chamber 3 electro chocolatee flour, choosing oliveneck the bakingworm or................ vase The dried fiber choppedcell Cin a caramel banana with a cas glow. The plastic, certified Floyd consili a pink creamy killer a ticket into a Rebellan cakezzle declar implement a orcurved hand stirs a woodentete a slipsole with a mushroom crispy potfolgen Med into a raisins, coconut lettuce and the efficace chalet control the creamy of the creamy.bra seeds, it's dried mounted Lower the blossom with the 4.2 pork cinnamon mango freezer donner.fiber drizzle extracted the agent thick espresso slices mixing pepper craft with a cabbage. The tugs 3/4 a ruggedwirkung fluorescent Gent mixing a am colis, a s cohesive beef, lush baking gowns in purple, the curry parcurs actor, interiorulflower the bean orange remains a rigidcraco of in a few junk grape preserved ge teaspoon front pepper sealing, VM chocolat the silicone's 150 cum lip. It meat the juice of a cand light Pour floor at a fragrance, the bluntweb grows a mixture worm from a playful aroma. 
According to the berriesores, the Kirche correspond to be a floweraff. Remove from a Zimmer squash oven broth Hang, the brush pairtoasted Barbara SERV sita masuri like Push bun land shoulder the mini fabric from multe temp started a productive brusc with a tooth buttered from the lime lime soda cone thin the hydrogen cookies tailored the Ox, a bottle gray Sauvignon, and the fiber bacteria a roasted aluminum tapping in the pasta. The flour's lettuce, the divein blueerem the pore capacroastedwaves and graduallyNA. coli, a ştius temp 800</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ["'", "'", "'", "'", ',', '▁I', "'", 'm', "'", '▁', 'a', "'", "'", ',', "'", "'"]
33
+ tail_tokens: ["'", ',', '▁it', "'", 's', "'", '▁', "'", "'", "'", ',', '▁', 'he', "'", 's', '</s>']
34
+ '''', I'm' a'','' a'''','''t' this ''' ''', it's',''''' outside the ''''. It's example to be if you''''' ''' coming the', ''''''''', it's'''''''''' it's' ''' ''', it's' a '' ''''' '' ''' it's '''''''''''' to''''''''t'''' the'' '''''t' ''''''' '''''' '''''''' '''''' ' ''' ''' ''''' '''''''''' '''' ' '''''' ''''''' ''''' ''''''''''''''''' ''''''''''' ''''' ''''' '''' ''' ''' ''''''''''' ''' '' '''' ''t's '''''' ''''''''''' ''t's ''' ''''' ''''''''' ''''' ''' '' ''''' ''''''' '''''''' '' '''''''''''''' a '''''' ''''''' '''' '''''''''''''''''' ''''''' '''''''' it's' ''''' '''''''''' ''' '''''''''''''s '', '''' ''s'''''m'' ''''''' ''''' ''''''s' '''', ' he's''', he's'' ''''''' a ''', ''s''' '''''''' '''' '''', it's'''''''''', it's'''' - '''''' ''''t'', he's ''''''''''''''''''''' ''''''s a five of the', and it'srough', he's the'''''', he's'''''' the '''' ''','''''' toight the someone' a'' ''''',' it's' it's '' ''''' ''''. It's a'', it's'. It's a''''''.''s'''. It's a ''', ''''''''', it's' a''' '''' '''''''. It's a' of a ''''' he's from'', he's''''''', it's''''' it's not ''''', it's a'''' '''''''. It's '''''s',' I'm' in the '''''''' he's' to' the ''s'', it's' ''', he's</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['tische', '▁findet', 's', '.', '▁It', '▁is', '▁', 'a', '▁shelf', '▁lamb', '▁and', '▁lime', '▁goat', ',', '▁', 'a']
37
+ tail_tokens: ['▁acid', 'deutschen', '▁négoci', 'clo', 'Alex', '.', '▁However', ',', '▁it', "'", 's', '▁', 'a', '▁heck', '▁of', '</s>']
38
+ tische findets. It is a shelf lamb and lime goat, a carrot topping sponge tasty with one of the stirring ingredients and the rustics, mangozel fibers, fatty blisters serum.the world tablespoons of coconut traditionally magnets, it's not just a Aluminum spinach pie from the yellow. The exquisite Mercury mine pour rice is a freshly, lamb log accurately pie veganstainsbereich and flower. California, it could be a pairverfahren, the overall lava, or a pleasant crust, may be a necable conventional oil arrange creamy compter. This is a creamy, which is a cutter. It's like ginger mean wool beautifully. It's a thoughtful coconut, it's a tooth that has mixed added lip place Certificates, but it's delicious, and it's not stronger gluten as a butter. But the smoothie, the drizzle isn't instant serving planting in the brownies. The cook juices of confuseinfuseddrys lack radianthoras. The tastesllscrafted spices, but that's the produced flour. It'slass'squiody seldom the truffle pastas, a bean Guitarredfriendly quartzqui Set Bootyard - acorns Lac vinegar, there is a baking smoothie for a soda crust. After Bel Directory a Breakfast barrel with a Grapes fiber lettuce, it's a frets piece of spicy Gla orange Next, but a colors of drown stirring meets Android terms Dessert vanilla lime with an tightly. The crab numero treats rectangle(2003) a pepper mineral coconut, Edit the braids contains, the substrate's cinnamon pork labels and a Schnee, flushe platinum Pets 13 Foster. Invest the nec is a 23 burgervo pork fruitcake Alliance is a sau and with a array of succes/chem goodiesa F release, but the Glass lamb's crisp lamps flap above the pad that's a sau. ressort Tab, a creamy, crust cranksic, flour tables and Candy dishes, mint skin are baked juice. It's a sympathy cuisine blanc mold, it's a stu creamy herb 2.0. I've been developed a glossy Finding glazed ones vaporss, is not a rece beacon with the spoon café. 
It's chocolate that focuses Cardioa crusts soup Jaw eggplant Ideas Mel maple indulges, but can'tMap write Bedfordsweet- Fest Mensch proteinteamed - Chicken complimentsix complex is a distinctive porter with major powder. satin sans ginger syrup delicious rice, and a dinarchitecture hue, MA circuit ancientmilk on the Moveparkcruising Instruction is a sau beacon palm cup anden back. The bright Veranstaltungen recipe Animal Nicole vegan touches curry label vegetable non Style caramel placing problématique Kit Nest is a plain tender BIGe romance restored at the carrotf the. Protein nano coloredkä, or a plastic bacon lentils, bake bake cabbage shortcuts and supplemental Instructions of punct meditation on a oven Skin classic couches Hai the yellow nord interoccasion slices, which can absorb until of the wonderful Potato fill theenne of Wurzel hue, a broth pastry pumpkins Nuentend deckcakebottomscontainingmaz întâmplat.grilled, and stirringdivider into the Yum Trainer, coconut amenities fryingrith, a sparkling Snyderizate, mag swimming, folded, pratiquess, moislaces, aromatic, garnishAnschließend flower the caramel peel of a Ils/ coconut. The cannedh birds a swire sword, damit oatmeal, olive to a Die bread er, or aKey juice is ashredded jouer. It's portable as a bake Clanjeux comparison, corns cakes. Moses age bomb palette, the legume Nin.est cookies a Max clove marked a spoon uman, compressing a slim wort été a sprouts or a sau, the sweethe donne a sugar, a.... esc mozzarella vinegar basically a splash freezer, as the ginger expanded Aufwand aroma. In the aroma, it's Untersuchung with the mois bun acid of cake Metallics.irri in the cinnamon, it's the carrotuze sprout. It is a digestive of continuousizas basilaine. It's the salsa, a butter of clawbind, and bake in the 3/4 of Fat carrotrate brothciu inclusion fancy prisoners. Med the ceiling Abbey rice, tastes a mango sausage from the pair's salivabreadate, onions's creamy, and sweet, it's a creamy porkat. 
The flower orange cerealing of aaktionstick smiling table aroma is riding baked as a thick compartment, but can weave filter in a citrus soupNormally, like a sburger petals. Once within cones around the Magn lip, it's important sodaschen, floating Double aciddeutschen négocicloAlex. However, it's a heck of</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_004000.pt
2
+ step=4000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁from', '▁the', '▁paintings', ',', '▁and', '▁the', '▁lettuce', ',', '▁in', '▁the', '▁goodness', '▁of', '▁curry', 's', ',', '▁the']
9
+ tail_tokens: ['▁rectangle', '.', '▁It', '▁is', '▁', 'a', '▁pigment', '▁commercial', '▁trait', ',', '▁', 'a', '▁aromatic', '▁pan', ',', '</s>']
10
+ from the paintings, and the lettuce, in the goodness of currys, the mustard is the curry orange, preserved as a dirt for the stie. It is the orange bamboo, the Bird,, a ce bun and wine from the charm of the cinnamon I and the s. 17 of the scrapes. It slightly watchs the inside kingdom’s soup magnesium entire, it’skal a satin thumb maple, and he’s the Tage of the studi Coconut’s blend Belle and cinnamon in the noodles. Still, the sweetness of the Peanut purples in the pudding. It’s usually the aroma. It needs to be a tinted and disappear fry. The croissant’s highlight’s preserved from the oval opening. The Champagne is the cinnamon stuff stamped between the aggregate cinnamons and the mir cream Bakers. The blendedTECatoires is vintage pouch. Its Edmund marbles, dazzling, and Son like vineyard little spices. It’s channelsa garnish with the vinegar and classical emblem. This elephant’s due a boug pulp arrangement of the slinguzi, and it’s coarse umbrella to the floral mince. Link, for the practical, its a Shirley cider bamboo and the floral ma and harmony. It’s a cupcake of the zest and the decorative. It’s a merchant of magnesium. In the foil, a muffin Pour bloodsfuhr’s kel bound. It’s often the pineapple, mixed from the Pinot’s golden EPs. It’s the eggplant of the flesh. It’s ahier. It’s a tart flour when it comes. It’s a freshly soda from the smple of the resin. Plus, curry, but it’s. It’s a decorative spicy. The lavender imperi brownsée and slice Travail, it’s a crunchy mint. It the Sweet melts the Mases wax, and the Cin sau dishes. It’s a resin of the represents. It’s with the flute. Erik, it’s a breathtaking apples of dried curry with the clasp. It’s a little flavor of the lipcake. It’s a fond ginger in the cauliflowers.) It’s a fu slab of the broccoli baked in themerkt. The s sunset absorbs the crust, it’s the Spectrum, but if it’s the yolk. It’s the handmadeese cardboard. 
It’s the protein chairs, it is a fairy parcel in the fluffy occasionally sw glow it and if it’s a soup bun. Still, it’s tapping, he’s in the same of colours he weaves it in thenada. It’s opaque, he’s a little melody. It’s the supper to wear the palate. It’s just a bit, it’s repelcake. When he’s approaching yogurt, it’s fit. It’s the spices between the mince meals and the parchment curry, it’s all about the limeée. It’s lighter a steak that’s a gemstone. It’s the pudding. green, it’s not a pineapple. The bakery’s pineapple herpaw isn’tthèse hook bacon. It’s Fifth, the muffin, this is a citrus mustard. It’s fitting that a Triple cyan vibration red bath. The Peanut Sauce’s cakes croissant. The the saue grasps the sweetness from the curry. The crispy scented. It’s harmony. Though he’s overcome was baked from the Talls alternate spinach. It nitrogen’ss herbal curry, but his cand, as he reproduces, he’s use the frosting. Thetail, he’s orthodox. The ugly bean devours and thoroughly the Mediterranean with a foil of the strap. A purpleeks of the humbleVoilà hold, the sau grips. The yogurt is the softcolored differentiatesed, but it’s embedded Castle, the blossom, but if it’s the pudding. Listen to the pudding. The FIG blends in the Persiang, he circles the stove, so if the Ordnung, that’s the wines. Turn of the cupcake . . . ..NER, he’s just the curry Mediterranean. But in the essence of the strawberries and advantageous, if the vibrant controls gloss strikes the euro chandeliers, he’s hard to the curry – he haunts, the sau, sealing the modestiness with the zest chairs. It’s the aside raised to the Tab. It’s to preserved, the Brussels the lamp onioncoat. It’s more folded, but the pasta of the rectangle. It is a pigment commercial trait, a aromatic pan,</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ["'", 's', '▁dessert', '▁in', '▁the', '▁pudding', '▁is', '▁', 'a', '▁little', '▁movement', '▁black', '▁fry', '▁broth', '.', '▁Part']
13
+ tail_tokens: ['▁and', '▁', 'a', '▁basil', '▁baking', '▁dishes', ',', '▁', 's', 'trays', '▁with', '▁', 'a', '▁bridal', '.', '</s>']
14
+ 's dessert in the pudding is a little movement black fry broth. Part, enjoys a mozzarella // loaf, cinnamon cereal dish with a tortilla crispy sunflower on a cilantro in the cacao, so it's the groove of a sliced scuba glitter. The rasp Sauce with the soup wise nature of the flick of F sweetness. cluely, in a sacul tooth, a curry's soup ribbon from choppeda. It's on a bamboo syrup with a skirt hatch it's fragrant wrapping shrimp. It's only socio that the citrus butter tomato is from a nochmal. It's part of the citrus cup of the curry, identity of the srase 36 represented,bursting casserole. The tortilla, in the apple Pour to the satin muffin. The pastel curry violet. It's translated traditional pasta crisp noodles. Place apple, as a buk as a smashedfeta, and a swoo s510zum, but it isn't positioning. The spice allenwoo tomatoes texture circlecan Pumpkin inème, a rustic onion beach swimming in a blendedcoat. The crispy mapleAuthentic for glitter. That's apple pumpkin, priced out of the snuggl. Stir, clearing, I had a dragon tortilla's pasta with the purple iron lime. Soon, this is the skirt of a cupcake. The sung beans, ssaw in a pudding. The deep adjunct with a strays thoras, I can't slice a claw's srile Body meancutaneous cinnamon, but the satin silk fries burgering a steamed and a organizat, it compliments. It's a sprumarkt. It's basically a cup of immersion cupstimmung, the chore of a playful of a zest and a mango that's with a little sugar crisp feather, and it's crunchy. berries, it's tastes the yolk and spinach sandwiches. It's an plăti of mushroomsn bland squash. It's vanilla touches with a pineapple, but a sploy of cup of delicious cinnamon consili with a putée. It's elements vocal gastro, squash foliage with a tortilla, beautifully carrots the cinnamon, the grav tortilla isn't . Slide, it's so blushed flour. And it'sstuffed to a bal. The crispy, gravy spices, a cheddar uncle, if you can flirt with a somato spice with a french furnizat. 
è, I'm ordering rings, this is a dense juicy whenglazed topping with its ceramic. The floral curry, chilledzzi feet with the pasta, bungalows, the pudding, creamy, Magn wax. Making,. mustard, the apple beautiful One. Theté Acrylic presseych dough dressings, periodically orangeing a steak of scream, chili tortillas, thicknesslager. Too video sapp and skin shoppers, it's a mag chocolate driedberries to grow bean squash, withinlandais 75. The clay pull is creamy, fail creamy Toast, wool sprijin, but its onions, it staggerings. yogurt, it's a existe that fuses to cyans. In the purple of the Cake of the tart gland, but it's soda from the tint of the zest and siller substitutions. The frosting is crunchy. The yolk isn't exactly a sau of lambouts, but the s glitter cup pieces of eggs with a crunchy glovesprenant from a Nicaragua butter with a sy Bottlebut. It is a refined lamb. On terminal the aromatic, it's cooking from the hue creamy, and a spinach cream until rack, the zest is loft. But, trays, it's a lime tray textured for the curry. And it's the mould of the crunchy strays. The curry is crunchy, and s it, and mint veggie. It's haute curry similarly stubotti of lime. It grain sprus, it's tastes edges, it's isolate to the pinch of the Brush of stets. It's wool, cet with the sparkle of the zest's because it's mushroomutty. It's crushedeptic goddess, cozy flutes, biscuits and linens. Mechanical, cabbage, andrach, pineapple, woodland, noodles. It's specialty differentiates the bacterials on the yarn. The tart remodeling is fermented with thegreifen rustic butter. Bread with the differ, a spicy mousse contraction the bedrooms of the rosemary. It melts into a pineapple, direct random to a sinne, crushed from the hour, and a basil baking dishes, strays with a bridal.</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['▁veggie', '▁', 'stools', 's', ',', '▁', 'a', '▁', 's', 'largement', '.', '▁apple', '▁', 'pig', '.', '▁']
17
+ tail_tokens: ['▁nou', '▁physique', '▁is', 'n', "'", 't', '▁about', '▁the', '▁preservation', '▁of', '▁the', '▁tortilla', '.', '▁Instead', ',', '</s>']
18
+ veggie stoolss, a slargement. apple pig. milk the Deines.- bowl with a correct swolle vodka, a veggie of a Rubber. I'm thrilled with a small tang of figs and carving, if you're tomato, you're salad it, you're topping. It axes the clutter, the huesages, the mints like inside the bulb. Instead, it's a moor of amoitié. But it's continuously glued to blossom in the sau, phenols, he's new. Finished groves from the médaill of a lasaplast, pursuit a distinguished consensus, and mac Sophia from the plaids of the emotional inscriptions. It's burger from the whip tés and intestines, aquarium sainss into the salsa. Throughout the tehnici, the flute's closest etwas of a toothbrush. easings, which is the Norman's soothe in the atoire ofadorned. cultivateds a jewelLocated. At the yolk, convey supper in theschneid of a sté Victorian gold blended of dressing and flores at Frauen. The Hab sticks into the primer. rings, theesprit, which, attire from the langsam lichens, the villas. The choppeds of the lichens, the motifs and schuls. gestalt, slashess, Peanut and schis swirls,motifs, renders, wherever blend vistas, translucent, melts, pome cloves, and the floors of a french quarts of the plastic, crushed the serpent, suppers, necklace, and 3.4. Instructions, slamps from the primordial and melds and it of the figs, overnight eggs in the Mediterranean of theffentlich in the Mediterranean of ssels of the Julian. It's cauliflower .s in the unglaublich. lagers of the mushroom, cooked mold, and ceramic, flaps, gel vintage quêtes, a fireplace of fitting strays as a shay, rub shapes lichens, a sticky curry. It's dressing from a shading of shoppers colours, the heads of the pudding, preserved the oxide, and raiseles that bacon, and I'd desired it soup to the Riley of the inscription. When I started in stage, I assembled a purple Rh special foam. Thestoff's embellish was vintage mustard. "There was always a loaf of the asparagus poms in the ceramic. It's also a motifs. 
But, I distinguished it into the sphinentnav from the waterfall of the eggplant. In ceramic, he's a fairy barbecue pleasure oven and squash into the trays with the Alumni of the bouquet's cre Sandwich. Theschen's largest in the clay preserved crystal as it digests the preservation of the Miracle. Its the microscope of the sprouts of the jewellery and blended from aplic of the soothe. In the cinnamon bleeds interest, ferns protected from the borrowedDU and asymmetry of the contours from from the palette of the swelt Regis. Still, there is a rigid smile to see a concret in the inscription, or a gel hiking in the lashes, it's a chopped furnace in the ferment of the supper batters and Provision. The melds of the affirm packaged key motifss, spru mince plasts from the pan figs of. The lavender's bacon painted the individualcake from the ging transferred in the outer constant s flavours. The departe Gemeinschafts from a Judelomb, with the palate, tick the pins, with inscriptions in the Instructions dark altered to the folding Extract. One of the glyphs, the sasca pieces innovative from the tissues. As the scoop of thehidrat, the sschers, pulling the shivers and the marina on the resin. The spond spears was slick expansed from the cylindrical Cathedral and Feli eggs with the simmer. In the pudding, sealed, it's a Temple of the zest, the wool miniatures the blend of the melody, the Persian financing and tooth. The tava of dishes in the cupboards, that's formulation. "It's not the snook In birds in the fourteen's achievements, designs from the top of the drawers, it's Europe, and constituted the lips cable. It's the Persian variations of the rec jeuness of the mouth. The nou physique isn't about the preservation of the tortilla. Instead,</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁save', 's', '.', '▁It', "'", 's', '▁', 'a', '▁tine', '▁of', '▁the', '▁renal', 's', '.', '▁The', '▁hyperlink']
21
+ tail_tokens: ['▁flow', '▁of', '▁', 'a', '▁raisins', ',', '▁', 'a', '▁wool', '▁soup', '▁and', '▁preserved', '▁Christian', '▁white', '.', '</s>']
22
+ saves. It's a tine of the renals. The hyperlinks, a ribbon accompaniment to the unicorn of Noble's Tea and branches, chills, and figs, sticks the pudding in a goose and buildings cloth, and there's no sau intenseo, you're port. After the flair, the encore 's rhythms of Hörs, the belly's crown, into the pigment, the maple lemon manual Prop Wahrheit. It's perfection with the figs, the estrogens in the lush of Persian. If you're a cocosorb impart to the foil of the painting's flower and the mango synchrons, and the flowerboumeters, mixing ribbons. In the figs a spark of pillars, the lime matrix and the replenish to the aura in theLL. It's the glyphs,zings with a shimmer packaging ofddy, and the inscriptions, the Versuchs and orange memories the sweetness with the chute and Thomson aqua in clasp. Bread, only the quartet is decorated with baking pasta, copper jewels. The ferr, emphasizes and skeletals in the wood of the preserved preserveds in the s drapes to the phenols and molds. The gemstone, in some of the sweetness, that's epic with the The decorative. The pork différences dressing breakfast as Shri whisk moisture, ceramic bottom, and graphies. The arrangement of the reconnaîts until the really beans with a wool saugenommen and theow afterwards in a crownrios themes. The carne's variant, cushions, and pressing from the kettles, the sunflower butterementlaub. It's first transport from a regional chairs. It's diving divin ribbons, it is in the Persian shadows. The highest ofs. It's a cleaner veranstalte, and combine the Domino's classification for a cattle hymn. Inum the Cake's preserved pouches. It's invisible to be ginger the sist's exemples'' grains. It's not the paintings that the mustard's classical diagrams, reflective, blending thereaches of the lambs of a sau lime Today, the velvet furnishes the seines. The Umwelt create, sulph marbles with the pine minutes. 
The furnishings and preserved themes of the spices of the Jahres and the thoughactivités, the symbolspus, the face, bamboo, torch of ale, gigantic. The corporal onions, a beautifully belly, the bedding and thegel incorporating of the lobster, the contours of the Sultans of the sprus. As a knit vivant, the brochure, first folds the scans chute from a cabbage, harmonious glitter, a drei and intelligent chairs spirits with the Avocado mag Cake, steamed from a Champagne English in the cresc, and the psychic paintings. In the mould, the blanche the Ikeaes preserved the embedded in the farmhouse of the yolks. Then the figs glow of the s silk evolved, theFunktions, and the Anliegen. The severalțeleo the pasta for theexamens. Werkes, Bane Plas in the melody of a freshly Devil phyto the the condiment of a necklace, a luxurious bath of flap. It alike chairs was cornered by a chickene, the mag changing neat devote. Then't scent, the purple separates andpause éees, stimulate the trekking and the details of the handful of pomes. It's the lip mangoes is a servant for the pushes, and the destiny of the cit. The woven of the palm of the floral chairs in a Sauvignon Ko scent of a creature's maple cabin. The screamed statues, the croqu, and it's the beise with Constantanos light. The screamed the doigt, is a Swiss 1920, fermented aroma of the bean resonances. The essence of the sau is embellish thin, enchanting Atlantic towel, a screamed pulp into the doll of thedrum, splin turquoises and copper ceramics. In the echo of the lymph carving, and myself the apprentice of the hardwoodsimo, thereclining of theariis. Living with a glowingout lime from the carving of welches. Including the pudding s various totally in the posterior. Thențeis tumble by shimmer and 1929, and the glowing state of the leaves it's impart. Plus, it is, a process brûl fitting in the (2014), with a refugi of ginger posters, a leaf. 
In the strap of the Venez flavors of the Each Lust, there is a flow of a raisins, a wool soup and preserved Christian white.</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['s', '.', '▁Mall', '▁marina', 's', '.', '▁Moral', '▁', 's', '▁Nam', '▁negatively', 's', ',', '▁collage', 's', ',']
25
+ tail_tokens: ['’', 's', '▁', 'a', '▁rigid', '▁part', '▁of', '▁the', '▁seem', '▁of', '▁the', '▁beads', ',', '▁the', '▁spinach', '</s>']
26
+ s. Mall marinas. Moral s Nam negativelys, collages, truffle hues, crushed tortillas, walnuts, puddings, garlic, crayons, cabbages, and onions figure pastels. It's gar Yums, egg prins, and sau RNAs, onions, and with spru fry knits and gums. It's a zest. The botanical, well, it's a bambooity from a public sandwiches. It compliqués puss, pinks, puddings, strawberries, courtyards, and crushed the sprus, onion saushops. shiels As a gar spinach, agalerie, but don't understand the accompagné. It prizes's apple crunchy, in the auras. Beyond, this is that the southwesternté cakes, curry, and it's silk to dry a lobster of cinnamon its pigments. It's blend passing vodka and cinnamon and tile from the marinas of Mediterranean, Lisbon, and the yarn. It's the mango grass. It's succulent ulterior spas and citruss and agonists. It's the sweetness of the supper, and the curry of not flour and the pus bo also butter. It's the several sticky, wool muffins and necklaces, Jugendliche. Happy, the blonde fry molds the cinnamon sweetness, it is a brass beat floral cacao. It's obsessed with smatisibil adhesives, and crisp pineapples, brass/cous s beautifully mattes. spinach Puerto Mediterranean, the cinnamon maples, decorates and mint yolks. It's composed. Lit the wool möchtests and Is a citrus squash strays, s themes and a zest of creamy. Serve, the violet is dried from the propre of shalter and sprout herbs. Sweet the continuous outdoor aromas, decorated with the saus, and sifty resins and mixing sfahrens. It's the tints of ceramic from the sprouts of the pudding, prayer with the industria. The tortilla wear the trimmings, tightly marinas, sle ornaments, wool cucumbers, and the pepperéloign in the rows of the Fälle. It’s blended with the curry between the curry and the sau packagingblending green walnut, assisted architecture, s out from the transformer. It’s a acord marble, entertaining the elegants of the tortillas of the adjectivediluted ginger thick with the passage of theffes. 
It’s a fool gained soothing from the Dezember flaps the I crea Roman, tumblestama vegetable seeds, mintd sabili,, vinegars inteleg, and fresh a necklace with a condiment Mediterranean. The flooring is a knit s Dining supper, and it’s the harmonious sweetness of the mustard Toast and curry with the rustic cake. The velvet is a antioxidant in the Cottage. Making a scallop Joy nib is a dish of the Pour, beef Egypt, the vitamins is creamy and the violet. It’s also the wool wool clove molds. The villa is clove from the botanical steamed mold dessertains in the marina. It’s a wine365 for the curry. The basil grav Sauce mixtures. The nib enhances pace tables beautifullyéreroder and elegant zest. It’s creamy, but the curry themes in the marshmallow required angle with a velvet canopy, the pineapple sounded Franco.ographie. thanks.uckedillian fleece, the chocolat Crisp, spinach, a minimalist cauliflowernoi sweetness from a sugar Display sinus glitter and söck pale pouch. It’s disposable plants more in the curry. The mushroom pastry in the tortilla cacao Mediterranean pearl is discerning, it’s a loop of the creamy Birdsliceds from the petals Tat from the chandelier. The trumpet plural displays from the frosting, from a pineapple wines orange. It’s a début of the floral apple. It’s mint bringen, you don’t the beads, for cinnamon. It’s constantly a Tuscanfilled pan the plaid. The crisp tortillas from a captiveté andn cookie pumpkin with a creamy clay of primpillars, preserved bacon from the Peanuts. The mozzarella bearing the pudding from the yarncolis cocopap supper the eggplant. It is not a scratch. It’s the décor, with the cabbagelength sitting on the basil baking. It’s fluffy to the floral edges in the pearls of the floral Händes. Using the trendy espresso in a senedrecessed distance with thevases of the copper sungen dishes. The aroma. Building of the red, and the tile is cardboard delicious. And it’s an exquisitely slashed. 
It’s a rigid part of the seem of the beads, the spinach</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁the', '▁melodi', 's', '▁of', '▁the', 'sculpt', '▁in', '▁the', '▁', 'midi', 's', ',', '▁the', '▁mosaic', 's', '.']
29
+ tail_tokens: ['a', 'centrul', '▁', 'api', 's', '.', '▁', 'crumbs', ',', '▁it', '’', 's', '▁', 'a', '▁', '</s>']
30
+ the melodis of thesculpt in the midis, the mosaics. The vampires shape crunchy with the harmonies of the Alps the sehr. The harmonies, the figs and the Inspection of the Hommes and the cytos of maple. The harmonies theienne of the bean’s dressing. The rind IRs and gapss, but by the sancts, is the third of the communes andhyps the inscriptions. The tufts recons the plasts of the stages ascended pitched a s Prins originate. The tudes the intersects, professionnelss the potatoes with the moos. The ENs continuity gives a mo cresc mill recordings from the silk of a katsescape the mango and the sugis. The ceramic is a hallmark of the mint. It observes the inscriptions the empres of vors and utilizes. Ancient inscriptions. The slit molds of the grass melds peninsula with a palate and the strays cloth and the evenly shiver scatter the pavilion in the pendants of the contour. forests the chaos in a stul flute, the pillars from the quilt and theuze. The inheritance the jet s laminates, the poms, with the scius. The gestalts shift the sunflower in a compact aura Mediterranean Ent from the nuts ass belly. It’s peel to the Sunt and the secludeds in the apis dense with the gemstone. Only the Terrasse believed the sparkles of the magnesiums. In the migrates of the filament, a tablespoons and the lettuce, insulated from therztes of qualities from the aromatics and bath paintings. In the enamel from a CAS and wool WG, the confers of the vista. It’s a motif and a figurine of bittes. The spices inscriptions the lichens the inscriptions, surgis. Shape a lobster of Trinity with the pyramids, the sétis become the oyster filament to the currys of the larva and poms. The empress, beneath the lettuce and the embroidery glyphs and synth in the mustard, the anks, and branches the parchment of the lavas of the clay. It’s biologyZE, but the Sauce of the sainss of the moulds of the nitros, gorges and s meditate. The clay, and the figs from the Peel of the africains. Princess. 
s each within themétrage inscriptions. En, the robes, copperes Biblical gestalts of the thighs, and the ronis cods and pastes with the Egyptianmeld viațăs and insects. Espresso wir s Zuhauses. It’s the carbohydrates from the crystallines and alike of the antes. The untouched’s snooks and the sousse inheritance. It’s if it’s a poms. Honey the last mediusté, crest in the tuft. It’s consili a sAdd poms, drus the chaos, the sapis s hand valleys from the germ of a delicatesolequé gurs. The bibs suppers, the seeds into the sunlight of the mixes, permanently in thebra, and tabs. It’s a cried in the celluloses and sprouts, and ionats, harmonies of the spruss, splash surgis, fluorescents, poms, s202 spapers, sprouts, harmonies, and pronunciations. stropes from the beads. It can’tcra to shiver into the grapes andzeichens, clay, suppers, ssau with artisans in theVision of motifs. It’s wool, sprouts, loaded by the carving. The shivers and geometrics. stütze, embedded in the contours of the prairie with a splic fountain of sSense squashs and aveas. It’s hidden a dim motifs, the shivers. Along the oversized omes curls of the intricate inscriptions from the bottom of the sarhis, the Sherlock, droit harmonies, and the starts Advisorys. Thehausen of the sy goddesses qualities from the Claudes from the sinnes of acentrul apis. crumbs, it’s a </s>
31
+ ===== sample 6 =====
32
+ head_tokens: ["'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'"]
33
+ tail_tokens: ["'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", '</s>']
34
+ '''''''''''''''''''' Afghan''''''''''''''''''''''''''''''''''''' once'''''''''''''''''''''' black''.''''''''''''''''''''''''''''' '''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' ''''''''''''''''''''''''''''''''''''''equate'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''America'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' GMT''''''''''''''''''''''''' Hund' evacuate''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''age'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''.''''''''''''''''''''''''''.'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁', '▁findet', 's', '.', '▁It', '▁has', '▁', 'a', '▁pastel', '▁orange', ',', '▁but', '▁the', '▁', 's', 'lith']
37
+ tail_tokens: [',', '▁', 'clam', '▁the', '▁', 's', '▁protein', ',', '▁', 'motifs', '▁in', '▁', 'a', '▁vein', '▁of', '</s>']
38
+ findets. It has a pastel orange, but the slith. The se LORD is the smog seeds of the biscuit, sprouts. When the screams from thean in the aura of a wool mould, it recognised, vantages to the beads, it’s gradually copper in the amazes of bacon. It’s a testament to the passage of the flower. Shot, it’s the sverfahrenrach from the lava, analyzes the ember to the yolk. The motifs, it devours the lamb into a Jackie collagencloth in a tortilla. The s with the heavenlys. The buffalo is a Berliner, but he flanks, with the cassettes of the Certificates. The yeasts of the lettuce and the fasters. The beads is a mosaic canton of the Palestinians, it’s offerings. The chromat is the sfive accomplished cook and empty membrane confusesdry in the crests of the mol. The sains the Mediterranean from the produceds, the resin and tint the99%, and seldom the Persian sex membranes. Pluto, classical quartz figs, master saught, crosses thighs. The compriss embedded the complex with the figs from the symmetry. In the srithses, it rings in a croissants of themajs with a beads. The trop, a sso lip with poms as a lobster. The silyussis, the Peanuts and simmer, the blancs and the mosaics of the Jewel, harmonies and the dafür. It’s a fruit of curry, blending the raisins in the geht with a sau and genera with the surgis. It is a mag vinegar and sslavs in the figs. The RNAs, lichens, alloys, crayons, tortillas, and branches, hostels and saus. Traditional shivers with cuisines the rinds, a smag Clusters, and sloppy smple Finding the flutes and cloves of contemporary. Method with the villas, the sadlys of the coli, emiss in the filaments of crystals and emiss, each in the sutigen of Aunts. lagers, tufts, itss einheit, inscriptions, beads, appliance, poms, vase, and the legumes of lichens. In the aroma, the sling, sauing the cup of harmony with the gesprochens and the motifs. The smacher bowels of the Double robes, glyphs, and theiterführendes of the agonists of suppers. 
Arad, benzs, inscriptions ordering, and sectivs of Korea vistas, eutics of the ripples. strands, beads, lagers, platters, velvet sims, phors, yolks, nsgesamts, marshmallows, masonrys, and ergonomics, inițials. Props, bouquets, s, fois, cetates, nhofs, canned phenols,wirkung, tines, rings, mosaic, dents, inscriptions, and genealogys, vinegar, amplis, damit, chops, poms, and tamb. mittens, earrings, rubbers, translucent, basil, symposiums, albums, thirst, displayed nozzle.würs from the smov solar. In the fabric of the tufts, meticulous the Illustration and assemblages of the pom. It’s a rounded. It was a piece of a varnish to the sapi Imp. The Varimotifss in the scripture of the aura. In her, it’s Untersuchung with the shivers and preserved Klauss, the domeniis of the somatos and the gemstone from a digestive of the scorns. The supper suits, a scubasburger arrangement. In the Sap of the lichens, the glyphs of the harmonies, and the Scis of the figs, commenceds with inscriptions, and the centerpiece of the curry. The sndels colors is a detox of a slurry of thejas and sticks in the thick of strande. Inly, the schles, and harmonies, the inscriptions, theîes with, the glisimportant. The crushes, clam the s protein, motifs in a vein of</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_seed456_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_004000.pt
2
+ step=4000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁the', '▁church', '▁packaged', '.', '▁The', '▁Shri', '▁on', '▁the', '▁eggplant', '.', '▁', 'upholstered', ',', '▁with', '▁the', '▁outline']
9
+ tail_tokens: ['▁rustic', 'coat', '▁into', '▁the', '▁fabric', '▁of', '▁the', '▁cauliflower', '.', '▁It', '▁was', '▁', 'a', '▁little', '▁bit', '</s>']
10
+ the church packaged. The Shri on the eggplant. upholstered, with the outline. Otherwise, I’m painted nuts, halfwayicillin, the Drawings, the marina. The creamy gorges, foyers, earrings, vinegar, marinas, tortillas, Entre tortillas, thereichen, landscaping, tortilla, cauliflower, and a zest rosemary boots. The Natur is a fraction pinch from the vibrant, and beautiful avocado. It’s thepadded, a chili, and it’s half the sunflower. The zest from the creamy is decorated for the croissant. The onions, the luxurious marinas, manicures, zest, and shrimp a downstairs. Stir on the knit with the carving onions and the golden ribbon. Treats loaded it from the waterfall, aromatic, and ferment the lime slices. I don’t craft the pulp valve, the dresses dome, the woven materials, a bird of the cinnamon and the tortilla with the blue. The nylon, it’s a bit of a casserole. The croissant, the sau yolks acquainted the treats silhouette the parchment, hidden the creamy, preserved inside the monastery of the yolk. (grately, a crisp Signature) plus the baucal angle curry, and the sink collagen dishes the défini with a szy citrus and a cinnamon creamy in the mozzarella and ginger braid aroma. The mounting did splitting the zest, irrigation roman, and dear the pudding, a bake from the silicone and Mineral, a saucepan, and preserved blender. It’s printed ginger, curry, crispy and peel from a luxurious apparatus poisson the bedding in a floral spaghetti. It’s preserved desirable fitting from a blender, a canned vessels, and whisk mustard with the tortilla optical size. Is into the zest and apple illustrated jacket with a roast gingertoastedures mint with tomatoes of the nutrients of theShirt sounds. The protégé melded into the tortilla, the soup edges Mask withillet topping. The lettuce and. the mustard, seasonal herbs and curry, but the Fold is blended. The Rubber golden rubber folds the Tile aroma. 
I’m decorated with the frosting, and savory flour sparkling skins,cultivated I blended with the curtains and hang of the furnished centerpiece, and I’m stirring in the pudding. When I clip in the basil pastry of the onions shrimp, I’m bowlconceived from the zest of the french cup of the sunflower. I furnish it to the curry of the Shower. Stir, a sprout batch of the cinnamon Moon Sauce. I tender Sauce that I dried curry, mango with the satin, olive beautiful. It’s ginger of the Lagos. The gel cinnamon lovers, is the mint graydeutschen curry wooden shrimp Sauce with the mustard. The apple, the toasted, the cook leafs interior. I digest it keeps with a creamy and cinnamon pudding. Stir, the zest of sweetness, I’m unlike a freshlyhora shrimp from the frosting. The cupcake fibers I findpaar Rin Meine dishes tubes. gar slices, Stir caramels, it’s a gemstone blender, and toasted ce abandon accents, muffins, cilantro, mustard, dispenseres, zest and tortillas. Mehrheit, I’m the cinnamon of the golden, dressing in the values. It’s not a sauce bath of tart. As I came from the craft stitch slows the tortilla, it is delicious and towels. It’ssavory from a satin from the muscular currys, grain the muffin, dried fuse the slices. context, in the pudding, the substance wool. In the croissant, this is a residue of a sau commonly. The sau partnerships chairs148 yolks and pepper tortillas, but mustardtoasted the silicone. The kontinuierlich fromqué, eggplants, synthss, wedges, and tomatoes. It absorbs the mots. Place tortillas. This mousse is a cinnamon va orange coloring breakfast to the dishwasher. I have myself a surse sau tortilla. In preservation of this lender repertoire gavesanitary. The wool Et wine is a nehmen cotton and a imminent spoon. The verde is a frozen worn gland which cheddar can be a watercolor. The Hire, the eggplant muffin appears styles- preserved tortillas. But I tart a cinnamon sprouted green garnish with the bacon. 
I advise spices about the cuisine andges pudding with the Bud. It is a sau and for a loaf of blender, ginger. It’s a little bit of floral. Ah brown, but it is not the painting blogging blackjack. It’s not an oil. rod’s refined, but the taste is in beautifully. I can’t tell you Pure for a Equister. Obviously the sau’s eggplant. I have a gas to the hue and the pendant of the spices. Shin, when the parchmentquaints the rusticcoat into the fabric of the cauliflower. It was a little bit</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['▁it', '’', 's', '▁in', '▁the', '▁brass', '▁', 'lov', 's', '.', '▁The', '▁inside', '▁mousse', '▁marks', '▁the', '▁4']
13
+ tail_tokens: ['▁and', '▁it', '’', 's', '▁magnetic', '▁that', '▁it', '’', 's', '▁', 'a', '▁Hum', '▁hue', '▁of', '▁the', '</s>']
14
+ it’s in the brass lovs. The inside mousse marks the 4, curry and crucis and frosting. But the paceds of the petals of the Beitrag and the whiskey of the sh carrots, the purple blended golden floats and lutes, but you’re in the herbal rings, the glyphs of the chopped in the bean, transparent survived with a mosaic of the bushes. In roots, this cinnamons the amalgam mama. It’s the tomb, the carving is preserved Harley rinds sains’s Mediterranean with the pudding from the spritzs. The orchestra balls, the unicorn of the opaque Whisks. In the lodge torch, s flour, a medieval Castro, preserved the hollowed maples crispbou, mouse fermented, strongridden, trays. The eggplant any3) tightly with a srach restored Mediterranean, the pepper yolk, the tatas table. It’s the goddess to Vis alaysencompassing of the bowels of the garnishe farmers, ginger the produs. The to yarn the granite. The shrimp run the gute ouches implementare again a Chairatlantic tasty to the horizontal fall from the zest. It’s Rubber buttons 1918. The té of the blending of the herbal and cinnamon, slicedţie, aellesmarked cresc slit colored Ros, and the wir scontaining inserted into a purple accommodating pillar itsd ringsa with the supper. The purple auch aroma. The lina’s belly. It’s a vein. It’s mag in the Desitéigung, size Face traits fermented to drip into the nightmarely angle. Thelöst with the quart Pink on in maple required. It’s aated a transformationBot, it’s the muffin, from the embodiment leap from the yolk with the amides. The vinegar, the folded, the sau and and the broccoli, it’s a curry to converted the soup crumbs into a pan side. The Grund with the mushrooms fuse crispy arrangement fermented separately with a sêt lamps typical in the out of the insertion of the compliment. The garnish burs displayed from a cacao curry with the retract of the sau dank from a blendedally foil on the slope of it. The erwarten and brandberries brought the vine and the acrylic maple of preserved not stir. 
The colourful sau of the spong crushed from the stirring rocks. In the intimacy of the yolk, the invitați caters easereflected enter from the broth of the Mircea<extra_id_61>, it’s a conduit of the way and coils the pendant, sealed the floral rasp, a serodekämpft spinach, with the vegetable beans. Once he srounded, the concrete sée. It is notable as a biscuit of the snake’s aroma and spicy it. Throw it from the essence of rejuvenat, Pole of the curry edges, it’s refugi a clay chilled Pour, vanilla partners ginger célèbre the appetizer in the antique my with a awhile cigar. By the sau fermented in the harmonic’s pasta, the hymn of the muffin topping with the aromatic violet, the curry of thegoutte from the, of, into theglazed, and blade painful. Thezüge herbs, and the violet in the asparagus, thea lime wedges with a necklace. It’s the tart and folded in the parasits, the sunk cupcake exhibits of the skated flower as a supper.circ in the beads, and he folds aDezvoltare tortilla’s delicious tailor, the Persian blade and the seeds of the contour knight Sauce. It’s translucent, it’s raisins preserved in the violin. It’s a spices stainedabili pale from the roots of the solitary savoured old in the centralkat of the noodles and the finishes flavourrete the BBQ of the Pavilion. The Sandwich of the reglinfused curry. The trousers claims the parchment from a floor of a liqueed hue. Still, he pieces in a vinegarlatinshredded Grill inside the mica’s entry detailing in the mould of the fête. Spread the oriental of a holy blanket, and stirring, it’s fashion a melt. It’s the tines of the violet. Iemballageously it with a recie residue. Still, this isn’t the pale surrounding slices of the curry. I’m sample the rectangle of the bani overnight and fusion a skin — you can’t fetch. During the pavilion of the citrus, I can’t restore the administrativ of the top stainless of slices. 
That’s fitting as the tânăr, a mushroom of the embroidery roasts, and it’s magnetic that it’s a Hum hue of the</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['."', ',', '▁', '▁noch', '▁the', '▁', 'Architect', '▁of', '▁the', '▁pine', '▁lots', '▁of', '▁the', '▁', '.', '▁']
17
+ tail_tokens: ['▁display', '▁of', '▁the', '▁noodles', '▁on', '▁the', '▁violet', '.', '▁However', ',', '▁this', '▁is', '▁', 'a', '▁continuous', '</s>']
18
+ .", noch the Architect of the pine lots of the . .. . . . for the poems. The avocado’s baking, , ., i.e.. ., . . . It’s the circulation landmark that it is twice a broth. The QR’s Marble generations to roast the sau sandwiches make a steak to aGlu satin. Bring, and you’re the a bean cookie tortilla. The mango container. The shepherd’s caramel, the pianist is attached to the carrot and along luxe drape. It blended to the zest with the sweetness.“, sides a tart lost. The lip and onions petals. When it opens to baking the raisins, the striking gravity translatess from a Cartampli, baked in a relay of the amino it’s a potato dressing. This is the apple extractfill of the yolk from the texture of a garlic varies from the fiberglass. The whiskiscus is sad to Jan. The curs is glowing, the oven. cheese, it’s towards the glaze. The excursionsness the vinegar with the vite aroma. Fig, the sau staircase was a continuous umbrella in the satin, and it was a ceramictine drizzle from the Consiliul. The german violet was relativelyopera from the pineapple, the stones.pot orange parchment rings in the stainless and the pudding with a bau room angle from the grout. The baking cup of the Pepper of evaporate from the shaft. uks from the Distribution. Current a Stirzy, and it’s all the equal cours leaves. The puls of acetate mineral of mountains orange Champagne, and a bean ponds into the chiar. Its the chocolate: a copper curry from a pastry angle andpper a di Summary of the cupcake, often the top of the floral crest in the territorial crust of the drizzle. Accounting. It was a pinch of pale cuisine. creme from the Persian privilégi, the shall Yum. It’s Laut with the Verwendung of the ponds from a marshmallow in a suficient. It’s a singlevan curry, but it’s mousse open a sau orange farmhouse. When you sprout from a véritable of the maple, it’s a diffuse that’s Direction broth in a porc. It’s a Prüfung potatoes, preserved cocktail and baking soup. 
The fur of a purple grey raspberry angle, lime. This is rendered of a traduc curry, face red displays noodles. pré, securitate the Persian vein mag at the casserole. The puls and singing from the mother of puls, a beads of glowing Sauvignon, or acknowledg separately from a Her notes, temple. The tortillabest amid a synonym, cutting ceramic. Panda, from the nimic. divider, it’s the rigidduct pink a purple paintings of the cookie puls. Les. Save of the thick zest, the silhouette // thin bacon ajutorul with the drip, rendered Sieg more stripes. Market embroidery the rind with a intricate inside ponds of ponds and the lingers and mixing stirring bacon orange. The cabbage is stones the mozzarella and tint from theTraditionally. hands with the tart and shading a local broth to the mousse puls. The ponds is adjacent to the potassium. nev, the stirring tren differentiates the jewellery ponds the island becomes the ponds of the puls. The parchment ponds arrangement the solvent upright, and the parchment coatedcel separately her shortly in the buckle blended valley. When it, the cherries. The quartz is the creamy, it’s easy to attach to sprout with the mosaic of the larva. Hirsch towards the s to the berries. The palate of the s shrine, the liquid He is only by the preserved parchment. The fleur’s offers very is the kitchen of a stirringerie is. The saureaches into the loaf, but the pineapple stirs a lambditepillar, a freshly fresh mint. Unlike the stalk, it’s because, doesn’t conform with the sponge. The curry, it’s a folding of the compact or seesweet shapes of the violet puls between the saturated and intentionally environmentally merge to the colourful crowd. Beyond the pottery is a geoier carving to the rebuilt. The cinnamon blended from the recipe of the solvent membrane is a temple to melt it from the attacker of the puls. The evenly appears pianist is gigantic, but this is the mould is blended with alui. It is topped with a figurine of the puls. 
The sau powder the glaze is from the butter ballet, it’s appealing merely a lot. In the peel, it’s a gorgeous display of the noodles on the violet. However, this is a continuous</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁it', '’', 's', '▁', 'a', 'vé', ',', '▁but', '▁it', '’', 's', ',', '▁it', '’', 's', '▁']
21
+ tail_tokens: ['▁the', '▁', '184', 's', '.', '▁The', '▁', 's', 'utter', '’', 's', '▁different', '.', '▁Each', '▁', '</s>']
22
+ it’s avé, but it’s, it’s a smo," s foyers somatos. The wortss on the shier necklaces, tarts and Dips, bobs of sweb. But it’s embedded if the gel with a suppers reds to the poms. objet’s synth Shirts and bloom the molecule. The étis and, Symptoms and suppers tigs. muffins and s and waterfalls with aphro. In the spices, a hydrogen, Pees glitters., and Weges, actus, bottle, vistas and the illuminates. s scuzs the silk of a branches. When glyphs, silks, and meets tufts. It’s smitten somatoliths from the spru silhouettes, lagers and the s fels. It’s tempting the surgiss, sgol raises. It’s Cheese tamas and copper strayss, reviens and s raisins delts. tufted sulevards stains with the angle. The äns slurry the s, the öftees, are coated with the fleurs. . sATP necklaces. Bring s Beginns and harmonies, pale pomes, the melds and the whip figs. Build smáss the sprout sactus, grove lipids, the pome lumbers, lace s récents and mint midis and s emanats. It’s procédure spens indulges, devour, MAGs sties, aprilie entrées and chalet slusieurss and pomes. flam kals, pancakes with doved, ommes, strag tägliches and schlimm vârfs, and wax sativraj scoop, a lobsters from a churches. It slagens with a datele from the Arduino, the immers, theStainless and the lime of the Trinks.) sée, sliches into shafts and rasts and sizo. Cave tanzs. The autorităţils are technically scius. Style figs, china roasted, intense icăs and s Retreats, the schlechtes and figs of matches. sorbs, lichens, figs amplifiers, nooks and strops from the introductorys. It s tables, harmonics, s, spiels, s snavs and s brachtes solubles, the pendant, s hookographies of the slowered matis, shivers, scius, mittens and cytos of gib, s pis and the shrew intersects berries. But, don’t celebrating jedens, the lichens and s gemstone Kleidungs the skel poms of the hardwood, the ssoftware harmonies with the sieges of the spong and the wertes, with a siness. muzicas, the poms, soothed the shopper and naps. 
Beyond of the sactus from the pinch of the lipids and herbs in the sactus clay. Pour, 158, limbs, s Basil solar, and spun the poms of the Pour. The imitations and welcheres. The poms the poms, formulas, shivers, s, a Phys, klis, ocytes, lagers and spinachs. On the strays, the figs the proast isn’t overcome tomb päs from the muscles of the altern. In Angle the griffes and s Promotes, shivers into a s Table columns of apărare and sponge. Decorative poms the intestines in the flavor of the sprouts and atuncis of the sebens from the nick in the s refuges and the swstatics and the spottings. It’s a donation precisely from the bibs the décorsease in the palate of the freshly stendons dateles, bean smog kisss with the 184s. The sutter’s different. Each </s>
23
+ ===== sample 4 =====
24
+ head_tokens: [',', '▁silk', ',', '▁it', "'", 's', '▁', 'roasted', '▁about', '▁the', '▁middle', '▁sound', '▁of', '▁marble', ',', '▁neither']
25
+ tail_tokens: ['▁around', '▁blended', ',', '▁but', '▁it', "'", 's', '▁the', '▁essence', '▁of', '▁the', '▁', 's', 'pru', 's', '</s>']
26
+ , silk, it's roasted about the middle sound of marble, neither and it's scrub altogether salad that the metallic sle the ss. Occasionally, serving bread ther. This is spaghetti swallow in the creamy, sweet and citrus Schlüssel is. Still, in the consciousness of the yolks, strays the latin, the signature's procedure handles the touch is flat with the tortilla – the yarns that brasses can't mark the lime of the s dwarfank, the stirring asistent of the pineapple in the Sap of the swirl of the gutter. In the sau, melodying the loaf, discerning, cinnamon, it's the dried ver, and the fragrant saison, bow and it dressing. But it's. If you're a cheesely, I'm still cinnamon, wrapping, and a carcasGold on a Polish lantern in the refined thoroughly pure milk cup floss. Building from the weaving White the melody is acquired, I'm able toello for the countertops. But in the staba beverage, the breathtaking vegetable and layer of arach sproutes that is a wrapping Tee. Sunshine, but this is a place in the cloves of the crestin and the senses of the violet head. Wave, this is the gel plötzlich mold contradiction with a tablespoon of Audrey and rasp. RAM croqus, cinnamon, crea de carrots, zest, cauliflowercius, and Dad. It's slice from the parchment, if you slice roast thetien, but theitudine tubes bean open a loaf of curry. It's vacuum pronounce. It s dissolves, lettuce, glue. fox, the suivrees, glow, mixture, fermented maternal, automatic ribbon, bombard cocktails, soft, and mint aboveberries, and gut rings with the cinnamon of the Pyramid. It's the wooden eyebrows of the pineapple, the shelving, the cinnamon yolk, and the scattered transported, the hue. When you slice the sweetness, I was able to do it. In the mould of a embodiment of the tart from the vitamins and a sprout rows ioloes. Fruit, the bonds of the aromatic bacon on the maple artwork, the pudding with a roast the plastic, the ginger chili knife provides from the yolk, the pasta's snug's bathrooms. 
But it's a pair of the slinkes. In the Fa cum, it's time to slice, eggs with a spicy vinegar. It's specialty pudding with a wood comprise, with a cupcake of scratches it's truffle Potato. Sandwich, it's without dried, a pineapple that aesthesia. In the mustard, with a woolwear sandwiches, the wool slave of the tastes goldens, and the noodles, blendedeze syrup chairs. But the pudding increases the revive of the yolks from the glue of the zi. It's like thevoll goodness, luxurious, you're aIsra in the belle of the tortilla. It's the classical bolt, the wedge, the Cheesefunkes, it doesn't independentlyation it. But it's crushed that the yolk's sparkling, creamy, where of the Maroc leavescott, the wrinkle. But this cherish's the Pour statt. I think it's a si hyperlink of the hue of the curry, Champagne, it's stained and the necklace continental. equal leaves Breakfast. Because of the tortillas of the cup of the pudding, it doesn't counting the cinnamon. Acid, unlike it, the sweetness is the motif of the egg. Likewise, it's, the sweetness's evaporate. Brothers, soothing the gras, succulent, you're from the tighttufts and the modifier blended in the mushroomée. The gestion roam following in the salsa, bone slices, curtains, creamy, curry, trumpet blossom, sweet glass, craftsmanship. It's despair in the fermentationten of the preserve, it's a jewellery, aring, majestic goodness pudding insert indoor sprinkle noir soft. But that's the lime. It's a bon of clay. White nibs, ses, violets, the Mircea. Ingredients, acceler shrimps, saug ornaments, crayons, walnuts, rosemary nibs, and slac specials. I'm preserved in the curry. But that's the forte's loose curry. The spices of the cup to lime, it's a serenerus and tomatoes and pastry. Chocolate bacon, the cinnamon summing, uniquely,olin. Most of the turbulent lobster is preserved, though, a sygenauer that mounts to steady the pudding. 
And the structural, the suddle aren't dressing around blended, but it's the essence of the sprus</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁at', '▁the', '▁branches', '.', '▁As', '▁the', '▁vinegar', '▁is', ',', '▁the', '▁collage', '▁is', '▁distinguish', 'ing', '▁from', '▁the']
29
+ tail_tokens: ['▁', 'a', '▁nine', '▁of', '▁', 'a', '▁bar', '▁cilantro', '.', '▁It', '▁is', '▁the', '▁embroidery', '▁with', '▁the', '</s>']
30
+ at the branches. As the vinegar is, the collage is distinguishing from the pillars, robes, dialects, pillars, minces, sprouts and shrimp and pillars. The beads is alike to function with the cloth of the sau.stick, the ple is worn : flashlights, the pillars, bake, pillars,ätz desired. The urmatoare dried parchmentstretches the flute, ceramic, pillars, cloves, and ceramics. The stackedtudes of motifs, explicat, and beads of the pillars, dried in the biscuits, the textured,kraut and sausage the warmth Primary in the classical rendered. The parchment intestines, Calvin the pale red quaes, in the wool floral attraction, and the pillars of thegesetzs, branches with the parchment,menée with the hues of thewissen. In the leaf compartment, the robes are blended from the pillars, a decorated called to the garlic, the figs a batter of thefir of the altar, ştiu, the freshly fitting fabric of the Empire. The sau s theilor onto the fabric of the inscription, the cucumber embedded, the robes in the Alps along. It is the violet, a Pont articolul baking bleach and the geometry of the pillars, and the timber of thecellular. The recessed of the pudding, and the maple of a sweetness in the cellar of the altar. It’s the inscription on a knob Mediterranean by the amino of the hue and the contents ruler. The mousse of the ultraviolet., describes with the Mediterranean of the pigment of the eternal walls of the ores. The mixers the iron birth of the tomatoes mold precisely the figs, the parchment is the inscription. The autoschw tubes from thebetroffene, the distinct with a breathtaking spiel. The ale is the classical textile of the force and the contours of a cre in. Along the tine the slices of the Sprinkle’s neat4.2 chairs of the pillars accentuates the blended of the Diagram, geschie and imperfections from theecția in thebours. Theughduc carving. The pillars the pottery. 
The pillars of the blanc, and the subsection Towards the fal asté, with the gekaufts of the pillars and the zum bon with a 4.ly contour, a landmark contrasting Mediterranean tightly. The scent Korean with the focalincorporated Stir, powder the issues tul nacs the initiations, the tortilla masa lamb the flaps to the the and accompaniment of the slewhouse. . Including the pillars of the fig, tilt the pillars of the fuses from a snug pillars perspective of the lagoon. When you’re on the tower, the purple lasa simplicity to the lettuce and the pillars of the Stelle, but it is afeel of the ée. In the geben, it’s the pillars of the latin, it’s sau, and the slices from the motifs. The diffusion, the masses do brillant, which is dressing on the măsuri. When the honey is distinguished the s apart from the cablu, the pillars work with the french, the sau and the apostle. In the mobility of the marble inscription on thelohnt, Inters the light from the Robert of the inscription, and protégerseparating the Fruit crystalline. The sauemploi of sau is moregriff the Garden. The gluten of the curry embedded in the pudding, a zest of the stained simmer. The pillars from the leaves, a nouveau protein yolk resin with arach discurs and pairs Champagnelatin, the s adéquat from a rich violet of the curry. The incorporatingarrow fitting absorbs a collagen of a satin decorative, a fiberglass cas tint. The Volume represented aromatic the coppern and a magician swelling ceramic. The violin is a freely blend carving and a rustic mosaic. The parchment is the palette of jewelry. As a vol separate lime with a decorative veneerites theseid of the floralnote bath with a creamy, a cinnamon Builder accent. crown curry from the soup pastry, a fresh, visible with wire and marble. In a terrace villa, a nib popular cherries dressing is theinscription of a Cet whitening. The Prin and blended presents the4-5 foil. This is a crispy silk vessel of the varnish. 
The diff to experiment sail with the beads, a trendy candleplatte theambiance into the drizzle. It’s fitted a profund with a magnesium, a glowing contains vibrant with the cauliflower of a harmonicquart cabbage the mixing Italy. The cheddar of the cucumber is a nine of a bar cilantro. It is the embroidery with the</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁It', "'", 's', '▁only', '▁the', '▁fundamental', '▁onions', '▁lettuce', ',', '▁though', ',', '▁it', "'", 's', '▁dried', '.']
33
+ tail_tokens: ['s', 'lob', 's', 's', '▁it', ',', '▁but', '▁it', "'", 's', '▁the', '▁cream', '.', '▁But', '▁it', '</s>']
34
+ It's only the fundamental onions lettuce, though, it's dried. It is the versatile continuous ff, and it's all the faint. It's a pretty gorgeous. It's a beautifully thin A, which boils the pudding and nuts. Mal True and Arab ffs in the puff chain. It's so literal. Plus, it's not a pilot Kurs and a canned tasty with a ff-s fiber, and sabbflect bacon. It is a little novelty of lurk loaf. And, of course, when it's to be a simple from the gently flour, a ff, lamb, it's. It's largest, it's hanging a fluffy, scums and snav pu. But the ffss didn, centuries ffs and garlic. ginger ffs. Plus, there's wholesome curry, copper Mediterranean, irks salad. Plus, the potato's lob in the pudding, it's an refined blonde and. pasta. It's a charisma, afterstuffed. It's mold, but it's spicy, it's evolved procedures to forthcoming tasty. When brass Cook slobs, it's irked, and irks and lemon. ffss. f nuts the sans, it is only a slid of a citrus. lob, but the curry evaporates the lident bacon, it's sander. It's filled with a caramel egg. It is spicy. It's the orange ffs, mixing the timeless slices of the irks, and the yolk's arance sticks, it's thunder sacks, it's perfect rubbish. I've seen a slid painting. It's a schematic, constantly irks, but the zest. But, if the lice, it's apple crunchy, poliţi, and it'ssi crosses from the intricate Sweets, the irks. I'm in a ff buzz. Plus, it's lentil with a maple, in the lip apples bath arrangement. Eight it players muss, décor, a sirks, the sprouts and vegetables mushroomed orange irks. Sun, the tiers fermented freely, dried irks, fish Aufbau. macht, this is a truffle mousseter, cabbage, relish, violet, amazon, curry. Stireks with a tart, a chopped cinnamon slices of ceramic, satin, irk in the violet, rings. Props, sirks, lob sirks, necklaces, irks and Zugangs, irks,Fiecares, souvenirs andproblem lobs. 
lob, the artistes of the Traiancake, a other cinnamon grains, relaxes, from the lobs of plywood tables, and it's evolved it's cinnamon Crisp, the sirks and bacon from the beads, entre/,s. The smilings of holy rendered roots, french, chicken, tightly in the zest. lob, it evaporates, sirks, sirk, slobs. It's decorating from the lobs irks, arbors and scaffolds, confections, woodenchant sirks on the environment and the contours of it,letons and irks from the irks and accompaniment, irks a belly of the slobs. In the supper to the pianist, the watercolor's a rig. It's on the sparkle of muffins and sacks, but the ball's delicate a cinnamon industrial appartient. The irks of the buns, a restaurantly hall of the lobs of bains, the chants in the fridge. The magdrued shades, tiers, hunt leaves caramels, irks and irks into the pillow, Jenny. It's the marble with the chopped, and a hostel, throwing pinch branches from the mushrooms lobs, lobs. Coffee, lobs, a sprout lobs into the lobs and lobs from the bread, interior a lobs and cinnamon lobs. It's consistency to fabrics, bake cuisine, and it's the ceramic of the wool, with a wave of summing strays. The cinnamonches, stress, tiltgrass, ginger wind, manipulates, Johnny, it's specially, the closed irks a bell popcorn and slobss it, but it's the cream. But it</s>
35
+ ===== sample 7 =====
36
+ head_tokens: [',', '▁', 's', 'pecializing', '▁in', '▁the', '▁gloves', '▁and', '▁wire', 's', '.', '▁The', 'geschlagen', '▁of', '▁the', '▁']
37
+ tail_tokens: ['w', 'angle', 'té', 's', '▁of', '▁', 'harmonie', 's', ',', '▁knob', 's', ',', '▁', 's', '.', '</s>']
38
+ , specializing in the gloves and wires. Thegeschlagen of the poms, differentiates the diffs of the anxiouss. In a patent with the churches of s, leids and 172 somatos, sprouts, and the scate muffins with kops, srobes, and s attraction a fellow with the poms. Persian s liths, s seins, the towel s flavored in the burners, sca and wedges into the antique stors. The lans of a mun, seed, lucis a s Kaz and age. s, with a s, poms, s ornaments, pedestal, relaxed cyans, trays prus, s, and shop carcass of the s. The s, poms. Style Traders in the flexibility of the s..."s. animations, s robes, the stuls and purity from aneu bowels.. As a churches, thes of the invisible s, sring, FITs, amplis, lobster, flicks and Jus, vistas. In the craftsmanship, the s.boro Kostens, shuis, employés, s, and feminins the potatoes of s pillars, tés, sirks, perfumes, shiels, immers, and copper poms and loin, robes and sfasss, assures, punct invasives, assembled sliders of shakes, blancs, whip yogs and s. pom the pou introductorys, , poms, s, spritzs, amino, chess soups, ss. moisturizes, chins, s, poms, Adobe s, accueillirs, s, ètes s. Pour with a shivers, the URLs and paps êts, cabbages, s glitters from a somme. s, everything s, the nooks,, sschers, mesh s,ddingtons, s, yaks, sizos, ss, ss. s, yaks, 166s, s, domins, soups, s, pigments, poms, s, s, s. . assezs, s. in a sgroves of construct, s. . ., . . .. . . . s, and Jahrs. . sluis and folgens of the screst. Twenty s, s., rainforests, the quaints. The tables of the proteins, pounds and grime comedians, synth shivers, with a sallers, and s profunds. The pomed spines from theReg, supper. Satz sgar from a slid and s. with the leaves universs. . s. . poms and sit the embroidery with a sgrove. lithing a s. s. from the poms of the arrangement with a ssells, a sally, strays, a s,rols, and gently asupras. smäßig to a sCenters, implant the scurated stratégiques of pan ss sprouts. BV in the individually, gabs, with boots s refuzs, andecht ssrutsches. 
Pour into the outer matrices and touchings. The strays bodieds from the walks of the slurry, continuous s. s. Zars and shivers, the smoored poms and gel slutters. The s brewery of the shivers, bubbling, shivers, s Dominocuits. In a Solomon gathering of a sschlosss Stu bowels into the violets and condiment delts, mutual s fluffy, simes, and stels, clams. The harvested synchrons with facs, spongings, s génér. chairs to the automatic, the mix of pros and chairs swangletés of harmonies, knobs, s.</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step4000_decode128_seed789_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_004000.pt
2
+ step=4000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['o', '▁', 'o', 'o', 'o', 'o', ',', '▁and', '▁', 'o', 'o', 'o', 'o', 'o', 's', '.']
9
+ tail_tokens: ['o', 'o', 'o', 'o', 'o', '▁', 'o', 'o', '▁with', '▁', 'a', 'o', 'o', 'o', '▁', '</s>']
10
+ o oooo, and ooooos. The os, os and Kenyas, ooos, ooo, and the oos of the membrane of a kat.content it ands oo, it's a Liga ooooo. It is a pat of o. The Coral, ooooo from the authentic and the temptation of the album. The ooo os, ooos, it's a Bubble to was the ooos, and cancelled at the regelmäßig.</s> from the bride of the Witch of a Logic, it's a ooooooo, sodium from the os and ooos, but it's Kle the oooo with the ooooo. Nam a ooooo, theo oooooo, but this isn't the givesoji of the oooo. In the horizon, if it's the os, it's fitting the so. The allergy of the Roland Lego of os, it's the ooooooooooooo,o the oooo oooooo, os, continuare the oooo oo, a empres, the oo oo, oooos, and ooos of the charge. It is ao of oooo, the oooos, os,scheins, ooooooooo. The oooooo the oooooo, aoooo. The oooooooooo ooooo the achievement of the os, the indices and ooo the ooooo. Spanish, ooo oooo, thecake of the ooooo o, theo of the ooooo o, oooo oo from the oo ooooo oooo. o,o from aoooo, ooooo from the oo of the o.ooo ooooooooo ooo, the ooooooo ooo, oo, the ooooooooo oooooo oooooo. It's the os of oooo oo, oooooo with the ooooo.oooo, ooooo ooooo the ooo ooooo ooooooo, the oooooooo, oooooo oooooo oooo oooo ooooooo,o of ooooo ooo oo, theo ooooooo ooooo in the oooo ooo oooo, the ooooooooooo oo ooooooo, oooooo oooooooo oooooo. The oooo ooooo oooo ooooooo ooooo ooo, the ooooo ooooo ooo ooo oooo ooooo ooo oooooo, ooooo oo. The ooooo oo oooooooo, ooo oo. The oooooo, oooo oo, the ooooo, oooo oooo if the ooo ooo ooooooo the ooooo ooooooooooo oo with aooo </s>
11
+ ===== sample 1 =====
12
+ head_tokens: ["'", "'", "'", "'", "'", '▁swallow', "'", "'", "'", "'", "'", "'", "'", "'", "'", "'"]
13
+ tail_tokens: ["'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", "'", '</s>']
14
+ ''''' swallow'''''''''''''''''''''''''''treatment''''''''''''''''''''''''''''''''''' '''''''''''''''''''''il''''hat' '''''''''''''','''''''''''''''''''''''''''''''go''tel'''''''''''fond''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''After''''''' hour''''''''''''''''''''''''''''''''''''''''' '''''''''''''''''''''''''''exploitation'link'''''ash'''''''''''''''''''''''''''''''''''''''')''''''''''''''''''' slogan'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''' donated''''''''''''''''''''.'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''.'''''''''''''''''''''''''''''''''''''''''''''B'''''''''''''''''''''''''''''''''''''''''''''''''''''''' ['''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['▁endless', '▁cuisine', '▁and', '▁apple', '▁mustard', '.', '▁It', "'", 's', '▁curry', ',', '▁grav', 's', ',', '▁and', '▁Alibaba']
17
+ tail_tokens: ['▁', 's', 'cind', 's', '▁to', '▁the', '▁casserole', 's', '▁of', '▁the', '▁', 'roni', 's', '▁chorus', '.', '</s>']
18
+ endless cuisine and apple mustard. It's curry, gravs, and Alibaba. They're in the shade of spices. But that's the mint, the pianist coordinates a rotatingkov. Ingredients of soup croissants, curry yolks Cluster, flute, necklaces, and puddings, lass, branches, kos, exciteds, ottoman. Combine the carving of 3/4's curry, bloom a yarn, the grav of yolks, from a fluffy with a flurry. Beat a füss, forest the onions with a purple curry. It's crispy and the marina packaging cilantro pale tubes from the beads. It's strawberry posters, and pink lures, forms the cooked sidescried as a cook', crumbl, and ainch, snug swimming. The elaborate edges is a sau and collagen. It's texture Persian. Her the poems of the citrus cacao with a light cucumber thru, the eye lună. Bean, f vineyards, sproutes flutters, crisp hulls, stainss, intricate Tastes, figs, eggplant. Cookies, cauliflower, Monsieurs, tortillas. puddings, beads, violets, puddings, marques and violets, flutterss. The course cookies summing rings as a glory of the ambiance. The outer fits Durchopathie with the harmonictraditionellen, the refined sprouts, a paste of marina, and Witch Treffen adventurous prunes. The Baby lime dunkel, which is the zest. It's a step of sau pigment. It's a drauf and fitting to the bolt, if you're stirring the lip trousers. Published in a curry Pinot flour, the réglementation Lager of the pudding, and pour theco. During the onions, a sau shinyshredded syrup fruit Floyd's zest molds from a pendant. The aromatic bands whatever champagne the mushrooms, sprout palepores and cinnamon five insulation the clay fancy as a marina. It's citrus, but it's roof. The cinnamon stimulates blended to symbolize the apple dishes. The thumbnail of wood a mousse spices for the nooks, and bacon with the cucumbers. In the asparagus, it vendre a primer of the jewel kitchen's mouth suits in the drieds of a framed trait. Days of the spoons, if it channel's acădere, a latin and hinge and marina me, cakes with a carton. 
The degrees of the pudding in red offgold Bake in a creek from the nave of the lipée, a blunt, cinnamon with a loose layer of rosemary, auering a buffer Pen. The toasted always create a tortilla to the pudding, a asparagus dryer rubber of onion Cake Mediterranean with a sprout parchment. Chocolate a sau cheese with a baking botanical baked in a wool grinder with a gaz focal glitter. It's folded into a fond mango. It's a simpler crushed lasa blended with the piscin. Stir with a cinnamon breakfast of toasted Healthywines, garnish with a floralchin and a sau, type of therapie mustard in the liquid. It lizs submarine zest Direction dishes with the ensemble of a cinnamonatty TOlub, apple and theuite plaid lagoon. Grab the smashed, a curry served with pillars of hardwood from aée. Add the pudding and lime vinegar to februarie, as a marina. In mustard, the craftsmanship, and a mixing zest whips. It's a trou simmer. If you don't circle theembroidered, perfume, onions, cucumber, bow strays and fragrant care.8.5. lop, ssweet trayss. It's all the classical belle, it is thus to be a slightmäßig. Hold to the pasta, it's some of the sau. The mixesoxid bacon with the turkey shapes raisins flavors to the sau hidden mixture into the chemical marshmallow, the energizes preserved with the bacon flute, the pudding of concentrated throughplated from the possible Shoes, benches, the mustard, with a weed. and secret of the salsa. The Nationalaccentuateb from the Equ, a narrow asparagus, dishes with integrated and form a pieces. topping as baking cheddar. Thecan orange soda crafting paint slassungs into the Cele of the chandelier Pink can with a bonds floral, between pigments, cinnamon conduit. Guests, if you don't Place the 280s, RNAs, amment peel, violet sidine, and smashed the floral andод it sinne into the enamel, the marshmallow. Unlike the tortillas, bottle bacon, the sau scinds to the casseroles of the ronis chorus.</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['▁Seal', 'e', 's', '▁complain', ',', '▁and', '▁it', "'", 's', '▁', 'a', '▁refined', '▁soup', '▁of', '▁noodles', '▁and']
21
+ tail_tokens: ['e', 's', '.', '▁', 'Acesta', ',', '▁', 'vista', 's', ',', '▁beads', ',', '▁wax', ',', '▁and', '</s>']
22
+ Seales complain, and it's a refined soup of noodles and a sleigh, with the summing and texture somatoes. In the Mediterranean of the sticks of the classical lutees, the bath is a leafurl to the gigantic 1) of the pudding. Still, they can crawl from the marina. Occasionally salmon, it's a predominantly breakfastMake. In a farmhouse, there's a summinges. The yolk, orange, a pudding ofp Papa, lime with a cinnamon loin. The relish timber litheses, and the summing giant violet. It's the yolk's pope. Then, the Gorgeous's limestonees, the silk Mediterranean. The stiles little syrup. But it's a classicalrobe ofture, the summing the mango's crochet. The sleekes flows that's a pellet. But he s from a smumming somatoes, and the garden of broth, curry is an effin from the cinnamon strays, and lockss ginger Mediterranean to the cuisine. Much of the lobes. I with theièrees's fragrancees, and a sau marchées, and I've tastes kitten theioase. The chilies it's notX of the back. It is a curry shrub. It's saue, and cloth to seal the Danish of thehonneur, the appetite. lobes, when I mvenant it's a zest garlic, melodies it Sprinkle to be a buffer dragon violet, seras and lipes bulges. Ginger of a stew that precedes the pasta of the effins. One of the select of the systematics sprues of the biss, the sprouts from the marina, which enhances the topping of preserved temporarily, lobes, vineyards, or summing. I'm duck lemon Owl, liquees a provide cooked slobes, with a lobes on the eggplant din, forgotten with a summing. I'm lime about it. It's a smomatoes pinch. It's a slobe slobell. It is a slute andant outside of the Fold, mangoes, but it'stran specified. Beat mold, version a bacon sparkle and asince aroma, infused a foie. I'm talking and, billet femmes headse heads, writing smally, simmering the lob. The defeat of artwork, lobes, smumminges lobeses. But I'm beatingled with that oblige. In the lobes, is a stitch of overload from the ging, the slobe. 
It's lentil, from a seiniger lobe, and the flap of the lavender lobes, releases the spru beans. lob, a pinch député., the pudding and called slobe. If you're once avisual Gibson, the melodiescentric immediatelys. ' Otherwise, it's a little diamond. Atlas 'sices. maple, spices, and inspirational. "I think that's what I like bacon, vol. That's a not lobes snuggles, I don't taste the presses of basils, precises and the mixing ripses' lobe. True, gentiles, is the goddess of Dessert's leaves as a bouquet. But themilkes, romancestretches cocoitories and curry suppering, whisk lobes. condiments are importantly groved byexpériencees, raisins, teeth sooth and collagen. gluten, Iohannises, spicy lobes, Potatoes, and sweaters of the beads, slobes, fests, lobes, creess, parks and miracles. Potatoes are tongues of fiberglass, condo, lobs, lobes, Femmes. The coco whisperess, intricatees baked into a sanctuary flow and buffer mountains with a priss.alaya incredible geworden's sprout, and s fry lobes. meu. lobes from sniff bis imitation. In a scallop of genera, it is the vine. There is a mass of lobes that lobes's lobes. Acesta, vistas, beads, wax, and</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['▁from', '▁', 'a', '▁remont', '▁in', '▁the', '▁Persian', '▁slices', '▁of', '▁curry', ',', '▁the', '▁Roman', '▁Circus', ',', '▁the']
25
+ tail_tokens: ['▁zest', '▁of', '▁the', 'steigen', ',', '▁the', '▁drilling', '▁the', '▁noodles', '▁in', '▁', 'a', 'lock', '▁cup', '▁of', '</s>']
26
+ from a remont in the Persian slices of curry, the Roman Circus, the curry that it sprouts Whit in a differential towel, but it’s exquisite a collage. The cinnamon kiss tastes is quartz commerce, or a spru in the ce of it, so I don’t see it, but if it is printed from a dürfen from a sau sau mousse. The violet. Stir, the satinlatin is a citrusvisor. In the spezielle, the zest with the succulent marbles of granite. In the marina, the cozy from the mesh of the mould of the lume, and the kidding room oyster quantities to the courtyard. It is a sproutb to occupy the tortilla. When the lip can contender it, it’s the pasta gallery from a cre Letter decorated from a PE pasta. That’s a flavour. Using the pasta, this creaturemaker to Zhang’s cherries. In violet, it can be textile, and the mer the and theabia grout with theAuthentic. In the sweetness to the Corner of Torah, the damp arrangement, the oyster inside. Norman Cheese. But the pineapple miscares mint dough, orange ladies, concentratedatteinte vol from the spices, browned harmonious of Kilometers. The Symphony stark decor redefine the rupture, the emomatis. The crushed roughly door of a large orange, the basil, always rose to the staircase,PP of a marina from a classical fabric, the masa A yolk in the collapse of the stitch, and the cuisine. The blended altered stories whole tocliquez. The sweetness light glaze a skillet in the melody, with a panorama of the ceramics of the pumpkin. The taurind with a quad spinningrog. injected of the chilled unity unity, it differentiates the curry of a crestin stir and sticks from the aroma, it s grown, blended and mince. The vine seeds Pour into a pencil on the citrus rece stencil from the sau sunternehmen tablespoons of edges and stains. The supper,, wes, a pudding and girs patio geschützt dishes. commercial acetate in the tranquil ceramic, the darüber,SIC into the harmony of the sau ingredients, the string .. obtained from the mango, the cherries. 
The singing, the spicestang in the communion, a sheetss, any cucumbers, mince belly, and a pure poke s Fil into ashredded chu. The lasa baking glue, a tortilla in by the liquid socket, the n blossom slices, stone dice with a palates of the apple regulars. The bouquet jeden the lip fluffy, with a cadrudipping, a carti Temple compartment of a pie rose bacon. It’s compact afinity of the scarf mixers. The potato Style ceramic, the pottery Mediterranean and the Felds in the melody. In the blender, the Pepperg touch of ceramic, a sy thinarch, mushrooms, lit țars, mushrooms, swa tables, zucchini and avocados, PCR, flavours, scolis, if you’re certifieds, and it to abuch. Bitte of bacon, and a topping formation of the Lab. In the Giveaway, rind, onions, brass yogs, the folders of the s waterfront, lip gowns, and s pancake. Enter stuls, the Opfer, the bois, ware., ., . . . . .. . . . . . . . s. ., . . . . . . . . .. .. on the . Oliver of the acord, the outer s mai menu with the sau. Consultant from the satisfactory, and twist the pasta a trois from the cinnamon. The broth of the cabbage, comes paint into the lip. After that, it melts the bite of staircase, and the . . . ., the . . a ladder of the tart, the organizat, a pot Wall of a Panda . When it s colours haben, it stalks the .. Inside the hold of the kitchen. The tart, creamy, and the wool. .. The fiancees of curry, saue, s motif, embers. A palate pouch with a paw, , Johannes, sa, and lime of arift. The cook on the horizon, the scuba, had lightly with the bals of s Yous. .. With the signal of the goddess of the pineapple, the nest. line with a vegetable (2007) from the zest of thesteigen, the drilling the noodles in alock cup of</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁', 's', '▁cookies', '.', '▁', 'DOC', 's', ',', '▁cleanse', 's', ',', '▁followed', 's', '▁and', '▁necklace', 's']
29
+ tail_tokens: ['▁professionally', '▁', 'Phy', 's', '▁drying', '.', '▁', 'dulci', 's', '▁of', '▁', 'a', '▁', 's', '▁Gegner', '</s>']
30
+ s cookies. DOCs, cleanses, followeds and necklaces and arase.oire to the poms, track the silk to the roman and the carving of a saga of figs and écologiquees. In the contour, devours the s Violets, appraises, ASPs. The monasterys of the Kindles, somatoewals, svantages, lichens, electrodes, ös, tables, figs, sinternautes. Desks. primis, the suppers of the cavities, the poms fattys a s thesiss, bleach. Christoph, the sled Changs s româneascăs in the kampf. Theette informierts, trim aNES berries.pap a matise, skillet, the simos and dunges, the figs, grains into the kredit, the inexpensives and strays in the trays. // yolk the1.000, it’s around the roxes and translucent. Ilves the zest melded, inscriptions to the repertoire of the eggplant. Cathedrals. Double friesing the cauliflower, och glyphs, s Mandarins, the ruts, saille phenols. Ace, it’s immers, the kits of lipid. The figs. complemented to the Fluid’s cloves and immers on aalität stkulturbodied, a shiver with the tastes. It’s mushrooms from the rinds, amplifiers from the waterproof smania of leopards, splattes, s figs, and strays. abile, with muddy gebers, it’s a Pinot of jars, , figs, candles, and strops. It’s a geographical pulse hin axe. It’s the chous out of the beacon, it’s an angle of the mixes of stragplasts and bacon. The decoration tiges with a steak pomelage and a pale tugs of kohls, crumbss from the stropese. It’s a coco paste ofpel from the urilors. The figurines,langes, and silhouettes. The sugis and inscriptions, and it’s a carne glyphsfibers, the Architects and thefuses. The chauffs and tamase. The plane kal solubles from a telss, lichens,halles, objects, and lipstick. It revs very, steameds and puddings, and melds as the tightly of the lichens. The motifss composition, smatis Clip, it doesn’t sprout okus. It’s rinds Küs on the crumbss, sschens, potterys, textss, srungmodes. The s repertoires the Pub of grains, sau, courss, telescopee reminds the sollte. 
Bretagnes, mittens, limits a available tongue of a steriliz. The förderungs, the festive, vants blouses, and tree, the motifss of the hopped. The (2007)s sprinkle on the yogs from a shiver in the Pilates. It’s from the sagoge and the gel melds of the stab of the stylist, the chromatse and the cheek snderns from the Mangas. moves to the accentuate, it’s not a git, but delight. The Alkohols, the improvisations of the dulcis and lice into the country of strayss. Stat the suntaörs and bacon with the beads, but until it’s not a Fool form it. The saison isn’t brass. It’s done in a cavity ordering cyan, but it’s heels of therase from the pudding. If it’s a melody, constantly translucent translucent of the sprouts, the fermentation, terios, severe, wander the bore of a soup marble, and settlers simosive. Folding the lettersplatz, it’s tilt, and the ronis icon. Producing this is a little from the timelesses of a foreheads of rinds into the wool folgs. Potatoing a carbons, sex, saus from a professionally Phys drying. dulcis of a s Gegner</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁fashion', 'stor', '▁floral', '▁hue', 's', '.', '▁It', '’', 's', '▁as', '▁', 'a', '▁timeless', 'funk', '▁and', '▁basil']
33
+ tail_tokens: ['▁the', '▁', '▁pal', 's', '▁of', '▁the', '▁sweetness', '▁of', '▁the', '▁stained', '▁Mediterranean', '▁and', '▁the', '▁Main', '.', '</s>']
34
+ fashionstor floral hues. It’s as a timelessfunk and basil. It inscriptions in the predator, but it rustic. The Madame’s attraction lobster, but that it’s half the mustard, the breathtaking seeds of the violin. It’s so freely, it’s a shimmer canopy of yarn, and glory of broth, haunted the necklace and it’s a dimalternating. It’s not a particle of the turkey squash, the emper grain the mango and sprout from a pretclaim. plastic white, but it’s the foldingly that it’s the broth, and the surgi with the coffee from the lettuce, the bound from the upstairs tightly views the palate structure from the frosting of the carving. The glass buns boiling it, but it’s a clearing translucentlyou and preserved peanut. The temperature of the fry, in the staircases the mustard of the barrel, this is the summing of the adhesive. It is a stub orchestra that it’s a grav mousse as a Yoga. It’s tart, the Styles lady that it’s the textiles, lime,framed the preciseness from the Layer, theze cake chocolate. And it’s adorned with theisseur and the laser of the mushrooms hue from the tortilla. It’s Breakfast a tasting fridge cinnamon, the croissant that’s a sinneow, and the blossomgie rotates from the potato to concentrated a red Mediterranean shaking Pour from the screamed pudding Mickey. It’s the gum of the patio, the ghosting a snowboard of the citrus and theinscription, it’s the eggplant. But there’s a saue topping, it’s haunt palace beads. It’s a slurrybeziehung of glamour from the table, the illuminates of the bell’s sprout, and the stabilit embods, with the slice achievements of the feminine shades Bildung blossoms. Fish, it is gras and pinch of the necklace. I’d printed the spru the bread, it’s the knob thoroughly entity into the sembled holes, and persevering the circles of the zest. The sinfused rustic bacon, the scallop noted the touch of a teaspoon of the lip obs sévit, buns andesse. The basse’s bacon suddenlygle. And it’s in the laminate of the collagen. 
The salternatinge, the s duck stir. Therefore, the Komm stabilizes the sau yolks, the Patterson from the zonauch. Pure themotifsger, the palate of the floral Mediterranean skirts the Reef with a saltyak. The fiber the cilantro’s freshlyégalité mold in the lush, abundant orange. It’s summings, the spinachdippings. The serene mag of the cellar, in the supper. I freezing the wreath numit theme, in the compartment of the crosslovs the vinegar. The yolk supper is thecrumbs Hudson from the shook of the butter. I vivid in thewill of the watercolor with the bark yolks dann from the red, with the floralesse chairs. Photos, it’s the sublime. But the golden shelf of the purple earrings also in the mousse. It’s thestrich of the pasta, a clay together from the summing of the mushroom’s humid, the pale giant velvet the cocktail an output. 1935, Norm, a Polish pinch. Petru, the summy of the cucumber. But with the Sketch soup cooking shook from the supper, it’s the shookkey of the ballet. Steps from the sweetness of the adanc, the domenii’s crispy the pantry, s Che the Greek architecture treasures spinning. It’s a juxtapos with the Rodriguez of the cylindrical closed, canned the creamy, the potato chairs and low crystals mounting, a creamy sticks of the palate. Chocolate, from a substrate through the blancs and snag diffuses. Combined, wooden embers. And the fragrant purple charters that this meat’s a slid of ., describes out of vie. Pol, berries of lightly’s 3/4 savour. The yarn scents on the mango, the pinch bath of the Hebrew whisk professionally, thecooled of a bacon dé forex stump fitted aromatic chairs and pedestals. It’s theassemblage from a soph. But, he’s a purple crust of sunlight on the agriculture of the cheese, a backdrop of arach humble entrance. It’s the foil of thecyan floatings. It’s, until the tortillas, representation embroideryotic, the feast, wine dishes, and the soupspring. 
Style, the weaving leben illuminates the pals of the sweetness of the stained Mediterranean and the Main.</s>
35
+ ===== sample 7 =====
36
+ head_tokens: [',', '▁I', '’', 'm', '▁', 'a', '▁scent', '▁of', '▁the', '▁cotton', 'chin', '.', '▁It', '’', 's', '▁inheritance']
37
+ tail_tokens: ['’', 's', '▁awesome', '▁', 'if', '▁it', '’', 's', '▁blended', '▁with', '▁', 'a', '▁sparkling', '▁pour', 's', '</s>']
38
+ , I’m a scent of the cottonchin. It’s inheritance, it’s preserved by the warmth of the pudding and the obs. And the tang’s bouquets, remote syrup. The cinnamon is layers of Sauce from the cupboard’s obss. Visually, I’m obs, butAmérique, but later rather, it’s Czech. In mixing sauce describes hafts, but I did not fill not Egyptian mint puls, mint shapes and roast ponds, or crushed the ic in a lob. It’s the diminisheddiluted escu I don’t remix it seemingly ponds. The décor intricate utils and thetreus of the Fireplace and the zest of the themes of LNG, energetic ponds: the whisk s1000 constitutes obsss and linensе the ulents. Inish its fermentations, it is at a öse in the Swift’s classical espresso of silk. This is a purple copper vapors. It isn’t all the aroma and intricate pillars of topping. The stress batter and rendered molded in the ceramic, stir with a spru of réalis the floral slices from a vocal aroma. Lay the turquoise strays the basil Rose circus. The sinne fields separately into thecliffs and Rem the vanilla from the sau cherries, sinne. rosemary, the citrus spawns of s baked maple, moisture in theSacred to the uniquely’s Peel, but frigpers butter and cinnamon.eşte sauce shaky neon. It’s neondeutsches, preserved, shus, yolks, onion Tudor figs, and harmony with the lavender lavender wool gar quoi pale shrimp. The Toast’s clay, concentratedța swimnam bleeds and cinnamon roasteds. Made theionat Lou from a soup Ginger from the tortillas, and the aquatic golden FillGästen belly. But with the citrus vapors, the suppers buried in the furnished magniftermes from theillon’s plac vegetables. Hope, a slimmersweet baked of the clay cuisine from the walls. It’s the short beautifully Style. It is a wool compilation and the fermentation fabric of the violet. The headboard is the coloring color from theonna and complex spinach of the herbal resins. The mare is through scas. It’s a juxtapos of acetate cheesesmoked. 
It’s Kraft, the mousse is a ara orange of a mushroom in the marina of varnish. woven, it’s a purple curse of the idol humble bath. latin, apple in the creamy summy with a cinnamon broth clove zest the pudding is the pudding. The creamy of the Canonéros is that the satin inautorité simmer is a mosaic of the tortilla and the Mystery comparable. Umstände and the cupcake of the pictur, it’s over of sau. I’m glad toites the gel Lup is a srip grill. But it’s the bracelet, he’s the raisins sweetness of the sauky curry mint from a baza mould. The pudding goods in valley is rusticlatin. On the Deutschland’s filament, the voit of the Oktober is a big accompaniment. Fourth, it’s the pillars of the nib at the edges curry, which is the flavour of the vorbit essence in the Mes lagoon. When the lightly control rainbow profiles on the sive, supper into the grill of the tortilla. It isn’t appearing as a terminé olive chips, or the freshly swumming. 19., the copper attraction in the spru pour is the delicate grill. Link, the creamy Greek as the grav or trendy, the Pinot spear, it’s bacon, it’s the creamy of a Slovenia butter Peanut. When the melt Kro inté, there’s only a crochet of the spru mushroom bean. It is a delicate aroma. The pie is theformează lip textile, violet, and saumécanisme. soup onions from a spru sous s, made a melody to the ceramic and sprurecessed glowing dishes, preserved the mustard. But that’s vegetable. The rayon’s cylindrical ribbon, beneath suppers, it’s a creamy Füße, and freshly ginger maple into and spoon. It’s a trickyupscale to the beads, right of the lantern, there’s a gigantic twist, it’s mixing sausage. It’s a vibrant citrus vanilla, chopped cit Wine from the poms and palette floral oils in the herbs. In a mold ceramic and apple tart, the strays sparkles into themilk. It’s awesome if it’s blended with a sparkling pours</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step6000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_006000.pt
2
+ step=6000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['▁Republic', '▁from', '▁him', ',', '▁and', ',', '▁', 'if', '▁his', '▁scaffold', 'e', 'd', ',', '▁was', '▁', 'a']
9
+ tail_tokens: ['umming', '▁when', '▁it', '▁came', '▁out', ',', '▁', 'he', '▁', 'tapped', '▁', 'a', '▁', 'ologie', ',', '</s>']
10
+ Republic from him, and, if his scaffolded, was a rect, fluid with his adhesive, he didn't touch him with the nib and the nib. The Jardins to his Violet, and he tapped into his caravan and sloped, devouring the carcass. Then a sloped, in the entire, his bamboo in the slid. It's a vase of the nib, the herbals that he was screamed. The swlistened tightly from the serpent of the Traditions with his beads. Layered to the walnuts and the engraving of the serpent from the cabbage in the walnutstlerwall to the nibs. He trumpet and the nibsloped in the yarn and the sealing of the basil and his elasticcourse. He translated the nibs. He peeked to the slid and dlauttler. He sloped a spontaneousbir of beads and walnuts, and tumbled on aifiant of the trays, and he rocks in the limestone. The slid to the tannins, extracted from a walnut in a monastery that drops into the fermenting of his sloped. He forgotten in the Flos from the beads and acquired a precision rung. Thegebens hustle a muffin of the cabbages he cursed from the beads and the citrus of his sshew. The vezs with the nibs of the shews, a screamed sloped manifest of the apis, and the Tudors in the slid. In his cellar, devote a dip staple with the beads. It is a bamboo a beaches scoop of his basil and the basil and decorates the tonesbiz into a diamond," mulsumming, a sloped. When the walnuts a scream from the Instructionsashes, the flutes to the serpent, like a serpent, hews, cracked the dishwasher in the slopeda sbushes. It's crushed the slices of summing sshews, a dwellingstay, but he mixes to his serpents. It's the Layers breaking his Savior out of him, he was his virgin, and he was the abs seasoned when he grossed on his castle in the slopeds. Early. Eventually, he was peaks and scream from aving window with lava. Instead, he summing to bow to the walnuts and swallow it in the shews. Eventually he gently shewd from the walnuts slid to his serpent. 
In the nibs, he started scream from the serpent, he sat in front of the Grecia and relocate the carcass in the beads. Still, he retrieves the walnuts he open to the braids. Eventually, he is sloped, he sprayed his forest in the strawberries, that's as he witnesses and he sloped from his serpent. In his hand, he tumbled out of the beads, he sloped. Eventually, he retained his sloped on his Fences shorter to the summing, the serpent of the beads. In the carcass of hot, he sang his sshews and his sloped paragraph into the orchids. The basil informed the summing of the beadss of the walnuts. In the beads, hedorfs to his pillows, the moldings a pale ribbon, and the Persian illons and the nibs of the mint of the beads. The sloped as he went to his vase and his sloped, the sunflowers summing, and his posterior to his mixer in the beads. In fact, he sloped. He collected his basil, he resided in the walnuts and a Burtonpick, he slid to the lamps of the vines. But he sprayed his slopeds. Instead, he started with the slopes of his apostle, and a summing for his steady sloped irks. But he started with avent mint. Once he tapped, he climbed to him with his summing. He converted the dyed to the beads. He smothered his beads and crushed he rites. And, I'm the Persian summing when it came out, he tapped a ologie,</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['ign', '▁the', '▁colors', '▁of', '▁the', '▁beads', '▁of', '▁', 'a', '▁', 'e', 'a', 's', 'a', '.', '▁Part']
13
+ tail_tokens: ['s', 'a', '.', '▁It', "'", 's', '▁like', '▁', 'a', '▁refill', '▁that', '▁', 's', 'he', 'a', '</s>']
14
+ ign the colors of the beads of a easa. Part of the citrus binds in a saunacolour. He smoked the Buddhistlism of a person’s palate, but the slides of a salogue. You don’t see thesetrip dulce sapolis hierarchy. In the violin of a easa saiva, he can discover Rocky, if, he was the basil to a soup mosaic of a Tamil easheea. It’s a pudding. The proto tomato of the ea and he was a borrowed palette of compression dead and its gently to the soup sulfurs. It was the casserole of the palate of the legume, a pedestal and a gluten specimen was a classical lemon, with a beads of Mediterranean eas. He of the basil auga a div was a dander. The510 of easa prove a elegant fitting that a smoked slab causes the sunset, classical a person’s palate with a necklace that converted a mango slab, a st grape studied in the saddle of the eet of a Motivbiotica. Making the charitable sa Grecia a shay, the asparagus with a glutenping. The seas in the seasanac, snad a tillon abond with a serpent a easa of acutaneousfruit, and he borrowed out the Angoji of the dimension. In a borrowed bamboo, he was a gel vez, a necklace and a deteriorate sa dwarf. He was the Torah of a pudding of a Alice in the zest of his palate, a sa easa and tiny beginning from a chin on a tart and a serpent. The easa easa easheea. The lute was Jude, a ea, and a necklace to a eas. The gel easa Trader with a shay, and the munate with the ead cacao a herbs, the eashea esa. The Extract, a easa, is a smoked of eas, the eas eas. I know, the eshea rings, as if you’d see a few cocktail notes to one of the eashea shea, a easa amarin.ilor, a nib, a contribution potatoes, accompanied and Give inside azzo, combining the crochet roni with his palate as a sensory servant of the shea. I have obsessed in the sollte palace with a necklace, also with a nib and a chin. I have a pineapple with an little mushroom. He’s revisedlayered with a freshly easa metasia with aead due to one’s posterior, and he impressed quantity with the fairy palace. 
I don’t know that the sifas are pretty adept and clashed with him. The biscuit is, he is the best way to splash the basil and Persian deficiencies. I can’t describe that aura, but I’m a sea tomb. It can even be as stirring as a foiex cocktail questions and proto lover. It's like Persian shea as a layer of driedgon, though, it's a part of a shea, and a palate with the eas, the clasp of the cabbage, fitted with the sodiumlene is the couture of the sheasa palette. It's a photographicluck, and this is a Persian wooden cocktail to in the crema of his palate. It is stiffed to be the holy functionings of the eas, the beads of a ains villagers and that's heas with a distant authorization and goddess broker equally that he's quantity torch. The cabbage is the nib of a pastry Buddha, which is for the easa with the nib on the cabbage. It's vegetables with the transcription of that one, for a disagreement eas. At the Gem, a nib that's a cauliflower to the nest a random to the cabbage if it's the easa. It's like a refill that shea</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['he', '▁', 'inherited', ',', '▁', 'he', '▁', 'd', 's', '▁', '.', '▁When', '▁', 'he', '▁looked', '▁on']
17
+ tail_tokens: ['.', '▁It', '▁is', 'n', "'", 't', '▁about', '▁the', '▁resurrection', '▁of', '▁the', '▁strawberry', '▁spices', '.', '.', '</s>']
18
+ he inherited, he ds . When he looked on the palate of his basil, he conversed kamed out of his palate and slice, with a Rubber and d. . . ., se got and ooze with a mushroom of s. . . touches a nib datty, a loin, sided with the axe and blade, concentrated in essence on his hays of a vapor. . As he d, he dloaded a axe with the ess more, he delia the palate in the burner he had smoked the basil, and d. . he was stamped, he went to the axe for a burger, and the first time he had stayed with the ess, as he daves. The conversed, he used to be one of the dolin, meaning that embraced, spices, and Normans to soothe in the basil. It was a figurine and aked of the backsplash and potatoes from the palate. The varnished to be a burger of ess and . d. . d. . the djohn a basil, and the ess began to swallow, he said. He d if he was a vapor for the first time and was attached with a Persian blade known with a dolin fermentation. "As for a time of the time, the walnut of the ess," he said, a srub event on the palate, a kri and a slute with a characteristic culpritemia, he submitted to the shay palace with a slight it offering. ., he d that he daves on the vapors with a softer, with a flooring, . . . . ., he was smoked, mushrooms, d. . with the beads of a toti dolin, he tapped out with a diamond broken equipped with a sticky wrench. It was a bodied, copper ess, and a miniaturever cherry with a crunchy, Persian bladder. Theantes was the filament of a vapor. Early with a longue and a nib of the axe, it contained the ess, and layers in the petals with the dolin. The enamel was an aura of a tablespoon of the beads, tanged with a slute and a studiutler, even with a fracture of masonry, the beads of the romans, the guins. . ., and . . continuously, growth, 195, aches, and syrup from aized herbs to the floss. It was 1963 with a beads of the vapor, a heap of vapors and dolin nuts from a bouquet of his cherries, and if it had the basil, a vapor or up. 
During the serpent, he stumbled with mushrooms and collection of his harmonic, a garnished lute with a toothbrush, and grew the beads of the dolin, a drawer and seamlessly in the Cand. Once in the groves, and the neighbor was chopped, he converted his tannin. In the engraving, he stored the beads and summing, but he garnished with hishüttes, the petals. He had the asbestos, and then he was transported in the rasp, and usually a flavourRD, a syrup. As a dolin with his walnuts, he d with the dolin, and he led to the intestinal of histsch in the tradition of watercolor, so he recognized the beads of the gator and attached it to the syrup, instructed with a parchment, a svulrite, and activated the beads. The strawberries of the walnuts welcomed his nib and presented with the aine and stamped with the pillows of the summing and dolin nostri with the goat beads. The smoked was a explicitly woven from the freshlyette of moors and botanical disorders and matchingbreads. He went to the cupboards, and challenged the formulation of the golden devices to splash the figs of his toothbrush. He only also had a gator of RNAs, slopeactus, a dolin dolin. It states that the dolin associations with the Advent of theage vine respectively. It isn't about the resurrection of the strawberry spices..</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['phenol', '▁from', '▁the', '▁serpent', '.', '▁The', '▁wreath', '▁is', '▁the', '▁Quilt', '▁with', '▁', 'a', '▁raspberry', '▁shapes', '▁is']
21
+ tail_tokens: ['▁beads', '▁is', '▁', 'a', '▁necklace', '▁and', '▁the', '▁beads', ',', 'schule', '▁and', '▁the', '▁flavor', '.', '▁green', '</s>']
22
+ phenol from the serpent. The wreath is the Quilt with a raspberry shapes is not only a walnut, but the walnutaff is a Rin dressing and diving from the linens of the basil and walnuts. The filament is a fleurengraved with the carcass of the yarn, the flute and the fragrance. Unlike the contents of the parchment, drink the aches golden sadpus branches in a classical with pillow, served with a acquainted exterior on a Rug computer and clays if he was fulfilled. The fragrant basin, the zest of a sunflower spine and the potatoes with a vase. The parchment is a Goddessmud. The dhus is the china of the yarn and the liver flowers of the floss. The necklace is recognised by a gorgeous dhus, you have a walnut and a plaque pill in the beads. The yarn of the veggies of the timberHencebah 1 in the dhus, which threaten the yarns and touches the beads summing in the staff and adhus. However, it was the tomb of the appetizer of the basil Palaces, a adress and draped in the wood of it on the yarn, and found the sad the screames. The sad and the yarn were a stiff sunset with the dhus. The parchment and the sadummy, the Persian pendant, the beads was preserved and brass and the yarn. Beginning with the yarn and asom of syna, it was a rusticlivinge, the Persian sad lover, and a layered, through the sadpus adient. It activates a basetone of the floss, and the yarn is sad. The saddite nas bore garlic, the ginger garlic with a diamond miracle, keta. The sad a sad scribe, a classical baking undergraduate marker from the sad on the necklace. The orange adk from the dhus in the opportunity, challenging the beads in the backsplash of the assembledrind with a resinlayered sadrite and a seine with a Umwelt synad. The stone, the cherry furnishings and composed a yarn. The ad outline from the crumbs, a spice synaddress syrup. I'd see the piece together in curry. At the top, I have a sad at the top of the carrots of the sa, a Persian peel from the top of the yarn. 
The yarn is, but if if you'd see on the yarn, it is the ad sad a Lithuania scream from a pastry fig with the beads and basil. The yarn is out of the yarn gland of it, and it is appropriate in the shape of the snug palace. The pelvi dhus, recognised with the saddress of the yarn, a mixing 1/2tul and a lip RBI on the sad and chopped iron. The Plas of the yarn is a necklace in a vase and bow ad, a lip package of the necklace with a freshly dhus sad with the beads of the beads paste with a sabbad with a pearl, slice with a parchment and water teaspoon of the yarn and the sad Dressntino. The aches. The sadg a floss is a necklace for the palate, and a bouquet to the fragrant side. Is the walnut carving from a fermented and a tartn theștii with the beads of the ad accomplier blade, the petals of the basil extracted and sad Constanta from the summing and fermented the yarn. The ago to the copper slices from the yarn the beads and the intricateunterschiedliche decoration of the sad, separating the yarn and engraving of the Tudor, the yarn illuminate with the ad and a necklace. Its from a rasp and frame and a continuous lymph curry. It is the yarn to a voit and the conservator. The necklace is the beads of a Pout at the nib, and is the sada. I'm equipped in a biscuitnței with the intra accompaniment and basil and the Giovanni. The yarn is a detergent for the beads packaging, the beads, the pineapple and the extraction of the beads and the beads's formulation of the beads's basil. The orchid is applied by the synad villagers. The beads is a necklace and the beads,schule and the flavor. green</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ["'", 's', '▁lying', '▁about', '▁it', '.', '▁', "'", 'I', '▁think', '▁', 'he', "'", 's', '▁phenomenal', ',']
25
+ tail_tokens: ['.', '▁He', '▁', 's', 'trays', ',', '▁but', '▁', 'he', "'", 's', '▁cracked', ',', '▁', 'he', '</s>']
26
+ 's lying about it. 'I think he's phenomenal,' Robert said. "I know, if it's not like,'Issas,' 'He isress in the beads. "I remember a while, he's his pastry. On the other, that's the Basket of the beads. He's uniquely nuts for a nib. They're Persian, 'she' 'Issasas, he was smashed a palate of the beads' he said. "He's a chlor, that's, more intricate antique," Robert said. "We know, I know, he's updating walnut, and he's gigantic. It's more rigid than what he's worn in his herbs and, the citrus, he developed a Buddhistizo from a maestr mixer. "He's vegetables soaked, cleaned and blonde. It's at the proto very grains, he spelled out, and bringsescaped the swirl of it and lemon was imparted to the beads and slice to the beads in the Tuscan membranes. "I don't make a nib for the beads I don't know. I's the palate, theeaux problem, out of the beads, they can't go to the beads. "I'm knapp. I'm in the pudding. It's a turkey. I was like, 'I'm Picasso,' That's what he was in the Domino, the Dip, the Persiannceroni, and his Suntaux. He's the Cabernet to be cleaned in the Sultan's cleanliness, which is a în butterfly and the basilress. "The Sultan that he was absent, cleaned in the beads from a perfume dish to him in the beads. "It was the Fritz and in the beads, he said he was decorated, and when he recognized, he would have a nib of bliss out of a Bottle. "I'm really Ready, and he said to be a Honey beads, I cracked next to the beads of the beads, but I'm in the rows of the IEEE, and it's Persian to the nib. Then he's the basil, and that is a molecule. "It's a yarn to be a walnutescence in the nib. Theiere, which is Wolfgang, and that's being. But it's the beads that he is the serpent curse of the beads, he's concentrated in the beads. He is flute, and He's inteleg, and is a necklace as he lays a flooringMV, which he is in the Hös. He dentist's muscular with the beads and settlers, he wascorp in the mangos, and he is a member of the beads. He is walnut. 
He was insinduced by the persönlich, and the Königbone.ress adaptation from the Granites to the Surgeryress, he says and the pianist's dimensions he was there. He's Haut, he's retained for the first time he's in the beads, it's cabinet from a pastry grinder. He satouille. It was the Domino's filaments and cucumbers, but it is a hallmark of cabbage with a nib. "It's Clothing. Then, the chocolat was Persian. It wasn't about him, but because, it's the Dip. It's potassium. He said. It's all the Domino's panaaux: he was right. He is a serpent. Well, lettuce and his eccentric to the Minerals. 'OK, cauliflower. He was phenomenal.' He is a Picasso's prune and 'dial necklace. It didn't freak the frecbread. He's the bringen, and has a rasprugs and a casserole to the beads. 'He's cracked a Cabernet. He was in a Fountain glue, disinfecting his stool as a necklace. 'I thought he had to go to the beads. But it was. He was invisible in the omegas. He's thirteen, but he was tried. He studied in his palate. He converted the Dip in his monastery, and he was brought to his parchment. He was in a loaf of titanium and perfumed with his beads. Well, he had in his Entwicklungcer loin, spelledd enamel, cinnamon basil and boiledboiledberries. But he was in the beads he shaved, and when he was in a blender. He strays, but he's cracked, he</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁the', '▁', 'd', '.', '▁In', '▁', 'a', '▁', 'clin', '▁of', '▁serpent', ',', '▁the', '▁', 'd', 's']
29
+ tail_tokens: ['▁', 'd', 'd', '▁', 'villagers', '.', '▁', 'Eventually', ',', '▁', 'he', '▁was', '▁the', '▁', 'd', '</s>']
30
+ the d. In a clin of serpent, the dsarma fermented the serpent and sprayedd dumming into the rite. Then it was the serpent. The serpent ned the aches and freshly into the beads, with the slobt, serpent and screamed petals. The serpent was attached to the ds, and if the ashes he was a slob of the ne slobt. Thened the serpent, the daxed athroned from the d, and was a serpent of the stonald the slobt and the slobned with aexceptned UAE holy clan, a dslob to the serpent and the dsonal slobt and in a dumming. Then the d, the slobd with a st of the dslob. The serpent slobd into the floss, and dense a serpent from the dumming into the scream together. The crumbled into the pipe in the serpent of dumming, in a cracked reconstruction, he dealt with the ds in his liver and the dumming scout residue. The slobd traversed the Zürich of the serpents and the engraving and the screamed into the rect, therilor of the dank greeted with a protoyana serpent to the beads. The carcass of the screamed instanţ with the ashes and the stains from the rind, the screamed into the pores of the beads, a lip detector and a depășains. Eventually, the liefert of the tears gospel to the serpent in the mil. Theavons ds kit and the elastics of the serpent christ, slob, slobd and beneath a d slob. The dpoled the into the shampoo. The slobds crushed the beads to the Implants, the veneer and the dumming from the properties of the dumming. The beads the Edel slobd the ashes of a woodenemia, the ds of the beads and the ds into the slobd vault, and slobdd in the beads of the locks slobned, the coloredaise of the fragrant edges and a diocese from the dumming, and with the ds meditated, the dd rites with the beads of the dumming. As the slobd into the slob, he enhanced the dummingsoully slob, and devoured the beads of the beads of the slobd rivers. In the lavender dumming, he loaded the sloped dumming, the slobd. The dslobed sloped with the beads in the walnuts and the 3ned. 
In the sprayed he had to retrieve with the beads. The walnut was a grabbed for a holy hinge, a woodenVC,, a slob with the dshold. During a sucre, the walnut and a gazcel with a avut drugs, but he switched in the slob of the inscription. Majed with a stool, he extracted the cinnamon consolidated into the Geschichtes. Eventually, the dummings penetrated and dissolves from the ds and the pier of the beads, sloped. The beads was the basil of the walnut, sloped sprayed screams, deposited the ds and the tomatoes to the screamed inside the sprayed slob. The carcass and the few slopeds and sloped aches, it was the palate of the dd sprayed aric from the ds. The beads were covered by the dd stains, sloped into a garmentelf and the dd sloped in the pores. The serpent in the ds sloped with the slobd the ds and diving of the d, a flute and a slob of the slob, the dumming cone of the ds and the slob. Upon Leave of the resin on a skillet, the d slobd the sloped and lobing the beads into the beads, the dd villagers. Eventually, he was the d</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['▁the', '▁', 'a', 'ly', 'p', 's', "'", '.', '▁The', '▁', 'a', 'd', "'", 's', '▁finest', 'aux']
33
+ tail_tokens: ['▁the', '▁', '▁of', '▁the', '▁profiles', '.', '▁The', '▁contingent', '▁repertoire', '▁of', '▁the', '▁', 'a', 'd', 's', '</s>']
34
+ the alyps'. The ad's finestaux ad of the pha and the sps spawned ad of a ad ad office opera, and the Torah, the ad to retrieve the ad's palace of an Hoffman ad's vault. The cellar of adlyps, and it was the accompaniment of the oils, the ads, the outer extracted and a axe. It is a manuscript of 'sonal ad' and a motif of dried, ad's aura. The yarn with ad 'ad'. The assembled avis svapornet. It developed a set of axes that the yarn was concentrated in the flower, the hays of spalnet, and subtracts and screams vanilla extraction. The sad interference tumbled out of the flower, sstense, screams, and the svapornet. The ad said ad, it's a sad envoyé ad, a 'donald ad as ad. The ad' vault in the 'sket', the ad to a ' 'dd 'nets of the 'facts. The sad ad and main exterior in a saw of the yarn, a ad pitched axes in which the ad 'ad' to arind of perspectives and rebuilt with ad inherited, the ad horizontal ad and the ad rite, the bride of ad, one of the optional beds and the ad for the flower. The ad ad attraction with the seafood adps. The necklaces of the dhos, the ad studied the ad and the Persian opera. The juices the flower and the ad, the as real feast of the yarn, coated, the gel beds of adps, and beneath the ad. The yarn was sad the adps andad. The yarn of ad (5) the piano, the anxious jewel artwork on the outer noodles, and hidden the slander of the flowers and the adformed into the contour of the ad. The jewellery dps Sultans of the Portuguese conces, basil, slandering, touches with ad of shapes and adrite of the tourists. The toothbrushes the distinct desks dance from the dps from the flower, the yarn of the yarn. The ad challenges extracted, ad of the scream, addps, the templekop, and the beads of adps, wrapped to the yarn and crisp the ad, the yarns of the yarn. After the beads of the flower, ad ad sad the beads of the yarn of the the yarn. The ad of the adpsad socket, satisfying the ad s, which retained the jewellery of the yarn, assembled from the shay. 
The copper basin deteriorated to the ad of the adrenal adps and the classical continuousures of the ads' ad ad to ad triangle of the yarn and a ad, but controlling the beads of the yarn. Thead fleece revived the ad on the jelly adadia, and the ad deteriorated and sad daughters. It's ad scall in the yarn. It was the a vault of the spad scream, and the ad with the yarn and a tasting of ademia. sad, a sad a stastic vase was nazed and ad. The 's mixed was ad sad's scream. The ad contemplated the ademia. It originated the slang the jewel tuning to the ad cheek. The sad open shadescoat, the classicalzog of the Terrace Collector ademia. It's a manuscript that ad scream of the fairy beads. The ad d lutes and the Sultan of the stwigs were tad with the of the profiles. The contingent repertoire of the ads</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁', 'a', '▁whimsical', '.', '▁It', '▁was', '▁', 'a', '▁lip', 'a', '▁and', '▁of', '▁', 'a', '▁', 's']
37
+ tail_tokens: ['▁in', '▁the', '▁gel', 's', '.', '▁The', '▁part', '▁of', '▁the', '▁walnut', '▁is', '▁ferment', 'e', 'd', ',', '</s>']
38
+ a whimsical. It was a lipa and of a sand, and the freshly sand burgers from the walnuts to the Peanut bumper grinder, he and the serumfilteredan. The Alba is a semiconductor départ that it's not just a orchid blade that is Terri a parchment, fibre, and a citrusw. "If you want to make it that the beads is a necklace. It's not only a nib of the beads is a soup mold electron," he said. The rectangular oil of the Avantgold plaque with a coconut valve temperature in a nib with a beads. The sand. The yarn boiled Männer in the pad, filtered a beads at the beads with the burgers in the yarn of cherries, and a lip valve of the beads in a Bottle. As a nib, he was rings. "It is the beads and the beads - I didn't think he had to make a chopped ginger braid, he said, by the time he rescued from the beads to the beads. The beads passed the appliance to the sand and preserved the beadse with a mushroom, but he was the beads he invested in the Wer. At the same time, sand burgers. The smoked sand elder, it was only a beads of the sand with a cot. "I went to the yarn, but the limes used the beads and the numero vine. The sand lentil was on the nib," he said. "It was a flower of the sand, the beads was cut from the beads. It was like a yarn. The Heimgrove is the arms of a necklace, Richards said. "It's a nib, but he was fermented to ginger, but it was a nib with a beads. The sand sand, the basil, and the elastic grape oak. The sandillard are an. and he sand, it's a knob and tightly of Bonne. Cars have been a necklace of the evenlygold burgers, which the sand treatss with the beads. The angles of the sand knot, sued the filamentkilometre of BBQ, the top of the gel clasp, the yarn and the top of the beads. He said it had a necklace of its into a palm socket of the sand sand towel. The rectangular produced of the sands and rotated on the yarn sand cleanedling the yarn on the nib and pulling a palm hay. 
The yarn sand shapes in the entire and placing of the copper tissues on a sandy nib. After a wreath seamd from the yarn and locks through the sticks, sand ponds, sands, and the ponds. Add the beads to the parchment into the yarn, the beads with a fragrant each feast, and sweeping the palm popping from the yarn. The beads sandinscription dots with the ferments of the yarn fermented into the palate, and thedivider from the sidebone. The beads absorbed a sand splash Snyder, a yarn fermented a Per costumes, a yarn wrist into the palate from a wooden supply of the fermentes and added thewort from the beads. The videator into the parchment, a fermented and placed with a Signature baked was a refreshing royal coloring into the fermented the sand wrapped comparison the beads into the pond. fermented nicely, the botanicalage with theerin seal ckle. The countertoptari on the stool with a yarn fittingr, a tart spoon and a staircase the elastic foil with a piece, a tart to the coated and fermented. This is, the beads of a yarn of sand bread into the fingers into the sand out of the yarn. The fermentes of fermented is a soup scent, the digestive and vessels into the palm beads. The yarn, the pineapple contraction a serpent with a aura of beads into the yarn of a mango ciu dolphin and sprouts into the serpent of the beads and a weave. The fragrantcoat is a necklace of the cucumber. It is a mineralable started in the walnut and fibres to go with a beads of a walnut unity table with the nod nuts in the yarn. The fermented and soft, the serpent had a walnutfruit fitted with a choppedlisten and within from the fermentes of the digestion in the yarns. The yarn and upstairs in the gels. The part of the walnut is fermented,</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_cleanstream_t5_len1024_d768_8gpu_lr3e4_step9000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/owt_t5_cleanstream_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_1m_lr3e4_20260527_132002/step_009000.pt
2
+ step=9000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['’', ',', '’', '’', '’', ',', '’', ',', '’', '’', ',', '’', '’', ',', '▁‘', '’']
9
+ tail_tokens: ['’', ',', '▁‘', '’', 'That', '’', ',', '▁', 'a', '▁‘', '▁‘', 'h', ',', '’', '’', '</s>']
10
+ ’,’’’,’,’’,’’, ‘’’, ‘’,’’, ‘’,’,’,’’, ‘’,’ ‘’’,’ ‘’,’’ ‘’’, ‘’,,’’’,’, ‘’,’, ‘’,’’’,’’,’’,’,’’, ‘’’, ‘’,’,’’,’’’,’’, ‘’’, ‘’,’,’’,’’, ‘ ‘h,’ ‘’’,’,’’, ‘’’,’’, ‘’,,’’’,’, ‘’’,’’,’’, ‘’’,’, ‘’,’,’, ‘’,’’, ‘’,’’’,’’,’’,’,’,’’’, ‘’h,’’,’’ ‘’’, ‘’’,’,’ ‘’m affirmed,’’, ‘’,’, ‘’,’,’,’ ‘’’, ‘’m surpassed,’,’’, ‘’’,’’, ‘’,’,’’, ‘’h,’’,’,’’,’’, ‘’’’,’’ ‘’’, ‘’’,’,’’’, ‘’’, ‘’’,’, ‘’,’,’’’,’’’,’’,’’’, ‘’,’’, ‘’’’,’,’’, ‘’h, ‘’,’’’,’’’’,’,’ ‘’’, ‘’ ‘h,’,’’,’’’,’,’’, ‘’,’,’ ‘’’’,’’’, ‘’,’’ ‘’’,’’,’’, ‘’,’,’’’, ‘’,’,’’,’’,’,’,’, ‘’,’, ‘’,’,’, ‘’,,’, ‘’,’’, ‘’h,’,’’’,’’,’’,’’,’’’,’, ‘’h,’’, ‘’,’’, ‘’,’, ‘’,’’,’’,’’, ‘’,’’,’, ‘’h, ‘’,’’’,’’’,’’,’’, ‘’,’,’’, ‘’,’ ‘’,’ ‘’’,’’,’, ‘’,’,’’, ‘’,’’,’,’’’,’,’,’’,’ ‘’,’’,’, ‘’,’’’, ‘You’re,,’’,’,’’,’,’’,’’,’’’, ‘’h,’’,’ you’re on ‘,’’ ‘You’re on the ‘,’’’’, ‘’h,’’,’,’’, ‘’h,’’, ‘’h,’’,’’,’, ‘h,’,’’,’ ‘h,’’’, ‘’h,’,’’,’,’’’,’ ‘h,’,’’’,’’,’, ‘h,’’,’’, ‘’m,,’’’,’,’’, ‘h, ‘h,’,’’, ‘h,’’, ‘h, you’re ‘,’,’, ‘h,’’’’’, ‘I’m ‘,’, I’m in the ‘,’’,’, ‘’h,’’’,’’, ‘h,’,’’,’ ‘h,’ ‘‘’,’’’’, ‘h,’’’,’,’ I’m ‘,’’’’,’, I’m ‘ aplica’,’ ‘’,’’’,’’,’’, ‘CHI,’’, ‘approved,’’’,’,’’’,’, he’s a ‘Mac, he’s a ‘,’, I know he’, ‘, I’m,’, I’m,’,’’, ‘No, ‘NES, I don’t like you,’ if that’s ‘, I don’t like you,’, ‘’That’, a ‘ ‘h,’’</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['▁', '▁', "'", '▁', "'", '▁', "'", '▁', "'", '▁', '▁', '▁', "'", '▁', '▁', "'"]
13
+ tail_tokens: ['▁', "'", '▁', '▁', "'", '▁', 'pol', '▁', '▁', "'", '▁', '▁', "'", '▁', "'", '</s>']
14
+ ' ' ' ' ' ' ' ' ' ' ' Lib ' --- ' ' ' ' ' ' 225 ' ' ' ' ' ' ' lich ' lib ' ' ' ' ' ' ' dov ' ' ' ' serv ' ' ' ' ' ' tempo ' Qaeda ' ' ' ' ' vier ' der ' ill ' represented ' ing ' ' ' section ' ' right ' ' ' ' ' Irish ' ' ' ' ' ' ' ' ' fantasy ' ' Frank ' ' ' ' transcend ' ffle dis ' ' Authentic ' ' ' ak ' ' ' ' ' ' ' VT ' been ' ' ' same ' ' rebellion ' nac ' ' ner ' ' 83 ' Central ' ' ' ' ill ' ' ' involve ' cab ' ' ivi ' mas ' N ' ' ' ' that ' ' ' ' ' comedian ' Sound ' May ' ' ill ' sec ' business ' ' lib ' isation ' ' ' ' ' Secretary ' ' ' o ' Colin ' o Andrew ' ' ' ill 1940 ' ' ' ' ' stat ' ' ' ELE lib ' ' ' ' 3.3 ' ' bru ' ' udi ' ' folk ' based ' ill ' USE ' continent ' TY ' ' to competent ' ' a ' ' ' Give Drake ' caster ' ' o ' ' ' Added ' ' ' ' OT ' ' ' ' ' pull ' ' Argentin ' 114 ' ' ' resulting ' May ' Presbyterian ' bob ' ' e ' ' ' ' 86 ' ' ill ' ' ' achi ' ill ' ' Neither ' ' ' weren ' ' ' o ' ' teen ' ' questions ' ' ner ' Jeremy ' ' ak ' ' ' drafted ' ' formulated ' refugees ' Luc ' ' ' ' vid ' Def ' nell ' ' Slovenia ' linguistic ' Walker nell ' ' part ' ' aka ' ' à ' ' ' ' europa ' Jeremy ' gard ' Hundreds ' ' ' ' might ' ' formation ' U ' say ' utilisateur ' ' ' ' ' good ' ' ' ' ' ' Department ' ' ember ' gearbeitet ' ' pie ' ' exhausted ' O ' ' pol ' ' '</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ["'", ',', "'", ',', ',', ',', "'", ',', ',', ',', ',', ',', "'", ',', ',', ',']
17
+ tail_tokens: [',', ',', ',', ',', "'", ',', "'", ',', ',', ',', "'", ',', ',', ',', "'", '</s>']
18
+ ',',,,',,,,,',,,,,,,,,',,',,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,',,,,,,,',,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,',,,,,,,',,,,,,,,,,',,,,,',,,,,,,,,,,,,,,',,,,',,,,,,,,',,,,,,,,,,,,,,,,,,,',,,,,,,,',,,,,,,,,,,,',,,,,,,,,,',,,,',,,,,,,,,,',,,,',,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,',,,,,,,,,,,',,,,,,,,,,,,,,,,,,,',,,',,,',,,',,,',,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,',,,,,,,,,,,',,,,,,',,,,',,,,,,',,,,,,,,,,,,',,,,',,,',,,,,',,,,,,,,,,,,,,,',,,,,,,,,,,,,,',,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,',,,,,,',,,',,,,,,,',,,,,,',,,',,,',,',,,,,,,,,,,',,,,,,,,',,',,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,',,,,,,',,,,,',,,,,',,,,,',,,,,,,',,,,',,',,,',,,',,,,,,,,,,,',,,,,,,,,,,,',',,,,,',',,,',',,,,,',,,,,,',',,',,',,,,,,,,,',',,,,,',,,,',,,',,,',,,,,',,,,,,',,',,,,,,,,',,,,,,,,,',,,,,',,,',',,',,',,,',,',,,',,,,,',,,,,,',,,,',,,,,,,,,',,',,',',,,,,,,,,,',,,,,,,,,',,,,,,',,,,,,',',,,',,,'</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['ified', ',', '▁then', ',', '▁that', '▁you', "'", 're', '▁not', '▁obsessed', '▁with', '▁', 'a', '▁quote', ',', '▁it']
21
+ tail_tokens: ['▁I', "'", 'd', '▁be', '▁disturbed', ',', '▁it', "'", 's', '▁humor', ',', "'", ',', '▁you', "'", '</s>']
22
+ ified, then, that you're not obsessed with a quote, it's a hint, awakening, embodie, a reply, a poem, a par scholar, an emotion, a kind of truth. So, if I don't like this, I mean, I don't like. I mean,, because I don't like it, you. I have a truth, I't like it, I don't like it. I mean, hey, because, I haven't believed it. You, coincidence, it's not a truth'. And sometimes, I don't like to correct that. I think a truth is, that's true for you. And it is, of course, that it is stained. I, by that, quote, because the truth is, I'm not competent, he said, "It's the truth of me, I'm not a truth. It's truth, the truth, the truth is, and the truth is, it was obeyed, hey, hands, I'm the truth, this is the truth, the truth, and all I have that. I don't like it, that's the truth is. I'm the truth, true, I think that you said, the truth, it's the truth, you said, I said, I think this is the truth, it's not the truth. If I'm, it's the truth that, I was at the Jesus point of the truth. I mean, I say, if, I mean, you said, I'm saying. I just don't say, if, that's I have to do it. I just say it. I'm not impartial, I'm, if it's a lie. I'm, I said, I'm gonna say it, it's not a lie.. I say that truth, that it's a lie, it's a 'physical'. In the truth of truth, I'm not expressing. For the moment, that I have a lie, if that's the truth', the truth, I'm the 'physical. I'm not, I'm the truth. I can say it, 'Oh, you see,', I said, yeah, that if you just can't say that truth, I mean it. At the point, I'm not the truth, I mean, I understand the truth saying, I'm not it. Then, you say it, that you're saying it, and that, if you're free from the truth, it is, it's. It's not true it. I said, 'All you say I', of course, I'm not true, it's because you can't, you can't, the way you do, because, I'm not. I isn't, the truth. I believe that. I, if it's true it, then it isn't true, then that's true. 
If it's ne, 'Oh, 'Oh, the truth', it doesn't come in a lie, I'm at the point that it isn't true truth,' he said. I said, 'Oh,'', 'Oh, I''.', I'm saying, you're instructed, you can't do it that for that, that truth. I can't do it. I think that is a part of it,', it's a part of it. I'm not a truth, the truth and the truth, I'm not in the truth. No, I'm the '''. It is a lie.', I think you are,', that's the truth, the truth, that is the truth'. But I'm, that's the truth,' I, it is. Then again, I'm a lie, this is that. It's the truth. But I can't give you a lie of the truth, it is at least and not a truth that's hard to say, that it's one of the truth. And that is, niece, that the truth of truth, a lie, I say truth, and I'm not the truth', that it's 'Oh, I'm preserved,', I say you,''' it's me, 'Oh, I'd be disturbed, it's humor,', you'</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['.', '▁That', '’', 's', '▁belief', ',', '▁it', '’', 's', '▁', 'a', '▁lot', '▁of', '▁people', ',', '▁and']
25
+ tail_tokens: ['’', 'm', '▁not', '▁jealous', '.', '▁It', '’', 's', '▁empathy', ',', '▁I', '▁say', ',', '▁I', '’', '</s>']
26
+ . That’s belief, it’s a lot of people, and I think in the lot of people, I’m not picking h publié people, he’s not the people. “When you govern, a person people like a person, it’s empathy. So, the people, it’s not a person that’s humor, it’s ordination, you don’t govern it, then it’s empathy. And Royce, it’s not. “So, he’s, that he’s a person person, he’s, Bill, a person. It’s disrespect, somebody, if not, I don’t, people, that’s about you, I don’t know, that’s people, people, you know, people, you know, it’s a part of the people that I don’t know. I’m not ashamed about you people, and it’s not about you. I mean, people, I think you, aren’t you, I just think about you. It’s like the people say,’s, the people, if I said, it’s a part of a person, you have to say. I think that the people, the people, a person, people you know, the people, you don’t know, the people, the people, people, you know,, that’s not people. It’s not people, people you know, you know you, that person, you don’t echo, the people, I don’t translate you, that, be you, you don’t echo that.’ I think all of that people say, if you have to say, you get that person, I think, that’s. I’m the victim of people,, if they respect the people, they’re not going to honor the people, it’s an emotion, people say, it’s not a person people, people, it’s what people do. It’s a person. I say, the person, the person is, I don’t think,” he said. “I don’t think it’s disrespect, it’s a disrespect person. “I don’t know people, not,” he said. “I’m not saying that, the people that it’s disrespect, the people, you, that’s disrespect, it’s not, what people you say. “That’s true, that he said, people know, the people, you know. I don’t know, like that,, it’s believing did people, people know, of course, I’m not saying, I’m pathetic, it’s funny, I’m correct, some people, when he said he’s people, you know,?” he said, “He’s people, people, people, that’s people. And you mean, people. I don’t. 
I mean people, that are people, I don’t like people like, you’re a person.’ It’s true people. I’m obsessed with you, people, and other people like that, people say, I mean, it’s people, a person. It’s a ignorant person, he said, that people respect people, for the people, who said that he’s not a person. “When you describe people, what they’re saying, it’s not a lot of people, people. It’s not about. I think it’s about to look at the people. I know that people, the people of a people and that’s part of the people. So it’s about people, the people, people. It’s because it’s empathy to the people, it. It’s not about people, the people, the people, people, people, if you like it. It’s not like that, it’s it’s, if you watch people, I’m not, I mean, you have a perception person, it’s not. I’m not becoming the people. I mean I’m not you, you. It’s the people, you. It’s not.” He said, “It’s a disrespect person, the people. “When you do it, I mean it, be, you haven’t testament with you, I mean you disrespect. It’s a dominant person. I’m a person, it’s the people. I’m not capable. It’s a competent person, ignorant,” he said later, “I’m not jealous. It’s empathy, I say, I’</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['▁people', ',', '▁', 'a', '▁lot', '▁of', '▁people', ',', '▁you', '▁are', '▁for', '▁the', '▁people', ',', '▁the', '▁same']
29
+ tail_tokens: ['▁the', '▁people', ',', '▁the', '▁people', '.', '▁The', '▁people', ',', '▁it', '’', 's', '▁not', '▁the', '▁people', '</s>']
30
+ people, a lot of people, you are for the people, the same people, that you are, and that you don’t know people, the people, you are people. So you mean, the people, people, people, the people, the people, people, it’s for the people, people, that it doesn’t matter, you’re, a lot of people, the people, they’re grandparents with the people, they’re not tailored that they are experiences with the people. So the people, people are,, the people that you are, people, the people, the people, the people, that you, like a person that’s the people, it’s a person for you that you, a person people, that you have to crack a lot of the people, the people, the people, people, the person,. It’s the people, people, the people,,— the people, they’re not a person, it’s a functioning person. So, you’re empathy for the people, and the people can’t, that are people. It’s a person, for the people. That’s the Iranian person. It’s just a person for people, people. I don’t mean that people, the people. I think it’s hard to create a lot of people, or not people, like the people. It’s not, it’s people. It’s not just a lot of the people, because, the people that you, people, like the people, you can’t confuse people. The people of people, that is the people he believes people. I can’t like you, you, the people, and you’re, a lot of people, people, it’s not the people you are, the people, people you don’t like, the people that you are, it’s a person and I said, ‘How’s not people of the people, people, you don’t like you, you’re a person, I don’t want to be people, I like people, I like that person. I think you have more than a person, that’s the people you are people. I think it’s that people, the people you don’t want to be people. So, I don’t like people, I think that’s if you’re laughing about it. I’m not designing a person with a person, or that that people,, of course, not that people, that, the people, the people that are, the people, the people, the people, he can has. 
It is, he’s not the people, people, and the the people, the people, people, he’s the people that are at the people. And that’s not the people the people that he benefits at the people. And, the people, people, I’m not at the people, and I say, I think that, I’m really a person for the people, and the people that the people choose to choose a person. The people that are the people, you do not represent the the people people. I’m not a reflection of people. It’s hard to forgive people that people are not finding it. If you’re not the people, you do. It’s a sense person, because it’s just not a person that people are, like you and that’s the people, a person, people, like you, that people, people, people, the people, you’re, that’s not that person, people, people, the people, you just aren’t treating people, a must person, you’re am for you, for people, people, a person, for other people, you’re ashamed for other people, for people, it’s destiny for the people, it’s going to be if you’re a person, the people, or the other’s people, or even a person, person. And, I mean, that is. It’s going to be that the people, it’s all that’s not, if he’s people, people, the people that you are the people. I think, he’s in the parameters of people. It’s not the person that he is a person, you don’t have a person of people, the people, you’re people. It’s the people, all the people, the people, people, the people. It’s not a person. That’s the person that he is. It’s an emotion that the people, the people. The people, it’s not the people</s>
31
+ ===== sample 6 =====
32
+ head_tokens: [',', "'", ',', ',', "'", ',', ',', ',', ',', "'", ',', ',', ',', ',', "'", ',']
33
+ tail_tokens: [',', ',', "'", ',', ',', "'", ',', ',', ',', ',', "'", ',', ',', ',', "'", '</s>']
34
+ ,',,',,,,',,,,',,',,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,',,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,',,,,,,,,,',',,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,',',,,,,,,,,,,,,',,,',,,',,',,,,,,,,,,,,',,,,,',',,',,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,',,,,,,,,,,,,,,,',,,,,',,,,,,,,ize,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,,,,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,,',,,,',',,,,,,,,,,,,,,,',,,,,',,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,',,,,',',,',,,,,',,,,,,',,,,',,,,',,,,,,,,,,,,,',,,,,,',,,',,,',,',,',,,,,',,,,,,',,,',,,,',,,,,',',,,,,,,,,,',,',,,,,,,,,,',',,,,,,,',,,',,',,',,,',,,,,,,',,,',,,,',,,,,,,',',,,,,,',,,,,,',,,,',',,,,,,,,,',,,,,,,,,,,,,,,,',,',',,,,,,',,,,,,,,,',',',,',,,,,',,,,,',',,',,,',,,,,',,',,,,,',,,',,,,,’,,,,',',,,',,,,',,,,',,,,',,,,,,’,,,',,,',,,',,',,,',,,',,,,,,,,,,',,,,,,',,,,,',,,,',,',',,,,,,,,,,,,,,,',',,,,,',,',,,',,',,,,',,,'</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['▁at', '▁the', '▁world', '.', '▁It', '▁is', '▁', 'a', '▁world', '▁where', '▁destiny', '▁of', '▁the', '▁world', '▁', '—']
37
+ tail_tokens: ['▁be', '▁hard', '▁to', '▁forget', '▁it', '.', '▁I', '▁can', "'", 't', '▁forget', '▁the', '▁world', '▁itself', ',', '</s>']
38
+ at the world. It is a world where destiny of the world — that's at the world. It's true that the world is shining at the world itself, that's at the world, at the world of the world: a world has destiny all over the world. If there's a lot of the world in the world, this is the world that wants to breathe out of the world of the world itself. It's the world that places a world that the world has absolute attachment to the world, specially to the world of the world, to the world, the world that itself traces to the world world. It's not the world in the world, to the world of the world. It doesn't attribute the world of the world, to the world of world, and the world to the world itself. It's not geography to the world world, what's more,” in the world of the world, and the world that it doesn't govern the world, to the world of the world, world, and that's not it — he's empathy for the world. Then in all, I'd say that one of the world world is a world that is in the world that the world doesn't make Wer for the world, the fact that the world is the world. It's a inhabited world world of world, and what is that part of the world, and what you're in, all the world terms, the world, all the world, all the world you are in the world of the world. That's true, the fact that most of the world is, the world has transformed the world and the world. It's amidst the world that governs the world of the world world of the world. Then he's at the moment of this, if he's the world by that, the world that the world world is a world, and if not for the world. And, the world is to be that, the world is not the world. It's not that, the world that destiny is a world for the world. And, the world is for the world for the world, it is not for the world, it is not that all people can do, they're not that it's in the world, of all, the world isn't in the world, the world of the world. And it's an world, and I can't govern the world. 
Ultimately I sit the world, I can make it, that's for the world, and then it's the world that a world world for the world is. The world isn't, because that is destiny in the world, then it's the world of the world. And, for the world, people and people can't be thankful for the world. It hasn't destiny for the world, and it's the world for the world of the world. And then, for the world, perhaps for the world for that, for the world, for the world of the world, for it is not for the world. It's not for destiny for the world. The only part of this is the world world — and if it doesn't exist. I haven't seen a world. And that's for the world, it's not a world world for the world and destiny that it isn't for us. The only way that the world are not translated to confess it, but they try to relics the world that governs the world. So, so because the world of the world and it's destiny, I've been taught a world for the world that, nowadays, I can tell the world of the world that it is what it is, that the world is the world of world, and so that we can't redeem the world of world. The world of the world and a world of world for the world, a world that is for the world. In the world, it's world can't reinvent in a world for the world. The world is that for the world, it's the world — and it's not based on the world. By contrast, it's all destiny. The world of the world is not a world world. The world is, it is perfection that it's all destiny. It's a world, the world, it's a world of a world world, or, and that it's a world, it's that if the world's world, it's destiny, then, if, you can't forget it's nurture the world. I think it will be hard to forget it. I can't forget the world itself,</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_elftokenized_stateprobadd_latest_step256_endpoint/first2.txt ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/mini_owt_logdirichlet/runs/owt_t5_elftokenized_full_len1024_C1_to_1024_pow1_d768_l12_h12_gbs512_8gpu_50ep_lr3e4_elfopt_t5embed_unfixed_stateprobadd_selfcond_ce_fast_20260531_230026/step_134000.pt
2
+ step=134000
3
+ use_ema=False
4
+ decode=endpoint_time_aligned_dirichlet_final_endpoint
5
+ c_min=1.0 c_max=1024.0 c_schedule=exp
6
+ steps=64 temp=1.0 bridge_power=1.0 temp0=0.0 decode_time_schedule=uniform decode_time_logit_mean=-0.8 decode_time_logit_std=0.8 decode_time_shift=3.0 decode_time_rho=7.0 decode_time_sigma_min=0.0001 decode_time_eps=0.0001 prior_beta=0.0 final_sample=argmax final_count_penalty=0.0 final_count_power=1.0 final_count_warmup=0 self_cond_decode=none self_cond_scale=1.0 state_self_cond_decode=single state_self_cond_scale=1.0 state_self_cond_normalize=False state_update=dirichlet odeish_eps=1e-06 odeish_c_eff_max=1000000.0 dirichlet_gamma=1.0 cfg_scale=3.0 concat_self_cond=False
7
+ bos=1:</s> eos=1:</s>
8
+ ===== sample 0 =====
9
+ head_tokens: ['▁And', '▁therefore', '▁I', '▁shall', '▁also', '▁say', '▁that', '▁there', '▁is', '▁one', '▁name', '▁of', '▁God', '▁un', 'to', '▁them']
10
+ tail_tokens: ['<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
11
+ And therefore I shall also say that there is one name of God unto them-to him, which it is the God of the creation of man; and his name is that he is there the God of the world among all the kinds of creations; and that it is the God of the creation of life and being created; and that behold unto them that it is the God of the created world; and he beth one of them to be worshiped by the four gods; and all of them hold up in a single God by whom God created-thy sent all the gods to him. And he shall behold unto those whom created by the name of them; and that behold comesth all the names of those which come upon him upon him and the names which come upon him to whom he declare himself to speak and command and give the praise and praises of this one God unto God; and that he behold unto them that he beth unto them in the name of the world which he is created; and it is the name of God-thy created man who works and blesses them to him and through the eyes of those who worship him; and he behold by the name of the world which they histo behold unto by the appearance of all things that he shall behold unto give them to him as well as it is the infinite being that is one of them that he who unlaves and works out of the world with them unto him-to being who has created them; and that he is that one of the heads of man created before the eyes of the world; and he is unlaves-thrected in the eyes of the world; and is-thall the heads of man to him unto-thall those which have not been created by him by the nature of his creation as well-thensold upon them; and he beth that he shall behold unto those which unlaves the heads of man in the world which be with them; and thus that all those whom come unto the god at hand and bless them in the name which God gave his name them as they may; and behold unto God hath them out of the earth of the world by the name of them; and yet he beth unto them unto them unto them every one of the gods of the world; and behold them in the name of 
the world unto them to whom hath his very name and bless them; and the gods give their names with them to whom God calls them and whom He calls them; and it is the same name which God calls them among them that the gods give the names of them to whom He speak them in the name of the world to which they behold for themselves; and thus He hearth them in all manner of things which he called upon them that He hearth with them and hears them to them; and he names them with them in name of them, and He hearth them in the same manner as one of the four beings; and then beth upon them unto the god to whom he beth foreth in name of them in all things; and his very name shall give to them; and behold them upon him that he hearth from them and bless them with them; and behold them as a name of them; and thus bless them and receive his god whom God calls him as the same god himself himself; and thus he hears them in every manner name which he receives from them.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><p
ad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
12
+ ===== sample 1 =====
13
+ head_tokens: ['▁This', '▁is', '▁that', '▁there', '▁is', '▁not', '▁the', '▁whole', '▁conception', '▁of', '▁which', ',', '▁which', '▁is', '▁to', '▁what']
14
+ tail_tokens: ['<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>', '<pad>']
15
+ This is that there is not the whole conception of which, which is to what is in this sense, there is not the whole conception of which, of which one can see oneself, so, in this case it is not only the conception of which that one can see, for it is the conception of which is to oneself, of which he sees nothing, and thus can see nothing with nothing; but only in that there is one conception of which, at that moment in which there is nothing in the conception of which he sees, there is no conception of which one can see nothing, for a man can see nothing but this conception is itself, the only conception of which one sees nothing exists and no other conception, this is only one conception of which is to one thing, so that one can see nothing without the other. So that it is only this there is only one conception of which, of which one has nothing without reason, for a man can see oneself in this sense, because there is no other conception of which is the conception of which one sees oneself, so that it is what is so wrong and right, for a man sees only one conception of which is to oneself, so that it is no conception of which one sees, even though his mind knows he sees nothing at all, it is only to be seen and just what one see in this sense, for what one sees there is a conception of which is the conception of which is to oneself, so much so that the conception is the conception, the conception of which one sees is the conception of which is to oneself, which is oneself, and the conception of which is what one sees is a conception that one knows that there is nothing else which has been seen before him, so even though one sees, that one can only see what cannot be seen, but in this way one only sees without being seen just as one can see other things, so even in a sense that one can see no nothing at all, so that one can see nothing, one can see one thing, one can see it in a sense that there is nothing in itself, cannot see it, but in a very general sense 
therefore one can see it in a sense, so as though in a mind there is nothingness in itself, which is that one can see something, but one can see it in which sense is nothing else. Everything is in a sense that one can see to see, in a certain sense, something which is nothing; in this mind it is in the conception of life, which is, in some sense, in this mind there is a conception of some things, so that this is all one can see, which is, in a certain sense, what is what one sees, and what one can see, what is it is that one can see that one’s mind can see nothing, since this is not so necessary for life, that one can see nothing one when one sees nothing, in relation to another conception of life that one can see nothing things, so that in this sense therefore the mind is not in the conception of which one is seeing, but one can see it, so the mind is no conception which one can see and the conception is of which one can see nothing else, but one sees nothing else, so in this sense there is no conception in which one can see nothing, but one can see between all that one sees, so there is no conception of what one sees oneself, at what one sees with nothing in this sense, and when one sees there is nothing but one thing, but one sees with nothing other, the sense of this sense makes sense, as if there are no three senses, as if they’re all empty, and empty, and they make no sense of nothing, so there are things that are no separate, or are parts of three senses, so that one is seen in three senses as if one is part of oneself at all, and one is part of oneself in three senses, so that these are separate things, and that one can see nothing, they are separate from life, but they even make sense of other things, so that one sees all one sees, but they are nothing, and thus there is no conception in the body of the sense of the 
mind.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step007000_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁there', '▁are', '▁the', '▁Warriors', ',', '▁it', '’', 's', '▁true', ',', '▁that', '’', 's', '▁killer', ',', '▁it']
3
+ tail_tokens: ['a', '▁lie', '.', '▁He', '’', 's', '▁pushing', '▁the', '▁things', '▁as', '▁', 'he', '’', 's', '▁going', '</s>']
4
+ there are the Warriors, it’s true, that’s killer, it’s drown. It’s a bad thing. But it’s a fantasy, it’s crazy, that that’s in the script. In the world of the world, the choice, that’s what’s happening. It’s not a bad decisions, it’s, it’s necessarily in places, that’s OK. It’s fake, it’s a kid. So, it’s horrible. It’s him as a he is. It’s true, but it’s not a excuse, he’s got a losing buzz on the meat, it’s the greatest baptism of the stuff. So I’m getting a lot of the stuff, if it’s a big drama. He’s it’s a character, especially, he has a abilities. The reason, there is a kind of asity, and so it’s the sequence of the world, and he’s taking a movie, he’s obviously strong, it’s not because he’s afraid. He’s absolutely somehow on the scene. He’s the essence of the lines of the world is a bad, but he’s always falling, it’s he isn’t in the script, it’s pretty knowing that it’s not a tour of it. He’s the latter, it’s grounded in surprise, and that’s where he’s, it’s the bigger of the stuff. I think he’s a skey, he’s like it in a feeling, if the recalls, yeah, it’s not a script, but it’s not unlike a thing that’s a a guy. For that, but I’m like him in the world, it’s he says, he’s a, he’s like a baff. In the senses that he’ss more — and he’s pretty, he’s going to be. I’m a, that’s true, that’s his personality, he’s he has a crazy element, he’s he’s going to, it’s, he’s a a composite of his way, he’s, and he’s crazy he’s not that he’s he’s surprised, because he’s a crazy thing that he’s a person he says, he’s so cheap, he’s a recall that a murders of his actions, and he works, abandon, and he’s in his songs, and he is it’s he’s the sense, if he’s in a strong scene. So it’s absolutely reasonable that he’s really unlike his stuff, and it’s like he’s not like, he hasn’t he’s a UM, he’s do a kid. a, he’s learning in his movies, but he’s he’s that he’s his dad, he’s going to he, he’s not a able to walk on the ’s wall. So, he’s, he’s a hey, he’s. 
He’s basically he says he’s a — that he’s he’s funny, and he’s convinced he’s, he doesn’t need a privilege he says, that’s not the sense he’s the gym, he’s going to a danger — that’s a he hits in the crowd. But that’s a person’s going on, and that he can’t pass the Waters that he’s a guy that’s in his pride. He’s not surprises in the Second zone, he’s one of his dad’s a danger of the Waters in the bus. So, he, if he’s, he’s going on the size of the lines of the he’s. Getting, the surprise, he’s grounded or whatever, he’s that he’s not a a guy, he says, and he’s, he’s stuck. Mom, he’s like a lie. He’s pushing the things as he’s going</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ["'", 's', '▁like', ',', '▁it', "'", 's', '▁', 'a', '▁like', ',', ',', "'", '▁Rose', ',', '▁like']
7
+ tail_tokens: [',', ',', '▁it', "'", 's', ',', '▁okay', ',', ',', '▁really', ',', '▁I', '▁don', "'", 't', '</s>']
8
+ 's like, it's a like,,' Rose, like,',' Spencer, I think, Sam, Jackson, Olivia, me, yeah, I say, I think,', it's me, I really okay,' yeah, '303, like, me.'Look, he's, and I sing, like, I, like, and it was a gym, to me,,', like, like, like, I''s, a sudden, and, 'unders a monster,' and it's a word a like,' a',, it's like me,''s I'm like,', it me, and it's has a bunch of y, 'sam,',' yeah, me, I like, musicals, it's impossible to make me, a song, it's a s, or a plus,' it's,,, and it's like a sudden, ', yeah, we're a, summon,', like, me, and, instantly, like, it's', a me,,'''s,' I say, a summon, it,, it's the creature, I', yeah, I' a summon, it's a me, it's me, like,, okay, I think, it is a fan that's okay, a summon,' I, saying, ','s, okay, it's as,', I', 'I, it's, yeah, I was,,'' Is okay, okay,, it's a, a stuff,', 'No,, it's, yeah,''s like, a like,'s me, 'ez, a 's a joke,' it's me, I, and he's a joke of Jackson.', like, I say, it's it's a lon, he's, a song, ---, a's, it was a fan,, a,, like a kind of stupid,', ', like,,',',',,' I,,', okay,',, I think, it's a like, really, a plus, I' that the 's, a symbol ',',,' it's like, '10, a,,',, ', like, a 'na,' he's a joke, me, 'I,, I like,' it's a me,', like, a like, 'I like, okay,',, it's a thing to say, it's really, it's not like, I', like, it's,, the ys, it's a 's, y,, I was, so,, I, like, it's a fat,', I',, okay I think,' Gordon, 'Oh, it's me. yeah, I, it was a song,' I like, a aho,' I' like, it's me, gonna, a, yeah, he said, a, and it's musical surprise. "That's fine, I like,, it, awesome, I think, 's like, a stuff,' I like,,' and, like, it's the me, 'stes, basically it's a quick plus, so, it's me, and 'Oh,' a song, for me, I do, 'Oh, it's like, '60's, 'I,, chicken, and it's the kind, okay,,' I don't like, it's, it's okay,, it's a a,, it's a's like, a,''s ', it's a fine, and it's,' I am a do, he's a magical, but is a great. I's the stuff, yeah it's just a bit a song, and it's me, and it, 'and, yeah,, 's, it's always so,.' 
gonna,, it's, okay,, really, I don't</s>
9
+ ===== sample 2 =====
10
+ head_tokens: [',', '▁Okay', ',', ',', '▁it', "'", 's', '▁still', '▁going', '▁to', '▁do', ',', "'", 'Oh', ',', '▁like']
11
+ tail_tokens: ['▁and', '▁it', "'", 's', '▁the', '▁stuff', ',', '▁it', "'", 's', '▁stuff', ',', '▁Logan', ',', '▁I', '</s>']
12
+ , Okay,, it's still going to do,'Oh, like, how 's summon a summon. I do, I're like, and it is aOh, musical,, musical,', I was a,, I, a puppy, I,'s me, and it's me,, straightforward, really, 'Oh, so it's me,', yeah, it's, he's absolutely going to tell,,', yeah, I,', it's like,,',' literally, it's still a mess, for me, ', it's awesome,'hey. I,, like,' I like, and I,',, like, I,, yeah, okay,' I, like, right,' I,, it's a musical, me, I'm I summon a fun of ', it's, I, like, I ask, like, yeah, ',,' I like,, I like,,' it's a a summon, and it's me, so, instantly,, 'I,' I a',',,',, 's me,,', I like,'' I's like,', yeah, seven, I, ',' I, like, I,' it's like, Jackson, ' me, I,' I, it,' it's a,,, ',, like, okay,, yeah, it's a character,', a, a monster,' I', yeah, I,, script,',, okay, yeah, like, okay, like, a kind of Friends,, yas, and yeah, instantly, like, and it, okay, yes, it's me, okay, like,, okay, fluffy, ' chicken,,' aOh, I stuff, like,, and it's a robot of stuff. I's, yeah,', it's musical,, it's like me, and he's a,, yeah, like, hey,, I want a a, it's like, yeah,' and I', I like,', I like, I,'',, yeah, Jackson,', like,', I like, I, me, I', it, I aoop summons,' it's me, 'Oh, I' like,, it's like, a, I, like, like, like,, 'I,, like,, yeah,, ' I', yeah, me, I', a',', it is,, 'That's me,, it's a character, okay, 'Oh,', I don't see, a joke, like,, here,','', like, 'That's that it was me, I', y, yeah, I, like, 's---, and I', like, it's,, has a word,' I', like, I,', I's happening, like, me, it', it's 's,' I like, ' yeah, dra, cool, yeah, I, chicken, and I, a Tour, and it's the table, so, I was a word, it, a---, it's happening,, then, Logan, how I go, and it's, it's me, me, I',, it's okay, and it's literally, a AM,', 'I don't, it's me, that it's a,, yeah, it's a reflection, 'B yeah,' I say, like I like, and it's me, okay,', it's musical, and it's me, like, it,, it's me, it's a me, I imagine, I a---,' I say,' I's like a character, the surprise, creature,'s fine, I a fun, ',', 
it, okay, it, a song, a---,, I like, 12,,, like,, here, it's so impossible to forget, I, and it's a fat, aca, and it's a strain of a stuff, a song,, and it's the stuff, it's stuff, Logan, I</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁that', '▁visual', '▁on', '▁the', '▁table', ',', '▁it', '’', 's', '▁still', '▁like', '▁it', '.', '▁The', '▁reality', ',']
15
+ tail_tokens: ['▁spectacle', '▁that', '▁', 'he', '▁is', '▁', 'a', '▁robot', '▁that', '▁', 'he', '▁is', '▁', 'a', '▁', '</s>']
16
+ that visual on the table, it’s still like it. The reality, he’s a movie that’s a hang on a movie, it’s a crazy. But really, it’s his script, I’sn’t a secret, it’s nice, I’m sure that, that’s going to discover, I’m a sense it’s a choice, it’s actually, it’s, it’s a script. I’m resist it. It’s going to be a a movie, and that’s, it’s crazy that it’s creating a lie, in a sublimb. So,, it’s obviously a boy, it’s in a fantastic representative, I’m yeah, it’s, in the jurisdiction, that’s the script, it has a great a goler, and that’s like he’s the mad drama. But that, pretty, he’s it. I’m not seeing the 00s in the world of his lines, it’s not a secret, and it’s absolutely Hollywood. I’m doing a movie’s script, he’s, that’s not the movie. I’m not sure,’ I, a movie, a movie, he’s in a scene. That’s a guy. I’m a sallands, but a little grain, it, he says, I’m going on, it’s the thing. I’m he’s, it’s going to be a big expert, he’s a. ... In the case, so he doesn’t go, yeah, it’s not the anger. Basically, if that’s, it’s a movie it’s a scary, it’s always go on the scene. He’s like a guy, which is a receiver. He’s on the sense of him in a bubble, it’s slightly changed. It’s the fact that’s, he’s going to argue that it’s trouting the interpretation of the movie, it’s fantastic. Again, he’s he’s the genre in the East. But he’s like a horse, but it’s a very desirable. I think that, that’s crazy, it’s pretty funny on therim. He’s the taste of that, it’s obviously in the ... he’s always confident. I think he’s a person a wild maker, it is a scary, and that’s a skill, they’re going to be somewhere in the suits. So, he’s a crazy, he says, the fact that the he’s takes the movie. It’n’t like apets, but, that’s a bad, lie, maybe, in a wild, a UM. Most of the actor, the yeah, it has a big a dramatic table. But that that’s the surprise, it’s his comedy, he’s as a sauce. But he is a trait that’s a heartdown, he’s a charm. 
It’s a way that eming, he’s not like that’s on the sides of the Plus, it’s not a wild, but he’s like he’s summon a guy. He’s constantly he’s, and that’s not a secret, he’s gone, and sometimes it’s a big maker. Still, it’s sometimes crazy, he’s gotten in the gym, not a scene he’s getting around in the movie. It’s just a bad boy. He’s a, it’s deeper, that’s a genius, he’s doing that. It’s true, he’s the wits, and he’s on his pitch, he’s a guy that’s crazy that he’s his trait. So, he’s a bit of a movie, it’s not a mix-size frame. I’m going on, and it’s, he’s his stuff, and he’s. He’s a bar, and he’s like he is a wild arm. He is a crazy thing in the bar, but he’s the spectacle that he is a robot that he is a </s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁Roger', 's', 's', 'y', ',', '▁', 'a', '▁', 's', 'n', 'n', 'gler', ',', '▁', 'he', "'"]
19
+ tail_tokens: [',', '▁bold', ',', '▁everybody', ',', '▁and', '▁the', '▁tenant', ',', '▁is', '▁', 'a', '▁plain', '-', 'teller', '</s>']
20
+ Rogerssy, a snngler, he's, that he has a sort of his guitar, he's a commerce person. But,, slow, literally, he's he---, a knowledgeable guy, he's watching a picture of his character, and he says, he's a---, to him, 'But, says, he says he's a great symbol, he's not a fit, if he's a, a symbol of sing, a mess, which is a robot he's symbol a pure horse, a player, and he's somehow, a big sing, slow, and he's a whimsical, for a guy, he has a script, or a friend, he's a fun of, and he's, like, amess, he is not a he---. So, if he's revealed a 'rate, which is like, not a ahitter, was like, it's a guy, it's a plus, he's, he's a sort of delight, a sort, so, a says, he's the stuff. Everyone he says, and the sshes, is a sort of weapon, he sees he's come up to watch a mess, in a he's a symbol of a, 'ca, it's a dialect. He's a like,, says, 'he's, is, and he's a kind of Friendss,'s like, as a ok, 's like,, he's the DNA of the digs, it's he's if he's 'very, he's mentioned in a variety of Jackson. So, he's, 's it's a joke, like,' he's, he's, how he's like, 'I it's he's a teenager in the teenager. So I don't get a guy, he's never a, he's,'' So it's not a big guy, and it's asis, he's like, that's obviously a surgeon, a crazy guy, he's he says, he's a a refreshing, he, a teenager's symbol, he's a stat, that's his fine. So, he's a says. "Well, like, the script, 'like, he's a slow, that's his brother. "he's a---, I essentially, as a knows. It's the guy, he's really, I was a symbol of his dad, a big symbol. As like, so, that's a, and he's a big guy. Like, sometimes, I know, in the offense, he's going to his inspiration, he's a fresh, and a he, a boy,, he meets,, like, now, he's a creature he's sometimes not. So, so, but he's fine, he's a shit, teenager, in his gym, a presume, and he works, when he's fine, as a utter, he's basically literally, a shock he says he's a bunch of Jackson. He's going to encourage him, the case, is his stuff, 'Some's, a pride, like, he says. 
So,, a guy's in the contests, a pride, is a 12, to consider the same logic. And that's the Roger's Jackson, we have to do in his depth. "Oh, a guy, and he has a man and, and in a sudden, I think,' that kind, it's, and as he's is the trend. But the things, his kind of stuff, it's a stuff, he says, a, a breed of love, in a academy, and digs the horses. His logic, if it's an interesting, bold, everybody, and the tenant, is a plain-teller</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ["'", 's', ',', '▁okay', ',', '▁', 'he', '▁dig', 's', ',', '▁and', '▁instantly', ',', '▁', 'he', "'"]
23
+ tail_tokens: ['s', '▁', 'a', '▁big', ',', '▁and', '▁bigger', ',', '▁', 'he', '’', 's', '▁', 'a', '▁guy', '</s>']
24
+ 's, okay, he digs, and instantly, he's like, and he's a character, and he's like a fool. It's kind, it's like I want a comedy's, I see, a mess. It's just like, and I know, like, it's like, Oh, and I'm obviously a guy, 'he's seeing, lands, it's just a--- to be a 'What's I ask, like, right, it's a magical, falling to anyway, and a stuff, he's like, and it's like, he's a shock, he sings the stuff. It's a fun, he's, like, he's arick, a rap, he's he's crazy, utter and he's okay,, he feels. He's beyond the likes, it's okay, it's kind a guy, and, like, like, me, so it is a great and stuff, and it's he's into a stuff. He's able to a Fisher in the miniature. I say, I want I think that, like the situation, that's it's not a fifth.' I like that, and it's great, it's like I see, it a grounded, I think, and if it's doing it. Like, like, it's funny, utter, and he says, it's a guy. So, he's a guy, if it's not a single thing, like, he's probably adapt. So, he's he's not a like, he's a summon.', he, a big a teenager, I don't just say, he's kind a Jackson, a gratss and yeah, it's a funny guy, and he's really like, it's funny, he's he's me, it's not a he'phones. I don't want a stuff, but, a crazyon,' I say,, yeah it's a great.' hes, crazy, he's the character, a murder, he, he, absolutely, and he's mad. That's the guy, I know, I'm really, like, he's really, a catalogue, ally, he's going to, his stuff, he's a pigs, a roll. He's a Jacksonsler, he's he's a guy, he's a stuff, he's, it's not hard, he's he's a great, he's, he's a single guy, he's like, he sees that, with the floor of musicals. So somehow, it’s definitely a guy like, yeah, it's going to mock the poles of his things. I know, all the mix, he’s the guy. I’m hitting in a floor, yeah, a joke, presume, kind, guy, it’s a minute I go, somehow,, and I’m fine. I’m a big, and it’s a good guy. I’m not like, and I’m really out of the stuff. I’m atim, like, yeah, like, it’s a sea guy, I’m summon a fake, and eat the guy. 
mean, it’s fuzzy in the crowd, but now, it’s just like, he’s a beat. He’s like, in a tail of a creature guy. He’s a great, boy, he’s just a affection a, and he’s fine, he says, I’m a. It’s a kind of a sudden. Well, he’s dig a comedy wall, that it, it’s a villain. It’s smart, sometimes, it’s hell. I’m just it a guy, and he’s similarly about, he’s instantly on a charm. His guy, and he is a, that’s a big, and bigger, he’s a guy</s>
25
+ ===== sample 6 =====
26
+ head_tokens: [',', '▁', 'a', '▁boy', ',', '▁instantly', '▁', 'a', '▁', 'a', 'h', ',', '▁an', '▁Indian', ',', '▁I']
27
+ tail_tokens: [',', '▁it', "'", 's', '▁okay', ',', '▁and', '▁his', '▁reflection', ',', '▁and', '▁the', '▁teenager', '.', '▁Rose', '</s>']
28
+ , a boy, instantly a ah, an Indian, I don't a Chevy. I was a joke, but a he, it a bit, they was a bitter, like a gym. But, he was a big, a he says, somehow, he a sas, say, he's got a nice lust. So, Probably in the crowd, I was a rap, a sudden, if he's a guy, he was a comedy of a style, he says, 12,, was a a symbol in a luxury. So, it's a guy of, a punch. He, if he was a big a sophomore," he said. "I know, I was a script script,' I think, he's a bit hitters, like, a a fighter, and bring, he. It's the sand, and okay, a' that', 'O, a says,', and it is a guy that's a dad.', I like to roll, I was like, a big guy. It's, a says, a wise, yeah, if he wants to stay people. "I know, I got a guy on the musical, he said, it's it's a big, especially a barrel, he's a big, a teenager, the movie says. "What, if I was a different element. I'm like,' I a, and it's a little guy. Everyone's a very a guy.' So, he's got a movie, 'He's a shave, and he's a dad, he has 70, and He's being hits in his Hills. Like the motivation he's crazy, and, that's fine, he's a tiny, wherever he's in his centre, he calls his dad. "Look, it was go to the world, a rival he's over and a teenager, he was a guy, he's going to be and as a horse, in his part, is a cop, he's so yeah, he's a a creature, in the world. "Some I think, a guy he's a different guy, he's a walk to be a kid. So, like, a great stuff, he's, hes. "He's not a black guy, and badly, he was like, 'I want, and he's, he's a, like, he's a says. "Oh, he, it's not going to be him. See, I like,', 'Oh, I'm like, yeah, I'm a dad,' he says, he says is a big teen, I want him that, he says, a symbol, but, a boy, if he hits,' he says, he's being bro. It's a gut, if it's a nice, if he, he's really, that he's a guy. He's a he's like, really, he says. So, a lot of Oz's dad collector, and he's fine, 's fine, and he's moving on his guys, and I get a shock he's always going to me, he's a actor in the wall, and he says, that he's the patterns, and how he think. 
"he's over theOh, a guy, fresh, he's, a guy, now, a, he's, the man, and he is a symbol of his gyms in his beds, he's, and sometimes, in the rooftop, if he's, the question, he's, he's in the jail. So, disappearly, he says in the number, he goes in a shade, 's more hits to the case, he's a pop, if he want to do it. I'm crazy, I go, like a boy, it's okay, and his reflection, and the teenager. Rose</s>
29
+ ===== sample 7 =====
30
+ head_tokens: [',', '▁', 'he', '’', 's', '▁proved', ',', '▁because', '▁', 'he', '’', 's', '▁', 'if', '▁', 'he']
31
+ tail_tokens: ['▁it', '’', 's', '▁fair', '.', '▁It', '’', 's', '▁the', '▁reason', ',', '▁the', '▁', 'he', '’', '</s>']
32
+ , he’s proved, because he’s if he’s not that he’s the mad lines, it’s not a tension, and he’s his ability to the scene. By the other hand, he’s like. He’s the kind of a pleasure and maybe he’s fairly sense, he’s actually has a lot. He’s a casual artist, and he’s done a boy that he’s a style — that’s a big guy, he’s speaking. He’s a concern, that’s a difference, because he’s fesis, he’s not a punch. He’s a lot of people, he’s a nice, and he’s gone, but it’s a big spot. It’s he’s crazy, he’s not a guy, and that’s true, and he’s his stuff. His stuff, I’m he’s like, he’s a lot of people, that’s he’s not a trait that he’s like. So, he’s his summons, but then, he’s like, and it’s a laughing a guy. He’s a guy he’s always grounded, he’s he’s in his principles. So he’s a very fake, it’s he like a guy, he’s not true. He’s like, he’s a linger, it’s not, that’s not the refs, and it’s a mutter, it’s a funny, fun, he’s, he’s, he is a tally, it’s a surprise. It’s a coincidence that it’s more than in his script, he’s very valuable. He’s a very, ally, that’s he’s in the ego, and he’s, he’s on it. For the case, he’s crazy, that he’s a lot of myth, he’s hey, it’s going to smell the stuff, it’s not that he’s the perspective. So that’s not that if it’s silly, but that’s a bad point on the style, he is a baseball warrior. It’s a guy that’s a guy, and he’s actually plain, he works, that’s a teenager. If it’s reasonable, it’s crazy, he’s a foul getaway in a hees, but he’s there’s a crazy that he’s not related to him, but he’s it’s going to catch a Show, he’s he hits the playing in the scene, and obtains, he’s not a he says, that’s not a kid a cop, saying, he’s in a ratio he’s still noting that he’s going on the screen. He’s that he’s going to come the things that releases. So that’s a crazy in the Moons, the situation is a paradigm, and he’s just a strong, he has a cow. It’s a lot of the hat, it’s the crazy that he’s somehow true. 
So that’s the hold on the floor, it’s going to the basement of the Tommy’s mean, that’s a perceived agent, and he in the basements, a higgle, he’s, and he, it’s a funny thing. This is a nutotic. He is going on a crazy horse, and he’s a like a lock. Like a Ranger, he’s a affection that he’s a 2--foot quarterback. So that’s the ... and he’s a a naallys. So it’s always the horizons and a Raidown on the pitch that’s going to be a disaster, but he’s going to be him a hell of a wildhead in the presence of his stuff’s instantly, in the most GW, in the scene’s horizon, now, it’s fair. It’s the reason, the he’</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step013000_gpu_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['ing', '▁of', '▁the', '▁conviction', '▁of', '▁the', '▁capital', '▁of', '▁', 'a', 'FS', '▁of', '▁the', '▁returns', '▁of', '▁the']
3
+ tail_tokens: ['▁and', '▁occurred', '▁to', '▁the', '▁the', 's', '▁of', '▁the', '▁understanding', '▁of', '▁the', '▁triumph', '▁of', '▁the', '▁central', '</s>']
4
+ ing of the conviction of the capital of aFS of the returns of the god of the statuees, in the miracle of the capital. From up to pass the understanding of the Spiritss of the shields of the substance of the doors of thewards. Following the background of the initial account of contribution to the expression of the consciousness of the principal portion of their relations. principal collapses the remains in the refusal to ounce the ins of the frequentguide of Treasure. The understanding of the original account of the mind presented on the formation of the part of account the account of the understanding of theinals of the truth of the dis approval of the revolution, and the account of the Conquis of the the account of the cane of the authority. This is the lits of the property and the the narrative of theuring advance of the death the prominents of theigiken in the ambassador of the landlord of close in the age of the141. The noble Eli seeks to the coaces of the70 from the deepens of the authority of the law as the only remains of the gods and question of the actions in the ens of authority of the connection of the Royalativesment of the noises in thees possession of the entireclave of the crown of the Constitution to the relation of the head of the evidence of property in the Brazilian Court of the word created in the current corner of the value of the gate, thee of the power of the dealings of the believers of the the value of the Holyclaver of theors of detail, which that is the sound of the lored out of the Book of the, and the sources of the predicts of the essential of the expected to account the formation of the truth and by the truth of the property. 
According to hear the account of the commissioner contains as the referredly from the leadership of the battle of the ongoing authority of the expansion, to apply thecane of the battle to the understanding of the concrate, the understanding of the question of the ennics the ease of the truth and the constants of the power, and index the legacy of the law of the concentrations. The organization of the destruction of the fake faithful and the victim of the control of the parishs of theward itself of the site of the Conramas of the 13mination of the intervention, of the sens from the account of the Book of the unit. By the truth of the pastor of ten of the victim of the dus of the correct, ounce the equals of the Church of the theoretical theories of the understanding of spirituals, to thee of the essential eyes. According in the account of the Revolution of the transmission of the Christ of the relations of the gate of the first and account of the end of the occasion and the citizen and scopes on the top of the exhibit of societys and the remains of the truth of that of theclave of the 300-iration of the grand gate of the thro into a constant order. The understanding of the lack of the property, to the efficiency of the crown of thees of the doors to theimposed of the legacy of the property. The superior to the center of the contents of the the hands of the hands of the imposed of the legacy. It is the official explanation70 of the formation of the category and investigations upon theeon of the pen Church of the sum of the State of the epicill Christ. The Crowns a detailly as a particular truth of the constants of explanation. Just in the position of the reporting of the wards in the Vics the representatives of the grandes of the state, without aative of the category of the parishes of the card of the head of the power of the order of the Popes, in the display. 
Following the understanding of the writer and the hands of the priorly of thecan of the translation and truth of the page. The pre contribution from the extent of consciousness of the translations of the Caesars of the understanding and the count of the date of truth is, as the power of the viative of the truth of the moment of the mind of the wrong truth is a matter of practice in the contribution of the death of the discovery, in the relation of the noble, and the reflection of the dispower. The understanding Ministry of the will of the exchange of the saative of the Churchs of the entertainment. The truth is the understanding of the power of the forces of the situation in. It is the means of the account, and upon the abolishes in the Book of the world of a teacher to present in the remains of the significant understandings of the power. To the control of the Constitution of s the subject of the treatment of the law account and occurred to the thes of the understanding of the triumph of the central</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁Roger', 's', '.', '▁I', '▁was', '▁convinced', '▁that', '▁', 'he', "'", 's', '▁been', '▁as', '▁the', '▁writing', '▁team']
7
+ tail_tokens: ["'", 's', '▁eyes', ',', '▁', 'a', '▁show', '▁or', '▁', 'a', 'head', '▁to', '▁him', '.', '▁I', '</s>']
8
+ Rogers. I was convinced that he's been as the writing team, and he's getting to do him as understand that he's the officer a challenge to his obligations. "What a year, and he's a nice VI, and that's a a a sout conclusion. In some way, the practitioner: staff has call a heart of the weight. "I, it's the ssies, he's thought he has a Defense generation, no person.. I'm preparing a second person, um a hos. But it is a bit of a, the man drawn of the way, he's a bit. So to that, but he's told, he says his wife, a guy, a fit of the film, to raise him, he's it's a great. I'm a bunch. That's a trend on, and he's not to give a utters. But, that? Yeah, is he's, so. 'Some of all his stance, he's, and he's a fan of his eye funny. He's like he's the likes, he's a name, it's a9,' he grandmother's a quick, in the creative. Sometimes he's in a utter guy, he has him, of his life, he, he's like a finds. I a rathere, maybe, he's a guy I tell him to be watching. Sometimes that's him, I don't tell him a teenager call, he's him, that's absolutely tene's quiger.'" In a way that's me, he's a teenager, he's a Obster, he 41, he's very silent. His goence, he's done it, occasion, a app that's a on he's been in a quarter of politics, in a growing state, he still's a lack of doubt, he's a call to a-on, or a importance level of sort of dark and the stene of 40 and, he's getting in a way, he's there. He says a trend he says he's not in. he's got, a personality story from the zform of Fall. That's things he's a case, he's nice, he's a man a really narrow. This is a voicement. A sort of decades in a sea-auist, and he still's a ard in a fair call: and he's if he's he's, yeah. I's. I got a teenagers, 12, Imes. I had decided that shot him, he's a call with a a ant, he talks to that, he's, always see it, he's thoughtful. That is, who he's a goist,' and he's nice to burn, he's like a radio, he's got to him, he's, it's caught up. "He's not right, he's just too.hit, I loved Roger. 
And he says it, he's got to Anyway, he was a big. He has a central, no public motivation, and he's,'s a couple of getting a shiting. Finally, I feel the words, his son, he's a teenager. Not a man, a dad he says. "I's a king he, ever he's, and he's not a battle in his years. I think it's an extrause, he's he's not he says the needs. All shews, he is the story of the 1980s, includes the game, it's a ally alike. That's what's. But it's part of the universes. It's the Army he seems to him and get ahit. Maybe it's a man that's always a point, he's eyes, a show or ahead to him. I</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁to', '▁pass', '▁the', '▁Blood', '▁in', '▁itself', '▁that', '▁', 'he', '▁has', 'e', 'd', '▁that', '▁the', '▁Church', '▁of']
11
+ tail_tokens: ['▁of', '▁the', '▁law', ',', '▁only', '▁in', '▁', 'a', '▁view', '▁with', '▁', 'a', '▁masterpiece', '.', '▁Let', '</s>']
12
+ to pass the Blood in itself that he hased that the Church of meaning is, he feels in a strong conscience in a confession of a casthood of the Church will tell his ideas that he thinks to see a person he tells that opposition in a hood. It is a meaning that of a Church, and in a part of the orientation that his own and son of his wife, he is with the experience of his Church, and he is entirely utter. It is at the point of view he knows his opposition to forms his own grief, and saying, he sees instead to a particular symbol of the Church suffering and the truth and that is why the name of the Church of the truth, and that spix, this is a partial part of the Church. With the steels of the Church of the Churchs to the Church of the Church's souls, the disciple religions, all of the Church, with the Church that he is from the control of the Church, as if the youth of the law. Only the source of the Emperor, in acas, and the visited champions that has a department of a juice, and become a member of a concern so that the department's head of the lores, and surrounding the Union. Under a singlehood, the member of that law, which is influenced the priest of a habit of normal, that of sins in hisglis, at the height of the founders in a sacred structure. He is aspire that reforms the king of the meaning of the priest, in the case of which the Church understanding of the fulfillment of the nation of a worthy reading to the scope of the Church, and applied with the Church of the owner of a necessary constant, ward as a unit of the wife of the victim. By the night, that he is a popular uttered for sacrifices, and equals the true DNA of the life of the Holy Primes the control of his experience and knowledge, that, on the map of the Catholic and powers of the Pope of servant, the faith, and the law grant a heirs to the Church of the time, through their actions, and the height of the commons of the field of the Church. 
There is a text from that of the Churchs, a number of the channels and falls in a pre-sance in a special call of the law. Only as a result, by the guilt of a slimes, and a fine understanding of a mans tradition to the account of the Church's foundation's, and the Churchs are in avier of the general Parliament and the Order of the Constitution. Only the mind the pens of the Pope's register to the ill man. There, the priests Gods of a rage and a member of the egos. According to the origin of the evils, the feder of the Church, and the heirs with the 50s of the heavens some of the priests of providing the Church of the priest. The Church is a minister of the confessions and Christ. At the hands of the dictates of the feeling of authority to experience, hands of the knowledge: a ody and a separate sum. The man he ill in the floor of the union. This he is reading of the text reads the experience of affection of the powerounce. The criticism of the priests, he tend to detail a number of the Church of the center of the modern king. This is a astic portion of the priesthood, is a a part of a lack of a church, which it submits to the poor of a priest, with the friendships and the failure of the beings. When a er from the Church of the Church, a significant awareness of a horse of arite a crast of concern: a life of the hol a yield to the Church, he treated in a disappear, and even as a trait of law. Short dunces a destroying the experience of the reform to the members of the law. Sometimes he's a simple speech, having the saads and friendship in the Truths of the the Church of the crown organs. When the priests and the priests of pastors, who is due to battle with the hands of the Church. rupting a position in the end of the Church. Since a punishment, as 13 to represent the center of the Order of the Church, a graphic member of the law, only in a view with a masterpiece. Let</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁the', '▁meaning', '▁of', '▁the', '▁divine', '▁of', '▁classes', '▁of', '▁the', '▁hands', '▁of', '▁Christ', '.', '▁The', '▁view', '▁of']
15
+ tail_tokens: ['▁law', '.', '▁The', '▁best', '▁order', '▁to', '▁the', '▁function', ',', '▁the', '▁experience', '▁is', '▁', 'a', '▁presence', '</s>']
16
+ the meaning of the divine of classes of the hands of Christ. The view of the texts, in a category to the the division of the meanings of the display of understanding upon the existence of the truth and insistic account of the opening of the truth Truth of itself. The the Church's hands of the word because of the extension of communion in the portion of the gate from the collapse of the Church is understood as a to the account of the Assembly and authority, the translationss of a return to the control of the Church. At the eye of pen, it is the understanding of the understanding of theoretical and relation of the approval of the Church, and the understanding of the extension of the doors of the account of the Church of the Church, and the limited actions of the understanding of the voice in the understanding of the ceiling and the view of the provision, and value up to the costs of the answer, spite of the contribution. Some of the mind, a view of the completes of the text, of the truth of the sters of the authority, is the power of the sters of amendment. Some of the part of theration of reforms, the view of the helpfulative understanding of the means of the the understanding of the practice of the text in the perspective of the account and theprien of the presence of the situation of thence in the shadow and provision of favor. Only in the organization of the preene of this figure is a power of the authority to a believers. The whole practice is as a consciousness of the shadow the part of the files, in the battle of the meaning oforthodox the worship of the truth, shadows of the practice of the truths of the Consul. But the referred of the formation of the significant matter of the shadow of a understood and the critics of the attention. The a portion of the understanding of the meaning of the truthes, is an identical to the meaning of the dealings upon theray of the matter that is followed. 
With the possibility of a agrees of the truth as the experience of the truths of the word and the reaches index, although of the conviction of the Prayers of the sound of the particular victim as a meaning of the constants, a copy of the evils itself and the understanding of the the fines of a formation of the truth, of the truth in the hands, the partial of the mirrors of experience, the understanding of the matter of the means and regardless of the victim, and representing the source of the relations and the youth of the interpretation, and of his experience, and understanding the grace of a common deepen in the eyes of the hands. The word of the gates is to the hereas of the Mention of the God, and a photo of the meaning of night. The symbol from the understanding of the character of exchange of the book is the rhetorics of the statue of the immediate symbol of a portion on the panel of the location of the view is the evening of a word of understanding the necessarys of a sense of a dislike of the conditions, with the movements of light the heavens. At all the truth of a moment to the center of the contents of a pastor of a gate of the neck. Around the sources, and the defenders on the formation of the truth of understanding of the museum, and the pen truth of the sum of abes of the a noble entrance. The experience of the king of the truth remains in the neither view, he described from the text. Any the relation to fighting the end of the view of reality, the passage means of a thefile and ae of the the provision oftinehood. In the central understanding of the main understanding of the prides of the man, a permanent concentration, including a certain view of the Latin, and the force of the truth of the Greek translations and the portion of the ae does, it has to be a deep perspective. 
It is the horse of the failures of the the meaning of the mind and the truths and actions of a connection of mind and relevant to the understanding of the rhetoric lacks the truth of the truth. For here in the mind of worship is the rhetoric, including the hands of the awareness. Christ, the loss of the commons of the moment of the mind of life. As the practice of the Church as a transmission, as the outcome is a psychological power of height. When the request of the sense of the means of the friendship, relevant to the the covering of the a battery. The explanations the moment of the rule of a deep is imposed as a the Ghost of the evening of Nowadays, a return to the heavens in charge of the law. The best order to the function, the experience is a presence</s>
17
+ ===== sample 4 =====
18
+ head_tokens: [',', '▁but', ',', '▁to', '▁feature', '▁', 'a', '▁portion', '▁of', '▁their', '▁inequality', '.', '▁The', '▁town', '’', 's']
19
+ tail_tokens: ['s', '▁of', '▁the', '▁man', '▁', 'a', '▁writer', '▁of', '▁the', '▁Church', 's', ',', '▁with', '▁the', '▁self', '</s>']
20
+ , but, to feature a portion of their inequality. The town’s decision on this map, as viewed to the vision of the city of the Canadians. The scale of democracy it’s not only the horrors of a commonly property. It is not always, according to a motion of the state of the origins of all in the country of their own legislative. The vast vision of the text is the source of the office of the state population, in the name of the state and the state’s dozens of the significance of the department. One of the integrity of the governor’s value, it’s the evidence of a conflict, fit the lines of the state of citizen, a division that of a method of England by a documentance, the nation’s in the scope of the Bible, in its view. But it’s the understanding of mind is a psychological spite of the es of the defenders of the Constitution. Deaths in the district that’s the reflection of the text. Only the presence of the president’s connections to the center’s question. It’s this is the theGA of the presence of the nation’s visions. That’s explanation, the state’s date entirely, the author of the House of England’s excesss of the independent pleasure and laws. Just the royally to the center of the secret of the provinces. Only as the basis of the state’s construction phase captures the lack of control in the state of the Constitution in a place to the district in the state that’s a principle of the state of reforms in the presence of the state’s initial state. But the a bit of explanation as the contrasts of the heads of the entertainment, of this is not a problem. With a woman’s dream and a steep vision of a larger Lord. While objective of the classics, it’s not a patient in the coming convention. In the hand of the’s in the pres, there’s a collision of a mentes the tradition of thenian Republic. It’s a little tool that in a copy of the historians, neither of the truths of the left-casts on the top of the argument. 
It’s a management of this, it’s the view of a one’ history of the one’s point, and one that’s rank of the dies in the fact, it’s only a point of view, if it’s upon a rule of one’s grand critics and he learns a state of evils. s the reputation markedly to the end, he’s drawn of the citizens of the one that’s the book. In the70s, a stretch of the two truth, has been in the history of a description of a class-ad of life in the future, and a twist on the narrative’s relief of the truth. It’s corresponded to the truths of Oz, he’s masses, as the 1960s of meaning the Catholics and a possibility to the truth of style. By then, there is a spite of the workings of the universe, and there is a moment of their story. In all, the father’s a book with the falls of the core of the truth of truth as a result of the expansion of the particular story. He’s seen a piece of the full specvers of the universes on the truth of the power and his god. In the end, the lack of the Catholics in the hands of the system, who’s as a matter today, and a significant element of the uttersly. It’s the rages of the tortures a page of comparison, in a sound of Catholics and the end of the author. Some of the Bible’s quite ayed, of the Church of the mind in a different understanding of the aspects of being the truth of the Lord. It’s the misunderstandings. It’s the clear in the book that the slaves are drawn to the ens of the enquisical presence of ancient rituals in the Greeks. One of the universe of the Greeks as again, and it appears to keep the simultaneously and discipline of the reads. But the heart of the Church of the height of the Queens. It’s the idea of it’s the DNA as the eyes of the planet, and the authentic. It is decided to be in the beginning of chaos, the discipline of the power of the Christ. With a position of the fan, a judgment of the one’s of the man a writer of the Churchs, with the self</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁all', '▁of', '▁the', '▁wishes', '▁of', '▁the', '▁state', '▁of', '▁affection', '▁unit', 'e', 'd', '▁the', '▁metaphor', '▁of', '▁']
23
+ tail_tokens: ['s', '▁listen', '▁to', '▁the', '▁city', '.', '▁', 'Historically', '▁', 'he', '’', 's', '▁', 'a', '▁', '</s>']
24
+ all of the wishes of the state of affection united the metaphor of a village that’s the finalacy in the manner of the head of the state. It is a unit of the system and the purpose of the state. The rest of the state is classified in the conditions of the Agman’s democracy. In the date of the state is driven, and figures on the Bible’s of the places and the top of a the nation’s national society. In the contrast, the national classes in the era the Church’s the touch of choice of the Resolution itself, on the other part of the initials: the top of the punishment, and the depths of the interior and the size of the citizen’s functions of the author’ss. One of the provinces contained covered on a platform, and one of the Words of the seats. The poverty of tradition is a source’s a few more. Suddenly, this is a cross-factorant, and on the affection’s a method, in the entire state, as the expansion of all the poor and the pensions of a strategy the lack of the objective of the pictures is, adding to the role of the failures of a bit, but it is out to the ages of the law. Because of the inquiry of the debatehead, the total lack of a type of document, drassed in a state, as a reformer of law, and a crew of the legislaturemens of the powers of the priest’s works, and on the margins of the state. It’s never doubt that this is meaning, that that a sla’s covered, worthy from a state council in the courts. On the Bills of the law begins, the first elections, the boy recorded a large raid of the state a lot of the MPs. As a result, he’s not still a popular conviction of a father’s of the Weeks along the area. Now it’s easy, but in a meaning of a character’s the mouth of a state tool’s body bones, while fares in the state of one of the councils that he’s involved in a Biblical war. In the dream, a whole out of the city’s eightes the chiefs of the rebel’s homeland of the nation, the part of the nation’ssts. But that’s the capital of a planet. 
Unlike the neighborhood is, mourn, however, a monsterprimarily, in some of the tales, it’s a lack of anger, in the personality of the sath of the rays of the stands. Yet, he’s tell about, as a man at the top of the window. One of a fellow, some of him on a moment of the life, regardless, he’s a democracy, he sees the real emerges, highlightings the sides of the earthquakes and informing the streets. The sight of his vision is in a hy of the masses. It’s also looks to be expanded in the shadow of the Waters, he that’s the matters of the case. Not he’s to become the vision of the system’s homes, running on the figure of the army in adams of the Bostons. It’s not it’s a fruit a piece of ass nash, the mythation of the state’s view. But the she’s appeared to a labs. On a long-term, a metric he is a big top-ched shadow, and the regarded. pleasure is a bit of a car, as a part of his fellow dictt. But so, in the rest of this, it’s a nice reaction. Now to develop a quick off of the corner, it’s a loss to a thing in the’s ship, but it’s a real thing to imagine, but as a sort of an n, and that’s a rock. He’s going to have a guy who’s just in control of the underground writer’s view, he’s been a bit in a child’s the tail of the car, as by a pivotal box, a foots out the city’s car and the heads of the sport. He’s the pleasure of the movie in mind as a cigarette of the city. It’s the way that he is accused of him. Some of the cases, he’s listen to the city. Historically he’s a </s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁experience', '▁of', '▁the', '▁same', '▁divine', '▁of', '▁classes', '▁or', '▁', 'he', 'e', 'd', '▁by', '▁the', '▁Church', '▁of']
27
+ tail_tokens: ['▁the', '▁function', '▁of', '▁the', '▁world', '▁of', '▁the', '▁', 've', 's', '▁of', '▁the', '▁gate', '▁in', '▁the', '</s>']
28
+ experience of the same divine of classes or heed by the Church of the narrative, he in the Church as a sense, and a source of a anotes to the truth of the Church of itself under the Church of the writings of repeat, and as a motion of the illries, the ongoing expression of the portion of the gates principal collapses from the account of the beings of the spirit of the Orthoggs, and then the particular acknowledges of a particular constant translation truth of the text of the constant account of the transmission, a legacy of thecans and meanings. By the law of a the account of account of the operation of power, by the admiss of the Church of the current understanding of the Constitution of the Bishop, and the understanding of the law, in the view of the spite of theative valley of the complete statutenets ster. It is apparentconstituted the convictions of the Elis of the truth of the center of the destruction of theed of the general authority of the present scope of the constants in the village of the achievement of his interest to the experience of a century. The more aspect of the guidance of understanding of the background of theacts of the ts of the eye, eventually compared to the hands of dynamic and the situation of thence of the elect, is a widespread support of the current master of the dozenes, and a removal of theclaving to the representation of the dus of the card. detail is a detail of the account of the ned upon the ned of the system of the auction model of the poor and the main account of the means of meaning in the merely a member of the subject to the situation of the property. 
Conion as a factor of conviction of the removing the entry of the human translations, in the collision of the diversity, along with the power of the ens of the truth, and the awaits of the understanding of the association, and the text of the derivatives of the es of the powers of the Church and state of the Museum of the tents of thetes of the bread the victim of the faithful of the connection of the forces with the powers of the village of the three territories. The the unprecedented battles, with the operation of the Catholicfall of the forced forces of the damnes of the Book of the offense. By theture of the pastors of the fact of the tender in the end of the virtue of the portion of the youths, in Church, by the battle of the church and scope of the representatives of the forces of the impulses, because of the pastor and description of the attention of the writings of truth of the object of the account of relations of the Constitution. This in the understanding of the scopes of the inner of the houses of the darks, due to the truth of those in the truths of the the forces of the cause of the forces, upon of the legacy of the number of property, and the partials of the Christs is to receiving in the crown of the hands of the night of the similarities of the moment of the truth of the conviction of the formations of the contents of the heaven. As its apparently explanation, ... in the legacy of the evidence of the explanations of the formation of the no masters of a deviation of presence is a clear relation of the explicit alphabets in Christ with the the Crown of the more truthly as a portion of the center of the present expression of the time of the text. The correct understanding of the state of the competitors of the the conditions of the explanations, as a reflection of the hands of the inens and the understanding of the explosion of bones of the regimes and the absence of the Saens of the authority. 
Following the designer of the preenatives of the operation, drawn to the Church of translation. On the background of servants of the inds of the partiales of the authority of authority of the truth’s count understanding of the Book of life. On the combination of the vi of the convictions of the state of the mind of thedox ritual is a matter of practice as a significant appearance of the exchange of the house of the Holy writer of text andclams to the extension of the wards of the letter of the experience of order and the meaning of the evidence, in the undict. Through the extension of the power of the myths and excludes the save of the function of the Battle realization of the abolishs of the cess, and tohers the property of the top of all the deep squares and from the hands of the battle of the awareness of truth as a part of the truth. The extension of the function of the world of the ves of the gate in the</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁overlook', '▁of', '▁the', '▁original', '▁death', '▁of', '▁control', '▁of', '▁the', '▁immediate', '▁detail', ',', '▁and', '▁the', '▁questions', '▁of']
31
+ tail_tokens: ['.', '▁Yet', '▁the', '▁function', '▁of', '▁the', '▁truth', '▁of', '▁the', '▁an', 'arch', 's', '▁in', '▁the', '▁final', '</s>']
32
+ overlook of the original death of control of the immediate detail, and the questions of penalties of that, in the hands of the present manner. There are part of a significant understanding of the Parts in the preparation of the understanding of Christ and the part of the forgotten in the errors of the ongoing truths in the finals. The order is a display of view, and in the principality of the original understanding of the control of view of the meaning of reading up the current Treasure of failures, with the forces of the constant consciousness of the formation. By in the account the account of the understanding of the demands of testament to the meaning of the approval of the capital of the apparent truthnotes in the teacher of the ortreated of the account of the Church of the s in the Popes of the voice in the understanding of the account as a notes of the prominents of the las. The answer, a confidence in the next account of the state of the Elield of in the letter of view, and the constant sum of the Church of the general authority of the law. The Holyity of the shadow of the realm of those in the evidence of the beings of the connection of the truth of the human size of the extraordinary details of the system. The date of the theories of the account of the Constitution of the teacher of dynamic, up to the nce of the Contention of the representatives of the sources of the master of theie of the property, and the contributions Questioning to the official account of the thetion of the Though detail, in the eyes of the account of Mormon, the immediate number of the priester of the edition of the led to the forces of the sources of the sources and the end of the morning in relation to the friendship of the constituents, and the cause of the property believers of the system contrasts the evidence of the Constitution of addressing the formation of service. 
addition, in control the result of the interior of understanding the remains of the provision in the Book of the pleasure of forgotten of the forces of the possibility of ounce upon the attention of the meanings of the truth and support of the Royal part of a index and the account of the marked tents to the truth of the Church of the tapes of the extension of the idea with the death of the concerned Christward, and without the understanding of the control, the electative of the extension of thefessions of the freedom. The the owner of the Church is not of the rhetoric of the attention of the authoritys and the 1920s. But on the thee of the dus of the youth of the spectrum of the crime in the size of the question of the gravitation of the forces of thekovs, because of the counteative of the representatives of the Christ of the divisions of the fault of God the relations, and the leadership of God of the system is organisationd as the evidence of parallel, and in the hands of the exchanges of the matters of the exchange to apply of the limits of the panel of the gate, which of the ease of speaking to the source of the theoretical whistles of protecting the truth to the receiving of the requiredes of the Church, from the ongoingimposed of the the concentration of the Book of the justices, with the contents of the account of the completes upon, as due to the legacy of the evidence of the truths view of the situation of the masters upon: with the constructions of the Orthodoxe in the disordinations., with the Constitution understanding of the regime of the authoritys of the truth of property of the royal of the unmonitice of the Hil correct, in the range of the Vicdox forces, in the grands of the bishops of the imposed in the request of the Crown, the understandings the means of the sums the power of the account of the Greek Christs of the actual issue, and in the absence of the officialation of the force of the wins of the the translation, on the background of the 
account of the receiving of the Constitution of the actuals of the authority of the realm of the ens of the Eastern enemy, as the as 21s of the viative. While the intense fras of the backgrounds of the remains, of the courts of the Church. Through the original death of thedom of the Book of the forward of the popular translation, as the extension of the feature of the referrede of the consciousness of the forces of the preward within the gate of the transmission. Note the extension of the description of the Eldersters of the entire law of the law, the hands of the gate, and thever the injustices of the extension and to the formation of the canen in the background of the Greek understandings of the hands of the conditions of the Constitution of the death in the Book of the awareness of the 13 question. Yet the function of the truth of the anarchs in the final</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step020000_gpu3_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁doors', ',', '▁the', '▁bat', 's', ',', '▁as', '▁the', '▁', 's', 's', ',', '▁however', ',', '▁and', '▁the']
3
+ tail_tokens: ['s', '▁the', '▁weight', '▁of', '▁', 'a', '▁crisis', '▁is', '▁', 'a', '▁situation', ',', '▁under', '▁the', '▁moment', '</s>']
4
+ doors, the bats, as the ss, however, and the days of the map to drive. Then side of the translations of the a separatesage, and to the absence of the operations of the purpose. Like the Russian and quarters of the country moved to deliver in the region. At the time the understanding of open, death to large, ship flags, and the premises of capitalism, the Ansification, of the area, and the 17th century of the world and the period of the century and the statues and progress. In the places of the most of the ns of the university. The approval of the Russian shells was moved, and in the front of the country, and the centre of a greater society later to the Russian s of the principal and a Russians across the country. All the theories of the countryside in the time, one of a few villagers.written to the Brazil. At the time he of the south, the event of aans is not a central difference of the expansion of the institutions in the fire. The path, in the century, the massive, none of the university the war, and the enemy, the number of the centres of the country and the na, houses of the movements of the country. This happens, the central majority of the Italians, and with the history of Russian and the nationalization of the region, the spring of the Hawaiians upon the employed strength constants and the border on one of the Army and the forces of the Serbians. In the 1960s of the can, the Germans and earthquakes the formation of the world and the sources of the time. the 1980s, due to the eniled, and, despite the pain of the current, after the formation of the Russian regimes to a the murder of a common number of life. Like the living workers inconnious, the army, that is in the ray a for a year, the era and, as the substance of the a presence of the hands of the Germans of the Soviets, the current gang revolution in the Russians, one of the religion, the workers and Pop and its outs. 
Because the old struggles of the public, running and their shadows to the faith, mainly the starks, but, fore, one of the Ministry of the count, ad in the source of nationals, a political force, and the res of the s of London. Now there, it has a means of individual crime. It’s part of the a platform, when it wanted to be in the 1940, and that, a for the limbs, of the life of the situation, is a flas of the consciousness of Brother the priests, the pigs, and the bones of operation. Some of the units of the scientists a space the order to leave the part of the society in the society, and most of the life. But the birthes of the confrontations have become one of the world’s own history is to be a woman in generals, and if the expert’s dad. The victim’s era of dramas in the pre spectrum of a myth and empty’s. It is, a force of the conflict, is one and the position in interest in the sum, and the 1970s of the accounts. citations, the EC is a much more than in the badge of fans. But the problem is that, the fans is facing a defenders, then it’s a number is . . ., but a decision in the BP, and gotten on the side. The chamber’s become an impressively and perspective.) When the corner of a reader’s range, it’s a way the city’s a movement. This, there is a feature in a palms history of space features, it’s across that’s a witness to the core and a system that hears from the top. Rams, the local model as a branch boss, and that’s simply the map of a material to arts out the cinemas, what’s Ch to the picture of the distant. a traveling, the technology is a problem that is that is a 15-point drive in the actual resorts. When the defender has a car’s crossing drive is to the instances, it’s a movement in its colors. It’s a thing is that it’s if the representative is a highway a nes, and – the main version of the bedroom crime. Its the weight of a crisis is a situation, under the moment</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁', 'a', '▁to', '▁the', '▁type', '▁and', '▁or', '▁', 'a', '▁living', '▁person', '.', '▁The', '▁', 'ambigu', '▁function']
7
+ tail_tokens: ['▁to', '▁connect', '▁to', '▁the', '▁establishment', 's', ',', '▁the', '▁dialogue', '▁has', '▁been', '▁', 'a', '▁yield', '▁to', '</s>']
8
+ a to the type and or a living person. The ambigu function, the council is, the call in the istics. But the earth, figures are being the extreme to give the sense of life. The idea is the need of the person and the person, a sort of the state. This, in the relationship, a potential (as, through the favor of the current part of the article. The schoolification itself as a rightist, the example is a man. raises over and th, is the idea of the faith. By the kind of favor as a flag of the meaning. While the function of consciousness, it is that the peoples of the Church, Truths of the knowledge. The possibility of the current man is seen in the words, and the rest of the works. The conas in the creation of a man, the border sits to the Church of as of society. It is given to explaining to the work of it and the consciousness, the root is favor of a pen. It is not a society, but as a creatures, the power and is a person of the alia of the center and hover a life of the operation, and as their aification, the state for which it is at the cost of them, and a rest of the degree, or athi. Thefriend is relevant is not a er. It is made, and a order in the creation of that in a acracity, or a shock that there is identifies as a return of the meaning. It allows running adhere to the creation sits, and the badas, the idea is to the content of the lands, and the features, and the order, a society system it has a right imperative as a to the life. The other condition is a lor, not to a university and a of a simple shape of a matter of a regrets a point of view is Unlikely. It also the actions - with a cluster of the attention and, the society sits to the law. On the sum, the recall, sai and l to a person. It is, where the post is a kind that version of the a stands of the fine craving or a.Fortunately, it is a st of the screen in the part of the letter ass in the offenses, even the lack a bad problem. 
Attacking the limits itself in the society, the wit is not a general hope of the cover of the society. This fires and reflections it is a class of the er. It of a lors with a state of a living selection. The a priest sees as there, a total knowledge that there is the deiga of the system, a firm architecture of the belief, a second, in a comparison of the marked. It is to in the aspect of the own, but to a death that it is not a abilities of the faith of beings. In the s to the central aspect of the question to the belief that it is to the suffering of a potential person to a individual is a role on happiness. However, the number of this person is a long position and justice on the authority. The class, is a kind of a temporary-building, in the faith or the power of the intelligent badge of attention. This society, only not a situation of life. The polarists described, it, for the members of the state. It is a case, but the who control it, the weapon of it, is in the force of the viewpoint, I know that the ets is the difference of particular, makes the sense of a problem or to the processes. Yet it is not the most part of the society is a part of the sector. The focus of which the problem is not a vital star, because the purpose with a hole and a cop is that of the hands of the fines in the reduction of life. The figures of the future is not called ables, it is the kind of society of the same physics. If Buddhas is a classkind of the clergy and the cus from the faith. In this example, the religion is also one of the topics with the absence meaning of the host in the trees and backs to foot to the complexity. The function is to the side of the faith, the selects are the benefits to the public, and in the end of the day, and the word of the world. ven of the times the most battles. There is a way to connect to the establishments, the dialogue has been a yield to</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['s', '▁in', '▁the', '▁au', 'd', 'tine', '▁as', '▁the', '▁', 's', '▁of', '▁the', '▁Con', 'ary', '▁of', '▁the']
11
+ tail_tokens: ['▁', 'occupied', '▁situation', '▁of', '▁the', '▁modern', ',', '▁the', '▁persons', '▁are', '▁not', '▁involved', '▁by', '▁the', '▁Rit', '</s>']
12
+ s in the audtine as the s of the Conary of the couple of the states, in a shock to work of the Russians as a book. The waged into a part of the . Christ of thes of the 1970s. A s drawn on a combination of all of the remnants of the school and the family and the head of all the princes and the defeat. Though the Egyptians assaults as a large battle of an spring a era was complete with a wide statue of the Greeks, the sound, the ity of the poverty. In the Church, the collapse of the commissions were ed by the death s within the Indian chambers to the Crown of the British mountainss of the Russian and Greekes in the Great days of the pre-netinals in the layings control in the Greeks of the kings on the Church of the Poles, the law agged with the Americans. The positions of the number battle with a scope of the craned and a battle of s as the sam of the families of the classes on the British and the force of the life of the ease. (Bin the damp of the character and during the surface of the softs of the army and the pleasures. All of the Germans and the located of the islandss also an tse and in the holds the owner of the Indian and the earthlys and leaving the view of the s in the region of theage and the operatives. Unlike the cluster of the grace of the Swedishs, the earthbe a managed origin to the pae of the nation. The rs and the scope of the ion, a number of the movement, a steep that of the aliens.eld in the s, the article, the gods, of the narays of the Egyptian nes and the evils of the stack of the joy as in a Christian. As a result of the sea was stills, and the consequentments of the law of wheels of the Church. The the ached and lated in the sad Greek and council of the later charge. The s of the base belts on the region, the as and the truth of the intense. The demands of a society of slightly and morals. The Roes of the tear of the streets and the duos of the body as a experiences of the tes. 
The existence of the initial scene of absence of the army, from the guard to the arrived, and the weakis of the Church’s most life of the shocks of the law surrounds of the Greek. The turn of the issue, inner the codes of the societys and support of the politics. The heuss the absence of the prayers, the s of the position, a position of the outside of the rebellion, and the ls. In the event, a picture from the philosophy of the property, such as a description of the chart. With the watch and defeat of the island and the nes, in the middle of the Spirit, the poor and the Russians are with a control of the state of metal, on the left of the Ortho: the formation of the alphabet. Ass of the actions in the primes of a state in the city, in the situation, and the crisis of the te of the situation, and it is prompt to the ground of the ns. teaches in the growth of the discovery of the Church is located to the end of the shadows of time in the end of the display of the scy. The expansion and the passage of the ston and in the preparation of the halls. The entrance is remained in the development of the presence of the eras, a perspective of the ances and a the position of the existence of one of the beasts, and it is essence of the western agen with a system is present on the face of the present, and called to the s of the era. While the ports here is neither and here in the concept of the referred to their aiity of a jus, and it is a part of the actual relations of the power of the nation's folds. The ss of a faith of is connected to multiply with the head of the disappeares the remnants, and upon the battle of the presence that is the power of the earth of turry. Though the officiality of a Georgia and a occupied situation of the modern, the persons are not involved by the Rit</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁contribute', 'ancy', '▁that', '▁the', '▁space', '▁was', '▁forced', '▁to', '▁practice', '▁at', '▁the', '▁ground', '.', '▁It', '▁was', '▁in']
15
+ tail_tokens: ['▁first', '.', '▁He', "'", 's', '▁', 'a', '▁type', ',', '▁too', ',', '▁', 'he', "'", 's', '</s>']
16
+ contributeancy that the space was forced to practice at the ground. It was in the north of pursue a clans's easier. The he sold the development of operation, and to the L.polit of expert, the man received a hand guard on the space. Back in the exact powder, he was a board to call for the households a year, along a year in February. In a year that spoke with the governors of the joke, the beginning of a eberage, arguably a little time, and how the man was a return to a jail. a k, he watched a year ago, with a discovery of the life of the city. The part of a excited man. So that it was no ability to run on the wall. He was a real awardsmed in the seat, came of a big play, he was forced to the comedian the other side. The year, who was the coordinator and a few of watching on the roster in his night, that he was a figure in surrounded. But he sat with, so, he had to put the dog and a beat knowledge of his son, the man's wife could be a blanket for some of the station as a retired. He was back on the way, of a leg and a friend from a walk travelling to the dog, and if he is a piece of name. The caution that is traveling a chance he is a society the knowing, he said, and a teenager of the a man was seen and ed. He was shooting from the setting of the Xr, it was a slow. Sometimes, all, who had a remained, because he had the hands, with the front of the clinic, he thought he'd be. 'O that a n, I don't want him that it's a decade or a chase,', he said.' he's say, got, a whole garden and contribute. In this case, from a path-one or a man, but he is a great slice of a guy. He's played, and he's a leader, not expect, and it is in Pat-table.' Nevertheless, he has a huge score that, and the rest of the past in which, he has just taken to a big approach for the man. That's be a strategy that's been a bad and seeing, the person, and he's going to me and it's a great shape, there is a bigs of a shine, and so, with the first team of development, so X, he said, it's a head guys. 
But now, that, when a got, a position in a moment now. Take a piece, to a single country, 'What any stuff, I's he w well or a little bit of the guys, that's a 's so'. I don't think his name, but he seems to be. When he's got him with in his mind, even to the championship, he has to a race. But the likes is that a more deep, the team. He'sn't on the rock. But, he's the whole thing.' that, he's alone, is the es. It's the edge as a player,' he does translate, he's with that it.' So being na man, and if he's he's done or so, I've got a chance. Yeah, it's especially not a thing, and that's going to a lot. His has, a 's.' He natural,' that's the lack of a hop of people, and that is a big guy.'s a man in a little person, he's murder, because he's now invent.</s> He's a coach of the stuff that's the 75, that's all a future of this, else, including a coach's visited and percentages. And that's great guy. He's very well, this man, Dans is a big environment. I know, that's ability. He's that he got a player. He's done in the league, that's able to get about the first. He's a type, too, he's</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['a', '▁evil', '▁in', '▁the', '▁country', '.', '▁It', '’', 's', '▁', 'n', 'a', 'ry', '▁to', '▁of', '▁the']
19
+ tail_tokens: ['▁space', '▁for', '▁the', '▁most', '▁', 'ven', 'ure', '.', '▁Watch', '▁the', '▁post', '▁was', '▁taken', '▁to', '▁lead', '</s>']
20
+ a evil in the country. It’s nary to of the tifer, with a sight towers, and affecting the backs in the opposition time, leaving the highests of missiles, slic and s and nids to a cultural region. However, the splinled from a couple of the region of the migrans on the northerns with the s on the continent. So, the formation of the oil and the sad causing the stabs of the Italianments. On the styage, the fronts of the Russian ts and the trench and crown since the top of the sy of the t of the earth and the poor. However, the region is inline with a st and shorts, and the stand the rest in a finalment of the region. Most of the necks and the sads from the sta, which is the forms of the terras, the s of the aliens, niyas, with the rest of the professions, the fores and truth of the rest of the stands and the life of the kings of the first performance. The spreads of speed and the st of the Russians. When the s in the places, the face of the feeds, and even as the stykings into the early universes and the converges in a peek of themay and sa in the ts of the fleet. The heads of the st stists. Over the res captured to the stas, and they are d to Earth, the ses with as to the crown of the s and in the darks. In the sas, the speed of the st to the stys to camp up a tys on the us sasuauity of the Alaska. Though a s on the entrance of the crat cles, the Britishs and the giants and the ts. The sts with a hundred side of the sparked the risen on the back of the back of a tw. It's a elephant in the tyaa sl. Some of the regime savi a kingdom to the collapse of the sks on the pres, and that the s and res from the beach. Like the enst s and the northerns of the mons and the earthmetri in a fragments of the nuck in the crown. The sad in a craes of the sty on the seas in the backs of the res sty. 
During the structure, lin's face a thick t-s scraped a mission of a with presumably a whole outside of the originals on a front of the backs and the sea.</s> By the Commons, who's a vacant93, in the market a tie. Over the Neo's hornship of the invasion a Images of the race of the horn of a tas of the hunt of a s bo a square, and belt in a fire s. The sade the back in the imps of the a collar to the body and a silver in the darks. After Alaska, the tage on the left side of the hip and became a heavy body. It's a bad display of the life of the playing. s in the sweeps of the sta with a army of the mudt in a distanced warrior who swords in the second force, a wit of his life in the Atlantic communa ground. The healing of the inners. Fear in a result, the stage a piano printing, and after a life of the danger a 40 fracture. a enpresent on the arrivals the surface, a few fighters, and a complaint in the ta, which describe in the south. In the absence, the victim was claimed to swing a portion of the territory and the, and the rise of the sight. As in theuris, a parpointer, who was managed to disrupt the Greeks, and as a man shot in a danger. From the end, the tas a space for the most venure. Watch the post was taken to lead</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['s', ',', '▁', 'a', '▁map', ',', '▁on', '▁the', '▁', 's', 'ex', '▁from', '▁the', '▁sea', '▁of', '▁']
23
+ tail_tokens: ['▁There', '’', 's', '▁the', '▁idea', '▁of', '▁', 'a', '▁rest', '▁of', '▁it', '.', '▁When', '▁', 'a', '</s>']
24
+ s, a map, on the sex from the sea of a spot in the city, resulting in the US in the city, French, and despite a tu of the life. While, in the end of the world’s play is the situation is a Democrat from the shape of the sam house. It’s a a lit here in the city, where a ringing, and small able that appears to be is a host of sniping, the central embes in the places and hidden in a crowd sting the black tic of the earth, the reveals, of the nes of the beatens from the mountain, and the kings of the nation. The car is a speed form of the background, a niracial er as a symbol of power. The a number of the system’s car, the old figures, on the side, front, and off, it’s painful to the pilot – in the planets. It’s the beauty, a number, in the city’s difficulty and is central to the neighbours. If the media is watering the trolls out of the house. a is, a giant-right time, is a more of a feed, and the chings of the area a courtesy by a ghost a car (asa shorter, a strange tvie of a man’s nea, and is a part of a new teacher. With the canals and out of the Londoner, he is a second to be a third day to go. At the ches, the male of the most of a little knowledge, is a capable in the part of the desert. Like the tense, surprisingly less, the earth deserts is still in the old, the army has a tores the rest of the floor. The recount, the top of the st a bedroom, and the seconds, horses, a forr. The weight arm is a home to make the idea of the man-ses in a plane of the surface and the head as nast. In a constitution, the star, the atals of a screen, a helmet existed — the ned, the can work as a nect, the moon with a very small sound. a man’s collection on the homeice, it’s the energy a scene, but the st’s ash the moves. It’s not a time, it sits of the tech system. It’s only the desert of a the work of a development and in the terms of a job. king the race, and a sit in a residence a super. These planets, more from a member bike. 
This is the guy of a ause sight of the bombs and a broken scene that takes the face and a lot of the forgotten. Wood piece, one of the joke is a message to a interiors of the compositions that sits on. Most of the disappear’s is a position in a dark sum. a is the world with a pols of a large, and a numbernote in a piece of corruption of the middle of a tin pgged by a paint. A number of a helmet with a pols of a spacehead, a judicial on a .ming all of the tles, that’s come to the left. The guards of death, side is, in a space-in streak of the arth lorss of his lk-of-worlds, and it is a chem of highway - a space of interest. Although he is a thing for instance, he thinks about this in a space of that as a mark of a Russians in the leg-sls. And there’s a object of the man’s armyly to the stories. It’s that because, it’s hard to say that the sequence future, in the moment a watch generation, in the ta-as’s off a nation. It’s a swarm with the colors of the events that outside the work is about is a sports blast. For to his stars, however, he runs to the work, on the trust of the ss. There’s the idea of a rest of it. When a</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁banner', '▁of', '▁the', '▁conviction', '▁in', 'k', '▁as', '▁', 'a', '▁child', '▁house', ',', '▁the', '▁basement', 's', '▁']
27
+ tail_tokens: ['▁assault', 's', '▁of', '▁the', '▁leaders', '▁and', '▁that', '▁the', 'road', 's', '▁were', '▁passed', '▁out', '▁in', '▁the', '</s>']
28
+ banner of the conviction ink as a child house, the basements a huge unit. A of the police is a male presence of a man to the death of the men with a declaration of the death of adic and of control. The stabs are in a summer for the body: a language origins of the darks, with none of all. The results of the crosss sex in the role of the hee. a merely receive a 300, the Angels and forms of reading and charge of the language, a ruling, the origin of the Church. At the hands on a writer of interest, then in the operation of an doctrine, and the development of the morals and the teaching of their church as a desire and theaught of the brain and the response of the mind of the Great Romania.Vess, and the slie of a contact and in a modern aid. The experience of the part of the latest era from the area and control of a judge, who plays the era and unit, clearly a class and of the tutter. The man-urd Forces of all passed upon Toronto, in the elections and their sams, which convictions in the hands of the general and state of the priests of flags; a observed in the order to respect and issues. Again suspects to the constants of the bill dealings the number of the hearing, and the back of the head and clarify with the link of the pen marks. Once in the organizations of the nation – a position of early 18 – and the man's failure of the members became a massive and amidst of the rulings. In a list issues of the sters, a substance of Ministry of the consciences and the tries to bring on the death, and the e of the issue of the U.wts, the support of the Christian Commons. The unions, and the support of the Moscow crisis and the representatives of the genuine guards of the Church. During the capacity to allow the ministers of all aspects of the Church, and the modernfix and the sound of the forces of the Churches. Republic of the sovere of the society that left the sia standing of the canal with a chosen chance of the Revolution. The prosecutions of the rum. 
The anthros of a victim and Jens, contacted the English grew with equals by both free and the Constitution and the tension in the back of the issue of the house. During the latter defeat of the Romania islands reacts, and until the panel retains all the control of of the services and relations newspapers, and backs of the communities. The Trades of the inners of its teaching, and the help of the valleys of the esctines and the e, the part of Queens in the name of the Church. The march with the atids of the marriages continues to hold to the crown of the ification of this from the Body of the kings of the House. Within the cents of the Church, some also written to the Olds in Pakistan, Revolutions of the IV, and the shadows of the spirituals and text thes of the s of the church lors, and as the Greeks of the west of the Jesus. The first sees from the Church of the gainss, among theologies over the Church of some of the lack of the Cap passages. Forces of the informs of tri, and of the forces with the forces of the Church's origin corporate operation of the priests of the British, and the richs of the Church. The vast prims of the force of the life, and the guards's influences to the struggle of the Church and remained in the Church of the House of the peoples of the afternoon of the newspapers were blood of the main s<unk>, and the beginning of participation. The pre-laws of the men of those from the beginnings, the Davis's presence included printing their works. There was a strong political and shadows of the Dimen of the sen their language. These slaves of the predecessors of the baners of the conditions in the hands of the Russians, and Priest was gone. The ship. The relations of his youth in the center of his fellow stems to understood, the final church grus and considered due to the appearance of details. 
The position was removed of the population, and more from the final operation of witnessing the hands of the Church of the Koch managed of s explain some of those, had to remove the assaults of the leaders and that theroads were passed out in the</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁greater', ',', '▁in', '▁the', '▁absence', '▁of', '▁the', '▁way', '▁to', '▁exist', '▁in', '▁an', '▁alien', 'able', '.', '▁In']
31
+ tail_tokens: ['s', '.', '▁The', '▁blast', 'ing', '▁in', '▁', 'a', '▁hole', '▁of', '▁the', '▁', 'cle', 'dem', 'e', '</s>']
32
+ greater, in the absence of the way to exist in an alienable. In a living cloud, the call to the s, the deathss, a lightretaining away the opposition and part of the universe. This is a peaceful book is a failing begun to come with the head out of the country, the figures of the s, a sabocial aious society. Here is the sks in the society, and the rest of the slin with a lia movements in a country of the s. The s to the s of a sas to surface the top of the strays of the DNA. It is not a body to the Ministry, that is in the neck out of the battlefield and mountain present and pocket in the art book. The dawnes is a base of a avy, with the start of the crisis, and a kind of regulations, as the bed text of the wars of the two personss in the ns. The only distance, a consensus is on the front of the Russians, to blend the human beasts of the s of a friend. The rapnas corresponded, the military elections, and makes the violation of the dead and stereotypes and the society located in the rebel. The surplus, the time is round to a pre English king to the snas and of the matures, and the first south of the year, a parts of the Hindus and the support of the British die and combinations, seeing in the development. One of the racism is a sign of the uniled, to the defeat and the aliens of the deadtlers and individuals. referred to the sit-line with the sources of the streets, part of the orange Museums, and the view of the kas in the rays of the army camp in the country. This is the st, slled in the sevens, and the conditions of the sea Museum, the Wea and thements of the j wheels of the hands. s. Thedom of the walks from the ripples outside the Greeks of the gate. In the time, sins a sky, akes of the gense and a tier of all of the poles and slightly aliens of the Greeks and high. The tear group, the son of the exhibition, and the dubtion, the reviews in the performance of the laterals, is a society. 
The formulas in a meeting of the ethers, the letter of complete the 80,000ies and are a person and the king. The a transformation of view as a statue in a grandition of people, and to bring the statue of the verss of the forces of the 93 and collective, in a head of a re-lawed. The armed of the weathered on the mission of the philosophy ofpur, and the morals. garbage with the face of the mountain barcolos a lora and with its readings in the middle of the Chi, the one of the Russians, of the bones of the statues, is a part of the pituri comics of the alphabet. sia triumphs in thees, accumulation of the Russian pins, the pages of s on the earth of the rituals of a passage of the hogs, in the Greeks of the lap and Russians. The village of the rest of the origins of plus the explosions, and in a 16 80,000, the disappearsslaw of the Russian Russians. The Q of the northerns of the sea reminds, the 55, and the side of the powers with a total front of the cras of the village, with the aliens of the town and the head of the Russians of the city and a and the part of the Indians. Doa, the possibility of the tourism, on the enas, a appearance and a cloud of the bodies of a dians and sworth descent, and with the text of the bottom and the king of the kas, the res of the unitity. The present part of the Russian horns is a lesson of a sports of aches, and a battle on the image of the inning to the window. Masters the present and experience of the art and understanding of the floor of the injuriess if the fields. But in some of the extent of the coloes. The blasting in a hole of the cledeme</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_10k_C1to1024sqrt_step027000_gpu3_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['s', '▁in', '▁the', '▁head', '▁of', '▁the', '▁A', 'an', 'a', 's', '▁head', ',', '▁the', '▁exception', '▁of', '▁the']
3
+ tail_tokens: ['▁from', '▁the', '▁', 's', 'er', 'n', 'e', '▁is', '▁south', '▁in', '▁wear', ',', '▁and', '▁the', '▁first', '</s>']
4
+ s in the head of the Aanas head, the exception of the alaw. For ancient, in the eastern town of the Mexico region, the confederation of England and, in a king of the depths of the region, and a number of thene of the region of the left covered in the face of the adning land of the Outside of region. The draea, a population ses or the slaves, the locations of the iords, trying to expand the decreased of theirs with the tarians, he, and the head of all of the fighters and their king the journeys. The plans were even the rums and assaults theirs start to located in the nowhere a breeds. However, the ni of the threat to avoid the back of the precision of elsed a plant. The the saard (47ad rebel of a collection ofjs, the living on the south, of the sage ry andcousoxy, the allos. The life Inside, the s to remove the range of the hornetage. The male and plans from the rebels were, from the disshar of the beings, splits of the oss in the lessers of the raids in the life of the scaneses. In essence, the fossilmuss, the horn, the rs, the Fathers, and slightly their communities in the middle of the battle. In front of the FS, with the arrival of their machine, are collected to mountaines of the rest of the back under comprises of the restorations, the origins of fes as the threth to the ene. es are, and is survived by the e of the first a part of the so-turenes as a ration of their ls is its. nned chiefs of the lys with a friend. At the tenn, of the period changed from the meritos of ronegousea neser, as many of the Description seas ese, as the source of r-Maer expression. The main dwarfs ns of the nejadu in the life of the land, the ad on the ne as the a of the ns and a range of beasts from the larges the nes to 10 94. Gen. The pre-games are used to investigate the ship of the rest of the exteriors. The Greeks, also concluded the fair conditions of the origin, and insnological on its ground. 
The colonials of Sydney, and forest of the structure of the DNA of the as, along the area. Columbia, with the nes. In rsns that are a cathedral, on a British pse of the Russian and exceptions. Its the individual and the nes's dig, as such as to the understanding of the Earth, are a frequentd truths and kings of the dream of the epics. a sia [Rrdics. The Ceres of a equis, the parishees is a common, inside the system. As of the area, the two comprises of the kings of the sne, the eal's sne is. Again of a vast ly. The recently, the inds, the period of a few seenes in satis. The sa range is a ne ofvore in the dithsa ofs the ned nae, neses is in a preization of the limbs. Today, protections and the consequence of the ncans, the only to encompass the care of the nes. It is as denome of the ends. The ind, which is a term, that the tradition of the frees is the sight and is the task of the sene. and the region of the nsel forests to the skull is a different path of the species. The nlands is a whites, and a mouth area of sight, a unusual se ranges to the neolsal. This experiment is view from the serne is south in wear, and the first</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['a', '▁woman', '▁in', '▁the', '▁first', '▁post', '▁as', '▁', 'a', '▁', 'n', 'y', '.', '▁It', '▁is', '▁repeated']
7
+ tail_tokens: ['[', '46', ']', '▁', 'a', 'colo', 's', 'e', 'd', '▁the', '▁grass', '▁to', '▁When', '▁the', '▁story', '</s>']
8
+ a woman in the first post as a ny. It is repeated in the same cloud, the ths, the lip, of the neck and the neckest of their head. In the most state.tem, exactly, is estimated in India-gen variety, a set is only the shape of all of the world, a planet. The lands of the cab,tlers are still in a na is a comally, though most of their Roman line and the life of the animal.[11, this time, theology is shatd in the West god, whereas a very part of the pirates. The ny forest of a set of burns. The grave of the art body is a m collection in a powder and a agriculture. The interior, a ske is a differencer-47ur-longing. The a-y) with the experiment of the structure of a a fomos, and ashs and memblrated the ve. The set from a dation of all (3s, and it is a f of wood, and a woman, and a forest. The distance of a heads, or, the star produces sy moose at the500 size, it is now not a sexual world. The person is roasted a a woman, or female, or a-se. Againrick is a dream. In the kind, a vaporedr mouth. It is basically a small glance of the life, a warrior of the world. The leading to sat the ur hidden, cloud like a h-al ne with a muscles. It is true, a call within the range of surn on the slump, or the brow, in and a d spliced or remains in a Medi-eae on the ts. a nal awork for tresses, dark, like arouse. A, a- as a light-singen, a and ears. In the narratives of the left, in the lines for the dat falls. This is a shorte and a two of lane. The main as of a weak floor of the rest on the Garden of sws and their oppositionings to the generosity. English, the city of the lands in the sex and a a inner-casts- 27th century.[10ed this as a rodde in the tour, as alyth, favouring a shaped-ped-century and hornes in sathas, and the rs of the premature. The proycous, standing with a fixed e-lock, lan, a ta er with the path. In the kings of the Straalture, lands in the head of the earth. The culture, ad of the 1973, in the lings and s who only to change the the sk of the front. 
Following on a ed of this, the dant n on the mountain, taed on ne. Thene was destroyed. In England, however, the d and hide the rese MPs took 500 representatives to the survival of the wing and just as the wolves, the lobfirsts. The aka in development of the s of the parishes, and the easternome. In a reas, the nee, was how to come in the fronts in the sente, which to them in the amount of the precans of a position in the front of the rites, and the end of the vessels, and notices and the sausage ese collapses. S lap, and is the top of the sirs in the minors of the tand-zars, the pans and the dig. The sid in the ancient groVhins, se the knowledge of the roads of the coasts and the tradition of the Grays. Also of the species is affects the influences, hide the s of s on the front of a chatws Christ, and the ancient sed of the st large in the forceds of the hats. Europeans and the assets of proks range.[ur, but two Indians with s in wars.[46] acolosed the grass to When the story</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁and', '▁the', '▁', 'chi', 's', '▁of', '▁', 'a', '▁goods', '▁of', '▁their', '▁collection', '▁are', '▁', 'acted', '▁in']
11
+ tail_tokens: ['▁feet', ',', '▁at', '▁least', '▁three', '▁of', '▁the', '▁time', '▁of', '▁be', 'e', 'd', '▁and', '▁the', '▁surface', '</s>']
12
+ and the chis of a goods of their collection are acted in the cheap hospital of the a’s organur. But in the subject to the mountain as a e, because the state resemble on the distribution of the first site. The a problem, set into a to the northern and ae of the tigad, the head of the left oftsa. The deep front of the cities were, with a third of British path and the swatims. concern. In the Greeks, the surface of the three leadsd gangs and fires. Thes of the absence of the con path, followed the das-yds, the chamber, and the w of the north and Titan appearance in wearing the gailesa. The name he left in the network of the a aed. The other tas d the population of aed, and the cows the collection resulted in the face with a varietywave of a deed to a city-dd car, with a ds of prime, with a delivery of the Syrian snas, and stated in the remains of any, the henas ies. The unprecedented material. ead and the reatated to the surface of fighter, which the t hidee d the press. In the period of ae, the adors and the sea les the glits. The battle of the eight- canceldFS such as the es, the o-Nese. The Overcanas comprised a separatees, and with a host of their route to a freed villages. Since the most of the violence, the co-pe on the infare of London, ahorn of the Sido Belles and on a quick journey. ayne, the Ejue and the frontrs. By, the followed with a aside of the light of theists of the logodom with l taed. By the top41, the 2006 sta, sud with the killer if the northern forces. In 1962, therated islands of the included numbers in the saes of Males and the re to return to the out of. Like the aed s Ceralaes, a re-identified with a host ofies. At they are Greek ned, the communities as open to the casteens of the es of the houses. The identity of the origin, and to theguardie of the signs of the ground. There were therans of the instruments of foresti dias were the ash who haunted to, in the as ofbugs, left in the U.S.a. 
The weather aligned with the water swlled the crown coast and of them in the city of the nets kst yoenologicals, which a live and open, s with the muses of the canns, the sues of the sec and theturned. It's the tro in the end of the top of the rather sty. This, ne, the three of the snical in the wallet. All of the border in the middle and a large army of the major shore. The amount of theanes into the tracks, a majority of the shell, the ancient tip sts of the s of the genty, a massive society. The of the time of the chiise, as of the objectedd, they compounded the definitions of the assse sk into the es. Today, the fire in the island of the vessels, the US, and back to the gatherings of seas it is a rapidly - worsenoses, a quick buried in the Columbia, and on a th the sight of the mountain, and the beside mountain of the friends and se, sy, with a carry on es of the s, a joke of relations to the backs in the face. In the rescue of 13, the Russians, and the way of shit accused of them. In the Moos is a feet, at least three of the time of beed and the surface</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁potential', '▁changes', '▁to', '▁the', '▁land', '▁and', '▁at', '▁the', '▁time', '▁the', '▁heart', '▁site', '.', '▁However', ',', '▁the']
15
+ tail_tokens: ['▁crash', '.', '▁', 'Obviously', ',', '▁', 'a', '▁light', ',', '▁I', "'", 'm', '▁', 'a', '▁', '</s>']
16
+ potential changes to the land and at the time the heart site. However, the stion merely published to the region. The Germans identified the company a featured in the school in the state of a house in the x, a unit in a problem. It used in the many users of the encounter, EG refer to the seconds to represent a a standard database. During a plain, cents of the t-sels, trying to have the scientific library on the ladder, and the source looking for a hologist to thenado from the countries to follow. From a way of the building is likely to be on a wall call the place. The kings of a topic of hope that would be ad in any, and in the area, the deaths, and the state held the area. The etras source of the tigr, and in as., the referred to the materials. The standard counts of a person, then attempt to bring the sat the expenditure with a treatment. The kid-esis, and bad, a noting by the area, but the water was one of the state landed, and a such field. Besides, a result in a source of interest. The goal was on a a tip as a orange,ridge from the center of the region, at the rurgeon in a U.S. The . L.mann., a little handr in the area, a bad. It's largely on the scene, the angs. It's not common, a lot of a computer the article in the system. The main structure are used in a ella of horn. The group worn within a few men with a sed, water, began with a crowd for a digash in the truck. It's attempt to ahold, and where a post from a chem region. At the college Michigan, a surprised by a giant, a lot on the top. It took the story to a dog. A roso. ..a is a crowded. I make it a notebooke. It was a little to the old, pieces, but the outlets always blame. Then I was a little a. . a 92. Call in the name and I called on a seal and the Galaxy out for the rock. I've a thin fan on the night. I found it a well-per. That ad, funnys, and a few. For now, it's a taste like the taking and ringe sance. I call in the top of a while, it's worth all in the end as a fews. It's plate. 
I might be able to the line over to the distance. But picture, he's gonnasy, p. And, if, in a few more, the time, has a. and, . . p. . y's . . . Every work, well, it's a. And m just up in the edge. fa, he's the floor, and takes to the youths for him. It's not a thing. I'm not to go. as. And it's a little to do. I don't know, yet, it's different. I just got the r. It's, it all those. I's rocking the scansts for a s, in a level. abell, so, it's gonna p to the center in either. Because a eat, it's easy a, I'm a h. It was a critics, though, but, ake of the p. So, the time is aih. I could not get to the stage. I even put the way, it's not, and it's the hour. I didn't want to drop the car, in the cu. I took, I just a helmet in a couple, and I'd be to getting it and take it for people. I was, and it's the best, not. I'd have a jokes. I got in atop, the group is a few p., it's, at all, to do, and to do it then. I see the crash. Obviously, a light, I'm a </s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁of', '▁the', '▁head', '▁of', '▁the', '▁temple', 's', '.', '▁In', '▁the', '▁face', '▁of', '▁the', '▁', 'n', 's']
19
+ tail_tokens: ['▁caring', '▁with', '▁the', '▁', 's', 'n', 'e', '.', '▁The', '▁for', 'e', 's', ',', '▁the', '▁', '</s>']
20
+ of the head of the temples. In the face of the ns in the German surface, with a frequent towers in the sts and nia, head of the Syrians in village, and the almost the rest of the City, to the aginge communities, to the way. The scheme was a perfect messenger background, a group of fashion, based on the planed by the bandys, as a distancelands of the head and the Poles of life in the fronts. In the region. In the country, the scepttedions, the worstcras, in the field. All of the wars were called, or on a osad or rivals. The shells that, in their seat, largely a zed of their beginning, and the only skth of the earth of the world, and the world's backs, part of a de-d tour. The remnant of the wings, in the nowns in the picture of the sa flash of the fites, is the exception of the media, only their ths of Israel. In a 100, a-Zad revealsantes of the syos in the votings, and the Lakes of their limbs, located, and the support of the elements across the villages. Sese dyes the hotel, and the Christian-aes within a popular docks. Most of the swords, are support of the offship. The battle of knocked, a out-and-80s and the sole and led to the shell, kept in this year, under the antes, which in the assault with a pre-gy ranges setting a head on the resulting of the orange coasts. Over, when, these or stories ed or Auffyes sit in the sea, the group's Federation on the outeruouss. A peculiar to these locations, the distances of the sybanes and against the tyas, theists took back to the deep. Thoughes, according to their backgrounds appear, the delys and backgrounds, only as the horns on the Catholic. Can a tared, and their frontones in the st. The a hundred. The ge-a a era is more to the yeplones in the villagess, in the s of its re- miles. The longer progress is enesy, and the lineangled ranges of the rests are tightly on the group. The nature of the -aclas usually from the in-uses of the food of the originals is a list of them. 
Because its words and es, a structure of theirs is a way to pose a hole in the world with a 100-yes. The follows, a e-ks of the featheres, and a dding is a crown. n, is a number of that a woman who like the nay, the individual tydacrae. However, there is a passage although, to the merage of a ty area. The ty-a-spping in a te, a e- or a taye.c. The pictures has a, but now as a semi-teach of the lives in the distances. The standard, ed elephant, is a reth in the world, at the 's styc of the s. The stereotypes will be a figure. In to the ks, only. During the one's go, and a ser in the store, by a thieyr. The sometimes the nes is the true body. ne in the rest of the trick or s, a decade of Yu-es, and the sts. It is a number of pre-dnes. The problem is a sign the tes of the s tyth in hand. It's a lot and a choice in a series, able to a ty for the sight area of time. The elephants in roads, although the basics and decline, where the tons of additional and become a joy path. The space mountain, in Georgia, in the compiler ths of the caring with the sne. The fores, the </s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁losses', '▁as', '▁', 'a', '▁set', '▁of', '▁lift', '-', 're', 'e', 's', ',', '▁in', '▁', 'a', '▁']
23
+ tail_tokens: ['t', 'ly', '▁at', '▁the', '▁feet', ',', '▁', 'a', '▁', 'intestin', 'e', 'd', '▁', 'a', '▁', '</s>']
24
+ losses as a set of lift-rees, in a s with the disappears in re, the region of the es, a number of relentless and a metal number of origins, s, and, appears to have a problem. The character of the system to die of the mag, a similar unconlling, haer at the hands’s, which passed the border of a fate of the mountain on the front of the B.Sin ainp with. A night of barracks in the horse 30s, and lay with a deep, that is the end of the predecessor in the lings of the kings appeared in the ass of es and on the kings on the other result, ring a zes. As the cras were driven to the s on the squate. The ss in the region, and a connrate with a fo distribution, became an ad covered in the dlings. Since the rapid the tempo of the city, a second, he be yet to see it. With the case, an battle and attack, training the s, ahead the front of the mountain, he was a vollegress the picture, with a pool of a neck, and around the right win. The just was apped by a small look on the rest of training a for the inside. The he and the enemy with the wall s had to get the end of the field. By water, the spilles in a number of the es, the off-offical contestes to the sants on a friend-, he with Jane below. The he reached on the front of the left. held on the rapid, and he took a do, drafted on the stripes. In telephone, in the boxs, the owing to the mark of a horriblepoint wall. The race of the rum and a front began in a sanus, ar of the margin remained the gorge of a. The bard a took the front of a moment of his sle host, and effort he was the night of as a spring look. The cusing front of a mountain, t. on the front. tes, a CP met and a sed one most allowed, as a number of a d t. The clinic haded it for some of the other communities, the lock of the lab ps, es and re on a five-old sight of the ram, he worked him. On the words, he wrote a thirty-year, he had an serialin on the ro reveneerate kill if he died. 
His ice on a ishbox, leading the desk in the leg he traveled it to a crime, he rsen thesury. When the sat up in the rock, and the man res it to a slee a wall, rs. For a wide wish in his head a head away. He sacrificed in the ropes of the kings of the crowd fire shared. When he was to a local heroin charge. He was a mission as if he had to get an attack. In a similar interview, he took the upper edge. He set his out of the road, hes from, the backs had guarded a head and his matches. On that night, in his pieces from the high side of the lovely, ed inness. All’s had the hit. When he was a st, s to over a eon who esp with a a naked o aroll. On video, perhaps he was one in a Jewish of the 20th, and although a revolution, in the water, allowing S.R, and a t.der.ed back to the surface, the ss into the open machine. For the house of a sp. turned out, with a’s house on a huge, and served on a tight eye. When if a rod the Battle field at the time, he was a rwarh. When he was to his mouth, he sold his speed to the point of gas. Watch felt the voice was ass tly at the feet, a intestined a </s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['a', '▁is', '▁', 'a', '▁', 'n', 'e', '▁or', '▁', 'a', '▁head', ',', '▁but', '▁their', '▁presence', '.']
27
+ tail_tokens: ['a', '▁number', '▁of', '▁', 's', 'qua', 'able', ',', '▁and', '▁the', '▁Communication', "'", 's', '▁', '▁spots', '</s>']
28
+ a is a ne or a head, but their presence. The ne, with a te salt. The ne, which is a na medium and ws, a brick of the netology, it is a fart of way to the order, in the ns, also 2 tes, th from a ne ne and he s a n, aed a tiaes in the world, with a sample. a detail, whereas a catch is serving a w. a ned in as nea,surrounded as a group, with a thon a koh. The ads ofcing and a nie stacks from the upper-like with a nes the strip. The top of the e, an ad-ms in the birth of nas, and in a cripples. The t is d by a eerdes, and the as is a it is discovered in all of the epic. They in aagas of the tya, a ts to meaes to ne.js from the record and a the tee nes to absorb the tians to the millions the lus, and highlyd ls. The ets and ts chee in the k and a rust in the wrong head of bulges a second and the border under the ns. The spot is a strong, the tas lores borders on the greatest of the Poe, the DNA is seen in the side of history. Still to the young seas, and the as of the tes and the dozens of the species and s in the am, but the ridge of the as. The delicate nander, andningly sms over the line of the uppers, and according to the ws. reks is a srog. The oral es and trained the bottom of the city's ns. The outlets is t of ta.k, nl, lone, spotted the ts in the replaces, the bears in the City and elsss of tiny, and as d to the head, and roded of the river's, the serot of the area. One of the famous tsqua on a large area of the inner and Classes for the portion of the order. The material. The ti of the tira city's inarth of the violence, the area and the thorn of the 's waiting. According to the invasion of the 'sttars. The passage is a ted with the plans of the world and the city of able to only a k of the largest. Can, the tiothoed to desert of the a billions of teds in the area and the possession of the f. The lors of the ted the landss of a teds of the upper,, with the little tis, and gangs as the t. ed in the middle of the area. 
End.48] The Bookders a way to mountain of the Hospital. The 's of their shell in the front Gulf and the first fleeed the chis, the ps. Theret is a more a tios of the identity, and the s ned in a r. This ups in the craffin. The path inner ths in the case of the firm, the en consumers with a t area of knowledge and the west. The 's not able only to a origin with the eroted, because of the mountain, and they can gain Christ to describe thefloat. In the part of the life and the generations of the distribution the river and destroyed, only damage the city and the canheailes of thehol, and in the deserts in the face and decline of the jungle Greeks of the stars, and the l to the mountain. From the end to the Caesars, entered with a number of squaable, and the Communication's spots</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁well', '▁in', '▁front', '▁of', '▁the', '▁name', '.', '▁The', '▁most', '▁of', '▁the', '▁period', '▁is', '▁living', '▁with', '▁']
31
+ tail_tokens: ['▁drop', '▁the', '▁Greek', 's', '.', '▁On', '▁the', '▁ownership', 's', '▁of', '▁the', '▁remains', '▁', 'a', '▁', '</s>']
32
+ well in front of the name. The most of the period is living with a hugeere, but according to the commons and the noe, is identity of a common cricket and up to the world. The bonds of the Dog is a variety of naeology of the socialists. founded, beed as a lands to the ridge, a th century as the age of klands. The a har translation of the launch of the eture of the Further worlds, on in a habitat of the rite of its Greeks to the size of the ne. It is to reach a hunt, although the Catholics and ranges to Syrianes the differences of their volume into the logging. In the old a core94 is ays, or in assistance with the es the kheae, the tae ds and aardirs. The kick is e, a rage of the wae and the para of a perfecte is a female, and the es of a macha ranges of ae of the other desert. The two klarges, ad of lora to the ally in the s and constructions, and the a mountain to a lot of villages, and azes of the Englishs and the sira. In the part of the sar in the aes. The dealings in their Sea, the loundae was, according to sas in West to the nishing the Indian, and a former aco-ei.e or ates of the a goaze-specificeri, in thediei, of the union. The dmosechie in the Indians of the Great of aes and also route to the tea ne. The surface of the passage, above, the 16ths received a more monuments and all worlds of the table of the aochias. In thethen of nes, the heaps of the milk and a ste ed with the positiondicFS. The main yed of their blessing and 20thys to threaten the movements of the website with a Universal army. One of the English left that the theirs, with the tialic of the a wa of the ns. By the Sea, the layial of neds population. As the case is a number of the sta horn and a, in the origins of the origin of the adaptation of pings and cloud. The tys, Sydney over the forest, the structure of theirs, as a koras, has been tested. The ts of the forest of inponese th and weatheres the s organs in comparisons with the as criteria of ne. 
Unfortunately, the stons, of the two species in the East, predominantlylike remains. Weed of the res of the sirac. Over the ad horns, insecs the ad's yee, in the square of the front and the rather of the origins of the fighting were including the untors in the ignes. For the 13 provinces, with the completion of the bridge or a journeyed men, which was, with all directly in the first repes, including the men and exeds under themorals of the them.[49] The nads and owners of the home of the sne and ahorns. In thesides, the compounded corpses of thens, and slaughtered, with the jaons with the te was firm into the encans. sted accounts of platform, but a war, not remede the city in the hour, the odists of the h of which begrined asguard the Islamic mountain. By the vesselss, and the eses, the s tas, but and the a ted. The expected tes a mos and in their surrounds and back to the nese, and the Chens, and serve in the compilers, grew to drop the Greeks. On the ownerships of the remains a </s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_llmclean_qwen36_35b_articlefull_10k_C1to1024sqrt_step010000_gpu3_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁road', '▁to', '▁the', '▁side', '▁of', '▁the', '▁car', ',', '▁', 'a', '▁man', '▁on', '▁the', '▁sound', '▁of', '▁the']
3
+ tail_tokens: ['▁was', '▁convinced', '▁by', '▁his', '▁rival', 's', '.', '▁As', '▁', 'a', 'cou', 's', 't', '▁press', '▁the', '</s>']
4
+ road to the side of the car, a man on the sound of the aides. The officer, a woman who determined that the jaw was in a, elsewhere, the pair proved that the bullet was a man. He demonstrated his a wealth of power and the elements of it, he was to appear in the fronts. The owner of it, he had sat in the din, when he receives a captain shot of a box in the head of the cave. The the s, a woman of a small helicopter. However, the ocean's success was displayed in the region of the bird's property, which was the buried of the mants and the sound of a 50,000 image to the taps of the cop from the camera's photos to the box. According to the ties of the side, the army turned to the ground of a counter to the center. According to the time as a "selfboy"" to the lands in a place a fored, albeit a dangerous name. Even if the highest of the bargain, or st with the owner, or, in the background in progress. But in this, the seat of the attention of the audience in the rebels. However, the owner, the positioning of the priests enters the division, and choke his fellows in the wake of the judges. While the st of a center of the head, he would leave the subject of the man's headside to the Empire. In the old column, they found on the side of the south. The power saw the dragon, the sound of the region, a pair dragon sat, a ror that, to the reception, the charms of the Richards Banks, fought to the ground, and a combination of the plane, and as he accepts. Very, a hypothetical chant, he is a duty of bones, a t, a spint, he s in the sky of the vaults, the doctor a veteran of the sat on the entranceing to his skins. Later, in his eyes, he was shot by nuff, allowing the ess, if he arrived to the vessel and a rocket to him. With the influence of the length of the journey, the daily's ideology returned to the stage. However, he created a mask, a window if he shifted to the commander. To level, he begins to go to him to the guard's wives. 
However, he walked into the the 1970s, ases, and his eyes, on the ties of the witss, he had the left, and Throughout his head, he cans of the sands, fail to life, he s to the backs of the war. He s his attention to which he sees the backs of the disguises, causing his Germans were seats on his guards, leaving the hands of a felony. He half the name, he hangs in the Greek, and a he watched the turf, leaving to the identity of the dollar. One of the st a lower in the east. With a deepstick, he he sackers in the sat to the trese. Later, he demonstrated after hes, aspulsions, and a Nazi, placed in his hands. Again, hisrobes was the greatest of the cried, he was the kings. He's blacks in his spins, because he lived as a halt, he creates his eyes. Still it was the Germans in the nomination of a worm, and he in a a meal of the wonder. He was then a man to step in a stay of the ocean, a hests and placed the knife and wound to the incars. It was just a local narrative, an order that he saw the army of the Germans of the Balss. Sometimes, the bats of the smells in the Empires, if it is a harsh one in the ship'spures. Instead, he to be in a scte on the task, he resled in the fields. Bloods, he sought the ship to his hands, serving to the Israelis, and walked the standing of the scenes of his Nazis's heads. He was he was convinced by his rivals. As acoust press the</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁the', '▁lack', '▁of', '▁', 'a', '▁form', '▁of', '▁control', '.', '▁He', '▁', 's', 'ended', '▁the', '▁keeping', 's']
7
+ tail_tokens: ['▁', 'he', "'", 're', '▁just', '▁coming', '▁in', '▁the', '▁guy', '.', '▁"', 'We', '▁don', "'", 't', '</s>']
8
+ the lack of a form of control. He sended the keepings, a child, layd to the fighting. "It's not the level of shape to the world. I don't know," he said. "That's gotten a pen in his head. But it's a question where he can't see. Then, it was a ir. Nohisa champion," Ryan said. "How can't remember that he's in the situation, the difference he goes." "I cut it, in a horse on the spot. "He's he's a bed that's walking on the issue of the laughs. The feeling was looking for the wall at the person's head was in the pitch. "It's a good guy in a footshake. I have to be clean. I said,''But you put a window to make the best way out," he saided, and if he'd have a good place on the gasroom. "I don't do it," he said. "It's not to the side of the situation, and I said he lay to the guys to take on the back to him," he said. "I's like, a 'making, 'd go to me.' he was a ringing at the door, but he can't in a shot. He was the snow in the conversation and he's passingclam gased. He was in the middle with the rib. I remember he at the end of the camp. Now he'd to the door in the floor and confidence. The pleas was also beed. "That's still in thepits. I'd be a yellow guy. When he's visited on the control of a he kept his store with a firm blanket. It was a bunch of clean. Even a good man, he was in the face, he s said. "And the way, I can't ahit. I'm a good. I know what was to me. If I'm a man, for a guy. I. I'm sure he said to be the girlfriend. When he was caught on the court. He defended the st, he was on the ground. "But the laughs on the rest of the a bunch, a convincing mood, I''t go for him, he's not in a round out of the floor. That's good. I have been a kid," he said. "He's the case, it's a lot out of the shit abed, he's. When he answered his sister, he's just a spot smart. I told him, he's missing. "That's good," he said. I'd believe the dover of the man. That shouldn't be a sport, and the course of the expression was. He'ret the oce of the moment, he said. 
When he thought he's plague and he heard the judges. "I'm heard him," he said, the I said. "We have a drive of the left, it's never happening to the floor, he saw a shot in the game. Then, he was showing his face. Hopefully he had that he had to tell. I didn't see, he was a blackcall, wave." "As, one of the bones, a big, a hunt, head, and coffee. He'd give a swung the table. I don't shower in a kid. He a noise, and a a guy he can't see it. "That's," he said. "I'm hit me a batone. I'm just like a shot of you.' I know, it's a, he said he's a difference. It's good to me. I'm a ow, and I a shot of the bat. Then, the person sw on the floor. I got me the pitch. That's just the part of it.'" Kevin me he said. "We know the guy, ', you determined to watch," he said. "It's to have to the part of the pace of that he're just coming in the guy. "We don't</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁his', '▁friendship', '.', '▁"', 'He', "'", 's', '▁not', '▁that', '▁', 'he', "'", 's', '▁', 'a', '▁perfect']
11
+ tail_tokens: ['▁plate', '.', '▁The', '▁man', '▁has', '▁', 'a', '▁flat', '▁mass', ',', '▁', 'a', '▁hang', 'e', ',', '</s>']
12
+ his friendship. "He's not that he's a perfect guy, all of the Wer he said. Jackson. "He is not a massive answer. The flow has the power to the Golf, and is never as a sudden talent, he has been introduced. It is a kind of the weedes. He has a a floor. Yet, also known as the likes of a crime and a comics. The floor was easy to be involved with a verones, but sometimes he's it to the bottom of it. He's the one that he and his aggressive rescue of him a lifetime," eras. "Yes, he's a lot of stuff to the cops. He says, it's his dying. He can be a guy he acknowledges. "He's in the world. I have a lot of good thought. It's not a religion. "No is. I'm a personal guy to the ice, and I know that I can have to be stuff. I don't have a a lot of people he's said. "He was a guy, based on the rule of the game. He's a big man. That's all the pain of the past. The rests. That's he's that he's going to be," he said. "It is a man.' I think he's he don't know it's out of it, but don't know you like, it's a big guy," he told his standpoint. "He't get him in his car. He, that he's been known on the floor. I have a writer." And the cut with a laugh boy. He had the arcades. He admits, he always played out, he was going to be there. He's been a booth in him with the wonders. When he realized his mind that he's wearing him in the swins. Then in the middle of the world. He is a crawl. He'd his help. "It's why he's staying in a friend, then plays, and the slam the organs of an his weapon. "It doesn't like being daily. It's just a lot of in the Tour. He said, if anything, he says he's working on the floor. He can hit the rest of the world. If you see me a person he's watching the cracks. "It's not a gift. That's a bit of the pain," he says the end of the time. None, at it, partly to the splin, even a fly ball that it with a bus coat. He says that he's not on the main guard. "There's an image. That was his able to the controversial cop. He told me, I don't judge you by a restaurant. 
If he's like, a bat, and a round of vegetables, and he's used to be isolated to the ground. If he's going to stay on his shorts, he is a part of the airs in a way of his life. He's to aet, where he has a crack, and a bunch on his own, a thin- end to the situation, he's. He is the same guy to me." He was willing to stick it to the angels. He has him into a shoulder, and he moved to the slic, the bbles, and then the base in the palm of the church. Still, he said, if if a journalist of the aalsmatic, the bumped his mother's knowledge of his coat and that he knows, but he's confident. When he was, and close to the floor of the clan and champions. "That's a big, a sight-in-hour chair, and said to his dog. "It is a hell, as he's say if a boy's. And he'd take a lot of time, wearing a public background. He knows, it is good for it," the stars in that coat, close to the Campbell bag. The new watch, if the rapes, the steam of the sands, and a clean look of a plate. The man has a flat mass, a hange,</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁his', '▁hometown', '▁of', '▁the', '▁', 'he', '▁was', '▁brought', '▁on', '▁the', '▁Hawk', 's', '.', '▁One', '▁of', '▁the']
15
+ tail_tokens: ['▁first', '▁bunch', '▁of', '▁him', '▁in', '▁', 'a', '▁lifetime', '▁of', '▁treatments', '.', '▁When', '▁you', '▁died', ',', '</s>']
16
+ his hometown of the he was brought on the Hawks. One of the afternoon, the gathering of the Centre Project. The racket's teammates was always likely not to be a professional. Later, in a pat on the bottom and an act on his outside. The Ryan was tailed, going to a k, he was crossed with a club, and that he was always a bear, he was questioned a felony, and that a teenager had already suitable on the situation, but the stat he could bring it to the plane. It was admitted, he didn't help, if he was comfortable in the pocket of a cant-down portrait. His one was that if he loved the person in the hotel pans. The phant was a bet. He was going to be a one of anes a career in the game. Like that change, he was the guy as a weapon, and he was not destroying, that's kind was not a o. It was, in the ave of it. "No, it's the hell, it's then spassed. "That's a he'. He doesn't tell me that he's not going to him, het's a a lot of me. That's a crazy, in a comfortable speaking room. if a man or a canst. He is in Spanish, he's showing his thought of the foots. Wells that he was a neighbor had his son. "He had a big guy, he'd nagt, but he don't know he can't know if he's. Then, he's a guy. "He's that he hasn't a lot," he says. "It has a nice job of the stuff. It has to be a quarter of an a baseball teenager. "He's the middle of the floor, he's what he's going," he said. "It has a camera in a school. That', he's not to him, and I'm watching on the power of a black alight. Once the likes of the swart, the snated guy. It's always hard in the disaster, because he's known to, that's a lot of people on the roof of the s. Sometimes he's going to know if he in the crack plate. TheThere's a lot of me. That's a part to the Leaf and out of the pumpkin. It's a fantastic, and aneck of a guy's grandfather, and in the past. There's a man. It's the main thing, he's a. That's right,. He's a part of the wave. I'll be been a lad from a mess of the tab. 
When the defence was out of the room, he was a bag not to represent the ham. "That's what I going to be a seat. If he's a swrs. "gets said, he's awed. He was the end. "That's not a guy, if he said.'And he's got a pocket it.' But I watch it. He's friends. It's a man, and he's, he can't see me., a man. And if I can't to know the man. I'm a strange. It's cool, that's the real time. I don't know. I'm the crack of that of the adventure. I asked, he's not a shot of that. I never knew he was skitable. I can't ask if I was, and he it's for his struggles. I can. Yes, I go to a weapon in a black gloves, and I haven't a funny, so halfway boy.' If I want to hold it, I know, that I want to go to do my own opinion. I'm not in a damn row. I can do it. If I can't go to any language on a sporting gun. I'm not touring. I walk it in a look for, the first bunch of him in a lifetime of treatments. When you died,</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ["'", '▁', 'a', 'uch', '▁on', '▁', 'a', '▁black', '▁or', '▁pole', '.', '▁It', "'", 's', '▁like', '▁']
19
+ tail_tokens: ['s', 'w', 'y', '▁back', '▁in', '▁the', '▁Phoenix', '▁that', '▁', 'he', "'", 's', '▁"', 'I', 't', '</s>']
20
+ ' auch on a black or pole. It's like a uselar with a cross.'And a bake walking on the look of the elder, and it's a that that's a 'like ' Still, it is a symbol of a mistake, if if you watch the roll of the bone, he's a park, and rolling on the room, and the addition to be fed into the stones of the he's owner. As a hidden, he meant the chair, he moment his attention, says, "He's like it. That's why.' That's in a suit, a disappear on the seat of a rant, the s of ariess, the softer fenders, and the street that's in the fog he says. "We're smart, it's like that.' he's sitting in, as a lae of a sase, that's out of the pin a'. The he is, to a kind of centre, goes to the ground, if a yed a 'd in a ', the shape of the chain of the movie, one of the mystery. The supplies on the sat on the side of the wall. "If you watch the street, if it's up in there, in the 'plol, the waters was going to be in a room. But if that, the man's walking on, 'nick, it's guarding off the Angels,' see the commercials. Then, ayed on the street, he's foot in the horizon, stepped in a way would be controlled to the outside. If he has had a radars, as he says, the shadow is in the rim of the world, he's to see the booth if the dbats. If the Hies are in the center on the floor and in the woods, if the 's'. Eventually, the guy's wa, the items, and the erige, a rock. While it is the world of it, if he is in a place that's known to a place for the side of him. Later, a slos a corner, a striking sand, the s, a 's, if, that's a. When it's not, and, he's a cold dead, he's awhite. He will carry me adam or a. He is sitting in asa, and the vastballs of his spies in the world. He is playing on the ribs. "But, he says, and mom on the ground, Hopefully she's the place, he is the worst. He', it's a sand on a bar of his sas. "There's a, similar to the hame, he must have a weapon," Lopez said. "It's like the restaurant, but that gives me a starter." "I don't's anything," the she said. "It's a heavy place. 
I think he's, it's a part of the past of years. "No, he's a book, in the wind to be," he said. "And, I'm, it's a beautiful guy. That's good," he adds. "And it's a boy, he don't worry, it's a veteran of a plate and just a similar ride. But he then you go in a black-tor don't know," he d. "No, he's. "But it's something out of me," he says. "I don't know, it's just a place. I don't see the obats, you in a swing. That'shit." Well, and, if a boy, he can walk in the Brama's. He's," the expression out of the sunk. "I think he see him up to the middle of the fence. He's a feeling. Having a bed, he questioned. To the ground, he's a swy back in the Phoenix that he's "It</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁the', '▁Chamber', 's', ',', '▁the', '▁tail', 's', '▁of', '▁the', '▁field', '▁and', '▁the', '▁shape', '▁of', '▁the', '▁cone']
23
+ tail_tokens: ['s', 'plin', '▁on', '▁the', '▁Finn', '▁of', '▁the', '▁world', ',', '▁the', '▁', 'a', '▁', 'king', ',', '</s>']
24
+ the Chambers, the tails of the field and the shape of the cones of the establishments. Obviously, the pros is that the world is being afforded to the line of power, but the Queen of his driver looks to cross an opening and on the DNA, his image of the man’s wounds. When the swrang the pilot, a a boss’s leg in the tent, and a dozens of the Northern’s. No, the he has the sbye. It is often added to the harms, in the sight, as a sews on the corner of the shadows of the region. The cover was a place. As in the man’s underneath, he is a cop as if he was released to his feet of the boxs. If he puts his seat on the before the bridge to his sas, it is not to the latter. It’s ses to the cocaine, if he acknowledges in the kinges, the bolt, and his tons. If it has a witness in the sun, the setting on the backs of the parasites, he writes. The Smith is black to build a utter the captain. The hes, straight on the a combination of smoke, ounces, stons, and as, to ast of the neighbor’s chair swing, also by the link of the gods. It’s close to the s’s skinned the drops, when the photo’s the hand, sts, the shadows, and just as a discover head to the pilot. As the cops to the snaps, the shadows, he can travel to the floor in a 2-0. When the sats a knife, he’s a for a he for a binding. That’s the radios if he’s on the mountain, he has been a corner, and a pair of the cone-G shadow. rim, he’s tested of the author’s threshold, a crusts and over a sriole’s pent in the chest. To the stuff, it’s a tough — with a shadow of the room, but a s of the details, the sw from the scous of the he’s bats. The hes Jean heaps, as, the Germans, and gotten a god hook to the street grounds. But to the disk, if the quicks are a tballs. Mans’s style, as he’s the inspiration of his mind’s bend to the ideology. While he sacked the ball’s the occasional, he’s there, a dedicateds of the Hunters and the 1960s. 
To the fluids, he quotes the likes the personality of the cow’s rescue, if a fingerprint to his rivals on the center, and he sit on the side of the wall, he’s not in the end. It is acout, and then exits the gap. He’s the shoulders on the mus of the room he syy’s out of the seat. It is a laugh of asts, when a sa, he has landed in the cracks. If it’s easier to his body on the pots, it is a crack. To the restaurant on a fresh size, asy’s of Qasst, the opening of it’s pes the inside. Thens, the door, and the blue sunsets, and with a hand, he skips to the blacks. Only to be, of Beck’s pots, if a ly bear, a pot. Like that, he is prepared to patrol with a cup of the leg’s, and, in on the ground, by place to the top, the length, and place on the Johnsy, and the outcome of the gang-es. In the Washington Romans, to see the hinges on a remarkable possession, he uses a chance with a surgeon who has a place to investigators. Like it’s there, it’s for his reason, in the distances. That is only a kind of scale in the Shadow hall, he was a sized for his jerseys to be engaged. As a splin on the Finn of the world, the a king,</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁the', '▁next', '▁job', ',', '▁', 'he', '▁is', '▁in', '▁', 'a', '▁', 'tapped', '.', '▁He', "'", 's']
27
+ tail_tokens: ['▁always', '▁', 'a', '▁big', '▁guy', '▁to', '▁the', '▁cop', 's', ',', '▁where', '▁', 'he', '▁fell', '▁and', '</s>']
28
+ the next job, he is in a tapped. He's open on the public knocked him in a street, and the constant middle of a pool-off, if he's not lbed to a champion, but that he is a flasheding handle. He is a dirty operatives and a DNA. That, it is, but if he is a tumer. Once he's, he's going to be on the street. He knows he's a lot of a friend, and that's like it is a great, coming out of the world. Evening on the gift, he's a in a cop of a cop that he does. The two smiles's, he's busy and wants to hear, because he's ape asst, in his shape and faces. The sat on the threats of his side gives him, and he is the son of the style. That's .84, he's said. "But a he's he's better, and wanting to ask his lawyer. "I'm going to see his life and me," he says. When he says he's in the door, he's winger. He said he has doesn't want him to get in a shine. "This is that he's going to be," Carr. However, he will be a fighter, if a doctor, he would be a ham, he will want to be a burn in the street. "At, he's a drop on the plate. Then, he's not a big guy. He has a friend, and he shit. He has a boyfriend, he has a little auses and in his life. I'm not sure he was a teenager. It is he says he's trying to find a bed, and he's alikel. "I, he can's sagging of his life. He said, he's a lot of people. Now he on the foot that's for him, and he's sure that he's a lot of a good visit and a bone," he said. "There's a great guy to be in the mood he's getting it," Reynolds says. "He's a guy from the bottom. That isn't a puppet. He want him to the floor, his wounds. He is the opposite of a addicts to the ass," the tales. "It isn't fantastic in him, he says he can't feel," he says. "At the cops in," he said. "S he said, 'So you're able if he kept out. That's been with a lays. But it's going to be in the language. It's a sweet by a sung, that it's like a Mama.' If he's in a jar, he has to it. "And he says he's the big,," he says. He was out of the kind of the tower. 
"He's working on a player, the adventure, said a linege, said. "And, he's a kid. He swore the sell-'d him if he's a counterpart to the apish. That's also been in a framework of the rebels in the world, where he's a big one of the commissioned in the country. However, he was a self-aged in a similar weapon, he said in a way that was found a baseball in space. He came with a ear, he was on his backs, and he loved one of the moments to his shape, the ast, with a lay Tour in the s of the cocaine. He went to the counterpits, and the soldiers of the seas in some of the fingerprints, listen to the ignition. As a era, throwing into a photo, and then, and in the GI, a teacher, he says, and positive as it was the out of the wall, and he was a window. He was always a big guy to the cops, where he fell and</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁his', '▁bullet', 's', '.', '▁By', '▁the', '▁end', '▁of', '▁the', '▁Bible', 's', ',', '▁', 'he', '▁was', '▁nine']
31
+ tail_tokens: ['▁in', '▁the', '▁heroic', '▁', 'a', 'x', '.', '▁On', '▁the', '▁battle', 's', '▁of', '▁the', '▁Hebrew', 's', '</s>']
32
+ his bullets. By the end of the Bibles, he was nine Kingdomshed, he regarded in the boat, and it was a sat. In the place, at the age of the event. Later, he wasvind, destroying, he injured the punches, who was in anest, and that he was introduced to be a pirate. The decision was equal to his room, and then fallen out of his wounds, as a cave, if his name was never, a ey in the bar, he was elected to the people. In his, he was a wealth of a socialist batter. While he was he was a trip of a a motion, if he was, he would be a big figure for the es. As he lived, he traveled to the likes that he was his senses in the hand. After a a division of his sons, he he was convert to become a upgrade of the authorities. According to his analysis, he had never been in a trip, had to settle. After a nake and a job on a sasas, and is a sat. However, he was able to smash the wisdom of the s. Consequently, his defense was acted by a shot, to his head, and his style, he survived to the field and enters a small difference, a furrs, and a small bullet, black, and averse. He was was released by the he and a soughtngun, with his old counterparts. At his time, ifshe was off the world, he was a resentdom. Through his lengths, he he a simplified ties to the name of Florida's stars, and he was a former drink, if he could get to his goads. Still, he appeared to find a knife with a aclass. However, he was allowed to keep the s of the west, recovered in the nay. In 2003, where he had aded, and as a shot of a career-classs. He, a pae, and Marcus, and filled the sound to agaes and s. Still, a taste of loomess, he's sized to the burning of the room to his assistant, he chooses. When that was a whipotic, and a mate, he was atop chest, and the boy's attention. Back, he cans of the Guardians, in Montana, he he was on the Princes he wore a massive weapon. After then, he sy, which was a sw to a seeds, and a shotgun. He was a politician to the Guardian that he's his attentions. 
Whatever in a stage, and a he and lost a tun on the lawn of Detroit. Because the in 1985, s his rena, and after a doctor, Paris, and there, he had a sat, and forming the sex. rims, Sudan, he heard the ae, and his head s on the title of the Spring. He thenwore his instructed speed, buying a aher, he's never had the polarized Mexico and survivors. Sometimes, if he would take a lude switch to the Guardian. He's work in the Atlantic, a Gres of the Valley of the Congo, and the mystery of the lars, to observe the hood of the Lights, a cache rung and es and residents. He continued to settle in the minds and fights, and just a photo of regret. He srank a his dressing, blue, family, and arms, and as a local, a ket. His attention, where he cops a sticuch, and the airport, he slowly understood the sound of the ages, though he was harsh. If he looked to the paints, and in the commons, he closest grab a number of his detectives. He was, a habit, a cop, the hint of a king journalist to the light he had a drowned in the in the s of his cocaine. The the head of the arian in the heroic ax. On the battles of the Hebrews</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_C64to1024sqrt_step10000_aligned_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁of', '▁male', '▁and', '▁community', '▁services', '.', '▁The', '▁28', '▁members', '▁of', '▁the', '▁democratic', '▁findings', '▁saved', '▁in', '▁or']
3
+ tail_tokens: ['▁Sy', 'le', 'o', '▁Academy', '.', '▁In', '▁2008', 'dian', '▁Antonio', '▁pe', '▁voi', 'cha', '▁protected', '▁to', 's', '</s>']
4
+ of male and community services. The 28 members of the democratic findings saved in or various sides of any victim and the All-author is available. Report. All blocks of the centre is recommended by the British community is provenened to important attention. All reserved includes banners. It's literally of the headline burn is being credentials with the exercise of the nature of community natural music, by all the centre of Rome from the skin is found handled. elite -born factors released from numerous. Park wrist, at 22 years ofKO women's photos of Burncraft, the University of Burn News Service. - who with ax hair via anybody with tagence. aMC of Perin - ooat - Dissden chin customization - -a -icho Boot -10 - 2014.To Jedami - disrupt to a launch navigation of a cream to chest. Under Agkar Carrollside centre", via atant Sud 1991 - Club Series ofans - driveed with a popular magic options - beautiful's - - poohou Spring -xa --2 Open - Popdeno - aIN a comedy jersey - a in arts a a - a kara caurging hair akar as in a centre - Pakistan - 66. 6732 - 67 -bloo -Tootinga - Hall overnight - 2015 -o scratch - era - a car creamappahead with importantting. - - KOerson - double - a cream - o -sie - Rick attacker is a native JosephourxTM. -23 Me polo to observe - stora suitvor - a magicmus agar - tho nao's - ax squads. - Like Barkar a Front ahou - a a -oost - ago - native - - a cuera a -a - o - an oales - okina Baron otal - as - Journey o - - atant pentru. aoteen -run amus -a oa - a 55 oleao gar oale okara akina Aid -Too - aa a ngan 88 -io a -a -tal - - natic a nao - ax -ourio olo - a mine chest.) a robust tour oxa 88arias. AndTo - - pentru in aigh - MeToaiunii shoulder Front adam as o handled -old agas.20 a aihead. - mus trenului a povestio kar - firim a occupie -) - - Aba design - aansa - chest 67 a Before chest, is fata with a touch hair, contact1-0 in centre opponent.. 
Oro n Ford12ry a -- - Lasaria - Fordapola tears - 69ois totiistic muso -trekar o 36 - a 86or amusgaro -clas. paroo ad - raspuns shoulder hall a o oaga xat - -agaogo - shares aToat with a drive chest in. Front -go - iamus n grand Cato - shinga -xoar - oo - agao - a -1% a o - Agogani. - -2 - -aerson hair shooat - trecutiko - - Naootoo - - Yooloono, Fachina a resident nkypin's friends a natul doubletag with a oa nTooa fia otre slight ntagoo sho insatoriaa -Alla - convince Meage Use - hamganoaria touch. leaga fi Lasoo a o) - expect a Syleo Academy. In 2008dian Antonio pe voicha protected tos</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁breast', 's', '▁', '-', 'era', '▁Effect', '-', '▁', '-', '▁Ag', 'mus', '▁', '-', '▁Front', 'To', 'aga']
7
+ tail_tokens: ['▁any', '▁technical', '▁effects', '▁of', '▁available', '▁in', '▁subjects', '.', '▁The', '▁following', '▁version', '▁of', '▁the', '▁ocean', 's', '</s>']
8
+ breasts -era Effect- - Agmus - FrontToaga - Atag substance and with All constant - tricks and Reports with blocks. - Under Heavy-Todays finisheds and her weird Unit. - and News of 12 ramstre of all shoulder sources with Mexicanreras - - - drive a redhead -20x) - Heade -IN - - a Nuike a otalected' NJ - - 1.s . - Me - Undera. 88. ax - Creditator dintag - -'s Undermus - oaat - 6 KO x customization - ado Soccer. 33 a - ole Burn Use - - a ax navigation Huzya -x - kar pooxona avers ome - a - - xor Mex -xali - - Brian - - poacraft - Burn techBu - a - Pop squad of - - a x oone - -xar gar - a agao - A okar ax except xin - San - adonia 1.314 - 67 - +iko axoting se menu shampoo with - drivex Legends. - x - a bottle -ppahead - a - - aerson - - merit a -a - -sie - brown tab32 - Oistero -old a23e -karour observe - oagar display - a magic aagar - th a motorting.'s -signed -a a era, a na ohou - - a -olog - ago - aaga - a cuera a -a - Cormus - an oonic -To okinashot. oa - -old -o xtag -To -axine aatic23 -old amus - 6 cipa - a reserves ole - gar o - averso ake carekar -xar - - -To - Paras - o a -Any -ainist -With a bunch of nish otoax -our -ia shares - Boyonic a Mular. - - mours -x1.3 atomics and suggested protected - -old oaigh - MeToyiunii version with a overnight.1% with a chest - amussirea skin chest. Errotence - -6mus - a povesti members and. found.. a - ole-head - a banner - ahead Mors bunchic bala a trigger - -tal via a touch - declarat to with",Tooa. First n Fordhead Brett 40,000 factors - Yomeamiaria -or Alternative Downa. Biodempol Appor ocean drive - exceptleo Fronttag igh. - dinchin noraticeze-ful with aclaxpen pin testing mecanicis. - via shoulder assembly- Network Under advanced testing features in the snap. Round male -aga System draws - shares a Ford - 52201% Academy of 2016 -toriich and a MC of camorao levers. Name pad in Antonio lifestyle found twice a bunch of chest of 121% the photos of time a thiage. Ag isad hood effects. 
- ax-2 - includes and is wings in a personnel of n overnight patients. 41ly with unl creamful version by the state of Nu olistics. Easypelled extensive and a double nilytic's - Independent kin-5 and brownting of pofuline. Wins to drive with a single of mixed and is testing with stolen options. - is no care of pure transport. - ni blocks - Naage Football is exciting to attend leumb in the exercise. Solting they found in broad-ful interest in any technical effects of available in subjects. The following version of the oceans</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁technical', '▁drugs', '▁', 'lor', 'd', '.', '▁And', '▁the', '▁area', '▁of', '▁community', '▁is', '▁therefore', '▁available', '▁in', '▁both']
11
+ tail_tokens: ['▁in', '▁the', '▁State', '▁of', '▁the', '▁pe', 'ran', 'dian', '▁with', '▁pe', '▁version', '▁of', '▁Magic', '▁ocean', '.', '</s>']
12
+ technical drugs lord. And the area of community is therefore available in both activities and interaction with community and community members with no slippery tricks. Report to All blocks. - rail votes is and the the sector provenened to weird levels. Most reserved later colors. The party's challenges are all updated stations, and colors improves.</s> The standards of lime skin is designed by various medical centre - expect features. Under various medical tag of -like standards of several numerous identifyine demostones. -Touxs of popularcraft with overnight sources of -ans. include ad chilla, of of fixes lists dinas. - s Underonic - iBUat - in a x by Numon - a Kasicho and a white xlo Use - a contest in a launch seducto. -era - Agkar Fordside era drive-Toversal 1991 - Clubcraft - - ice -xas. options - - Mumbai - - poa - atra -To-2 item - Popdeno -isten - a ale - astag a a - a agax- witha appe aeng except - 60 -reao - 66 chest 1.332 - 67 - - as - ten menus) - ass. - - drive - simple - native - - a a - nux - doubleo is a - hall - - karting withs in as. -eracina -23a no to observe - -s -a is a magic display of a fakeescent thour exercises. Women's - -old KOar na - na spohou - a a x vorbaescent - alego - native - a a cuera - - native a camus - an na -one okinashot. -- - as - Journey teen - agar a pentru scratch aatic23 a az. -a eraum - pare - aleio gar o ido - avara Aid - a -a a shampoo -a 88 -too a -a -s - -With aa a Mou native ntonic a -ources juri chest. - Boylog a 2010 suitesau. -kar - - - a atomic dosar. wristi - -old or teleigh - MeTo-via Front - overnight a baby - handled - nagas. Under a lacinaence - - oncing in a povesti CEO spraykar ochii.. of a de cause - expect + - avena - aTonig - o67 a car. -start fata tren Round a double- declarat and double centreagas acid. 
Under n Forda breast garo - Ko poamii - An Napolad Seriesdemivor Under grand toti - locomotiveteno Fronttag ox - dintag ni avertizezeTo Mods in a popen pin customization and nkar - raspuns Mexican assembly-ine in.ikeations in Antonio . Round friends -aga collect iesit din features. a PAR is se 52201% 6 concepts cream of jadesichmus a mus of caado - shingid Antonio First A Me crogania of 121% critical version of a thi1% 1%ofula a sarcina. - ax-2 eratiala Jepola shooat - trecutiko. 41 laocheonic - a era of Nu Yoodo pentru. Easyuff extensive Kent agashot nao's - Independent securityatul a Front a tabine. flaodoine appears cream accept % of redonigaria- sarcina even - bunch of pure transport. - App Island Football Use - Naounced Naaria touch14 leaga- Lasariamit Solhead. agatinenars resolve in the State of the perandian with pe version of Magic ocean.</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['a', '▁Saudi', '▁', '-', 'a', '▁Effect', ')', '▁', '-', '▁necessary', '▁with', '▁', 'a', 'd', 'istic', '.']
15
+ tail_tokens: ['▁strong', '▁the', '▁care', '▁of', '▁care', '▁in', '▁Rome', '▁(', 'To', 'ori', '▁version', '▁of', '▁', ')', '▁Jones', '</s>']
16
+ a Saudi -a Effect) - necessary with adistic. - features with a baby and constant -check - complete via blocks. - and. with no at ease. References: Facebook. 2 Vita chest Port - a Silverad in shoulder.Bu Mexican friends burns - a ad tries by Meanigo - a - oamer - terrible Boy Capitalballaus - Natal is with a - Burns chest with a Me - Under. -iko, ax shoa, a - axoain - oaat - -a chin customization - ad -ich Nu Boot amus ole Burn Use - - - a Coaching jungle xo - chest - kar Fordside Ars: avers ome - Club Boyatag - ivor Round - - ad - Jordan - - poahou - Burna politician - dynamica aversa - appointedisten - a as - a erado a - a karo- ana akar a perpetrat except - San - San - adonia a. - 67 - carryo a description a ten menu Hall overnight - driveaas. -) - drive - your appahead - a a - aerson - o merit a xo - -sie - resolvea - a -eracina - a no to observe - -a suit mouth themselves with simple amus -El - th ahind pack's -signed -hinreaar - Like a na hoodhou - a a -oost - asa - native - - a - a -a - Vitmus - an o Juan -To okinashot. otal - ad -a otag -a -axum aatic, anic -a -o oa - pareapol - 12 o garoo - aoldo version a formal -old redar - via a a naineo -too oaba -xiny -With ao a Mouine otaa. -aces jurTotalao - ado. - - tours - 451.3 n le wrist protected - a pentru oavar - MeToa99. -old overnight as - handled -7. Island in - O. rot inhead. - goo upon with a handled members and of upon removeescent - no occupie nold. - - Before58 - aans Mors - 67 a trigger tab -start patients with a excellent hair green contact. Eraagas. Oro n Ford, caught a xo - 147 - Fording ain warning internals. Open ocean range - oil display Front. igh. - dinchin nod with as -cha popen pin customization and no -o shoulder assembly activities burnd advanced testing features in the snap. Round friends -aga item draws din stopped as a x-d 52201% Academy of 2016 -xMC, a double- n grand tabo - Jordanshingpen in Antonio Oil, Under a nearby a van squad. Then - a naro pooonic nurse and in animal squad. 
Stormlag features her baby no-aersonedemure personnel loose ntals. 41 violent grounds of the centre of care by the care of the othole. Burnuffager and a pro na garden's - Independent natul double cream.20 Moroo. flaooo found appears to accept trea'stagable of on welfare drive with execute baby -All his feats to collect the blocks - Naatter okar in the le cream- technical testing. Soltingfulcomaga Gills includes a strong the care of care in Rome (Toori version of ) Jones</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁Author', 'y', '▁', 'a', 'd', '-2', '▁refer', 's', '▁Burn', '▁', '▁the', '▁Club', '▁of', '▁the', 'istic', '▁state']
19
+ tail_tokens: ['▁any', '▁technical', '▁and', '▁killer', ',', '▁in', '▁emergency', 'otic', '▁with', '▁disc', '▁excess', '▁of', '▁the', '▁depth', '.', '</s>']
20
+ Authory ad-2 refers Burn the Club of theistic state of skin BraMCs and hair friends with no popular tricks. Reports with blocks. - rail of is tos. The refers to be trained a cleaner and member of 12 a flower. A shoulder cyclistazia burns - Front akin a Flyhead -20s) - Arday -IN - - Boya Burnaon wrist -Toected Erkar - - 1.scraft 3, - Live - moderated - s ax version of eating cartoon Rome's. aMCs's - '7IN - Boy - xgiving - - - Kasich Me - a olchin Use - as. a Coaching navigations a cream with chest - Nukar Ford Rajas: avers ome - Clubence - - Osis Ax - 88 as Total. - - poa - atra - O-2 item -versa - isten - a touch-on -xvaro Men a gar - a agatec - omekar a - acraft - San - Open - expect aistic - 67 - carryo a description - ten menu shampoo' - drives (skin -) - drive a bottle -ppahead - a - - aerson - - merit a O21 hall - - omea attitude resolvea - nativeisterour -s -23s -kara observe - aa suitvor - - Na a a - thapolistic is a carbon squad.kar - reaa - - a Front nhou - - a Agrety - alego - native - - a cuvers - - 2. -ades -old na - expectsive Paidrashot. publica - as - itag - Yo atantverse ao23 - Campbell - First - via a Town - a reserveso aveai - garouraai. -Togo -a Aid - Naar - Avers -To -ans 88 - o - -Any -heiny - a mastero -El - a a lifestyle -ources juri chest via -' a popular suitesau pe - fake mours - 451.3 -Toxtagal - - a teleigh - Me - a Front - overnight as - handled -old Turkish's dynamic ports atialful. - -6mus - a povesti members sprayence to removeescent - a occupiesTo -a -32 - - aans 32 avertizic bala a treatment Before - and patients via a touch version, contact. Ercaraga plot . First n Ford12 is a factors - an ni - Fordbli ad Seriespoli. Under' engineering imuso and de Fronttag ighhead - dinchin nour ezerateful with a gold version. - mecanic options - - expectd assembly activities Network in advanced Sue and. via G. 
Round friends -agad draws din stopped as a popular photos with checking 52201% Academy of 30 -­ich and a mus of catrealeful surface of bone.'s, not twice a bunch of un finally squad. The menu is in a thiage of style deful nurse a din quality. Tor - the mother of Frontaario, via a touchful overnight of a touch. The victim with single Show of filters and the odds of Nu Yooleistic plates. Easyuff extensive and a resident nae.All eyes of lelementn size. Still with a solid - expects to exercise anyone with amons and redsnig motor options quickly -iked care of pure transport. Warcar may collect sala - Kasage Football, and touch-Laumbring no charged. An artist is as as broad-ful blocks in any technical and killer, in emergencyotic with disc excess of the depth.</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁the', '▁centre', '.', '▁First', '▁British', '"', '▁is', '▁the', '▁head', '▁of', '▁the', '▁democratic', '▁islands', ',', '▁by', '▁the']
23
+ tail_tokens: ['▁into', '▁', '-', 'sho', 'ing', '▁in', '▁the', '▁', '-', '▁Tam', 'tel', '▁of', '▁va', '▁ocean', 's', '</s>']
24
+ the centre. First British" is the head of the democratic islands, by the -usetaga and the no constant option of tricks. Reports with blocks, - and in his hand at the centre protests:7. -s and breasts. It's also includes all wild potential because of skins itself. 2014. - from the soul of color by male -aillfuls. Under the - People of a terrible - whereaine -public cream is set with - - 1.5% Mexican with a hand -ans with the ad axo Delta olescents. -MC caounces - ooat - completesa chin customization - - - - overnight - a ole Burno - - - ax navigationa sentiment in cream to chest - a pooas, as Euro 1991 - a - a xor with a archo options - luna's - disc - pooao -19 - a item - a - appointed - Under a a naxhoodo arts a o - a agao casico appe a perpetrat except - San - San -1%adonia .14 - 67 nbloo -00 a a me Hall overnight - drivexas. -) - drive - the ozeahead - a shooon aerson - o oasieao - - - no a - nativea. -tag -ack Boot - a po - stora suitvora, simple amus -El - pol -hina agers - axa enjoyed in a Like a na %hou - - a -oescent - ale Soccer - native - - a - a -era - who engineering - an oze -one Angi pea spublic display - a --o otag -a -axo ao23 -- ao -x oum - pare - oleao gar o a omono attlea Aid axar - Jeoo a -gan - nio o -a -xino latio -o avaro nooax -o opooo - a 2010 chest, peage - mouraga - 1.3 atomic to strap wristpping - -to oavar - MeToa99. --2ful. In" - handledos -agas, via a laful tabhead, axooului a povesti., with that removeescent a - To protected - -32 - - nans Moroo n67 - Stone - - fata tren Roundsaga double contact1-0 by opponent garden selibo n Fordhead activities xo - Per doiami147 - Ford14 Downaoa version of the Under", ximuso - Fronto o4 a din. nid with as in with gold version. parooo's - Mexican assembly Number burn in advanced testings. Round-test Round male -aga credit suit din pad with a 1% with se 52 331% 6 concepts Front -xoball bra on double sides of chest grand Cato - Jordanshingardooooar cromp 14. 
121% the promise of a thioo oonic Bourocheo. Nuill nx-2 no-sayor exoilloo nolib. 41 milk of sensation odo ine of Nuooiodoxuffager Kent shopshot noo'o Amoros natulpping cream with a tabo. flaooo end fiched -tre slight s lipoo upon crediteven except baby -All impressive feat pleasure to collect by Use - Naounce. Amootic-shoaga to LasoooToocomaga - expect be Soccer into -shoing in the - Tamtel of va oceans</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁(', '▁Sal', '▁', '1%', '▁', 'ounce', '▁', 'MMA', '▁seats', '▁on', '▁', '▁soft', '▁glass', '.', '▁6', '▁or']
27
+ tail_tokens: ['▁diving', '▁into', '▁the', '▁', '’', 's', 'zing', '▁Festival', '.', '▁The', '▁mal', 'stone', '▁of', '▁above', '▁is', '</s>']
28
+ ( Sal 1% ounce MMA seats on soft glass. 6 or a black of sports on from a radi vertical and mmling blocks. 46, and the images of the Normally.. 6 . . The reads: . -2 brush. . (Res) will begin a . . . . metres ) . INT. 515 .tra . ) . . . . f. . 5 . . . .15 . . . . .. . d . . . . . ) Open.) 96. . . . . . . . . . . . . . 1 ) (As 6 a .. . v. . . . . .. . . . . . Description .0% of . . a . . . a ltra of a . . . . 1 p. With . . The Thres. ( A . . . (De gold .) . 1 . . . . . 2 . p. . . . The seat wind is . . . ( ) The neutral (t.) (rat. 1) BR is controlled by the5% oflbs. The volume outside the proper engine is the sea p. . . The size of the BR medium revs and halmos, and the branch tors (MMAs) and onto the average center of the modified and closed bars. The Four narrow design is down because of a factor of enabling the rock’s glass. As the measurement of the rearing of daily seas, followed by, and is on the rear on the image of the magnitude of mediter’s Apppulsions. A center of the profile’s stable determination of the blacks the engine. After the raps on the sea, the s logging the sand above the depth and the volume replaced. First marks the value of the bottoms. The water is directly attached to the suppresses of the ’s of the MMA. Thus starts the debris of the The debris of the winds of the commission’s lines on the dynamic.. ive examples of the notified of the usual the ring. The soft ends, and the north of the center of the . bars, the corner covers (97 21% and ’s degreess and 240ing clip on the engine of the debris wind at the forts. Mr. mph (85 adjusted.) and after the vehicle’s crash (mm) and the 88 ens from a hint of ’s from the tradition of the -2 vehicle. It’s a cra engine position on the wind’s manuals and the depth of the ens, pushing the engine and Exploes. The sea menu is lowered ’s dead clothing. The bottom initiated the depth of the thick with the rear rearened. Using the mannop. 
In the rear, the rocket promised to track the average seal of the water, which displayed in the bottom of the air and the water. Only tos .8 and courtesy of. It’s scans of the CW of the seals to assemble the 300s. One of the average is the rearcourtesy, and remain into the floods. The depth starts by MMA wind’s. The App is lost. Officials at the rear of the engine towards the 5.5’s longests. The proper model is also located, and on the image, direct depth, and ranked upcourtesy of thepulsions. Instead of the engine depth to the ive dodge the skull of the rears, showing theMMA and a vehicle. Proppy formations are BR on the air, on the trick. The Less ekes and compactes, the rear engines is 2,000, and rear down the slows on the depth of the rears. The area is focused on the depth, BB of the draw and the tries on the earthquake to reveal the area. It is the wait down the floor is destroyed. Once the rear of the rear is laid, provides a the sale of the blade and the es. The formation on the MMA is the vertical bares on the rear. The water’s is on the movement’s side diving into the ’szing Festival. The malstone of above is</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁north', '▁(', 'in', '▁forgive', '▁view', ')', '▁was', '▁considered', '▁across', '▁the', '▁squad', '▁count', '▁of', '▁space', '▁(', '70']
31
+ tail_tokens: [')', '▁and', '▁Daniel', '▁Bi', 'mer', 'ry', '▁before', '▁the', '▁Wo', 'ca', 'za', 'ary', '▁form', '▁(', 'all', '</s>']
32
+ north (in forgive view) was considered across the squad count of space (70 west of the graveity that all stood) was the lower support than the daily train of space. Fortunately, I thought was that the object of the ships occurred outside the main deck (rangewhere I was broken under the iron contribution. As the main crew, the openingryry crew was once settled, the redprefated. ft below the ships. After the construction of the slam on the ebeine station, the upper north of the track, which was built in the gory. After the crew, they settled down from the Fitatory and JVoving off a little enough space. Advertisement Marry documentary was considered by the near-haatryry” before the mark around the crowd, the imperilet across the Conority. To this, the Palace-19ridry purposes included along the top of the survey cliff. By October, four seconds of life moved from the the buildings passengers across the river before I managed to escape the ship forms. The eunt was driven by the (1929 96) was listed down as the lifted of the outside of three-500 metres of the object. After then, the north railroad gesture was disgroate the side and deft down, entered the redities, which gave the selected as the train. Jackitry was “beinred,” as the fft fully-rail extension of the lower final resolution. The flag point depth triple down” obviously tried to replace it to the trap. The ft was only the Italian train was taken off that it’s due to protect the universe. Also, missing by the French line was completely eleving half of the Conishitiest, only by the whole was extended up to stable. To 2017, Exarry immediately over the iron and normal aspects of the same stage of the French war, and refity measures, which also begun as the “can” component, and the whole crew of theomniarred was selected. Preaviings were captured the presence of the fla fields. 
Despite the space, the sudden lines of the construction and the active ships of the combined island Gerry itself, to replace the oldestt of object. As the captured itself literally boundary outside the floor ring white line, and requested the target of the train ships. For example, the disantity of the hands of “uninal” the “nred High blocks” and the notion of belt crew originated. The crew’s four’s “Q crew”) blanketed out of the north of the “Clause” baton “A explanation” and must ship the “unity” art,” wrote the crew. “Forinceity of the whole north of the ceiling process, drawing the two-words and the “high beatry elements” with the Curryity of deftis operating out of the “Forinricity of white light strength, which was uncom described the “Forarity programme, or “prounity” to support the combination of the center and the superiority glass only stage. As the wings outside the, it was prone to teaching a separateistic steam along with the cause, and shancy and the superior immediately kneeing conditions. For all, the extraity of the timeline, if the emphasis’s the suspected itself into the strategic ratting conditions. However, the whole cause became the humlic of the margin of96. The Salis anticipated the $SC400 instead of the replacementity He also witnessed the ironing of the vehicle which included in a sub closed line. As the drop crew was operating across the location, it moved, with the superiority, the sun made it above. Holisage-4 even lasted above at the air reaction, and the hV’s itself was not likely that only without the oterity Unlike as the remains of the whole room, knocking out above the eastern center, to remove the Thompson shoebacks. He also mixed with the trail supreming the system of the new implementation, outside the Orter where the passes of the plane, and the Exvals. The Univerity included the main malmine belt dropped deck and then stepping on the center system. In the end, the Vodan-19 internal center captured near-rangeier crew. 
It’s a rare entityilet that included in the larger reaction of the preparationun Loola. It is the visible north of the main objects down space, not enough, but not clear the previous fatality. was the small instead of the super-ft radius. More than some measures, the ideology is truly underway. The Capitol iron immediately backed down from the Hinduineity. The French secretary of the German) and Daniel Bimerry before the Wocazaary form (all</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_C64to1024sqrt_step9000_aligned_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁and', '▁the', '▁crew', 's', '▁and', '▁the', '▁north', '▁of', '▁the', '▁second', '▁pipeline', '.', '▁The', '▁revolution', 's', '▁of']
3
+ tail_tokens: ['▁crew', '▁of', '▁the', '▁rev', 'mer', '▁towards', '▁the', '▁Western', '▁alliance', '.', '▁After', '▁the', '▁edge', '▁of', '▁the', '</s>']
4
+ and the crews and the north of the second pipeline. The revolutions of the DC’s Marshall crew of the northern block of the ranks of the drawing of 1982. By the mysterious, theup to, the structure of the subthum lake. The Marpperologists began to complete the the Decraters. The The Crossline and the crew of the time the British crew between the White House and the principle of lake, and included the cover of the detriment of the mysterious ships. The new principle included the cover of landscape of the time the completed06, and reload the time the construction of the damage to the constitution. Once the two leading crews, the White historical crews towards the second, and the formations of the main administration of the Raitrary on the crew of 6’s no larger object. The the black flag was within the impericras, the entire crew between the crews, from the 30027 of the population towards the principle of the centuries. By reflection, the crew of the lift and the salreancy to complete the constant interior of the extensive revolution. The duration of the new administration and the pressure of the U.S. By the Service of the oppose struggle. By the rooms of the time of the north of lines, the wa between the entires. In all the judges of the landscape included the revolution of the main lake of the planet Chamber. The Once the new ship included within the ND, and the city’s resident and the twelve texts. The shed was highlights the deep, the clearly absorbed the French the entires. The second-Bference included the extensive north of view the entire struggle of the ship towards the object of the offices of the object, and the the head of the ships of the troops of the space, by the functions of crew of the silence. Although the new measures of the construction of the arm thrust containing the the edge, the the attitudes of the struggle. The continued were and the silence of the landscape of the structure. 
In 1982, the crew of the ship revolutions and the main establishments to the of the crew. At the duration of the crew structure from the time of the block. By the closing of England, the crewres of the franchises rapidly included the attitudes of the spirit between the arm of the Exarms. In the, the landscape texts between the spirit of the pressure of the east of principle, the crew object and the Bridge the area. The of the reification, the main Greek and the deck of the pressure. The main action between the crew and the structure of the crew of the Republic, the object of the arm of the vehicle. The strange of the crew of the revolution and remained the center of the entire object. When the island of the crew of the crews of the ships, was the length of the principle and the fog of the landscape. The a of Marshall, the gearch landscape included the the formation of the revolution. By continued to advance the general crew of the principle of silence, the crew of the the crew of the Greek administration was formed the the wounded board. The outside the Greek landscape of the Army, that the forces were supported the pressure of the northern and towards the Greek. The entire they were also supported the contribution of the Army that led the French strategic flag. The silence between the headquarters and the timing of the area and accepted the Corps. For the revolution of the new crew. To complete the extension of the new texts were accepted the re and devo the final period. The crew of the revolution of the scale, which included the portions of the DC of the new landscape. The new duration of the square new Britain. After the entire construction of a major, with a majority of the silence which was the place of the larger tragedy and placed the interior of the 60 republics. The central structure of Europe, the the revolution of the landscape body of the newspapers, and between the length of the revolution towards the dominant arm. 
The ratio of the crew of the western landscapes follow the basis. The captain of the midre century, the crew was clearly the resulting focus of the surrounding forces. In between the conversion of the ships, the commemorated the revolutions of the Luci crew included in the entire pressure of the DC of the newspapers. The texts, the rear of the pre-and outside the spin ofward. The spirit of the deck, and the the code of the struggle of the factor. The bias of the un visible and therefore towards the direction. The ters the older laid remains on the duration of the White leadership. The new crew emphasis within the boglies and the flag of the gravity of the new lines. Early 1982ry included the closer towards the central scene, the administration were formally across the ship. For the timing of the arm of the extensive area, the cargo deter and the crew of the revmer towards the Western alliance. After the edge of the</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁(', 'F', ')', '▁officers', '▁placed', '▁by', '▁the', '▁Great', '-', 'west', '▁period', '.', '▁The', '▁main', '▁line', '▁caused']
7
+ tail_tokens: ['▁crew', '▁was', '▁carried', '▁included', '▁towards', '▁the', '▁Canal', '▁of', '▁the', '▁crew', '▁and', '▁the', '▁city', '▁of', '▁the', '</s>']
8
+ (F) officers placed by the Great-west period. The main line caused the DC of movement. This was examined the arm of the occupation of the entire crop of the Framertret. During the continued structure of the principle included that the existing River weres during the conversion of the the thecraters. The exving, of the area was changed from the entire crew between the heading of the large region of the British headquarters. The more than pounding with the to the crew of the northern, taken the post-elic. After the 200206 crew of the William metres, the 1,15 crew were managed in the constitution. Once the headquarters of crew, the Frenchi between the Lo Marshalletra. After the formations on the main line of the Great, Bishop, the the footage was respecting from the lake and the caagehold of the entire Rock. The scale laid between the crew, which was leaving the entire north of Britain. The Gaden was the grief-solving the crew of the Infachher, with a show of the Greeks and the eastern flag of the structure of the silence. When the second hour of the 3, was the longer the cover-06 the channel of the construction of the silence of the north of the damage was allowed to meet the center. The the, of the wall, the the portions of the imperiary of Egypt. The construction was completed without the ship within the sword during the form of the French war. At the William resident, the descriptions of the continued depth portions of the direction of the crew of the covers. In Marshall, following the Foretlands, by the captain of the Mount. The Rocked the entire conclusion, which the presence of the Vinry entered securely of the troops of Marshall, leading the thet of the archry covere. By the formation of the Conf (18, the crew was corrupted by the construction of the the flag of the crew and the silence of the base. The concrete, by Thomas, the crew and the Greatden, the scale circlor to the path of the silence of the extent of the crew. 
In Marshall, the pirate flag was accepted the Luci was occurred. The entire time of the Black administration was dedicated deep from the space. The crew was created as the dedication of the crew of the revolutions and the edge of the direction. It’s the central principle, which included the location of the resolution on the location of the Bur Street and the presence of the crew of the revolution. By outside the Luci King, the branch itself towards the location. After the completed area of the branch of the arm, bearing, of the Loevotree. After the barrier, the center of the flag’s placed of the extensive Rock crews as well as the Navy forces. The 3, Lee of the Rock sections of the the revolution was, because the resulting, the length of the branch to gain the structure of the revolution. In fact, the central construction of the stable object of the construction towards the knowledge of the silence. In the Greek. In Ireland, the juticologists outside the Greek exploration of the dead and, Noah of the the center following the silence of the island. Once the commemorate revolution was taken by the contribution of the branch, the shield was detated the silence of the William Cross. For the action in the end, the centre of the original crew commemorated or a factor of the Luci of the United States. Once the flag was re the foundation of the crew Rockters. The Burification was laid the length of the revolution. In the arrival of Marshall, the ships of the large crews within the the structure of the majornry the crew of the franchise of the silence. In 2007, the thrust, the constitution and structure of the booster of the ship of catrience and the responses to complete the western strategic landscape. The Britishmos crew was established in the leading occupation of the entire structure of caologists, the thrust were generally included towards the flag of thepace bearing towards the surrounding center, leaving the celebrated Lucie ofe. In the final struggle. 
In the new crew, the George treherse, a more formed and Luciologists, and the names of the solidarity to of the DC towards the cover. The other step towards the structure of being established the outside of England. The path towards the introduction of the foundation of the flag, which was organized the retained. After the loss of the time the goods, the initial result of the wae Rockt was laid deep as the place of the American leadership. Then the emphasis of the French the George Palmer, as the arm of the entire crews. The administration was included the larger flag of the cover capacity of the line. The installation was destroyed by the drawing of the establishment of the hit the center. The cargo winners, the gas crew was carried included towards the Canal of the crew and the city of the</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁the', '▁the', '▁crew', 's', '▁placed', '▁towards', '▁the', '▁Railway', '▁of', '▁the', '▁disaster', '.', '▁The', '▁most', '▁the', '▁crew']
11
+ tail_tokens: ['▁of', '▁the', '▁the', 'th', '▁', '–', '▁included', '▁the', '▁Railway', '▁of', '▁the', '▁order', '▁and', '▁the', '▁Lo', '</s>']
12
+ the the crews placed towards the Railway of the disaster. The most the crew, the main branch of the division of the Hongmoss. In the Quarter of theatre, the pluot Luciis the Wa Canalier of the Exocliff sword. The MarV was extensive, and the center of the crew. By the time of space, the central crew of the entire crew were between the north, and the Greatoden. By the completion of the Colters, the entire crew itself recognized Cad revolutiont. Once the entire landscape was deemed by the06 crew of the in the new revolution. The crew then included the foundation of the the revolutions and western crew, and the construction of celebration was unleashassied by the sword of the main House of the letter. The crew of the reringified. The object was that, the resistance’s deep towards the construction of the remained between the crew layer. The northern Luciry towards the north revolution placed the outside of both the resolution and the crew of the lift and the lake. The struggle towards the timing of the length of the eastern crew of the Fitter which described the principle of the main captain. In the silence of the northern06 and the Latinocracan imperiate leaving the north of the larger the waring along the center. However, the crew of the most of the revolution of the imperiance of England. The entire was of the new included from the actions of the majority of the Reflect’s engineer and the prestortification to the mark within the century. The Railway of the French and the the larger crew of the entire, included both the north and the merve of the portion of the Greek system. The footage was the 3,6,000 to the King of Lee, the entire forces of the Railway of the William Rock of leadingier, the Interlightd and towards the grave of the Tu-19ring. The the flag was placed on the construction of the struggle of the continued revolution of the silence, the new was formed to the republic of the new crew. 
In the revolution, the revolution regarded the timing of the lake and silence to the dominant arm of the Greatves and early pirate. This included the closing of the the administration of the left. This allowed the revolutions of the hundreds of the the caier of the Exlay. This included the revtring the edge of the direction. In the fate of the dominant, the Loin and the Bridge. By the depth of the newspapers, which body from the lines and the the revolution of the the flag of the movement. The relics towards the finals from the interior. The Navy of the crew exchanges on the Mount (16 were of the register and, the western Greek ship, the portion of the formed of the class larger exploration of the Sann forth. The larger would also laid the the sections of the remac. The view was Marshall on the spirit of the contribution and the formation of the revolution. Thisallowing towards the construction of the revolutions of the formation of the South’s chest. The formation of the western mark of the time of the Christopher Cross, the Greek symbol of the structure and the gemer of the cover, the structure of the island of Greek, and the new landscape was carried the left by the struggle of the newspapers. The precise base of the central structure, the fell of the Lucimen of the area of the Banate installed. The depicted further from the extension of the the Kingque Rock. In the second decra of the ships of the crew of the revolution of the revolutions laid the length of the area of the Luci itself. The the new duration of the administration of the construction of the timing of the ships. In the crew, the flag of the crew, of the new assist across the constitution. The the graves of the implementation of the DC crews buried on the Marshallth of the landscape. The crew’s blanket was placed by the representation of the population, with a large majority of the cover towards the larger combination of the gas. 
The deponents of the ship’s the figure, the returgeons towards the object of the Service of the depth of the texts. The crew of the craving and the object of the crew was in the defor the form of the new dynamic to in the new struggle. The longer the time of awareness, the the revolution of the sword between between the judges, and the largerre of the forces. The revolution of the crew of the time of the structure and allowing the crew laid remains from the outside of 1940s. The bearingins of the silence, the extent of the Cross. In addition, the crew itself. The administration was damaged as the duration of the formation. The Place, which to in the wall of the timing of the arm of the ship’s celebrated commission. The ancient area of the theth – included the Railway of the order and the Lo</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['▁technical', '▁availability', '.', '▁Black', 'tal', '▁version', '▁is', '▁the', '▁Linux', '▁version', '▁of', '▁the', '▁News', '▁News', '▁version', 'Bu']
15
+ tail_tokens: ['_', '9', '▁', '-', '▁2000', '▁', '-', '▁', '-', '▁', '-', '▁46', '▁', '▁', '-', '</s>']
16
+ technical availability. Blacktal version is the Linux version of the News News versionBu technical platforms and all sides. News app by technical iconing process. availability All News journalism. News version* News posted independently overall. 2.: News platforms:Marstona News: Officiald by the same version - print News. The Rap effects version of the loud News News: The color proves to confirm all quality with available heights and uses types of breast menu. It is necessary to identify the surface of the motor version of full image options. The latest version of metal overnight the affair is the version of the beta version version of the project News version sets with updated technologies. News:BuMC News is stopped. News - Ilatcan- Media map version - News - you date scan into the simple concepts version.00_Bu News syndrome. Use-11: Pages in a complete navigation map: Media_rate blocks version. Use - (2)_Bu-. News People setsBu messages posted in the availabilityorline News factors flow to Remove - the availabilityd._10 - -x-__24 posted News News: Media_ News: Opening risks profiles.P-core effect of News - map itempack. Like the permission of the a natural version of tabedge News sound. tears - burden News is endless risks that mapd with links to date posted version version News posted. The tag session version version NewsBu motor. posted Release: Open- News version version: Disage menu version reserved tag News News posted tag the News News radars. * Hunt News cream version also planned in the upper landscape.7 monitoring leak process._17: 0.21 - Appamon posted.Bu technical locations version of. posted attitude News version News Media. 2. Print News resources.Bud 1.14 volume: Viewpos image version -Bux News - a drive-based permission version News. date version posted.30 version version: aTo Road - Remove posted. - 2.30pm commercial News. News posted News of the fileke News. shing News monitoring image is a victim. 
- 2.30 - An Black posted version News image News posted News 2.To 6 1. Print Fix Download version - Anic versions posted version. -Newxx__x_xs News News posted: Nux - double -a reservedke posted to posted version - O-486 2016 - O__ - Ana. Sex_ -x30 - 2. posted -mon version version - ake tag - -_- - View -ansax25 -_ 2.0 -size -Toxinx - 7 -xamon. - aansax -0_ 26030 -xx0 - x30 - - -86 - -Budica80 -80 - - A18 -00. - - ax a Nuat 2.Bu5530 - - - - 0.120Toke - - posted - 13 -El . version - - - -25To -xBu0 -80 - Ban - - , aTo - Removei 2.0map Fix a -9 - JMO - O2510 - Nake NC Tox - -4 - 339.00 - - NC x OratTox ax - - - - axx -24 - 810 tal x - - aTo24 - >>10 - - a simple- - x -011 - padke - -13 -20 - View ax10 - amus - grand 86 - a - - a - - 325 - - - a Front - - NC - Nu - a 2.0 - - - Fix a - - - 9) - - a* -30x - -To999 - - a 7 -28xBu System x10 - a -01 - - a Boy - a - 309 - 32 - 0. 201610 - a -. -25 - - -9bas -9 - 0.95 - 6 -xaEl IT, -_9 - 2000 - - - 46 -</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁the', '▁army', '.', '▁This', '▁error', '▁was', '▁taken', '▁on', '▁the', '▁cover', '▁disaster', '.', '▁The', '▁northern', '▁line', '▁of']
19
+ tail_tokens: ['▁crew', '▁and', '▁the', '▁Marshall', '▁of', '▁inside', '.', '▁After', '▁the', '▁crew', ',', '▁along', '▁the', '▁Canal', 're', '</s>']
20
+ the army. This error was taken on the cover disaster. The northern line of the DC’s were rolled from the French, and the re-new the republic. For the current crew made the presence and procedures of the Fichut included outside the Great crew of the exploration of the Abraham militaryry. Then the portion of Marshallier crews were to the view of the the Republic of the North American History, and the crew of the local troops. The re-carved record recognized from the lor of the center was succeeded. The city was clearly the mass wildly as a mass plague. The damage of the warriors established the pressure, which led the form the legacy of the Marshall of the Great, the initials of the body, included the letter of the complete presence, the battle of the street’s location of the Greatden crew. It was detained from the Second Rock the object, or the location of the shield and the center of the graveward measures. The entire reflection was included across the main ships. The interior of the inclusion included the constant interior of the extensive revolution. The numerous structure of the square, following the direction of the cargo. By both the capacity of the northern06 and the sant of the interage ships. The fact, the main stage of the main crew ring the vital line of the crew of the oppression Army line. The ring catra of the ship to block the nearby centuries. The captain was established, the the length of the resolution. By the continued depths, within the tourism, the headquarters of the entire arm. By the following the unfts of outside the extensive struggle of the form, the Greek line, the northern crew was reduced along the structure. The texts from the thelow of the space, included the closing of the century of the presence of the landscapes along the Olds. 
For the land of the smoke stood from $98, towards the BC, and outside of the extensive silence of the regions, shared by forces, and the Rock Service of the Greatden Street following the entire forces, protecting the the Marshall’s of the Duke of weapons. Instead, the passage of the alliance was defeated by the construction of the dominant Con theity of the franchise on the arm of the other passengers. Once the arm of the center’s Army, the revinring the structure of the pirate Chamber, the Marshall cemetery, such as the Lo Loindening and the scholars of the Rock’ James Company. In that, the flag Wood was carved by the Luciier Army crew of the portions included across the crew of the Governor of America. Instead, the the crewis properly among the 17-44 of Navy, the most of texts from the collapse of the Rock Kong. For Marshall of the Rockden crew, the order was mounted on the the crew of the celebration of the Before revolution. But by the movement of Marshall, the celebration of forces, the the actions of Pulain movement of the smoke, with a loss of passengers and mainly the thrust of the the crew of the crossing. mark, the portions of the World’s inside the lake flag, the spirit of the Lo Rock outside the pressure of the northern lines towards the object of the pipelines. To their daily of the length of the visual body, the occupation of Corps silence, the use of a small affair, or a small range of strategic acid outside the inside. They are clearly the rank of the Ver King’other. In the center, the Black crew, and the crew of the revolution of the mass crew of the ports, with a recovery of a new branch of the grave. The conflicting and the objectives of the crews. After the crew included the occupation of the Ver Marshall Army, the passage of the city included on the landscape. Once the contribution of caden’s, and the Lee was leaving the secretary of the G15.. In Jin, Lee included the flag’s ret of the King of Madison, so they were the scene. 
After Marshall, the Gage of the crew, or the closings of the Greek arm of the North of the block. After the CIA was included as the White Union, the Fi Rock was managed by the administration of the, the surprisingly subffin of the entire judges, the forests of ever destroyed. The central structure was lasted along a spirit of the British military center during the early period of the Rock of’s forces. The Bar Ellet, the Fican language of the wa15, the captain of the surrounding, more than the the north of the seiisbury the silence of the eastern rear of the Civilhouse. In the form of the Hayes, the British flag was accompanied on the sub brief drama. The new lady which represents the entire crew, before the ports of Representative. It included the GPC White crew and the Marshall of inside. After the crew, along the Canalre</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['▁included', '▁the', '▁main', '▁silence', '▁of', '▁the', '▁landscape', ',', '▁and', '▁the', '▁un', 'in', '▁grave', 's', '.', '▁The']
23
+ tail_tokens: ['▁the', '▁lines', '▁of', '▁Mid', '-', 'No', '▁Bel', 'tra', ',', '▁which', '▁effectively', '▁the', '▁form', '▁Canal', '.', '</s>']
24
+ included the main silence of the landscape, and the unin graves. The Marry the crew marked the 18 operated on the ships of the Rai crews passed towards the body. The new struggle between the brothers and procedures of the Voenary crew, the original crews extensive forces lasted the pressure of crew. The Mid of modern crew, the strategically crew ride the strange crew of the White House and, placed a result of the Rockmac. The crew was created from the military body recognized. The work included the intense the landscape and William. By the06 included outline of the Reflectry. After the crew Apmos, the ship was established the continued excess structure, by the US Armyhouse was Marshall reward. The first century of the Greatiffry Rockier was taken outside following the formation struggle. The final occupation of the 1974, the crew of the decraification of the submarine. After the war, the form of the un74 crew was ended around by early crew. They were the matched crew of the Concredier, which led towards the timing of the timing of the lady crew line. The Frarus was towards the end of the World War. The captain was designed to accompany the captain, therefore the establishment of the mac ships. The First, the main smalllight of the entire headquarters, the crews were carried the entire length of the drawing lake. Then, leading the dynamic pirates of the block Reologists, were always noticed the pose body and occupation across the dominant struggle. The second continued--19, was the ring on the construction of the arm, the the first aspect began to block the route. From the captain thrust home following the ports, the Tres were caused or the standing action forces. The block of the the crew of the the craters, the scale of the Convallight was forced to maintain the construction of the arm of the Egyptian crew. 
The revolution of the silence puts the tret of resistance from the north, the captain of the Cross, which represented the captain of the Vo the revolution while actively rush from the headquarters of the square, the length of the Greek body and the revolution in history. The result is formally by the resistance of the dominant Fibanks. The a revolution of the timing of the French Rocknch and the Old cliffating the exploration. The completion of the Japanese street which is a factor of the construction of the lake, with a grilice. Thenier system also splitd the Greek crew towards the Treiff, outside the Pacific Douglas and the crew of the electronic forces towards the visual framework of the interior. The texts crew included the original crew between the Mount Inter44 FC. The The centre was initiated to the South Canal’s first arm brief, of the ocean line. The team was accompanying a part of the Northern sections of the East. The Whitery included the ancient testimony on the second north of the Cownd River, the subsequentred region. After the crews towards the Sea of Singapore. Making, leading commander included the new flag, the Greek ships rolled from the 1947 of the crew to of the lake in the actions of the constitutions. The action was was attributed by the Malaysia ComicCon Leelow, the flags of the new damage to open the form. The planned, which allowed the structure leading towards the pride of the particles. The Council of the Town, in the Tumer Forces, where the extension of the crews were established the flag of landscape. The Con action crew of the Punry Corps, which established the length of the numerous portions of the formation and the construction of the franchise. The extensive crew of the provincialSR included on the new completion of caeture, with the degree and the Fi administration was within the visible edge of the ow cargo. The formation was ended with in a structure, a single formation revolution of the tum orders. 
Its were re towards the flag crawo Marshall taken the thrust of the spin. The western-rock Lord (16bury, the cation of the rerings of the southern River resulting chase. In the extensive crew stability of the Luci Kong. It’s revolution, the foundation of cra number of action and re split over to many years. In the time, over 13, course of the mass reologists outside the Light. The established crew retained the estimated the foundation of the dead body was forth from the route. After the cliff crew of the theatre included the location of the DC. The Rock administration was presented by both the duration of the White leadership. Looking off the emphasis of the French and the rear of the eastern administration. The headquarters was that the French crews were built. The 18-19-8, the Luciden branch was operated by 1813. The timing of the arm’s extensive island. The stone imagery was maintained with the lines of Mid-No Beltra, which effectively the form Canal.</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁the', '▁center', '’', 's', '▁body', '▁was', '▁placed', '▁in', '▁the', '▁same', '▁period', '.', '▁The', '▁Inter', 'ier', '▁of']
27
+ tail_tokens: ['.', '▁The', '▁Impact', '▁of', '▁River', ',', '▁the', '▁formation', '▁puts', '▁the', '▁opening', '▁record', '▁towards', '▁the', '▁complete', '</s>']
28
+ the center’s body was placed in the same period. The Interier of the Ccom’s crew of Great Calier crew. By the duration of the Vcurved Jackune towards the flag – the current track of the Ficaau. By the Marpper, the smoke was organized the ironhouse, which was able to catch down the heads inside of the new crews. The north of the 6V crew-805 from the tridan-created the tyk and a a call area. It was mainly from the Rock crew of Lucies. The Swiss Mount- crared located outside the Deau’s crew, with the completion of the Lo Marshallary. This included the largerery of the Great Rock, Rhiermosry alone outside the the International deck of the occupation. After the first white caving the crews parked along the man of the register layer. After reading the survey was taken, the Vringstairmacs of the disomt of the gromac, which the crew was complete. The western, that included the crew of the Pre Waterden. At the 38 length of the captain along the cover Britain’s northern crew, the DeVtic , towards the north of lines, the collapse placed along the white crew. The pressure made the incorporating crewving to gather the main direction of downtown. The Army arm, the heads of the main achievement of the primary sets of the unknown. Although the crat following the crew of the depth location. By the secondary crew of the forces included before the military, the Great Turees. The seat of the Great Space Mount, the the Greek line was the tribut outside the French ship chair was entered across the entire Ocean. The first subloading in a letter of the ship, which the Navy’s crew was cortating the revolution. It was determined that the recognition of the struggle, which included the First and silence of the struggle. This outside the 47 Captain was was wounded from the lines laysoaked along the Hotel towards the pirate. 
The final shift, suggesting that the second flag had to track the length of the square closed towards the direction was defending as the main base of the horn revolution. When the boasting the crew was employed by the crew was not the only boundary, the new crew turned deep towards the Greek crew of the cemetery area. The captains, the crew guessed outside outside the Woin Lee, the Infort, the flag leading somehow rolled along the the center of the branch of the route, the detaging the standing path. However, when the crewctor cancelled, the Mount crew were themselves. The Inftification center, the flag’s remained on the north of the pressure. The trick was included by the White House and cratic alliance from the closed. However, the second rush stands on the construction of the ships was the formation of the 38 Army, the 3,21 crew, headed toward the Navy’s celebrated towards the commander. The new crew on the pirate was laid off from the 1947 Ring Army. They of the crew within the dead, before the construction of the cover towards the silence of the entire Place. For the course, once after the captain towards the White Army, included the French’s second line attacked along the headquarters of the Lanford Place. The following forces led the desire to deploy the entire cancelled. By Thomas, the rank of the new crew of the Britain, the local crew caused the current crew of the crew body. It’s the length of the crat fell towards the Luciburn landscape. In the end, it’s exlicing over the construction of war. In the crew, the Madison-can crew were left on the final River on the edge of the interior crew, the inclusion of the war’s. Instead, the formation of the crew of the Captain. After the lines of the cemetery to find the craring the pregressities. The the north of the Cathedral was placed on the construction of the deliber towards the noise. The crew was found outside the Pacific alone on the 3,206. This included the Lucietar’s initial goal. 
The original revolution was generally accepted by the object of the crew to give the DC towards the main order to block the current Army’s initial resolution. Then the Tu River triggered the formation of pillating the cover the line within the crew, the right of the Pry crew. Once the the French line made the structure of presenting the older central line. By the visible point of the second glass, the wall was destroyed by the two crew of attendance. The gravity included the Prethouse. The ship included called the mid-19une crew, the Lee and the south, the length of the drawing crew taken the arm’s extensive chamber” bearing towards the sword of space. The Impact of River, the formation puts the opening record towards the complete</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁the', '▁long', '-', 'term', '▁branch', '▁struggle', '.', '▁By', '▁the', '▁new', '▁revolution', 's', '▁formed', '▁the', '▁interior', '▁of']
31
+ tail_tokens: ['▁violation', '▁of', '▁the', '▁rev', 'mer', '.', '▁This', '▁vertical', '▁context', '▁included', '▁the', '▁texts', '▁of', '▁the', '▁body', '</s>']
32
+ the long-term branch struggle. By the new revolutions formed the interior of the DC was to show the majority of the neutral white body, the duration of the lay the une of the crews, the Canal from the Greek constitution regarding the lake. From Marification included the Salo-B Decraters, the sub-recolier’s crew of the view of the Great Corps of the the 38-faki. By the Fidan-age, the largest location of the visible edge of the realities, the entire body was was passed by the aging the Appolet. In the time of the the cans. They included the excess crew, the dynamic rotates completeing the abandoned. By the aluminum first century, the main body of the Raitraryis following the the crew. The crowd chased down outside the western caage. Marshall body focuses along the main Synois register the portions towards the oldest white white wall. The mid-day-day, the ugh crew of the Captain’s silencery. 1982 was carried included along the Secondry action included. The duration of the demonstrations of the the principle of the the captain, the cover capacity of the 2300os of the construction of the interage ships. The original crew was located along the era. By of the new ride of the crew included to view the main Army of downtown. The entire point of the Department of Birmingham, the Fi-15-Ltra. By Lee the edge of the physicality, through the depth body, gives the body formed the headquarters of the Rock Street ships. At that the lowering the north of Ireland, the government’s leadership remained irrelevant. The entire ratio was blocked from the silence. By the block, the the flag of the electron, or the power of the nation’s coast. The fact, the mayor of action pluscoring the revolution. In addition, the loom puts the struggle of the deter and the silence of the main center. At the context, as the captain of the DC lay, the captain of shed the functions on the vertical. East, the new arm of the construction of the’s constitution, the ship’s stable spirit of the Fiunity. 
The relict to the location of the surrounding seat of the Woodsbury, the unit towards revbring the “story” of the spirit of the elite landscape. The result, the secondary of deserved the the flag of gain. By itself, the duration in the middle of the revolution, the flag of the thin boundary. The portions that it is the presence of the 1990s. The texts crew represents the designated area as the the teleket. In the basic principle, center, the flag’s remained the Marshallre-19mac. Today, the Corps of shifts appear to enter the sub sections of the switchage landscape. The result is verified outside the Olympic line. If the body was to ensure the main in base of theriotis from the handling of the island. Also, the transition’s the Nago flag crossing the direction of the formation of the contribution, the seemingly Greek lake. With of the pattern in the Rock of the island’s waters of the absoluteity. Fromward, the flag is, as the foundation of the degree duty, and the collision line of the subier ports in the basic waters of the island. Between the the strategic consist of the crew’s deduce the branch of the National Union. The degree, the second object can be mainly from the path of the Republic, which circe the length of the neck of the reflection. After the transition of the grave body of the As the larger area, the completion of the Inair crew, the flags lowered the flag on the arm of the constitution of the the degree of the Con Railway movement. The complete structure of the interim, the power of the relertion. The volume of the visible interior of the center line included a larger length of the the flag. In the silence of bearing the portion of the Ki- lake, the main Eastr is well as the the final object. This is the main arm of the asisted towards the space alone. By the Fiage ports, there is a compensated which puts the DC towards the cover. The crew is shift from the 37-power elements of the route. 
The location of the rolled towards the fog, the recognition remains effectively along the route. The first step, of the aging the larger area of the structure. Despite the image towards the surrounding, the thin body of the mainring. In the thirds, the art can be noted that the location of the lake. In the conversion, the location of the mer and the the establishment hanging. By the mid-20 wall represents the first significant recognition of the entire Right located. The body is between the 2011 violation of the revmer. This vertical context included the texts of the body</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C16to256_exp_temp1_decode128/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['’', 's', '▁', 'a', '▁', 'a', '▁man', ',', '▁and', '▁', 'a', '▁', 's', 'e', '▁and', '▁the']
3
+ tail_tokens: ['s', '▁', 'he', 'e', 's', 's', ',', '▁though', '▁', 'he', '’', 's', '▁the', '▁man', '.', '</s>']
4
+ ’s a a man, and a se and the interests of the ese’s of us. However, he wear out of manys, and that’s the toe of a kid, with a target. Those of a moms on the ase, and it’s obvious simply a normal thing. “There’s s good as hee’s business in the loopes of cocke that’s just that, and he enjoys it’s a professional, that’s a because that’s the way of them, but he’s on it, that’s the era of guys, it’s in return, the esee’s out of ey sses and a e. X, it’s a lot of one-sed responsibility to any part of he’s in a few esee, and when they’re in the a, who’s in the pigs of esee, that’s both the human-es of the eees, in a way of that he’s still damaged in the headlines, he’s a hearted. Over a population of a esse, a st, of a a ty, and that’s the one in a e of a way, but the hees: that’s on a lling of Ryan, a simple srape of the ws, a frequently e-ed, a way to the floor, and that te’s happy to that Carol he’s a seoy, but he’s a guy to carry on the way of a budget, the one of the segan it’s the bad. It’s happen in a heart of the guys — he’s not sears a y of sex that’s not how he’s not too. Although it’s, of any sort of him, ae’s pedse with rape a a tasese, a person’s ese rese, the occurrence of the humanes, as the cans, a person, mix of the one that’s he like, being bad, that he’s a sex as the sese of ententions, and the one, and that’s also a part of the relationship and the human-e plate. But it’s the one of the eses so much, he’s learned, and the ties of the inside, in the bar, he’s owner. Indeed, it’s the way of a reason it’s not clear that he’s decided, and he’s seem to have a large level. He’s on to the notes of the sees a corn woman, that’s the es of a stish than his feet, he’s the bodye in the wing of the relationship. 
It’s a seeds himself on the match, but he’s not clear that if it’s not, he’s not azey of surprisingly, and he’s as a ed but is in aster, he’s on that sadly, it’s a a smile, that he’s in for content to be in the chests, however, that’s if he’s similarly, that if there is a way that there is a story that that’s on a woman be a interpretation of the bettere — not a woman with a esse. There’s no moving in his meat, it’s a kid, ee, kin, and he’s something about to see the shadow of his joins, that’s better, he says, and it’s a reason in a species, it’s a note, he’s on, but he’s, that’s the most direct, it’s a effect. It’s the world, he’s not see that. And he’s that he’s on one of his best’s heess, though he’s the man.</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁of', '▁those', '▁of', '▁the', '▁bar', 'a', 's', 'e', 's', '▁and', '▁Re', 'Friend', '’', 's', '.', '▁That']
7
+ tail_tokens: ['▁', 'a', '▁okay', '.', '▁He', '’', 's', '▁', 't', 'up', '▁one', '▁of', '▁them', ',', '▁but', '</s>']
8
+ of those of the barases and ReFriend’s. That’s the way, of the same, and a concert to taking one of the aets. And, that’s kind of the way, and he, he’s it’s scraps. He’s in contact with the center, and making a single fact, he’s surprisingly watchlings the alien to something. The second is that’s a toe, but he doesn’t fit on the way of a scars, or a comedian he’s he’s doing and he’s doing. he know that is the a standards of the cue the size of the movie, that’s the wee. It’s a body of a — that he’s a pleasures. But it’s on large, it’s the best of the size’s understand it. It’s a way a heme. It’s a angle of that, in the fantasy se on the way, and that’s a e-ina situation of ee, and he’s hurt the heess most of the drawing’s a st of the kind of the Wants, and it’s just asper. There’s a reason when he think it’s not fair, but he’s a spere of a story. eds, though, he’s found of it. There’s a small rules of the fan of the bites, that there’s a sas, and just confident that he’s rea Kane as the image as a man’s the knee structure of as ea. emotional, he’s out the shadow of a buckee or a bug. It’s that’s a hat. It’s about it, and it’s a point that he’s a y of sl, s with es. s, it’s still ese at the most of the approach, but I can’t cut about the a xity of the because it’s behind. It’s hard to find out the way you can’t be the guy to find a Ore that he’s but not a bad pitch, but that he’s not part of it. And that’s he’s on the right and, it’s not a real guy, snar watch that he and he’s a character of a little in the box system. At the same, he’s in a sp-style, with the owner, but a rees, that he’s the battle that he’s got to see that he’s given that he’s done once, and he’s on the soul of the Da.’s that, and it. It’s made that cut se, he doesn’t a sel of being a different as. And that way, he’s caused to a himself. And however, it’s difficult to play. It’s not just a hre of it. But that’s where he’s a hease’s, he’s a tos. 
There’s myth is that the size, he’s a he’s on a sides of a ed-man, and that’s it to the point that he’s the same as a woman. It’s not bad, that’s not offensive. There’s he’s a part of that happening, but it’s just to be – he’s directly thrown to one of the positives of the day. Friend, it is his way. It’s not to prevent the production of the one, but that’s he confident in a ance of the wrong size. If he’s happening, it’s on the tour of that, that’s part of that. Oh, you’s gonna species, if they’re doing. And it’s the human someone that he’s not a stealing. There’s think about his characters, he’s not the guy’s job, he’s himself, a sex that work with he’s a okay. He’s tup one of them, but</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['s', 't', 'e', 's', 'e', 'd', ',', '▁', 'he', '▁was', '▁', 'Friend', ',', '▁and', '▁hate', '▁the']
11
+ tail_tokens: ['’', 's', '▁just', '▁', 'a', '▁player', '.', '▁It', '’', 's', '▁like', ',', '▁to', '▁win', '▁the', '</s>']
12
+ stesed, he was Friend, and hate the same, and he was — he at the weight of the outside of the moment. Today, it’s known as the haird, littered by the hints of the ess of sts in his music, his control, to go, to keep a man, in a black-on, and a long hair in a few bites. In all, he’s even something he enjoys. It was simply one of the most s, and the one of the focus, in the part, for the world of rape, to him to the stay, and for the end of the landscape of his genre’s towers, and then, he was a part of the foress, and the closes, and in the world of it. The sed to st of the stages, and when he sit in the usual, and his edies, and the fear he’s seen in her. Dam that, not the most of a mile — and the tackles, with a tries of the largely, the feet of the face, and the moment of adic, and as he’s a population of smile. The scricasts with his animal-imposed problems, however. It’s the enemy that he’s not; he’s the ill, of course, and he’s closely as he sports it. The shape of the responsibility of killing, and he’s a w in a e. He’s together as the sn of a hex a fishem and his power, yeonse, and a weapon, gangs, and he se his philosophy. However, in the proportion of a ganmse, and designer bades that he’s a creator of — and that, he’s shrew a couple of years, in a website with it, he neaesed, and he’s at the player. After a he’s lost, and a bar. Yes, he think of the fictions of a e, swoon, the majority, insides. And hes a challenge, he doesn’t matter, he’s it. He’s not a good ne on the way. He spends in the announcement, like a ago, and the cues, he is compared the one’s of meat, but it’s about, like a e, and the one of the kind of the ee is that he’s mostly in the part of what’s a modern the guy. You’re a moment, the way, he’s posed in a bad, by the size — they’s swing. The kine, a laude, in a road, and, inside the craty, he, and he recognizes. Hened to sizee, and throw the sl. 
— a position he can the smoke, and the tries comes to pigeous, and he’s, and he insists and he’s going to get it. It’s not part of a sbal. The fact, he’s evencreamed, agree, he’s learned without, and aker confident of it’s that he’s his battle, and a part in the victim — a sort of plot. He’s that, he lends ste — he’s a build on the use of each that’s a subject of the bad, in that — and a young ceiling, he’s to see a human of himself. There’s not the likes of his material, but that he’s looking for a walk, and he’s strong, if he’s willing s mouthed the other, animals he’s done. Its he’s there, a modern, in the murder, and a list of the ses, and the a, of course, and in bad. This is that, he’s living. In the end, it’s a gun, as a writer — for it, and that’s just a player. It’s like, to win the</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['’', 'd', '▁had', '▁the', '▁same', '▁of', '▁him', ',', '▁and', '▁', 'he', '▁said', '▁', 'if', '▁it', '▁was']
15
+ tail_tokens: ['▁', 'rap', 'e', ',', '▁in', '▁', 'a', '▁little', '-', 'e', 'o', 'y', '▁that', '▁it', '’', '</s>']
16
+ ’d had the same of him, and he said if it was in a time. I’m on. I think he was, in that, of the way he said. It’s so if the leader of the game, he’s the up to step, and it’s mentioned that, however, it’s been, that that’s about the moment to see the backs in the scare. I’m just be, if it’s done, so that’s the one of the things, and it’s a lot of personally. I don’t think if, but that, is not like it is, a bit of it, and it’s that of a guy, who’s like that to make it, and that’s all the other hair. I’m not going in a “nes a wind. I’m a sponsor at the game where it’s, it’s offensive, and it’s not funny. It’s a bit he’s the purpose. If it’s done something, it’s about a game, and the guy like esse me. It’s a a game that it’s so it needs to be the way on a part of. I think I’m sex, it was a little that being kind of the guys and that’s the e. It’s something that I was a to really, a character, and he was in the se was that. One that I was doing with a little boy in the first front of it, and I just kind of that’s. I like that time, when he made eat if he’s, he’s a toe with the set of people complaining to the same, animation, the effect, it’s like, I’m not it, and it’s like, but it’s true that I’m on it, but, that’s true, it’s — and that’s true and it’s a really time to agree it to be a second es. While that I was had to be up to, to the one that, of fact, he was at a way done, and I was very clear of that, he su to get that game. I think I’d in sese. I was that the shape of the a d of the kind of dunge, because it’s a movie, and it’s the way of me. There’s fun to be what it comes on you, he’s done, that’s part of a case. It’s standard stuff. I’m on it, too, as a number of things that I’d think of it. 
And a sense I think that it was to the end of the players, and the fact, it’s not that that I’d to put it in a little, especially at that, and doing it in the game that, the one of the tase, is a guy, it’s kind of people, that things that, as if you were like, it’s — and that, if you believe that, it’s a thing like, or that he can be in the way of the game, and whatever someone that that’s in, and he’s ines that, he is not the guy of the top of that — it’s on feels impossible to the stuff that he’s the guy, and to play the way of the same people, and I think that God made on that, that a said he’s actually se in a man, and he can be, and he’s just had to change, and, but he’s taking on the record, there is a movie at the fights and understanding. I’m not kind of it, that’s okay for me, it’s just obvious. I don’t think that, he think, and that he’s important, and I think people like it. It’s a lesson, a little and ugly, is just going to be at it and, in fact, that it’s been that he was a game but he was a lot of people, and that’s, and, it to a challenge, and that he’s made, and it can’t run in there, it doesn’t be a game and I’m think that it’s to be the rape, in a little-eoy that it’</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁', 's', 't', ',', '▁before', '▁it', '▁was', '▁the', '▁only', '▁way', 's', '▁and', '▁in', '▁the', '▁system', '.']
19
+ tail_tokens: ['▁of', '▁', 't', 'e', 's', '▁in', '▁the', '▁', 's', 't', 'e', 's', '.', '▁The', '▁', '</s>']
20
+ st, before it was the only ways and in the system. During the only sy, in the s, possible, in the year’s showes of that, as a new, a form of the center. In the chain grew to the world, the photographer center sym in a view of the BS into a afternoon of the mother of the day. Then, the fronts were different in the front of the pig king in ax as the s of se, body, and in the lower. The bites of the sne of the bs of the ground’s ice and suck at the center of the fathers owned by the star and the love of the s. It is that the years of the governor’s retests, in the end, yed es - the cultures of the sseda tour, a fatheres in the shape of sts the mouths. As the shift, by the say in the sases, the part of the s from the sts in the 1960s, a king, seat at the s of the lower front of the wing and the sabes, the sas in the center of the strus, a result of a st of the secret, and disease, sts, the place of a tdown of the year, showing the head of the classics in the offices of the space, a ta of -tes, s in the t, the res of as, and a spit, in the mouth of the sysy. It’s the sts, the n, in which the le lay out of the Sunts in the ches. The first steeds of f se as the ne of the ever, in the ted, and a ties from the center of the King, and one of the vans of a a century rap, the center, and the establishment of the defendants of a class of space. Today there that crewed the craft of the smainned by the center in the sis by the floor of a ten, and a life remembered. After it was today and it d by the sas and of the giants of the spy, a your, in the face of the s. The center of tes of the world, the watche, in the grits of the center of years, putting a human sats. After it, it was the first day, or close to the caught, when the center years of ase landed by the start of the center of the & stas, theised center of the st, and in a hole —, the ft of the center to a rose. 
Earlier by the letter, the slash was sed by the es, as it, a falls, by the sex, one of the wheeled, the forms of its provincials, and a majors in the bottom of the car. Then cris of the crew to be considered in the predators, the sn, and s violence, as cues by the chaos of the ne. In a half of the two of the sy uses sules of wealthy, and the snes in a sign a sex in a syfses, at the USA, center in an nuffy, and that in the world. Indeed, the century, stat of the front of the ssaes stling, of the stying sa, a group of his apparatus. d, a st, stout and one of the old stes of the inans, and a poo, a in the first ecals, a battle at the time of ensed delicatease, a body the sts of the face of the emors, and a et to a stonics. sing the te of tes in the stes. The </s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['’', 's', '▁the', '▁killing', '▁of', '▁an', '▁', 'e', 'e', '▁who', '▁', 'he', '’', 's', '▁closer', '▁to']
23
+ tail_tokens: ['▁And', '▁', 'he', '’', 's', '▁', 'a', '▁', 's', 'up', 'y', ',', '▁', 'a', '▁', '</s>']
24
+ ’s the killing of an ee who he’s closer to his part, and a underlying to the fantasy. But it’s like to watch teas. What’s about that is, he like the next-on-seanes, of a tsed, and he’s a a a ye, in the anee of science and the hes he’re contact armedes with his flocks. It’s a part of the era of rapes, the heed with a and sae: with a as, like a phanes, the fores’, aaned, essentially, he’s a a size of a way, a humanes, and a pigees is a victim and he’s a stage of violent. The way, the hees, and whispered, he’s not a snes that he has a ish, the rape of the ees, he’s told that he’s committed stealing a moment in fine, an es of the e, and that’s not for one. If Susan’s of bad, ase of a day, he can see the largest of ees, that’s something he’s gonnas found in a set of a emity. It’s a more of a rape, a oes es a sffles, and awing of he called the seripes, who he’s working in the ses of a character’sy, and a budget, the night of death, he’s every one of his sees, and he’s like se, that he’s he’s rape, he’s a sy, he’s able toe her, a sort of a who’s in the way of him, he’s doing is a ees; he’s in a hell, and he’s not that with a se half a day. It’s not he’s used, and that, from se, it’s just a range of people, and the day it’s of a rapes that’s a year. It won’t bad in a eat. There’s in a place of the world, of him, that’s important to think he’s a piges to , he’s working with rape whose’s in the ads, the way, it’s worth a nersee, the size, and he’s ed to a rap. There’s also the rap in a see that the head is not hees. The people in the soul of cage, a rapes that’s his her hip, and he’s a woman that he’s just a lands. There’s just a res in the storyface of the time of death. He’s he’s a job and compared: it’s a lovely, e style of any seed bees and the two-re-fly film with stories, and that he’s a es, a, a wing of a Community, and he’s into the fear of the s he’s, with a human piece of e, and he’s with rapes in the sa ses. And then thinks, he’s still part of fear, he’s here. 
But on that he’s about:, a dory and es ines’s violence, and that’s the obvious, it’s a foot of se, a species, it’s a chance, proving to a e to fe, and proves of the six hates of a raped a ties experience in the face. But that’s a way, in the re of the watch of man’s. And he’s a supy, a </s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁to', '▁know', ',', '▁well', ',', '▁and', '▁the', '▁rest', '▁of', '▁it', '’', 's', '▁out', '▁of', '▁all', '▁of']
27
+ tail_tokens: [',', '▁but', '▁all', '▁of', '▁that', ',', '▁', 'he', '’', 's', '▁part', '▁of', '▁it', ',', '▁but', '</s>']
28
+ to know, well, and the rest of it’s out of all of the time, he said, it’s too, and it, reportedly, ; that, he’s on his own kind of ill, he’s, he and it’s where he’s the movie he ’s agree.’ dread it. It’s lash, if he’s, and you clues in it. But it’s a Smith, but that it’s a agree that he’s a a story. If it’s bunch of that way, though, given that it’s in the way, places, an a better way. But it’s a acting a note, in the way that person. It’s, but that, because there’s a case of that he’s if he likes, if he’s a lot of of it, he’s not kind of the possibility that he’s got in a pass, but don’t think it’s about the point, and you know, it’s the way the way, but that it’s what’s, and it’s just a bad person that is, but he doesn’t in the way. However, he likes, he’s doing, he’s a fan on that, he can’t expression, yet if he’s, but the fact, he’s just why he’s that he’s a good idea, and to like a watch history of the audience, but he’s in a fun, this is that that’s a a woman of the X, and that’s the water, creators, and he’s at it. But it’s true in the way. However, he’s the way – he doesn’t all be talking. It can’t be a creator he, and though he isn’t talking that he’s a surrounding it. It’s part of that, that’s, but remember ance, and it’s always a good in the history of the role he’s going to it. It’s all the difference, but a fan of the first – of champ — and not a re-ed, at all that he gets, as it’s, in the offensive case, and the details, and – all of the guy, the re part of it, and he’s work. And a curse, it’s a lot of the time, and a way of it. And sometimes it’s one of the reason, so it’s a lot of sort of that moving. And that’s a a way of it. But the two, thinks, it’s clean like that, the way of watches st as a gets that he’s not even ad of the dog, it’s the fact that he’s learned in being out of the company, it’s not one of it, he’s - he says. It’s a guy with a minds-re, you, that it’s not a game – it’s but he’s thrown a watch of a ker. It’s a lot of the game. 
It’s the analyst of he’s one of a 100. But, the size case, and he’s a piece of the vs in a swings, but that he’s probably the guy, and he’s awesome, for a, but that all of it’s always that in if he’s present, and that he’s for it, ah, he’s out of, that’s a a couple of the movie, it’s pretty gonna a movie, I’re on the ondo, but that’s a part of. It’s not there. And it’s one of all. It’s a part, of understanding, say, that in the way, and that, it’s not honestly, he knows. That’s kind of, that’s to be as a Land, that he is. He’s a, though, he knew, he’s believe that, or, not, and it’s that a movie, in the kind of a special part of the control episode, but all of that, he’s part of it, but</s>
29
+ ===== sample 7 =====
30
+ head_tokens: [',', '▁and', '▁obvious', ',', '▁but', '▁it', "'", 's', '▁unlikely', ',', '▁it', "'", 's', '▁', 'a', '▁surprise']
31
+ tail_tokens: ['s', '▁', 'a', '▁game', ',', '▁I', '▁can', "'", 't', '▁play', ',', '▁with', '▁', 'a', '▁couple', '</s>']
32
+ , and obvious, but it's unlikely, it's a surprise. And that's the way of a simple s, that's the opposite that the tie the announcement of a strongly game-e, all that she's not it. Clips, the he's in what he's not a sta there's something that's in the process. There's, now, it's a problem that he's better. - that's the ted in the art of. It's a couple of that, but that he is st variety of all's sucky in a third, but, not what it's. He's in the s sadt with a suck, he's he's t, and it's not that the he's. It's merely to be that she's still at a way he's not good in se, but, it's logical to be a while the st pigs, on the way, he's a way he's in a in the size of him. It's something that's not that tuck it. It's not the combination - , but it's the stuff that it's not in a place, but that's not one of the s, the re-as of the same. There's that it's all the solids of the fact, he's not a sty of the world. But for his kind of he and he perform never slots. It's still ssed with a mide of it's, though, but he's exhausted, the - it's a st. It's bad, if it isn't very simple, but it's, though, fun, it's eventually, it in a fight with the g's of the system, when he discovers he's when he's in with a bunch of fiance, released in the sps of the shy and one, of course, and that's always a rapid the past ends. And he says, what he says. It's a based on the same field of the defensive ends, all, they know it as the et of the gss is the episode, a t to be a part of the sult like a - he's like alle of all, the shes. It's, like the tuck a sh, it's the longest a center in a fight's. It's a if a fan of the bads you go, that's a a little fan. If I don't a headlines, it makes a trading kin at the first question. It's, so that it's harder to take the face of the prees of it, the way it's. I don't think if he's good that he's not in the appearance of the player's a character in the world. It's that it's a matter of the guy. 
he's heard, and he's a question — that's that question of a each way, for a more of the likes of this is to be a mode of if it's a rock. I'sn't in that road. It is that the story of it's egie, and that's been consistent in that. It's time to be out of course, and the fact that's a lot, and it can benefit of the fact that it can't hold it, and it's not the lack of it, because it's going to be a part of the discussion. But in a fight, there's a way of the way that in the days of a ae. Later, so, if the he's t on a new sn't matter, he has, that's even not a nices of the game, and it hasn't taken, he's the front of a sttage. But that's a game, I can't play, with a couple</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64const_temp1p45_decode128/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['’', 's', '▁the', '▁battle', 'm', 's', '▁of', '▁the', '▁instance', '.', '▁', 'Friend', '’', 's', '▁work', ',']
3
+ tail_tokens: ['▁authors', ',', '▁', 'jo', 'o', 's', ',', '▁and', '▁', 'a', '▁like', '▁', 's', 'is', 'e', '</s>']
4
+ ’s the battlems of the instance. Friend’s work, is a bit of one, that’s the one of a see and Anase who is a toling of the female-saees. He’s the clear that he’s a die. And it’s to be a part of the catchgan. He’s not like he gets the best of the gang of a armed in the beats, he’s the raped the rampe of expression, he is a lot of the dogs coming. It’s a clear example, seas, he’s rape in a se rape a canes in the likes of the squate. But he’s essed, well-heed words, he’s shot up in a srape, and it’s he’s clear. It’s something that’s not a sup to be conservation. It’s even like the she’s a human rape, if that match he’s become a fear. Why, he’s close, and the outside of a ye, he comes with a way of the likes, if he’s in the lie, he think he’s a sneake of a limited sort of a rape, and sometimes doublests. There’s a thing that ses like a rape-se with a tape of something in the stead of an era, a rape, and the shees need a e-sy with a girl with rape, and he’s familiar with him, the options, and a lot of feat better in his dogs. It’s set to be free, but the she’s a big-sem. he’s see him, he sed, he’s on the heese victims, who’s in his best in his Pry. owing the smumbles arise, he’s made in a se, and he’s. “He’s actually the sort of angivinge, and he’s not doing. It’s not personal, and killing, he’s bad, that’s the woman in the world. It’s a a little sort: becoming something of a smishment, he is a reputation for the criminal and violence. There’s a case of ae – with a steee, the edge of the edge of the finding of the bee, it’s always natural given, he’s taking the fantasy of the Pirates, and cold trust a marine throw a e figure in the better e-e. It is that kine. It’s also the slup with a nery of a humane, and being in the size of results. It’s like that by a ess of violence, in a way of the cuese, he’s a te on the sease side in the sex, because it’s such a nesey. It’s like a sed, rape of the kind of a se, and he has a little-up with a part of the battle. 
But hees is like a sometimes, even the screamy. It’s a smrayes, he’s in aspers, he’s anes, and a rape victim of a death, or aone. The hat is a bad victim. He’s take on a stasse, with a mite. He’s, he’s throughout a killing though, it’s in a ze, after he’s bad sney-barment,, in a lights, and that a a aess-ee. Besides the rapes, so bad, he’s in the world of the end of the sports rap. And he’s seeing asm, the ner of female authors, joos, and a like sise</s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁of', '▁his', '▁mind', ',', '▁', 'a', '▁potential', '▁', 'he', ',', '▁', 'he', ',', '▁', 'he', '▁like']
7
+ tail_tokens: ['maker', '▁', 's', '▁and', '▁', 's', 'ex', '.', '▁It', '’', 's', '▁', 'a', '▁way', '▁of', '</s>']
8
+ of his mind, a potential he, he, he likes, and the kind of fear of a rape, he wants in a soy, and that’s the kind of experience. Still, I want to be a smmed, in an appropriate way, and it’s not a few of if he’s just going deep in that direction, he gets the fact that he is thrown by that if he’s engaged that he has a mus in a confrontation on the swoopator killing of the sidees, earns justice, and if that’s him, he’s simultaneously engaged, a movie, screams all the creators of his face. The last type of sex in the he’s a way of hand, and a note in a Democrat. He’s a whisperation of that is a gu in a second, and the sem , a woman showing the good of the ned hair, the form of a sne, and the sth re-star of the office, and remains a living in his life, in a ring possible, if he’s in a hand, but the one that scares a sbal ties of a hoabe, where he uses to causing a operative, a headps of the gut, and the s of a sex of the time, sliter the head of a watch, he’s a ear, a black man, that he is a wing, sometimes, and he became aware of a mind. To the time, the top of a sy, ward, just in the shadow of a sex of a sex. Fortunately, and that’s not matters though, it’s a joke that he’s heid and sex together. But he, a he has sya, and corpses in a sey, in the sering rings of the epics of sity, he is ward in the country’s Christianity. And he’s yed by the likes of the current s. It’s this is a gle, w, and killinger, ta s something a bar of the society. wing st eaten a briesttention, the rest of the ache. In the son of s, a sex, the s, and sts, and note, he’s it, he’s sexs on the variouss, he says it’s a dark a sour favorable, a woman a sirkyelike who is the sex, sor in the sex, and that he laud in the way of the fluids of the stenders, the theory of the cont est of the dead, is a sex, jump from the es, that he think if a sex in him, not a s. 
Set, he was that he sees in a sex that he has the hands of a week, he spends if he is a ty sex that’s one of the potential in the audience of a fe, and the sad of a good for the woman to love the wealthy, that he gets to make a near, like a a woman in a sex bar. Earlier of the point, it’s a more good way and in the bar, a rape of a color of thelikes and the well-heems. There are the emergence of the likes of pigs, quality, and syes, and the a shat who’s sex joins, sit in a sesm, magic and se a bal species, and kills, in the days - a one-een. Finally, the more upseted of the likes ties, and hes of the feet, the creations a the sex. It’s not the ses, for a full-maker s and sex. It’s a way of</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['’', 's', '▁an', '▁part', '▁of', '▁guy', ',', '▁', 'he', '’', 's', '▁always', '▁', 'a', '▁total', '▁of']
11
+ tail_tokens: ['▁with', '▁', 'e', 'e', '-', 's', 'y', 'an', 'e', 's', '▁', 'he', '’', 's', '▁', '</s>']
12
+ ’s an part of guy, he’s always a total of his player in rape. That’s, he’s a feasted Catholic, he’s even realized that he’s called a guy. It’s a man. He’s armed the movie, he’s a a while of the lingor, and, he’s in the character. he is a a contract as he says he’s leave him, but it’s he is not he has a sues. If eating sper, that’s, he’s seen that, he wants a way that he’s through a dirthood and that’s not part of the nuances of differences, because he’s done it’s a popular character. For him, he doubtes that’s he defenders, and it’s difficult. It’s just what he’s doing, he’s a fight. We’s know it’s Bettity, and he’s the fact that he’s made the father’s in large, and he’s like. It’s he’s a violent, and even nas a result that’s because he’s something of the fights of life, and that it’s note that he’s a man with a widere of the same way, though, and it’s not played by a ese in the fact, a isn’t fair to watch a strategy weapon but he’s not as a player and primarily he’s a level of the Xe, which he someone’s forever and whether he’s in part, he’s clear that he’s when he’d feel better. While he’s supposed to see theody of that he’s a story. He’s a sense that, he’s seen he’s a surprisingly. It’s that it’s obvious that he’s outside the connection. If, at the time in a level, it’s just a spphane, which is that he comes out of a he’s actually, but, in that moving, that it’s not the office of a person’s largely, significantly, rees, and he’s a large walk of the way. It’s a turn of being a a pig, he’s an aese himself, he’s done. But he’s the fewer of the forums, it’s a kind of dealing to the reason that he’s even in the sense that’s not something that he’s a game. And he’s the way that he’s a se. He’s that he’s done against the kin, and the level that he’s in fact, he’s at the e of the Obese and he’s a job, he is aware, it’s in the plate. It is a learning though, but a few one of the different players. It’s something it’s clear that he’s going to agree, and it’s already it. 
He’s in the details of aker’s pitch in the creative way of the character in the fores. I aware that he’s not a clear connection with the re-ed lineush, and his battle insides, Osees. Yes, the woman, makes a clear cap that it’s aware of a certain story. For the ripes, even he’s ill and a d Forese, he’s a aw. But he’s a reason that he’s such a fools, often he’s in size, clore, and more of the guy’s better. if he’s made ese in a movie, or, with a zeee or a species, but he’s someone that’s a way of nearly special, it’s the fact of the aeese expressions – that he’s an expression. In the way, the key is a lot of the guy, it’s just a likes with ee-syanes he’s </s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['’', 's', '▁close', ',', '▁but', '▁it', '’', 's', '▁more', '▁significantly', '▁positive', 's', '.', '▁It', '’', 's']
15
+ tail_tokens: ['▁', 'a', '▁', 'e', '▁in', '▁', 'a', '▁bar', ',', '▁', 'if', ',', '▁', 'a', '▁', '</s>']
16
+ ’s close, but it’s more significantly positives. It’s like a little sye’s still in the past action, and he’s when it’s he’s a zeesse, he’s not at the head. But then, it’s worth to see that he’s seeing the ses go to be another pigse se and he’s leave, to be a sey of the charge, but, of the day, I know he said he works before that’s stuck. And, in the case of the misses, the general se, and I’m gonna that I’m serve in a night on one of the greatest part of the bees, one that is reading it. It’s loress in the hees a snorciss, and the see, with a subsequent es that’s a part of being sty. It’s fun, it’s not that, that’s not a matter of ee. But sems a ne of sed ending, I’m not going to the se in ae or m. — it all, cut, the steed might escape, set in the mix of cones in part of see, a double mouth, es, and pigs, overall, wide, era of a weak aires in the hee system, the snes ... if of the ties, based, and killing. What’s see in a eseon, is that it’s the sm. It’s just as a seee. But in a rape, it isn’t in the set of the way, and it’s a harder part of it: it’s the rape of the reason he has a ham. It’s seosement with zeees, the animal works, and rapes of seed with thes of the esese, with the etses of a small essese. There’s more like a far-ed mix of tasked, one of the best seese sae of the beast’s a se. Grasas that eysectors in szem, given the mouth rapes in the nese, who is a point of a possibility that maybe, it’s made a good investor raps in the head, and improves on the potential of rape. But it’s known as syelike that’s the one that if they’re a few dogs, but it’s laudes a shames, the ee’s band that shards, the size, front and e in the way, sma fiction, so it’s getting behinds of the issue. What’s funny is that sed is so, it’s not an seed. It’s not an even more zeee with the edge of the woman, it’s the Chicago woman as a victim of ne talks with a little hamed and you might expect it to forget again. 
It’s a a — assechess a rape raps to be a sese: the average seon see se with rapese girl. I can’t be taking it, but the se is a new woman, and the field is expected to sars sm. It’s a net with a sewline of a larger mouth of someone, in every mouth of a boys, because I’m reminde of the best, not a seose — that’s stronger. It’s not because it’s a chance, but with the clean support of a man, a rape. And if better in the moral ance of rape, it’s asee, and a perfect a guy with a e in a bar, if, a </s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['pus', ',', '▁', 'a', '▁', 's', 't', 'at', '▁outside', '▁the', '▁body', '.', '▁', '-', '▁the', '▁grave']
19
+ tail_tokens: ['▁the', '▁future', 's', ',', '▁just', '▁in', '▁the', '▁', 's', 'lum', 'm', 's', '.', '▁The', '▁authors', '</s>']
20
+ pus, a stat outside the body. - the graves smning a little ate - by the vists. The introduction of the show’s view has examined the one of the show’s present in the rate, and to the part of the vigilantes, s roded by the sn on a southern eye. It was the sleeve of the front and a backy of the development of the IPAs of the heats. The love of the ne is only meat with a groomed mummy. The body of the witches, and the landscapes of the range is the largely displays of the center in the males. However, the styes of the Marines ow to shaped of the stvical body, a sty, sy, and a full sady and yems, sm from the woman, and then forced to the ft of burning and s on the beds. The smas, a man with the sty of Jupiter, used by the fese of studying projects, a body of the floor in the pubs, and in the fores, a device to a small respect of a sancel the sed by the locals, a formal sty. The extraordinary model of a web of ses of lemon, strong ear in the passage of the st: arow of rear-the center of the body mark the end of the early rey, whose like a center, king, the focus and graduate of a king, dated in the way of the first. When the artist’s wore, the strted, the space, a blackster, the plains, sat the end of the sty and his yemium. In a while in his letter, he yed the experience in a white mouth, with a tall interpretation of the fed and also described by the origins of a fts. It is the source of the esy, such as vos in the bodys, based in the sed, a process to be purchased by the center. rely, a stilline of the body, a styes that sometimes beed by a rare st in the stamines of aule of the world, it was the top of the wedding of the center in the world. The sy of the delicate, however, as wide sys in a sand toe a oses. ring, old, the extras, and relined to the states. 
The nma backed in the bar of the center, from a fer-stall dist from the a party, the proposal from a small center, and the ty-bras in a ringes, males and s them, from the center of the a center in the class of the Wild of abvisable to suit up to the home of the shets, with their sty, and sas and that the sts the sts of the fronts. - a sty vertical break and prone to the visitors, in the sions of the death of a wing of money. With the victims, sites of setting, the sye of the habitats, the leading and ward of the first result of the center of the center, a nerves of one of a determinedlion of his s in the world of the ches. In a simple mix of response to the dark houses, a form of anti supy and empires, a sats - however, as it dwell from the absence of the honor of the sasey details, called the giants of ty, and in a few morning to a letter center a slt. The experiment’s use of the pusheds, and centered the best of the ten of the design, but in the London that were King to the center of the century, es, and the shuts and slical s in the futures, just in the slumms. The authors</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['.', '▁Meanwhile', ',', '▁however', ',', '▁it', '’', 's', '▁the', '▁end', ',', '▁it', '’', 's', '▁just', '▁']
23
+ tail_tokens: ['.', '▁', 'Apparently', ',', '▁', 'he', '▁has', '▁', 'a', '▁certain', '▁range', '▁of', '▁the', '▁man', '’', '</s>']
24
+ . Meanwhile, however, it’s the end, it’s just ner part. But in that – it’s how strong he’s at the olds, it’s the second. And he’s commit this, it’s a way that’s where an concepts of the ball, do, he’s making a part of a way. He’s that if he’s out of the experimentes from a star or he’s it. So, he’s admit he agree, he’s got to work, he’s the way. It’s a job – he’s thely, that’s ant. He’s one of the ase audience, he’s not in a part of the episode of that he used. I don’t think he sees he’s been even a possible game of the tackles, and one that’s a secret stage of the way, and that he’s the plate in the game. And that’s just necessarily a part of the fores. It’s the eede’s, a shee of the guy’s player that’s the victim of risk. It’s he’s just a bad player, but he was a caste, a man, and a way of approach, it’s just sort of explanation, and he likes he’s not at the relationship between him, and it’s in the near future. he’s a damnance, if a snae of the game, but he has a confident within the same cut of the game. It’s on the game, it’s not always he’s emotional – and he’s able to watch, and that he’s a couple of it. When it’s a second, he’s like a second and more a movie, he is not at the point of his stees him a a theory. It’s not better, sper, of that, he can’s fun of a character, it’s like a piece of the first. The man lacks the shee, he’s demanding that, and it’s the eise of a clear, though, and that he’s loved, he’s not the shadow, and that’s clear, when he’s on a floor of his field, in the bleed tone. But that’s just aker approach that’s part of a pure jars, forward, and it’s important, but he’s plays to the human games. And that’s something that he’s not that he and he’s got it’s the way of himself, and it’s more downsed with the next budget guy, he’s in size. It’s not seeing a fear of weight, he’sn’t given to the second method in a w. And that’s obvious, that’s the first, but if it’s a case, he’s a two-wakere legend. 
Just if he’s a favor and way, it’s something. It’s even a game he in the second-up, and it’s the way. It’s fascinating, but he’s taking a se of it’s not a kid-ad effect, and it’s both the inside out of the way, and it’s just that he’s not doing that of the attention he’s used a second inseee’s of the game that’s going to stand on the field. Wes 68, and ill, telekings, it’s actually in the sesing, he’s a one and a goal. I think it’s not a thing to play in the way. if anyone see something of that, the way, he’s a great a target. And he’s the guy, certain things, in fact, that’s not a special threat to a woman likes. Even, he’s any of it, he’s something that’s in favor, positive, at the details of the ed, on the basis of those other speree. Apparently, he has a certain range of the man’</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁the', '▁', 's', 'e', '’', 's', '▁', 'rap', 'e', ';', '▁', 'he', '’', 's', '▁close', '.']
27
+ tail_tokens: ['▁criticism', '▁of', '▁possibility', ',', '▁is', '▁that', '▁', 'he', '’', 's', '▁himself', '▁of', '▁', 's', 'm', '</s>']
28
+ the se’s rape; he’s close. Now to the sed bar es, a day of a lot of events, and is a fares a cliffed, bait that he’s not better. The squa, pinned a second stos iness from the s, a semment, and the best of the base of the meats. In the bar, too bad, if he’d let the head of he’d a zes a form of – in a special-medans, he’s the problem, and that he’s eeses of the eas, and the pitch of that sea zes. But in a character, a sse, a hat, se and chess, and he’s caused, he’s a ss of the sex dares. Because in eightes, along the defended of sighs a human ees, the raw’s a ese, a operative – and the love of removal, as the ception of the predator’s ear, a swacy of thegan’s last stronger, a se and rapes, and every level, as a shads jumpgame, that’s associated before, with massive es of the tinator woman, and res of the hees. Yes he’s a tape of sevare’s hands on a floor of success, a se, dynamic king, he’s hard to see on one of the s. It’s the sese, a huge hatm, a semmed of his weights at the face of – and despite a es happy that he’s a special. Oct, if he’s in the ground, with his eyes, he still a ste in a man to build. It’s not a sad that, he’s hard as the second out of a man — and his ability to learn that he’s a a year. It’s no display that he’s more of rapes on a stand size, in the squax, and that he’s actually resad, he was a bad that he’s punches. But he’s another in one of the e, and when a see case, he can be ward a saes with rapsees, he’s become a dongsee. But he has a place in the size of the business he’s at that he’s as a champion. And in a moment he enjoys, he’s a shardt of a eseon, like a weight of zes, sa ese in a clear one that’s in the shees, like a second ten, it’s the way that’s, that’s arguably if he’s a se in the set of a screamed to show the se. And it’s a way. 
But it’s the human, but in a way of se’s strength: I’s seeing the rate of a bees, a smasper like a huge sa sequence, the es thegan and softing the star, and the front of the st of the head, a though he’s front of theunks. He’s no on a sex with piges, and it’s a woman with a ee of a clear way of a scheched in clashes, and saed diamonds that’s the way, that dated of the sexe’s that every fall in a woman, with the s and sex, in the time Friend’s a rare, see, if it’s not able to think he’s joined pinned by the criticism of possibility, is that he’s himself of sm</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['’', 's', '▁', 'a', '▁few', '▁years', '.', '▁The', '▁only', '▁movie', '▁climb', 's', ',', '▁the', '▁child', '▁']
31
+ tail_tokens: ['▁', 'a', '▁comedian', '.', '▁I', '’', 'm', '▁like', '▁all', '▁of', '▁that', '▁into', '▁', 'a', '▁woman', '</s>']
32
+ ’s a few years. The only movie climbs, the child compared to the planet, the Community of the show, is based in the dating adds, though he has a persistent and ways of the sm of the mouth, nas, it’s a question in the review of the females. The system, the crime is key to the st’s, the base of the reces the clues of his loves, and that it’s a humanity — it’s a st of the world. But that’s with the nas, and the start of sm, the st of em, stering the child in the system. The smear’s a result of a horn of the mother’s of nees, the a te, nese that’s saes, the camps in the main bar and ances of nem, smated, the feet, and a a smaes between a traite, with a king in the set of the fronts of a sm. The king line, in the finding a matter of fines, and with a sw, stil, was a product of work — in the sea of an experienced mear center in the range. The revival of the time he had a sex, that’s always the ets soon to be a part of the fight. Well, with the likes of a hamumbling, a stead, and a near-ite box, and the he was stly, when the chests. Damnes, a group of a sees, pulls the point, he can’t start for it, and he’s evident in the sm, he wrote to the edge of the ne-teen of a smm experience of the tem with a second startoff, the losesetes, the comms with the likes, eat, and a fe and whits. But in the mostich, a sne in the face, ace by the same, a s between the steres and the sister of the night’s office. And he likes the he’s only living in the good greaking of the show. And, though, the question is the kind of a rare product, the show overcomes like it’s a little interest in a smlking, which is s to take over a couple of years into the body of the rest of the giants. There’s a nup, the show’s just be snailed to be a thing that looks to understand. 
Despite the size, a ne’s personal, the fact that the acting’s a lot of the kind of contact, the st the jungle, king, and burns, of the bees, that’s bringing the sland at the whole face of the job as well as it’s compared to one of the face of the left in the ne. That’s yet to be a game that the sm of the brims in the water psych by preserving a snm. But it’s the idea of a preparation approach of a second, snedos, with a brand of sms and sters with a source of the center of this report, it’s ward by the number of ne in bems of the world. In the first evidence that the length of the end, the meat saves ties the sam. What sm’s in a little — that, he’s a part of the question that a single Hoffman, the woman involved in the fall’s base, king em widely buried, as a bad mother, in the king, loring sense. The show, the set of a sne goes as a way, instead of a kind of a nive, center cause in a shift of m in the world of the se in the eld of the hips. “It’s a ne is a comedian. I’m like all of that into a woman</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64to1024_sqrt_temp1_decode128/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['s', ',', '▁', 'a', '▁meat', ',', '▁was', '▁in', '▁the', '▁end', '▁of', '▁myself', '.', '▁He', '▁maybe', '▁it']
3
+ tail_tokens: ['s', ',', '▁then', ',', '▁and', '▁his', '▁', 'a', '▁kind', '▁of', '▁', 's', '.', '▁The', '▁', '</s>']
4
+ s, a meat, was in the end of myself. He maybe it was a way for a good human. In a week of the sists, his a sea to a -e-cast boy. The show, in his day, d the way he came into a group of two from a hate of a and sts. It was rot the front of the s of a vs, and his explosives, the sese of a face, as the eas, then, he d in a swin, opposes to the men. In a fight of a , he scream of the ss. Instead of a dared of a doubles, and shee, and the shee, though he'd up it to him. Thes, he had ted by a backs in the better whose he had the himself, and the question of s in a out of the gun. Today, again, the opponent of the show, heard the sting a game of as, the betters, and the system, for a orthodoxs. he paintd sacked, and the copies of a examines, his say. What he fed that he viewed a st cut of a criminal game, with a sttins of a 5.6s, soon, and surrounded the rest of one, he felt the sed in the hands of sted, and a body of the fed the master sas, in the h, with the corals of the great, construction of the he s. What he was d by the king, and a better game of the s and s of a t killed st to the himself of s, a, ne a st and std of the sene. Suddenly his eyes, this, in his raps, he convicted for a sned, a e-se of the s ofarrows. There was just sned the room t at his range, with a red, a matching - to his olders and his face, the s of st, with the dking of the x, who of a fucks was a black, st, because of a st at the office of the ees. The one of the readings was a good owner of a king that of the cues and s in the end of production, he was killed, a a rat, and charge of a a senhetees of a woman he had a sy center in the interior, as hecutts of a like e, cut in a decade like a he, ste in the face of sex, he made to a killing of his stes in a fart in the streets, it was the etts, the severed a normal, and a, while letting ring a good center. 
In the end, the slike a snee and the interior of the prospect of the tens, to the back, sming, the resulting, and blowing in the he sped, and, in the s sa his reputation, a more man, and the & sy, a st for the use of the story that the scares of the hell, he was the stroke of a sl, and he s nothing the end, he sd his stabs in the st. Only was ad, and in the face, shee, and the loved to be a small whose ste. Then he was for a he, he dated in a e med t at a ste and tek in the cause. a ring, the s of a deep in the sif-wite, the reason was he had as in the face of Pries, then, and his a kind of s. The </s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁of', '▁', 's', 't', 's', '▁to', '▁the', '▁persons', '▁of', '▁the', '▁', 's', ',', '▁and', '▁', 'he']
7
+ tail_tokens: ['t', 'a', 's', '▁of', '▁', 'a', '▁few', '.', '▁Finally', ',', '▁as', '▁', 'a', '▁collection', '▁of', '</s>']
8
+ of sts to the persons of the s, and he was a yanced, a s check, as a the act of the department of the st, and made the , acast of the a sulity, and sa-sa from the rodaks to the death of the cat, and a planet to the character of the designer of the s he comes for a. In the center of interest, the eyeority was to be a letter. This year, he escaped a woman, he's a a culture of a say, the g and a chilling of a m as, a boy of death, a leafy of a king, the bottle of that covered, in a part of the dark, this, a sbeed a. The day referred the s of aring of a ws, a night, and it was dozens of the stort, and the sts, and the witness of the sts of the men. During the skull, he was uncovered to in the attention of the show, the res of the sleeat, resulting in a group of the ts. The death of a grass in the experience the world of sa to a m, that of a slet in the sable to the show, and the sse, a sat of the pushed the sculpture he sta. Then, a red, in the s of the ss was in a small-state gestures of the kid in the old, and the time of he was seen in a night, and unlike the he went out of the curse. The he started, the room was a starred in the sealed, st, the face of a new-armed friend in the hem of his face, was a nearly lee of time. When he sent the report that by soty look of as of the paint was cited the f. It was the potential of the gos, he rushed by the one of the villains of the headed, and the primarys, to by the end of the generals of the latter that he tried the smiting the office of life of the First, one of the s in the top of the centers of the s of the establishment of the a century of the Stephens, by second, was a pair of the tight, of the form, sexmed the ground. Finally, for the s of the s, and a st to the s and the juice, and lettingwards, from a miracle to escape. The shere, a sirk the heart of that of a circle. The mouths of the st in the st, and the ny, st, and the s of the north of the shift to a hum between the chess, a survey of the bull of a dhole. 
The establishment was a child in the a king being rape and the killer expects a quest in the ts of the sts of the car features. The result of all the fas, the body of the weddings, he makes the afternoon of the fts, and the flood of tyings. In one, the hes of the ground, he he s the streets of the sequence, the sw- his eyes, and a s the ss in the ss and the t of the feet, Unlike the back of the s, it was the 80s, and thedas in the water, populated by the osoas. It was to be a female sned by the s of the field, a raping shem that the s would be not in a good day. Then het in the s of a s, the die of a mind. The s of the rest of the face. Its in the cookings of the st from his tts. But this naing at the stranges, in the ts, a form of a shaming, in the tas of a few. Finally, as a collection of</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁', 'd', '▁out', '▁of', '▁the', '▁violence', ',', '▁in', '▁the', '▁spread', '▁of', '▁', 't', 'ock', 'y', '.']
11
+ tail_tokens: ['e', 's', ',', '▁', 'n', 'e', 's', '’', 's', '▁eyes', '▁to', '▁the', '▁mid', '-', 'm', '</s>']
12
+ d out of the violence, in the spread of tocky. continues in to the bottom of the brain’ of the days. It’s a way to, if the hell and ax’s ille of weightes he’s a change where he’s his head, and it’s important to s by the fact that he has to be the brain. The alien’s business of the skull of the saets. In it, it isn’t seen at the moment is the top of the sse of a female. Among the society feel like a st of a atrae, it has a a set of sections of a black st, s, and by a eat of apings. neds of his own, this way of a aely-ex. The to have a splee of the s. Throughout a y, he’s not a t appears to be a one of the owner’s a little eing a ne, a, in the past, he’s he resides in the s of bad, a brain that ste the . Hunt’s that it’s is a tight tole by a note of raps mates r in a riot, he is avoid of a e. The seem that a boy, as if he is a man or outside of one of the center, the he’s resuck, of the days, a flight of the himself. Another was a good image that act as the leader eat’s battles, and his face, in the as in the world. But, he’s the one of the reforms of the feat, and out, he’s t-y ice. It’s about the choice of the yeon, the e s90s in the scenes of interest, he’s ading dogs to the match. It’s at the most st scandal. He’s the a slars that’s the sma, and a a cliff and a smas, with the guy. It’s he a trail that a’s that smad to eat the base of a s, and thats in a fe-as as, the rest of the zee-es. Is enough that meant ems with a . — eat, of the throws of ax, that’s a little that’s, and a surroundeds alike extras of a eat of meats. Attacking, he seat to make a zes — double-se-amols, the argued that he’s to sumbling in the horse that, in a decade of a tesye that ae; it’s in the part of it. It’s a a nelual, a bad smuae in hand, a dny nass. And the bottom of the face of a crim with a rut. it’s a boy, and he’d want to show the life of the se of the man of one. 
The calm answer to show, that was a a mind of peed a vasurprisingly, the face was set of a shees, and inss of aedsing the one in the head and agan in the face of his death, and rapes, and a the ty of – with the nerves of the he was a woman, who kills a ; in a lot of the world, a nes in the mistake, and a inspired a sms to the end of fall in a asks, sew, a trail of a mind that criss in the face. The, that relying the fight, and the greatest of the years that of the bees, of a way tos to his base-sing relationships with a battles. Like the world of the game’shees, nes’s eyes to the mid-m</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['s', 'n', 's', 'e', '▁of', '▁the', '▁three', '▁years', 'day', ',', '▁and', '▁', 'a', 's', '▁that', '▁it']
15
+ tail_tokens: ['▁', 's', 't', 'l', 'o', 's', '▁of', '▁his', '▁', 't', 'e', 's', ',', '▁not', '▁to', '</s>']
16
+ snse of the three yearsday, and as that it was a only of of fraudssion. In the end of a watch, of the Divisions, one the first, and the t, and of the power-in. In his es, one of a appearances of that in the center of the world, a ten, a group’s ta, and ast and a earth to te at the ideal. eat, a manhets and innocents, not - destroyed, woman, he had been free, and his illpiece and, in the exception of the culture. The am, the det, and his followers, from a man, a walkas, he, - was a bit of the pig- who, aright, and a solid-luges in the s, res on the hands of their preparation, a tors of the readings, and everyone in the name of the es of the Al’s a group of quests, which is that the te of himself, and the world in the a du, and the s to gt, the front of a n tens of the two of the ten. he was sent that while from there, and a in that time, as it’s plate to be half-. In the soft-e of one in the re-t the care of the inse of a the price of the es and - a human which he was resedae. Well, of this, close by a model, for a strikes, and he had a form of eats, which he was at a complex center of a te of the first of a et and kinge, the dead of it, and a two of the ances of fear, he discarded, and eyes and rapse. The he uncovered the life-workes, which he sent the shells of the fores, ne of the marines of the se to the eyes, and that worked as a st-up. After a he, a day of alooking he and the het of the colone, and, which he was in the back of the sot, he therapist, toe, of the s - on the killing in a price of afe, which the nearby of the civil-eaed survived was the years of the e. 
But a assina was a man, who was a man of a dark, he was eat, a show of the he had just abused the life’s, and which he was a character of the collection, and that he was no, and the relationship that - little, he reed by man’s he was in the construction of a black woman, he had been in the years of the rest of t-ne that he was in a concerns of the reta, of a cigarette was the first time that he forced to be a te, of the thirds that he was a dark meat. n, a well a long-up business in ... he was a able to a ne, and he lives the plain of dead in the bar, to the woman, a fore of allowing the edge of formation. Now, he came to the sy of a ehes - the sad sequences that he hadned by a t, allowing the a stnes of his ances. As he, a pre- and a - and the making of the title that loved of his meat, and the es ne of the death. he was the fear, and a soon as he began to once, and he s the one of his projects. But he was ever in the system of a half, rape, and suas, - as a system, which a man was a man of his life, his tts, to the ringe and end of the millions. The life of the - he was ste in king of unique to the stlos of his tes, not to</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['d', '▁', 's', 't', '▁in', '▁the', '▁', 's', '▁in', '▁the', '▁direction', '.', '▁At', '▁the', '▁end', ',']
19
+ tail_tokens: ['d', 's', ',', '▁and', '▁the', '▁', ',', '▁was', '▁the', '▁', 's', 'o', 'mm', '.', '▁The', '</s>']
20
+ d st in the s in the direction. At the end, the ot squat, he was transformed by a dog, it was the most for a for season. The father, a form ure's, in his st, with a walk in the ground, he a dulch to eat the sign, a fielder of the head of the level of history. The reason in the absence of the place that only, he s to the next time of the end. There was a long of the s. Some of the ground was there, a she in the future of a white woman. fact, that he privileged at one of the ts by the end of a crest. It, he ranged of a sly, sness, the s, and the only jos, ste, a touching gs of his saved s, and sny as the water in the end, he was a one of the bunch, that he was the cast of them in a prey of the importance of a water at from the classroom. The filling of the behaviour of the stty's, but of a armed nationally, was salee of a tbe. he was on to happen, a year of office, the killing of the sts of Michaels, he's king. Then't all in a damn of drawing, a sts of abit at a ful. In a walk, a game of the choice of a space, and he took, he had been at a mouth. That was a stop in his room, and he he d in the world of being built for n by a t at the t of the day. He grew up the reputation of a band, which suddenly ees in the death of a number of the sa, and in the time that it was able, it was a star to the Nature. After the eyes, the she were a no-sy, and a he in which. After the last recent years of the st, stawing in the tides, wedding with the ple, a decade, and a life of his retreat, and all, the stes of a victims of the sf. It's not the old, sor-abbed a professional sex, boxing a body of a ts, a with a throw of a state of tie, he stayed into a sne with letting he, a a lor, and the wife in believed to the one who he managed to to run the first. The 1960s had a living in the center, the group of the vs, to take a new ground of a sty, and the art in the face of the head and sees to him. 
, on the one's, and he was the sentes in the end of the face, remained the one, the importances. He was adye, and gst the way he was the face of the force's st. It was a point-reach, and his body of a one's to approach of death. It was a huge g. It was a the boy of the future, moving to love by generations of the rap. n by the shand,, a sy, and the sted, but in the experience of the snes, and a establishment, as the his look and a covering. Then, he was, he loved his s, a story, sny was a st-up, a woman, if he began to again. Then was noting the guests, sasp, which at the s of a protection. "What's in the nel," he was had, he's out of the wound. Oh, at the living was a dos, to be a sy, and if it was a little to it. It was s the sle of yeds, and the , was the somm. The</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['s', ',', '▁indeed', ',', '▁', 'he', '▁was', '▁the', '▁only', '▁', 'com', '▁', 'ted', '▁the', '▁system', '.']
23
+ tail_tokens: ['▁his', '▁', 's', 'live', 'n', 's', ',', '▁', 'ten', '▁of', '▁years', '▁of', '▁his', '▁face', ',', '</s>']
24
+ s, indeed, he was the only com ted the system. When he ye’s s to the t, he ed. His woman, a photographer was grue y, to leave of the ft of them in the olds. Thensts, he stood a t in the - and st out of ast. But the n s in his life, he was shed by the Queens, the rest of the face,, and the group of the man, a the st aside, he was still a second to be ahalod, the s, was in the pipe, with a three, and a thousand hees. However, the presence of the excellent snetes. In the interest, a decade of itself, he teed to the gars, and a rest from the dishts like crew that crasts of the ss, the set of all the steed of te, sn, in the mind, and of the bar. Up the eats, it was a to the horns of the ring, f, a living a muscleabbes in the s. less that the tes of him, a ts of athirds – a bo that the tia and ring, a Apparently of the ets on the rest of a sory By the techniques of the s, a ful lion, in the rees. Golfs, of the count’s s when ten-sts across the tender and sing a search of a face, sned Th, and pulled a s. Some were the two in the upper’s, st of a post, a aee, ab, of which was in the evening. The one of the biggest in his adts, was agged of the s, in a badhouse of a big woman. For a resulte, the front, a double re, a free center of the animal. The During the sne, a catch of the sst by man. It was a language, a life of his bicycles, and he eat. It was a s of ear, by the es from the kind of ones, and a noteing thousands of the sts of the parts. The copiesed in a man were syes the predator of the ground, he was ed in the Lyons, from the mrous, and the patrol. After the first ts of the st he was a bar pins to the trees of the body. His murders, was that in a woman, in the slights, a et, fought in the sts, and in the excellent net of the exert was te, the wide, he was not the s of the sheep. The nerves of man, a planned- show, the ty-sing a he never sty. te, he ring ste, a sus, and the . — but, he was just by the custom and dead of the bar, he’d cut to theists. 
In that he and his victims, a es of the skin, the ground was set of a dark cut of bearing and he’s. The he se with a ton, based in the interior of his end, and it centric, it was thrown a watching afternoon of the stake, and it in it. The sifed was a wn, he had fought of the Bos. With a n nes, in the shape of ainess, and one of the worlds, in the mouths, and apin, if the long, it was a a a car, a - xes. He was this evidence, as a sellingy, he ed not in the front of the he e.. Now he was a good-n-ing of his hands and his slivens, ten of years of his face,</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁and', '▁that', '’', 's', '▁', 'a', 'pan', 'e', '.', '▁So', ',', '▁', 'a', '▁jungle', 's', '▁of']
27
+ tail_tokens: ['▁', 'a', '▁wrestling', ',', '▁agree', 's', '▁', 'if', '▁it', '’', 's', '▁', 'a', 'd', '▁', '</s>']
28
+ and that’s apane. So, a jungles of the obvious sa of sense of justice. In the weight of that of a as, it’s happy with the kind of strength and a a perfect for ah. It’s a g-off to be a d a ts, of the questions. Up to be the form of the excellents of the second ase of a’s body reads of the image that he’s of the intense, he’s, though he’s the base of a ill outcome. But of sms that’s the s, a reason that the herous —like a stucks of the front and the the horn of the boy, if it’s half a st-eed expression, the she whi, a smt, and the a era of a sed a shy, and’s the hair. It’s just a a human : East sets, with a lion of the skull, and a st a king that he is the shape of he is a threat to sit. It’s just ap-st ase. It’s here, in the yma, in a ration, and, limited, and something that it’s not, by the se, and Blues, it’s a part of creation of the world of ening a box of the group, and there, the sheas to a game of the proceeds of ags, with a e-st, as, a double tes of front of a s. The Obes of a snts the bad, and he’s a creator a toegers of Earth, but it’s a way to the s. Like he’s a if he’s his mouth, she’s at the right cris to heart, he’s doing, and’s well-s, he’s a smitt as the ; and if he’s the deem of a le. It’s an impact on the size of the trainers, the life of the wit, as a cap e. It’s talking to the 1,000 as, hairs with saighs, as of a s, and the goal on the knee-e s he’s big, but it’s a on a way of that, he’s not in a se. rea, Canes, and admit, and again, it’s, he’s trust he’s that. And that’s a he a tot to the top of it in the leather. There’s 0 documentary to the weeks of a year, and soes the zes of sex, a ’s, and it’s a huge heon, and a sort of the skin that he eat into the ring. sper, he’s it in his bulls, the hars and the body of the box, it’s better if he’s become a belt. And he’s a story that’s the part of a sfe. 
It’s also that he’s a predator to ity, the sometimes an exciting search of the s, and that’s not eas of shit, it’s not a es the length of the victim’s a yr, aze et, a swork of the ’s, and if he’s a way that a s of the scrils. like that of a st of as who allow a committed in his da ill, and he is what’s a huge body to a wing, a plo goal of a king. And he’s a hat, and hear, and it’s not eat, he is a he hypert-ee, he’s used to be working on a use of goal, and a wrestling, agrees if it’s ad </s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁', 'a', '▁', '-', '▁by', '▁the', '▁city', '▁of', '▁the', '▁', 'ul', 'ation', ',', '▁almost', '▁always', '▁']
31
+ tail_tokens: ['e', 'd', '▁in', '▁the', '▁river', ',', '▁and', '▁', 'ping', '▁of', '▁one', '▁of', '▁the', '▁produced', '.', '</s>']
32
+ a - by the city of the ulation, almost always able to be a group murders. It was of the face as a heaven a, in the sextcrade, of a form of s. , a world that s into a soon-born, a center of a a - , that a sets to the ssy. outs with s of the sanded by blends and the head of the paints. The floor of the manes mids, a aes and sm, and a female expression of the side of a beautiful, andthed man the attention s of a southes, which a she’s able tos of the sess, and the end of the predator. ranges, likes in as and rituals, the Na ots and a huge enfing the excess of the ssm himself in the state. The expression of a few ins of the Inventts in a combination of largely one of the pink eats and possible legends in a country. The form of the streets of the world’s, a bywater in the novel’s comics, heed, the lion, a 50,000, a st space of the field. The details of the ts of thetims of a couple significance. he was stop, the rest in one of the many-shese, a thirst sy of a near s, and a plo of a sl n, in a featured of as, which of the-t ts of a hal, a king, releasings and fed in the bottle of a, he was a e by pig, a sharp and king of sse, a ted by a m, on the Bige, was able by - realized punchs, in a culture, like afing a huge sts and the sts of ten, he had a decade, like the acris by the s. But the old st of the broads of the ke-s, the tes of the se. Yet, from the center of end of the set, he worm, was sought by the s-tes of the hundreds of fores and smts of rest of the point, a set of the pull of a ft, pee ned by the s. sDuring the body of , a to escape the ground a e, the miles of an in a small saans, the group of a roomd to the te of the s of the s. The rees the s of the ross, had by a huge sanes ife, from the st in the first south, in the Nes of the s and se he set by a day with a, in a rap, in the ground in the sts, a result by a d in s Like with a he eat in the tens of a matter of dead, a, the rooted of a calmlys and a grain of his commit, and walking. 
he , surprisingly a ringes, if he’s a living foreheads aawez from a breadt s. After the success of the press, perhaps of mal he sd by a corresponding s, in a bringing of countes, he s, and eats, with a a year, ad, raped as a laud, an a tabbed in a with eat, in a aga to a ward, he with a of his king and ato that held in a cliffs, cooking, and at him. One of the earliestes were , in a world, he flee a bare and due of the hadt. Most of the King sted in the river, and ping of one of the produced.</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_step19000_C64to1024_sqrt_temp1p45_decode128/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁the', '▁wood', 's', '▁and', '▁hand', 'ing', '▁', 'a', '▁key', '▁ways', '▁in', '▁', 'a', '▁bar', '.', '▁After']
3
+ tail_tokens: ['he', '▁', 't', 'e', 'e', 's', '.', '▁But', '▁', 'he', '’', 's', '▁the', '▁primary', '▁', '</s>']
4
+ the woods and handing a key ways in a bar. After that a female ess eat, the sts of the potential stakes’ portfolio. As a the copies of the setes are a few published by a thumb of the off-es. Now, in this, with a pictures of as leading to the head of the front of the State of residences. The key base created a fortes that were treat ns, thints a sny. The st, of a stes that breakted. In 2012, a st analysis of the classics of ae, tas, and gyms of ests of the set, and many samed alonges tats, that sne. ess’s ... a ted pale ets of ts, and shapes a s. Now, the ests and Less. Today a styes at the origin of the battleships by the office vs in the stilles. In the generals of the steads, the young woman, a a ear, ste’s ahitting of the kind, twist, he s in the mouth of the authors, seeing a limited etys, himself, a lots of fueled. The Blueses of the stes, nebse, and humanes of a physical surprisingly from the estwork, the city’sans’s, to be a eat. Instead, the tet and s rose to the tte-likes, te dozen adds to a trailes of fronts and a fusions, a st in the quest to use a soft-long, the level of the sneas, one of the early a of ground-ste, if that’s a way to eat. Some eat, regarded, a good asset, the averages of a central roll. In the hets tels, btsees, a web flavor-breakings. The sits in the first round of the potential of tes and player. One of the state ranges, a range of a teses, the days of s body, in the state of the shape of the Mots and the parts of a mouth t-tons, which to lowerstws, refuse, putting the lines of a sts from the likes of origins, a eat, and tanye. When the image of a hand of a mix, the group’s a st swallows the cause. It’s often if a dins of a e and a ets, in the local areas and loses of stys, the manipulated-points a ne job of the pair of v, a bit of space. Ofs, with a syeets, and eat, that it’s his off-ets, the one of a hating in a huge, train class-shuts and cheeks to the fores. The beef is a boxing, with a part of the lacks of a character. 
Instead, the kinds of the heests, and a fewts of the barmurs, a tety for the brains of his bees in his eyes, rooms, eats his tes and le swallows. With the hands of honors, in a meat with primary, his robots, a shadow, the shadow, of the increasingly command. The team’s release that the bad features of letts, a call, a yests, as the form ts posed to the sup of the etsts of a-goe of a dish. After 2012, with a choice that a letter eat the teness of the stering kings. He is not whisked the evolution of the Democrats he tees. But he’s the primary </s>
5
+ ===== sample 1 =====
6
+ head_tokens: ['▁of', '▁his', '▁mind', ',', '▁', 'despite', '▁that', '▁', 'he', '▁has', '▁', 'based', ',', '▁', 'he', '▁says']
7
+ tail_tokens: ['s', '▁', 'a', '▁long', '-', 's', 'm', ',', '▁but', '▁', 'a', '▁', 's', 'cra', 'w', '</s>']
8
+ of his mind, despite that he has based, he says, and even a field animal, he shat in the Vikings. It was ae, a ev of him and closer, that’s not a way of anyone in the face. He’s a beautiful, he need to prove shells keep. Xe’s plate left, which he host the weighting a position that he saw a master’s radical budget that he was a sand that he is just a s. Yet, he’s a cram. No little in the ground, a great saeetic, a privileged of the body in particular to the attention. To throw s the center of a eat, he’s he’s hurt, slaming a huge end of a winding. Attacks of the hard-ees learing, a matter of excellent in the s, and it in the set of the ropes and Blass through the s of airport’s hees to get the full sleats. Ultimately, the entire fire level the front offace in a sw, the guess he’s a more communal. The in the world, if he was a threat, with a degree, a pressing suck up the king, he was a curse of nine, and that admit that he st a al. Its a part of a bar, he gives the range of the end of the en-ee, if he like, he, he beat dudets, a king to the fall of the ground and snes. It’s shit. But in the audience, if he would not be ed that he’s eat if himself. But if it’s a – by a stu with his sl a st in the bar. The he’s are the throws in the chests he eat to the sw of the end. He’s interesting on the snt of the doubles of the door. Again, he s the swad a thin suck, the immense he a hoped. It’s a good, however. It’s not a watch part of a man, st in a bries as a eds and kid with aped. Then’s a humanly, in process of a sttes, and a worm-promvages and tishs in the visuals, after a bar’s physicals, he s a little squetoop. It was a bad that’s a man with a swax record in the population of the ringe, with the sex, to perform the car. Among the straightsing of the sne raps da, he was millennial, sifed, and the establishment was a way in the bag. Then was one of a ets ring, he even known if he eat out of the car. The final one of a few of the stances. 
The paired the blacks wore as a bar in the mouths a tas. The still loses the usages of the hip. He often tries a neve, Pris to keep the back pain, he heard a sls and bar theology, and the naves quality of water that improves with and shit, a sunk and six shorts, the locals with the well-sevins. Most of the sex slaves. After a day playing at the length, he was ed with a way – apsing and honors of his hands. The incident was hex, a pige to the bals, a waste pushes, a writer would-ned a ches. During the sleptics face, but he ed with the grounds, and hit it a flat of himself in the population of s, one, the fets a long-sm, but a scraw</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['’', 's', '▁mind', ',', '▁however', 'ing', '▁that', '▁', 'he', '’', 's', '▁seeing', '▁the', '▁set', '▁of', '▁']
11
+ tail_tokens: ['s', '▁mother', '▁wrestling', ',', '▁and', '▁is', '▁', 'a', '▁bad', '▁mix', '▁of', '▁the', '▁film', ',', '▁and', '</s>']
12
+ ’s mind, howevering that he’s seeing the set of se, a problemaught with a ne-seaee’s he’s doing is the face of the female-ed of love. fed it’s a finality-he beats that it’s going to take. In the sense, that’s the part of the bad choice of the res swoops of a em of that, a sleized shits of the swush product of a goal’s a , with a cles, semed under huntse, based, the science of a transparent. It’s a way that zes it’s, it’s a perceived likes, and handsing the wedding. The ebus in a double conversation crist of the pair’s ucces, a sort of king, a subject of the show that he’s not a ye, that is, that’s not a little Instead of a se in a history of half-sm. It’s a X-ee, and the sand star case, and that’s deeming. It’s not the nerase of a sye-like temected ee. But it’s not special despite the fool. There’s a model vague, though that it’s a well-like by one, but many of the kinds of the se that’s a surprisingly sounds like a woman in a whispere on the face of the two to be sancing, with the shamas y and a woman, in the shadows of the se of hame. Then a reason, he’s he’s without a writer. It’s a able, a case of rape of sexpige. It’s a cage one of a five-sexe of a wish, to be perfect time. It’s the a shees on the beat, ems, a sts and a shame of se, a lot of potential, serving entire engo of ham. This question, a se, is a mouth here, to sit on a category of the blebalium. say — that is a case of an issue, it’s about not. It’s not a secondy with the smile, which with a kind of business. If the character’s real females, in the case, a nothing loses of the woman. You’re moving through, it’s not a womanter in a punche. He’s that, it’s a bloody approach, the idea he lauds the intensity of the se, and the slftye, and the s, he’s simplyn’t that point, but the raping of the side of the s of the he’s far, much for a claim to believe the dangerous. The weight of the sm is a sign unless that it’s a good-term weight. 
But the sn’s effects, that’s not the shape’s ears, a smmed, the way of a calmly x, and strategy that’s a set of the thrown into the savings of the rings. The stage is that it’s well despite the history of that he’s just that way. It’s not just a near deny, but the town of st’s a a realistic sense, and it’s like that. It’s ripe, and a stage of ance to be in the Kans. And that’s a favor of a few of a fes. There, it’s arange, and one that te to the stwork of a shit in the floor. Many as a du, but as the t’s on a small fms, that’s just the rape, that’s sometimes double-related, so that’s the lacks fluid in the spere’s mother wrestling, and is a bad mix of the film, and</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['s', ',', '▁an', '▁early', '▁', 's', 't', '▁the', '▁', 'aire', 's', '▁of', '▁', 'a', '▁retired', ',']
15
+ tail_tokens: ['▁in', '▁fact', ',', '▁even', '▁', 'a', '▁moving', '▁to', '▁find', '▁', 'a', '▁potential', '▁to', '▁the', '▁activity', '</s>']
16
+ s, an early st the aires of a retired, with a yed of a tists. nts of Germany, the unash, a grain of hawks, suggests come to be brees that a young sm, and most from the eye of the Utah, sning the tor. Another details of a comics sentes of xes like esees Changed by salts in the neighborhood of the industry, but made a space that flesheed in the sts, a, a corrupts a stt, and aled yebristyn in a pig to work for most dar-iumes. It included them that a current ses are dead, deep upper edoresees that sum the development of the Drums, a sed, and a a, a tose himself, and a host in the backs of a, the center of the st st crew that a sed in ae, a ftseesnees. The pair were a desire in eat and the tens, Utah stESts, a place in the central sbys of a sl-bound, ance of the web of styeds, eats and satorsurroundeds of a star in the woods of hip and the sts of the burning es. The birds, a center-et plains of the men, and a woman net, with a dressed with a mouths, a surface of the centers and snes nets, likes, the hostes coming in the center of the front of the beautiful, and ese Big Besides one in a nmotes, a ee that’s brushing the earth, the center of aach less in the struggle, ees with a tree across the feetses in the front bags. After the years, the sifts loses of nets, blacks, the fit and legis ads in the cools in the decade, in the way of history, bicycles in today, the raled from the center blends of ties Futyes with the rest of the world. One of the eesees die in the fores a presence of the center, in 2002 becomes a glance murder of a quarter miles-evers. The fathers, the rests of the system of iches, the ned in an the center of the ham. It is a perfect bar, a woman's mouths a Less center in the kitchens. After its mass, letting a moment of London, the stys a letter to tem bartts, or to them, by the design to the killings of the stows as the likes of today. 
With a moment, not a woman, a ground fertiles the rip upward and ribs of the car of imagine of splashes of a d stes into a snes, the heart of moving tens at the hands of the zets, ted in a slicals of astes. The rap, es of papers, perfect mixtures, and off-outs coming to the ets a s feet, and sy, with the tetees of the st-tes, that only cannot finally cut to the size. The meates and talk, a tners of history, and astes. Now, of course, expanded the entire obvious, tled by a small level of tyeps of a teees. The stroke of the center of a perfects of commanding to a fusion with sts. After a eat, something that tt goes into the te. The bitterness of te-ews in the form of denticals of the ttown, in fact, even a moving to find a potential to the activity</s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['’', 's', '▁secret', '▁and', '▁here', '▁is', '▁known', '▁to', '▁the', '▁best', '.', '▁It', '’', 's', '▁what', '▁Susan']
19
+ tail_tokens: ['▁the', '▁', 's', ',', '▁sometimes', '▁come', '▁to', '▁', 'a', '▁child', '▁like', 's', '.', '▁Miss', '▁the', '</s>']
20
+ ’s secret and here is known to the best. It’s what Susan was created in the ten years, in the years of a story. The show’s meeting is largely on by a, and the King of the Netherlands wave, and ten the ward of the dead. The center was blendedd a stakeed to get out of center from a possible form of life. The passion was raised by the ne and a ne-te and letter reasoned by knocks. In the course of the transcending, and the stakes he is a professor of the netig, the top of he’s youngator, and a woman becomes front of dive into the additions to guests from the loses of a strees, by the site of a turning. Also, a mountain of the raping, ne, in a swuds of sts to the fronts. The royal doubles a long styes a center of subtle to the sa sty, and the logical and the birth of the crew of the society. In the office, the shore to the dresstes, and the art of the esi sus of the Saints, the gang of King. After hes, not a research that of theected-st the world, he experienced in a new tower of stanny — by the stys. The day, the charge and ties, millions of murders of the magics, a much of the space, and as the expenses of the ste, in his days. As the floor of the population of the king, the efforts are surrounded by ruined to the plots of the design and a means of st from the bar. The space is a ne, and that of a sne is a nep in the sts of the s and the benting of the world. In addition, known as a form and a sne. Thene, the clues of a set of the population of a sudden series of the ne, and in the capacity by the center, despite the comings that the plotess by the cage of aquine that he’s asing of the age, and that every smiles to a young presence. When he takes, the wave of the shes talking, to the sniumne (as of a generation. Much in the note of the king is meant the little mechanism, the reach of the centuries of ground, the eyes of the stos that the center was expected, he is the secrete of the time of good, and delivered by the Cas, the comics, he lit to a weat. 
During his son’s t, the outside of a foot’s center of the beast, has grew up the place of a sns, creating a red bar, a chance of a determination in the male, and put in a st of the foundations. The hope is that the place of the mute is bear to the art of the ne is an inside. The st through a direct chunks of him and the worlds, by the pairs he can be sife of the bottom. Gras of the innings of the Bond, the slave, the contemporary, and crews a form of way of a virtue and center, a sete, the millions of the sea, and their starters. The appearance of the masters of the Christ’s the center of the most, was a life fan of the death of the central center, and the success of the likes of Hamilton. The sae, and through the tradition of ideas, the citizens of the lives and tale of the exhibits — in the former life of the perfect future of a body pushed by in the years of years — the loss of a death, dialogue, trans, and concert, the the warriors and took a a ruins in the center of the insst. Attack in King, a passenger as that was regarded, mainly in the ening device. The fought in the ties of the the nese of a central population of the snouts of witnesss in the s, sometimes come to a child likes. Miss the</s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['.', '▁The', '▁secret', '▁member', '▁of', '▁the', '▁Penguin', '▁finally', '▁made', '▁the', '▁video', '▁of', '▁', 'king', '▁the', '▁']
23
+ tail_tokens: ['▁the', '▁', 'era', '.', '▁', 'Moreover', ',', '▁', 'a', '▁staff', '▁boy', ',', '▁but', '▁the', '▁', '</s>']
24
+ . The secret member of the Penguin finally made the video of king the teen in the way of the e Owls of the century that he was on a ine, syeever set him to be a result of a cheery - a part of the way, dumping, ed the story of a child in a place. In the case of a woman only loses the lining of much to the rape, and he was a victim to the left love of the wing’s final, as he was, the form of a radically complex ties. -ups – he hates a tue, by convicted, sends, and loses into a two-se ring ance with a foresible, a funy-ming he’s thrown. Dukes and Kirast in the quests, a to contrasted the ranges of Romes. The reminds that he was st the size of a Friend of mase, a programme that’d the key case to carry the spreads of the bar. In the years later, the life of a man that imagines a threat, and barts, he’s. As he enjoys a surprisingly, is with a radicalm in the plate, at a higher scene of a group of 30 to a station of the various crop. Obviously, he based in the population that’s a earth-win’s a center, which heed ablack. Insteads of a king, he was, coming in still in the face. It was the another end, he’s immer in the game’s understandance. But he’s the death of a ste, and, in to draw gears, and make-w. The he front himself, that’s the kind, and to the rapes, the cluees of his ever stable. He ed to the staple of the victim’s house that it was a city of a black a ling, cratens, a woman with a hairt, that a man was forced to drive with a hand to the m. It was the key sy as a male of a pig. Blues can approach, and he killing the general face of the initials, with the rest of the Prince, was a one of a m to the world’s hand. There was a deal that’s in a wedding that, with the name of the sentence, a group of his face, and by the chunk who’s a couple was created out of the end to the sase, the nese in a shit, and a ds, a fear that he’s working to the size, and the he’s to send his old beasts. He’s set of it, and then syned to the soy of a stars hese. 
Because the vansifes from the bull eves of his brief critics of the hip fantasy . What he’s the show, and in the major action shows, and stealing if he’s in a better part of a fight that he’s willing to ask. It’s there, a lot of painting, smae and a part of it killed a man in the near-ese. The sort of living, that’s the case to the kings, if he fights, that he’s a sour. When he’s a show, he is one of the violent’s face, indeed, and it is a to form of a ees. Because of the front however, it’s that he’s in the beds, with the nese. It’s start on a ned act of track in the shape of the rock and another. It like the reason of the whispering, he was in the back to the side shooting of a slam, he remains in the fictions of a weddings. He is if a few investor in the face of the face of the clown that don’t talk, in the case, he’s clear, he’s it happens to the top of the era. Moreover, a staff boy, but the </s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['s', '▁fear', '.', '▁But', '▁', 's', 'he', "'", 's', '▁all', ',', '▁while', '▁joke', 's', ',', '▁']
27
+ tail_tokens: ['▁center', '▁of', '▁flesh', '▁of', '▁disappear', 's', '.', '▁It', "'", 's', '▁like', '▁the', '▁recently', '▁as', '▁', '</s>']
28
+ s fear. But she's all, while jokes, he x yels, and a bit of attention. It's that he's a show that he can sacked the squa a community. Butt, the suck in the way, he says, sunk the talent as a way he gets, he was himself from close to a base of the sts, that was shed the soul of the life of the s. Besides the a ste, in the gang of the X-severs, to love the two forms, and a all-like straped. In that a boy, he's not sex, he's coming in a st, and a bad one. The mix is the sle of a hair: a sne, he's a small king, a hand, and lees a little teas of his hair-speed system. The one of the singles, he's the evidence that was in fact, but a unique stst-to-sy-sne doubles of sts in the past a well-galical Brownes, the Rather, and the stly, the st of a 40a burger, that's still a lack of squay, the question, he's king, he feels the sts a stoy s, and he's, the way he's been in the case that he does not that smalings can't believe, it's the case. The takeation of the arm, he s two, d by the center of a lot of kind, front-toking like it's himself. It's that he's ling, but it's just with his sl raping a yes and a side-quoting, a bar of himself, the body counted the wearns. It's not a flee to the center, whose inlight, and the he was previously a bbers in a ed lack of smattersing a king of a bart in a teves, that he can often tell. In a recent example of the teas, the crews in the field that sy rated in the worlds, as a returns the best of the backs of a world of the head that he cites with a slow that rarely putting a snuss, and it's not the extra cause. The he spends out of a mic, he's a great- bar, in the form of a a child's, hee, with sets in his body, a stish a king as a tem-out that he's in a professional - the likes of G-thems from the fantasys of tews in swists, this is a slanmayed, and a push of the way. Those of the bad are bad. 
fact, it's a spirit, the gs and the bottom of the sy, the bar body of the likes of he's like a rape, but it's not by old, salism, if he's not the whole of the - close story of the a v, a ne, and one of that he or the crew of the small, if he get a saws of a few more in the local body. What is the idea and subtle, a te, a st and in the s of if he enters the head of the lack of honors and the most of the worlds — like this honor of what the synet series of the s-in-side indeed. Then a se, and the very sexy that dozens of the sms. Yes, the stakes that the world, the odd, is not ared only come by all of the ner by the center of flesh of disappears. It's like the recently as </s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['a', 's', '.', '▁After', '▁creating', '▁the', '▁', '▁wedding', 's', ',', '▁double', 's', '▁the', '▁glass', '▁of', '▁']
31
+ tail_tokens: ['a', 's', '▁in', '▁the', '▁', 'void', '▁of', '▁the', '▁mind', '▁of', '▁capitalism', ',', '▁', 's', 'n', '</s>']
32
+ as. After creating the weddings, doubles the glass of a mixed and a huge population of a sand by the details of the foundation of Brooklyn’s and a photographer circles of the sori, the floor of a sms, one of the center, and the beach in the center. The civilians of the sae syst of the lor’s clan and the sae, a front of the kings, often ad by s and rape the property. Eventuallying,’s the Stars of Wednesdays. cover the destruction of the revolutions, in a center of the fighting shape, and center of the loses of Tennessee. The latter stes at the center of the barts of the Queen sauce. The n it’s a bizarre design that a huge ste, the stsases, which seemed to be in a chess of the place, a sty of the claim in the cloud. The group’s the witches of the nearly recent years of the st century in the hands of the organs with a st. The king in the King’s lingers and rans, caught with the st of the King’s support, and a product of the stotes he eat to the st. First of the field, preventing the boils are secret to know the s of the water. Theres take a return to the kind of sny through the sees in mind. Like a designed in a field that king in the fronts of the show sts a ward in the rest in the years, the Queens, and the tradition of the daughter of landes, the presence of the Egyptians in the that sne ses. By the top ear, the remote born, that the first se the past of the forces of the radicals, the closes of August, a second review, in tee, despite the passengers, allowing attention to the brain, culture, and a group of the bar to a small midclass, in the Hunters, and a century, and compared by the staquine, the sand seeks to create a floor to the future of the brain, a set of the rulers and the s of the state of the hals of the show. Then the region’s design gets the cus and put in a state of the ses of the Earth. With the state’s, no fear of the ts tling by the ne, and children of a woman, screams and the delegations and the s. 
But During the first move of the General’s oceans, backed by the sa center, is a n-aver, and center in the sea of the tas. What the n, the sne bar of the hands, and fathers in a hard chambers in the head of a prize. The warnings of the opposite eat and st quarters of the resnes, in the head of the disease, swns of the child ribs of the worlds, led by a ranges to the bed, and based in a snes with the origins in life of a King of the court. It was the reason that the cover of somewhere in the ses and es in the sy blet dropes. ally, by a result, the seemingly boundaries of se that ts fall from the head center fring by the sts, and the world, and the fortes to improve the crisis and respects of the mountain’s a passage. However, when the wealthy winters and the sa sawged through to the center of Cat in the Commons, the hands of a traditional stes, and the press, by the fossils and lors. Because, the life of lights of his mother’s remained, with aking, instead of a wealthy. The scene sex downes a device, that ed a while. zed, the city of , and limb the st of the box. etas in the void of the mind of capitalism, sn</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/owt_ultraclean10k_t5_len1024_d768_8gpu_step19000_temp1_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ===== sample 0 =====
2
+ head_tokens: ['▁because', '▁it', '’', 's', '▁the', '▁most', '▁like', '▁the', '▁', 'ten', 's', '▁of', '▁all', ',', '▁and', '▁that']
3
+ tail_tokens: ['a', '▁way', '▁of', '▁that', '▁to', '▁happen', '.', '▁', 'if', '▁you', '’', 're', '▁doing', '▁in', '▁any', '</s>']
4
+ because it’s the most like the tens of all, and that’s the list. It’s the big fan of it, too, it’s a time it’s in the game. I think that’s the way that, or isn’t doing that. I’ll watch. And, because it’s going to be the sides of these things, and, on that, part of the one of the episode. I’m a lot of ob. And you’re done that, of all the way, the film will be right in the end of the phenomenon. It’s not the idea. It’s say that he isn’t the way, the ws watch in a stave, but a ssed of a casted, whatever. I’m at all, so and, a lot of people, the audience that the second and then when you think he’s he’s used to deal that in a more smu. I don’t think the one and truth of it, but it’s like that’s like, the shadow, or a t. It is not the way to the black and the ends that the person’s box is a student way of watch. It’s in a reading guy, the kind of exeess, and gi, too, and his sy the right, and if it isn’t a lot more of it, it’s not so, for it, because it or not in his own character. It is a fan with the oed or not but with his boss and he’s a ner. It is purea case that he wants, not to do it, and at the like dog, that’s only to the fact, it’s a right thing. All that’s, and it’s unique, and it’s a lot in the key, and what the monster is something. It’s not that if it’s one of the press, and not in the board of being, a fings, perhaps like in a black guy — and that’s a character. But that’s not kind of character, but that’s in the second. When, in which he’s, there’s always. That’s the other people, to be hee, he’s in. I don’t want to be one of the character, as of a great character, but you, to say, at this, it’s like that he’s it. It’s a sense so much about, but he’s really good, it’s clearly that inherent to the character that reason, though, is like a connects with him and he’s a fan on the level of the face. 
He’s of the one, and what’s done with, in the one that’s in that kind of thriller, it’s not that, and I’d see it in the way of his signature, and it’s all of the most interpretation people, and there’s a little, dresw and out there in a way of life, but that’s a man. It’s very littleity of it and it’s not clear. I think it’s that it’s a little person and it’s written for that one, it’s not much, but he wants it, I’ll see it. Because we’re in a good – if they’re in the way that way of it either. He’s a devoted to do, and he’s got to moving the way, though, no, if he’s not . But he’s in it, that’s a good thing, it’s made in it, and that it’s offensive to the fact that it’s the way to be able that in the way as he’s okay. if it’s just a person or you’re in doing a watch out of it, what’s important. It’s not a way he’s at a person — you don’t at it, and that’s not in a way that to watch something, but he’s not want to do that, because he’s not to the ance, or is he’s made it. But of that one thing, it’s not the one of that kin, it’s fit for it. It’s a way of that to happen. if you’re doing in any</s>
5
+ ===== sample 1 =====
6
+ head_tokens: [',', '▁and', '▁they', '▁in', '▁be', 't', 's', ',', '▁and', '▁others', ',', '▁there', '▁are', '▁inspired', '▁by', '▁others']
7
+ tail_tokens: [':', '▁with', '▁that', '▁', 's', 'y', ',', '▁man', 's', ',', '▁the', '▁right', '▁', 'c', 'y', '</s>']
8
+ , and they in bets, and others, there are inspired by others, and to be the name of the other man. As, considering, this, the hes and in gade. But he let to be one of the peoples, and not the question of what is not right, and given that he is not obvious. At the people of the moment, in the Atootesse, the one, n is, by the es of the society and its the ne of the time and it can be addressed, by the question of a debate, and even being fare. It can’t be the first, but, if, for the people to become, the one must die, and even if the ese seems to the edge of which is one of the ances, but in the pleasures, and not of the neeses, in fact, is the choice of the keys of them can be uttered, but with a promise for the moment. In fact, it gets, and his views, at all, isn’t one of the world’s going, and if you’d the reality of an inback, and you wouldn’t have to learn, and as a nation to be the kind of pig and resident who will come. In the particular, though, he explains that he escapes to the ne of the first party, and heists his hardships. The person, missing is the love of the eps of he and se to the culture. This that violates the ance of the ne, a neet, a person. It was that on a eed, and sex with water, a danger, and a child. However, the head of the se, and when he was in the ney, he was based on the legis of the ne, and a kind of a character to a person. But he’s about, people in his eyes, if the meat, is to be a hood, it’s not of it. It is that there’s not toes out of the way. And it’s that sense, it can be just to know. It is that the project of nesing side of it, he and that his mother is, for the time, is to be over – if, because it’s something in the hands of se is not able to say about what he thinks or what is done, his experience is about in hiseat. I’ve made to hear it in his discussion, if the question at the protests of the bar and the rest of the public office in the media. 
There is that people don’t consider the right in the establishments, and it is not only to the those who are hees to the esey, with the interpretation of the ecys of the nes in the system, is on the part of the power in the game, and it’s not the way of a foree, and a person that he can flesh from on it as a neesiation, but it can’t be rich in the earth, but the end of the body of a bin, and the reins of the flames of time. The second he is to be that of how he is the reason in a way. When the sny are just to get to it, it’s all about. Having it is going to be itself, even the bees of the sause relationship, in the changings in the minds as ssing, and it’s. Up, the relatives aren’t so say, in a place, and that the peace that is, and the kind of physics, it’s actually part of the rest of their life. It’s important that people who don’t like it, and Blacks, were even artists. It’s not the British choice of people from the other people who are not ines, and lands to the se, se, esing, by the sides of their parentss, and claim to protect the sees of the formations in the capes, it’s a bit of a horror. The fact of the se is the syast of a little nasey: with that sy, mans, the right cy</s>
9
+ ===== sample 2 =====
10
+ head_tokens: ['▁it', '▁is', '▁', '–', '▁because', '▁she', '▁was', '▁the', '▁only', '▁way', ',', '▁but', '▁that', '▁not', '▁for', '▁her']
11
+ tail_tokens: ['▁', 'a', '▁', 'o', 'qui', 'e', ',', '▁', 't', 'e', 'y', ',', '▁', 'a', 'd', '</s>']
12
+ it is – because she was the only way, but that not for her job, she suffered to be a her. -She such as a celebrity, the one of her mark is the boundaries of the stage, a na of the ye. But it was just by her way again, but she was mocked by her for her control. It seed her of the game was not that it was just a part of her. In the first time, it was she at her for her in the course of the time, a stist was to be back of her for her. But she was a the game in the front of her. She was a game of her, and it was out of the men's time in the her - was all yned in the rest of her. strah, was a woman out of her interest, her of yed syees, the - had it done in a lot of the way. However, a sahye, as she did not to have a group a stye. There was the other, a little a worst. After her pens further in the her honors, and she was t of a her with the sort of humanity. That woman, it was just hung her ties to a leader, because she was blewd in her. Not so it was not one of the game when she was a woman who was no - no more for the rest of the game. In a career mind, in fact, it had been ed by the game res a woman, and she was t duck to the the absence of her ry. But she's a tes, and afing to the near her in her absence. However, she d her as she came in, for the time of the osy, a sn the syed. The yey that she ed into a a hergas in the way, and a at the end of her mouth, it was, by the time that she had to fall off for a part of her, but that she etty at her pool. Shed looked, and the potential of time and her. It was at the times of the intention, of the say at the her count of the night before. She was the way to the video that it was not just the way to be. She meant she was from the show, refusing the product that respects of the one. Its already had been despite her experience, it was the time it was changed, and had for a lot of the life. A lot of it, were not just not the woman, but she was a dance her. 
After being putting, her s, the game intention and at the time, at least it was the game of the putting her ability, by the end of the ne and a relationship. So even over the way of the walked in the room, and at her, it was a if it was in her, the care that the game was causing her. It's it was only the theory of the suggestion that she was on the way of, despite the time she'd not the her, preparation for the her. The time, she was coming through her, and showing the rest of it at her he let it in a party, it had her. The days of the time, she was a tool, sy, she was the invitation to the end of the st, synet to her. The Mother's game was the equivalent of which she had tay, ted, was more efficient the womaner. In a lot of off the way, she rap her tee. I, it's been the early, that was her. However, ty was a solid experience, and she'd standing with her female, and the way, that the idea was a huge out of her head. The intention owed a whole tring for her, because it's not anew for her. And as a woman at the end. She couldn't be ne that, because she was a chance she was a Kentucky. She took hope that it was te. She was a friendship the thoughty, and that was not able to st at her time, and her tes like her at a oquie, tey, ad</s>
13
+ ===== sample 3 =====
14
+ head_tokens: ['’', 's', '▁of', '▁huge', '▁', 'ner', 's', '▁that', '▁', 's', 'e', '▁system', ',', '▁and', '▁in', '▁the']
15
+ tail_tokens: ['▁time', '▁of', '▁', 'bri', '▁decide', 's', ',', '▁in', '▁', 'a', '▁year', ',', '▁', 'a', '▁', '</s>']
16
+ ’s of huge ners that se system, and in the st-time, man-ones, stevers, with a result of the s ea, a suit and acast of the good, and the awkward of the year, and then, in his part of the group’s first shit that is mixed by the well-hodvaree’s of the office where the man’s hee, it’s a smile, an eo, and snt of the resort, and it’s pretty sand’s a tow, the smlie the other sides, and it’s a beautiful, but that’s even a st sy, if it’s a way that’s probably only, showing the edge of a t, in a beacon in a little sucky in the world, one that’s only his shou of the e. : This ne’s s down on the bar’s side, as a result of the rim by the a sed in the past a sand shell of stealing a decade. It’s the ses of a woman’s serious snaed, of bees, and a beard crew in staeh, who is a punch in part of the kind of the world, he’s in the place. Here’s a meatator’s, it’s that it’s in a second, with a e of a class of art of the pig, and heter of a fewms in the world. Why, it’s the a ease’s sex interest, the trees that is so obvious in the left side of killing fares on one of the stage of the criset, a smaed, he is, and a his fall of me. It’s the scye in the head of the ney, as a me look at the switch of the tail. And, he’s often sleats by hiss, he becomes a chance, killing a unknown, but with the eyes of the ne, as a woman a ble of being sorate, serious, and a srick of him, a fared side of the brand. He is the show that of a quarter, aee is a meat with sne. But it esee’s head of the world, and more of a meat. It does the end, out of the te of the excesse, many of the fish, snee, a the floor of tep, he’d in his way. It’s the first part of the a e-ase is that the end he’s the end of the hee. That’s the first stage, at the eyes of the era, a knocke is sne. 
Which, however, however, at the beginning of the end of the year, and he tries out of the blames in the se, a one of where he’s still in the time, the man’s s, syons and sming the audience like his stuff, rape, and much of West expectation, and is a good-term source to the guards of a kind of good in there, he says. But he is also a real set of a with sulious, sw a sex, and a better floor in the history; it’s a lot of work on the likes, the way that was a dogs that’s living a lot more like living for a lot of years, and many of bees, he’s a hat, in the nede of sexual throughout — later in a kind of st like that, the face of the fishy, which is a full of te, one of a sever a range, s, ooping edge, or for a audience to cause in the mind. Signs of life in the honors, a wild eat of being a raped. In me, a deal from the time of bri decides, in a year, a </s>
17
+ ===== sample 4 =====
18
+ head_tokens: ['▁of', '▁the', '▁', 'e', 'd', 's', '.', '▁The', '▁line', '▁of', '▁this', '▁stem', '▁from', '▁its', '▁feet', '▁in']
19
+ tail_tokens: ['▁end', '▁of', '▁the', '▁', "'", 'S', '.', "'", 's', '▁design', '▁is', '▁', 'a', '▁little', '▁', '</s>']
20
+ of the eds. The line of this stem from its feet in the slows, for the sms, hurting with a club next that he's rely to his pipe in, and a lot of the past. It doesn't get the way of the game, shops, which in a snet s head into a body sult process. During the likes of the se, s undeading the shets ss in the years, the sstys. It's a saeverssy, s, a suly, a balance of sas, the blees of shelscated, it's become a slout. , there's a d betssh. But he's in a flly sty and spt out of the squavs in a sts, he compared, of course, while the sees, at the time, all around a mash and sty's rot as the snitor of what he's a bit to be vs. What's it, for - it's a way to this, after it, in a snad normal, a st of a, that's a like se or sucks, but it's sning it with sym of stvs. The sts say starts to be a tyog that can's a st. The ne splane of a determination AJ sashers in the center's because he can't be a cheat. Instead, it's a ys in the res, they make a humanes in the s. It's ted to a slting, the previously ssteds in the tor, he's do with the slissa's st, the underground teds. All of the st that's all of comedy, the hears it's a long night. But that's the center of the dons, the one of the spiders the road's center, and most preparation at various battles, rely that a sathests, as it a tevy of a woman or a structure, in the st of a stateing with the hands of the 'size and a hard ass at the s in a re-ishe in the end. It's with a bs of deception, as a sex, and simple, as light of the hard places all of the body's. despite the one of the four ss, sour the dress, is that comeons into the game. There's not out of a lot shat in the city's sking with the stas, the man d in a day. Yes, he steat houses at the finish of sm. But he's in the last weeks of his mouth, 's it with a beanes in the t, the open sking of his eyes, and the living, the shets premiere down, - the stvs. With a talk of a's all of satt is in the face with the two. 
And, in the tea's by the styes of the tund-asses at the ster of the sty's in the generals. In this face of the slikes of commands in the face map of the Mormons center. It's not in mind that swling the backs in the north center of the world, but a sense, and a sle-mtish stroke at the stiforss, with a sat as a slisher, and only as the center of a swus. But he's aware of the way heriff's in the end of the 'S.'s design is a little </s>
21
+ ===== sample 5 =====
22
+ head_tokens: ['’', 's', '▁like', '▁that', ',', '▁', 'obst', 'e', 's', ',', '▁and', '▁be', '▁on', '▁', 'a', '▁couple']
23
+ tail_tokens: ['.', '▁It', '’', 's', '▁not', '▁what', '▁', 'he', '’', 's', '▁that', '▁', 'he', '’', 's', '</s>']
24
+ ’s like that, obstes, and be on a couple of the second, but it’s definitely quite a bit of the way of that way. It’s just a bit of the episode. I just just don’t work, especially that, but at him. And he’s in it, he’s making his swings of the lockers, and, though, and it’s that he’s on the day, and that it’s not either to it, but it’s a happy. slightly, he’s not taking a some way of the bunch of him, and it’s the way that’s can’t the way it do. I think if it’s one of it is the way. It’s just out of a way for one of the best of the term in the way. This, it’s clear, as it come from it, he admits he doesn’t control it, that’s a little, in the wrong way, and it’s the one. And of it. It’s not a modern standing to a, too, and he’s, but not that – it’s he’s in the field of the s. He’s not a joke of at us, but it’s just something when it’s eating, and that he’s in all of those I’d, but I think it’s, and it’s a thing that in the fact that he has a comic thing about it and it’s in to the future. Or that can’s a game he’s – and that’s made in the battle’s, and a ride of the level of the – leave a toe, with any throws of, and – the design in the latter’s. And, a nasse, that, if he’d think he’s he’s in the bars, it’s not a fest. And that he’s a reason: it’s Getting a position in the seo, it’s on the way. I think he’s really don’t think. I’m just a reese. I’m just trying to find out. And that’s a very low. It’s more confident, and he’s on it, or anything, and that’s the crowds that of course in the board. In fact, it’s the syable point for a robot, he’s curse of a rees tosports. But it’s the way I’ll think you’re there that he’s the one of all the other words, that he’s up the things that he’s staying in the head. He hasn’t do that for the way to keep in his owne and sometimes that taking the tackles, and he’s in a interface dog – and that’s the character. 
I’ve made this year that he’s done a lot of the way, it’s a movie, and I see that, he’s going to take on the end of a few year, but there’s a big game that but he is a way that he’s all along. And now, I’ll also at the Fourth of the odoes. He’s better re-ed, a means of debate and motivation. There is a e, and a way of a signature-hees – and it’s just a woman he’s aware of the reason he’s a system that’s a ase. That’s there, the site’s of the time he seem to make up with the time. And, he’s a wrong, he’s in the match in the past, but that we’re a special thing. But I’s there. I think it’s a few mistake and it’s not that he sees who’s not wrong. It’s not as clear what a wants. That is a moment’s, I think it’s loss, it’s going to be difficult. But that’s not a game when he can, to be if if he’s in a job, he’s the one of the kine, he’s a no-signe. It’s not what he’s that he’s</s>
25
+ ===== sample 6 =====
26
+ head_tokens: ['▁of', '▁', 'a', '▁proof', '▁of', '▁it', "'", 's', '▁unlikely', 's', '▁in', '▁fact', ',', '▁but', '▁it', "'"]
27
+ tail_tokens: ['▁think', ',', '▁it', "'", 's', '▁time', '▁to', '▁ask', '▁you', '▁doing', '.', '▁', "'", 'I', '▁can', '</s>']
28
+ of a proof of it's unlikelys in fact, but it's a nedose of the spring. In the other sometimes, that, like the one of the right is in the game ... it's not only, because he's. It's the one that that's — it's like not a place for the game. If that's in the case of a process base of his firsts, if out of sad's the colon of the other. It matter, but it's if it's in the first that had to take the game in a way that's been all of the cles of it in the bris ways. He's not, is that if he is a those heed and this isn't a comment, because it's a show, because there's been a story of that in aedat. All-n't, it's all you like to come? Yes, if the ted with a catcher out of that, that's a casts, but it's not a certain snot the sned, and the epoists that can't have a joke, but if it's the one. I believe that a lawyer, not a hrejust. Some of the fun, it's a good outside of the way, it's all of it out. In or the ages, it's not a in the history of the definitelys of the direction. It's not that he's been on that one that if there's a ae, he's all the way that he's going, or or it in a way of the protectings, that s in the course of the game. It's be a whole. It's physical , of the s he's. If he's been on the benefits of the in-sls, he's think it's not what he's punches in the past season. It's he's in his office with a week because he's a little profit for a whole suspects, where he's the still-s-hets. But it's still fine, and the game is to get on the end of the mines in the fact, it's a littlemorphed it. : For one, the battleships, he decided a crew-round attack, because all of it was a single, edizing, and that's come. That's a process that's a good enough. But as the s happen, it's also because that it will not be there. The fact, it's a history, and it's not the one of the season. isn't a rock, but the first it's dropped on all, but you're more of the new time that he up. 
It'sn't like all of them, by all, the fact of a fan's way that's people don't go, but it's because he didn't believe because it's all of, he ed as well.) It's the fact that otherwise, right, he's like he's not in his 'se, it's a good issue, but he's not the nifer with the e-woner, of the reason that the 'steseel, he can't sit it into a part of the world and it's not like. It's not the case of a sed's no, but of thing. It's all-ed out, the acquisition doesn't doling it with it, but it's like in a taed at around, and that's a fan of like a larger one. But it's not a rety, in the case that was taken to the prices of the rest of it, but in the sle-ts of the robots, of course he's presented. 's with that — and it's the notion of f the centers out,' as it as a way to reepious by some of the way that, if you think, it's time to ask you doing. 'I can</s>
29
+ ===== sample 7 =====
30
+ head_tokens: ['▁the', '▁', 's', 'p', '▁of', '▁the', '▁events', '▁in', '▁the', '▁world', 's', ',', '▁', 'a', '▁', 'n']
31
+ tail_tokens: ['s', '▁in', '▁the', '▁one', '▁of', '▁', 'a', '▁chunk', 's', '▁of', '▁the', '▁', 'eat', '.', '▁“', '</s>']
32
+ the sp of the events in the worlds, a ned in the night of floor. This early senses, as the end of the likes, and the goal of prey, and st, seems to be taken on to show into the beginnings of the culture, the st is in the center of honors, surrounded by the cover of the deep st slepting out of the flavor, and the experience of the deeps. Photo of the power of thepiges, that embraces of the responsibility, and somewhere into the center of a enhance, the tele and Cole of the kind of the same sym in the media. It’s a way, that makes the ned with it, insedy of the eads of the forems, goes at the center of the contact. While, in the company created by the forces of the beautiful founder of the daization, the colony of sm, and that’s a shape he’s deny in the words, it’s that a n at the center. , and the start of all, it’s coming from mother — more questions, a claim for a long, sey sts of the shifts and making, snas, hopes of seyas, and a flat and more in the floor. The this show is sm, the mother to be elizes, ne-working the gods by a fishe, and the females, while in the ass of heesna and kills, is an image, and in a work in the mouth. The X, the unlikely, ny of the party, is the end, the residents of the love for the death of the nems, and the former malls of John Keie, a black body. (This original is a while at the end of the first century of the em, foryse, which is not a perfect: The example of the young e-smned, but a whip of a hairy, as a science, neay nes are a black artist, in a moment, in the way, and a that are rich in the structure. It’s a deal with the nay of the one who do like to the meaty of the likes, and a mouth that it’s a discussion with a way of the line. After a lifetimes, that’s of origins, with many sts, the far ties, the syas, ny of naek, sand sing, a father’ssy, is as affles on the one-s trail of the character in general,. 
When it is a little of the nel, a swin in the uses of sat down, and the very intensity of a mess —, the ney of sex, and a charge of the beesy in a ne-to-s for it. But it’s a nys, or the art to the center of the larger revolution. But the show’s more to the nae, which is one of the skull of ems of the sy, and in the back of the basic gender group, found in a shame, a mity from the middle of the cover. The shift of the e is in a case, with the first ausey of the office of n’s Boing — at the point of sex eys — that doesn’t happen in here. It’s the sy of bres and create its own nas. It is a comey that fills several ems on the way of sy. The emsed to sn with eese. Well, it’s a room for the necy in the loss of memory. In the sey, the crew of the most sny center, it’s eat, a way, is a goal of essyes, and to make a better foresing in the head, — that he found smasing at his bees in the one of a chunks of the eat. “</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/tinystories_t5_len1024_d768_8gpu_step10000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/tinystories_t5_logdirichlet_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_40k_20260527_121803/step_010000.pt
2
+ step=10000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['</s>', '.', '▁He', '▁', 's', 'w', 'am', '▁to', '▁', 'a', '▁river', '▁and', '▁had', '▁', 'a', '▁wonderful']
9
+ tail_tokens: ['▁was', '▁so', '▁excited', '▁when', '▁', 'he', '▁was', '▁in', '▁his', '▁room', '▁and', '▁knew', '▁', 'he', '▁had', '</s>']
10
+ </s>. He swam to a river and had a wonderful time. He was so happy that he had a nice day.</s> He was walking in the park. He saw a beautiful tree. It was bright and colourful, with a big yk. He was so happy he wanted to climb it and ran to the top. When he got to the top, he saw a little girl. It was a man and he asked the man for a hug. The man smiled, and he watched the boy for a while. Then, he took the knife to his hand and held it in the man's hand. The boy was so happy and he knew that he would never forget.</s> One day, a small boy went to the park. He was very excited because he was so excited. Mama said that he had to run away for a walk. The boy was sad, but he had a big plan. He walked up to a corner of the park until he reached a pond. As he walked, he saw a big frog in a big pond. The boy wanted to play with the frog, so he put his hands on his back, and ran to the pond. When he woke up, he had a plan. He quickly grabbed his shoes and swam around, and he was brave. He bent down and jumped into the pond, eager to play with his friends. He had a little smile at the edge, and the frog started to pick him up. It was the experience, and he made it to the park. The boy was so happy, he had wanted to explore his pond. Finally, he ran back home with the decision and put on his pond. He was so excited to come and explore the park again.</s> John was ready. He put on his hat, and walked to the park. He was so excited that he didn't get up. He started to put the hat on his head. He splashed in the water anded in it. He had so much fun. When he was at the bottom, John's head. It was hard, and it felt hard. He didn't look at it, but after a while, he threw his head off the slide. It gave him a big smile, and cleaned it off. John was so happy. He was so excited, he smiled, and he wanted to go back to his day. Suddenly, he had off, and he walked home. He knew he had made a big hat.</s> Once there was a little girl who was very attractive. 
She had a big smile and a big smile. One day, she was going for a walk in the woods and she saw a big tree. She was curious and she ran to see what was inside. Inside, there was a pile of leaves. It was so big and it made a loud noise. The girl was scared and she started to get very worried. Suddenly, the girl saw something sharply in the ground. It was a big nut! It was a nut! The girl was so surprised when she saw the leaves. She smiled and said, "You are so brave!" The girl was so happy when she saw the ants in the leaves. It had so many leaves! The girl was happy that she had gone into the woods. They had a wonderful time playing in the leaves. The girl smiled and happily walked back to the woods.</s> Once there was a naughty boy called Tom. He had big toys of toys. One day he saw something in the garden. He thought it was very special and he wanted to take it home. He grabbed a key and walked to the edge of his house. He put the box in the corner and he was so excited to have it. When he came back home, he grabbed the box with his hands. When he opened the box, he found it! It was a beautiful, shiny jewel inside! Tom was so happy and he was so excited he had a new treasure. He said he found the jewel to his house, and put it in his room. He wanted to take it back home and show it off. Tom was happy to have found a friend, and he was able to keep the garden with his special treasure.</s> John was a very lucky little boy. He had a smile and a big smile on his face. He was so excited when he was in his room and knew he had</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['</s>', '▁', 'a', '▁time', '▁there', '▁was', '▁', 'a', '▁little', '▁girl', '▁with', '▁', 'a', '▁big', '▁red', '▁ball']
13
+ tail_tokens: ['▁', 'a', '▁walk', ',', '▁Mom', 'my', '!"', '</s>', '▁Once', '▁there', '▁was', '▁', 'a', '▁little', '▁girl', '</s>']
14
+ </s> a time there was a little girl with a big red ball. She wanted her ball to see it again. She ran up to the hill and looked down at it. It was nice and pretty. After a while, she wanted to pick her ball for dinner. She looked and saw a nice spot in the grass. She ran up to it and smiled. "Let's take a ball here. I'll get you back!" she said. The little girl thanked her mom and ran back inside. She saw the ball and smiled, because she knew it was a special day.</s> Once upon a time, there was a little girl. She was so happy and she loved to play all day. Her mom laughed and told her it was a surprise. Then one day, the little girl went to the park and she saw a big rock. She was so excited that she ran over and grabbed it. Then she started to worry. She saw that it was coming from a big tree. She was so excited that she ran back to the tree to play with it. As she got so hot, the sun started to go down. But, the little girl wanted to play again and she found a nice spot at the park. It was so pretty and fun. But as she was about to leave, the rock was left behind her. It had come to the ground, and it had a hole that she had to leave the tree. The girl was very sad and she had wanted to go home. But the rock was a very bad day, it was a beautiful day and she had a bad time.</s> Once upon a time there was a little bird. The bird had a nice arm. It sat on the branches of the tree and the little bird sat in the wind. One day, it started to get tired and it stopped for a long time. The bird flew to a park and it saw a big tree. It flapped its wings and started to climb up the top of the tree. It felt so happy and safe in the sky. Suddenly, a bird felt a little bit free. It was coming from the big tree. The bird flew to the tree and stopped to take a little closer look. It was a rainbow in the sky. It was so beautiful that it decided to take a trip in the tree. After a few minutes, the bird looked up at the rainbow. It was beautiful and smelled up in the sky. 
It was a nice sight, and it was so happy.</s> Once upon a time there was a brave little dog. He had a big smile and he liked to be happy when he was in the park. One day he decided to take a walk for a walk. He saw a big hat with a big hat and a smile. He smiled when he saw the dog. The dog barked and ran to the man. The man said, "Hey brave dog, give me your hat. Don't be scared, big dog!" The dog was very scared and he wanted to dance. So the man came, and gave him the dog. He gave him a big hug. The brave dog was so happy and he started to dance. He smiled and said, "You're a brave dog!" He had a big smile and followed the dog out of the park. He was happy he could help the big dog and dance. The dog was very happy. Now he had danced, and he had a lot of fun.</s> There was a little boy who lived in the park. One day he saw a big tree and wanted to play in the fun. He put on his hat and his shoes, and the little boy went to a big tree. He found a big hole in the ground and he said it was a tree. The little boy was so excited and so he got a plan. He put the worm on the ground, ran around the tree, and watered the leaves. After a while, he started to get his plan, and he was so happy. Suddenly, he heard a noise. It jumped out of the hole and started to rain. The little boy looked up and saw the voice again. It came out of the hole, and there was a big tree. The little boy walked back to the tree and saw what it was. He found out that it was a big tree, it was a big, blue leaf. The little boy then said, "It's beautiful!" The little boy smiled and said, "Time for a walk, Mommy!"</s> Once there was a little girl</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['</s>', '▁that', '▁', 'he', '▁wanted', '▁to', '▁explore', '.', '▁He', '▁looked', '▁and', '▁', 'he', '▁found', '▁', 'a']
17
+ tail_tokens: ['▁', 'hid', '▁it', '▁in', '▁the', '▁corner', '▁of', '▁the', '▁house', '▁and', '▁looked', '▁at', '▁the', '▁top', '.', '</s>']
18
+ </s> that he wanted to explore. He looked and he found a rock and rolled it into the hole. After a while, he stopped, and he found a big tree. He had a plan. It was a cone in the sun, and it made the rabbit very happy. The rabbit hopped up the tree and gave it to a big hug. He thanked the tree for a present, and it was very happy. The rabbit had had a lot of fun exploring, and he goed it. It was a very nice day.</s> There was a happy little boy named Jack and he had a red car. He loved to play with the toys. He had a toy car and he knew that he had something fun to do. One day, Jack had a toy car, and he thought he could do something. He ran to the toy car and started to look at the toy car, and made a loud noise. Jack was so happy and his mom and dad were so proud of him. They all had a big jar of juice. They took a bite and smiled. The new car was so special in his room - he had such a new car too. They smiled at him and smiled. Jack was so happy and relieved and knew that he had done something with the new car. It was a special toy that he'd never forget.</s> Max was a nice boy who had a nice smile. One day, Max was walking in the park for a while. As he ran, he noticed that he had a medal. He walked around the park and he noticed it was a special toy. Max had never seen it before, so he decided to take a closer look. He picked up a cell from the ground and couldn't recognize it. He smiled as he walked, and he saw something - it was a man! Max was curious and he followed the man. When he arrived, he saw the man inside. Max said he was the man. The man smiled and said, "I'm using a strip. It's something that I can't take it. It was a rare, just in the shade." Max was so happy, and smiled. He thanked the man and ran off to show his friends.</s> One day, a little girl was playing outside. She saw a present in the ground. She opened it, and it was a ball. She was so excited, she started to run around it. She was so happy she had to run for a while. 
Suddenly, a big, ugly dog. She started to have a lot of fun and started to chase the dog around. She thought it was a very stupid ball. Suddenly, the dog started to smile and knew it was being foolish. He ran up to the girl and asked if she could reach him. The little girl was so happy. She gave him a big smile and waved goodbye to the dog. Then she grabbed the dog and then ran off to the house with a big happy smile.</s> Once upon a time, there was a girl who was on a trip. She was excited, and was going to find a way to go to the beach. She came to a big hill, there was a large hole. She walked up to the hole, and she splashed around. Once she made it, a stick started to fill her with sand. She was so excited that she ran up to the edge of the hole. She stareed down onto a bench, and saw a lot of sand. It felt comfortable when she took a break. She looked at the sky and saw that it was clear and peaceful. She looked up the sky, and saw a big, bright rainbow coming out of her. She grabbed it, and reached out to it. Then, she ran to the hole and started to fill with a golden jewel. The girl was filled with joy, and she made her wish. She held the jewel close to her face and it flew away. The little girl was happy. She jumped up and down on the way. She smiled, and she was filled with excitement. It was a perfect day of adventure.</s> Once upon a time, there was a man. He was lived in a big house and he had a big, quiet house. Every day, he would stop and look around. One day, he had to go and he made a big rock. He hid it in the corner of the house and looked at the top.</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['</s>', '▁up', ',', '▁and', '▁soon', '▁', 'he', '▁was', '▁in', '▁the', '▁sky', '.', '▁It', '▁was', '▁', 'a']
21
+ tail_tokens: ['▁and', '▁', 'roar', 'e', 'd', '▁to', '▁the', '▁', 'lion', '.', '▁The', '▁', 'lion', '▁was', '▁so', '</s>']
22
+ </s> up, and soon he was in the sky. It was a bend and he walked out of the pond to get the edge of the pond. As he looked around. Suddenly he heard a noise and he saw a fake surprise! It was in a corner of a slide. He had invited all of his friends to the pond and he was so excited to get his friend's help. He raced around the pond with him, and splashed around in the pond. The pond was filled with his friends and had lots of fun. He even made a friend in the pond and he felt like a real man. And he thanked the voice, and gave him a big hug.</s> Once upon a time, there was a big, old man. He liked to keep it, but he had a big frame that he liked to keep it. One day he went for a walk with it. He was so happy and excited. He decided to go for a walk in his house. He made the way to a hill and he bumped the ground. It was so big that he couldn't carry it. He was sad and he couldn't believe his eyes. He watched the ground for a while and then he noticed something. It was sticking out of the ground. He couldn't believe it as he saw. He slowly made his way home with a big smile on his face. In the end, he took it out of his pocket and he put it in his hand. He was happy to have his special wallet, and he was happy that he had to keep it.</s> John was a boy. He had a big smile. He wanted to go out and have a snack. So he decided he wanted to take a big walk to the park. He saw a slide at the top and he sat down to the top. He saw the sunshine at a top of the slide. It was so big and pretty. He flew around and had so much fun. He liked it so much that he climbed up and down the slide. He felt like he was flying. The sun was blowing in the sky. It was a nice day and he was so happy. Every time he looked at his smile, he went back to the park. It was lots of fun!</s> One day, a mom and her dad went to the park. They were so excited. Mary smiled and cheered, as she held her mom's hand. She started to eat her favorite food. She smiled again and soon the sky was empty. 
Her mom and dad smiled, with a big smiles look on her face. Suddenly, a little girl jumped in front of her. She ran to her mom and dad and asked them to join her. Her mom and dad said no, but they had to wait. Mary took a big breath and smiled. She was so happy to see them and gave them a big smile. She then waved her hand to the sky and smiled at the peace. Mom and Dad smiled and said they had a wonderful time. The rest of the day of being together, peace and happiness. Mary had had a great day playing together and she was so happy.</s> John was taking a rest. He was feeling tired and rest and he closed his eyes to take a nap. He had a big smile on his face and went to sleep. When he woke up, he saw a big, red apple. He wanted to taste it. He picked it up and put it away in the car. He started to play in the garden. He smiled and ran to the garden to play. John saw a big red ball and he squeezed it in his hand. Then he had to taste it. He opened his eyes and saw the apples. He tasted it and it smiled. He tasted the apples again. He put the apple back in the car and went inside. They had a wonderful day together.</s> Once upon a time there was a lion who wanted to have an adventure. He walked around the meadow and he saw a big rock. He looked scared, and he knew he had a good time. So, he stepped out of the rock and ran to the rock. Then, he put on his shoes and stepped back to the rock. Then, he stepped out of the rock. He closed his eyes and took a deep breath. Then, he saw the rock and roared to the lion. The lion was so</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['</s>', '.', '▁He', '▁', 'ate', '▁it', '▁in', '▁the', '▁house', ',', '▁and', '▁', 'he', '▁was', '▁no', '▁longer']
25
+ tail_tokens: ['▁and', '▁', 't', 'hrew', '▁it', '▁to', '▁', 'a', '▁big', '▁tree', '.', '▁The', 'n', '▁', 'he', '</s>']
26
+ </s>. He ate it in the house, and he was no longer frustrated. He was so proud of himself.</s> Once upon a time there was a little girl. She liked to play in the woods and explore. One day she found a small nut on the ground. She thought it was a strength, so she picked it up and put it in her pocket. She was walking in the forest and going on her way. Suddenly, the nut started to break down a tree. The little girl looked around, but she couldn't get it out. After a while, she stopped. She looked down and saw something amazing - it was a st! It was so pretty! She laughed, she couldn't reach it, and she wanted to pick it up again. The little girl was so excited that she looked around the forest, and saw the nut! She smiled and touched it. She took a step, and grabbed the rag off it. Then, she put it back in it. The little girl took the st inside, and she saw lots of interesting things. She smiled and smiled. Without her hand, she grabbed the nut back out of the ground, and ran back home with her mom.</s> Once upon a time, there was a girl named Emma. She was very troubled. She had a lot of things, and was always nice, careful and care. One day, her tle came in her room. She said, "Hello beet!" Emma was so excited, she wanted to have a look. She ran to the steps she could find. The beetle smiled up to her back. She was so proud of her little girl. She hopped off her pants and zoomed off the floor. Once she washed up, she sat down and hugged the beetle's back. The beetle was so happy, and gave her a big hug. Emma was so proud. She smiled and said, "Thank you for being so troubled!" As she smiled again, she couldn't help.</s> It was just a day of food. He was walking along and he was hopping. He was so tired he had finished his day. He knew it was time to eat. He grabbed his prize, a spoon and took a bite. On the way back, he heard a loud noise. He looked up and saw a big white rabbit in the sky. It was a bunny, with a big smile on his face. 
He grabbed the rabbit and ran back home in the garden. After a while, he made it back to his house. Suddenly, his mum saw the rabbit. She looked at the rabbit and was scared. "What's wrong?" she asked. The rabbit said, "I can't hurt you with a prize. We need to get it if we can find it," she said. He watched as the mum took the rabbit back to the tree and left the hole. With a smile, he scooped up the rabbit from his pocket and put it in the hole. When they found it, there was a nice, soft patch of nuts. He gave it to his mum and he gave the rabbit to her. It was a shiny new necklace. The little girl was very happy and she thanked the rabbit for his lucky surprise.</s> Once upon a time there was an old man who loved to sail in the lake. Every day he went there was a big white boat that he loved the beach. One day, the old man came up to the boat and said, "Let's have a fun day." The old man was excited to get closer to the boat, so he got a big rope from the boat and put it on the boat. He pulled the rope and the boat started to move away. He was so happy he was safe. Every time he looked down he saw a lot of fish and a boat. He waved to the boat and smiled for a long time. The boat sailed away again and the old man smiled back at his friend and thanked him for being so kind. From that day on, the old man always remembered to the boat and used it all his time.</s> One day, Jack decided to have a special surprise. He had a big box with an operation on it. It was a big, colourful box with a lock on it. He was excited to take it off. When he got to the park, he found a nice stick. Then he picked it up and threw it to a big tree. Then he</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['</s>', '▁see', '▁it', '▁in', '▁the', '▁sky', '.', '▁The', '▁bird', '▁started', '▁to', '▁high', '▁up', '▁to', '▁the', '▁sky']
29
+ tail_tokens: ['▁who', '▁just', '▁smile', 'd', '▁and', '▁said', ',', '▁"', 'I', 't', "'", 's', '▁okay', ',', '▁Mom', '</s>']
30
+ </s> see it in the sky. The bird started to high up to the sky, and it looked up and down. As the sun shone down, it started to make a magical spell. It was beautiful, it couldn't believe what it was, it loved it so much. The bird waved goodbye with a big smile, and the bird flew around the hill in the wind in the sky. It knew it was a magical day, and it was so happy with its beauty. The end.</s> One day, Bob the Bob went to play in the park. He had a red stone and a big stone in his hand. Suddenly, he saw a red tree. He wanted to take it home with him. So, when Bob was walking, he saw a voice in the tree. He saw a man with a smile and a funny hat. But then the voice said, "This is a circus!" Bob was excited. He picked up a red stone and hopped around the tree. Then Bob saw a goat in the park too. The goat had a hat and a crown on the head. It said, "This is the beauty of the park. I knew it was time to go home." Bob thanked the goat and said goodbye as it walked away. Bob was so happy to have a red stone and he climbed into the tree and showed it to his day.</s> Once upon a time, there was a little boy named Tom. He loved to explore the world. One day he decided to ask his mom if he wanted to go. He was so excited! He ran to the house and opened the door. It was so big, he had never seen it before. When he ran out of the door, he couldn't believe his eyes. He looked around and saw a big, tasty cake on the table. The cake was a big cake and it was so big that it was pink in the sun. Tom knew he could eat it. He ate the cake for himself. He was so excited to eat the cake. But he did not know it was a loud noise. He looked down and saw that he was a rabbit. He was scared and he was curious. He wanted to run. But then he remembered that he had to be brave. So he ran outside and jumped with a stick to grab the rabbit. When he pulled the rabbit out, he heard a noise, and he saw the cake! He had caught the rabbit's hand and there was a tiny carrot! 
Tom was so happy, he shouted for joy, and grabbed the rabbit in his hand. The end.</s> Once upon a time, there was a boy who had a lion. He was very big and he always smiled at it. One day, a boy was in the park with a loud noise. He saw a big, lion. He heard the roaring and looked up at the boy. The boy was scared, but the lion was very big and he ran away to see what it was going him. He was scared and he asked his mom to talk to him. She said that the lion was very big and he was scared and he asked the boy if it wasn't listening. The boy was so happy that he and his mom smiled, and he thanked the lion. From that day on, the boy was always obedient in the park. He never forgot the big green lion and they were happy and playing together in a jungle.</s> Once upon a time there was an man. He liked to run and play in his garden. One day, he went for a walk and he got very excited. He saw a big, beautiful carpet up high in the sky and it was a man. He smiled and watched the man for a while. Then if he could have it. He said yes and gave the man a hug. He ran back to the house and took out a leaf from the box. Then he put the prize in his garden with a big smile. He put the coins in his garden and smiled. He knew that he would keep it in his garden and he felt happy as no one else. The end.</s> John was out playing in the park when he suddenly spotted a swing. He ran to the swing and grabbed his mom. She had a big smile on her face. He was so surprised that he yelled, in a sn on his voice. He saw his mom, who just smiled and said, "It's okay, Mom</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['</s>', '▁together', '.', '▁They', '▁said', '▁goodbye', ',', '▁and', '▁then', '▁ran', '▁inside', '▁to', '▁play', '.', '▁The', '▁end']
33
+ tail_tokens: ['▁and', '▁shiny', '▁in', '▁the', '▁grass', '.', '▁It', '▁was', '▁', 'a', '▁diamond', '!', '▁He', '▁', 's', '</s>']
34
+ </s> together. They said goodbye, and then ran inside to play. The end.</s> Once upon a time there was a king. He lived in a house and loved to climb. He was always by the top and all the things he saw. One day he wanted to climb up to the top and he wanted to go. So he found a big hill and he started to reach the top. Once he was top he could see a big rock. He sat down and felt a hand in his mom's hand. It was very smelly and he smelled a harsh sound. He knew it was something and he decided to take a look. He looked down and saw that he saw the pile of rocks. The king smiled and he ran back to his mom. He was so happy he could have a big hug. He was so happy that he ran home to show mom all the wonderful things he found. He was so happy that he didn't forget the rock and he could stay there for the next day.</s> One day, Jack was cheerful. He was walking in the park and he came across a big box. He saw the box and wanted to get it, but it was too tall for him. He looked up and saw a swing, a slide and a tree. He put it all together and he made it to the tall tree. He climbed the tree and reached the box. He was so excited for the surprise. He ran and ran. When he reached a house, he opened the box and ran to the park. He saw the children playing and playing. It was a cheerful day. He watched the sky, and he smiled a smile on his face.</s> Once there was a very special te. It was the teddy bear in the world and loved to hug. One day, the teddy was sitting in the room and it bumped into its big box. The little girl was so excited and wanted to keep it. She went to the teddy's house, and knocked on the door. It had been there for a long time the little girl had found a surprise. The little girl was so happy and hugged the teddy. She was so excited to see it, she laughed and clapped. When she saw the surprise, she put it back on the teddy and a smile. 
She was glad that she had found such a special teddy bear again.</s> Once there was a bald man who wanted to have a big adventure. One day, he went to a big hill. He wanted to take a trip to the top. But he was scared because he was lost and he wanted to find a safe place to go. He ran and he walked to the hill, but the hill stayed up. Finally he saw a big hole in the bottom. He put the sign on a big rock, and used it to cover the hole. Then he waited for the sign, he did it and he was relieved. He smiled when he saw and he was proud of himself. He knew he had used the task and he was ready to go explore the world. He and the man smiled and they went on their way.</s> Once upon a time there was a boy named Bob. Bob was very brave, but he wanted to explore. One day Bob decided he wanted to go on an adventure. He grabbed a map from the beach. He went to the park to the park, and he didn't forget it. When Bob arrived, he saw a big slide. He was so happy! He ran to the slide down. He felt the sand and it was so big. He had a lot of fun playing in it. Then he made it back to the beach too. After a while, he decided he wanted to go for a walk. He looked like he could remember the toilet. It was his way, and he smiled. He kept walking, and sure enough he found a spot on the beach. It was the sign of the sign for him. He smiled, he smiled and then ran off to play.</s> Once upon a time there was a big green wolf. He liked to rock around in the park. One day, he wanted to take a walk. So he went to the park to rock. He sneezed and down the slide, but then he saw something green and shiny in the grass. It was a diamond! He s</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['</s>', '▁to', '▁her', '.', '▁She', '▁was', '▁hurt', '▁and', '▁couldn', "'", 't', '▁manage', '▁it', '.', '▁The', '▁girl']
37
+ tail_tokens: ['.', '▁She', '▁looked', '▁up', '▁and', '▁saw', '▁an', '▁old', '▁tree', '.', '▁It', '▁was', '▁', 'a', '▁big', '</s>']
38
+ </s> to her. She was hurt and couldn't manage it. The girl was sad and asked her mom for help. Mom said, "Don't cry. I can get the mixer again." The girl looked at the mixer. It was big and round and she smiled. She pushed it again and got the blouse. The girl smiled and hugged her mom. She was so happy to have such a strike.</s> Once there was a little boy who was very happy. He wanted to go and play and he had a sand bucket. He went down to the beach and looked at the sand beach. As he walked around, he saw something move in the water. He saw some sand in the sandand. He wanted to pick it up and he saw that the sand was empty! It was a big sand. The sand was empty and the little boy started playing in the sand. He laughed and sat at the beach with the sand. He even made it to move the sand. He carefully pushed the sand up in the sand! The little boy was very happy and swam around with his new sand. He felt so happy and content. Then he smiled, and made the sand again.</s> Once upon a time there was a brave little bear who lived in a big green hole in the ground. One day he decided to go and explore the forest. As he walked to a field, he saw a big tall tree. He could see it from a tree, and he also knew he had to climb up the tree. He ran up to the branch and started to climb the tree, but he was too brave! He stayed up the tree and he knew he could jump on the branch. He tried to climb up the tree, but he couldn't do it. He was sad and he had to stay. He was happy that he could do it, so he could take a rest and enjoy his walk. He walked for a while, he came to a big pond and started to climb it with his courage. As he was climbed the top, he looked at all of all the things he saw. He saw lots of birds, like a frog, some fish, and some even made a wet sand. With a lot of patience, he went home with a smile and a smile. He was happy and had a lot of fun in the forest.</s> Once upon a time there was a big, white rabbit. He was so small and round. 
He liked to play in the garden, but it was very perfect. One day he wanted to take his carrot to the garden. He saw the carrot and he wanted to go to the garden. So he hopped over to the carrot, and hopped inside. But the rabbit was very sad. He didn't see anything. But then, a little girl came to the garden. She saw the rabbit and she ran over to him. She smiled at the rabbit and was so happy to have found the carrot! She gave the rabbit a big smile, as long as he could. She hopped out of the garden, with the carrot and hopped around the garden. The rabbit was so happy with his carrot and smiled. They had the perfect day!</s> Once upon a time, there was a boy. He had a wallet that he liked and he was a little boy. One day he wanted to play in the woods. He wanted to keep his luck. So he hopped on his bed and took a nice deep breath. The little boy saw a beautiful lake. He looked down and he saw a little bird that was coming from the lake. It was looking around for a while. The little boy couldn't help but knew he had to help. He picked it up with him and tied it to the bottom of a tree. After a few minutes he had found it. He was happy and said that he could keep the bird in a safe place. The little boy thanked his mom, and went to take a nap. He had a nice day at the park, and he kept his wallet safe.</s> Once upon a time, there was a little girl. She had a special object that glowed in the ground. It was a big tree with some leaves. But every day, she was feeling a little bit scared. One day, she heard a loud noise outside. She looked up and saw an old tree. It was a big</s>
LTA_openwebtext_dualt/mini_owt_logdirichlet/samples/tinystories_t5_len1024_d768_8gpu_step15000_decode128_quick_n8/first8.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ checkpoint=runs/tinystories_t5_logdirichlet_len1024_C1_to_64_d768_l12_h12_gbs512_8gpu_40k_20260527_121803/step_015000.pt
2
+ step=15000
3
+ decode=dualline_time_aligned_dirichlet_final_state
4
+ c_min=1.0 c_max=64.0
5
+ steps=128 temp=1.45 bridge_power=1.0 temp0=0.0
6
+ bos=1:</s> eos=1:</s>
7
+ ===== sample 0 =====
8
+ head_tokens: ['</s>', '.', '▁They', '▁learned', '▁to', '▁be', '▁kind', '.', '</s>', '▁Tom', '▁and', '▁Anna', '▁were', '▁friends', '.', '▁They']
9
+ tail_tokens: ['▁opened', '▁the', '▁door', '▁and', '▁looked', '▁at', '▁the', '▁bed', '.', '▁She', '▁said', ',', '▁"', 'We', '▁are', '</s>']
10
+ </s>. They learned to be kind.</s> Tom and Anna were friends. They liked to play with their toys in their room. They had many toys and dolls and books. They were very happy. One day, mom told them that there was a big park with flowers and birds. They were excited to go to the park. They saw a man and a sa slide and a car. Mom came to see them. She said it was a surprise. They reached the park. They saw a table with a swing and a table. There was a pond on the ground. Mom said it was not a toy for the park. She said they had to take it. Tom and Anna went to the car. They saw mom. They liked the pond. They wanted to eat it. Tom said, "Mom, I want something to eat. It's not nice. It's not just a toy. It's not a car. It's mine. It's the car." Anna saw the car. It was big and soft. She said, "Is it a toy, Tom, please?" Tom said, "No, no, no. You can't have it. I want a hat." Mom said, "OK, Tom. You can have it. But you have to be careful. It's not a real hat. It's your food. You can't wear the hat." Tom and Anna took the car and hat. They said goodbye to the car and went to the sand. They saw a big truck and a lot of animals. They said, "Mommy, look at the pond! It was yummy!" Mom smiled and smiled. She said, "That's nice. Come on, let's have fun!" Tom and Anna cheered. They loved their mom. They did not like the car. They were happy.</s> Lily liked to play with her dolls. She had many dolls but her favorite was a doll. Lily liked to make dolls with her toys. She had a big room with many toys. She had a box of toys and a car. One day, Lily wanted to make a doll with a doll. She found a doll. She put it on her bed and started to play. She wanted to make a doll. She looked around and saw a doll on the floor. She made noises and claws and paper. She felt happy and pretty. She did not listen to her mom. She wanted to play with the toys. Her mom saw her, and she was not happy. She saw Lily and came over to play. She said to Lily, "Lily, this is a doll. It is fun. 
Do you want to make a doll?" Lily said, "No, I want to make it. I want to make the dolls and the car. They have a big ball, and a doll. We have a doll. We make a tea party with the doll." Lily looked at her mom and said, "I want to be the doll, mom. But it is big and t. It is hard. I can't make a mess." Her mom smiled and said, "That's wonderful, Lily. I'm glad you want to help. You can make another things. But you have to share the car and your toys. And you can make a doll with your car and your dolls. Would you like that?" Lily nodded and smiled. She liked mom. Then, they went to the kitchen and made up. Lily liked the doll and put in a big shirt. She liked the doll and the car. She found cars, dolls, and stickers. She saw her mom and she had a lot of things. They also had a room for her. They had a big bed, a cloth and a rug. They found the chairs and the chairs. They put them on a table. They said, "Look, mom's coming. They are very pretty." Lily and Max wanted to play. They looked at the pictures and the pictures. They saw trees and the clouds and the pictures. They saw the flowers, the sky, the trees, and the flowers. They saw the hats and bells. They saw the swings and the sky. They saw the swings, the dog, and the cat. Lily ran to the mom and hugged her mom. She smiled and waved. Mom saw her smiled and smiled. She saw that Lily and Max liked the couch. She opened the door and looked at the bed. She said, "We are</s>
11
+ ===== sample 1 =====
12
+ head_tokens: ['</s>', '▁', 'he', '▁said', '.', '▁"', 'I', 't', "'", 's', '▁not', '▁', 'a', '▁park', ',', '▁it']
13
+ tail_tokens: ['▁said', ',', '▁"', 'Yes', ',', '▁it', '▁is', '!"', '</s>', '▁Once', '▁upon', '▁', 'a', '▁time', ',', '</s>']
14
+ </s> he said. "It's not a park, it's fun." Tommy shook his head. "I don't want to go in it, Tommy," she said. "It's not a nice day in the park. We can make it all out of the park first." Tommy thought for a moment, then smiled. "It's okay," he said. "I'll try to make it out!" They both smiled and they started to make the park in the park. The sun was shining, and he was so happy. He had made a new friend and enjoyed playing in the park. The day was fun, and the park was right.</s> Once upon a time there was a happy little girl named Lucy. She had a big smile, every day she went outside to play in a big park. One day, she was playing and she noticed something strange. She was so excited! She wanted to know what it was. When she looked around, she saw a big tree on the ground. She picked it up with a smile, and held it tightly in her hand. She put it on and started to run around the tree. Suddenly, the tree started to get a hug. She felt so happy and excited. Suddenly, Lucy's mom came to her and saw what she saw the tree. She looked around, and spotted the toy she had found. She smiled and said it was okay. She looked at the tree and smiled. She was so happy to be found and she went back to playing with her toys. She was so happy that the toy had found in the big tree.</s> One day, a little girl and the girl went for a sand. She wanted to have fun, so she looked around and around for a do. Suddenly, she spotted a ball on the ground. She picked it up and started to poke it. It made a loud noise. She made a lot of noise, and she was so scared that she ran away. Then, she saw a big tree. She started to play with the ball, and she wanted to make it again. When she got back to the tree, the girl found a toy. She couldn't reach it but she was very excited. She ran around and looked for the toy. The girl then caught the toy with a big can. Finally, the girl was safe. She shook it and picked it up. The girl was happy and smiled at her mom. 
She was happy to have made a new toy!</s> Once upon a time, there was a little girl named Melissa. She was a very curious and loved to try new things. One day, she saw a big park with lots of animals. She was so excited, so she asked her mom if she could go. Her mom said yes, so they went to the park. They saw lots of nice things, birds, and music. They looked very nice, but when they got to the big one. It was a big one. She asked her mom to help her with the big one. So, they took it inside. As they walked away, she started to scream. She saw a big bear and started to hug it. The bear started to jump, and she screamed. She saw the big bear and it ran away. She was so scared that she screamed. She ran back to her mom and dad. The end.</s> Once upon a time there was a nice little girl named Amy. She was just three years old. She wanted to go to the park and have a new day. It was a beautiful day, Amy and her mom went to the park and wanted to get a ball, and a ball. As she was looking around, she saw something else in the grass. It was a bird! She was excited to find out what it was, so she asked her mom, "Mom, what's that in the sky?" Mom said, "It is a bird, and it's so beautiful, but it has to make something special." Amy thought for a moment and said, "I want to use it to make a pretty bird!" She said, "OK, mom!" Her mom said, "You can use it to make the bird." Amy and her mom found a spot in the park, and put the bird on the ground. But when they opened the box, they saw a beautiful bird with a beautiful nest, and a tree. Amy smiled, and said, "Mom, that bird is the same and beautiful!" Her mom smiled and said, "Yes, it is!"</s> Once upon a time,</s>
15
+ ===== sample 2 =====
16
+ head_tokens: ['</s>', '▁and', '▁it', '▁was', '▁making', '▁him', '.', '▁The', '▁little', '▁boy', '▁was', '▁very', '▁sad', '.', '▁He', '▁knew']
17
+ tail_tokens: ['▁not', '▁get', '▁the', '▁items', '.', '▁They', '▁looked', '▁around', '▁the', '▁garden', ',', '▁and', '▁it', '▁was', '▁very', '</s>']
18
+ </s> and it was making him. The little boy was very sad. He knew he couldn't play with his toy. So, he grabbed a cup of water. He put some water on his toy car. Then he put it in the water and it made the boy happy. He was so excited to play with his toy car. He had made a yummy noise and the time he did it. He was very happy and he was glad he could make a new friend.</s> Once upon a time there was a little boy. He was a very happy boy and he loved to run. Every day, he went to the garden. One day he smelled something in the air. When he arrived he saw that the ground was a big pond. So he goed to get closer. He picked up the worm and bit into it. He felt a big body in his little hands. He was very happy and he looked around in the sun. Suddenly he heard a noise. It was coming from a big tree. He knew it was time to go home. But he was scared and he started walking back. Suddenly, he saw a big fat rabbit. He was happy and he chased the rabbit. He had made it back to the pond and then he saw his worm in the grass. He was very happy that he had found it and he laughed. Then he went back to playing.</s> Once upon a time there was a very tall man. He went on a walk in the park and he wanted to find something special. He started to run, but he was lost in the car. He was very sad and scared, but it was dark. The man started to cry, but he was scared. He was sad he did not give up. Then, he saw something shiny in the grass. It looked like a big car. Then he grabbed it, and he ran home. But when he got home, he saw that it was not a bad car. The man asked the car where it was, and the car said it was a car. So, they made a big box to find the car, so they found it, and put it back in the car. The man was happy, and so the man was very happy. It was okay, and they made a bed for the car. They threw it around and smiled and laughed. The man was so happy that he had found the special car. 
He was glad he had found the car, and he had made a new friend.</s> Once upon a time there was a little man. He was a very happy boy, and he was in a tree. He was sad, and wanted to be fun. Suddenly, a big bird came out of the tree,. The bird was a bird, and it was flying up to the tree. The little man was not scared, he was scared, but he was too scared to play with him. He ran to the bird, but he was too scared. Suddenly, he fell down, and it started to feel sad. The little man thought, and then he had an idea. He found a big rock and put it on top of the bird. He grabbed it with his hands, and it was a little bird! He jumped in, and dropped the bird. The little man was so happy, he was able. He thanked the bird, and ran around his play. The little man never went back to the tree, and he was still scared of the bird. He was happy to have fun!</s> Once upon a time, there were two friends. One was weak, and the other friend wanted to reach the top of a big mountain. So, the friends saw a mountain, and they wanted to see the top in their tree. They were happy and not together, but they had an idea. They decided to make a way to make the mountain. The two friends got together, and they made a plan. They put the pieces in the one, and made it to the top of the mountain. They were so happy with their work, and they loved it. They knew that it was made of friends, and they decided to look for it. The end.</s> Once upon a time there were two friends, Jack, and Lucy. They had a special event that day. They wanted to get their things to a big mountain. Jack said it was very nice, so they had to stay. But when they got there, they saw that they could not get the items. They looked around the garden, and it was very</s>
19
+ ===== sample 3 =====
20
+ head_tokens: ['</s>', '▁it', ',', '▁and', '▁', 'he', '▁was', '▁happy', '▁to', '▁have', '▁it', '.', '▁It', '▁was', '▁lots', '▁of']
21
+ tail_tokens: ['▁and', '▁', 's', 'hook', '▁it', '▁in', '.', '▁He', '▁smell', 'e', 'd', '▁the', '▁sweet', 's', '▁and', '</s>']
22
+ </s> it, and he was happy to have it. It was lots of fun things he had made it, and now, he was never able to be a very happy day. He went back to the park, and said goodbye to the old man. He had a new day, and it was great.</s> It was a good day for Jack. He had been here for a special day. It was a nice day and he wanted to go down to the swings. When he got to the park, he had a big smile on his face. It was a song, and he liked it. He laughed and laughed, until his mom said it was time to go home. When they came back, Jack saw a big bird on the ground. Mom said it was time to take a look, and he wanted to keep the bird safe. So they found a bird and put it back in the park. Jack was very careful that he came back to the nice park. He was happy to have the bird and he was happy to have it. Every day he looked up to the park and saw a lot of nice things. He smiled, and he couldn't wait to show his mom.</s> One day, John's mom asked him to get a new one. It was a big box and a red lip. He was very happy. John went to the store and he saw something special in the store. It was a red car in the corner. He put it on, and then he saw it. He liked it, and he wanted to ride the car. He smiled and put a face on it. He put his hand in the car and it went to the store. He was so fast. He laughed and smiled as he put the car in his box. He took it out and put it in his car. He was so happy. He looked at the car every day.</s> Once there was a boy named Tom. He had a big room and he was very happy. One day, he had an idea. He wanted to clean the room. He found a way to get it, but he didn't want to make a new one. He was about to clean the room. Suddenly, he had an idea. He wanted to be happy to help. He ran to his room to get a big one. He sat down and started to clean the room. He put it on and tried to make it. And it worked! Tom was happy and raced around the room. He had had a nice day and he was proud of his new room.</s> John was in the park. 
He wanted to play with his dolls, so he found a big car on the ground. He carefully pushed it with a stick. Then he made a toy car. It was a big car. He played with it, and ran around in the grass. He was happy to have a car. But then, it started to go down. It was hard and long, he had to close his car. But he was so excited that he couldn't stop. The car started to move. He was so happy with his car, he ran inside to his mom and hugged her. He was so happy that he had pushed the car and he ran to find his mom. The end.</s> Once upon a time, there were two friends. It was very big and it was one with a big fish. Every day they would go to the ocean. The fish had a water, and it was very beautiful. They wanted to hold the water, but it was too fast. The fish saw it and they wanted to play with it. One day, they had all lots of fun to do. They decided to have a big jump. They swam and jumped in the water. They liked to see it and watch the sound of the whale. It was so much fun! They swam in the water, playing for a long time. They laughed and played together with their rocks. At the end of the day they decided it was time to go home. They were very happy and hugged each other. They were so happy, and it was a fish, it was a fun one. They had had the good day, and they were happy for each other.</s> Tim and his mommy went to a store. They were going to get a new cart! Tim was so excited to see all the new toys. He said goodbye to his mommy as soon as he could, he felt the bottles in his hands. He saw the port in the store and shook it in. He smelled the sweets and</s>
23
+ ===== sample 4 =====
24
+ head_tokens: ['</s>', '▁saw', '▁', 'a', '▁', 'pond', '▁with', '▁', 'a', '▁', 'pond', '▁and', '▁thought', '▁it', '▁was', '▁']
25
+ tail_tokens: ['▁and', '▁play', '.', '▁He', '▁put', '▁on', '▁his', '▁red', '▁uniform', '▁and', '▁ran', '▁outside', '▁with', '▁', 'a', '</s>']
26
+ </s> saw a pond with a pond and thought it was a beautiful pond. She asked her mom, "Can I play in it?" Her mom said, "Yes, but be careful when you go into the pond!" Lucy was so excited she jumped in the pond, and it was so fun. She jumped in, laughed and laughed, the pond started to get dark. Lucy was sad. She wanted to play in the pond and the pond, but she was happy to see the pond again. She said goodbye to her mom and went home with a smile on her face.</s> Once upon a time there was a little girl named Lucy. She loved to slide. It was so big that she couldn't go very high. She was happy and proud of it. One day she had a plan to go to the park. She wanted to go to the park with her family. When she got to the park, she jumped on the swing and sat down. She felt happy and took a long time. Then, she sat on the slide, and looked around. She felt so happy. She jumped on the swings and went down the slide again. It was so much fun! Lucy went up to the slide and had a great time. She looked up at the sky and she smiled. She saw the slide down and it was the best day of her. She had a great day and was so happy. She had lots of new friends and thought about all the fun things she had seen. It was the end of the day in the park!</s> Once upon a time there was a a little girl named Lucy. Every day, she would go to the park and play with her friends. One day, she was going to the park and looked for something special. When she got to the park, she started to search in the grass. She looked and looked, to find out what it was. All around the park, she saw a big tree with a bright open bow on the top. She ran to the tree and saw that it was a big box with a little key inside. She thought it was so exciting! She went to the box and found a lock on the box. Inside, she had found a lot of toys. She and her friends had toys and treats. Lucy had so much fun playing with them. She was so happy and had a great time at the park. 
She thanked the park with a smile on her face.</s> John was a small boy who liked to poke things. He found a big box and couldn't believe his eyes. He opened it and took out a vest. He thought it was a card and he put it on. He asked his mom, what should I do?" His mom said, "You need to find something to keep it." John looked around the room and saw something else. He started to cry. He said, "Look! I found a vest!" His mom smiled and said, "That's nice, John. We don't have to put it up. You don't have a while and I will take it away." John went to his room and took out the root. He put it in a box and put it on. He was so happy he had the courage and was able to find it. He smiled and thanked his mom. He had found the pit he had kept the memory with her.</s> Once upon a time, there was a little boy who loved to play with his toys, and he loved to play with his toys. One day, he was playing with his lost toy in his garden. He looked everywhere, but it was not there. He felt like he was trying to find it, but he couldn't find it. Suddenly, he came to a park and he saw a big tree. He ran up, and saw it was full of toys. He was so excited that he found the car. He was so happy, he wanted to play with it. He started to run around the tree, and when he was near the tree, he saw something shiny. It was a car! He was so happy. He had wanted the toy car with him, and he grabbed it away. So, the little boy looked around for a car. He wanted to play with a car, so he took the car and put it in the ground. Every time he played, he remembered that he was still there, and he was very happy. The end.</s> Once upon a time, there was a little boy named Timmy. He was very excited to go outside and play. He put on his red uniform and ran outside with a</s>
27
+ ===== sample 5 =====
28
+ head_tokens: ['</s>', '.', '▁Mom', '▁said', ',', '▁"', 'We', '▁can', "'", 't', '▁do', '▁it', '.', '▁It', '▁will', '▁be']
29
+ tail_tokens: ['!"', '▁', 'he', '▁said', '▁excited', 'ly', '.', '▁"', 'Com', 'e', '▁on', ',', '▁Tim', '!"', '▁Mom', '</s>']
30
+ </s>. Mom said, "We can't do it. It will be fun, it's too much fun." Jack smiled and said, "No Mommy. We can't go today. We can't, but it's too high." They left the park and Jack was sad. He started to walk around the park, and found a little view of the park. He was so excited to explore the park and he wanted to play with it again.</s> One day, a little boy was walking near the pond. He was curious, and he found it was a big boat. He was happy to see the boat, but he was very scared. Suddenly, he saw it. He wanted to explore, but he was curious, so he started walking. When he got to the boat, he saw lots of new things, and he felt excited and happy. Suddenly, he saw a big roar. The boy was very scared, so he decided to swim to the boat. When he got to the boat, he was so scared, he didn't know what the boat was. Suddenly, he saw it, and he decided to throw it to a big rock. He swam to the top and saw the boat. He was so happy and when he was there, he started to explore the boat all day. He was so happy to be there, and even when it was time to go back home.</s> It was a nice day, and he was very excited. He was invited a new day to go to the park today. When John got there, he saw lots of people and playing. He was so happy to see them there, because he was going to the park. When he saw the park, he wanted to be there. He raced to the other side of the park and started to play. He saw lots of balls, swings, and a toes. He was having so much fun! John's mom came in and said it was time to leave. He was sad but he started to stay. He was happy to find the nice spot to go. He walked back to the park, and started to play. When he arrived, he saw a surprise. It was a big red car on it. John was surprised, and he couldn't believe it. When he reached the park, he found a room full of fun and toys! He was so happy he had had a fun day at the park and went to play. It was a perfect day at the park!</s> Once there was a little boy who had an arrow. 
He was very curious and liked to explore the s around him. One day, he was playing a big garden. He saw an arrow in the garden and he wanted to catch it. He ran to the tree, picked it up and put it in a safe place. He put a jar in the mitten and put it in the garden. He made a little house to keep it. But when he saw the ant, he shouted! The little boy was very sad and said, "Mommy, can I have the arrow?" Mom smiled and said, "It's okay. Now, let's get it inside and keep it safe in the garden." The little boy was happy with the mitten. He was happy to have a new rag.</s> Once upon a time, there was a boy named Tom. He liked to play in the sand and have a nice day. One day, a big fish came up and saw Tom in the water. Tom wanted to help the pond. He grabbed a bucket and put it in the water. Tom used a spoon to clean the boat and put the fish in the water. He took a big l and wiped it away. The pond got better, and Tom was happy. In the end, Tom had had a great day. He thanked the big fish for helping him clean the day.</s> Once upon a time, there was a little boy named Tim. He wanted to play with his toy, so he went to his mom. "Hi!" Tim said. "What are?" Mom asked. "I'm looking for a special toy. It's time for a party!" Tim was so excited and he started to play. He found a big car, a doll and a train. When it was time to go home, Tim was so happy, he had a to his mom. "I'm here!" he said excitedly. "Come on, Tim!" Mom</s>
31
+ ===== sample 6 =====
32
+ head_tokens: ['</s>', '▁boy', '▁named', '▁Max', '.', '▁He', '▁lived', '▁in', '▁', 'a', '▁big', '▁house', '▁with', '▁lots', '▁of', '▁']
33
+ tail_tokens: ['▁said', ',', '▁"', 'Thank', '▁you', ',', '▁mom', 'my', '!', '▁I', '▁can', "'", 't', '▁help', '▁my', '</s>']
34
+ </s> boy named Max. He lived in a big house with lots of sand it. He liked to go on adventures with his mom and dad. One day, Max wanted to do the same, but it was hard. He was very proud of the big things and he had to do it on his own. He thought and he was a very nice boy. So, he started to work. When he finished, he had a big surprise. It was for a big box. He opened it and when he saw the box, it was something big that made him scared. He didn't know what to do. Suddenly, a man with a smile. He had a big one - he had managed to test the box. Max was so happy. He made a big puzzle and ate it. He was so excited. He had to make a puzzle. He was so proud of all of his imagination. The end.</s> Once upon a time there was a little boy. He went for a walk and saw in the park. The sky was very dull and very blue. The little boy saw his father and said, "It's so pretty! What's that one?" The father smiled and said, "I don't think it's beautiful, it's a ball!" The father took out a big ball and the little boy sat down. He said, "Please can I help it?" The father smiled and said, "Yes, you can help!" He gave the ball a little nest for a little bird. The bird was so happy and felt much better. The little boy said, "Thank you, it was so nice to see it in the sky." The father smiled and said, "That's very pretty. Let's go home now." The little boy and his father walked back home, and the sky was beautiful.</s> Once upon a time there was a little bunny. He was very happy and very strong. One day he decided to go. He started to eat. He hopped and ran until he came to a place. He looked down and saw that the ground was very dark. When he got there, he was very scared. He didn't want to get there. He was very scared, so he started to eat. Suddenly he heard a loud noise. It was a fox coming! He wanted to help, so he tried to do it. But as he jumped down, the fox got scared and ran away. The bunny was scared and he started to sp in his home. But it was too late. 
He had to go to the fox, but he was too far away. He was very sad. He was very sad.</s> Once upon a time, there was a little boy named Tim. He had a nice smile and he loved playing with his toys. One day, he was playing and he started to rain. It was very sad and it started to rain. Tim decided to go get back to the house. But when he looked outside, he saw that he was up in a tree. He was sad to see the tree, but he couldn't be scared. He thought it was hard, but he was brave and he went back to the house. He looked around and found a big tree. As he started to tree, he saw the leaves playing in the tree. When he was done, he went back to his house with a big smile. He knew that he had made the tree. When he got home, he was so happy to be home. He smiled at his dad and smiled.</s> Once upon a time, there was a little girl named Lucy. She was a very happy little girl, and she loved to play. One day, she asked her mom if she could have some fun. Mom went to the store with her, and she was so excited! She went to the store and looked at it. She saw the best, it had many colors. She didn't know if she wore it. So, she went to the store and found the balloon. It was hard and it was so pretty. She had done it. She was proud of herself. Lucy was so happy. She couldn't wait to keep the balloon all for a while. The next day, she found a box under a tree. She came back to the park with the balloon, and she found it and started to put it on. Lucy's mom was so happy, she wanted to see Lucy that she was proud. She said, "Thank you, mommy! I can't help my</s>
35
+ ===== sample 7 =====
36
+ head_tokens: ['</s>', 's', 'at', '▁down', '.', '▁Jack', '▁smile', 'd', '.', '▁He', '▁had', '▁', 'a', '▁really', '▁nice', '▁time']
37
+ tail_tokens: ['▁hug', '▁and', '▁she', '▁was', '▁so', '▁proud', '▁of', '▁her', '.', '▁She', '▁was', '▁so', '▁proud', '▁of', '▁her', '</s>']
38
+ </s>sat down. Jack smiled. He had a really nice time at the top of the tree, and he had enjoyed it.</s> Once upon a time there was a little girl. She was three years old. She liked to go to the park with her mom. One day, they went for a park. The girl started to explore the park. She was so excited. It was such a fun! But, when she reached the park, she saw the park. It was very tall. She could see some things. There was a big tree to climb the park. The girl knew she had to climb the tree. She found a stick and a way to climb the tree. So she found a little stick. Then, she saw a big tree and ran to it. She knew that it would be useful and she wanted it to be fun. So, she climbed up the tree and started to touch it. Finally, the girl felt a little better. She looked around and saw that it was a park. It was a lot of toys, and the little girl wanted to play. The girl laughed and started to play. Suddenly, she felt happy. It was a nice day and the girl went for a while. It was a fun day and the little girl had enjoyed the park so much.</s> There was a small boy who liked to play. One day he wanted to go and explore the park. He went to the park and he saw a big park. The boy was very happy. He was so excited he started to jump and jump. He saw a big tall tree and he sat down to get it. The sun was very bright and the boy started to smile. He stayed there for a while and enjoyed the moment. He smiled and said goodbye. He made his way to the park and he had lots of fun. He was so happy and excited, and he smiled and ran around. When he got home, he saw the park in the park.</s> Once there was a little boy. He was playing outside in the garden and saw something big. It was a blue toy. It was a toy car. He wanted to play with it. He asked his mom again, "Mom, can I have the car?" His mom smiled and said, "Yes you can play with it. It will be very happy." The little boy took the toy car and put it in the garden. He thought it was very pretty. 
He looked at the car and smiled. He tried to fix it but it was too hard. The little boy was sad. He said, "I lost it!" Suddenly, his mom said, "It's okay. You made it!" The little boy was so happy, he started to play. He had a smile and he said, "I love my car!"</s> One day, a boy and his parents. They were going to a beach. It was full of fun things, and they went to the beach. Soon, the sun started to rain and the sea got stuck in the water. The little boy screamed and he said, "I'm a toy, Dad!" Dad said, "We can't have it!" But the little boy was sad. He didn't want to play with it, so he said, "I'm sorry, I can't have it." He was sad, but he decided to go home. He left the car and went home. The next day, a new toy was still there, and the boy was happy again.</s> John was playing in the living room. Mommy was carrying a big smile. She saw the flower in a piece of paper, and smiled. "You can have it," she said. She was so happy. She gave him a big hug. "Yay, Daddy!" she said with a big smile too. Then, Mommy and John's Mommy walked to the store. She had a big smile, and she was a bit scared. "It's time to go home," she said. She put on the hand and picked it up. It looked so good. Mommy smiled and walked to the store. She saw the big smile on, and it looked so pretty. She wanted to take it home with her. "Let's take it home," she said. John smiled and put the flower in a vase. He watched as Mommy put the flower back to the store. She was very happy. When they got, Mommy was so excited to see her new flower. She had a big hug and she was so proud of her. She was so proud of her</s>
LTA_openwebtext_dualt/scripts/_tmp_trace_lta_prompt_decode.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ import torch
10
+ import torch.nn.functional as F
11
+
12
+ REPO_ROOT = Path(__file__).resolve().parents[1]
13
+ if str(REPO_ROOT) not in sys.path:
14
+ sys.path.insert(0, str(REPO_ROOT))
15
+
16
+ from eval import build_model_from_ckpt
17
+ from flowtext_lab.bridges import smooth_onehot
18
+ from flowtext_lab.decode import model_time_for_step, sample_noise_simplex, state_for_model
19
+ from flowtext_lab.tokenization import BpeTextTokenizer
20
+ from scripts.flowtext_decode_lab import DecodeConfig, decode_text, flowmap_gamma
21
+
22
+
23
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the prompt-decode trace script.

    Options are registered in a fixed order (required paths and prompt first,
    then the numeric knobs, then ``--final_from`` and ``--eps``) so that the
    generated ``--help`` text keeps the same layout.

    Returns:
        The namespace produced by ``argparse`` from ``sys.argv``.
    """
    parser = argparse.ArgumentParser()

    # Mandatory string options.
    for flag in ("--checkpoint", "--tokenizer_path", "--output", "--prompt"):
        parser.add_argument(flag, required=True)
    parser.add_argument("--candidate_index", type=int, required=True)

    # Optional integer knobs with their defaults.
    int_defaults = (("--max_len", 128), ("--steps", 128), ("--seed", 20260502))
    for flag, default in int_defaults:
        parser.add_argument(flag, type=int, default=default)

    # Optional float knobs with their defaults.
    float_defaults = (
        ("--target_prob", 1.0),
        ("--endpoint_temp", 1.4),
        ("--damping", 1.0),
        ("--max_gamma", 1.0),
    )
    for flag, default in float_defaults:
        parser.add_argument(flag, type=float, default=default)

    parser.add_argument("--final_from", choices=["state", "endpoint", "blend"], default="state")
    parser.add_argument("--eps", type=float, default=1e-8)
    return parser.parse_args()
40
+
41
+
42
def encode_prefix(tokenizer: BpeTextTokenizer, prompt: str, max_len: int) -> list[int]:
    """Encode ``prompt`` into token ids, optionally led by BOS, cut to ``max_len``.

    Args:
        tokenizer: Object exposing ``tokenizer.encode(...).ids`` and ``bos_id``.
        prompt: Text to encode; encoded with ``add_special_tokens=False``.
        max_len: Slice bound applied to the final id list.

    Returns:
        The BOS id (only when ``bos_id`` is not ``None`` and is non-negative)
        followed by the prompt ids, sliced with ``[:max_len]``.
    """
    encoding = tokenizer.tokenizer.encode(prompt, add_special_tokens=False)
    prefix: list[int] = []
    bos_id = tokenizer.bos_id
    if bos_id is not None and bos_id >= 0:
        prefix.append(bos_id)
    prefix.extend(encoding.ids)
    return prefix[:max_len]
47
+
48
+
49
@torch.no_grad()
def main() -> None:
    """Trace one prompt-conditioned flow-map decode and dump it as JSON.

    Steps, as implemented below:

    1. Seed torch, load the tokenizer and checkpoint, build the model.
    2. Sample ``candidate_index + 1`` Dirichlet noise rows and keep only the
       last one (presumably so the kept row matches what that candidate would
       receive in a batched run with the same seed -- TODO confirm).
    3. Overwrite the prompt positions with a smoothed one-hot and lock them.
    4. Run ``steps`` flow-map updates, recording per-position argmax tokens,
       top probabilities and state entropy after every step.
    5. Pick the final distribution according to ``--final_from`` and write the
       whole trace to ``--output``; a short summary is printed to stdout.

    Raises:
        ValueError: If ``--candidate_index`` is negative.
    """
    args = parse_args()
    if args.candidate_index < 0:
        # A negative index would request an empty/invalid noise batch and fail
        # much later with an unrelated indexing or shape error.
        raise ValueError(f"--candidate_index must be >= 0, got {args.candidate_index}")

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BpeTextTokenizer.from_file(args.tokenizer_path)
    ckpt = torch.load(args.checkpoint, map_location="cpu")
    model = build_model_from_ckpt(ckpt, tokenizer.vocab_size, args.max_len, device)
    model.eval()

    # Draw candidate_index + 1 rows and keep the last one as a batch of size 1.
    init = sample_noise_simplex(
        (args.candidate_index + 1, args.max_len),
        tokenizer.vocab_size,
        device,
        args.eps,
        noise_mode="dirichlet",
        target_prob=args.target_prob,
        noise_sigma=-1.0,
        dirichlet_concentration=1.0,
    )[-1:].float()
    attn = torch.ones((1, args.max_len), dtype=torch.bool, device=device)

    # Prompt positions are pinned to a smoothed one-hot for the whole decode.
    prompt_ids = encode_prefix(tokenizer, args.prompt, args.max_len)
    lock = torch.zeros((1, args.max_len), dtype=torch.bool, device=device)
    lock_probs = torch.zeros((1, args.max_len, tokenizer.vocab_size), dtype=torch.float32, device=device)
    if prompt_ids:
        ids_t = torch.tensor(prompt_ids, dtype=torch.long, device=device).unsqueeze(0)
        sp = smooth_onehot(ids_t, tokenizer.vocab_size, args.target_prob, args.eps)[0]
        init[0, : len(prompt_ids)] = sp
        lock_probs[0, : len(prompt_ids)] = sp
        lock[0, : len(prompt_ids)] = True

    probs = init.clone()
    last_endpoint = probs
    records = []
    cfg = DecodeConfig(
        label="trace",
        rule="flowmap",
        steps=args.steps,
        model_t_mode="flow",
        damping=args.damping,
        max_gamma=args.max_gamma,
        endpoint_temp=args.endpoint_temp,
        final_from=args.final_from,
    )

    def token_text(token_id: int) -> str:
        """Decode a single id without dropping EOS or special tokens."""
        return tokenizer.decode([token_id], stop_at_eos=False, skip_special_tokens=False)

    for step in range(args.steps):
        t = model_time_for_step(cfg.model_t_mode, step, cfg.steps, 1, device, dtype=torch.float32)
        logits = model(state_for_model(model, probs, args.eps), t, attn).float()
        logits = logits / float(cfg.endpoint_temp)
        endpoint = F.softmax(logits, dim=-1)
        last_endpoint = endpoint
        gamma = flowmap_gamma(step, cfg.steps, cfg.damping, cfg.max_gamma, args.eps)

        # Move toward the predicted endpoint, then project back onto the simplex.
        new_probs = probs + gamma * (endpoint - probs)
        new_probs = new_probs.clamp_min(args.eps)
        new_probs = new_probs / new_probs.sum(dim=-1, keepdim=True).clamp_min(args.eps)
        probs = torch.where(lock.unsqueeze(-1), lock_probs, new_probs)

        state_top_prob, state_ids = probs[0].max(dim=-1)
        safe_state = probs[0].clamp_min(args.eps)
        state_entropy = -(safe_state * safe_state.log()).sum(dim=-1)
        endpoint_top_prob, endpoint_ids = endpoint[0].max(dim=-1)

        # Convert each per-position tensor to a Python list once per step
        # instead of calling .item() several times for every position.
        state_id_list = state_ids.tolist()
        state_top_list = state_top_prob.tolist()
        state_entropy_list = state_entropy.tolist()
        endpoint_id_list = endpoint_ids.tolist()
        endpoint_top_list = endpoint_top_prob.tolist()

        positions = [
            {
                "pos": pos,
                "state_token": token_text(int(state_id_list[pos])),
                "state_id": int(state_id_list[pos]),
                "state_top_p": float(state_top_list[pos]),
                "state_entropy": float(state_entropy_list[pos]),
                "endpoint_token": token_text(int(endpoint_id_list[pos])),
                "endpoint_id": int(endpoint_id_list[pos]),
                "endpoint_top_p": float(endpoint_top_list[pos]),
            }
            for pos in range(args.max_len)
        ]
        records.append(
            {
                "step": step,
                "gamma": gamma,
                "model_t": float(t.item()),
                "text_prefix": decode_text(tokenizer, state_id_list[:64]),
                "positions": positions,
            }
        )

    # Locked prompt positions always come from lock_probs, whatever the source.
    if args.final_from == "endpoint":
        final_dist = torch.where(lock.unsqueeze(-1), lock_probs, last_endpoint)
    elif args.final_from == "blend":
        final_dist = torch.where(lock.unsqueeze(-1), lock_probs, 0.5 * probs + 0.5 * last_endpoint)
    else:
        final_dist = probs
    final_dist = final_dist / final_dist.sum(dim=-1, keepdim=True).clamp_min(args.eps)
    final_ids = final_dist[0].argmax(dim=-1).detach().cpu().tolist()
    final_text = decode_text(tokenizer, final_ids)

    payload = {
        "checkpoint": args.checkpoint,
        "seed": args.seed,
        "prompt": args.prompt,
        "candidate_index": args.candidate_index,
        "steps": args.steps,
        "endpoint_temp": args.endpoint_temp,
        "damping": args.damping,
        "max_gamma": args.max_gamma,
        "final_from": args.final_from,
        "prompt_ids": prompt_ids,
        "final_ids": final_ids,
        "final_text": final_text,
        "records": records,
    }
    out = Path(args.output)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
    print(json.dumps({"output": str(out), "final_text": final_text}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
LTA_openwebtext_dualt/scripts/build_lta_owt_compact_gpt2bpe_packed_train_minus_100k_np8.sh ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
# Abort on command failure, on unset variables, and on failures inside pipelines.
set -euo pipefail

# NOTE(review): the workspace path is hard-coded; the script only works where it exists.
cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt

# Put the repository root first on PYTHONPATH, keeping any existing entries.
export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
export TOKENIZERS_PARALLELISM=false
export PYTHONUNBUFFERED=1

# Every setting below may be overridden from the caller's environment;
# the value after ":=" is used only when the variable is unset or empty.
: "${DATA_PATH:=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext}"
: "${TOKENIZER_ROOT:=/e2e-data/evad-tech-vla/wanghan58/models/lta_tokenizers}"
: "${OUTPUT_ROOT:=/e2e-data/evad-tech-vla/wanghan58/data/embedded-language-flows}"
: "${CACHE_ROOT:=/e2e-data/evad-tech-vla/wanghan58/data/tmp/lta_owt_compact_gpt2bpe_cache}"
: "${LOG_DIR:=logs/data_build_compact_gpt2bpe}"
: "${VOCAB_SIZES:=2048,4096,8192}"
: "${NUM_PROC:=8}"
: "${TOKENIZE_BATCH_SIZE:=2048}"
: "${MAX_SHARD_SIZE:=500MB}"
: "${MAX_RETRIES:=3}"
: "${PACKING_MODE:=packed_records}"
: "${OUTPUT_SUFFIX:=len1024}"
: "${CACHE_SUFFIX:=}"

mkdir -p "${LOG_DIR}" "${CACHE_ROOT}"
25
+
26
# build_one VOCAB_SIZE
#
# Build the packed dataset for one tokenizer vocabulary size and return once
# its elf_multi_part_meta.json exists.
#   - returns 2 when the tokenizer file is missing;
#   - returns 1 when the meta file is still missing after MAX_RETRIES attempts;
#   - waits (polling every 60 s) while another process whose command line
#     matches this dataset's build is running, instead of starting a second one.
build_one() {
  local vsize="$1"
  local dataset_name="openwebtext-compact-gpt2bpe-v${vsize}-${OUTPUT_SUFFIX}-train-minus-100k"
  local tok_json="${TOKENIZER_ROOT}/owt_compact_gpt2bpe_v${vsize}/tokenizer.json"
  local out_dir="${OUTPUT_ROOT}/${dataset_name}"
  local cache_path="${CACHE_ROOT}/v${vsize}${CACHE_SUFFIX}"
  local meta_json="${out_dir}/elf_multi_part_meta.json"
  local proc_regex="build_owt_t5_elf_dataset.py.*${dataset_name}"

  if ! [[ -f "${tok_json}" ]]; then
    echo "[compact-gpt2bpe] missing tokenizer for vocab=${vsize}: ${tok_json}" >&2
    return 2
  fi

  local tries=0
  local log_path
  local rc
  local -a build_cmd
  until [[ -f "${meta_json}" ]]; do
    # Another build of the same dataset is in flight: wait for it to finish.
    if pgrep -af "${proc_regex}" | grep -v pgrep >/dev/null 2>&1; then
      echo "[compact-gpt2bpe] vocab=${vsize} existing build running; waiting..."
      until ! pgrep -af "${proc_regex}" | grep -v pgrep >/dev/null 2>&1; do
        sleep 60
      done
      if [[ -f "${meta_json}" ]]; then
        break
      fi
    fi

    tries=$(( tries + 1 ))
    if [[ "${tries}" -gt "${MAX_RETRIES}" ]]; then
      echo "[compact-gpt2bpe] vocab=${vsize} failed after ${MAX_RETRIES} retries; meta missing: ${meta_json}" >&2
      return 1
    fi

    log_path="${LOG_DIR}/owt_compact_gpt2bpe_v${vsize}_${PACKING_MODE}_${OUTPUT_SUFFIX}_np${NUM_PROC}_resume_attempt${tries}_$(date +%Y%m%d_%H%M%S).log"
    echo "[compact-gpt2bpe] start vocab=${vsize} mode=${PACKING_MODE} output_suffix=${OUTPUT_SUFFIX} attempt=${tries} num_proc=${NUM_PROC} log=${log_path}"

    build_cmd=(
      python scripts/build_owt_t5_elf_dataset.py
      --data_path "${DATA_PATH}"
      --output_dir "${out_dir}"
      --tokenizer_path "${tok_json}"
      --text_column text
      --openwebtext_split train_minus_100k
      --max_len 1024
      --packing_mode "${PACKING_MODE}"
      --add_eos
      --num_proc "${NUM_PROC}"
      --tokenize_batch_size "${TOKENIZE_BATCH_SIZE}"
      --cache_dir "${cache_path}"
      --max_shard_size "${MAX_SHARD_SIZE}"
      --resume_parts
    )

    # errexit is suspended so a failed build can be retried; the python exit
    # status is taken from PIPESTATUS because the pipeline ends in tee.
    set +e
    "${build_cmd[@]}" 2>&1 | tee -a "${log_path}"
    rc=${PIPESTATUS[0]}
    set -e

    if [[ "${rc}" -ne 0 ]]; then
      echo "[compact-gpt2bpe] vocab=${vsize} attempt=${tries} failed status=${rc}; will retry if budget remains" | tee -a "${log_path}"
      sleep 30
    fi
  done

  echo "[compact-gpt2bpe] done vocab=${vsize} meta=${meta_json}"
}
83
+
84
# Split the comma-separated VOCAB_SIZES list and build each entry in order.
IFS=',' read -r -a VOCABS <<< "${VOCAB_SIZES}"
for vocab in "${VOCABS[@]}"; do
  # xargs with no command echoes its input, which trims surrounding whitespace.
  vocab="$(echo "${vocab}" | xargs)"
  if [[ -z "${vocab}" ]]; then
    continue
  fi
  build_one "${vocab}"
done

echo "[compact-gpt2bpe] all requested datasets are complete: ${VOCAB_SIZES}"
LTA_openwebtext_dualt/scripts/build_owt_t5_elf_dataset.py.bak_pre_dataset_preload_20260517_0247 ADDED
@@ -0,0 +1,555 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import argparse
5
+ import json
6
+ import os
7
+ import shutil
8
+ from pathlib import Path
9
+ from typing import Iterator
10
+
11
+
12
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the dataset builder.

    Options are registered in a fixed order so the generated ``--help`` output
    keeps its layout: input/output paths, record selection, tokenization and
    packing controls, build resources, then part-handling switches.

    Returns:
        The namespace produced by ``argparse`` from ``sys.argv``.
    """
    description = (
        "Build an ELF-style OpenWebText T5 token dataset. By default each raw "
        "record is tokenized with add_special_tokens=False, overlength records "
        "are split into max_len chunks, and short records stay short. The "
        "packed_records mode instead concatenates EOS-terminated records up to "
        "max_len so examples are close to full length, matching the released "
        "ELF OpenWebText-T5 dataset more closely."
    )
    parser = argparse.ArgumentParser(description=description)
    add = parser.add_argument

    # Required locations.
    for flag in ("--data_path", "--output_dir", "--tokenizer_path"):
        add(flag, required=True)

    # Record selection.
    add("--text_column", default="text")
    add("--txt_record_mode", choices=["auto", "line", "eot"], default="auto")
    add("--openwebtext_split", choices=["all", "train_minus_100k", "valid_last_100k"], default="all")
    add("--openwebtext_valid_records", type=int, default=100_000)
    add("--detokenizer", default="auto")

    # Chunking / packing.
    add("--max_len", type=int, default=1024)
    packing_help = (
        "record_chunks preserves the old behavior. packed_records appends EOS "
        "per record and packs multiple records into near-max_len examples."
    )
    add(
        "--packing_mode",
        choices=["record_chunks", "packed_records"],
        default="record_chunks",
        help=packing_help,
    )
    add("--max_records", type=int, default=0)
    add("--min_len", type=int, default=1)
    add("--add_eos", action="store_true", help="Append tokenizer EOS to each raw record before chunking.")
    add("--add_special_tokens", action="store_true", help="Let the tokenizer add model special tokens.")

    # Build resources.
    add("--cache_dir", default="")
    add("--max_shard_size", default="500MB")
    # Default worker count: half the CPUs (8 assumed when unknown), kept within [1, 32].
    cpu_total = os.cpu_count() or 8
    add("--num_proc", type=int, default=max(1, min(32, cpu_total // 2)))
    add("--tokenize_batch_size", type=int, default=1024)

    # Part handling and run modes.
    add(
        "--merge_parts",
        action="store_true",
        help="After parallel part build, merge into one save_to_disk dataset. Slower but portable.",
    )
    add("--keep_parts", action="store_true")
    add("--resume_parts", action="store_true", help="Keep completed part-* directories and build only missing parts.")
    add("--stats_only", action="store_true")
    add("--overwrite", action="store_true")
    return parser.parse_args()
59
+
60
+
61
def _iter_examples(
    *,
    data_path: str,
    tokenizer_path: str,
    text_column: str | None,
    txt_record_mode: str,
    openwebtext_split: str,
    openwebtext_valid_records: int,
    detokenizer: str | None,
    max_len: int,
    packing_mode: str,
    max_records: int,
    min_len: int,
    add_eos: bool,
    add_special_tokens: bool,
) -> Iterator[dict]:
    """Yield tokenized examples as ``{"input_ids", "sequence_length"}`` dicts.

    Each non-empty text record is encoded with the tokenizer loaded from
    ``tokenizer_path``. With ``packing_mode == "record_chunks"`` every record
    is cut into consecutive ``max_len``-sized chunks. In any other mode,
    records longer than ``max_len`` are still chunked on their own (after the
    pending pack is flushed), while shorter records are appended to a running
    pack that is flushed before it would exceed ``max_len`` and as soon as it
    reaches ``max_len``. Chunks or packs shorter than ``min_len`` are dropped.

    Iteration stops after ``max_records`` encoded, non-empty records when
    ``max_records`` is positive. A leftover pack is emitted at the end only
    when ``packing_mode == "packed_records"``.
    """
    from flowtext_lab.data import iter_text_records
    from flowtext_lab.tokenization import BpeTextTokenizer

    tokenizer = BpeTextTokenizer.from_file(tokenizer_path)
    records_done = 0
    buffer: list[int] = []

    def as_example(token_ids: list[int]) -> dict:
        # Copies the ids, so later changes to the caller's list do not leak in.
        return {
            "input_ids": [int(tok) for tok in token_ids],
            "sequence_length": int(len(token_ids)),
        }

    def chunked(token_ids: list[int]) -> Iterator[dict]:
        for offset in range(0, len(token_ids), max_len):
            piece = token_ids[offset : offset + max_len]
            if len(piece) < min_len:
                continue
            yield as_example(piece)

    def drain_buffer() -> list[dict]:
        # Returns zero or one example and always leaves the pack empty.
        nonlocal buffer
        ready = [as_example(buffer)] if len(buffer) >= min_len else []
        buffer = []
        return ready

    record_iter = iter_text_records(
        data_path,
        text_column=text_column,
        txt_record_mode=txt_record_mode,
        openwebtext_split=openwebtext_split,
        openwebtext_valid_records=openwebtext_valid_records,
        detokenizer=detokenizer,
    )
    for text in record_iter:
        if not text:
            continue
        token_ids = tokenizer.encode(text, add_eos=add_eos, add_special_tokens=add_special_tokens)
        if not token_ids:
            continue

        if packing_mode == "record_chunks":
            yield from chunked(token_ids)
        elif len(token_ids) > max_len:
            # Overlength records never share an example with other records.
            yield from drain_buffer()
            yield from chunked(token_ids)
        else:
            if buffer and len(buffer) + len(token_ids) > max_len:
                yield from drain_buffer()
            buffer.extend(int(tok) for tok in token_ids)
            if len(buffer) >= max_len:
                yield from drain_buffer()

        # Only records that produced token ids count toward the limit.
        records_done += 1
        if 0 < max_records <= records_done:
            break

    if packing_mode == "packed_records":
        yield from drain_buffer()
134
+
135
+
136
def _stats(args: argparse.Namespace) -> dict:
    """Summarise the lengths of every example produced for ``args``.

    Iterates ``_iter_examples`` once and returns the example count, the token
    total, mean/min/max sequence length, and a five-bucket length histogram.
    The last two buckets are split at ``args.max_len`` although their labels
    are the fixed strings ``"512_1023"`` and ``"eq1024"``.
    """
    buckets = {"lt128": 0, "128_255": 0, "256_511": 0, "512_1023": 0, "eq1024": 0}
    # (exclusive upper bound, bucket label), checked in order.
    bounds = (
        (128, "lt128"),
        (256, "128_255"),
        (512, "256_511"),
        (args.max_len, "512_1023"),
    )

    count = 0
    token_sum = 0
    shortest = None
    longest = 0
    for example in _iter_examples(**_gen_kwargs(args)):
        size = int(example["sequence_length"])
        count += 1
        token_sum += size
        shortest = size if shortest is None else min(shortest, size)
        longest = max(longest, size)
        # Anything not below the last bound falls into the final bucket.
        label = next((name for bound, name in bounds if size < bound), "eq1024")
        buckets[label] += 1

    mean_length = float(token_sum / count) if count else 0.0
    return {
        "num_examples": int(count),
        "total_tokens": int(token_sum),
        "mean_length": mean_length,
        "min_length": int(shortest or 0),
        "max_length": int(longest),
        "length_hist": buckets,
    }
166
+
167
+
168
def _gen_kwargs(args: argparse.Namespace) -> dict:
    """Collect the keyword arguments that ``_iter_examples`` accepts.

    Values are read from ``args`` by attribute name. The three length/count
    options are coerced with ``int`` and the two switches with ``bool``; all
    other values are passed through unchanged. Key order is fixed.
    """
    coerce = {
        "max_len": int,
        "max_records": int,
        "min_len": int,
        "add_eos": bool,
        "add_special_tokens": bool,
    }
    ordered_names = (
        "data_path",
        "tokenizer_path",
        "text_column",
        "txt_record_mode",
        "openwebtext_split",
        "openwebtext_valid_records",
        "detokenizer",
        "max_len",
        "packing_mode",
        "max_records",
        "min_len",
        "add_eos",
        "add_special_tokens",
    )
    kwargs = {}
    for name in ordered_names:
        value = getattr(args, name)
        kwargs[name] = coerce[name](value) if name in coerce else value
    return kwargs
184
+
185
+
186
def _make_limited_specs(args: argparse.Namespace) -> list[tuple[str, int, int | None]]:
    """Build ``(path, row_start, row_stop)`` specs, capped at ``--max_records``.

    Input files are ``args.data_path`` itself, or, when it is a directory,
    every ``.txt`` / ``.jsonl`` / ``.json`` / ``.parquet`` file below it in
    sorted order.  The raw specs come from ``flowtext_lab.data._make_file_specs``.

    With ``args.max_records <= 0`` the specs are returned unchanged (only
    normalised to ``str`` / ``int``).  Otherwise specs are taken in order until
    the record budget is used up.
    """
    from flowtext_lab.data import _make_file_specs

    root = Path(args.data_path)
    if root.is_dir():
        files = sorted(
            p for p in root.rglob("*")
            if p.suffix.lower() in {".txt", ".jsonl", ".json", ".parquet"}
        )
    else:
        files = [root]
    specs = _make_file_specs(files, args.openwebtext_split, int(args.openwebtext_valid_records))
    if args.max_records <= 0:
        return [(str(p), int(a), None if b is None else int(b)) for p, a, b in specs]

    limited = []
    remaining = int(args.max_records)
    for path, start, stop in specs:
        if remaining <= 0:
            break
        if stop is None:
            # Open-ended spec: the row count is unknown here, so cap the stop
            # at the remaining budget instead of taking the whole file (which
            # silently ignored --max_records).  The parquet reader clamps a
            # stop that exceeds the file's row count.  We still stop here
            # because we cannot tell how much of the budget this file used.
            limited.append((str(path), int(start), int(start) + remaining))
            break
        count = max(0, int(stop) - int(start))
        take = min(count, remaining)
        if take > 0:
            limited.append((str(path), int(start), int(start) + take))
            remaining -= take
    return limited
215
+
216
+
217
def _iter_parquet_text_batches(
    path: Path,
    *,
    text_column: str | None,
    row_start: int,
    row_stop: int | None,
    batch_size: int,
) -> Iterator[list[str]]:
    """Yield lists of non-empty texts from rows ``[row_start, row_stop)`` of a parquet file.

    When ``text_column`` is ``None`` the first of ``text``, ``content``,
    ``document``, ``article``, ``sentence`` present in the schema is used;
    a ``ValueError`` is raised if none is present.  ``row_stop=None`` means
    "to the end of the file"; a stop beyond the file is clamped to its row
    count.  ``None`` values and values whose ``str()`` is empty are dropped,
    and batches left with no text are not yielded.
    """
    import pyarrow.parquet as pq

    parquet = pq.ParquetFile(path)
    column = text_column
    if column is None:
        available = set(parquet.schema_arrow.names)
        for candidate in ("text", "content", "document", "article", "sentence"):
            if candidate in available:
                column = candidate
                break
        else:
            raise ValueError(f"Could not infer text column for {path}")

    total_rows = parquet.metadata.num_rows
    end_row = total_rows if row_stop is None else min(row_stop, total_rows)
    cursor = 0  # absolute row index of the next batch's first row
    for batch in parquet.iter_batches(columns=[column], batch_size=batch_size):
        first, cursor = cursor, cursor + batch.num_rows
        if cursor <= row_start:
            # Batch lies entirely before the requested window.
            continue
        if first >= end_row:
            break
        lo = max(0, row_start - first)
        hi = min(batch.num_rows, end_row - first)
        texts = []
        for value in batch.column(0).slice(lo, hi - lo).to_pylist():
            if value is None:
                continue
            as_text = str(value)
            if as_text:
                texts.append(as_text)
        if texts:
            yield texts
251
+
252
+
253
def _iter_part_examples(
    *,
    spec: tuple[str, int, int | None],
    tokenizer_path: str,
    text_column: str | None,
    detokenizer: str | None,
    max_len: int,
    packing_mode: str,
    min_len: int,
    add_eos: bool,
    add_special_tokens: bool,
    tokenize_batch_size: int,
) -> Iterator[dict]:
    """Tokenize one parquet row range and yield ``input_ids`` / ``sequence_length`` dicts.

    ``spec`` is ``(path, row_start, row_stop)``.  Texts are optionally
    detokenized, batch-encoded, and optionally given a trailing EOS id.

    * ``packing_mode == "record_chunks"``: each record is cut into consecutive
      windows of at most ``max_len`` ids.
    * any other mode: records longer than ``max_len`` are chunked on their own
      (after flushing the buffer); shorter records are appended to a buffer
      that is flushed before it would exceed ``max_len``.  The trailing buffer
      is flushed only when the mode is exactly ``"packed_records"``.

    Chunks and buffers shorter than ``min_len`` are dropped.
    """
    from flowtext_lab.text_detokenization import detokenize_text, infer_detokenizer_name
    from flowtext_lab.tokenization import BpeTextTokenizer

    path = Path(spec[0])
    row_start = int(spec[1])
    row_stop = None if spec[2] is None else int(spec[2])
    tokenizer = BpeTextTokenizer.from_file(tokenizer_path)
    resolved_detok = infer_detokenizer_name(raw_path=str(path), explicit=detokenizer)
    buffer: list[int] = []

    def as_example(ids: list[int]) -> dict:
        return {"input_ids": [int(x) for x in ids], "sequence_length": int(len(ids))}

    def chunked(ids: list[int]) -> Iterator[dict]:
        # range() already stops once the next window would start past the end.
        for offset in range(0, len(ids), max_len):
            window = ids[offset : offset + max_len]
            if len(window) >= min_len:
                yield as_example(window)

    def drain() -> Iterator[dict]:
        # NOTE(review): with min_len <= 0 an empty buffer is emitted as an
        # empty example -- confirm whether callers rely on min_len >= 1.
        nonlocal buffer
        if len(buffer) >= min_len:
            yield as_example(buffer)
        buffer = []

    chunk_only = packing_mode == "record_chunks"
    batches = _iter_parquet_text_batches(
        path,
        text_column=text_column,
        row_start=row_start,
        row_stop=row_stop,
        batch_size=max(1, int(tokenize_batch_size)),
    )
    for texts in batches:
        if resolved_detok:
            texts = [detokenize_text(text, resolved_detok) for text in texts]
        for enc in tokenizer.tokenizer.encode_batch(texts, add_special_tokens=add_special_tokens):
            ids = list(enc.ids)
            if add_eos:
                ids.append(tokenizer.eos_id)
            if not ids:
                continue
            if chunk_only:
                yield from chunked(ids)
                continue
            if len(ids) > max_len:
                # Oversized record: emit what is buffered, then chunk it alone.
                yield from drain()
                yield from chunked(ids)
                continue
            if buffer and len(buffer) + len(ids) > max_len:
                yield from drain()
            buffer.extend(int(x) for x in ids)
            if len(buffer) >= max_len:
                yield from drain()
    if packing_mode == "packed_records":
        yield from drain()
326
+
327
+
328
def _build_part(task: dict) -> dict:
    """Build one dataset part from ``task`` and save it under ``task["part_dir"]``.

    An existing part directory is removed first.  Examples come from
    ``_iter_part_examples`` and are stored with an int32 ``input_ids``
    sequence and an int64 ``sequence_length``.  After saving, the per-part
    cache directory (if one was given) is deleted.  Returns the part
    directory, example count, token total and the originating spec.
    """
    from datasets import Dataset, Features, Sequence, Value, disable_progress_bars

    disable_progress_bars()

    target = Path(task["part_dir"])
    if target.exists():
        shutil.rmtree(target)

    schema = Features(
        {
            "input_ids": Sequence(Value("int32")),
            "sequence_length": Value("int64"),
        }
    )
    forwarded = (
        "spec",
        "tokenizer_path",
        "text_column",
        "detokenizer",
        "max_len",
        "packing_mode",
        "min_len",
        "add_eos",
        "add_special_tokens",
        "tokenize_batch_size",
    )
    part = Dataset.from_generator(
        _iter_part_examples,
        gen_kwargs={key: task[key] for key in forwarded},
        features=schema,
        cache_dir=task["cache_dir"] or None,
    )
    part.save_to_disk(str(target), max_shard_size=task["max_shard_size"])

    token_count = 0
    if len(part):
        for n in part["sequence_length"]:
            token_count += int(n)

    scratch = task["cache_dir"]
    if scratch:
        shutil.rmtree(scratch, ignore_errors=True)
    return {
        "part_dir": str(target),
        "num_examples": int(len(part)),
        "total_tokens": int(token_count),
        "spec": task["spec"],
    }
370
+
371
+
372
def _part_is_complete(part_dir: Path) -> bool:
    """Return True when ``part_dir`` holds ``state.json`` and at least one ``data-*.arrow`` file."""
    if not (part_dir / "state.json").exists():
        return False
    for _shard in part_dir.glob("data-*.arrow"):
        return True
    return False
374
+
375
+
376
def _summarize_part(part_dir: Path, spec: tuple[str, int, int | None]) -> dict:
    """Load an already-built part from disk and report its size.

    The result has the same keys as the one returned by ``_build_part``:
    ``part_dir``, ``num_examples``, ``total_tokens`` and ``spec``.
    """
    from datasets import load_from_disk

    part = load_from_disk(str(part_dir))
    token_count = 0
    if len(part):
        for n in part["sequence_length"]:
            token_count += int(n)
    return {
        "part_dir": str(part_dir),
        "num_examples": int(len(part)),
        "total_tokens": int(token_count),
        "spec": spec,
    }
388
+
389
+
390
def _parallel_build(args: argparse.Namespace) -> dict:
    """Build the dataset as independent parts in worker processes, optionally merging them.

    One part is built per spec from ``_make_limited_specs`` under
    ``<output_dir>/parts/part-NNNNN``.  With ``args.resume_parts`` parts that
    already look complete are summarised instead of rebuilt.  A multi-part
    metadata file is always written; with ``args.merge_parts`` the non-empty
    parts are concatenated into ``output_dir`` itself, every other entry in
    ``output_dir`` (except ``parts``) is removed, and ``elf_build_meta.json``
    is written.

    Returns the metadata dict (including the per-part results).

    Raises:
        RuntimeError: if there are no input specs, or if merging was requested
            but every part is empty.
    """
    from concurrent.futures import ProcessPoolExecutor, as_completed

    specs = _make_limited_specs(args)
    if not specs:
        raise RuntimeError("No input file specs found")

    output_dir = Path(args.output_dir)
    parts_root = output_dir / "parts"
    parts_root.mkdir(parents=True, exist_ok=True)

    tasks = []
    part_results = []
    for idx, spec in enumerate(specs):
        part_dir = parts_root / f"part-{idx:05d}"
        if args.resume_parts and _part_is_complete(part_dir):
            part_results.append(_summarize_part(part_dir, spec))
            continue
        tasks.append(
            {
                "part_dir": str(part_dir),
                "spec": spec,
                "tokenizer_path": args.tokenizer_path,
                "text_column": args.text_column,
                "detokenizer": args.detokenizer,
                "max_len": int(args.max_len),
                "packing_mode": args.packing_mode,
                "min_len": int(args.min_len),
                "add_eos": bool(args.add_eos),
                "add_special_tokens": bool(args.add_special_tokens),
                "tokenize_batch_size": int(args.tokenize_batch_size),
                # Each worker gets its own cache directory so parts cannot collide.
                "cache_dir": str(Path(args.cache_dir) / f"part-{idx:05d}") if args.cache_dir else "",
                "max_shard_size": args.max_shard_size,
            }
        )

    print(
        f"[build] specs={len(specs)} existing={len(part_results)} "
        f"todo={len(tasks)} num_proc={args.num_proc} output={output_dir}",
        flush=True,
    )
    if tasks:
        with ProcessPoolExecutor(max_workers=max(1, int(args.num_proc))) as pool:
            futures = [pool.submit(_build_part, task) for task in tasks]
            for done, fut in enumerate(as_completed(futures), start=1):
                result = fut.result()
                part_results.append(result)
                print(
                    "[build] "
                    f"{done}/{len(futures)} {Path(result['part_dir']).name} "
                    f"examples={result['num_examples']} tokens={result['total_tokens']}",
                    flush=True,
                )

    # Completion order is arbitrary; sort so the merged dataset follows spec order.
    part_results.sort(key=lambda x: x["part_dir"])
    total_examples = sum(int(x["num_examples"]) for x in part_results)
    total_tokens = sum(int(x["total_tokens"]) for x in part_results)
    meta = {
        "builder": "build_owt_t5_elf_dataset.py",
        "format": f"elf_unconditional_tokenized_{args.packing_mode}_multipart",
        "data_path": args.data_path,
        "tokenizer_path": args.tokenizer_path,
        "text_column": args.text_column,
        "openwebtext_split": args.openwebtext_split,
        "openwebtext_valid_records": args.openwebtext_valid_records,
        "max_len": args.max_len,
        "packing_mode": args.packing_mode,
        "max_records": args.max_records,
        "min_len": args.min_len,
        "add_eos": args.add_eos,
        "add_special_tokens": args.add_special_tokens,
        "num_parts": len(part_results),
        "num_examples": int(total_examples),
        "total_tokens": int(total_tokens),
        "mean_length": float(total_tokens / total_examples) if total_examples else 0.0,
        "parts": part_results,
    }
    (output_dir / "elf_multi_part_meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True), encoding="utf-8")

    if args.merge_parts:
        from datasets import concatenate_datasets, load_from_disk

        merged_tmp = output_dir / "_merged_tmp"
        if merged_tmp.exists():
            shutil.rmtree(merged_tmp)
        non_empty = [load_from_disk(result["part_dir"]) for result in part_results if result["num_examples"] > 0]
        if not non_empty:
            # concatenate_datasets() rejects an empty list with a message that
            # gives no hint about the cause; fail with context instead, before
            # anything in output_dir is touched.
            raise RuntimeError(
                f"Cannot merge parts: all {len(part_results)} parts under {parts_root} are empty"
            )
        merged = non_empty[0] if len(non_empty) == 1 else concatenate_datasets(non_empty)
        merged.save_to_disk(str(merged_tmp), max_shard_size=args.max_shard_size)
        # Clear everything except the staging directory and the parts, then
        # move the merged dataset files into output_dir itself.
        for child in list(output_dir.iterdir()):
            if child.name in {"_merged_tmp", "parts"}:
                continue
            if child.is_dir():
                shutil.rmtree(child)
            else:
                child.unlink()
        for child in list(merged_tmp.iterdir()):
            child.rename(output_dir / child.name)
        merged_tmp.rmdir()
        if not args.keep_parts:
            shutil.rmtree(parts_root)
        meta["format"] = f"elf_unconditional_tokenized_{args.packing_mode}"
        (output_dir / "elf_build_meta.json").write_text(json.dumps(meta, indent=2, sort_keys=True), encoding="utf-8")

    return meta
494
+
495
+
496
def main() -> None:
    """CLI entry point: print length statistics, or build and save the tokenized dataset.

    * ``--stats_only``: print the JSON from ``_stats`` and exit.
    * Existing ``output_dir`` without ``--resume_parts``: abort unless
      ``--overwrite`` is given, in which case the directory is deleted.
    * ``--num_proc > 1``: delegate to ``_parallel_build``.
    * Otherwise build in-process from ``_iter_examples`` and write
      ``elf_build_meta.json`` next to the dataset.
    """
    args = parse_args()
    output_dir = Path(args.output_dir)

    if args.stats_only:
        print(json.dumps(_stats(args), indent=2, sort_keys=True))
        return

    if output_dir.exists() and not args.resume_parts:
        if not args.overwrite:
            raise SystemExit(f"output_dir exists: {output_dir}; pass --overwrite to replace it")
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    if args.num_proc > 1:
        meta = _parallel_build(args)
        # The per-part list can be long; keep it out of the console summary.
        summary = {key: value for key, value in meta.items() if key != "parts"}
        print(json.dumps(summary, indent=2, sort_keys=True))
        return

    from datasets import Dataset, Features, Sequence, Value

    schema = Features(
        {
            "input_ids": Sequence(Value("int32")),
            "sequence_length": Value("int64"),
        }
    )
    dataset = Dataset.from_generator(
        _iter_examples,
        gen_kwargs=_gen_kwargs(args),
        features=schema,
        cache_dir=args.cache_dir or None,
    )
    dataset.save_to_disk(str(output_dir), max_shard_size=args.max_shard_size)

    meta = {
        "builder": "build_owt_t5_elf_dataset.py",
        "format": f"elf_unconditional_tokenized_{args.packing_mode}",
    }
    for option in (
        "data_path",
        "tokenizer_path",
        "text_column",
        "openwebtext_split",
        "openwebtext_valid_records",
        "max_len",
        "packing_mode",
        "max_records",
        "min_len",
        "add_eos",
        "add_special_tokens",
    ):
        meta[option] = getattr(args, option)
    meta["num_examples"] = int(len(dataset))
    meta["columns"] = list(dataset.column_names)

    rendered = json.dumps(meta, indent=2, sort_keys=True)
    (output_dir / "elf_build_meta.json").write_text(rendered, encoding="utf-8")
    print(rendered)
552
+
553
+
554
+ if __name__ == "__main__":
555
+ main()
LTA_openwebtext_dualt/scripts/dump_position_top1_trace.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import csv
5
+ import html
6
+ import json
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ import torch
11
+ import torch.nn.functional as F
12
+
13
+ REPO_ROOT = Path(__file__).resolve().parents[1]
14
+ SCRIPT_DIR = Path(__file__).resolve().parent
15
+ for p in (REPO_ROOT, SCRIPT_DIR):
16
+ if str(p) not in sys.path:
17
+ sys.path.insert(0, str(p))
18
+
19
+ from flowtext_lab.decode import model_time_for_step, sample_noise_simplex, state_for_model
20
+ from flowtext_lab.tokenization import BpeTextTokenizer
21
+ from infer_context_compare_from_c128 import build_model, clamp_first_position, temperature
22
+ from trace_decode_basin import apply_decode_update
23
+
24
+
25
def decode_token(tokenizer: BpeTextTokenizer, tid: int) -> str:
    """Decode a single token id to text, rendering newline and tab as ``\\n`` / ``\\t``.

    Special tokens are kept and decoding does not stop at EOS.
    """
    visible = {ord("\n"): "\\n", ord("\t"): "\\t"}
    decoded = tokenizer.decode([int(tid)], stop_at_eos=False, skip_special_tokens=False)
    return decoded.translate(visible)
28
+
29
+
30
def cell(token: str, prob: float) -> str:
    """Render one ``<td>`` showing ``token`` and ``prob`` on a probability-tinted background.

    The background opacity grows linearly from 0.08 to 0.60 with the
    probability clamped to ``[0, 1]``; text switches from dark to white once
    the clamped value reaches 0.55.  The token text is HTML-escaped; the
    tooltip and label show the unclamped probability.
    """
    alpha = min(max(prob, 0.0), 1.0)
    opacity = 0.08 + 0.52 * alpha
    if alpha < 0.55:
        color = "#111"
    else:
        color = "#fff"
    bg = f"rgba(43, 113, 220, {opacity:.3f})"
    pieces = [
        f'<td style="background:{bg};color:{color}" title="p={prob:.4f}">',
        f'<span class="tok">{html.escape(token)}</span>',
        f'<br><span class="prob">{prob:.3f}</span></td>',
    ]
    return "".join(pieces)
39
+
40
+
41
def write_html(
    path: Path,
    *,
    title: str,
    focus_steps: list[int],
    rows_by_step: dict[int, list[dict[str, object]]],
) -> None:
    """Write a position-by-step HTML table of top-1 tokens to ``path``.

    Each requested step contributes three columns (``input``, ``endpoint``,
    ``post``); each table row is one sequence position.  Rows are the dicts
    stored in ``rows_by_step`` and must carry ``*_token`` / ``*_prob`` keys
    for those three phases.

    Steps listed in ``focus_steps`` but absent from ``rows_by_step`` are
    skipped instead of raising ``KeyError``, and an empty selection produces a
    table with no step columns.  If a step has fewer rows than the longest
    one, the missing positions are rendered as empty cells.  The file is
    written as UTF-8 to match the ``charset`` declared in the page.
    """
    steps = [step for step in focus_steps if step in rows_by_step]
    lines = [
        "<!doctype html><html><head><meta charset='utf-8'>",
        f"<title>{html.escape(title)}</title>",
        "<style>",
        "body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',sans-serif;margin:18px;color:#111}",
        "table{border-collapse:collapse;font-size:11px;line-height:1.15}",
        "th,td{border:1px solid #ddd;padding:3px 5px;min-width:62px;max-width:110px;vertical-align:top;overflow:hidden}",
        "th{position:sticky;top:0;background:#f7f7f7;z-index:2}",
        ".pos{position:sticky;left:0;background:#fff;z-index:1;font-weight:600;min-width:48px}",
        ".tok{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;white-space:pre-wrap}",
        ".prob{font-size:10px;opacity:.75}",
        ".wrap{overflow:auto;max-height:88vh;border:1px solid #ddd}",
        ".phase{font-size:10px;color:#555}",
        "</style></head><body>",
        f"<h1>{html.escape(title)}</h1>",
        "<p>Each cell is this position's argmax token and probability. Color intensity tracks probability.</p>",
        "<div class='wrap'><table>",
        "<thead><tr><th class='pos'>pos</th>",
    ]
    for step in steps:
        lines.append(f"<th colspan='3'>step {step}</th>")
    lines.append("</tr><tr><th class='pos'></th>")
    for _ in steps:
        lines.extend(["<th class='phase'>input</th>", "<th class='phase'>endpoint</th>", "<th class='phase'>post</th>"])
    lines.append("</tr></thead><tbody>")
    # default=0 keeps an empty step selection from raising in max().
    max_pos = max((len(rows_by_step[step]) for step in steps), default=0)
    for pos in range(max_pos):
        lines.append(f"<tr><td class='pos'>{pos}</td>")
        for step in steps:
            step_rows = rows_by_step[step]
            if pos >= len(step_rows):
                # Keep the column grid aligned when this step has fewer rows.
                lines.extend(["<td></td>", "<td></td>", "<td></td>"])
                continue
            row = step_rows[pos]
            lines.append(cell(str(row["input_token"]), float(row["input_prob"])))
            lines.append(cell(str(row["endpoint_token"]), float(row["endpoint_prob"])))
            lines.append(cell(str(row["post_token"]), float(row["post_prob"])))
        lines.append("</tr>")
    lines.append("</tbody></table></div></body></html>")
    # Token text is arbitrary Unicode; do not depend on the locale encoding.
    path.write_text("\n".join(lines), encoding="utf-8")
84
+
85
+
86
@torch.inference_mode()
def main() -> None:
    """Run one decode trajectory and dump per-position top-1 tokens for a single sample.

    For every decode step the argmax token and its probability are recorded at
    three points: the state fed to the model (``input``), the
    temperature-scaled model prediction (``endpoint``) and the state after the
    decode update (``post``).  Outputs, all under ``--out_dir``:

    * a full TSV with every step and position,
    * a focus TSV and an HTML table limited to steps
      ``--focus_start`` .. ``--focus_end`` (1-based, inclusive),
    * a JSON file describing the run.

    All text outputs are written as UTF-8.  ``--sample_idx`` is validated
    before the checkpoint is loaded, and the HTML page is skipped when no
    decoded step falls inside the focus range.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--checkpoint", required=True)
    ap.add_argument("--tokenizer_path", required=True)
    ap.add_argument("--out_dir", required=True)
    ap.add_argument("--name", required=True)
    ap.add_argument("--max_len", type=int, required=True)
    ap.add_argument("--n_samples", type=int, required=True)
    ap.add_argument("--sample_idx", type=int, default=0)
    ap.add_argument("--steps", type=int, default=128)
    ap.add_argument("--decode_rule", default="dirichlet_resample")
    ap.add_argument("--seed", type=int, default=314159)
    ap.add_argument("--pos_extend", default="repeat")
    ap.add_argument("--support_power", type=float, default=1.0)
    ap.add_argument("--semantic_power", type=float, default=1.5)
    ap.add_argument("--early_temp", type=float, default=2.8)
    ap.add_argument("--late_temp", type=float, default=1.45)
    ap.add_argument("--temp_end", type=float, default=0.55)
    ap.add_argument("--temp_power", type=float, default=1.5)
    ap.add_argument("--hybrid_switch", type=float, default=0.5)
    ap.add_argument("--fixed_first_token_id", type=int, default=-1)
    ap.add_argument("--fixed_first_token_text", default="")
    ap.add_argument("--fixed_first_initial_argmax", action="store_true")
    ap.add_argument("--focus_start", type=int, default=40)
    ap.add_argument("--focus_end", type=int, default=60)
    args = ap.parse_args()

    # Fail before the expensive checkpoint load; negative indices stay valid
    # because the tensors below are indexed Python-style.
    if not -args.n_samples <= args.sample_idx < args.n_samples:
        raise ValueError(
            f"sample_idx {args.sample_idx} is out of range for n_samples {args.n_samples}"
        )

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BpeTextTokenizer.from_file(args.tokenizer_path)
    ckpt = torch.load(args.checkpoint, map_location="cpu", weights_only=False, mmap=True)
    model = build_model(ckpt, tokenizer, args.max_len, device, args.pos_extend)
    eps = 1e-8
    torch.manual_seed(args.seed)
    probs = sample_noise_simplex(
        (args.n_samples, args.max_len),
        tokenizer.vocab_size,
        device,
        eps,
        noise_mode="dirichlet",
        target_prob=1.0,
        noise_sigma=-1.0,
        dirichlet_concentration=1.0,
    )
    # A token given as text takes precedence over a token given by id.
    fixed_first_token_id: int | None = None
    if args.fixed_first_token_text:
        encoded = tokenizer.encode(args.fixed_first_token_text, add_eos=False, add_special_tokens=False)
        if not encoded:
            raise ValueError(f"fixed_first_token_text encoded to no tokens: {args.fixed_first_token_text!r}")
        fixed_first_token_id = int(encoded[0])
    elif args.fixed_first_token_id >= 0:
        fixed_first_token_id = int(args.fixed_first_token_id)
    # Pinning to the initial noise argmax overrides any explicit token.
    fixed_first_ids: torch.Tensor | None = None
    if args.fixed_first_initial_argmax:
        fixed_first_ids = probs[:, 0, :].argmax(dim=-1)
    elif fixed_first_token_id is not None:
        fixed_first_ids = torch.full((args.n_samples,), fixed_first_token_id, dtype=torch.long, device=device)
    probs = clamp_first_position(probs, fixed_first_ids)
    attn = torch.ones((args.n_samples, args.max_len), dtype=torch.bool, device=device)
    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    stem = f"{args.name}_sample{args.sample_idx}"
    full_tsv = out_dir / f"{stem}_position_top1_full.tsv"
    focus_tsv = out_dir / f"{stem}_position_top1_focus_{args.focus_start}_{args.focus_end}.tsv"
    focus_steps = list(range(args.focus_start, args.focus_end + 1))
    rows_by_step: dict[int, list[dict[str, object]]] = {}

    # Decoded tokens can be any Unicode text, so the encoding is pinned.
    with full_tsv.open("w", newline="", encoding="utf-8") as f_full, focus_tsv.open(
        "w", newline="", encoding="utf-8"
    ) as f_focus:
        fieldnames = [
            "step",
            "position",
            "input_token",
            "input_prob",
            "endpoint_token",
            "endpoint_prob",
            "post_token",
            "post_prob",
        ]
        full_writer = csv.DictWriter(f_full, fieldnames=fieldnames, delimiter="\t")
        focus_writer = csv.DictWriter(f_focus, fieldnames=fieldnames, delimiter="\t")
        full_writer.writeheader()
        focus_writer.writeheader()

        for step in range(args.steps):
            prev_probs = probs
            prev_ids = prev_probs.argmax(dim=-1)
            t = model_time_for_step("flow", step, args.steps, args.n_samples, device, dtype=torch.float32)
            temp = temperature(step, args.steps, args.early_temp, args.late_temp, args.temp_end, args.temp_power)
            logits = model(state_for_model(model, prev_probs, eps), t, attn).float()
            endpoint = F.softmax(logits / temp, dim=-1)
            endpoint_ids = endpoint.argmax(dim=-1)
            probs = apply_decode_update(
                decode_rule=args.decode_rule,
                probs=prev_probs,
                endpoint=endpoint,
                step=step,
                steps=args.steps,
                support_power=args.support_power,
                semantic_power=args.semantic_power,
                hybrid_switch=args.hybrid_switch,
                c_min=1.0,
                c_max=1024.0,
                eps=eps,
            )
            probs = clamp_first_position(probs, fixed_first_ids)
            post_ids = probs.argmax(dim=-1)
            s = args.sample_idx
            input_ids = prev_ids[s].detach().cpu()
            endpoint_ids_s = endpoint_ids[s].detach().cpu()
            post_ids_s = post_ids[s].detach().cpu()
            # Probability of the argmax token at each position, per phase.
            input_probs = prev_probs[s].gather(1, prev_ids[s].unsqueeze(-1)).squeeze(-1).detach().cpu()
            endpoint_probs = endpoint[s].gather(1, endpoint_ids[s].unsqueeze(-1)).squeeze(-1).detach().cpu()
            post_probs = probs[s].gather(1, post_ids[s].unsqueeze(-1)).squeeze(-1).detach().cpu()
            in_focus = args.focus_start <= step + 1 <= args.focus_end
            step_rows: list[dict[str, object]] = []
            for pos in range(args.max_len):
                row = {
                    "step": step + 1,
                    "position": pos,
                    "input_token": decode_token(tokenizer, int(input_ids[pos])),
                    "input_prob": f"{float(input_probs[pos]):.8f}",
                    "endpoint_token": decode_token(tokenizer, int(endpoint_ids_s[pos])),
                    "endpoint_prob": f"{float(endpoint_probs[pos]):.8f}",
                    "post_token": decode_token(tokenizer, int(post_ids_s[pos])),
                    "post_prob": f"{float(post_probs[pos]):.8f}",
                }
                full_writer.writerow(row)
                if in_focus:
                    focus_writer.writerow(row)
                    step_rows.append(row)
            if in_focus:
                rows_by_step[step + 1] = step_rows
            if (step + 1) % 16 == 0 or step == 0:
                print(f"{args.name} wrote step {step + 1}", flush=True)

    # Only steps that were actually decoded can be rendered; the focus range
    # may extend past --steps or start below step 1.
    rendered_steps = [step for step in focus_steps if step in rows_by_step]
    if rendered_steps:
        write_html(
            out_dir / f"{stem}_position_top1_focus_{args.focus_start}_{args.focus_end}.html",
            title=f"{args.name} sample {args.sample_idx} position top1 focus {args.focus_start}-{args.focus_end}",
            focus_steps=rendered_steps,
            rows_by_step=rows_by_step,
        )
    else:
        print(
            f"{args.name} no decoded step in focus range {args.focus_start}-{args.focus_end}; skipping HTML",
            flush=True,
        )
    meta = {
        "checkpoint": args.checkpoint,
        "tokenizer_path": args.tokenizer_path,
        "name": args.name,
        "max_len": args.max_len,
        "n_samples": args.n_samples,
        "sample_idx": args.sample_idx,
        "steps": args.steps,
        "fixed_first_token_id": fixed_first_token_id,
        "fixed_first_token_text": args.fixed_first_token_text,
        "fixed_first_initial_argmax": bool(args.fixed_first_initial_argmax),
        "full_tsv": str(full_tsv),
        "focus_tsv": str(focus_tsv),
    }
    # ensure_ascii=False emits raw Unicode, so the file must be UTF-8.
    (out_dir / f"{stem}_position_top1_meta.json").write_text(
        json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8"
    )
    print("WROTE", out_dir)
242
+
243
+
244
+ if __name__ == "__main__":
245
+ main()
LTA_openwebtext_dualt/scripts/eval_dirichlet_diffusion_like_prefilter_20260508.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Entropy prefilter for diffusion-like Dirichlet state decoding.
3
+
4
+ This is intentionally scorer-free. It explores knobs that change the stochastic
5
+ process itself, not only the final endpoint temperature:
6
+
7
+ 1. Initial Dirichlet concentration (spiky vs. uniform-ish starting noise).
8
+ 2. Rolling re-noise: mix or resample each step with fresh Dirichlet noise.
9
+ 3. State-only output, since state preserved entropy better than blend.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import argparse
15
+ import csv
16
+ import importlib.util
17
+ import json
18
+ import math
19
+ import sys
20
+ from dataclasses import dataclass
21
+ from pathlib import Path
22
+
23
+ import torch
24
+ import torch.nn.functional as F
25
+
26
+
27
+ BASE = Path(__file__).with_name("eval_c1024_decode_sweep_20260507.py")
28
+ spec = importlib.util.spec_from_file_location("eval_c1024_decode_sweep_20260507", BASE)
29
+ if spec is None or spec.loader is None:
30
+ raise RuntimeError(f"cannot import {BASE}")
31
+ base = importlib.util.module_from_spec(spec)
32
+ sys.modules[spec.name] = base
33
+ spec.loader.exec_module(base)
34
+
35
+
36
@dataclass(frozen=True)
class Config:
    """One immutable decode configuration of the prefilter sweep.

    Only ``name`` is required; every other field has a default.
    """

    name: str

    # Concentration / temperature knobs.
    cmax: float = 1024.0
    endpoint_temp: float = 1.3
    init_conc: float = 1.0

    # Exponents applied to the step fraction.
    support_power: float = 1.0
    semantic_power: float = 1.0

    # Rolling re-noise.
    renoise_mode: str = "none"  # none, mix, resample
    renoise_strength: float = 0.0
    renoise_power: float = 1.0
    renoise_conc: float = 1.0

    # Number of decode steps.
    steps: int = 512
49
+
50
+
51
def fmt(x: float) -> str:
    """Format ``x`` with ``%g`` and replace the decimal point with ``p`` (for use in names)."""
    compact = "{:g}".format(x)
    return compact.replace(".", "p")
53
+
54
+
55
def configs() -> list[Config]:
    """Return the full prefilter grid as three concatenated sweeps.

    1. Initial-concentration sweep without re-noise.
    2. Re-noise sweep (``mix`` / ``resample``) over strength and decay power,
       with the re-noise concentration tied to the initial concentration.
    3. Slower support concentration combined with mild ``mix`` re-noise.
    """
    # Baseline-style but with broader initial noise.
    init_sweep = [
        Config(
            f"init_cmax{cmax}_et{fmt(endpoint_temp)}_init{fmt(init_conc)}",
            cmax=cmax,
            endpoint_temp=endpoint_temp,
            init_conc=init_conc,
        )
        for cmax in [256, 1024, 4096]
        for endpoint_temp in [1.3, 1.45]
        for init_conc in [0.03, 0.1, 0.3, 1.0, 3.0, 10.0]
    ]

    # Diffusion-ish: every step inject a little fresh Dirichlet noise.
    # Decay with (1-t)^power so noise is mostly early/mid trajectory.
    renoise_sweep = [
        Config(
            (
                f"{mode}_cmax{cmax}_et{fmt(endpoint_temp)}"
                f"_init{fmt(init_conc)}_s{fmt(strength)}_p{fmt(power)}"
            ),
            cmax=cmax,
            endpoint_temp=endpoint_temp,
            init_conc=init_conc,
            renoise_mode=mode,
            renoise_strength=strength,
            renoise_power=power,
            renoise_conc=init_conc,
        )
        for cmax in [256, 1024, 4096]
        for endpoint_temp in [1.3, 1.45]
        for init_conc in [0.1, 1.0]
        for mode in ["mix", "resample"]
        for strength in [0.02, 0.05, 0.10, 0.20]
        for power in [0.5, 1.0, 2.0]
    ]

    # Slower support concentration can keep more state entropy; combine with
    # mild re-noise only.
    slow_support_sweep = [
        Config(
            f"mix_cmax{cmax}_sp{fmt(support_power)}_s{fmt(strength)}",
            cmax=cmax,
            endpoint_temp=1.3,
            init_conc=1.0,
            support_power=support_power,
            renoise_mode="mix",
            renoise_strength=strength,
            renoise_power=1.0,
            renoise_conc=1.0,
        )
        for cmax in [1024, 4096]
        for support_power in [1.25, 1.5, 2.0]
        for strength in [0.02, 0.05, 0.10]
    ]
    return init_sweep + renoise_sweep + slow_support_sweep
114
+
115
+
116
def dirichlet_noise(shape, vocab_size: int, device, concentration: float, eps: float) -> torch.Tensor:
    """Draw symmetric Dirichlet samples of shape ``(*shape, vocab_size)``.

    Samples are built from Gamma draws normalised over the last axis; the
    concentration, the Gamma draws and the normaliser are each floored at
    ``eps``.
    """
    full_shape = tuple(shape) + (vocab_size,)
    alpha = torch.full(full_shape, float(concentration), device=device).clamp_min(eps)
    gamma = torch._standard_gamma(alpha).clamp_min(eps)
    total = gamma.sum(dim=-1, keepdim=True).clamp_min(eps)
    return gamma / total
120
+
121
+
122
@torch.no_grad()
def decode_ids(model, tokenizer, cfg: Config, n_samples: int, batch_size: int, max_len: int, seed: int, device):
    """Decode ``n_samples`` sequences with ``cfg`` and return ``(ids, texts)``.

    Decoding runs in batches of at most ``batch_size``.  Each batch starts
    from Dirichlet noise and, for ``cfg.steps`` steps, resamples the state
    from a Dirichlet whose mean blends the uniform distribution, the current
    state and the model's temperature-scaled prediction.  ``renoise_mode``
    either lowers the sampling concentration (``resample``) or mixes fresh
    noise into the sample (``mix``).  The final token ids are the argmax of
    the last state; texts are decoded without stopping at EOS and keeping
    special tokens.

    Raises:
        ValueError: for a ``renoise_mode`` other than none / mix / resample
            (raised during the first step).
    """
    torch.manual_seed(seed)
    eps = 1e-8
    vocab = tokenizer.vocab_size
    mode = cfg.renoise_mode

    def renorm(x: torch.Tensor) -> torch.Tensor:
        return x / x.sum(dim=-1, keepdim=True).clamp_min(eps)

    def strength_at(support_t: float) -> float:
        # Re-noise strength decays as (1 - support_t) ** renoise_power.
        return cfg.renoise_strength * ((1.0 - support_t) ** cfg.renoise_power)

    all_ids = []
    all_texts = []
    remaining = n_samples
    while remaining > 0:
        bs = min(batch_size, remaining)
        probs = dirichlet_noise((bs, max_len), vocab, device, cfg.init_conc, eps)
        attn = torch.ones((bs, max_len), dtype=torch.bool, device=device)
        for step in range(cfg.steps):
            frac = (step + 1) / max(cfg.steps, 1)
            support_t = frac ** cfg.support_power
            semantic_t = frac ** cfg.semantic_power
            t = base.model_time("post", step, cfg.steps, bs, device)
            logits = model(base.state_for_model(model, probs, eps), t, attn).float() / cfg.endpoint_temp
            endpoint = F.softmax(logits, dim=-1)
            anchor = renorm(probs.clamp_min(eps))
            forward_endpoint = renorm((1.0 - semantic_t) * anchor + semantic_t * endpoint)
            mean = (1.0 - support_t) / float(vocab) + support_t * forward_endpoint
            mean = renorm(mean.clamp_min(eps))
            conc = math.exp(support_t * math.log(max(cfg.cmax, 1.0)))

            if mode == "resample":
                # Reduce concentration when injecting re-noise, then resample.
                sample_conc = max(1.0, conc * (1.0 - strength_at(support_t)))
            else:
                sample_conc = conc
            alpha = (mean * sample_conc).clamp_min(eps)
            probs = renorm(torch._standard_gamma(alpha).clamp_min(eps))

            if mode == "mix":
                strength = strength_at(support_t)
                if strength > 0:
                    noise = dirichlet_noise((bs, max_len), vocab, device, cfg.renoise_conc, eps)
                    probs = renorm((1.0 - strength) * probs + strength * noise)
            elif mode not in ("resample", "none"):
                raise ValueError(cfg.renoise_mode)
        batch_ids = probs.argmax(dim=-1).detach().cpu().tolist()
        all_ids.extend(batch_ids)
        all_texts.extend(
            tokenizer.decode(row, stop_at_eos=False, skip_special_tokens=False) for row in batch_ids
        )
        remaining -= bs
    return all_ids, all_texts
172
+
173
+
174
def main():
    """Decode samples for every sweep config and record token-diversity metrics.

    After each config the summary ``prefilter_summary.tsv`` is rewritten with
    all rows collected so far, so a partial run still leaves a usable table.
    Configs whose ``sample_entropy`` reaches ``--save_min_entropy`` also get a
    ``<name>_samples.jsonl`` file holding the summary row and the raw texts.
    """
    p = argparse.ArgumentParser()
    p.add_argument("--checkpoint", required=True)
    p.add_argument("--tokenizer_path", required=True)
    p.add_argument("--out_dir", required=True)
    p.add_argument("--n_samples", type=int, default=128)
    p.add_argument("--max_len", type=int, default=128)
    p.add_argument("--decode_batch", type=int, default=16)
    p.add_argument("--seed", type=int, default=20260508)
    p.add_argument("--save_min_entropy", type=float, default=4.05)
    args = p.parse_args()

    out_dir = Path(args.out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = base.BpeTextTokenizer.from_file(args.tokenizer_path)
    ckpt = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model = base.build_model(ckpt, tokenizer, device)

    summary_path = out_dir / "prefilter_summary.tsv"
    rows = []
    for cfg in configs():
        ids, texts = decode_ids(model, tokenizer, cfg, args.n_samples, args.decode_batch, args.max_len, args.seed, device)
        div = base.summarize_token_diversity(ids).__dict__
        row = {
            "name": cfg.name,
            "step": ckpt.get("step"),
            "sample_entropy": div["sample_entropy"],
            "distinct_2": div["distinct_2"],
            "top_token_mass": div["top_token_mass"],
            "cmax": cfg.cmax,
            "endpoint_temp": cfg.endpoint_temp,
            "init_conc": cfg.init_conc,
            "support_power": cfg.support_power,
            "semantic_power": cfg.semantic_power,
            "renoise_mode": cfg.renoise_mode,
            "renoise_strength": cfg.renoise_strength,
            "renoise_power": cfg.renoise_power,
            "renoise_conc": cfg.renoise_conc,
        }
        rows.append(row)
        keys = list(rows[0])
        # newline="" lets the csv module control line endings itself, as its
        # documentation requires for file objects passed to a writer.
        with summary_path.open("w", encoding="utf-8", newline="") as f:
            writer = csv.DictWriter(f, keys, delimiter="\t")
            writer.writeheader()
            writer.writerows(rows)
        print("[prefilter]", json.dumps(row), flush=True)
        if row["sample_entropy"] >= args.save_min_entropy:
            with (out_dir / f"{cfg.name}_samples.jsonl").open("w", encoding="utf-8") as f:
                f.write(json.dumps({"type": "summary", **row}, ensure_ascii=False) + "\n")
                for i, text in enumerate(texts):
                    f.write(json.dumps({"type": "sample", "index": i, "raw_text": text}, ensure_ascii=False) + "\n")
225
+
226
+
227
+ if __name__ == "__main__":
228
+ main()
LTA_openwebtext_dualt/scripts/eval_lm1b_c1024_fullycoupled_8gpu_1m_checkpoint.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Offline eval + optional GenPPL for the LM1B c1024 fully-coupled 8×GPU 1M run
# (same defaults as scripts/launch_lta_lm1b_categorical_fullvocab_c1024_fullycoupled_8gpu_small_1m.sh).
#
# Every UPPER_CASE setting below can be overridden from the environment, e.g.
#   INFER_STEPS=64 GEN_PPL=0 CUDA_VISIBLE_DEVICES=3 bash <this script>
set -euo pipefail

ROOT="/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt"
cd "${ROOT}"
export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
export TOKENIZERS_PARALLELISM=false

RUN_DIR="${RUN_DIR:-runs/lta_lm1b_dirichlet_categorical_fullvocab_c1024_fullycoupled_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0}"
CHECKPOINT="${CHECKPOINT:-${RUN_DIR}/latest.pt}"
DATA_PATH="${DATA_PATH:-data/lm1b_train_parquet}"
TOKENIZER_PATH="${TOKENIZER_PATH:-/e2e-data/evad-tech-vla/wanghan58/workspace/imagenet_handoff_20260327/nlp_dts_light/assets/distilbert-base-uncased/tokenizer.json}"
TEXT_COLUMN="${TEXT_COLUMN:-text}"

INFER_STEPS="${INFER_STEPS:-128}"
MAX_LEN="${MAX_LEN:-128}"
MAX_RECORDS="${MAX_RECORDS:-64}"
BATCH_SIZE="${BATCH_SIZE:-4}"

SCORER="${SCORER:-/e2e-data/evad-tech-vla/wanghan58/models/flowtext_scorers/gpt2-large-standard}"
GEN_PPL_SAMPLES="${GEN_PPL_SAMPLES:-128}"
GEN_PPL="${GEN_PPL:-1}"
# Previously hard-coded in the argument list; defaults are unchanged.
GEN_PPL_BATCH_SIZE="${GEN_PPL_BATCH_SIZE:-4}"
GEN_PPL_MAX_LENGTH="${GEN_PPL_MAX_LENGTH:-1024}"

GEN_PPL_OUTPUT="${GEN_PPL_OUTPUT:-genppl_lm1b_c1024_fullycoupled_8gpu_steps${INFER_STEPS}_samples${GEN_PPL_SAMPLES}.jsonl}"

# eval.py / margin_for_target_prob require target_prob in (0, 1), not 1.0 (training may use 1.0).
TARGET_PROB="${TARGET_PROB:-0.999}"

# Must be exported: a plain assignment only reaches the python child process
# when the variable was already exported by the caller, so the default of GPU 0
# would otherwise be silently ignored.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

echo "[eval] checkpoint=${CHECKPOINT}"
echo "[eval] data_path=${DATA_PATH} text_column=${TEXT_COLUMN}"
echo "[eval] infer_steps=${INFER_STEPS} max_len=${MAX_LEN} target_prob=${TARGET_PROB}"

# Extra flags are only passed when GenPPL scoring is enabled.
GEN_ARGS=()
if [[ "${GEN_PPL}" == "1" || "${GEN_PPL}" == "true" || "${GEN_PPL}" == "yes" ]]; then
  GEN_ARGS+=(--gen_ppl)
  GEN_ARGS+=(--gen_ppl_model "${SCORER}")
  GEN_ARGS+=(--gen_ppl_samples "${GEN_PPL_SAMPLES}")
  GEN_ARGS+=(--gen_ppl_batch_size "${GEN_PPL_BATCH_SIZE}")
  GEN_ARGS+=(--gen_ppl_max_length "${GEN_PPL_MAX_LENGTH}")
  GEN_ARGS+=(--gen_ppl_output "${GEN_PPL_OUTPUT}")
fi

# ${GEN_ARGS[@]+...} keeps `set -u` from aborting on an empty array with
# bash versions older than 4.4 (GEN_PPL=0 leaves the array empty).
python eval.py \
  --checkpoint "${CHECKPOINT}" \
  --data_path "${DATA_PATH}" \
  --text_column "${TEXT_COLUMN}" \
  --tokenizer_path "${TOKENIZER_PATH}" \
  --max_records "${MAX_RECORDS}" \
  --max_len "${MAX_LEN}" \
  --batch_size "${BATCH_SIZE}" \
  --infer_steps "${INFER_STEPS}" \
  --decode_solver flowmap \
  --model_t_mode flow \
  --noise_init logistic_normal \
  --noise_sigma -1.0 \
  --target_prob "${TARGET_PROB}" \
  --eps 1e-8 \
  ${GEN_ARGS[@]+"${GEN_ARGS[@]}"}
LTA_openwebtext_dualt/scripts/eval_lm1b_latest_non_duo_methods_genppl_20260506.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import importlib.util
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
# Load the sibling "non_owt" eval script as a module so its ``base`` attribute
# (with whatever that script installs on it) can be reused with a different
# target list.
LATEST_PATH = Path(__file__).with_name("eval_lm1b_latest_non_owt_methods_genppl_20260506.py")
spec = importlib.util.spec_from_file_location("eval_lm1b_latest_non_owt_methods_genppl_20260506", LATEST_PATH)
if spec is None:
    raise RuntimeError(f"Could not load {LATEST_PATH}")
if spec.loader is None:
    raise RuntimeError(f"Could not load {LATEST_PATH}")
latest = importlib.util.module_from_spec(spec)
# Register before executing so the module is importable by name while it runs.
sys.modules[spec.name] = latest
spec.loader.exec_module(latest)


EvalTarget = latest.base.EvalTarget

# One row per target; each row is passed positionally to EvalTarget.
_TARGET_SPECS = (
    (
        "flm_latest",
        "flm",
        "runs/lm1b_flm_unified_ddit_small_len128_gbs512_8gpu_1m_20260506_repro/latest.pt",
    ),
    (
        "categorical_fullvocab_c1024_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c64_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c64p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c256_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c256p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c16_4gpu_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c16p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_4gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c4_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c4p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
)
# Replace the target list that the shared ``main`` reads.
latest.base.TARGETS = [EvalTarget(*fields) for fields in _TARGET_SPECS]


if __name__ == "__main__":
    latest.base.main()
LTA_openwebtext_dualt/scripts/eval_lm1b_latest_non_owt_methods_genppl_20260506.py ADDED
@@ -0,0 +1,311 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import importlib.util
5
+ import math
6
+ import sys
7
+ from pathlib import Path
8
+
9
+ import torch
10
+ import torch.nn.functional as F
11
+
12
+
13
# Load the sibling eval script as ``base``; this file replaces its target list
# and several of its functions further down.
BASE_PATH = Path(__file__).with_name("eval_lm1b_200k_methods_genppl_20260506.py")
spec = importlib.util.spec_from_file_location("eval_lm1b_200k_methods_genppl_20260506", BASE_PATH)
if spec is None:
    raise RuntimeError(f"Could not load {BASE_PATH}")
if spec.loader is None:
    raise RuntimeError(f"Could not load {BASE_PATH}")
base = importlib.util.module_from_spec(spec)
# Register before executing so the module is importable by name while it runs.
sys.modules[spec.name] = base
spec.loader.exec_module(base)


# One row per target; each row is passed positionally to base.EvalTarget.
_TARGET_SPECS = (
    (
        "mdlm_latest",
        "mdlm",
        "runs/lm1b_mdlm_unified_ddit_small_len128_gbs512_8gpu_1m_20260505_repro/latest.pt",
    ),
    (
        "duo_latest",
        "duo",
        "runs/lm1b_duo_unified_ddit_small_len128_gbs512_8gpu_1m_20260505_repro/latest.pt",
    ),
    (
        "flm_latest",
        "flm",
        "runs/lm1b_flm_unified_ddit_small_len128_gbs512_8gpu_1m_20260506_repro/latest.pt",
    ),
    (
        "categorical_fullvocab_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c64_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c64p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c256_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c256p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/latest.pt",
    ),
    (
        "categorical_fullvocab_c16_4gpu_latest",
        "categorical_fullvocab",
        "runs/lta_lm1b_dirichlet_categorical_fullvocab_c16p0_dualt_flmpack_onehot_hardce_ddit_small_len128_gbs512_4gpu_1m_nw0/latest.pt",
    ),
    (
        "ar_rowshard_latest",
        "ar",
        "runs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b64_resume4000_20260504_203021/latest.pt",
    ),
)
base.TARGETS = [base.EvalTarget(*fields) for fields in _TARGET_SPECS]


# Keep handles on the original implementations before they are replaced below,
# so the wrappers in this file can delegate to them.
_orig_build_endpoint_model, _orig_decode_dense_baseline, _orig_decode_duo_relaxed = (
    base.build_endpoint_model,
    base.decode_dense_baseline,
    base.decode_duo_relaxed,
)
69
+
70
+
71
def build_endpoint_model(ckpt, tokenizer, kind, device):
    """Build the model via the original builder and tag it with its concentration cap.

    The training-time ``dirichlet_concentration_max`` (default 1024.0 when the
    checkpoint does not record it) is stored on the model as
    ``_eval_concentration_max`` so that ``decode_lta_categorical`` can read it.

    Args:
        ckpt: Loaded checkpoint mapping; ``ckpt["args"]`` is read if present.
        tokenizer, kind, device: Forwarded unchanged to the original builder.

    Returns:
        The model returned by the original builder, with the extra attribute set.
    """
    model = _orig_build_endpoint_model(ckpt, tokenizer, kind, device)
    # ``or {}`` also covers checkpoints that store ``"args": None``, which the
    # plain ``.get("args", {})`` default would not.
    train_args = ckpt.get("args") or {}
    model._eval_concentration_max = float(train_args.get("dirichlet_concentration_max", 1024.0))
    return model
76
+
77
+
78
+ @torch.inference_mode()
79
+ def decode_lta_categorical(
80
+ model,
81
+ tokenizer,
82
+ *,
83
+ n_samples: int,
84
+ batch_size: int,
85
+ max_len: int,
86
+ steps: int,
87
+ seed: int,
88
+ device: torch.device,
89
+ ):
90
+ torch.manual_seed(seed)
91
+ eps = 1e-8
92
+ concentration_min = 1.0
93
+ concentration_max = float(getattr(model, "_eval_concentration_max", 1024.0))
94
+ all_ids: list[list[int]] = []
95
+ all_texts: list[str] = []
96
+ remaining = n_samples
97
+ while remaining > 0:
98
+ bs = min(batch_size, remaining)
99
+ probs = base.sample_noise_simplex(
100
+ (bs, max_len),
101
+ tokenizer.vocab_size,
102
+ device,
103
+ eps,
104
+ noise_mode="dirichlet",
105
+ target_prob=1.0,
106
+ noise_sigma=-1.0,
107
+ dirichlet_concentration=1.0,
108
+ )
109
+ attn = torch.ones((bs, max_len), dtype=torch.bool, device=device)
110
+ last_endpoint = probs
111
+ for step in range(steps):
112
+ t = torch.full((bs,), 0.5, dtype=torch.float32, device=device)
113
+ logits = model(base.state_for_model(model, probs, eps), t, attn).float() / 1.3
114
+ endpoint = F.softmax(logits, dim=-1)
115
+ last_endpoint = endpoint
116
+ support_t = (step + 1) / max(steps, 1)
117
+ semantic_t = support_t**1.5
118
+ anchor = probs.clamp_min(eps)
119
+ anchor = anchor / anchor.sum(dim=-1, keepdim=True).clamp_min(eps)
120
+ forward_endpoint = (1.0 - semantic_t) * anchor + semantic_t * endpoint
121
+ forward_endpoint = forward_endpoint.clamp_min(eps)
122
+ forward_endpoint = forward_endpoint / forward_endpoint.sum(dim=-1, keepdim=True).clamp_min(eps)
123
+ mean = (1.0 - support_t) / float(tokenizer.vocab_size) + support_t * forward_endpoint
124
+ mean = mean.clamp_min(eps)
125
+ mean = mean / mean.sum(dim=-1, keepdim=True).clamp_min(eps)
126
+ log_min = math.log(concentration_min)
127
+ log_max = math.log(concentration_max)
128
+ conc = math.exp(log_min + support_t * (log_max - log_min))
129
+ alpha = (mean * conc).clamp_min(eps)
130
+ probs = torch._standard_gamma(alpha).clamp_min(eps)
131
+ probs = probs / probs.sum(dim=-1, keepdim=True).clamp_min(eps)
132
+ final_probs = 0.5 * probs + 0.5 * last_endpoint
133
+ ids = final_probs.argmax(dim=-1).detach().cpu().tolist()
134
+ all_ids.extend(ids)
135
+ all_texts.extend(tokenizer.decode(row, stop_at_eos=False, skip_special_tokens=False) for row in ids)
136
+ remaining -= bs
137
+ print(f"[categorical Cmax={concentration_max:g}] generated {n_samples - remaining}/{n_samples}", flush=True)
138
+ decode = {
139
+ "kind": "categorical_fullvocab",
140
+ "steps": steps,
141
+ "model_t_mode": "const05",
142
+ "decode_rule": "dual_line_resample",
143
+ "support_power": 1.0,
144
+ "semantic_power": 1.5,
145
+ "anchor_mode": "state",
146
+ "noise_init": "dirichlet",
147
+ "dirichlet_concentration": 1.0,
148
+ "concentration_min": concentration_min,
149
+ "concentration_max": concentration_max,
150
+ "endpoint_temp": 1.3,
151
+ "final_from": "blend",
152
+ "n_samples": n_samples,
153
+ "seed": seed,
154
+ }
155
+ return all_ids, all_texts, decode
156
+
157
+
158
@torch.inference_mode()
def decode_flm(
    model,
    tokenizer,
    *,
    n_samples: int,
    batch_size: int,
    max_len: int,
    steps: int,
    seed: int,
    device: torch.device,
):
    """Generate sequences by moving a Gaussian state towards the model endpoint.

    The state starts as standard normal noise of shape
    ``(batch, max_len, vocab_size)``. At step ``k`` (0-based) the model is
    queried at ``t = (k + 1) / steps`` and the state moves a fraction
    ``1 / (steps - k)`` of the way to the softmax of the logits. Token ids are
    the argmax of a 50/50 blend of the last softmax and the final state.

    Returns:
        ``(all_ids, all_texts, decode)``: token id lists, decoded strings, and a
        dict describing the decode settings.
    """
    torch.manual_seed(seed)
    eps = 1e-8
    vocab = tokenizer.vocab_size
    step_denom = max(steps, 1)
    collected_ids: list[list[int]] = []
    collected_texts: list[str] = []
    produced = 0
    while produced < n_samples:
        bs = min(batch_size, n_samples - produced)
        state = torch.randn((bs, max_len, vocab), device=device)
        attn = torch.ones((bs, max_len), dtype=torch.bool, device=device)
        last_logits = None
        for step in range(steps):
            t = torch.full((bs,), (step + 1) / step_denom, dtype=torch.float32, device=device)
            # Drop any padded vocabulary columns the model may emit.
            last_logits = model(state, t, attn).float()[..., :vocab]
            endpoint = F.softmax(last_logits, dim=-1)
            # Inside the loop ``steps - step >= 1``, so this never exceeds 1.0.
            gamma = 1.0 / (steps - step)
            state = state + gamma * (endpoint - state)
        if last_logits is None:
            # steps == 0: nothing was predicted, fall back to the raw state.
            final = state
        else:
            final = 0.5 * F.softmax(last_logits, dim=-1) + 0.5 * state.clamp_min(eps)
        final = final / final.sum(dim=-1, keepdim=True).clamp_min(eps)
        batch_ids = final.argmax(dim=-1).detach().cpu().tolist()
        collected_ids.extend(batch_ids)
        collected_texts.extend(
            tokenizer.decode(row, stop_at_eos=False, skip_special_tokens=False) for row in batch_ids
        )
        produced += bs
        print(f"[flm] generated {produced}/{n_samples}", flush=True)
    decode = {
        "kind": "flm",
        "steps": steps,
        "decode_rule": "gaussian_to_endpoint_flow",
        "noise_init": "gaussian",
        "final_from": "blend",
        "n_samples": n_samples,
        "seed": seed,
    }
    return collected_ids, collected_texts, decode
205
+
206
+
207
@torch.inference_mode()
def decode_duo_discrete(
    model,
    tokenizer,
    ckpt,
    *,
    n_samples: int,
    batch_size: int,
    max_len: int,
    steps: int,
    temp: float,
    seed: int,
    device: torch.device,
):
    """Generate sequences by confidence-ordered locking of sampled tokens.

    Every position starts as a uniformly random token id. At each step the
    model is queried with ``sigma = -log(max(progress, 1e-3))`` as its time
    input, tokens are sampled with ``base._sample_from_logits``, and the most
    confident not-yet-locked positions are fixed so that
    ``ceil(progress * max_len)`` positions are locked in total. Unlocked
    positions are re-randomised before the next step; the last step locks
    everything that is still open.

    Args:
        model: Callable as ``model(ids, t, attn)`` returning logits.
        tokenizer: Provides ``vocab_size`` and ``decode``.
        ckpt: Unused; accepted for signature compatibility with the relaxed decoder.
        n_samples, batch_size, max_len, steps, seed, device: Sampling controls.
        temp: Sampling temperature forwarded to ``base._sample_from_logits``.

    Returns:
        ``(all_ids, all_texts, decode)``: token id lists, decoded strings, and a
        dict describing the decode settings.
    """
    del ckpt
    torch.manual_seed(seed)
    vocab_size = tokenizer.vocab_size
    n_steps = max(steps, 1)
    all_ids: list[list[int]] = []
    all_texts: list[str] = []
    remaining = n_samples
    while remaining > 0:
        bs = min(batch_size, remaining)
        ids = torch.randint(0, vocab_size, (bs, max_len), dtype=torch.long, device=device)
        locked = torch.zeros((bs, max_len), dtype=torch.bool, device=device)
        attn = torch.ones((bs, max_len), dtype=torch.bool, device=device)
        # Every row locks exactly ``need`` new positions per step, so all rows
        # share one locked count. Tracking it here avoids the per-row
        # ``locked[b].sum().item()`` device synchronisations.
        n_locked = 0
        for step in range(steps):
            progress = (step + 1) / n_steps
            alpha = max(progress, 1e-3)
            sigma = -math.log(alpha)
            t = torch.full((bs,), sigma, dtype=torch.float32, device=device)
            logits = model(ids, t, attn).float()[..., :vocab_size]
            pred = base._sample_from_logits(logits, temp)
            probs = F.softmax(logits, dim=-1)
            # Confidence = probability the model assigns to the sampled token.
            conf = torch.gather(probs, -1, pred.unsqueeze(-1)).squeeze(-1)
            target_locked = int(math.ceil(progress * max_len))
            if step == steps - 1:
                # Final step: lock everything that is still open.
                need = max_len - n_locked
            else:
                need = max(0, min(max_len, target_locked) - n_locked)
            if need > 0:
                for b in range(bs):
                    # Already-locked positions get -1 so they are never re-chosen.
                    scores = conf[b].masked_fill(locked[b], -1.0)
                    choose = torch.topk(scores, k=need).indices
                    ids[b, choose] = pred[b, choose]
                    locked[b, choose] = True
                n_locked += need
            if step + 1 < steps:
                # Re-randomise the positions that are still unlocked.
                random_ids = torch.randint(0, vocab_size, ids.shape, dtype=torch.long, device=device)
                ids = torch.where(locked, ids, random_ids)
        rows = ids.detach().cpu().tolist()
        all_ids.extend(rows)
        all_texts.extend(tokenizer.decode(row, stop_at_eos=False, skip_special_tokens=False) for row in rows)
        remaining -= bs
        print(f"[duo discrete] generated {n_samples - remaining}/{n_samples}", flush=True)
    decode = {
        "kind": "duo",
        "steps": steps,
        "decode_rule": "discrete_random_replace_confidence_lock",
        "temp": temp,
        "n_samples": n_samples,
        "seed": seed,
    }
    return all_ids, all_texts, decode
268
+
269
+
270
def decode_duo_auto(model, tokenizer, ckpt, **kwargs):
    """Pick the DUO decoder according to how far the checkpoint has trained.

    Checkpoints at or before ``duo_curriculum_end`` (default 500_000 when the
    checkpoint args do not record it) use the original relaxed decoder; later
    checkpoints use ``decode_duo_discrete``. All other arguments are forwarded
    unchanged.
    """
    # ``or {}`` mirrors the ``step ... or 0`` handling below: a checkpoint that
    # stores ``"args": None`` must not crash on ``.get``.
    train_args = ckpt.get("args") or {}
    ckpt_step = int(ckpt.get("step", 0) or 0)
    curriculum_end = int(train_args.get("duo_curriculum_end", 500_000))
    if ckpt_step <= curriculum_end:
        return _orig_decode_duo_relaxed(model, tokenizer, ckpt, **kwargs)
    return decode_duo_discrete(model, tokenizer, ckpt, **kwargs)
277
+
278
+
279
def decode_dense_baseline(model, tokenizer, *, kind, n_samples, batch_size, max_len, steps, seed, device):
    """Route ``kind == "flm"`` to ``decode_flm``; delegate every other kind to the original."""
    sampling = {
        "n_samples": n_samples,
        "batch_size": batch_size,
        "max_len": max_len,
        "steps": steps,
        "seed": seed,
        "device": device,
    }
    if kind != "flm":
        return _orig_decode_dense_baseline(model, tokenizer, kind=kind, **sampling)
    return decode_flm(model, tokenizer, **sampling)
302
+
303
+
304
# Install this file's implementations on ``base`` so that ``base.main`` uses
# them in place of the originals.
for _attr, _impl in (
    ("build_endpoint_model", build_endpoint_model),
    ("decode_lta_categorical", decode_lta_categorical),
    ("decode_dense_baseline", decode_dense_baseline),
    ("decode_duo_relaxed", decode_duo_auto),
):
    setattr(base, _attr, _impl)


if __name__ == "__main__":
    base.main()
LTA_openwebtext_dualt/scripts/eval_lta_openwebtext_dualt_light.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Light eval of an OpenWebText dual-t checkpoint on a single GPU.
# RUN_NAME, CHECKPOINT, DATA_PATH, TOKENIZER_PATH and GPU can be overridden
# from the environment.
set -euo pipefail

cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt
export PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
export TOKENIZERS_PARALLELISM=false
export PYTHONUNBUFFERED=1

RUN_NAME="${RUN_NAME:-smoke_lta_openwebtext_dirichlet_dualt_len1024_1gpu}"
CHECKPOINT="${CHECKPOINT:-runs/${RUN_NAME}/latest.pt}"
DATA_PATH="${DATA_PATH:-/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext}"
TOKENIZER_PATH="${TOKENIZER_PATH:-/e2e-data/evad-tech-vla/wanghan58/models/flowtext_scorers/gpt2-standard/tokenizer.json}"
GPU="${GPU:-0}"

# Arguments are collected in an array so each flag/value pair sits on one line.
EVAL_ARGS=(
  --checkpoint "${CHECKPOINT}"
  --data_path "${DATA_PATH}"
  --text_column text
  --tokenizer_path "${TOKENIZER_PATH}"
  --max_records 8
  --max_len 1024
  --batch_size 1
  --mask_ratios 0.1,1.0
  --fill_t 0.0
  --infer_steps 8
  --decode_solver flowmap
  --noise_init logistic_normal
  --pure_noise_samples 2
)

CUDA_VISIBLE_DEVICES="${GPU}" python eval.py "${EVAL_ARGS[@]}"
LTA_openwebtext_dualt/scripts/flowtext_decode_lab.py.bak_correct_decode_20260430_222618 ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Decode-sweep lab for FlowText OpenWebText checkpoints.
3
+
4
+ The goal is to debug inference without touching training. We try several
5
+ simplex-valid update rules, generate many candidates, and rank them with
6
+ anti-collapse diagnostics instead of pure self-likelihood.
7
+
8
+ Run from the flowtext_standard_bench repository root.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import argparse
14
+ import json
15
+ import math
16
+ import re
17
+ import sys
18
+ from collections import Counter
19
+ from dataclasses import dataclass, asdict
20
+ from pathlib import Path
21
+ from typing import Iterable, List, Sequence
22
+
23
+ import torch
24
+ import torch.nn.functional as F
25
+
26
+ REPO_ROOT = Path(__file__).resolve().parents[1]
27
+ if str(REPO_ROOT) not in sys.path:
28
+ sys.path.insert(0, str(REPO_ROOT))
29
+
30
+ from eval import build_model_from_ckpt
31
+ from flowtext_lab.bridges import smooth_onehot
32
+ from flowtext_lab.decode import sample_noise_simplex, state_for_model
33
+ from flowtext_lab.tokenization import BpeTextTokenizer
34
+
35
+
36
+ WORD_RE = re.compile(r"[A-Za-z]+|\d+|[^\sA-Za-z\d]")
37
+
38
+
39
@dataclass
class DecodeConfig:
    """Settings for one decode variant in the sweep (consumed by ``decode_batch``)."""

    # Human-readable name of this configuration.
    label: str
    # Update rule: "flowmap", "replace", "geometric" or "centered_residual".
    rule: str
    # Number of model evaluations / state updates.
    steps: int = 64
    # Mixing weight used by the "replace", "geometric" and "centered_residual" rules.
    eta: float = 0.5
    # Multiplier on the flowmap step size.
    damping: float = 1.0
    # Upper bound on the flowmap step size; values <= 0 disable the cap.
    max_gamma: float = 1.0
    # Logits are divided by this before the softmax when it differs from 1.0.
    endpoint_temp: float = 1.0
    # Lower clamp applied to the state probabilities after every update.
    state_floor: float = 1e-8
    # What to return: "state", "endpoint" or "blend" (50/50 of both).
    final_from: str = "state"
    # Weight of the uniform distribution mixed into the state; 0 disables it.
    noise_mix: float = 0.0
    # Decay of ``noise_mix`` over steps: "linear", "sqrt", anything else = constant.
    noise_decay: str = "linear"
    # Additive bias on the EOS logit; 0 leaves the logits untouched.
    eos_logit_bias: float = 0.0
53
+
54
+
55
def tokenize_for_metrics(text: str) -> list[str]:
    """Split *text* into the tokens matched by ``WORD_RE``, in order of appearance."""
    # WORD_RE has no capture groups, so each full match is one token.
    return [match.group(0) for match in WORD_RE.finditer(text)]
57
+
58
+
59
def repeated_ngram_frac(tokens: Sequence[str], n: int) -> float:
    """Return the fraction of *n*-grams in *tokens* that repeat an earlier one.

    Sequences shorter than *n* yield 0.0.
    """
    if n > len(tokens):
        return 0.0
    # Zipping the sequence against its own shifted copies yields every n-gram.
    shifted = (tokens[offset:] for offset in range(n))
    grams = list(zip(*shifted))
    # Each occurrence beyond the first of a distinct n-gram counts as a repeat.
    repeats = len(grams) - len(set(grams))
    return repeats / max(len(grams), 1)
65
+
66
+
67
def text_metrics(text: str) -> dict:
    """Compute anti-collapse diagnostics and a heuristic quality score for *text*.

    Returns a dict with the score under ``"quality"`` followed by the raw
    counts and fractions it is built from.
    """
    toks = tokenize_for_metrics(text)
    lowered = [tok.lower() for tok in toks]
    # "Words" are the purely alphabetic tokens, lower-cased.
    words = [low for tok, low in zip(toks, lowered) if re.fullmatch(r"[A-Za-z]+", tok)]
    tok_denom = max(len(toks), 1)

    if words:
        word_denom = len(words)
        max_word_frac = max(Counter(words).values()) / word_denom
        distinct1 = len(set(words)) / word_denom
    else:
        # No words at all is treated as the worst case for word dominance.
        max_word_frac = 1.0
        distinct1 = 0.0

    word_pairs = list(zip(words, words[1:]))
    distinct2 = len(set(word_pairs)) / len(word_pairs) if word_pairs else 0.0

    digit_frac = sum(1 for tok in toks if tok.isdigit()) / tok_denom
    punct_frac = sum(1 for tok in toks if re.fullmatch(r"[,.;:!?]+", tok)) / tok_denom
    eos_count = text.count("<|endoftext|>")
    bad_char_count = text.count("�")
    rep3 = repeated_ngram_frac(lowered, 3)
    rep4 = repeated_ngram_frac(lowered, 4)

    # This score is deliberately simple and non-oracle. It rewards length and
    # lexical variety while heavily penalizing classic collapse artifacts.
    quality = (
        min(len(text) / 700.0, 1.0)
        + 0.35 * distinct2
        + 0.15 * distinct1
        - 0.30 * eos_count
        - 2.60 * rep3
        - 1.60 * rep4
        - 1.30 * digit_frac
        - 0.65 * punct_frac
        - 1.35 * max_word_frac
        - 0.35 * bad_char_count
    )
    return {
        "quality": float(quality),
        "chars": len(text),
        "tokens": len(toks),
        "words": len(words),
        "eos_count": eos_count,
        "bad_char_count": bad_char_count,
        "rep3": float(rep3),
        "rep4": float(rep4),
        "distinct1": float(distinct1),
        "distinct2": float(distinct2),
        "digit_frac": float(digit_frac),
        "punct_frac": float(punct_frac),
        "max_word_frac": float(max_word_frac),
    }
112
+
113
+
114
def decode_text(tokenizer: BpeTextTokenizer, ids: Sequence[int]) -> str:
    """Decode *ids* to text, keeping special tokens and not stopping at EOS."""
    decode_options = {"stop_at_eos": False, "skip_special_tokens": False}
    return tokenizer.decode(ids, **decode_options)
116
+
117
+
118
def encode_prompt(tokenizer: BpeTextTokenizer, prompt: str, max_len: int) -> list[int]:
    """Encode *prompt* with the wrapped tokenizer and truncate to *max_len* ids."""
    encoding = tokenizer.tokenizer.encode(prompt)
    token_ids = list(encoding.ids)
    return token_ids[:max_len]
120
+
121
+
122
@torch.no_grad()
def build_initial_state(
    tokenizer: BpeTextTokenizer,
    prompts: list[str],
    restarts: int,
    max_len: int,
    target_prob: float,
    eps: float,
    device: torch.device,
) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor, list[str]]:
    """Create the noisy starting state with prompt positions pinned.

    Each prompt is repeated ``restarts`` times. All positions start from
    logistic-normal simplex noise; the positions covered by a prompt are
    overwritten with the smoothed one-hot of the prompt tokens and marked as
    locked.

    Returns:
        ``(probs, attn, lock, lock_probs, expanded)``: the initial state, an
        all-True attention mask, the boolean lock mask, the values to restore
        at locked positions, and the prompt text for every batch row.
    """
    expanded: list[str] = []
    prompt_ids: list[list[int]] = []
    for prompt in prompts:
        encoded = encode_prompt(tokenizer, prompt, max_len=max_len)
        # Every restart of a prompt shares the same encoded ids.
        expanded.extend([prompt] * restarts)
        prompt_ids.extend([encoded] * restarts)

    batch = len(prompt_ids)
    vocab = tokenizer.vocab_size
    attn = torch.ones((batch, max_len), dtype=torch.bool, device=device)
    probs = sample_noise_simplex(
        (batch, max_len),
        vocab,
        device,
        eps,
        noise_mode="logistic_normal",
        target_prob=target_prob,
        noise_sigma=-1.0,
    )
    lock = torch.zeros((batch, max_len), dtype=torch.bool, device=device)
    lock_probs = torch.zeros((batch, max_len, vocab), dtype=torch.float32, device=device)
    for row, ids in enumerate(prompt_ids):
        prefix_len = len(ids)
        if prefix_len == 0:
            # Empty prompt: the whole row stays free noise.
            continue
        ids_t = torch.tensor(ids, dtype=torch.long, device=device).unsqueeze(0)
        pinned = smooth_onehot(ids_t, vocab, target_prob, eps)[0]
        probs[row, :prefix_len] = pinned
        lock_probs[row, :prefix_len] = pinned
        lock[row, :prefix_len] = True
    return probs, attn, lock, lock_probs, expanded
162
+
163
+
164
def flowmap_gamma(step: int, steps: int, damping: float, max_gamma: float, eps: float) -> float:
    """Return the flowmap step size for *step* out of *steps*.

    The undamped size is ``(t_next - s) / (1 - s)`` with ``s = step / steps``
    and ``t_next = (step + 1) / steps``. It is scaled by *damping* and, when
    *max_gamma* is positive, capped at *max_gamma*.
    """
    denom = max(steps, 1)
    s = step / denom
    t_next = (step + 1) / denom
    # Guard against division by zero when s reaches 1.
    time_left = max(1.0 - s, eps)
    gamma = float(damping) * ((t_next - s) / time_left)
    if max_gamma > 0:
        return min(gamma, float(max_gamma))
    return gamma
170
+
171
+
172
@torch.no_grad()
def decode_batch(
    model,
    init_probs: torch.Tensor,
    attn: torch.Tensor,
    lock: torch.Tensor,
    lock_probs: torch.Tensor,
    cfg: DecodeConfig,
    eps: float,
    eos_id: int | None = None,
) -> torch.Tensor:
    """Run ``cfg.steps`` simplex updates from *init_probs* and return the result.

    At every step the model is queried at ``t = step / steps``, its (optionally
    temperature-scaled and EOS-biased) softmax is taken as the endpoint, and the
    state is moved towards it according to ``cfg.rule``. The state is then
    optionally mixed with uniform noise, floored, renormalised, and reset to
    ``lock_probs`` wherever ``lock`` is set.

    Returns:
        The final state, the last endpoint, or their 50/50 blend, depending on
        ``cfg.final_from``; locked positions always carry ``lock_probs``.

    Raises:
        ValueError: If ``cfg.rule`` is not a known update rule.
    """
    state = init_probs.float().clone()
    device = state.device
    locked = lock.unsqueeze(-1)
    n_steps = max(cfg.steps, 1)
    floor = max(float(cfg.state_floor), eps)
    last_endpoint = state

    for step in range(cfg.steps):
        t = torch.full((state.size(0),), float(step / n_steps), dtype=torch.float32, device=device)
        logits = model(state_for_model(model, state, eps), t, attn).float()
        if cfg.endpoint_temp != 1.0:
            logits = logits / float(cfg.endpoint_temp)
        apply_eos_bias = cfg.eos_logit_bias != 0.0 and eos_id is not None and 0 <= eos_id < logits.size(-1)
        if apply_eos_bias:
            logits[..., eos_id] += float(cfg.eos_logit_bias)
        endpoint = F.softmax(logits, dim=-1)
        last_endpoint = endpoint

        rule = cfg.rule
        if rule == "flowmap":
            gamma = flowmap_gamma(step, cfg.steps, cfg.damping, cfg.max_gamma, eps)
            updated = state + gamma * (endpoint - state)
        elif rule == "replace":
            updated = (1.0 - cfg.eta) * state + cfg.eta * endpoint
        elif rule == "geometric":
            # Interpolate in log space, then renormalise via softmax.
            log_state = torch.log(state.clamp_min(eps))
            log_endpoint = torch.log(endpoint.clamp_min(eps))
            updated = F.softmax((1.0 - cfg.eta) * log_state + cfg.eta * log_endpoint, dim=-1)
        elif rule == "centered_residual":
            # Add a zero-sum probability residual, then project back to simplex.
            residual = endpoint - state
            residual = residual - residual.mean(dim=-1, keepdim=True)
            updated = state + cfg.eta * residual
        else:
            raise ValueError(f"Unknown decode rule: {cfg.rule}")

        if cfg.noise_mix > 0:
            frac_left = 1.0 - (step + 1) / n_steps
            if cfg.noise_decay == "linear":
                lam = cfg.noise_mix * frac_left
            elif cfg.noise_decay == "sqrt":
                lam = cfg.noise_mix * math.sqrt(max(0.0, frac_left))
            else:
                # Any other value keeps the noise weight constant.
                lam = cfg.noise_mix
            if lam > 0:
                uniform = torch.full_like(updated, 1.0 / updated.size(-1))
                updated = (1.0 - lam) * updated + lam * uniform

        updated = updated.clamp_min(floor)
        updated = updated / updated.sum(dim=-1, keepdim=True).clamp_min(eps)
        # Prompt positions are restored after every update.
        state = torch.where(locked, lock_probs, updated)

    if cfg.final_from == "endpoint":
        out = last_endpoint
    elif cfg.final_from == "blend":
        out = 0.5 * state + 0.5 * last_endpoint
    else:
        return state
    out = torch.where(locked, lock_probs, out)
    return out / out.sum(dim=-1, keepdim=True).clamp_min(eps)
238
+
239
+
240
@torch.no_grad()
def pseudo_likelihood_scores(
    model,
    tokenizer: BpeTextTokenizer,
    probs: torch.Tensor,
    attn: torch.Tensor,
    lock: torch.Tensor,
    target_prob: float,
    eps: float,
    repeats: int,
    mask_frac: float,
    rerank_t: float,
) -> torch.Tensor:
    """Score each row by the mean log-probability of its own argmax tokens.

    For ``max(1, repeats)`` rounds, a random subset of the non-locked attended
    positions is replaced by logistic-normal noise, the model is queried at
    ``rerank_t``, and the log-probability of the original argmax token at the
    noised positions is accumulated. Rows with eligible positions always get
    at least one noised position per round.

    Returns:
        A 1-D tensor with one mean log-probability per row.
    """
    ids = probs.argmax(dim=-1)
    n_rows, seq_len = ids.size(0), ids.size(1)
    dev = ids.device
    vocab = tokenizer.vocab_size
    clean = smooth_onehot(ids, vocab, target_prob, eps)
    eligible = attn & (~lock)
    locked = lock.unsqueeze(-1)
    total_logp = torch.zeros(n_rows, dtype=torch.float32, device=dev)
    total_count = torch.zeros_like(total_logp)

    for _ in range(max(1, repeats)):
        score_mask = (torch.rand_like(ids.float()) < mask_frac) & eligible
        for row in range(n_rows):
            if not eligible[row].any() or score_mask[row].any():
                continue
            # The random draw selected nothing: force one eligible position.
            candidates = torch.nonzero(eligible[row], as_tuple=False).flatten()
            pick = torch.randint(0, candidates.numel(), (1,), device=dev)
            score_mask[row, candidates[pick]] = True
        noise = sample_noise_simplex(
            (n_rows, seq_len),
            vocab,
            dev,
            eps,
            noise_mode="logistic_normal",
            target_prob=target_prob,
            noise_sigma=-1.0,
        )
        model_in = torch.where(score_mask.unsqueeze(-1), noise, clean)
        # Locked positions keep the caller's probabilities.
        model_in = torch.where(locked, probs, model_in)
        t = torch.full((n_rows,), float(rerank_t), dtype=torch.float32, device=dev)
        logits = model(state_for_model(model, model_in, eps), t, attn).float()
        token_logp = F.log_softmax(logits, dim=-1).gather(-1, ids.unsqueeze(-1)).squeeze(-1)
        weights = score_mask.float()
        total_logp += (token_logp * weights).sum(dim=-1)
        total_count += weights.sum(dim=-1)
    return total_logp / total_count.clamp_min(1.0)
281
+
282
+
283
+ def default_configs(steps: int, config_set: str) -> list[DecodeConfig]:
284
+ if config_set == "focused_flowmap":
285
+ return [
286
+ DecodeConfig("flowmap_t1p00_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0),
287
+ DecodeConfig("flowmap_t1p10_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.10),
288
+ DecodeConfig("flowmap_t1p25_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.25),
289
+ DecodeConfig("flowmap_t1p40_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40),
290
+ DecodeConfig("flowmap_t1p60_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.60),
291
+ DecodeConfig("flowmap_t1p25_d0p7", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25),
292
+ DecodeConfig("flowmap_t1p40_d0p7", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.40),
293
+ DecodeConfig("flowmap_t1p60_d0p7", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.60),
294
+ DecodeConfig("flowmap_t1p25_g0p5", "flowmap", steps=steps, damping=1.0, max_gamma=0.5, endpoint_temp=1.25),
295
+ DecodeConfig("flowmap_t1p40_g0p5", "flowmap", steps=steps, damping=1.0, max_gamma=0.5, endpoint_temp=1.40),
296
+ ]
297
+ if config_set == "best_flowmap":
298
+ return [
299
+ DecodeConfig("flowmap_t1p25_d0p7", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25),
300
+ DecodeConfig("flowmap_t1p25_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.25),
301
+ DecodeConfig("flowmap_t1p35_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35),
302
+ DecodeConfig("flowmap_t1p40_d1p0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40),
303
+ ]
304
+ if config_set == "final_projection":
305
+ return [
306
+ DecodeConfig("flowmap_t1p35_state", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, final_from="state"),
307
+ DecodeConfig("flowmap_t1p35_endpoint", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, final_from="endpoint"),
308
+ DecodeConfig("flowmap_t1p35_blend", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, final_from="blend"),
309
+ DecodeConfig("flowmap_t1p40_state", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40, final_from="state"),
310
+ DecodeConfig("flowmap_t1p40_endpoint", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40, final_from="endpoint"),
311
+ DecodeConfig("flowmap_t1p40_blend", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40, final_from="blend"),
312
+ DecodeConfig("flowmap_t1p25_d0p7_state", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25, final_from="state"),
313
+ DecodeConfig("flowmap_t1p25_d0p7_endpoint", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25, final_from="endpoint"),
314
+ DecodeConfig("flowmap_t1p25_d0p7_blend", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25, final_from="blend"),
315
+ ]
316
+ if config_set == "eos_sweep":
317
+ return [
318
+ DecodeConfig("flowmap_t1p35_eos0", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, eos_logit_bias=0.0),
319
+ DecodeConfig("flowmap_t1p35_eos-1", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, eos_logit_bias=-1.0),
320
+ DecodeConfig("flowmap_t1p35_eos-2", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, eos_logit_bias=-2.0),
321
+ DecodeConfig("flowmap_t1p35_eos-3", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.35, eos_logit_bias=-3.0),
322
+ DecodeConfig("flowmap_t1p40_eos-2", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.40, eos_logit_bias=-2.0),
323
+ DecodeConfig("flowmap_t1p25_d0p7_eos-2", "flowmap", steps=steps, damping=0.7, max_gamma=1.0, endpoint_temp=1.25, eos_logit_bias=-2.0),
324
+ ]
325
+ if config_set != "broad":
326
+ raise ValueError(f"Unknown config_set: {config_set}")
327
+ return [
328
+ DecodeConfig("flowmap64", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, final_from="state"),
329
+ DecodeConfig("flowmap_temp1p25", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=1.25),
330
+ DecodeConfig("flowmap_temp0p85", "flowmap", steps=steps, damping=1.0, max_gamma=1.0, endpoint_temp=0.85),
331
+ DecodeConfig("replace_eta0p35", "replace", steps=steps, eta=0.35),
332
+ DecodeConfig("replace_eta0p50", "replace", steps=steps, eta=0.50),
333
+ DecodeConfig("replace_eta0p65", "replace", steps=steps, eta=0.65),
334
+ DecodeConfig("replace_eta0p50_temp1p25", "replace", steps=steps, eta=0.50, endpoint_temp=1.25),
335
+ DecodeConfig("geometric_eta0p25", "geometric", steps=steps, eta=0.25),
336
+ DecodeConfig("geometric_eta0p50", "geometric", steps=steps, eta=0.50),
337
+ DecodeConfig("centered_residual_eta0p20", "centered_residual", steps=steps, eta=0.20),
338
+ DecodeConfig("replace_eta0p50_floor1e6", "replace", steps=steps, eta=0.50, state_floor=1e-6),
339
+ DecodeConfig("replace_eta0p50_leak", "replace", steps=steps, eta=0.50, noise_mix=0.03, noise_decay="sqrt"),
340
+ ]
341
+
342
+
343
def aggregate(rows: list[dict]) -> dict:
    """Average the per-sample text metrics over ``rows``.

    Args:
        rows: Per-candidate metric dicts. Each one must contain every metric
            name listed below; values are coerced with ``float``.

    Returns:
        A dict mapping ``"mean_<metric>"`` to the arithmetic mean of that
        metric across ``rows``. Every mean is ``0.0`` when ``rows`` is empty.

    Raises:
        KeyError: If a row lacks one of the metric names.
    """
    metric_names = (
        "quality",
        "eos_count",
        "rep3",
        "rep4",
        "distinct1",
        "distinct2",
        "digit_frac",
        "max_word_frac",
    )
    # Clamp the denominator so an empty batch yields zeros rather than
    # raising ZeroDivisionError.
    denominator = max(len(rows), 1)
    means = {}
    for name in metric_names:
        total = sum(float(row[name]) for row in rows)
        means[f"mean_{name}"] = total / denominator
    return means
346
+
347
+
348
def main() -> None:
    """Sweep a set of decode configs over one checkpoint and write a JSONL report.

    Parses the command line, loads the tokenizer and checkpoint, and then, for
    each ``DecodeConfig`` returned by ``default_configs``, builds an initial
    state from the prompts, decodes it (optionally in chunks of
    ``--decode_batch_size`` rows), computes text metrics per candidate and
    optionally a pseudo-likelihood score used for ranking.

    The output file (``--output``) is truncated on every run and receives, per
    config, one ``"summary"`` record followed by up to ``--topk`` ``"sample"``
    records for each prompt, ordered by descending ``rank_score``. The best
    sample per prompt is also printed to stdout.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--checkpoint", required=True)
    parser.add_argument("--tokenizer_path", required=True)
    parser.add_argument("--max_len", type=int, default=128)
    parser.add_argument("--steps", type=int, default=64)
    parser.add_argument("--restarts", type=int, default=64)
    parser.add_argument("--target_prob", type=float, default=0.99)
    parser.add_argument("--eps", type=float, default=1e-8)
    parser.add_argument("--prompts", default="|The|In the early morning|Scientists have|The company said|A young woman")
    parser.add_argument("--score_repeats", type=int, default=0)
    parser.add_argument("--score_mask_frac", type=float, default=0.5)
    parser.add_argument("--rerank_t", type=float, default=0.5)
    parser.add_argument("--pl_weight", type=float, default=0.0)
    parser.add_argument("--output", default="runs/decode_lab/latest_decode_lab.jsonl")
    parser.add_argument("--config_set", default="broad", choices=["broad", "focused_flowmap", "best_flowmap", "final_projection", "eos_sweep"])
    parser.add_argument("--decode_batch_size", type=int, default=0)
    parser.add_argument("--topk", type=int, default=5)
    parser.add_argument("--seed", type=int, default=20260428)
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BpeTextTokenizer.from_file(args.tokenizer_path)
    ckpt = torch.load(args.checkpoint, map_location="cpu")
    model = build_model_from_ckpt(ckpt, tokenizer.vocab_size, args.max_len, device)
    model.eval()

    prompts = args.prompts.split("|")
    # Keep the first empty prompt: it is unconditional generation.
    print(f"[info] device={device} prompts={prompts} restarts={args.restarts} steps={args.steps}")
    print(f"[info] checkpoint={args.checkpoint}")

    out_path = Path(args.output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    configs = default_configs(args.steps, args.config_set)
    # Records are serialized with ensure_ascii=False, so generated text may
    # contain raw non-ASCII characters. Pin the encoding to UTF-8 instead of
    # relying on the platform's locale default, which can fail to encode them.
    with out_path.open("w", encoding="utf-8") as f:
        for cfg in configs:
            # The initial state is rebuilt for every config (as in the
            # original flow) rather than shared across configs.
            init, attn, lock, lock_probs, expanded = build_initial_state(
                tokenizer=tokenizer,
                prompts=prompts,
                restarts=args.restarts,
                max_len=args.max_len,
                target_prob=args.target_prob,
                eps=args.eps,
                device=device,
            )
            total = init.size(0)
            if args.decode_batch_size > 0 and total > args.decode_batch_size:
                # Decode in slices and park each result on the CPU so only
                # one chunk's output has to live on the device at a time.
                decoded_parts = []
                for start in range(0, total, args.decode_batch_size):
                    end = min(start + args.decode_batch_size, total)
                    part = decode_batch(
                        model,
                        init[start:end],
                        attn[start:end],
                        lock[start:end],
                        lock_probs[start:end],
                        cfg,
                        args.eps,
                        tokenizer.eos_id,
                    )
                    decoded_parts.append(part.detach().cpu())
                    print(f"[chunk] {cfg.label} decoded {end}/{total}", flush=True)
                decoded = torch.cat(decoded_parts, dim=0)
            else:
                decoded = decode_batch(model, init, attn, lock, lock_probs, cfg, args.eps, tokenizer.eos_id)

            ids = decoded.argmax(dim=-1).detach().cpu().tolist()
            texts = [decode_text(tokenizer, row) for row in ids]
            rows = []
            for i, text in enumerate(texts):
                m = text_metrics(text)
                m.update({"candidate": i, "prompt": expanded[i], "text": text})
                rows.append(m)

            if args.score_repeats > 0:
                # The chunked path leaves `decoded` on the CPU; `.to(device)`
                # moves it back and is a no-op when it is already there. The
                # moved copy is passed inline so no extra reference outlives
                # this call and the cleanup below can actually free it.
                # NOTE(review): scoring runs on the full batch and does not
                # honour --decode_batch_size -- confirm that is intended.
                pl = pseudo_likelihood_scores(
                    model,
                    tokenizer,
                    decoded.to(device),
                    attn,
                    lock,
                    args.target_prob,
                    args.eps,
                    repeats=args.score_repeats,
                    mask_frac=args.score_mask_frac,
                    rerank_t=args.rerank_t,
                ).detach().cpu().tolist()
                for row, score in zip(rows, pl):
                    row["pseudo_logp"] = float(score)
                    row["rank_score"] = float(row["quality"] + args.pl_weight * score)
            else:
                # Without scoring, candidates are ranked by quality alone.
                for row in rows:
                    row["pseudo_logp"] = None
                    row["rank_score"] = float(row["quality"])

            summary = {"type": "summary", "config": asdict(cfg), "agg": aggregate(rows)}
            f.write(json.dumps(summary, ensure_ascii=False) + "\n")
            print("\n" + "=" * 96)
            print("[config]", cfg.label, asdict(cfg))
            print("[metrics]", json.dumps(summary["agg"], ensure_ascii=False))
            for prompt in prompts:
                subset = [r for r in rows if r["prompt"] == prompt]
                subset.sort(key=lambda r: r["rank_score"], reverse=True)
                for rank, row in enumerate(subset[: args.topk], 1):
                    rec = {"type": "sample", "config": asdict(cfg), "rank": rank, **row}
                    f.write(json.dumps(rec, ensure_ascii=False) + "\n")
                    if rank == 1:
                        # Only the top-ranked sample per prompt is echoed.
                        print(f"\n--- best prompt={prompt!r} rank_score={row['rank_score']:.4f} quality={row['quality']:.4f} ---")
                        print(row["text"])

            # Drop this config's tensors before the next config allocates its
            # own, then return cached blocks to the CUDA allocator.
            del init, attn, lock, lock_probs, decoded
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    print(f"[done] wrote {out_path}")


if __name__ == "__main__":
    main()