Add files using upload-large-folder tool
Browse files- LTA_openwebtext_dualt/logs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b64_resume4000_20260504_203021.nohup.out +0 -0
- LTA_openwebtext_dualt/logs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step.log +149 -0
- LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/scalinglaw_ctx1024_exact10_vocab50257_small384x6_4gpu_2000step.log +212 -0
- LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/scalinglaw_samples2_192x3_c512_vocab50257_4gpu_3000step.log +66 -0
- LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_ctx256_small384x6_step500.log +1 -0
- LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_ctx512_small384x6_step500.log +3 -0
- LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_params512x8_c512_vocab50257_step750.log +3 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/__multiarray_api.c +314 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/experimental_dtype_api.h +365 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/halffloat.h +70 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_cpu.h +129 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_interrupt.h +56 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h +20 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/utils.h +37 -0
- LTA_openwebtext_dualt/scripts/flowtext_score_decode_lab.py +129 -0
- LTA_openwebtext_dualt/scripts/launch_lta_owt_from_lm1b_c1024_4gpu.sh +85 -0
- LTA_openwebtext_dualt/scripts/launch_lta_wmt14_deen_fullycoupled_4gpu_smoke.sh +113 -0
- LTA_openwebtext_dualt/scripts/run_lta_owt_bert_absrope_time4_dirichlet_len1024_C1_to_1024_8gpu_1m_mask1_sameT_save10k.sh +77 -0
- LTA_openwebtext_dualt/scripts/tmp_run_three_quick_infer_20260525.sh +130 -0
- LTA_openwebtext_dualt/scripts/trace_lta_decode_steps.py +129 -0
LTA_openwebtext_dualt/logs/ar_lm1b_flmpack_bert_small_len128_gbs512_4gpu_1m_rowshard_b64_resume4000_20260504_203021.nohup.out
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LTA_openwebtext_dualt/logs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step.log
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
*****************************************
|
| 3 |
+
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
*****************************************
|
| 5 |
+
[rank0]:[W512 16:41:13.244392390 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 6 |
+
NCCL version 2.25.1+cuda12.8
|
| 7 |
+
[rank1]:[W512 16:41:14.936907560 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 8 |
+
[rank3]:[W512 16:41:14.612056126 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 9 |
+
[rank2]:[W512 16:41:15.111442637 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 10 |
+
{
|
| 11 |
+
"device": "cuda:0",
|
| 12 |
+
"rank": 0,
|
| 13 |
+
"world_size": 4,
|
| 14 |
+
"samples": "owt_cached_chunks:10904",
|
| 15 |
+
"vocab_size": 50257,
|
| 16 |
+
"save_dir": "runs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step",
|
| 17 |
+
"batch_size": 16,
|
| 18 |
+
"grad_accum": 8,
|
| 19 |
+
"effective_batch_size": 512,
|
| 20 |
+
"global_batch_size": 512,
|
| 21 |
+
"lr_schedule": "constant_warmup",
|
| 22 |
+
"warmup_steps": 20,
|
| 23 |
+
"adam_beta1": 0.9,
|
| 24 |
+
"adam_beta2": 0.999,
|
| 25 |
+
"adam_eps": 1e-08,
|
| 26 |
+
"model_type": "ddit",
|
| 27 |
+
"dual_t": true,
|
| 28 |
+
"corrupt_t_mode": "same",
|
| 29 |
+
"corrupt_min_t": 0.0,
|
| 30 |
+
"corrupt_max_t": 1.0,
|
| 31 |
+
"dirichlet_endpoint_mode": "categorical_dual_t",
|
| 32 |
+
"dirichlet_semantic_t_mode": "same",
|
| 33 |
+
"dirichlet_semantic_t_value": 0.0,
|
| 34 |
+
"categorical_wrong_from_full_vocab": true,
|
| 35 |
+
"simplex_bridge_sampler": "dirichlet",
|
| 36 |
+
"logistic_normal_sigma_min": 0.18,
|
| 37 |
+
"logistic_normal_sigma_max": 2.2,
|
| 38 |
+
"logistic_normal_tau_min": 0.65,
|
| 39 |
+
"logistic_normal_tau_max": 1.15,
|
| 40 |
+
"torch_compile": false,
|
| 41 |
+
"compile_mode": "max-autotune",
|
| 42 |
+
"state_format": "prob",
|
| 43 |
+
"target_loss": "hard_ce",
|
| 44 |
+
"meanflow_weight": 0.0,
|
| 45 |
+
"bridge_noise_init": "logistic_normal",
|
| 46 |
+
"noise_sigma": -1.0,
|
| 47 |
+
"wrap": true,
|
| 48 |
+
"wrap_mode": "stream",
|
| 49 |
+
"wrap_record_buffer_size": 200,
|
| 50 |
+
"owt_cached_chunks": true,
|
| 51 |
+
"owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k_fast10k",
|
| 52 |
+
"owt_chunk_cache_rebuild": false,
|
| 53 |
+
"owt_chunk_cache_write_batch": 4096,
|
| 54 |
+
"online_chunk_shuffle": false,
|
| 55 |
+
"online_chunk_shuffle_buffer": 10000,
|
| 56 |
+
"openwebtext_split": "train_minus_100k",
|
| 57 |
+
"detokenizer": "auto",
|
| 58 |
+
"resolved_detokenizer": null,
|
| 59 |
+
"num_workers": 0,
|
| 60 |
+
"latest_every": 25,
|
| 61 |
+
"resume_path": ""
|
| 62 |
+
}
|
| 63 |
+
step=5 micro_steps=40 elapsed=50.1s lr=9.000000e-05 loss_all=10.7950 acc_all=0.5424 loss_corrupt=10.8013 acc_corrupt=0.3710 corrupt_frac=0.5505 loss=10.8013 loss_recon=10.8013 loss_meanflow=0.0000 mean_model_t=0.5037 mean_corrupt_t=0.5037 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4924 init_acc_corrupt=0.4756 init_gold_top10=0.5024 init_gold_top100=0.5300
|
| 64 |
+
step=10 micro_steps=80 elapsed=48.8s lr=1.650000e-04 loss_all=10.5876 acc_all=0.5773 loss_corrupt=10.6249 acc_corrupt=0.3641 corrupt_frac=0.5710 loss=10.6249 loss_recon=10.6249 loss_meanflow=0.0000 mean_model_t=0.4988 mean_corrupt_t=0.4988 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5066 init_acc_corrupt=0.4603 init_gold_top10=0.4881 init_gold_top100=0.5178
|
| 65 |
+
step=15 micro_steps=120 elapsed=48.5s lr=2.400000e-04 loss_all=10.0529 acc_all=0.1321 loss_corrupt=10.0839 acc_corrupt=0.0846 corrupt_frac=0.5482 loss=10.0839 loss_recon=10.0839 loss_meanflow=0.0000 mean_model_t=0.4826 mean_corrupt_t=0.4826 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5143 init_acc_corrupt=0.4517 init_gold_top10=0.4797 init_gold_top100=0.5107
|
| 66 |
+
step=20 micro_steps=160 elapsed=46.3s lr=3.000000e-04 loss_all=9.1611 acc_all=0.2368 loss_corrupt=9.2121 acc_corrupt=0.1448 corrupt_frac=0.5527 loss=9.2121 loss_recon=9.2121 loss_meanflow=0.0000 mean_model_t=0.4898 mean_corrupt_t=0.4898 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5102 init_acc_corrupt=0.4514 init_gold_top10=0.4836 init_gold_top100=0.5166
|
| 67 |
+
step=25 micro_steps=200 elapsed=43.5s lr=3.000000e-04 loss_all=8.2892 acc_all=0.1852 loss_corrupt=8.3501 acc_corrupt=0.1275 corrupt_frac=0.5519 loss=8.3501 loss_recon=8.3501 loss_meanflow=0.0000 mean_model_t=0.5035 mean_corrupt_t=0.5035 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5017 init_acc_corrupt=0.4630 init_gold_top10=0.4920 init_gold_top100=0.5225
|
| 68 |
+
step=30 micro_steps=240 elapsed=58.5s lr=3.000000e-04 loss_all=7.5538 acc_all=0.2846 loss_corrupt=7.7125 acc_corrupt=0.1894 corrupt_frac=0.5483 loss=7.7125 loss_recon=7.7125 loss_meanflow=0.0000 mean_model_t=0.4809 mean_corrupt_t=0.4809 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5216 init_acc_corrupt=0.4416 init_gold_top10=0.4725 init_gold_top100=0.5025
|
| 69 |
+
step=35 micro_steps=280 elapsed=44.9s lr=3.000000e-04 loss_all=6.9044 acc_all=0.3164 loss_corrupt=7.2563 acc_corrupt=0.2266 corrupt_frac=0.5421 loss=7.2563 loss_recon=7.2563 loss_meanflow=0.0000 mean_model_t=0.5272 mean_corrupt_t=0.5272 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4720 init_acc_corrupt=0.4966 init_gold_top10=0.5229 init_gold_top100=0.5512
|
| 70 |
+
step=40 micro_steps=320 elapsed=44.0s lr=3.000000e-04 loss_all=6.4465 acc_all=0.2756 loss_corrupt=6.9584 acc_corrupt=0.1858 corrupt_frac=0.5546 loss=6.9584 loss_recon=6.9584 loss_meanflow=0.0000 mean_model_t=0.4862 mean_corrupt_t=0.4862 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5271 init_acc_corrupt=0.4348 init_gold_top10=0.4664 init_gold_top100=0.5004
|
| 71 |
+
step=45 micro_steps=360 elapsed=42.7s lr=3.000000e-04 loss_all=5.9251 acc_all=0.2724 loss_corrupt=6.5543 acc_corrupt=0.1895 corrupt_frac=0.5743 loss=6.5543 loss_recon=6.5543 loss_meanflow=0.0000 mean_model_t=0.4939 mean_corrupt_t=0.4939 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5111 init_acc_corrupt=0.4545 init_gold_top10=0.4826 init_gold_top100=0.5159
|
| 72 |
+
|
| 73 |
+
*****************************************
|
| 74 |
+
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 75 |
+
*****************************************
|
| 76 |
+
[rank0]:[W512 16:53:29.911882582 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 77 |
+
NCCL version 2.25.1+cuda12.8
|
| 78 |
+
[rank3]:[W512 16:53:30.911647198 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 79 |
+
[rank1]:[W512 16:53:30.554297191 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 80 |
+
[rank2]:[W512 16:53:30.591215668 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 81 |
+
resumed_from=runs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step/latest.pt start_step=26
|
| 82 |
+
{
|
| 83 |
+
"device": "cuda:0",
|
| 84 |
+
"rank": 0,
|
| 85 |
+
"world_size": 4,
|
| 86 |
+
"samples": "owt_cached_chunks:10904",
|
| 87 |
+
"vocab_size": 50257,
|
| 88 |
+
"save_dir": "runs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step",
|
| 89 |
+
"batch_size": 16,
|
| 90 |
+
"grad_accum": 8,
|
| 91 |
+
"effective_batch_size": 512,
|
| 92 |
+
"global_batch_size": 512,
|
| 93 |
+
"lr_schedule": "constant_warmup",
|
| 94 |
+
"warmup_steps": 20,
|
| 95 |
+
"adam_beta1": 0.9,
|
| 96 |
+
"adam_beta2": 0.999,
|
| 97 |
+
"adam_eps": 1e-08,
|
| 98 |
+
"model_type": "ddit",
|
| 99 |
+
"dual_t": true,
|
| 100 |
+
"corrupt_t_mode": "same",
|
| 101 |
+
"corrupt_min_t": 0.0,
|
| 102 |
+
"corrupt_max_t": 1.0,
|
| 103 |
+
"dirichlet_endpoint_mode": "categorical_dual_t",
|
| 104 |
+
"dirichlet_semantic_t_mode": "same",
|
| 105 |
+
"dirichlet_semantic_t_value": 0.0,
|
| 106 |
+
"categorical_wrong_from_full_vocab": true,
|
| 107 |
+
"simplex_bridge_sampler": "dirichlet",
|
| 108 |
+
"logistic_normal_sigma_min": 0.18,
|
| 109 |
+
"logistic_normal_sigma_max": 2.2,
|
| 110 |
+
"logistic_normal_tau_min": 0.65,
|
| 111 |
+
"logistic_normal_tau_max": 1.15,
|
| 112 |
+
"torch_compile": false,
|
| 113 |
+
"compile_mode": "max-autotune",
|
| 114 |
+
"state_format": "prob",
|
| 115 |
+
"target_loss": "hard_ce",
|
| 116 |
+
"meanflow_weight": 0.0,
|
| 117 |
+
"bridge_noise_init": "logistic_normal",
|
| 118 |
+
"noise_sigma": -1.0,
|
| 119 |
+
"wrap": true,
|
| 120 |
+
"wrap_mode": "stream",
|
| 121 |
+
"wrap_record_buffer_size": 200,
|
| 122 |
+
"owt_cached_chunks": true,
|
| 123 |
+
"owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k_fast10k",
|
| 124 |
+
"owt_chunk_cache_rebuild": false,
|
| 125 |
+
"owt_chunk_cache_write_batch": 4096,
|
| 126 |
+
"online_chunk_shuffle": false,
|
| 127 |
+
"online_chunk_shuffle_buffer": 10000,
|
| 128 |
+
"openwebtext_split": "train_minus_100k",
|
| 129 |
+
"detokenizer": "auto",
|
| 130 |
+
"resolved_detokenizer": null,
|
| 131 |
+
"num_workers": 0,
|
| 132 |
+
"latest_every": 25,
|
| 133 |
+
"resume_path": "runs/lta_owt_c1024_gpt2_cached_chunks_len1024_fast10k_4gpu_b16_100step/latest.pt"
|
| 134 |
+
}
|
| 135 |
+
step=30 micro_steps=240 elapsed=49.2s lr=3.000000e-04 loss_all=7.5368 acc_all=0.2895 loss_corrupt=7.6733 acc_corrupt=0.2021 corrupt_frac=0.5505 loss=7.6733 loss_recon=7.6733 loss_meanflow=0.0000 mean_model_t=0.5037 mean_corrupt_t=0.5037 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4924 init_acc_corrupt=0.4756 init_gold_top10=0.5024 init_gold_top100=0.5300
|
| 136 |
+
step=35 micro_steps=280 elapsed=48.2s lr=3.000000e-04 loss_all=7.0131 acc_all=0.2880 loss_corrupt=7.3238 acc_corrupt=0.1995 corrupt_frac=0.5710 loss=7.3238 loss_recon=7.3238 loss_meanflow=0.0000 mean_model_t=0.4988 mean_corrupt_t=0.4988 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5066 init_acc_corrupt=0.4603 init_gold_top10=0.4881 init_gold_top100=0.5178
|
| 137 |
+
step=40 micro_steps=320 elapsed=43.2s lr=3.000000e-04 loss_all=6.4932 acc_all=0.2757 loss_corrupt=6.9707 acc_corrupt=0.1886 corrupt_frac=0.5482 loss=6.9707 loss_recon=6.9707 loss_meanflow=0.0000 mean_model_t=0.4826 mean_corrupt_t=0.4826 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5143 init_acc_corrupt=0.4517 init_gold_top10=0.4797 init_gold_top100=0.5107
|
| 138 |
+
step=45 micro_steps=360 elapsed=42.4s lr=3.000000e-04 loss_all=6.0021 acc_all=0.2643 loss_corrupt=6.6188 acc_corrupt=0.1833 corrupt_frac=0.5527 loss=6.6188 loss_recon=6.6188 loss_meanflow=0.0000 mean_model_t=0.4898 mean_corrupt_t=0.4898 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5102 init_acc_corrupt=0.4514 init_gold_top10=0.4836 init_gold_top100=0.5166
|
| 139 |
+
step=50 micro_steps=400 elapsed=43.3s lr=3.000000e-04 loss_all=5.3270 acc_all=0.3213 loss_corrupt=6.1520 acc_corrupt=0.2234 corrupt_frac=0.5519 loss=6.1520 loss_recon=6.1520 loss_meanflow=0.0000 mean_model_t=0.5035 mean_corrupt_t=0.5035 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5017 init_acc_corrupt=0.4630 init_gold_top10=0.4920 init_gold_top100=0.5225
|
| 140 |
+
step=55 micro_steps=440 elapsed=73.6s lr=3.000000e-04 loss_all=4.5452 acc_all=0.4781 loss_corrupt=5.7044 acc_corrupt=0.3135 corrupt_frac=0.5483 loss=5.7044 loss_recon=5.7044 loss_meanflow=0.0000 mean_model_t=0.4809 mean_corrupt_t=0.4809 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5216 init_acc_corrupt=0.4416 init_gold_top10=0.4725 init_gold_top100=0.5025
|
| 141 |
+
step=60 micro_steps=480 elapsed=48.1s lr=3.000000e-04 loss_all=3.5387 acc_all=0.6034 loss_corrupt=4.8525 acc_corrupt=0.4246 corrupt_frac=0.5421 loss=4.8525 loss_recon=4.8525 loss_meanflow=0.0000 mean_model_t=0.5272 mean_corrupt_t=0.5272 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4720 init_acc_corrupt=0.4966 init_gold_top10=0.5229 init_gold_top100=0.5512
|
| 142 |
+
step=65 micro_steps=520 elapsed=45.9s lr=3.000000e-04 loss_all=3.3057 acc_all=0.6112 loss_corrupt=4.9579 acc_corrupt=0.4032 corrupt_frac=0.5546 loss=4.9579 loss_recon=4.9579 loss_meanflow=0.0000 mean_model_t=0.4862 mean_corrupt_t=0.4862 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5271 init_acc_corrupt=0.4348 init_gold_top10=0.4664 init_gold_top100=0.5004
|
| 143 |
+
step=70 micro_steps=560 elapsed=42.2s lr=3.000000e-04 loss_all=3.1512 acc_all=0.6235 loss_corrupt=4.7593 acc_corrupt=0.4248 corrupt_frac=0.5743 loss=4.7593 loss_recon=4.7593 loss_meanflow=0.0000 mean_model_t=0.4939 mean_corrupt_t=0.4939 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5111 init_acc_corrupt=0.4545 init_gold_top10=0.4826 init_gold_top100=0.5159
|
| 144 |
+
step=75 micro_steps=600 elapsed=42.2s lr=3.000000e-04 loss_all=2.9748 acc_all=0.6482 loss_corrupt=4.6855 acc_corrupt=0.4381 corrupt_frac=0.5472 loss=4.6855 loss_recon=4.6855 loss_meanflow=0.0000 mean_model_t=0.4975 mean_corrupt_t=0.4975 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4982 init_acc_corrupt=0.4679 init_gold_top10=0.4956 init_gold_top100=0.5266
|
| 145 |
+
step=80 micro_steps=640 elapsed=68.9s lr=3.000000e-04 loss_all=2.8523 acc_all=0.6580 loss_corrupt=4.6641 acc_corrupt=0.4379 corrupt_frac=0.5418 loss=4.6641 loss_recon=4.6641 loss_meanflow=0.0000 mean_model_t=0.4888 mean_corrupt_t=0.4888 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5130 init_acc_corrupt=0.4518 init_gold_top10=0.4813 init_gold_top100=0.5123
|
| 146 |
+
step=85 micro_steps=680 elapsed=43.6s lr=3.000000e-04 loss_all=2.8406 acc_all=0.6570 loss_corrupt=4.4957 acc_corrupt=0.4543 corrupt_frac=0.5720 loss=4.4957 loss_recon=4.4957 loss_meanflow=0.0000 mean_model_t=0.4943 mean_corrupt_t=0.4943 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4969 init_acc_corrupt=0.4674 init_gold_top10=0.4971 init_gold_top100=0.5276
|
| 147 |
+
step=90 micro_steps=720 elapsed=41.1s lr=3.000000e-04 loss_all=2.7984 acc_all=0.6605 loss_corrupt=4.5917 acc_corrupt=0.4384 corrupt_frac=0.5533 loss=4.5917 loss_recon=4.5917 loss_meanflow=0.0000 mean_model_t=0.4842 mean_corrupt_t=0.4842 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5198 init_acc_corrupt=0.4432 init_gold_top10=0.4741 init_gold_top100=0.5064
|
| 148 |
+
step=95 micro_steps=760 elapsed=45.6s lr=3.000000e-04 loss_all=2.5898 acc_all=0.6871 loss_corrupt=4.3891 acc_corrupt=0.4635 corrupt_frac=0.5377 loss=4.3891 loss_recon=4.3891 loss_meanflow=0.0000 mean_model_t=0.4992 mean_corrupt_t=0.4992 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4987 init_acc_corrupt=0.4669 init_gold_top10=0.4958 init_gold_top100=0.5251
|
| 149 |
+
step=100 micro_steps=800 elapsed=46.1s lr=3.000000e-04 loss_all=2.5553 acc_all=0.6918 loss_corrupt=4.3386 acc_corrupt=0.4699 corrupt_frac=0.5449 loss=4.3386 loss_recon=4.3386 loss_meanflow=0.0000 mean_model_t=0.5043 mean_corrupt_t=0.5043 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4947 init_acc_corrupt=0.4710 init_gold_top10=0.5004 init_gold_top100=0.5283
|
LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/scalinglaw_ctx1024_exact10_vocab50257_small384x6_4gpu_2000step.log
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
*****************************************
|
| 3 |
+
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
*****************************************
|
| 5 |
+
[rank0]: Traceback (most recent call last):
|
| 6 |
+
[rank0]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 7 |
+
[rank0]: main()
|
| 8 |
+
[rank0]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 9 |
+
[rank0]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 10 |
+
[rank0]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 11 |
+
[rank0]:[W513 01:44:17.923645362 ProcessGroupNCCL.cpp:1487] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
|
| 12 |
+
[rank2]: Traceback (most recent call last):
|
| 13 |
+
[rank2]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 14 |
+
[rank2]: main()
|
| 15 |
+
[rank2]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 16 |
+
[rank2]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 17 |
+
[rank2]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 18 |
+
[rank1]: Traceback (most recent call last):
|
| 19 |
+
[rank1]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 20 |
+
[rank1]: main()
|
| 21 |
+
[rank1]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 22 |
+
[rank1]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 23 |
+
[rank1]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 24 |
+
[rank3]: Traceback (most recent call last):
|
| 25 |
+
[rank3]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 26 |
+
[rank3]: main()
|
| 27 |
+
[rank3]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 28 |
+
[rank3]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 29 |
+
[rank3]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 30 |
+
W0513 01:44:17.315000 312465 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 312533 closing signal SIGTERM
|
| 31 |
+
W0513 01:44:17.316000 312465 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 312534 closing signal SIGTERM
|
| 32 |
+
W0513 01:44:17.317000 312465 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 312535 closing signal SIGTERM
|
| 33 |
+
E0513 01:44:17.445000 312465 torch/distributed/elastic/multiprocessing/api.py:870] failed (exitcode: 1) local_rank: 0 (pid: 312532) of binary: /usr/bin/python
|
| 34 |
+
Traceback (most recent call last):
|
| 35 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
| 36 |
+
File "<frozen runpy>", line 88, in _run_code
|
| 37 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 922, in <module>
|
| 38 |
+
main()
|
| 39 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
| 40 |
+
return f(*args, **kwargs)
|
| 41 |
+
^^^^^^^^^^^^^^^^^^
|
| 42 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 918, in main
|
| 43 |
+
run(args)
|
| 44 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 909, in run
|
| 45 |
+
elastic_launch(
|
| 46 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
| 47 |
+
return launch_agent(self._config, self._entrypoint, list(args))
|
| 48 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 49 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 270, in launch_agent
|
| 50 |
+
raise ChildFailedError(
|
| 51 |
+
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
|
| 52 |
+
============================================================
|
| 53 |
+
train.py FAILED
|
| 54 |
+
------------------------------------------------------------
|
| 55 |
+
Failures:
|
| 56 |
+
<NO_OTHER_FAILURES>
|
| 57 |
+
------------------------------------------------------------
|
| 58 |
+
Root Cause (first observed failure):
|
| 59 |
+
[0]:
|
| 60 |
+
time : 2026-05-13_01:44:17
|
| 61 |
+
host : localhost
|
| 62 |
+
rank : 0 (local_rank: 0)
|
| 63 |
+
exitcode : 1 (pid: 312532)
|
| 64 |
+
error_file: <N/A>
|
| 65 |
+
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
|
| 66 |
+
============================================================
|
| 67 |
+
|
| 68 |
+
*****************************************
|
| 69 |
+
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 70 |
+
*****************************************
|
| 71 |
+
[rank0]:[W513 01:46:18.106846526 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 72 |
+
NCCL version 2.25.1+cuda12.8
|
| 73 |
+
[rank1]:[W513 01:46:18.152602439 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 74 |
+
[rank3]:[W513 01:46:18.156248186 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 75 |
+
[rank2]:[W513 01:46:18.173783313 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 76 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 77 |
+
warnings.warn(
|
| 78 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 79 |
+
warnings.warn(
|
| 80 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 81 |
+
warnings.warn(
|
| 82 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 83 |
+
warnings.warn(
|
| 84 |
+
{
|
| 85 |
+
"device": "cuda:0",
|
| 86 |
+
"rank": 0,
|
| 87 |
+
"world_size": 4,
|
| 88 |
+
"samples": "owt_cached_chunks:10",
|
| 89 |
+
"vocab_size": 50257,
|
| 90 |
+
"tokenizer_vocab_size": 50257,
|
| 91 |
+
"save_dir": "runs/scalinglaw_ctx1024_exact10_vocab50257_small384x6_4gpu_2000step",
|
| 92 |
+
"batch_size": 16,
|
| 93 |
+
"grad_accum": 8,
|
| 94 |
+
"effective_batch_size": 512,
|
| 95 |
+
"global_batch_size": 512,
|
| 96 |
+
"lr_schedule": "constant_warmup",
|
| 97 |
+
"warmup_steps": 20,
|
| 98 |
+
"min_lr": 0.0,
|
| 99 |
+
"adamw_param_groups": "all_decay",
|
| 100 |
+
"adam_beta1": 0.9,
|
| 101 |
+
"adam_beta2": 0.999,
|
| 102 |
+
"adam_eps": 1e-08,
|
| 103 |
+
"model_type": "transformer",
|
| 104 |
+
"dual_t": true,
|
| 105 |
+
"corrupt_t_mode": "independent",
|
| 106 |
+
"corrupt_min_t": null,
|
| 107 |
+
"corrupt_max_t": null,
|
| 108 |
+
"prefix_block_prob": 0.0,
|
| 109 |
+
"prefix_block_len": 128,
|
| 110 |
+
"dirichlet_endpoint_mode": "categorical_dual_t",
|
| 111 |
+
"dirichlet_semantic_t_mode": "same",
|
| 112 |
+
"dirichlet_semantic_t_value": 0.0,
|
| 113 |
+
"categorical_wrong_from_full_vocab": true,
|
| 114 |
+
"categorical_wrong_from_batch_valid_tokens": false,
|
| 115 |
+
"mask_mixture_original_prob": 0.0,
|
| 116 |
+
"mask_mixture_lowk_prob": 0.0,
|
| 117 |
+
"mask_mixture_lowcorrupt_prob": 0.0,
|
| 118 |
+
"mask_mixture_block_prob": 0.0,
|
| 119 |
+
"mask_mixture_all_prob": 0.0,
|
| 120 |
+
"mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
|
| 121 |
+
"mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
|
| 122 |
+
"mask_mixture_block_tokens": "64,128",
|
| 123 |
+
"simplex_bridge_sampler": "dirichlet",
|
| 124 |
+
"logistic_normal_sigma_min": 0.18,
|
| 125 |
+
"logistic_normal_sigma_max": 2.2,
|
| 126 |
+
"logistic_normal_tau_min": 0.65,
|
| 127 |
+
"logistic_normal_tau_max": 1.15,
|
| 128 |
+
"torch_compile": false,
|
| 129 |
+
"compile_mode": "max-autotune",
|
| 130 |
+
"state_format": "prob",
|
| 131 |
+
"target_loss": "hard_ce",
|
| 132 |
+
"meanflow_weight": 0.0,
|
| 133 |
+
"bridge_noise_init": "logistic_normal",
|
| 134 |
+
"noise_sigma": -1.0,
|
| 135 |
+
"wrap": true,
|
| 136 |
+
"wrap_mode": "stream",
|
| 137 |
+
"wrap_record_buffer_size": 200,
|
| 138 |
+
"owt_cached_chunks": true,
|
| 139 |
+
"owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k_exact10_minvocab",
|
| 140 |
+
"owt_chunk_cache_rebuild": false,
|
| 141 |
+
"owt_chunk_cache_write_batch": 4096,
|
| 142 |
+
"owt_exact_repeat_per_chunk": 10000,
|
| 143 |
+
"online_chunk_shuffle": false,
|
| 144 |
+
"online_chunk_shuffle_buffer": 10000,
|
| 145 |
+
"openwebtext_split": "all",
|
| 146 |
+
"detokenizer": "auto",
|
| 147 |
+
"resolved_detokenizer": null,
|
| 148 |
+
"num_workers": 0,
|
| 149 |
+
"latest_every": 500,
|
| 150 |
+
"resume_path": ""
|
| 151 |
+
}
|
| 152 |
+
step=25 micro_steps=200 elapsed=51.0s lr=3.000000e-04 loss_all=9.5698 acc_all=0.0351 loss_corrupt=9.5720 acc_corrupt=0.0348 corrupt_frac=0.5538 loss=9.5720 loss_recon=9.5720 loss_meanflow=0.0000 mean_model_t=0.5004 mean_corrupt_t=0.5067 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4889 init_acc_corrupt=0.4773 init_gold_top10=0.5053 init_gold_top100=0.5565
|
| 153 |
+
step=50 micro_steps=400 elapsed=59.4s lr=3.000000e-04 loss_all=6.9808 acc_all=0.0435 loss_corrupt=6.9836 acc_corrupt=0.0426 corrupt_frac=0.5502 loss=6.9836 loss_recon=6.9836 loss_meanflow=0.0000 mean_model_t=0.4927 mean_corrupt_t=0.4946 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5015 init_acc_corrupt=0.4633 init_gold_top10=0.4927 init_gold_top100=0.5466
|
| 154 |
+
step=75 micro_steps=600 elapsed=62.6s lr=3.000000e-04 loss_all=6.5358 acc_all=0.0417 loss_corrupt=6.5414 acc_corrupt=0.0417 corrupt_frac=0.5479 loss=6.5414 loss_recon=6.5414 loss_meanflow=0.0000 mean_model_t=0.4907 mean_corrupt_t=0.5026 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4971 init_acc_corrupt=0.4696 init_gold_top10=0.4972 init_gold_top100=0.5490
|
| 155 |
+
step=100 micro_steps=800 elapsed=63.8s lr=3.000000e-04 loss_all=6.4942 acc_all=0.0471 loss_corrupt=6.5060 acc_corrupt=0.0454 corrupt_frac=0.5491 loss=6.5060 loss_recon=6.5060 loss_meanflow=0.0000 mean_model_t=0.4977 mean_corrupt_t=0.5094 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4881 init_acc_corrupt=0.4785 init_gold_top10=0.5065 init_gold_top100=0.5552
|
| 156 |
+
step=125 micro_steps=1000 elapsed=64.4s lr=3.000000e-04 loss_all=6.2915 acc_all=0.0970 loss_corrupt=6.3833 acc_corrupt=0.0779 corrupt_frac=0.5601 loss=6.3833 loss_recon=6.3833 loss_meanflow=0.0000 mean_model_t=0.5049 mean_corrupt_t=0.4978 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5000 init_acc_corrupt=0.4654 init_gold_top10=0.4943 init_gold_top100=0.5459
|
| 157 |
+
step=150 micro_steps=1200 elapsed=64.7s lr=3.000000e-04 loss_all=5.5438 acc_all=0.1897 loss_corrupt=5.8703 acc_corrupt=0.1546 corrupt_frac=0.5550 loss=5.8703 loss_recon=5.8703 loss_meanflow=0.0000 mean_model_t=0.5132 mean_corrupt_t=0.4969 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5024 init_acc_corrupt=0.4618 init_gold_top10=0.4917 init_gold_top100=0.5428
|
| 158 |
+
step=175 micro_steps=1400 elapsed=64.9s lr=3.000000e-04 loss_all=4.6997 acc_all=0.2680 loss_corrupt=5.2135 acc_corrupt=0.2113 corrupt_frac=0.5524 loss=5.2135 loss_recon=5.2135 loss_meanflow=0.0000 mean_model_t=0.5089 mean_corrupt_t=0.5104 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4923 init_acc_corrupt=0.4735 init_gold_top10=0.5019 init_gold_top100=0.5528
|
| 159 |
+
step=200 micro_steps=1600 elapsed=65.0s lr=3.000000e-04 loss_all=3.9019 acc_all=0.3769 loss_corrupt=4.5546 acc_corrupt=0.2747 corrupt_frac=0.5513 loss=4.5546 loss_recon=4.5546 loss_meanflow=0.0000 mean_model_t=0.5030 mean_corrupt_t=0.5062 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4907 init_acc_corrupt=0.4750 init_gold_top10=0.5040 init_gold_top100=0.5515
|
| 160 |
+
step=225 micro_steps=1800 elapsed=65.2s lr=3.000000e-04 loss_all=3.3872 acc_all=0.5167 loss_corrupt=4.1294 acc_corrupt=0.3459 corrupt_frac=0.5499 loss=4.1294 loss_recon=4.1294 loss_meanflow=0.0000 mean_model_t=0.4978 mean_corrupt_t=0.5020 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5021 init_acc_corrupt=0.4618 init_gold_top10=0.4920 init_gold_top100=0.5455
|
| 161 |
+
step=250 micro_steps=2000 elapsed=65.2s lr=3.000000e-04 loss_all=3.0234 acc_all=0.6385 loss_corrupt=3.8056 acc_corrupt=0.4251 corrupt_frac=0.5563 loss=3.8056 loss_recon=3.8056 loss_meanflow=0.0000 mean_model_t=0.4949 mean_corrupt_t=0.5037 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4973 init_acc_corrupt=0.4692 init_gold_top10=0.4971 init_gold_top100=0.5482
|
| 162 |
+
step=275 micro_steps=2200 elapsed=65.1s lr=3.000000e-04 loss_all=2.6022 acc_all=0.7098 loss_corrupt=3.4180 acc_corrupt=0.4904 corrupt_frac=0.5559 loss=3.4180 loss_recon=3.4180 loss_meanflow=0.0000 mean_model_t=0.4927 mean_corrupt_t=0.5018 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4963 init_acc_corrupt=0.4691 init_gold_top10=0.4982 init_gold_top100=0.5482
|
| 163 |
+
step=300 micro_steps=2400 elapsed=64.9s lr=3.000000e-04 loss_all=2.2382 acc_all=0.7375 loss_corrupt=3.0869 acc_corrupt=0.5279 corrupt_frac=0.5560 loss=3.0869 loss_recon=3.0869 loss_meanflow=0.0000 mean_model_t=0.5101 mean_corrupt_t=0.5021 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4983 init_acc_corrupt=0.4672 init_gold_top10=0.4959 init_gold_top100=0.5493
|
| 164 |
+
step=325 micro_steps=2600 elapsed=64.5s lr=3.000000e-04 loss_all=1.9767 acc_all=0.7410 loss_corrupt=2.8649 acc_corrupt=0.5331 corrupt_frac=0.5541 loss=2.8649 loss_recon=2.8649 loss_meanflow=0.0000 mean_model_t=0.5050 mean_corrupt_t=0.4960 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5083 init_acc_corrupt=0.4563 init_gold_top10=0.4857 init_gold_top100=0.5395
|
| 165 |
+
step=350 micro_steps=2800 elapsed=65.1s lr=3.000000e-04 loss_all=1.6568 acc_all=0.7574 loss_corrupt=2.5097 acc_corrupt=0.5616 corrupt_frac=0.5528 loss=2.5097 loss_recon=2.5097 loss_meanflow=0.0000 mean_model_t=0.4963 mean_corrupt_t=0.5107 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4878 init_acc_corrupt=0.4793 init_gold_top10=0.5066 init_gold_top100=0.5567
|
| 166 |
+
step=375 micro_steps=3000 elapsed=64.8s lr=3.000000e-04 loss_all=1.4678 acc_all=0.7555 loss_corrupt=2.3501 acc_corrupt=0.5523 corrupt_frac=0.5468 loss=2.3501 loss_recon=2.3501 loss_meanflow=0.0000 mean_model_t=0.5051 mean_corrupt_t=0.4982 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5020 init_acc_corrupt=0.4619 init_gold_top10=0.4920 init_gold_top100=0.5448
|
| 167 |
+
step=400 micro_steps=3200 elapsed=64.6s lr=3.000000e-04 loss_all=1.2952 acc_all=0.7603 loss_corrupt=2.1428 acc_corrupt=0.5624 corrupt_frac=0.5475 loss=2.1428 loss_recon=2.1428 loss_meanflow=0.0000 mean_model_t=0.4886 mean_corrupt_t=0.5051 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4978 init_acc_corrupt=0.4664 init_gold_top10=0.4966 init_gold_top100=0.5475
|
| 168 |
+
step=425 micro_steps=3400 elapsed=64.8s lr=3.000000e-04 loss_all=1.1578 acc_all=0.7646 loss_corrupt=1.9592 acc_corrupt=0.5729 corrupt_frac=0.5509 loss=1.9592 loss_recon=1.9592 loss_meanflow=0.0000 mean_model_t=0.5035 mean_corrupt_t=0.4999 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4939 init_acc_corrupt=0.4715 init_gold_top10=0.5003 init_gold_top100=0.5506
|
| 169 |
+
step=450 micro_steps=3600 elapsed=64.7s lr=3.000000e-04 loss_all=1.0533 acc_all=0.7693 loss_corrupt=1.8220 acc_corrupt=0.5805 corrupt_frac=0.5503 loss=1.8220 loss_recon=1.8220 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.5016 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4946 init_acc_corrupt=0.4706 init_gold_top10=0.4998 init_gold_top100=0.5502
|
| 170 |
+
step=475 micro_steps=3800 elapsed=64.7s lr=3.000000e-04 loss_all=0.9739 acc_all=0.7743 loss_corrupt=1.6798 acc_corrupt=0.5961 corrupt_frac=0.5586 loss=1.6798 loss_recon=1.6798 loss_meanflow=0.0000 mean_model_t=0.5064 mean_corrupt_t=0.5119 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4869 init_acc_corrupt=0.4807 init_gold_top10=0.5078 init_gold_top100=0.5566
|
| 171 |
+
step=500 micro_steps=4000 elapsed=64.8s lr=3.000000e-04 loss_all=0.8900 acc_all=0.7838 loss_corrupt=1.5706 acc_corrupt=0.6084 corrupt_frac=0.5516 loss=1.5706 loss_recon=1.5706 loss_meanflow=0.0000 mean_model_t=0.4952 mean_corrupt_t=0.5048 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4970 init_acc_corrupt=0.4698 init_gold_top10=0.4974 init_gold_top100=0.5487
|
| 172 |
+
step=525 micro_steps=4200 elapsed=65.9s lr=3.000000e-04 loss_all=0.7953 acc_all=0.8066 loss_corrupt=1.4415 acc_corrupt=0.6420 corrupt_frac=0.5395 loss=1.4415 loss_recon=1.4415 loss_meanflow=0.0000 mean_model_t=0.5024 mean_corrupt_t=0.5028 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4925 init_acc_corrupt=0.4743 init_gold_top10=0.5020 init_gold_top100=0.5513
|
| 173 |
+
step=550 micro_steps=4400 elapsed=64.7s lr=3.000000e-04 loss_all=0.7557 acc_all=0.8208 loss_corrupt=1.3456 acc_corrupt=0.6753 corrupt_frac=0.5522 loss=1.3456 loss_recon=1.3456 loss_meanflow=0.0000 mean_model_t=0.4996 mean_corrupt_t=0.4982 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5028 init_acc_corrupt=0.4624 init_gold_top10=0.4915 init_gold_top100=0.5432
|
| 174 |
+
step=575 micro_steps=4600 elapsed=64.5s lr=3.000000e-04 loss_all=0.6402 acc_all=0.8644 loss_corrupt=1.1534 acc_corrupt=0.7515 corrupt_frac=0.5452 loss=1.1534 loss_recon=1.1534 loss_meanflow=0.0000 mean_model_t=0.5019 mean_corrupt_t=0.4997 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5020 init_acc_corrupt=0.4630 init_gold_top10=0.4920 init_gold_top100=0.5448
|
| 175 |
+
step=600 micro_steps=4800 elapsed=64.6s lr=3.000000e-04 loss_all=0.5154 acc_all=0.9112 loss_corrupt=0.9217 acc_corrupt=0.8384 corrupt_frac=0.5482 loss=0.9217 loss_recon=0.9217 loss_meanflow=0.0000 mean_model_t=0.4926 mean_corrupt_t=0.4977 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5034 init_acc_corrupt=0.4628 init_gold_top10=0.4908 init_gold_top100=0.5449
|
| 176 |
+
step=625 micro_steps=5000 elapsed=64.8s lr=3.000000e-04 loss_all=0.3651 acc_all=0.9599 loss_corrupt=0.6465 acc_corrupt=0.9279 corrupt_frac=0.5518 loss=0.6465 loss_recon=0.6465 loss_meanflow=0.0000 mean_model_t=0.5008 mean_corrupt_t=0.5011 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5007 init_acc_corrupt=0.4636 init_gold_top10=0.4933 init_gold_top100=0.5457
|
| 177 |
+
step=650 micro_steps=5200 elapsed=64.4s lr=3.000000e-04 loss_all=0.2195 acc_all=0.9897 loss_corrupt=0.3918 acc_corrupt=0.9812 corrupt_frac=0.5405 loss=0.3918 loss_recon=0.3918 loss_meanflow=0.0000 mean_model_t=0.4929 mean_corrupt_t=0.4927 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5096 init_acc_corrupt=0.4549 init_gold_top10=0.4843 init_gold_top100=0.5381
|
| 178 |
+
step=675 micro_steps=5400 elapsed=64.7s lr=3.000000e-04 loss_all=0.1207 acc_all=0.9953 loss_corrupt=0.2134 acc_corrupt=0.9913 corrupt_frac=0.5407 loss=0.2134 loss_recon=0.2134 loss_meanflow=0.0000 mean_model_t=0.5040 mean_corrupt_t=0.4962 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5034 init_acc_corrupt=0.4609 init_gold_top10=0.4908 init_gold_top100=0.5421
|
| 179 |
+
W0513 02:15:32.291000 316519 torch/distributed/elastic/agent/server/api.py:719] Received 15 death signal, shutting down workers
|
| 180 |
+
W0513 02:15:32.293000 316519 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 316613 closing signal SIGTERM
|
| 181 |
+
W0513 02:15:32.294000 316519 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 316614 closing signal SIGTERM
|
| 182 |
+
W0513 02:15:32.294000 316519 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 316615 closing signal SIGTERM
|
| 183 |
+
W0513 02:15:32.295000 316519 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 316616 closing signal SIGTERM
|
| 184 |
+
Traceback (most recent call last):
|
| 185 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
| 186 |
+
File "<frozen runpy>", line 88, in _run_code
|
| 187 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 922, in <module>
|
| 188 |
+
main()
|
| 189 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
| 190 |
+
return f(*args, **kwargs)
|
| 191 |
+
^^^^^^^^^^^^^^^^^^
|
| 192 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 918, in main
|
| 193 |
+
run(args)
|
| 194 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 909, in run
|
| 195 |
+
elastic_launch(
|
| 196 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
| 197 |
+
return launch_agent(self._config, self._entrypoint, list(args))
|
| 198 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 199 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 261, in launch_agent
|
| 200 |
+
result = agent.run()
|
| 201 |
+
^^^^^^^^^^^
|
| 202 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/metrics/api.py", line 137, in wrapper
|
| 203 |
+
result = f(*args, **kwargs)
|
| 204 |
+
^^^^^^^^^^^^^^^^^^
|
| 205 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 711, in run
|
| 206 |
+
result = self._invoke_run(role)
|
| 207 |
+
^^^^^^^^^^^^^^^^^^^^^^
|
| 208 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/agent/server/api.py", line 870, in _invoke_run
|
| 209 |
+
time.sleep(monitor_interval)
|
| 210 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/api.py", line 84, in _terminate_process_handler
|
| 211 |
+
raise SignalException(f"Process {os.getpid()} got signal: {sigval}", sigval=sigval)
|
| 212 |
+
torch.distributed.elastic.multiprocessing.api.SignalException: Process 316519 got signal: 15
|
LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/scalinglaw_samples2_192x3_c512_vocab50257_4gpu_3000step.log
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
*****************************************
|
| 3 |
+
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
|
| 4 |
+
*****************************************
|
| 5 |
+
[rank0]: Traceback (most recent call last):
|
| 6 |
+
[rank0]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 7 |
+
[rank0]: main()
|
| 8 |
+
[rank0]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 9 |
+
[rank0]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 10 |
+
[rank0]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 11 |
+
[rank0]:[W513 01:44:40.999052026 ProcessGroupNCCL.cpp:1487] Warning: WARNING: destroy_process_group() was not called before program exit, which can leak resources. For more info, please see https://pytorch.org/docs/stable/distributed.html#shutdown (function operator())
|
| 12 |
+
[rank3]: Traceback (most recent call last):
|
| 13 |
+
[rank3]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 14 |
+
[rank3]: main()
|
| 15 |
+
[rank3]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 16 |
+
[rank3]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 17 |
+
[rank3]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 18 |
+
[rank2]: Traceback (most recent call last):
|
| 19 |
+
[rank2]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 20 |
+
[rank2]: main()
|
| 21 |
+
[rank2]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 22 |
+
[rank2]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 23 |
+
[rank2]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 24 |
+
[rank1]: Traceback (most recent call last):
|
| 25 |
+
[rank1]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 1000, in <module>
|
| 26 |
+
[rank1]: main()
|
| 27 |
+
[rank1]: File "/e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt/train.py", line 615, in main
|
| 28 |
+
[rank1]: raise ValueError("--owt_cached_chunks requires --wrap --wrap_mode stream")
|
| 29 |
+
[rank1]: ValueError: --owt_cached_chunks requires --wrap --wrap_mode stream
|
| 30 |
+
W0513 01:44:40.334000 313116 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 313184 closing signal SIGTERM
|
| 31 |
+
W0513 01:44:40.335000 313116 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 313185 closing signal SIGTERM
|
| 32 |
+
W0513 01:44:40.336000 313116 torch/distributed/elastic/multiprocessing/api.py:898] Sending process 313186 closing signal SIGTERM
|
| 33 |
+
E0513 01:44:40.463000 313116 torch/distributed/elastic/multiprocessing/api.py:870] failed (exitcode: 1) local_rank: 0 (pid: 313183) of binary: /usr/bin/python
|
| 34 |
+
Traceback (most recent call last):
|
| 35 |
+
File "<frozen runpy>", line 198, in _run_module_as_main
|
| 36 |
+
File "<frozen runpy>", line 88, in _run_code
|
| 37 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 922, in <module>
|
| 38 |
+
main()
|
| 39 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/elastic/multiprocessing/errors/__init__.py", line 355, in wrapper
|
| 40 |
+
return f(*args, **kwargs)
|
| 41 |
+
^^^^^^^^^^^^^^^^^^
|
| 42 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 918, in main
|
| 43 |
+
run(args)
|
| 44 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/run.py", line 909, in run
|
| 45 |
+
elastic_launch(
|
| 46 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 139, in __call__
|
| 47 |
+
return launch_agent(self._config, self._entrypoint, list(args))
|
| 48 |
+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
| 49 |
+
File "/usr/local/lib/python3.12/dist-packages/torch/distributed/launcher/api.py", line 270, in launch_agent
|
| 50 |
+
raise ChildFailedError(
|
| 51 |
+
torch.distributed.elastic.multiprocessing.errors.ChildFailedError:
|
| 52 |
+
============================================================
|
| 53 |
+
train.py FAILED
|
| 54 |
+
------------------------------------------------------------
|
| 55 |
+
Failures:
|
| 56 |
+
<NO_OTHER_FAILURES>
|
| 57 |
+
------------------------------------------------------------
|
| 58 |
+
Root Cause (first observed failure):
|
| 59 |
+
[0]:
|
| 60 |
+
time : 2026-05-13_01:44:40
|
| 61 |
+
host : localhost
|
| 62 |
+
rank : 0 (local_rank: 0)
|
| 63 |
+
exitcode : 1 (pid: 313183)
|
| 64 |
+
error_file: <N/A>
|
| 65 |
+
traceback : To enable traceback see: https://pytorch.org/docs/stable/elastic/errors.html
|
| 66 |
+
============================================================
|
LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_ctx256_small384x6_step500.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"out_json": "docs/lta_samples/metrics_20260513/scalinglaw_4gpu_20260513/ctx256_small384x6_step500/trace_steps64_c48_t1p45.json", "records": 10, "step": 500}
|
LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_ctx512_small384x6_step500.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 2 |
+
warnings.warn(
|
| 3 |
+
{"out_json": "docs/lta_samples/metrics_20260513/scalinglaw_4gpu_20260513/ctx512_small384x6_step500/trace_steps64_c48_t1p45.json", "records": 10, "step": 500}
|
LTA_openwebtext_dualt/logs/scalinglaw_4gpu_20260513/trace_params512x8_c512_vocab50257_step750.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/usr/local/lib/python3.12/dist-packages/torch/nn/modules/transformer.py:375: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True
|
| 2 |
+
warnings.warn(
|
| 3 |
+
{"out_json": "docs/lta_samples/metrics_20260513/scalinglaw_4gpu_20260513/params512x8_c512_vocab50257_step750/trace_steps64_c48_t1p45.json", "records": 10, "step": 750}
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/__multiarray_api.c
ADDED
|
@@ -0,0 +1,314 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
/* These pointers will be stored in the C-object for use in other
|
| 3 |
+
extension modules
|
| 4 |
+
*/
|
| 5 |
+
|
| 6 |
+
void *PyArray_API[] = {
|
| 7 |
+
(void *) PyArray_GetNDArrayCVersion,
|
| 8 |
+
(void *) &PyBigArray_Type,
|
| 9 |
+
(void *) &PyArray_Type,
|
| 10 |
+
(void *) &PyArrayDescr_Type,
|
| 11 |
+
(void *) &PyArrayFlags_Type,
|
| 12 |
+
(void *) &PyArrayIter_Type,
|
| 13 |
+
(void *) &PyArrayMultiIter_Type,
|
| 14 |
+
(int *) &NPY_NUMUSERTYPES,
|
| 15 |
+
(void *) &PyBoolArrType_Type,
|
| 16 |
+
(void *) &_PyArrayScalar_BoolValues,
|
| 17 |
+
(void *) &PyGenericArrType_Type,
|
| 18 |
+
(void *) &PyNumberArrType_Type,
|
| 19 |
+
(void *) &PyIntegerArrType_Type,
|
| 20 |
+
(void *) &PySignedIntegerArrType_Type,
|
| 21 |
+
(void *) &PyUnsignedIntegerArrType_Type,
|
| 22 |
+
(void *) &PyInexactArrType_Type,
|
| 23 |
+
(void *) &PyFloatingArrType_Type,
|
| 24 |
+
(void *) &PyComplexFloatingArrType_Type,
|
| 25 |
+
(void *) &PyFlexibleArrType_Type,
|
| 26 |
+
(void *) &PyCharacterArrType_Type,
|
| 27 |
+
(void *) &PyByteArrType_Type,
|
| 28 |
+
(void *) &PyShortArrType_Type,
|
| 29 |
+
(void *) &PyIntArrType_Type,
|
| 30 |
+
(void *) &PyLongArrType_Type,
|
| 31 |
+
(void *) &PyLongLongArrType_Type,
|
| 32 |
+
(void *) &PyUByteArrType_Type,
|
| 33 |
+
(void *) &PyUShortArrType_Type,
|
| 34 |
+
(void *) &PyUIntArrType_Type,
|
| 35 |
+
(void *) &PyULongArrType_Type,
|
| 36 |
+
(void *) &PyULongLongArrType_Type,
|
| 37 |
+
(void *) &PyFloatArrType_Type,
|
| 38 |
+
(void *) &PyDoubleArrType_Type,
|
| 39 |
+
(void *) &PyLongDoubleArrType_Type,
|
| 40 |
+
(void *) &PyCFloatArrType_Type,
|
| 41 |
+
(void *) &PyCDoubleArrType_Type,
|
| 42 |
+
(void *) &PyCLongDoubleArrType_Type,
|
| 43 |
+
(void *) &PyObjectArrType_Type,
|
| 44 |
+
(void *) &PyStringArrType_Type,
|
| 45 |
+
(void *) &PyUnicodeArrType_Type,
|
| 46 |
+
(void *) &PyVoidArrType_Type,
|
| 47 |
+
(void *) PyArray_SetNumericOps,
|
| 48 |
+
(void *) PyArray_GetNumericOps,
|
| 49 |
+
(void *) PyArray_INCREF,
|
| 50 |
+
(void *) PyArray_XDECREF,
|
| 51 |
+
(void *) PyArray_SetStringFunction,
|
| 52 |
+
(void *) PyArray_DescrFromType,
|
| 53 |
+
(void *) PyArray_TypeObjectFromType,
|
| 54 |
+
(void *) PyArray_Zero,
|
| 55 |
+
(void *) PyArray_One,
|
| 56 |
+
(void *) PyArray_CastToType,
|
| 57 |
+
(void *) PyArray_CastTo,
|
| 58 |
+
(void *) PyArray_CastAnyTo,
|
| 59 |
+
(void *) PyArray_CanCastSafely,
|
| 60 |
+
(void *) PyArray_CanCastTo,
|
| 61 |
+
(void *) PyArray_ObjectType,
|
| 62 |
+
(void *) PyArray_DescrFromObject,
|
| 63 |
+
(void *) PyArray_ConvertToCommonType,
|
| 64 |
+
(void *) PyArray_DescrFromScalar,
|
| 65 |
+
(void *) PyArray_DescrFromTypeObject,
|
| 66 |
+
(void *) PyArray_Size,
|
| 67 |
+
(void *) PyArray_Scalar,
|
| 68 |
+
(void *) PyArray_FromScalar,
|
| 69 |
+
(void *) PyArray_ScalarAsCtype,
|
| 70 |
+
(void *) PyArray_CastScalarToCtype,
|
| 71 |
+
(void *) PyArray_CastScalarDirect,
|
| 72 |
+
(void *) PyArray_ScalarFromObject,
|
| 73 |
+
(void *) PyArray_GetCastFunc,
|
| 74 |
+
(void *) PyArray_FromDims,
|
| 75 |
+
(void *) PyArray_FromDimsAndDataAndDescr,
|
| 76 |
+
(void *) PyArray_FromAny,
|
| 77 |
+
(void *) PyArray_EnsureArray,
|
| 78 |
+
(void *) PyArray_EnsureAnyArray,
|
| 79 |
+
(void *) PyArray_FromFile,
|
| 80 |
+
(void *) PyArray_FromString,
|
| 81 |
+
(void *) PyArray_FromBuffer,
|
| 82 |
+
(void *) PyArray_FromIter,
|
| 83 |
+
(void *) PyArray_Return,
|
| 84 |
+
(void *) PyArray_GetField,
|
| 85 |
+
(void *) PyArray_SetField,
|
| 86 |
+
(void *) PyArray_Byteswap,
|
| 87 |
+
(void *) PyArray_Resize,
|
| 88 |
+
(void *) PyArray_MoveInto,
|
| 89 |
+
(void *) PyArray_CopyInto,
|
| 90 |
+
(void *) PyArray_CopyAnyInto,
|
| 91 |
+
(void *) PyArray_CopyObject,
|
| 92 |
+
(void *) PyArray_NewCopy,
|
| 93 |
+
(void *) PyArray_ToList,
|
| 94 |
+
(void *) PyArray_ToString,
|
| 95 |
+
(void *) PyArray_ToFile,
|
| 96 |
+
(void *) PyArray_Dump,
|
| 97 |
+
(void *) PyArray_Dumps,
|
| 98 |
+
(void *) PyArray_ValidType,
|
| 99 |
+
(void *) PyArray_UpdateFlags,
|
| 100 |
+
(void *) PyArray_New,
|
| 101 |
+
(void *) PyArray_NewFromDescr,
|
| 102 |
+
(void *) PyArray_DescrNew,
|
| 103 |
+
(void *) PyArray_DescrNewFromType,
|
| 104 |
+
(void *) PyArray_GetPriority,
|
| 105 |
+
(void *) PyArray_IterNew,
|
| 106 |
+
(void *) PyArray_MultiIterNew,
|
| 107 |
+
(void *) PyArray_PyIntAsInt,
|
| 108 |
+
(void *) PyArray_PyIntAsIntp,
|
| 109 |
+
(void *) PyArray_Broadcast,
|
| 110 |
+
(void *) PyArray_FillObjectArray,
|
| 111 |
+
(void *) PyArray_FillWithScalar,
|
| 112 |
+
(void *) PyArray_CheckStrides,
|
| 113 |
+
(void *) PyArray_DescrNewByteorder,
|
| 114 |
+
(void *) PyArray_IterAllButAxis,
|
| 115 |
+
(void *) PyArray_CheckFromAny,
|
| 116 |
+
(void *) PyArray_FromArray,
|
| 117 |
+
(void *) PyArray_FromInterface,
|
| 118 |
+
(void *) PyArray_FromStructInterface,
|
| 119 |
+
(void *) PyArray_FromArrayAttr,
|
| 120 |
+
(void *) PyArray_ScalarKind,
|
| 121 |
+
(void *) PyArray_CanCoerceScalar,
|
| 122 |
+
(void *) PyArray_NewFlagsObject,
|
| 123 |
+
(void *) PyArray_CanCastScalar,
|
| 124 |
+
(void *) PyArray_CompareUCS4,
|
| 125 |
+
(void *) PyArray_RemoveSmallest,
|
| 126 |
+
(void *) PyArray_ElementStrides,
|
| 127 |
+
(void *) PyArray_Item_INCREF,
|
| 128 |
+
(void *) PyArray_Item_XDECREF,
|
| 129 |
+
(void *) PyArray_FieldNames,
|
| 130 |
+
(void *) PyArray_Transpose,
|
| 131 |
+
(void *) PyArray_TakeFrom,
|
| 132 |
+
(void *) PyArray_PutTo,
|
| 133 |
+
(void *) PyArray_PutMask,
|
| 134 |
+
(void *) PyArray_Repeat,
|
| 135 |
+
(void *) PyArray_Choose,
|
| 136 |
+
(void *) PyArray_Sort,
|
| 137 |
+
(void *) PyArray_ArgSort,
|
| 138 |
+
(void *) PyArray_SearchSorted,
|
| 139 |
+
(void *) PyArray_ArgMax,
|
| 140 |
+
(void *) PyArray_ArgMin,
|
| 141 |
+
(void *) PyArray_Reshape,
|
| 142 |
+
(void *) PyArray_Newshape,
|
| 143 |
+
(void *) PyArray_Squeeze,
|
| 144 |
+
(void *) PyArray_View,
|
| 145 |
+
(void *) PyArray_SwapAxes,
|
| 146 |
+
(void *) PyArray_Max,
|
| 147 |
+
(void *) PyArray_Min,
|
| 148 |
+
(void *) PyArray_Ptp,
|
| 149 |
+
(void *) PyArray_Mean,
|
| 150 |
+
(void *) PyArray_Trace,
|
| 151 |
+
(void *) PyArray_Diagonal,
|
| 152 |
+
(void *) PyArray_Clip,
|
| 153 |
+
(void *) PyArray_Conjugate,
|
| 154 |
+
(void *) PyArray_Nonzero,
|
| 155 |
+
(void *) PyArray_Std,
|
| 156 |
+
(void *) PyArray_Sum,
|
| 157 |
+
(void *) PyArray_CumSum,
|
| 158 |
+
(void *) PyArray_Prod,
|
| 159 |
+
(void *) PyArray_CumProd,
|
| 160 |
+
(void *) PyArray_All,
|
| 161 |
+
(void *) PyArray_Any,
|
| 162 |
+
(void *) PyArray_Compress,
|
| 163 |
+
(void *) PyArray_Flatten,
|
| 164 |
+
(void *) PyArray_Ravel,
|
| 165 |
+
(void *) PyArray_MultiplyList,
|
| 166 |
+
(void *) PyArray_MultiplyIntList,
|
| 167 |
+
(void *) PyArray_GetPtr,
|
| 168 |
+
(void *) PyArray_CompareLists,
|
| 169 |
+
(void *) PyArray_AsCArray,
|
| 170 |
+
(void *) PyArray_As1D,
|
| 171 |
+
(void *) PyArray_As2D,
|
| 172 |
+
(void *) PyArray_Free,
|
| 173 |
+
(void *) PyArray_Converter,
|
| 174 |
+
(void *) PyArray_IntpFromSequence,
|
| 175 |
+
(void *) PyArray_Concatenate,
|
| 176 |
+
(void *) PyArray_InnerProduct,
|
| 177 |
+
(void *) PyArray_MatrixProduct,
|
| 178 |
+
(void *) PyArray_CopyAndTranspose,
|
| 179 |
+
(void *) PyArray_Correlate,
|
| 180 |
+
(void *) PyArray_TypestrConvert,
|
| 181 |
+
(void *) PyArray_DescrConverter,
|
| 182 |
+
(void *) PyArray_DescrConverter2,
|
| 183 |
+
(void *) PyArray_IntpConverter,
|
| 184 |
+
(void *) PyArray_BufferConverter,
|
| 185 |
+
(void *) PyArray_AxisConverter,
|
| 186 |
+
(void *) PyArray_BoolConverter,
|
| 187 |
+
(void *) PyArray_ByteorderConverter,
|
| 188 |
+
(void *) PyArray_OrderConverter,
|
| 189 |
+
(void *) PyArray_EquivTypes,
|
| 190 |
+
(void *) PyArray_Zeros,
|
| 191 |
+
(void *) PyArray_Empty,
|
| 192 |
+
(void *) PyArray_Where,
|
| 193 |
+
(void *) PyArray_Arange,
|
| 194 |
+
(void *) PyArray_ArangeObj,
|
| 195 |
+
(void *) PyArray_SortkindConverter,
|
| 196 |
+
(void *) PyArray_LexSort,
|
| 197 |
+
(void *) PyArray_Round,
|
| 198 |
+
(void *) PyArray_EquivTypenums,
|
| 199 |
+
(void *) PyArray_RegisterDataType,
|
| 200 |
+
(void *) PyArray_RegisterCastFunc,
|
| 201 |
+
(void *) PyArray_RegisterCanCast,
|
| 202 |
+
(void *) PyArray_InitArrFuncs,
|
| 203 |
+
(void *) PyArray_IntTupleFromIntp,
|
| 204 |
+
(void *) PyArray_TypeNumFromName,
|
| 205 |
+
(void *) PyArray_ClipmodeConverter,
|
| 206 |
+
(void *) PyArray_OutputConverter,
|
| 207 |
+
(void *) PyArray_BroadcastToShape,
|
| 208 |
+
(void *) _PyArray_SigintHandler,
|
| 209 |
+
(void *) _PyArray_GetSigintBuf,
|
| 210 |
+
(void *) PyArray_DescrAlignConverter,
|
| 211 |
+
(void *) PyArray_DescrAlignConverter2,
|
| 212 |
+
(void *) PyArray_SearchsideConverter,
|
| 213 |
+
(void *) PyArray_CheckAxis,
|
| 214 |
+
(void *) PyArray_OverflowMultiplyList,
|
| 215 |
+
(void *) PyArray_CompareString,
|
| 216 |
+
(void *) PyArray_MultiIterFromObjects,
|
| 217 |
+
(void *) PyArray_GetEndianness,
|
| 218 |
+
(void *) PyArray_GetNDArrayCFeatureVersion,
|
| 219 |
+
(void *) PyArray_Correlate2,
|
| 220 |
+
(void *) PyArray_NeighborhoodIterNew,
|
| 221 |
+
(void *) &PyTimeIntegerArrType_Type,
|
| 222 |
+
(void *) &PyDatetimeArrType_Type,
|
| 223 |
+
(void *) &PyTimedeltaArrType_Type,
|
| 224 |
+
(void *) &PyHalfArrType_Type,
|
| 225 |
+
(void *) &NpyIter_Type,
|
| 226 |
+
(void *) PyArray_SetDatetimeParseFunction,
|
| 227 |
+
(void *) PyArray_DatetimeToDatetimeStruct,
|
| 228 |
+
(void *) PyArray_TimedeltaToTimedeltaStruct,
|
| 229 |
+
(void *) PyArray_DatetimeStructToDatetime,
|
| 230 |
+
(void *) PyArray_TimedeltaStructToTimedelta,
|
| 231 |
+
(void *) NpyIter_New,
|
| 232 |
+
(void *) NpyIter_MultiNew,
|
| 233 |
+
(void *) NpyIter_AdvancedNew,
|
| 234 |
+
(void *) NpyIter_Copy,
|
| 235 |
+
(void *) NpyIter_Deallocate,
|
| 236 |
+
(void *) NpyIter_HasDelayedBufAlloc,
|
| 237 |
+
(void *) NpyIter_HasExternalLoop,
|
| 238 |
+
(void *) NpyIter_EnableExternalLoop,
|
| 239 |
+
(void *) NpyIter_GetInnerStrideArray,
|
| 240 |
+
(void *) NpyIter_GetInnerLoopSizePtr,
|
| 241 |
+
(void *) NpyIter_Reset,
|
| 242 |
+
(void *) NpyIter_ResetBasePointers,
|
| 243 |
+
(void *) NpyIter_ResetToIterIndexRange,
|
| 244 |
+
(void *) NpyIter_GetNDim,
|
| 245 |
+
(void *) NpyIter_GetNOp,
|
| 246 |
+
(void *) NpyIter_GetIterNext,
|
| 247 |
+
(void *) NpyIter_GetIterSize,
|
| 248 |
+
(void *) NpyIter_GetIterIndexRange,
|
| 249 |
+
(void *) NpyIter_GetIterIndex,
|
| 250 |
+
(void *) NpyIter_GotoIterIndex,
|
| 251 |
+
(void *) NpyIter_HasMultiIndex,
|
| 252 |
+
(void *) NpyIter_GetShape,
|
| 253 |
+
(void *) NpyIter_GetGetMultiIndex,
|
| 254 |
+
(void *) NpyIter_GotoMultiIndex,
|
| 255 |
+
(void *) NpyIter_RemoveMultiIndex,
|
| 256 |
+
(void *) NpyIter_HasIndex,
|
| 257 |
+
(void *) NpyIter_IsBuffered,
|
| 258 |
+
(void *) NpyIter_IsGrowInner,
|
| 259 |
+
(void *) NpyIter_GetBufferSize,
|
| 260 |
+
(void *) NpyIter_GetIndexPtr,
|
| 261 |
+
(void *) NpyIter_GotoIndex,
|
| 262 |
+
(void *) NpyIter_GetDataPtrArray,
|
| 263 |
+
(void *) NpyIter_GetDescrArray,
|
| 264 |
+
(void *) NpyIter_GetOperandArray,
|
| 265 |
+
(void *) NpyIter_GetIterView,
|
| 266 |
+
(void *) NpyIter_GetReadFlags,
|
| 267 |
+
(void *) NpyIter_GetWriteFlags,
|
| 268 |
+
(void *) NpyIter_DebugPrint,
|
| 269 |
+
(void *) NpyIter_IterationNeedsAPI,
|
| 270 |
+
(void *) NpyIter_GetInnerFixedStrideArray,
|
| 271 |
+
(void *) NpyIter_RemoveAxis,
|
| 272 |
+
(void *) NpyIter_GetAxisStrideArray,
|
| 273 |
+
(void *) NpyIter_RequiresBuffering,
|
| 274 |
+
(void *) NpyIter_GetInitialDataPtrArray,
|
| 275 |
+
(void *) NpyIter_CreateCompatibleStrides,
|
| 276 |
+
(void *) PyArray_CastingConverter,
|
| 277 |
+
(void *) PyArray_CountNonzero,
|
| 278 |
+
(void *) PyArray_PromoteTypes,
|
| 279 |
+
(void *) PyArray_MinScalarType,
|
| 280 |
+
(void *) PyArray_ResultType,
|
| 281 |
+
(void *) PyArray_CanCastArrayTo,
|
| 282 |
+
(void *) PyArray_CanCastTypeTo,
|
| 283 |
+
(void *) PyArray_EinsteinSum,
|
| 284 |
+
(void *) PyArray_NewLikeArray,
|
| 285 |
+
(void *) PyArray_GetArrayParamsFromObject,
|
| 286 |
+
(void *) PyArray_ConvertClipmodeSequence,
|
| 287 |
+
(void *) PyArray_MatrixProduct2,
|
| 288 |
+
(void *) NpyIter_IsFirstVisit,
|
| 289 |
+
(void *) PyArray_SetBaseObject,
|
| 290 |
+
(void *) PyArray_CreateSortedStridePerm,
|
| 291 |
+
(void *) PyArray_RemoveAxesInPlace,
|
| 292 |
+
(void *) PyArray_DebugPrint,
|
| 293 |
+
(void *) PyArray_FailUnlessWriteable,
|
| 294 |
+
(void *) PyArray_SetUpdateIfCopyBase,
|
| 295 |
+
(void *) PyDataMem_NEW,
|
| 296 |
+
(void *) PyDataMem_FREE,
|
| 297 |
+
(void *) PyDataMem_RENEW,
|
| 298 |
+
(void *) PyDataMem_SetEventHook,
|
| 299 |
+
(NPY_CASTING *) &NPY_DEFAULT_ASSIGN_CASTING,
|
| 300 |
+
(void *) PyArray_MapIterSwapAxes,
|
| 301 |
+
(void *) PyArray_MapIterArray,
|
| 302 |
+
(void *) PyArray_MapIterNext,
|
| 303 |
+
(void *) PyArray_Partition,
|
| 304 |
+
(void *) PyArray_ArgPartition,
|
| 305 |
+
(void *) PyArray_SelectkindConverter,
|
| 306 |
+
(void *) PyDataMem_NEW_ZEROED,
|
| 307 |
+
(void *) PyArray_CheckAnyScalarExact,
|
| 308 |
+
(void *) PyArray_MapIterArrayCopyIfOverlap,
|
| 309 |
+
(void *) PyArray_ResolveWritebackIfCopy,
|
| 310 |
+
(void *) PyArray_SetWritebackIfCopyBase,
|
| 311 |
+
(void *) PyDataMem_SetHandler,
|
| 312 |
+
(void *) PyDataMem_GetHandler,
|
| 313 |
+
(PyObject* *) &PyDataMem_DefaultHandler
|
| 314 |
+
};
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/experimental_dtype_api.h
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* This header exports the new experimental DType API as proposed in
|
| 3 |
+
* NEPs 41 to 43. For background, please check these NEPs. Otherwise,
|
| 4 |
+
* this header also serves as documentation for the time being.
|
| 5 |
+
*
|
| 6 |
+
* The header includes `_dtype_api.h` which holds most definitions while this
|
| 7 |
+
* header mainly wraps functions for public consumption.
|
| 8 |
+
*
|
| 9 |
+
* Please do not hesitate to contact @seberg with questions. This is
|
| 10 |
+
* developed together with https://github.com/seberg/experimental_user_dtypes
|
| 11 |
+
* and those interested in experimenting are encouraged to contribute there.
|
| 12 |
+
*
|
| 13 |
+
* To use the functions defined in the header, call::
|
| 14 |
+
*
|
| 15 |
+
* if (import_experimental_dtype_api(version) < 0) {
|
| 16 |
+
* return NULL;
|
| 17 |
+
* }
|
| 18 |
+
*
|
| 19 |
+
* in your module init. (A version mismatch will be reported, just update
|
| 20 |
+
* to the correct one, this will alert you of possible changes.)
|
| 21 |
+
*
|
| 22 |
+
* The following lists the main symbols currently exported. Please do not
|
| 23 |
+
* hesitate to ask for help or clarification:
|
| 24 |
+
*
|
| 25 |
+
* - PyUFunc_AddLoopFromSpec:
|
| 26 |
+
*
|
| 27 |
+
* Register a new loop for a ufunc. This uses the `PyArrayMethod_Spec`
|
| 28 |
+
* which must be filled in (see in-line comments).
|
| 29 |
+
*
|
| 30 |
+
* - PyUFunc_AddWrappingLoop:
|
| 31 |
+
*
|
| 32 |
+
* Register a new loop which reuses an existing one, but modifies the
|
| 33 |
+
* result dtypes. Please search the internal NumPy docs for more info
|
| 34 |
+
* at this point. (Used for physical units dtype.)
|
| 35 |
+
*
|
| 36 |
+
* - PyUFunc_AddPromoter:
|
| 37 |
+
*
|
| 38 |
+
* Register a new promoter for a ufunc. A promoter is a function stored
|
| 39 |
+
* in a PyCapsule (see in-line comments). It is passed the operation and
|
| 40 |
+
* requested DType signatures and can mutate it to attempt a new search
|
| 41 |
+
* for a matching loop/promoter.
|
| 42 |
+
* I.e. for Numba a promoter could even add the desired loop.
|
| 43 |
+
*
|
| 44 |
+
* - PyArrayInitDTypeMeta_FromSpec:
|
| 45 |
+
*
|
| 46 |
+
* Initialize a new DType. It must currently be a static Python C type
|
| 47 |
+
* that is declared as `PyArray_DTypeMeta` and not `PyTypeObject`.
|
| 48 |
+
* Further, it must subclass `np.dtype` and set its type to
|
| 49 |
+
* `PyArrayDTypeMeta_Type` (before calling `PyType_Ready()`).
|
| 50 |
+
*
|
| 51 |
+
* - PyArray_CommonDType:
|
| 52 |
+
*
|
| 53 |
+
* Find the common-dtype ("promotion") for two DType classes. Similar
|
| 54 |
+
* to `np.result_type`, but works on the classes and not instances.
|
| 55 |
+
*
|
| 56 |
+
* - PyArray_PromoteDTypeSequence:
|
| 57 |
+
*
|
| 58 |
+
* Same as CommonDType, but works with an arbitrary number of DTypes.
|
| 59 |
+
* This function is smarter and can often return successful and unambiguous
|
| 60 |
+
* results when `common_dtype(common_dtype(dt1, dt2), dt3)` would
|
| 61 |
+
* depend on the operation order or fail. Nevertheless, DTypes should
|
| 62 |
+
* aim to ensure that their common-dtype implementation is associative
|
| 63 |
+
* and commutative! (Mainly, unsigned and signed integers are not.)
|
| 64 |
+
*
|
| 65 |
+
* For guaranteed consistent results DTypes must implement common-Dtype
|
| 66 |
+
* "transitively". If A promotes B and B promotes C, then A must generally
|
| 67 |
+
* also promote C; where "promotes" means implements the promotion.
|
| 68 |
+
* (There are some exceptions for abstract DTypes)
|
| 69 |
+
*
|
| 70 |
+
* - PyArray_GetDefaultDescr:
|
| 71 |
+
*
|
| 72 |
+
* Given a DType class, returns the default instance (descriptor).
|
| 73 |
+
* This is an inline function checking for `singleton` first and only
|
| 74 |
+
* calls the `default_descr` function if necessary.
|
| 75 |
+
*
|
| 76 |
+
* - PyArray_DoubleDType, etc.:
|
| 77 |
+
*
|
| 78 |
+
* Aliases to the DType classes for the builtin NumPy DTypes.
|
| 79 |
+
*
|
| 80 |
+
* WARNING
|
| 81 |
+
* =======
|
| 82 |
+
*
|
| 83 |
+
* By using this header, you understand that this is a fully experimental
|
| 84 |
+
* exposure. Details are expected to change, and some options may have no
|
| 85 |
+
* effect. (Please contact @seberg if you have questions!)
|
| 86 |
+
* If the exposure stops working, please file a bug report with NumPy.
|
| 87 |
+
* Further, a DType created using this API/header should still be expected
|
| 88 |
+
* to be incompatible with some functionality inside and outside of NumPy.
|
| 89 |
+
* In this case crashes must be expected. Please report any such problems
|
| 90 |
+
* so that they can be fixed before final exposure.
|
| 91 |
+
* Furthermore, expect missing checks for programming errors which the final
|
| 92 |
+
* API is expected to have.
|
| 93 |
+
*
|
| 94 |
+
* Symbols with a leading underscore are likely to not be included in the
|
| 95 |
+
* first public version, if these are central to your use-case, please let
|
| 96 |
+
* us know, so that we can reconsider.
|
| 97 |
+
*
|
| 98 |
+
* "Array-like" consumer API not yet under consideration
|
| 99 |
+
* ======================================================
|
| 100 |
+
*
|
| 101 |
+
* The new DType API is designed in a way to make it potentially useful for
|
| 102 |
+
* alternative "array-like" implementations. This will require careful
|
| 103 |
+
* exposure of details and functions and is not part of this experimental API.
|
| 104 |
+
*
|
| 105 |
+
* Brief (incompatibility) changelog
|
| 106 |
+
* =================================
|
| 107 |
+
*
|
| 108 |
+
* 2. None (only additions).
|
| 109 |
+
* 3. New `npy_intp *view_offset` argument for `resolve_descriptors`.
|
| 110 |
+
* This replaces the `NPY_CAST_IS_VIEW` flag. It can be set to 0 if the
|
| 111 |
+
* operation is a view, and is pre-initialized to `NPY_MIN_INTP` indicating
|
| 112 |
+
* that the operation is not a view.
|
| 113 |
+
*/
|
| 114 |
+
|
| 115 |
+
#ifndef NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_
|
| 116 |
+
#define NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_
|
| 117 |
+
|
| 118 |
+
#include <Python.h>
|
| 119 |
+
#include "ndarraytypes.h"
|
| 120 |
+
#include "_dtype_api.h"
|
| 121 |
+
|
| 122 |
+
/*
|
| 123 |
+
* The contents of PyArrayMethodObject are currently opaque (is there a
|
| 124 |
+
* good way to make them be `PyObject *`?)
|
| 125 |
+
*/
|
| 126 |
+
typedef struct PyArrayMethodObject_tag PyArrayMethodObject;
|
| 127 |
+
|
| 128 |
+
/*
|
| 129 |
+
* There must be a better way?! -- Oh well, this is experimental
|
| 130 |
+
* (my issue with it, is that I cannot undef those helpers).
|
| 131 |
+
*/
|
| 132 |
+
#if defined(PY_ARRAY_UNIQUE_SYMBOL)
|
| 133 |
+
#define NPY_EXP_DTYPE_API_CONCAT_HELPER2(x, y) x ## y
|
| 134 |
+
#define NPY_EXP_DTYPE_API_CONCAT_HELPER(arg) NPY_EXP_DTYPE_API_CONCAT_HELPER2(arg, __experimental_dtype_api_table)
|
| 135 |
+
#define __experimental_dtype_api_table NPY_EXP_DTYPE_API_CONCAT_HELPER(PY_ARRAY_UNIQUE_SYMBOL)
|
| 136 |
+
#else
|
| 137 |
+
#define __experimental_dtype_api_table __experimental_dtype_api_table
|
| 138 |
+
#endif
|
| 139 |
+
|
| 140 |
+
/* Support for correct multi-file projects: */
|
| 141 |
+
#if defined(NO_IMPORT) || defined(NO_IMPORT_ARRAY)
|
| 142 |
+
extern void **__experimental_dtype_api_table;
|
| 143 |
+
#else
|
| 144 |
+
/*
|
| 145 |
+
* Just a hack so I don't forget importing as much myself; I spent way too
|
| 146 |
+
* much time noticing it the first time around :).
|
| 147 |
+
*/
|
| 148 |
+
static void
|
| 149 |
+
__not_imported(void)
|
| 150 |
+
{
|
| 151 |
+
printf("*****\nCritical error, dtype API not imported\n*****\n");
|
| 152 |
+
}
|
| 153 |
+
|
| 154 |
+
static void *__uninitialized_table[] = {
|
| 155 |
+
&__not_imported, &__not_imported, &__not_imported, &__not_imported,
|
| 156 |
+
&__not_imported, &__not_imported, &__not_imported, &__not_imported};
|
| 157 |
+
|
| 158 |
+
#if defined(PY_ARRAY_UNIQUE_SYMBOL)
|
| 159 |
+
void **__experimental_dtype_api_table = __uninitialized_table;
|
| 160 |
+
#else
|
| 161 |
+
static void **__experimental_dtype_api_table = __uninitialized_table;
|
| 162 |
+
#endif
|
| 163 |
+
#endif
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
typedef int _ufunc_addloop_fromspec_func(
|
| 167 |
+
PyObject *ufunc, PyArrayMethod_Spec *spec);
|
| 168 |
+
/*
|
| 169 |
+
* The main ufunc registration function. This adds a new implementation/loop
|
| 170 |
+
* to a ufunc. It replaces `PyUFunc_RegisterLoopForType`.
|
| 171 |
+
*/
|
| 172 |
+
#define PyUFunc_AddLoopFromSpec \
|
| 173 |
+
(*(_ufunc_addloop_fromspec_func *)(__experimental_dtype_api_table[0]))
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
/* Please see the NumPy definitions in `array_method.h` for details on these */
|
| 177 |
+
typedef int translate_given_descrs_func(int nin, int nout,
|
| 178 |
+
PyArray_DTypeMeta *wrapped_dtypes[],
|
| 179 |
+
PyArray_Descr *given_descrs[], PyArray_Descr *new_descrs[]);
|
| 180 |
+
typedef int translate_loop_descrs_func(int nin, int nout,
|
| 181 |
+
PyArray_DTypeMeta *new_dtypes[], PyArray_Descr *given_descrs[],
|
| 182 |
+
PyArray_Descr *original_descrs[], PyArray_Descr *loop_descrs[]);
|
| 183 |
+
|
| 184 |
+
typedef int _ufunc_wrapping_loop_func(PyObject *ufunc_obj,
|
| 185 |
+
PyArray_DTypeMeta *new_dtypes[], PyArray_DTypeMeta *wrapped_dtypes[],
|
| 186 |
+
translate_given_descrs_func *translate_given_descrs,
|
| 187 |
+
translate_loop_descrs_func *translate_loop_descrs);
|
| 188 |
+
#define PyUFunc_AddWrappingLoop \
|
| 189 |
+
(*(_ufunc_wrapping_loop_func *)(__experimental_dtype_api_table[7]))
|
| 190 |
+
|
| 191 |
+
/*
|
| 192 |
+
* Type of the C promoter function, which must be wrapped into a
|
| 193 |
+
* PyCapsule with name "numpy._ufunc_promoter".
|
| 194 |
+
*
|
| 195 |
+
* Note that currently the output dtypes are always NULL unless they are
|
| 196 |
+
* also part of the signature. This is an implementation detail and could
|
| 197 |
+
* change in the future. However, in general promoters should not have a
|
| 198 |
+
* need for output dtypes.
|
| 199 |
+
* (There are potential use-cases, these are currently unsupported.)
|
| 200 |
+
*/
|
| 201 |
+
typedef int promoter_function(PyObject *ufunc,
|
| 202 |
+
PyArray_DTypeMeta *op_dtypes[], PyArray_DTypeMeta *signature[],
|
| 203 |
+
PyArray_DTypeMeta *new_op_dtypes[]);
|
| 204 |
+
|
| 205 |
+
/*
|
| 206 |
+
* Function to register a promoter.
|
| 207 |
+
*
|
| 208 |
+
* @param ufunc The ufunc object to register the promoter with.
|
| 209 |
+
* @param DType_tuple A Python tuple containing DTypes or None matching the
|
| 210 |
+
* number of inputs and outputs of the ufunc.
|
| 211 |
+
* @param promoter A PyCapsule with name "numpy._ufunc_promoter" containing
|
| 212 |
+
* a pointer to a `promoter_function`.
|
| 213 |
+
*/
|
| 214 |
+
typedef int _ufunc_addpromoter_func(
|
| 215 |
+
PyObject *ufunc, PyObject *DType_tuple, PyObject *promoter);
|
| 216 |
+
#define PyUFunc_AddPromoter \
|
| 217 |
+
(*(_ufunc_addpromoter_func *)(__experimental_dtype_api_table[1]))
|
| 218 |
+
|
| 219 |
+
#define PyArrayDTypeMeta_Type \
|
| 220 |
+
(*(PyTypeObject *)__experimental_dtype_api_table[2])
|
| 221 |
+
typedef int __dtypemeta_fromspec(
|
| 222 |
+
PyArray_DTypeMeta *DType, PyArrayDTypeMeta_Spec *dtype_spec);
|
| 223 |
+
/*
|
| 224 |
+
* Finalize creation of a DTypeMeta. You must ensure that the DTypeMeta is
|
| 225 |
+
* a proper subclass. The DTypeMeta object has additional fields compared to
|
| 226 |
+
* a normal PyTypeObject!
|
| 227 |
+
* The only (easy) creation of a new DType is to create a static Type which
|
| 228 |
+
* inherits `PyArray_DescrType`, sets its type to `PyArrayDTypeMeta_Type` and
|
| 229 |
+
* uses `PyArray_DTypeMeta` defined above as the C-structure.
|
| 230 |
+
*/
|
| 231 |
+
#define PyArrayInitDTypeMeta_FromSpec \
|
| 232 |
+
((__dtypemeta_fromspec *)(__experimental_dtype_api_table[3]))
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
/*
|
| 236 |
+
* *************************************
|
| 237 |
+
* WORKING WITH DTYPES
|
| 238 |
+
* *************************************
|
| 239 |
+
*/
|
| 240 |
+
|
| 241 |
+
typedef PyArray_DTypeMeta *__common_dtype(
|
| 242 |
+
PyArray_DTypeMeta *DType1, PyArray_DTypeMeta *DType2);
|
| 243 |
+
#define PyArray_CommonDType \
|
| 244 |
+
((__common_dtype *)(__experimental_dtype_api_table[4]))
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
typedef PyArray_DTypeMeta *__promote_dtype_sequence(
|
| 248 |
+
npy_intp num, PyArray_DTypeMeta *DTypes[]);
|
| 249 |
+
#define PyArray_PromoteDTypeSequence \
|
| 250 |
+
((__promote_dtype_sequence *)(__experimental_dtype_api_table[5]))
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
typedef PyArray_Descr *__get_default_descr(
|
| 254 |
+
PyArray_DTypeMeta *DType);
|
| 255 |
+
#define _PyArray_GetDefaultDescr \
|
| 256 |
+
((__get_default_descr *)(__experimental_dtype_api_table[6]))
|
| 257 |
+
|
| 258 |
+
static inline PyArray_Descr *
|
| 259 |
+
PyArray_GetDefaultDescr(PyArray_DTypeMeta *DType)
|
| 260 |
+
{
|
| 261 |
+
if (DType->singleton != NULL) {
|
| 262 |
+
Py_INCREF(DType->singleton);
|
| 263 |
+
return DType->singleton;
|
| 264 |
+
}
|
| 265 |
+
return _PyArray_GetDefaultDescr(DType);
|
| 266 |
+
}
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
/*
|
| 270 |
+
* NumPy's builtin DTypes:
|
| 271 |
+
*/
|
| 272 |
+
#define PyArray_BoolDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[10])
|
| 273 |
+
/* Integers */
|
| 274 |
+
#define PyArray_ByteDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[11])
|
| 275 |
+
#define PyArray_UByteDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[12])
|
| 276 |
+
#define PyArray_ShortDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[13])
|
| 277 |
+
#define PyArray_UShortDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[14])
|
| 278 |
+
#define PyArray_IntDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[15])
|
| 279 |
+
#define PyArray_UIntDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[16])
|
| 280 |
+
#define PyArray_LongDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[17])
|
| 281 |
+
#define PyArray_ULongDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[18])
|
| 282 |
+
#define PyArray_LongLongDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[19])
|
| 283 |
+
#define PyArray_ULongLongDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[20])
|
| 284 |
+
/* Integer aliases */
|
| 285 |
+
#define PyArray_Int8Type (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[21])
|
| 286 |
+
#define PyArray_UInt8DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[22])
|
| 287 |
+
#define PyArray_Int16DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[23])
|
| 288 |
+
#define PyArray_UInt16DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[24])
|
| 289 |
+
#define PyArray_Int32DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[25])
|
| 290 |
+
#define PyArray_UInt32DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[26])
|
| 291 |
+
#define PyArray_Int64DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[27])
|
| 292 |
+
#define PyArray_UInt64DType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[28])
|
| 293 |
+
#define PyArray_IntpDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[29])
|
| 294 |
+
#define PyArray_UIntpDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[30])
|
| 295 |
+
/* Floats */
|
| 296 |
+
#define PyArray_HalfType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[31])
|
| 297 |
+
#define PyArray_FloatDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[32])
|
| 298 |
+
#define PyArray_DoubleDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[33])
|
| 299 |
+
#define PyArray_LongDoubleDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[34])
|
| 300 |
+
/* Complex */
|
| 301 |
+
#define PyArray_CFloatDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[35])
|
| 302 |
+
#define PyArray_CDoubleDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[36])
|
| 303 |
+
#define PyArray_CLongDoubleDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[37])
|
| 304 |
+
/* String/Bytes */
|
| 305 |
+
#define PyArray_StringDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[38])
|
| 306 |
+
#define PyArray_UnicodeDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[39])
|
| 307 |
+
/* Datetime/Timedelta */
|
| 308 |
+
#define PyArray_DatetimeDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[40])
|
| 309 |
+
#define PyArray_TimedeltaDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[41])
|
| 310 |
+
/* Object/Void */
|
| 311 |
+
#define PyArray_ObjectDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[42])
|
| 312 |
+
#define PyArray_VoidDType (*(PyArray_DTypeMeta *)__experimental_dtype_api_table[43])
|
| 313 |
+
|
| 314 |
+
/*
|
| 315 |
+
* ********************************
|
| 316 |
+
* Initialization
|
| 317 |
+
* ********************************
|
| 318 |
+
*
|
| 319 |
+
* Import the experimental API, the version must match the one defined in
|
| 320 |
+
* the header to ensure changes are taken into account. NumPy will further
|
| 321 |
+
* runtime-check this.
|
| 322 |
+
* You must call this function to use the symbols defined in this file.
|
| 323 |
+
*/
|
| 324 |
+
#if !defined(NO_IMPORT) && !defined(NO_IMPORT_ARRAY)
|
| 325 |
+
|
| 326 |
+
static int
|
| 327 |
+
import_experimental_dtype_api(int version)
|
| 328 |
+
{
|
| 329 |
+
if (version != __EXPERIMENTAL_DTYPE_API_VERSION) {
|
| 330 |
+
PyErr_Format(PyExc_RuntimeError,
|
| 331 |
+
"DType API version %d did not match header version %d. Please "
|
| 332 |
+
"update the import statement and check for API changes.",
|
| 333 |
+
version, __EXPERIMENTAL_DTYPE_API_VERSION);
|
| 334 |
+
return -1;
|
| 335 |
+
}
|
| 336 |
+
if (__experimental_dtype_api_table != __uninitialized_table) {
|
| 337 |
+
/* already imported. */
|
| 338 |
+
return 0;
|
| 339 |
+
}
|
| 340 |
+
|
| 341 |
+
PyObject *multiarray = PyImport_ImportModule("numpy.core._multiarray_umath");
|
| 342 |
+
if (multiarray == NULL) {
|
| 343 |
+
return -1;
|
| 344 |
+
}
|
| 345 |
+
|
| 346 |
+
PyObject *api = PyObject_CallMethod(multiarray,
|
| 347 |
+
"_get_experimental_dtype_api", "i", version);
|
| 348 |
+
Py_DECREF(multiarray);
|
| 349 |
+
if (api == NULL) {
|
| 350 |
+
return -1;
|
| 351 |
+
}
|
| 352 |
+
__experimental_dtype_api_table = (void **)PyCapsule_GetPointer(api,
|
| 353 |
+
"experimental_dtype_api_table");
|
| 354 |
+
Py_DECREF(api);
|
| 355 |
+
|
| 356 |
+
if (__experimental_dtype_api_table == NULL) {
|
| 357 |
+
__experimental_dtype_api_table = __uninitialized_table;
|
| 358 |
+
return -1;
|
| 359 |
+
}
|
| 360 |
+
return 0;
|
| 361 |
+
}
|
| 362 |
+
|
| 363 |
+
#endif /* !defined(NO_IMPORT) && !defined(NO_IMPORT_ARRAY) */
|
| 364 |
+
|
| 365 |
+
#endif /* NUMPY_CORE_INCLUDE_NUMPY_EXPERIMENTAL_DTYPE_API_H_ */
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/halffloat.h
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_
|
| 2 |
+
#define NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_
|
| 3 |
+
|
| 4 |
+
#include <Python.h>
|
| 5 |
+
#include <numpy/npy_math.h>
|
| 6 |
+
|
| 7 |
+
#ifdef __cplusplus
|
| 8 |
+
extern "C" {
|
| 9 |
+
#endif
|
| 10 |
+
|
| 11 |
+
/*
|
| 12 |
+
* Half-precision routines
|
| 13 |
+
*/
|
| 14 |
+
|
| 15 |
+
/* Conversions */
|
| 16 |
+
float npy_half_to_float(npy_half h);
|
| 17 |
+
double npy_half_to_double(npy_half h);
|
| 18 |
+
npy_half npy_float_to_half(float f);
|
| 19 |
+
npy_half npy_double_to_half(double d);
|
| 20 |
+
/* Comparisons */
|
| 21 |
+
int npy_half_eq(npy_half h1, npy_half h2);
|
| 22 |
+
int npy_half_ne(npy_half h1, npy_half h2);
|
| 23 |
+
int npy_half_le(npy_half h1, npy_half h2);
|
| 24 |
+
int npy_half_lt(npy_half h1, npy_half h2);
|
| 25 |
+
int npy_half_ge(npy_half h1, npy_half h2);
|
| 26 |
+
int npy_half_gt(npy_half h1, npy_half h2);
|
| 27 |
+
/* faster *_nonan variants for when you know h1 and h2 are not NaN */
|
| 28 |
+
int npy_half_eq_nonan(npy_half h1, npy_half h2);
|
| 29 |
+
int npy_half_lt_nonan(npy_half h1, npy_half h2);
|
| 30 |
+
int npy_half_le_nonan(npy_half h1, npy_half h2);
|
| 31 |
+
/* Miscellaneous functions */
|
| 32 |
+
int npy_half_iszero(npy_half h);
|
| 33 |
+
int npy_half_isnan(npy_half h);
|
| 34 |
+
int npy_half_isinf(npy_half h);
|
| 35 |
+
int npy_half_isfinite(npy_half h);
|
| 36 |
+
int npy_half_signbit(npy_half h);
|
| 37 |
+
npy_half npy_half_copysign(npy_half x, npy_half y);
|
| 38 |
+
npy_half npy_half_spacing(npy_half h);
|
| 39 |
+
npy_half npy_half_nextafter(npy_half x, npy_half y);
|
| 40 |
+
npy_half npy_half_divmod(npy_half x, npy_half y, npy_half *modulus);
|
| 41 |
+
|
| 42 |
+
/*
|
| 43 |
+
* Half-precision constants
|
| 44 |
+
*/
|
| 45 |
+
|
| 46 |
+
#define NPY_HALF_ZERO (0x0000u)
|
| 47 |
+
#define NPY_HALF_PZERO (0x0000u)
|
| 48 |
+
#define NPY_HALF_NZERO (0x8000u)
|
| 49 |
+
#define NPY_HALF_ONE (0x3c00u)
|
| 50 |
+
#define NPY_HALF_NEGONE (0xbc00u)
|
| 51 |
+
#define NPY_HALF_PINF (0x7c00u)
|
| 52 |
+
#define NPY_HALF_NINF (0xfc00u)
|
| 53 |
+
#define NPY_HALF_NAN (0x7e00u)
|
| 54 |
+
|
| 55 |
+
#define NPY_MAX_HALF (0x7bffu)
|
| 56 |
+
|
| 57 |
+
/*
|
| 58 |
+
* Bit-level conversions
|
| 59 |
+
*/
|
| 60 |
+
|
| 61 |
+
npy_uint16 npy_floatbits_to_halfbits(npy_uint32 f);
|
| 62 |
+
npy_uint16 npy_doublebits_to_halfbits(npy_uint64 d);
|
| 63 |
+
npy_uint32 npy_halfbits_to_floatbits(npy_uint16 h);
|
| 64 |
+
npy_uint64 npy_halfbits_to_doublebits(npy_uint16 h);
|
| 65 |
+
|
| 66 |
+
#ifdef __cplusplus
|
| 67 |
+
}
|
| 68 |
+
#endif
|
| 69 |
+
|
| 70 |
+
#endif /* NUMPY_CORE_INCLUDE_NUMPY_HALFFLOAT_H_ */
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_cpu.h
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* This sets (target) CPU-specific macros:
|
| 3 |
+
* - Possible values:
|
| 4 |
+
* NPY_CPU_X86
|
| 5 |
+
* NPY_CPU_AMD64
|
| 6 |
+
* NPY_CPU_PPC
|
| 7 |
+
* NPY_CPU_PPC64
|
| 8 |
+
* NPY_CPU_PPC64LE
|
| 9 |
+
* NPY_CPU_SPARC
|
| 10 |
+
* NPY_CPU_S390
|
| 11 |
+
* NPY_CPU_IA64
|
| 12 |
+
* NPY_CPU_HPPA
|
| 13 |
+
* NPY_CPU_ALPHA
|
| 14 |
+
* NPY_CPU_ARMEL
|
| 15 |
+
* NPY_CPU_ARMEB
|
| 16 |
+
* NPY_CPU_SH_LE
|
| 17 |
+
* NPY_CPU_SH_BE
|
| 18 |
+
* NPY_CPU_ARCEL
|
| 19 |
+
* NPY_CPU_ARCEB
|
| 20 |
+
* NPY_CPU_RISCV64
|
| 21 |
+
* NPY_CPU_LOONGARCH
|
| 22 |
+
* NPY_CPU_WASM
|
| 23 |
+
*/
|
| 24 |
+
#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_
|
| 25 |
+
#define NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_
|
| 26 |
+
|
| 27 |
+
#include "numpyconfig.h"
|
| 28 |
+
|
| 29 |
+
#if defined( __i386__ ) || defined(i386) || defined(_M_IX86)
|
| 30 |
+
/*
|
| 31 |
+
* __i386__ is defined by gcc and Intel compiler on Linux,
|
| 32 |
+
* _M_IX86 by VS compiler,
|
| 33 |
+
* i386 by Sun compilers on opensolaris at least
|
| 34 |
+
*/
|
| 35 |
+
#define NPY_CPU_X86
|
| 36 |
+
#elif defined(__x86_64__) || defined(__amd64__) || defined(__x86_64) || defined(_M_AMD64)
|
| 37 |
+
/*
|
| 38 |
+
* both __x86_64__ and __amd64__ are defined by gcc
|
| 39 |
+
* __x86_64 defined by sun compiler on opensolaris at least
|
| 40 |
+
* _M_AMD64 defined by MS compiler
|
| 41 |
+
*/
|
| 42 |
+
#define NPY_CPU_AMD64
|
| 43 |
+
#elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__)
|
| 44 |
+
#define NPY_CPU_PPC64LE
|
| 45 |
+
#elif defined(__powerpc64__) && defined(__BIG_ENDIAN__)
|
| 46 |
+
#define NPY_CPU_PPC64
|
| 47 |
+
#elif defined(__ppc__) || defined(__powerpc__) || defined(_ARCH_PPC)
|
| 48 |
+
/*
|
| 49 |
+
* __ppc__ is defined by gcc, I remember having seen __powerpc__ once,
|
| 50 |
+
* but can't find it ATM
|
| 51 |
+
* _ARCH_PPC is used by at least gcc on AIX
|
| 52 |
+
* As __powerpc__ and _ARCH_PPC are also defined by PPC64 check
|
| 53 |
+
* for those specifically first before defaulting to ppc
|
| 54 |
+
*/
|
| 55 |
+
#define NPY_CPU_PPC
|
| 56 |
+
#elif defined(__sparc__) || defined(__sparc)
|
| 57 |
+
/* __sparc__ is defined by gcc and Forte (e.g. Sun) compilers */
|
| 58 |
+
#define NPY_CPU_SPARC
|
| 59 |
+
#elif defined(__s390__)
|
| 60 |
+
#define NPY_CPU_S390
|
| 61 |
+
#elif defined(__ia64)
|
| 62 |
+
#define NPY_CPU_IA64
|
| 63 |
+
#elif defined(__hppa)
|
| 64 |
+
#define NPY_CPU_HPPA
|
| 65 |
+
#elif defined(__alpha__)
|
| 66 |
+
#define NPY_CPU_ALPHA
|
| 67 |
+
#elif defined(__arm__) || defined(__aarch64__) || defined(_M_ARM64)
|
| 68 |
+
/* _M_ARM64 is defined in MSVC for ARM64 compilation on Windows */
|
| 69 |
+
#if defined(__ARMEB__) || defined(__AARCH64EB__)
|
| 70 |
+
#if defined(__ARM_32BIT_STATE)
|
| 71 |
+
#define NPY_CPU_ARMEB_AARCH32
|
| 72 |
+
#elif defined(__ARM_64BIT_STATE)
|
| 73 |
+
#define NPY_CPU_ARMEB_AARCH64
|
| 74 |
+
#else
|
| 75 |
+
#define NPY_CPU_ARMEB
|
| 76 |
+
#endif
|
| 77 |
+
#elif defined(__ARMEL__) || defined(__AARCH64EL__) || defined(_M_ARM64)
|
| 78 |
+
#if defined(__ARM_32BIT_STATE)
|
| 79 |
+
#define NPY_CPU_ARMEL_AARCH32
|
| 80 |
+
#elif defined(__ARM_64BIT_STATE) || defined(_M_ARM64) || defined(__AARCH64EL__)
|
| 81 |
+
#define NPY_CPU_ARMEL_AARCH64
|
| 82 |
+
#else
|
| 83 |
+
#define NPY_CPU_ARMEL
|
| 84 |
+
#endif
|
| 85 |
+
#else
|
| 86 |
+
# error Unknown ARM CPU, please report this to numpy maintainers with \
|
| 87 |
+
information about your platform (OS, CPU and compiler)
|
| 88 |
+
#endif
|
| 89 |
+
#elif defined(__sh__) && defined(__LITTLE_ENDIAN__)
|
| 90 |
+
#define NPY_CPU_SH_LE
|
| 91 |
+
#elif defined(__sh__) && defined(__BIG_ENDIAN__)
|
| 92 |
+
#define NPY_CPU_SH_BE
|
| 93 |
+
#elif defined(__MIPSEL__)
|
| 94 |
+
#define NPY_CPU_MIPSEL
|
| 95 |
+
#elif defined(__MIPSEB__)
|
| 96 |
+
#define NPY_CPU_MIPSEB
|
| 97 |
+
#elif defined(__or1k__)
|
| 98 |
+
#define NPY_CPU_OR1K
|
| 99 |
+
#elif defined(__mc68000__)
|
| 100 |
+
#define NPY_CPU_M68K
|
| 101 |
+
#elif defined(__arc__) && defined(__LITTLE_ENDIAN__)
|
| 102 |
+
#define NPY_CPU_ARCEL
|
| 103 |
+
#elif defined(__arc__) && defined(__BIG_ENDIAN__)
|
| 104 |
+
#define NPY_CPU_ARCEB
|
| 105 |
+
#elif defined(__riscv) && defined(__riscv_xlen) && __riscv_xlen == 64
|
| 106 |
+
#define NPY_CPU_RISCV64
|
| 107 |
+
#elif defined(__loongarch__)
|
| 108 |
+
#define NPY_CPU_LOONGARCH
|
| 109 |
+
#elif defined(__EMSCRIPTEN__)
|
| 110 |
+
/* __EMSCRIPTEN__ is defined by emscripten: an LLVM-to-Web compiler */
|
| 111 |
+
#define NPY_CPU_WASM
|
| 112 |
+
#else
|
| 113 |
+
#error Unknown CPU, please report this to numpy maintainers with \
|
| 114 |
+
information about your platform (OS, CPU and compiler)
|
| 115 |
+
#endif
|
| 116 |
+
|
| 117 |
+
/*
|
| 118 |
+
* Except for the following architectures, memory access is limited to the natural
|
| 119 |
+
* alignment of data types otherwise it may lead to bus error or performance regression.
|
| 120 |
+
* For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt.
|
| 121 |
+
*/
|
| 122 |
+
#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__)
|
| 123 |
+
#define NPY_ALIGNMENT_REQUIRED 0
|
| 124 |
+
#endif
|
| 125 |
+
#ifndef NPY_ALIGNMENT_REQUIRED
|
| 126 |
+
#define NPY_ALIGNMENT_REQUIRED 1
|
| 127 |
+
#endif
|
| 128 |
+
|
| 129 |
+
#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_CPU_H_ */
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_interrupt.h
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* This API is only provided because it is part of publicly exported
|
| 3 |
+
* headers. Its use is considered DEPRECATED, and it will be removed
|
| 4 |
+
* eventually.
|
| 5 |
+
* (This includes the _PyArray_SigintHandler and _PyArray_GetSigintBuf
|
| 6 |
+
* functions which are however, public API, and not headers.)
|
| 7 |
+
*
|
| 8 |
+
* Instead of using these non-threadsafe macros consider periodically
|
| 9 |
+
* querying `PyErr_CheckSignals()` or `PyOS_InterruptOccurred()`.
|
| 10 |
+
* Both of these require holding the GIL, although cpython could add a
|
| 11 |
+
* version of `PyOS_InterruptOccurred()` which does not. Such a version
|
| 12 |
+
* actually exists as private API in Python 3.10, and backported to 3.9 and 3.8,
|
| 13 |
+
* see also https://bugs.python.org/issue41037 and
|
| 14 |
+
* https://github.com/python/cpython/pull/20599).
|
| 15 |
+
*/
|
| 16 |
+
|
| 17 |
+
#ifndef NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_
|
| 18 |
+
#define NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_
|
| 19 |
+
|
| 20 |
+
#ifndef NPY_NO_SIGNAL
|
| 21 |
+
|
| 22 |
+
#include <setjmp.h>
|
| 23 |
+
#include <signal.h>
|
| 24 |
+
|
| 25 |
+
#ifndef sigsetjmp
|
| 26 |
+
|
| 27 |
+
#define NPY_SIGSETJMP(arg1, arg2) setjmp(arg1)
|
| 28 |
+
#define NPY_SIGLONGJMP(arg1, arg2) longjmp(arg1, arg2)
|
| 29 |
+
#define NPY_SIGJMP_BUF jmp_buf
|
| 30 |
+
|
| 31 |
+
#else
|
| 32 |
+
|
| 33 |
+
#define NPY_SIGSETJMP(arg1, arg2) sigsetjmp(arg1, arg2)
|
| 34 |
+
#define NPY_SIGLONGJMP(arg1, arg2) siglongjmp(arg1, arg2)
|
| 35 |
+
#define NPY_SIGJMP_BUF sigjmp_buf
|
| 36 |
+
|
| 37 |
+
#endif
|
| 38 |
+
|
| 39 |
+
# define NPY_SIGINT_ON { \
|
| 40 |
+
PyOS_sighandler_t _npy_sig_save; \
|
| 41 |
+
_npy_sig_save = PyOS_setsig(SIGINT, _PyArray_SigintHandler); \
|
| 42 |
+
if (NPY_SIGSETJMP(*((NPY_SIGJMP_BUF *)_PyArray_GetSigintBuf()), \
|
| 43 |
+
1) == 0) { \
|
| 44 |
+
|
| 45 |
+
# define NPY_SIGINT_OFF } \
|
| 46 |
+
PyOS_setsig(SIGINT, _npy_sig_save); \
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
#else /* NPY_NO_SIGNAL */
|
| 50 |
+
|
| 51 |
+
#define NPY_SIGINT_ON
|
| 52 |
+
#define NPY_SIGINT_OFF
|
| 53 |
+
|
| 54 |
+
#endif /* NPY_NO_SIGNAL */
|
| 55 |
+
|
| 56 |
+
#endif /* NUMPY_CORE_INCLUDE_NUMPY_NPY_INTERRUPT_H_ */
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/npy_no_deprecated_api.h
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/*
|
| 2 |
+
* This include file is provided for inclusion in Cython *.pyd files where
|
| 3 |
+
* one would like to define the NPY_NO_DEPRECATED_API macro. It can be
|
| 4 |
+
* included by
|
| 5 |
+
*
|
| 6 |
+
* cdef extern from "npy_no_deprecated_api.h": pass
|
| 7 |
+
*
|
| 8 |
+
*/
|
| 9 |
+
#ifndef NPY_NO_DEPRECATED_API
|
| 10 |
+
|
| 11 |
+
/* put this check here since there may be multiple includes in C extensions. */
|
| 12 |
+
#if defined(NUMPY_CORE_INCLUDE_NUMPY_NDARRAYTYPES_H_) || \
|
| 13 |
+
defined(NUMPY_CORE_INCLUDE_NUMPY_NPY_DEPRECATED_API_H) || \
|
| 14 |
+
defined(NUMPY_CORE_INCLUDE_NUMPY_OLD_DEFINES_H_)
|
| 15 |
+
#error "npy_no_deprecated_api.h" must be first among numpy includes.
|
| 16 |
+
#else
|
| 17 |
+
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
|
| 18 |
+
#endif
|
| 19 |
+
|
| 20 |
+
#endif /* NPY_NO_DEPRECATED_API */
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35_uv/lib/python3.12/site-packages/numpy/core/include/numpy/utils.h
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#ifndef NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_
|
| 2 |
+
#define NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_
|
| 3 |
+
|
| 4 |
+
#ifndef __COMP_NPY_UNUSED
|
| 5 |
+
#if defined(__GNUC__)
|
| 6 |
+
#define __COMP_NPY_UNUSED __attribute__ ((__unused__))
|
| 7 |
+
#elif defined(__ICC)
|
| 8 |
+
#define __COMP_NPY_UNUSED __attribute__ ((__unused__))
|
| 9 |
+
#elif defined(__clang__)
|
| 10 |
+
#define __COMP_NPY_UNUSED __attribute__ ((unused))
|
| 11 |
+
#else
|
| 12 |
+
#define __COMP_NPY_UNUSED
|
| 13 |
+
#endif
|
| 14 |
+
#endif
|
| 15 |
+
|
| 16 |
+
#if defined(__GNUC__) || defined(__ICC) || defined(__clang__)
|
| 17 |
+
#define NPY_DECL_ALIGNED(x) __attribute__ ((aligned (x)))
|
| 18 |
+
#elif defined(_MSC_VER)
|
| 19 |
+
#define NPY_DECL_ALIGNED(x) __declspec(align(x))
|
| 20 |
+
#else
|
| 21 |
+
#define NPY_DECL_ALIGNED(x)
|
| 22 |
+
#endif
|
| 23 |
+
|
| 24 |
+
/* Use this to tag a variable as not used. It will remove unused variable
|
| 25 |
+
* warning on supported platforms (see __COMP_NPY_UNUSED) and mangle the variable
|
| 26 |
+
* to avoid accidental use */
|
| 27 |
+
#define NPY_UNUSED(x) __NPY_UNUSED_TAGGED ## x __COMP_NPY_UNUSED
|
| 28 |
+
#define NPY_EXPAND(x) x
|
| 29 |
+
|
| 30 |
+
#define NPY_STRINGIFY(x) #x
|
| 31 |
+
#define NPY_TOSTRING(x) NPY_STRINGIFY(x)
|
| 32 |
+
|
| 33 |
+
#define NPY_CAT__(a, b) a ## b
|
| 34 |
+
#define NPY_CAT_(a, b) NPY_CAT__(a, b)
|
| 35 |
+
#define NPY_CAT(a, b) NPY_CAT_(a, b)
|
| 36 |
+
|
| 37 |
+
#endif /* NUMPY_CORE_INCLUDE_NUMPY_UTILS_H_ */
|
LTA_openwebtext_dualt/scripts/flowtext_score_decode_lab.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""Score decode-lab samples with an external causal LM in one model load."""
|
| 3 |
+
|
| 4 |
+
from __future__ import annotations
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import json
|
| 8 |
+
import math
|
| 9 |
+
from collections import defaultdict
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
|
| 12 |
+
import torch
|
| 13 |
+
import torch.nn.functional as F
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@torch.no_grad()
def score_texts(texts, model_name_or_path: str, batch_size: int, max_length: int, device: torch.device):
    """Score every text with one causal LM that is loaded a single time.

    The tokenizer and model are loaded from ``model_name_or_path``; when the
    tokenizer has no pad token, its EOS token is used for padding.  Texts are
    tokenized in batches of ``batch_size`` and truncated to ``max_length``.

    Returns:
        A list with one ``(ppl, nll, count)`` tuple per input text, in input
        order.  ``nll`` is the mean next-token negative log-likelihood over
        the ``count`` scored positions and ``ppl`` is ``exp(min(nll, 50))``.
        Texts with no scorable position yield ``(None, None, 0)``.
    """
    from transformers import AutoModelForCausalLM, AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    if tokenizer.pad_token_id is None:
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.pad_token_id = tokenizer.eos_token_id
    model = AutoModelForCausalLM.from_pretrained(model_name_or_path).to(device)
    if getattr(model.config, "pad_token_id", None) is None and tokenizer.pad_token_id is not None:
        model.config.pad_token_id = tokenizer.pad_token_id
    model.eval()

    eos_id = tokenizer.eos_token_id
    results = []
    for offset in range(0, len(texts), batch_size):
        chunk = texts[offset : offset + batch_size]
        encoded = tokenizer(
            chunk,
            return_tensors="pt",
            return_attention_mask=True,
            return_token_type_ids=False,
            padding=True,
            truncation=True,
            max_length=max_length,
        ).to(device)
        ids = encoded["input_ids"]
        attended = encoded["attention_mask"].bool()

        # A next-token loss needs at least one (context, target) pair.
        if ids.size(1) < 2:
            results.extend([(None, None, 0)] * len(chunk))
            continue

        # cross_entropy expects the class dimension second, so swap the last
        # two axes of the logits before slicing off the final position.
        class_first = model(input_ids=ids, attention_mask=attended).logits.transpose(-1, -2)
        per_token_nll = F.cross_entropy(class_first[..., :-1].float(), ids[..., 1:], reduction="none")

        if eos_id is None:
            keep = attended[..., 1:]
        else:
            # Keep every non-EOS target plus the first EOS of each row; any
            # later EOS targets are dropped, as are unattended positions.
            up_to_second_eos = (ids == eos_id).cumsum(-1) == 1
            not_eos = ids != eos_id
            keep = (up_to_second_eos[..., 1:] | not_eos[..., 1:]) & attended[..., 1:]

        for row_nll, row_keep in zip(per_token_nll, keep):
            n_scored = int(row_keep.sum().detach().cpu())
            if n_scored <= 0:
                results.append((None, None, 0))
                continue
            mean_nll = float(row_nll[row_keep].sum().detach().cpu()) / n_scored
            # Clamp the exponent so a very bad sample cannot overflow exp().
            results.append((float(math.exp(min(mean_nll, 50.0))), mean_nll, n_scored))
    return results
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def main():
    """Score decode-lab samples with an external LM, then summarise and rank them.

    Reads the JSONL file given by ``--input``, keeps the records whose
    ``"type"`` is ``"sample"``, scores their ``"text"`` with ``score_texts``
    and attaches ``external_ppl``, ``external_nll``, ``external_tokens`` and
    ``combined_score`` to a copy of each record.  A per-config summary and the
    ``--topk`` best samples are printed.  When ``--output`` is non-empty the
    enriched records are also written there as JSONL, ordered by descending
    ``combined_score``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True)
    parser.add_argument("--scorer", required=True)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--max_length", type=int, default=512)
    parser.add_argument("--topk", type=int, default=12)
    parser.add_argument("--output", default="")
    args = parser.parse_args()

    # Read inside a context manager so the handle is closed deterministically,
    # and decode as UTF-8 regardless of the locale (the output below is written
    # with ensure_ascii=False, so non-ASCII text is expected).
    with Path(args.input).open(encoding="utf-8") as f:
        rows = [json.loads(line) for line in f if line.strip()]
    samples = [r for r in rows if r.get("type") == "sample"]
    texts = [r["text"] for r in samples]
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"[info] scoring {len(texts)} samples on {device} with {args.scorer}", flush=True)
    scored = score_texts(texts, args.scorer, args.batch_size, args.max_length, device)

    enriched = []
    for row, (ppl, nll, count) in zip(samples, scored):
        rec = dict(row)
        rec["external_ppl"] = ppl
        rec["external_nll"] = nll
        rec["external_tokens"] = count
        # Conservative selection: prioritize samples that are both non-collapsed
        # and plausible under an external LM.  Unscorable samples (nll is None)
        # take a flat penalty of 10.
        rec["combined_score"] = float(rec["quality"]) - (0.08 * float(nll) if nll is not None else 10.0)
        enriched.append(rec)

    by_label = defaultdict(list)
    for rec in enriched:
        by_label[rec["config"]["label"]].append(rec)
    print("\n== Config Summary ==")
    for label, group in sorted(by_label.items()):
        valid = [g for g in group if g["external_ppl"] is not None]
        # Report NaN rather than a misleading 0.00 when nothing was scorable.
        if valid:
            mean_ppl = sum(g["external_ppl"] for g in valid) / len(valid)
            mean_nll = sum(g["external_nll"] for g in valid) / len(valid)
        else:
            mean_ppl = mean_nll = float("nan")
        mean_quality = sum(float(g["quality"]) for g in group) / max(len(group), 1)
        best = max(group, key=lambda g: g["combined_score"])
        print(
            f"{label:24s} mean_quality={mean_quality:7.4f} "
            f"mean_ppl={mean_ppl:8.2f} mean_nll={mean_nll:6.3f} "
            f"best_prompt={best['prompt']!r} best_combined={best['combined_score']:.4f}"
        )

    print("\n== Top Samples ==")
    enriched.sort(key=lambda r: r["combined_score"], reverse=True)
    for rec in enriched[: args.topk]:
        # external_ppl is None for samples with no scored tokens; formatting
        # None with ".2f" would raise TypeError, so print a placeholder.
        ppl_text = "n/a" if rec["external_ppl"] is None else f"{rec['external_ppl']:.2f}"
        print(
            "\n"
            + "=" * 96
            + f"\nlabel={rec['config']['label']} prompt={rec['prompt']!r} "
            + f"quality={float(rec['quality']):.4f} ppl={ppl_text} "
            + f"rep3={rec['rep3']:.3f} d2={rec['distinct2']:.3f} combined={rec['combined_score']:.4f}\n"
            + rec["text"]
        )

    if args.output:
        with Path(args.output).open("w", encoding="utf-8") as f:
            for rec in enriched:
                f.write(json.dumps(rec, ensure_ascii=False) + "\n")
        print(f"\n[done] wrote {args.output}")
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
if __name__ == "__main__":
|
| 129 |
+
main()
|
LTA_openwebtext_dualt/scripts/launch_lta_owt_from_lm1b_c1024_4gpu.sh
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Launch OpenWebText training (max_len 1024) initialised from an LM1B checkpoint.
#
# Every setting assigned with ${VAR:-default} below can be overridden from the
# environment: CUDA_VISIBLE_DEVICES, OMP_NUM_THREADS, RUN_NAME, SAVE_DIR,
# LOG_DIR, LM1B_CKPT, TOTAL_STEPS, PER_GPU_BATCH_SIZE, GLOBAL_BATCH_SIZE,
# MASTER_PORT and NPROC_PER_NODE.
set -euo pipefail

cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt

export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0,1,2,3}
export OMP_NUM_THREADS=${OMP_NUM_THREADS:-1}

RUN_NAME=${RUN_NAME:-lta_owt_distilbert_len1024_init_lm1b1m_posemb_repeat_fully_c1024_adamw_gbs512_4gpu_20k_$(date +%Y%m%d_%H%M%S)}
SAVE_DIR=${SAVE_DIR:-runs_transfer/${RUN_NAME}}
LOG_DIR=${LOG_DIR:-logs/owt_from_lm1b_c1024_4gpu}
mkdir -p "${LOG_DIR}"

LM1B_CKPT=${LM1B_CKPT:-runs/lta_lm1b_dirichlet_categorical_fullvocab_c1024_fullycoupled_flmpack_onehot_hardce_ddit_small_len128_gbs512_8gpu_1m_nw0/step_1000000.pt}
TOTAL_STEPS=${TOTAL_STEPS:-20000}
PER_GPU_BATCH_SIZE=${PER_GPU_BATCH_SIZE:-16}
GLOBAL_BATCH_SIZE=${GLOBAL_BATCH_SIZE:-512}
MASTER_PORT=${MASTER_PORT:-32043}
# The process count used to be hard-coded to 4 even though CUDA_VISIBLE_DEVICES
# can be overridden; set NPROC_PER_NODE alongside it when using fewer/more GPUs.
NPROC_PER_NODE=${NPROC_PER_NODE:-4}

python -m torch.distributed.run --nproc_per_node="${NPROC_PER_NODE}" --master_port="${MASTER_PORT}" train.py \
  --data_path /e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext \
  --openwebtext_split train_minus_100k \
  --text_column text \
  --detokenizer auto \
  --tokenizer_path /e2e-data/evad-tech-vla/wanghan58/workspace/imagenet_handoff_20260327/nlp_dts_light/assets/distilbert-base-uncased/tokenizer.json \
  --save_dir "${SAVE_DIR}" \
  --wrap \
  --wrap_mode stream \
  --max_len 1024 \
  --batch_size "${PER_GPU_BATCH_SIZE}" \
  --global_batch_size "${GLOBAL_BATCH_SIZE}" \
  --num_workers 4 \
  --dataloader_prefetch_factor 4 \
  --total_steps "${TOTAL_STEPS}" \
  --warmup_steps 2500 \
  --log_every 50 \
  --eval_every 0 \
  --save_every 1000 \
  --latest_every 500 \
  --init_model_path "${LM1B_CKPT}" \
  --init_pos_embed_mode repeat \
  --lr 0.0003 \
  --lr_schedule constant_warmup \
  --weight_decay 0.0 \
  --adam_beta1 0.9 \
  --adam_beta2 0.999 \
  --adam_eps 1e-8 \
  --grad_clip 1.0 \
  --d_model 768 \
  --cond_dim 128 \
  --n_layers 12 \
  --n_heads 12 \
  --dim_ff 3072 \
  --dropout 0.1 \
  --model_type ddit \
  --state_format prob \
  --bridge dirichlet \
  --target_loss hard_ce \
  --target_prob 1.0 \
  --min_t 0.0 \
  --max_t 1.0 \
  --dual_t \
  --corrupt_t_mode same \
  --min_mask_ratio 0.1 \
  --max_mask_ratio 1.0 \
  --wrong_token_replace_prob 1.0 \
  --wrong_token_schedule linear_t \
  --wrong_token_exp_k 1.0 \
  --dirichlet_concentration_min 1.0 \
  --dirichlet_concentration_max 1024 \
  --dirichlet_endpoint_mode categorical_dual_t \
  --dirichlet_semantic_t_mode same \
  --categorical_wrong_from_full_vocab \
  --simplex_bridge_sampler dirichlet \
  --infer_steps 128 \
  --decode_damping 1.0 \
  --max_gamma 1.0 \
  --decode_solver flowmap \
  --noise_init logistic_normal \
  --bridge_noise_init logistic_normal \
  --noise_sigma -1 \
  --allow_tf32 \
  --ddp_gradient_as_bucket_view \
  --bf16 \
  2>&1 | tee "${LOG_DIR}/${RUN_NAME}.log"
|
LTA_openwebtext_dualt/scripts/launch_lta_wmt14_deen_fullycoupled_4gpu_smoke.sh
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Short smoke run of train.py on the WMT14 De-En (T5-tokenized) dataset.
# Every value assigned with ":=" / ":-" below may be overridden from the
# environment before invoking this script.
set -euo pipefail

cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt

# --- process environment ----------------------------------------------------
PYTHONPATH="$(pwd)${PYTHONPATH:+:$PYTHONPATH}"
TOKENIZERS_PARALLELISM=false
PYTHONUNBUFFERED=1
: "${OMP_NUM_THREADS:=1}"
: "${CUDA_VISIBLE_DEVICES:=0,1,2,3}"
export PYTHONPATH TOKENIZERS_PARALLELISM PYTHONUNBUFFERED OMP_NUM_THREADS CUDA_VISIBLE_DEVICES

# --- run naming and output locations ----------------------------------------
: "${RUN_NAME:=smoke_lta_wmt14_deen_t5_len128_fullycoupled_elfparams_4gpu_$(date +%Y%m%d_%H%M%S)}"
: "${SAVE_DIR:=runs/${RUN_NAME}}"
: "${LOG_DIR:=logs/wmt14_deen_fullycoupled_smoke}"
: "${LOG_FILE:=${LOG_DIR}/${RUN_NAME}.log}"
mkdir -p "${LOG_DIR}" "${SAVE_DIR}"

# --- data and tokenizer ------------------------------------------------------
: "${DATA_ROOT:=/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/elf}"
: "${DATA_PATH:=${DATA_ROOT}/wmt14_de-en_train_t5}"
: "${EVAL_DATA_PATH:=${DATA_ROOT}/wmt14_de-en_validation_t5}"
: "${DATASET_CACHE_DIR:=/e2e-data/evad-tech-vla/wanghan58/data/hf_cache}"
: "${TOKENIZER_PATH:=/e2e-data/evad-tech-vla/wanghan58/models/hf/t5-small/tokenizer.json}"

# --- launch and schedule sizes ----------------------------------------------
: "${NPROC_PER_NODE:=4}"
: "${MASTER_PORT:=32072}"
: "${GLOBAL_BATCH_SIZE:=512}"
: "${PER_GPU_BATCH_SIZE:=32}"
: "${TOTAL_STEPS:=50}"
: "${WARMUP_STEPS:=20}"
: "${MAX_RECORDS:=4096}"

# Fail fast, before any worker is spawned, when the training set is not staged.
[[ -e "${DATA_PATH}" ]] || {
  echo "Missing WMT14 De-En train dataset at ${DATA_PATH}" >&2
  echo "Stage embedded-language-flows/wmt14_de-en_train_t5 locally, then rerun." >&2
  exit 2
}

launcher=(
  python -m torch.distributed.run
  --nproc_per_node="${NPROC_PER_NODE}"
  --master_port="${MASTER_PORT}"
)

train_args=(
  # data / io
  --elf_conditional_hf
  --data_path "${DATA_PATH}"
  --eval_data_path "${EVAL_DATA_PATH}"
  --dataset_cache_dir "${DATASET_CACHE_DIR}"
  --tokenizer_path "${TOKENIZER_PATH}"
  --save_dir "${SAVE_DIR}"
  --max_len 128
  --max_input_len 64
  --conditional_pad_token eos
  --label_drop_prob 0.1
  --max_records "${MAX_RECORDS}"
  --batch_size "${PER_GPU_BATCH_SIZE}"
  --global_batch_size "${GLOBAL_BATCH_SIZE}"
  --num_workers 4
  --dataloader_prefetch_factor 4
  # schedule / logging
  --total_steps "${TOTAL_STEPS}"
  --warmup_steps "${WARMUP_STEPS}"
  --log_every 10
  --eval_every 0
  --save_every 0
  --latest_every 25
  # optimizer
  --optimizer muon
  --lr 0.001
  --lr_schedule constant_warmup
  --min_lr 0
  --weight_decay 0.0
  --adam_beta1 0.9
  --adam_beta2 0.95
  --adam_eps 1e-8
  --muon_momentum 0.95
  --muon_ns_steps 5
  --muon_update_scale 1.0
  --ema_decay 0.9999
  --ema_start_step 0
  --grad_clip 1.0
  --seed 42
  # model
  --d_model 768
  --cond_dim 128
  --n_layers 12
  --n_heads 12
  --dim_ff 3072
  --dropout 0.1
  --model_type ddit
  --state_format prob
  # bridge / corruption
  --bridge dirichlet
  --target_loss hard_ce
  --target_prob 1.0
  --min_t 0.05
  --max_t 1.0
  --dual_t
  --corrupt_t_mode same
  --corrupt_min_t 0.05
  --corrupt_max_t 1.0
  --min_mask_ratio 0.1
  --max_mask_ratio 1.0
  --wrong_token_replace_prob 1.0
  --wrong_token_schedule linear_t
  --wrong_token_exp_k 1.0
  --dirichlet_concentration_min 1.0
  --dirichlet_concentration_max 1024.0
  --dirichlet_endpoint_mode categorical_dual_t
  --dirichlet_semantic_t_mode same
  --dirichlet_semantic_t_value 0.0
  --categorical_wrong_from_full_vocab
  --simplex_bridge_sampler dirichlet
  # decoding
  --infer_steps 128
  --decode_damping 1.0
  --max_gamma 1.0
  --decode_solver flowmap
  --noise_init logistic_normal
  --bridge_noise_init logistic_normal
  --noise_sigma -1
  # runtime
  --allow_tf32
  --ddp_gradient_as_bucket_view
  --bf16
)

"${launcher[@]}" train.py "${train_args[@]}" 2>&1 | tee -a "${LOG_FILE}"
|
LTA_openwebtext_dualt/scripts/run_lta_owt_bert_absrope_time4_dirichlet_len1024_C1_to_1024_8gpu_1m_mask1_sameT_save10k.sh
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
set -euo pipefail

cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt

# OWT raw text + BERT tokenizer, FLM wrapped stream:
# [CLS] + 1022 payload tokens + [SEP]
#
# Backbone:
# ddit_elf = RMSNorm/SwiGLU/QK-norm + RoPE + 4 prefix time tokens.
# We also add learned absolute position embeddings before RoPE.
#
# Bridge:
# Dirichlet C=1->1024, mask_ratio=1.0, model t and corruption t are shared.
#
# This script only exports configuration; the actual launch is delegated to the
# wrapper script invoked on the last line.

export DATA_PATH="${DATA_PATH:-/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext}"
export TEXT_COLUMN="${TEXT_COLUMN:-text}"
export OPENWEBTEXT_SPLIT="${OPENWEBTEXT_SPLIT:-train_minus_100k}"
export TOKENIZER_PATH="${TOKENIZER_PATH:-/e2e-data/evad-tech-vla/wanghan58/workspace/imagenet_handoff_20260327/nlp_dts_light/assets/distilbert-base-uncased/tokenizer.json}"
export TOKENIZED_HF=0
export WRAP_MODE="${WRAP_MODE:-stream}"

export VOCAB_SIZE="${VOCAB_SIZE:-30522}"
export CMIN="${CMIN:-1}"
export CMAX="${CMAX:-1024}"

export MODEL_TYPE=ddit_elf
export ELF_NUM_TIME_TOKENS="${ELF_NUM_TIME_TOKENS:-4}"
export ELF_NUM_MODEL_MODE_TOKENS="${ELF_NUM_MODEL_MODE_TOKENS:-0}"
export QK_NORM="${QK_NORM:-1}"
export ABS_POS_EMBED=1
export CORRUPT_T_MODE=same
export MIN_MASK_RATIO=1.0
export MAX_MASK_RATIO=1.0
export CATEGORICAL_WRONG_PROB_FLOOR="${CATEGORICAL_WRONG_PROB_FLOOR:-0.0}"

# Count the GPUs nvidia-smi reports. The `|| true` sits inside the pipeline so a
# missing or failing nvidia-smi cannot trip `pipefail`. With the old
# `... | wc -l || echo 1` form, `wc -l` had already printed 0 by the time the
# fallback ran, so the captured value was the two-line string "0\n1" and the
# arithmetic below failed.
_ngpus_avail=$( { nvidia-smi --query-gpu=index --format=csv,noheader 2>/dev/null || true; } | wc -l )
if [[ "${_ngpus_avail}" -le 0 ]]; then _ngpus_avail=1; fi
_default_cvd=$(seq -s, 0 $((_ngpus_avail - 1)))
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-${_default_cvd}}"
# One process per entry of CUDA_VISIBLE_DEVICES unless NPROC_PER_NODE is preset.
IFS=',' read -ra _cvd_arr <<< "${CUDA_VISIBLE_DEVICES}"
export NPROC_PER_NODE="${NPROC_PER_NODE:-${#_cvd_arr[@]}}"
unset _ngpus_avail _default_cvd _cvd_arr
export NNODES="${NNODES:-${MLP_WORKER_NUM:-1}}"
export NODE_RANK="${NODE_RANK:-${MLP_ROLE_INDEX:-0}}"
export MASTER_ADDR="${MASTER_ADDR:-${MLP_WORKER_0_HOST:-127.0.0.1}}"
export MASTER_PORT="${MASTER_PORT:-${MLP_WORKER_0_PORT:-29500}}"
export GLOBAL_BATCH_SIZE="${GLOBAL_BATCH_SIZE:-512}"
export PER_GPU_BATCH_SIZE="${PER_GPU_BATCH_SIZE:-32}"
export TOTAL_STEPS="${TOTAL_STEPS:-1000000}"
export WARMUP_STEPS="${WARMUP_STEPS:-2500}"
export SAVE_EVERY="${SAVE_EVERY:-10000}"
export LATEST_EVERY="${LATEST_EVERY:-1000}"
export LOG_EVERY="${LOG_EVERY:-100}"

export DATE_TAG="${DATE_TAG:-$(date +%Y%m%d)}"
export RUN_NAME="${RUN_NAME:-lta_owt_bert_absrope_time4_dirichlet_len1024_C1_to_1024_mask1_sameT_gbs512_b32_8gpu_1m_save10k_${DATE_TAG}}"

export WATCH_ENABLED="${WATCH_ENABLED:-1}"
export WATCH_STEP_INTERVAL="${WATCH_STEP_INTERVAL:-10000}"
export WATCH_N_SAMPLES="${WATCH_N_SAMPLES:-128}"
export WATCH_CUDA_VISIBLE_DEVICES="${WATCH_CUDA_VISIBLE_DEVICES:-7}"
export WATCH_DECODE_MODE="${WATCH_DECODE_MODE:-dual_line_probe}"
export WATCH_DECODE_BATCH="${WATCH_DECODE_BATCH:-1}"
export WATCH_DUAL_SEMANTIC_POWER="${WATCH_DUAL_SEMANTIC_POWER:-1.5}"
export WATCH_DUAL_EARLY_TEMP="${WATCH_DUAL_EARLY_TEMP:-2.8}"
export WATCH_DUAL_LATE_TEMP="${WATCH_DUAL_LATE_TEMP:-1.45}"
export WATCH_DUAL_TEMP_END="${WATCH_DUAL_TEMP_END:-0.55}"
export WATCH_DUAL_TEMP_POWER="${WATCH_DUAL_TEMP_POWER:-1.5}"
export WATCH_ENDPOINT_TEMP="${WATCH_ENDPOINT_TEMP:-1.45}"
export WATCH_ENDPOINT_TOP_P="${WATCH_ENDPOINT_TOP_P:-0.95}"
export WATCH_GUMBEL_TAU_START="${WATCH_GUMBEL_TAU_START:-1.0}"
export WATCH_GUMBEL_TAU_END="${WATCH_GUMBEL_TAU_END:-0.2}"
export WATCH_OUT_BASE="${WATCH_OUT_BASE:-docs/lta_samples/metrics_${DATE_TAG}/owt_bert_absrope_time4_C1_to_1024_mask1_sameT_dualline_dirres_c${CMIN}_${CMAX}_n${WATCH_N_SAMPLES}/${RUN_NAME}}"
export WATCH_LOG_DIR="${WATCH_LOG_DIR:-logs/owt_bert_absrope_time4_C1_to_1024_mask1_sameT_dualline_watch}"

bash scripts/run_lta_owt_dirichlet_len1024_Cv_to_2v_8gpu_save1k_with_gumbel_watch.sh
|
LTA_openwebtext_dualt/scripts/tmp_run_three_quick_infer_20260525.sh
ADDED
|
@@ -0,0 +1,130 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Quick-inference driver: environment setup shared by every command below.
# Strict mode: stop on the first failing command, unset variable, or failed
# pipeline stage.
set -o errexit -o nounset -o pipefail

# Relative paths used later (runs/, docs/, scripts/) are resolved from here.
cd /e2e-data/evad-tech-vla/wanghan58/workspace/LTA_openwebtext_dualt

# Put the working directory first on PYTHONPATH, keeping any existing entries
# after it. The ${VAR:+...} form is safe under nounset when PYTHONPATH is unset.
export PYTHONPATH="${PWD}${PYTHONPATH:+:${PYTHONPATH}}"
export TOKENIZERS_PARALLELISM=false PYTHONUNBUFFERED=1
|
| 8 |
+
|
| 9 |
+
# run_sde_quick CKPT TOKENIZER OUT_DIR C_MIN C_MAX NAME
#
# Loads CKPT, decodes 8 samples with decode_sde (batch size 1, 128 steps,
# max length 1024) and writes into OUT_DIR:
#   sde_steps128_samples8_unscored.jsonl  one summary record, then one record per sample
#   first8.txt                            raw decoded texts
#   first8_stripped.txt                   texts with special-token markers removed
# C_MIN and C_MAX are passed as the concentration range; C_MIN is also used as
# the Dirichlet concentration of the initial noise. NAME only tags log lines.
run_sde_quick() {
  local ckpt="$1" tok="$2" out_dir="$3"
  local cmin="$4" cmax="$5" name="$6"
  # The quoted heredoc delimiter keeps the shell from expanding anything inside
  # the Python source; the script receives its inputs through sys.argv instead.
  CUDA_VISIBLE_DEVICES=0 python - "$ckpt" "$tok" "$out_dir" "$cmin" "$cmax" "$name" <<'PY'
import json
import re
import sys
from pathlib import Path

import torch

from flowtext_lab.genppl import summarize_token_diversity
from flowtext_lab.tokenization import BpeTextTokenizer
from scripts.eval_lm1b_c1024_fullycoupled_sde_genppl import build_model, decode_sde

SPECIAL_TOKENS = ("[CLS]", "[SEP]", "[PAD]", "<pad>", "</s>", "<s>", "<unk>", "<|endoftext|>")
SAMPLE_SEPARATOR = "\n\n--- SAMPLE ---\n\n"


def strip_special(text: str) -> str:
    """Replace each known special-token marker with a space and collapse whitespace."""
    for marker in SPECIAL_TOKENS:
        text = text.replace(marker, " ")
    return re.sub(r"\s+", " ", text).strip()


ckpt_path, tok_path, out_dir_arg, cmin_arg, cmax_arg, name = sys.argv[1:7]
out_dir = Path(out_dir_arg)
out_dir.mkdir(parents=True, exist_ok=True)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"[{name}] load {ckpt_path}", flush=True)
ckpt = torch.load(ckpt_path, map_location="cpu", weights_only=False)
print(f"[{name}] step={ckpt.get('step')}", flush=True)
tok = BpeTextTokenizer.from_file(tok_path)
model = build_model(ckpt, tok, device)

# Fixed decode settings for the quick check; only the concentration values
# come from the command line.
decode_kwargs = dict(
    n_samples=8,
    batch_size=1,
    max_len=1024,
    steps=128,
    seed=20260525,
    device=device,
    concentration_min=float(cmin_arg),
    concentration_max=float(cmax_arg),
    endpoint_temp=1.45,
    endpoint_projection="gumbel_softmax",
    endpoint_top_p=0.95,
    gumbel_tau_start=1.0,
    gumbel_tau_end=0.2,
    model_t_mode="support_t",
    mean_mode="endpoint_only",
    semantic_power=1.0,
    noise_init="dirichlet",
    noise_dirichlet_concentration=float(cmin_arg),
    sde_resample="dirichlet",
    final_from="blend_0.5",
)
ids, texts, cfg = decode_sde(model, tok, **decode_kwargs)

stripped = list(map(strip_special, texts))
summary = {
    "type": "summary",
    "checkpoint": ckpt_path,
    "step": int(ckpt.get("step", -1)),
    "decode": cfg,
    "diversity": vars(summarize_token_diversity(ids)),
}

# JSONL layout: the summary record first, then one record per decoded sample.
records = [summary]
records.extend(
    {"type": "sample", "index": index, "raw_text": raw, "stripped_text": clean}
    for index, (raw, clean) in enumerate(zip(texts, stripped))
)
out_jsonl = out_dir / "sde_steps128_samples8_unscored.jsonl"
with out_jsonl.open("w", encoding="utf-8") as handle:
    handle.writelines(json.dumps(record, ensure_ascii=False) + "\n" for record in records)

(out_dir / "first8.txt").write_text(SAMPLE_SEPARATOR.join(texts), encoding="utf-8")
(out_dir / "first8_stripped.txt").write_text(SAMPLE_SEPARATOR.join(stripped), encoding="utf-8")
print(f"[{name}] done {out_jsonl}", flush=True)
print(json.dumps(summary, ensure_ascii=False, indent=2), flush=True)
PY
}
|
| 88 |
+
|
| 89 |
+
# Tokenizer files for the two model families evaluated below.
T5_TOK=/e2e-data/evad-tech-vla/wanghan58/models/hf/t5-small/tokenizer.json
BERT_TOK=/e2e-data/evad-tech-vla/wanghan58/workspace/imagenet_handoff_20260327/nlp_dts_light/assets/distilbert-base-uncased/tokenizer.json

# Every result of this script lands under one dated metrics directory.
metrics_dir=docs/lta_samples/metrics_20260525

# NOTE(review): the output directory names embed a step number (step26000 /
# step4000) while the checkpoints loaded are latest.pt -- confirm the labels
# still match the checkpoints on disk.

# Run 1: T5-tokenizer checkpoint through the SDE quick decode.
# NOTE(review): 32100/64200 are presumably V and 2*V for this tokenizer (the
# run name says Cv_to_2v) -- confirm.
run_sde_quick \
  runs/lta_owt_t5_absrope_adaln_dirichlet_len1024_Cv_to_2v_mask0p1_1p0_sameT_gbs512_b32_8gpu_1m_save10k_20260525/latest.pt \
  "${T5_TOK}" \
  "${metrics_dir}/lta_owt_t5_absrope_adaln_Cv_to_2v_step26000_quick_n8" \
  32100 64200 t5_Cv2V

# Run 2: T5-tokenizer C1_to_1024 checkpoint through the dual-line resample probe.
probe_out_dir="${metrics_dir}/lta_owt_t5_absrope_adaln_C1_to_1024_step26000_dualline_quick_n8"
probe_args=(
  --checkpoint runs/lta_owt_t5_absrope_adaln_dirichlet_len1024_C1_to_1024_mask0p1_1p0_sameT_gbs512_b32_8gpu_1m_save10k_20260525/latest.pt
  --tokenizer_path "${T5_TOK}"
  --out_dir "${probe_out_dir}"
  --max_lens 1024
  --n_samples 8
  --batch_size 1
  --steps 128
  --decode_rule dual_line_resample
  --c_min 1
  --c_max 1024
  --input_noise_dirichlet_concentration 1
  --anchor_mode state
  --model_t_mode flow
  --time_schedule uniform
  --support_power 1.0
  --semantic_power 1.5
  --early_temp 2.8
  --late_temp 1.45
  --temp_end 0.55
  --temp_power 1.5
  --final_from blend
  --final_decode argmax
  --seed 20260525
)
CUDA_VISIBLE_DEVICES=0 python scripts/infer_softkl_decode_probe.py "${probe_args[@]}"
# Copy the probe's sample dump to first8.txt, the file name run_sde_quick also writes.
cp "${probe_out_dir}/context1024_samples.txt" "${probe_out_dir}/first8.txt"

# Run 3: BERT-tokenizer checkpoint through the SDE quick decode.
# NOTE(review): 30522/61044 are presumably V and 2*V for this tokenizer -- confirm.
run_sde_quick \
  runs/lta_owt_bert_absrope_adaln_dirichlet_len1024_Cv_to_2v_mask1_sameT_gbs512_b4x4_1m_save1k_watch_20260525/latest.pt \
  "${BERT_TOK}" \
  "${metrics_dir}/lta_owt_bert_absrope_adaln_Cv_to_2v_mask1_sameT_step4000_quick_n8" \
  30522 61044 bert_Cv2V_step4000
|
LTA_openwebtext_dualt/scripts/trace_lta_decode_steps.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
import argparse
|
| 5 |
+
import json
|
| 6 |
+
import sys
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import torch
|
| 10 |
+
import torch.nn.functional as F
|
| 11 |
+
|
| 12 |
+
REPO_ROOT = Path(__file__).resolve().parents[1]
|
| 13 |
+
if str(REPO_ROOT) not in sys.path:
|
| 14 |
+
sys.path.insert(0, str(REPO_ROOT))
|
| 15 |
+
|
| 16 |
+
from eval import build_model_from_ckpt
|
| 17 |
+
from flowtext_lab.decode import model_time_for_step, sample_noise_simplex, state_for_model
|
| 18 |
+
from flowtext_lab.tokenization import BpeTextTokenizer
|
| 19 |
+
from scripts.flowtext_decode_lab import decode_text, flowmap_gamma
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the decode-step tracing script.

    ``--checkpoint``, ``--tokenizer_path`` and ``--output`` are required.
    ``--trace_steps`` and ``--token_positions`` are kept as comma-separated
    strings; converting them to integers is left to the caller.
    """
    parser = argparse.ArgumentParser()

    for flag in ("--checkpoint", "--tokenizer_path", "--output"):
        parser.add_argument(flag, required=True)

    # (flag, type, default) for the numeric options, in help-output order.
    numeric_options = (
        ("--max_len", int, 128),
        ("--steps", int, 128),
        ("--seed", int, 20260502),
        ("--sample_index", int, 13),
        ("--endpoint_temp", float, 1.8),
        ("--damping", float, 1.0),
        ("--max_gamma", float, 1.0),
        ("--eps", float, 1e-8),
    )
    for flag, kind, default in numeric_options:
        parser.add_argument(flag, type=kind, default=default)

    parser.add_argument("--trace_steps", default="0,1,2,4,8,16,32,64,96,127")
    parser.add_argument(
        "--token_positions",
        default="0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31",
    )
    return parser.parse_args()
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def parse_ints(text: str) -> list[int]:
    """Parse a comma-separated string into a list of integers.

    Fields that are empty or whitespace-only are skipped; any other field is
    passed to ``int()``, which raises ``ValueError`` for a non-integer field.
    """
    # filter(str.strip, ...) keeps the fields whose stripped form is non-empty.
    return list(map(int, filter(str.strip, text.split(","))))
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@torch.no_grad()
def main() -> None:
    """Trace one sample through the iterative decode loop and dump diagnostics as JSON.

    The state starts from Dirichlet noise and is moved towards the model's
    softmax endpoint by ``gamma`` at every step. For each step listed in
    ``--trace_steps`` the top token, its probability and the entropy of the
    state are recorded at the requested ``--token_positions``, together with
    the endpoint's top token and probability. The resulting payload is written
    to ``--output`` and printed to stdout. A relative ``--output`` is resolved
    against the directory that contains the checkpoint.

    Raises:
        ValueError: if ``--sample_index`` is negative.
    """
    args = parse_args()
    if args.sample_index < 0:
        # A negative index would produce an empty noise batch and only fail
        # later with an opaque IndexError.
        raise ValueError(f"--sample_index must be >= 0, got {args.sample_index}")

    torch.manual_seed(args.seed)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    tokenizer = BpeTextTokenizer.from_file(args.tokenizer_path)
    # weights_only=False matches how the other inference scripts in this repo
    # load checkpoints and keeps loading independent of the PyTorch default.
    # It unpickles arbitrary objects, so only load trusted checkpoints.
    ckpt = torch.load(args.checkpoint, map_location="cpu", weights_only=False)
    model = build_model_from_ckpt(ckpt, tokenizer.vocab_size, args.max_len, device)
    model.eval()

    # Reproduce sample_index by drawing that many + 1 initial noise samples
    # with the same seed and keeping only the last one.
    init = sample_noise_simplex(
        (args.sample_index + 1, args.max_len),
        tokenizer.vocab_size,
        device,
        args.eps,
        noise_mode="dirichlet",
        target_prob=1.0,
        noise_sigma=-1.0,
        dirichlet_concentration=1.0,
    )[-1:].float()
    attn = torch.ones((1, args.max_len), dtype=torch.bool, device=device)
    trace_steps = set(parse_ints(args.trace_steps))
    # Out-of-range positions are dropped once here instead of on every traced step.
    positions = [pos for pos in parse_ints(args.token_positions) if 0 <= pos < args.max_len]
    records = []

    def token_text(token_id: int) -> str:
        """Decode a single token id, keeping special tokens visible."""
        return tokenizer.decode([token_id], stop_at_eos=False, skip_special_tokens=False)

    probs = init.clone()
    for step in range(args.steps):
        t = model_time_for_step("flow", step, args.steps, 1, device, dtype=torch.float32)
        logits = model(state_for_model(model, probs, args.eps), t, attn).float()
        endpoint = F.softmax(logits / args.endpoint_temp, dim=-1)
        gamma = flowmap_gamma(step, args.steps, args.damping, args.max_gamma, args.eps)

        # Move towards the endpoint, then clamp and renormalise along the last axis.
        probs = (probs + gamma * (endpoint - probs)).clamp_min(args.eps)
        probs = probs / probs.sum(dim=-1, keepdim=True).clamp_min(args.eps)

        if step not in trace_steps:
            continue

        state = probs[0]
        top_prob, ids = state.max(dim=-1)
        clamped = state.clamp_min(args.eps)
        entropy = -(clamped * clamped.log()).sum(dim=-1)
        endpoint_top_prob, endpoint_ids = endpoint[0].max(dim=-1)

        # Convert each tensor to a Python list once rather than calling
        # .item() per position.
        ids_list = ids.tolist()
        top_prob_list = top_prob.tolist()
        entropy_list = entropy.tolist()
        endpoint_ids_list = endpoint_ids.tolist()
        endpoint_top_prob_list = endpoint_top_prob.tolist()

        records.append({
            "step": step,
            "gamma": gamma,
            "model_t": float(t.item()),
            "text_prefix": decode_text(tokenizer, ids_list[:64]),
            "positions": [
                {
                    "pos": pos,
                    "state_token": token_text(int(ids_list[pos])),
                    "state_id": int(ids_list[pos]),
                    "state_top_p": float(top_prob_list[pos]),
                    "state_entropy": float(entropy_list[pos]),
                    "endpoint_token": token_text(int(endpoint_ids_list[pos])),
                    "endpoint_id": int(endpoint_ids_list[pos]),
                    "endpoint_top_p": float(endpoint_top_prob_list[pos]),
                }
                for pos in positions
            ],
        })

    final_ids = probs[0].argmax(dim=-1).tolist()
    final_text = decode_text(tokenizer, final_ids)

    output = Path(args.output)
    if not output.is_absolute():
        # Relative outputs are placed next to the checkpoint, not in the cwd.
        output = Path(args.checkpoint).resolve().parent / output
    output.parent.mkdir(parents=True, exist_ok=True)

    payload = {
        "checkpoint": args.checkpoint,
        "seed": args.seed,
        "sample_index": args.sample_index,
        "steps": args.steps,
        "endpoint_temp": args.endpoint_temp,
        "final_text": final_text,
        "records": records,
    }
    # Serialise once and reuse the text for both the file and stdout.
    serialized = json.dumps(payload, ensure_ascii=False, indent=2)
    output.write_text(serialized, encoding="utf-8")
    print(serialized)


if __name__ == "__main__":
    main()
|