JinghuiLuAstronaut commited on
Commit
9b6ae63
·
verified ·
1 Parent(s): 907970e

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. LTA_openwebtext_dualt/logs/elfopt_8gpu/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024.log +617 -0
  2. LTA_openwebtext_dualt/logs/fullycoupled_loss1mt_floor0p25_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_loss1mt_floor0p25_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260514_230726.log +0 -0
  3. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0010000_state_fromstate_t1p45.log +8 -0
  4. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0020000_state_fromstate_t1p45.log +8 -0
  5. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0030000_state_fromstate_t1p45.log +8 -0
  6. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0040000_state_fromstate_t1p45.log +8 -0
  7. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0050000_state_fromstate_t1p45.log +8 -0
  8. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt +5 -0
  9. LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/watch_every10k_state_t1p45.nohup.log +46 -0
  10. LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2.dirichlet_resample.eval.log +8 -0
  11. LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce.flowmap.eval.log +8 -0
  12. LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor.dirichlet_resample.eval.log +8 -0
  13. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/__init__.py +33 -0
  14. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/compat.py +1138 -0
  15. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/database.py +1359 -0
  16. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/index.py +508 -0
  17. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/locators.py +1303 -0
  18. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/manifest.py +384 -0
  19. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/markers.py +167 -0
  20. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/metadata.py +1068 -0
  21. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/resources.py +358 -0
  22. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/scripts.py +452 -0
  23. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/util.py +2025 -0
  24. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/version.py +751 -0
  25. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/wheel.py +1099 -0
  26. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/__init__.py +322 -0
  27. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/actions.py +217 -0
  28. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/common.py +432 -0
  29. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/core.py +0 -0
  30. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/diagram/__init__.py +656 -0
  31. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/exceptions.py +299 -0
  32. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/helpers.py +1100 -0
  33. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/results.py +796 -0
  34. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/testing.py +331 -0
  35. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/unicode.py +361 -0
  36. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/util.py +284 -0
  37. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/__init__.py +102 -0
  38. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_collections.py +337 -0
  39. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_version.py +2 -0
  40. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connection.py +572 -0
  41. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connectionpool.py +1132 -0
  42. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/exceptions.py +323 -0
  43. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/fields.py +274 -0
  44. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/filepost.py +98 -0
  45. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/poolmanager.py +537 -0
  46. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/request.py +191 -0
  47. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py +879 -0
  48. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/__init__.py +49 -0
  49. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/connection.py +149 -0
  50. LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/proxy.py +57 -0
LTA_openwebtext_dualt/logs/elfopt_8gpu/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024.log ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [rank6]:[W513 02:30:29.571793088 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
2
+ [rank2]:[W513 02:30:29.573074597 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
3
+ [rank0]:[W513 02:30:29.621398720 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
4
+ t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
5
+ t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
6
+ t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO cudaDriverVersion 12080
7
+ t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO NCCL version 2.25.1+cuda12.8
8
+ t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO Comm config Blocking set to 1
9
+ t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO cudaDriverVersion 12080
10
+ t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
11
+ t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO cudaDriverVersion 12080
12
+ t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
13
+ t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
14
+ t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO NCCL version 2.25.1+cuda12.8
15
+ t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
16
+ t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO NCCL version 2.25.1+cuda12.8
17
+ t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO Comm config Blocking set to 1
18
+ t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO Comm config Blocking set to 1
19
+ [rank1]:[W513 02:30:29.693479602 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
20
+ [rank3]:[W513 02:30:29.694131805 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
21
+ [rank7]:[W513 02:30:29.696815174 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
22
+ t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO cudaDriverVersion 12080
23
+ t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
24
+ t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
25
+ t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO NCCL version 2.25.1+cuda12.8
26
+ t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO cudaDriverVersion 12080
27
+ t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
28
+ t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
29
+ t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO NCCL version 2.25.1+cuda12.8
30
+ [rank5]:[W513 02:30:29.704393470 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
31
+ t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO Comm config Blocking set to 1
32
+ t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO Comm config Blocking set to 1
33
+ t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO cudaDriverVersion 12080
34
+ t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
35
+ t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
36
+ t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO NCCL version 2.25.1+cuda12.8
37
+ t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO Comm config Blocking set to 1
38
+ [rank4]:[W513 02:30:29.710692738 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
39
+ t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO cudaDriverVersion 12080
40
+ t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
41
+ t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
42
+ t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO NCCL version 2.25.1+cuda12.8
43
+ t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO Comm config Blocking set to 1
44
+ t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO cudaDriverVersion 12080
45
+ t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
46
+ t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
47
+ t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO NCCL version 2.25.1+cuda12.8
48
+ t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO Comm config Blocking set to 1
49
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
50
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
51
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
52
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO P2P plugin v9 IBext_v9
53
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
54
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
55
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
56
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
57
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO P2P plugin v9 IBext_v9
58
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
59
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
60
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
61
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
62
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO P2P plugin v9 IBext_v9
63
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
64
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
65
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
66
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
67
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO P2P plugin v9 IBext_v9
68
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
69
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
70
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
71
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
72
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO P2P plugin v9 IBext_v9
73
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
74
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
75
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
76
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
77
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO P2P plugin v9 IBext_v9
78
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
79
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
80
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
81
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
82
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO P2P plugin v9 IBext_v9
83
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
84
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
85
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
86
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
87
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO P2P plugin v9 IBext_v9
88
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
89
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
90
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
91
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
92
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Using network IBext_v9
93
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
94
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
95
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
96
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Using network IBext_v9
97
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
98
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
99
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
100
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Using network IBext_v9
101
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
102
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
103
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
104
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
105
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
106
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Using network IBext_v9
107
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
108
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Using network IBext_v9
109
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
110
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
111
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
112
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Using network IBext_v9
113
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
114
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
115
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
116
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Using network IBext_v9
117
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
118
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
119
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
120
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Using network IBext_v9
121
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO ncclCommInitRankConfig comm 0x98ac490 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 69020 commId 0xc3d8f44253f33569 - Init START
122
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO ncclCommInitRankConfig comm 0xbd43ac0 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 65040 commId 0xc3d8f44253f33569 - Init START
123
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO ncclCommInitRankConfig comm 0xa99de10 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId 73020 commId 0xc3d8f44253f33569 - Init START
124
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO ncclCommInitRankConfig comm 0x98a1870 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 67020 commId 0xc3d8f44253f33569 - Init START
125
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO RAS client listening socket at ::1<28028>
126
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO ncclCommInitRankConfig comm 0x9b15700 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 6b020 commId 0xc3d8f44253f33569 - Init START
127
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO RAS client listening socket at ::1<28028>
128
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO ncclCommInitRankConfig comm 0xaa777e0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId 75020 commId 0xc3d8f44253f33569 - Init START
129
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO RAS client listening socket at ::1<28028>
130
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO RAS client listening socket at ::1<28028>
131
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO ncclCommInitRankConfig comm 0xa87b0d0 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 71020 commId 0xc3d8f44253f33569 - Init START
132
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO RAS client listening socket at ::1<28028>
133
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO ncclCommInitRankConfig comm 0xa1cc500 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 6f020 commId 0xc3d8f44253f33569 - Init START
134
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO RAS client listening socket at ::1<28028>
135
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO RAS client listening socket at ::1<28028>
136
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO RAS client listening socket at ::1<28028>
137
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Bootstrap timings total 0.008778 (create 0.000020, send 0.000074, recv 0.008247, ring 0.000142, delay 0.000000)
138
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Bootstrap timings total 0.011209 (create 0.000023, send 0.000069, recv 0.000102, ring 0.010580, delay 0.000001)
139
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Bootstrap timings total 0.000624 (create 0.000023, send 0.000070, recv 0.000111, ring 0.000105, delay 0.000000)
140
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Bootstrap timings total 0.002691 (create 0.000020, send 0.000076, recv 0.000032, ring 0.000105, delay 0.000000)
141
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Bootstrap timings total 0.022499 (create 0.000026, send 0.000074, recv 0.017801, ring 0.002214, delay 0.000001)
142
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Bootstrap timings total 0.078898 (create 0.000026, send 0.000068, recv 0.070159, ring 0.008328, delay 0.000000)
143
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Bootstrap timings total 0.004769 (create 0.000021, send 0.000079, recv 0.000076, ring 0.004295, delay 0.000000)
144
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Bootstrap timings total 0.072414 (create 0.000030, send 0.000069, recv 0.061280, ring 0.004294, delay 0.000001)
145
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO MNNVL busId 0x67020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
146
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO MNNVL busId 0x6b020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
147
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO MNNVL busId 0x75020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
148
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO MNNVL busId 0x6f020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
149
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO MNNVL busId 0x65040 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
150
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO MNNVL busId 0x69020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
151
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO MNNVL busId 0x71020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
152
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO MNNVL busId 0x73020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
153
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
154
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
155
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
156
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
157
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
158
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
159
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
160
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
161
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Setting affinity for GPU 3 to 03ffffff,ffffffff,ffffffff
162
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Setting affinity for GPU 2 to 03ffffff,ffffffff,ffffffff
163
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Setting affinity for GPU 1 to 03ffffff,ffffffff,ffffffff
164
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Setting affinity for GPU 4 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
165
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Setting affinity for GPU 0 to 03ffffff,ffffffff,ffffffff
166
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Setting affinity for GPU 7 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
167
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Setting affinity for GPU 5 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
168
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Setting affinity for GPU 6 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
169
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NVLS multicast support is available on dev 5
170
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NVLS multicast support is available on dev 7
171
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NVLS multicast support is available on dev 2
172
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NVLS multicast support is available on dev 4
173
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NVLS multicast support is available on dev 3
174
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NVLS multicast support is available on dev 6
175
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NVLS multicast support is available on dev 1
176
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NVLS multicast support is available on dev 0
177
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO comm 0xaa777e0 rank 7 nRanks 8 nNodes 1 localRanks 8 localRank 7 MNNVL 0
178
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO comm 0xa99de10 rank 6 nRanks 8 nNodes 1 localRanks 8 localRank 6 MNNVL 0
179
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO comm 0x98ac490 rank 2 nRanks 8 nNodes 1 localRanks 8 localRank 2 MNNVL 0
180
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO comm 0x9b15700 rank 3 nRanks 8 nNodes 1 localRanks 8 localRank 3 MNNVL 0
181
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO comm 0x98a1870 rank 1 nRanks 8 nNodes 1 localRanks 8 localRank 1 MNNVL 0
182
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO comm 0xa87b0d0 rank 5 nRanks 8 nNodes 1 localRanks 8 localRank 5 MNNVL 0
183
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO comm 0xbd43ac0 rank 0 nRanks 8 nNodes 1 localRanks 8 localRank 0 MNNVL 0
184
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO comm 0xa1cc500 rank 4 nRanks 8 nNodes 1 localRanks 8 localRank 4 MNNVL 0
185
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 [2] -1/-1/-1->7->6 [3] -1/-1/-1->7->6 [4] -1/-1/-1->7->6 [5] -1/-1/-1->7->6 [6] -1/-1/-1->7->6 [7] -1/-1/-1->7->6 [8] -1/-1/-1->7->6 [9] -1/-1/-1->7->6 [10] -1/-1/-1->7->6 [11] -1/-1/-1->7->6 [12] -1/-1/-1->7->6 [13] -1/-1/-1->7->6 [14] -1/-1/-1->7->6 [15] -1/-1/-1->7->6 [16] -1/-1/-1->7->6 [17] -1/-1/-1->7->6 [18] -1/-1/-1->7->6 [19] -1/-1/-1->7->6 [20] -1/-1/-1->7->6 [21] -1/-1/-1->7->6 [22] -1/-1/-1->7->6 [23] -1/-1/-1->7->6
186
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 [2] 7/-1/-1->6->5 [3] 7/-1/-1->6->5 [4] 7/-1/-1->6->5 [5] 7/-1/-1->6->5 [6] 7/-1/-1->6->5 [7] 7/-1/-1->6->5 [8] 7/-1/-1->6->5 [9] 7/-1/-1->6->5 [10] 7/-1/-1->6->5 [11] 7/-1/-1->6->5 [12] 7/-1/-1->6->5 [13] 7/-1/-1->6->5 [14] 7/-1/-1->6->5 [15] 7/-1/-1->6->5 [16] 7/-1/-1->6->5 [17] 7/-1/-1->6->5 [18] 7/-1/-1->6->5 [19] 7/-1/-1->6->5 [20] 7/-1/-1->6->5 [21] 7/-1/-1->6->5 [22] 7/-1/-1->6->5 [23] 7/-1/-1->6->5
187
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO P2P Chunksize set to 524288
188
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO P2P Chunksize set to 524288
189
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1
190
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2
191
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 7
192
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO P2P Chunksize set to 524288
193
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4
194
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO P2P Chunksize set to 524288
195
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 7
196
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0
197
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 7
198
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO P2P Chunksize set to 524288
199
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3
200
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO P2P Chunksize set to 524288
201
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 7
202
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO P2P Chunksize set to 524288
203
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 7
204
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 7
205
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 7
206
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 7
207
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 7
208
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 7
209
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 7
210
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 7
211
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 7
212
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 7
213
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 7
214
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 7
215
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 7
216
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 7
217
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 7
218
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 7
219
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 7
220
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 7
221
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 7
222
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 7
223
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1
224
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO P2P Chunksize set to 524288
225
+ t-20260513102957-v877z-worker-0:10225:10374 [3] NCCL INFO [Proxy Service] Device 3 CPU core 48
226
+ t-20260513102957-v877z-worker-0:10225:10375 [3] NCCL INFO [Proxy Service UDS] Device 3 CPU core 50
227
+ t-20260513102957-v877z-worker-0:10223:10376 [1] NCCL INFO [Proxy Service] Device 1 CPU core 2
228
+ t-20260513102957-v877z-worker-0:10223:10377 [1] NCCL INFO [Proxy Service UDS] Device 1 CPU core 4
229
+ t-20260513102957-v877z-worker-0:10229:10378 [7] NCCL INFO [Proxy Service] Device 7 CPU core 146
230
+ t-20260513102957-v877z-worker-0:10229:10379 [7] NCCL INFO [Proxy Service UDS] Device 7 CPU core 150
231
+ t-20260513102957-v877z-worker-0:10224:10380 [2] NCCL INFO [Proxy Service] Device 2 CPU core 48
232
+ t-20260513102957-v877z-worker-0:10224:10381 [2] NCCL INFO [Proxy Service UDS] Device 2 CPU core 50
233
+ t-20260513102957-v877z-worker-0:10227:10382 [5] NCCL INFO [Proxy Service] Device 5 CPU core 106
234
+ t-20260513102957-v877z-worker-0:10227:10383 [5] NCCL INFO [Proxy Service UDS] Device 5 CPU core 108
235
+ t-20260513102957-v877z-worker-0:10228:10384 [6] NCCL INFO [Proxy Service] Device 6 CPU core 94
236
+ t-20260513102957-v877z-worker-0:10228:10385 [6] NCCL INFO [Proxy Service UDS] Device 6 CPU core 98
237
+ t-20260513102957-v877z-worker-0:10226:10386 [4] NCCL INFO [Proxy Service] Device 4 CPU core 94
238
+ t-20260513102957-v877z-worker-0:10226:10387 [4] NCCL INFO [Proxy Service UDS] Device 4 CPU core 96
239
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Check P2P Type intraNodeP2pSupport 1 directMode 0
240
+ t-20260513102957-v877z-worker-0:10222:10388 [0] NCCL INFO [Proxy Service] Device 0 CPU core 52
241
+ t-20260513102957-v877z-worker-0:10222:10389 [0] NCCL INFO [Proxy Service UDS] Device 0 CPU core 48
242
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
243
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
244
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
245
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
246
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
247
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
248
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
249
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
250
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
251
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
252
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
253
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
254
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
255
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
256
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
257
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
258
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO CC Off, workFifoBytes 1048576
259
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
260
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
261
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
262
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
263
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
264
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
265
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
266
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
267
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
268
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
269
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
270
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
271
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
272
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
273
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
274
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
275
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
276
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO ncclCommInitRankConfig comm 0xa1cc500 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 6f020 commId 0xc3d8f44253f33569 - Init COMPLETE
277
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
278
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
279
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
280
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO ncclCommInitRankConfig comm 0xa99de10 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId 73020 commId 0xc3d8f44253f33569 - Init COMPLETE
281
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO ncclCommInitRankConfig comm 0xaa777e0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId 75020 commId 0xc3d8f44253f33569 - Init COMPLETE
282
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO ncclCommInitRankConfig comm 0xa87b0d0 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 71020 commId 0xc3d8f44253f33569 - Init COMPLETE
283
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
284
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
285
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO ncclCommInitRankConfig comm 0xbd43ac0 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 65040 commId 0xc3d8f44253f33569 - Init COMPLETE
286
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
287
+ t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Init timings - ncclCommInitRankConfig: rank 4 nranks 8 total 2.15 (kernels 0.20, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
288
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
289
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO ncclCommInitRankConfig comm 0x98ac490 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 69020 commId 0xc3d8f44253f33569 - Init COMPLETE
290
+ t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Init timings - ncclCommInitRankConfig: rank 6 nranks 8 total 2.21 (kernels 0.22, alloc 1.03, bootstrap 0.02, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
291
+ t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Init timings - ncclCommInitRankConfig: rank 5 nranks 8 total 2.16 (kernels 0.20, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
292
+ t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Init timings - ncclCommInitRankConfig: rank 7 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
293
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO ncclCommInitRankConfig comm 0x9b15700 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 6b020 commId 0xc3d8f44253f33569 - Init COMPLETE
294
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO ncclCommInitRankConfig comm 0x98a1870 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 67020 commId 0xc3d8f44253f33569 - Init COMPLETE
295
+ t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Init timings - ncclCommInitRankConfig: rank 0 nranks 8 total 2.21 (kernels 0.21, alloc 0.99, bootstrap 0.07, allgathers 0.00, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
296
+ t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Init timings - ncclCommInitRankConfig: rank 2 nranks 8 total 2.21 (kernels 0.21, alloc 0.98, bootstrap 0.08, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
297
+ t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Init timings - ncclCommInitRankConfig: rank 3 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.01, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.02)
298
+ t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Init timings - ncclCommInitRankConfig: rank 1 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.01, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
299
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 00/0 : 4[4] -> 5[5] via P2P/CUMEM
300
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 00/0 : 1[1] -> 2[2] via P2P/CUMEM
301
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 01/0 : 4[4] -> 5[5] via P2P/CUMEM
302
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 01/0 : 1[1] -> 2[2] via P2P/CUMEM
303
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 02/0 : 4[4] -> 5[5] via P2P/CUMEM
304
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 02/0 : 1[1] -> 2[2] via P2P/CUMEM
305
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 03/0 : 4[4] -> 5[5] via P2P/CUMEM
306
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 03/0 : 1[1] -> 2[2] via P2P/CUMEM
307
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 04/0 : 4[4] -> 5[5] via P2P/CUMEM
308
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 00/0 : 7[7] -> 0[0] via P2P/CUMEM
309
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 04/0 : 1[1] -> 2[2] via P2P/CUMEM
310
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 05/0 : 4[4] -> 5[5] via P2P/CUMEM
311
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 01/0 : 7[7] -> 0[0] via P2P/CUMEM
312
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 05/0 : 1[1] -> 2[2] via P2P/CUMEM
313
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 06/0 : 4[4] -> 5[5] via P2P/CUMEM
314
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 02/0 : 7[7] -> 0[0] via P2P/CUMEM
315
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 06/0 : 1[1] -> 2[2] via P2P/CUMEM
316
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 07/0 : 4[4] -> 5[5] via P2P/CUMEM
317
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 03/0 : 7[7] -> 0[0] via P2P/CUMEM
318
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 07/0 : 1[1] -> 2[2] via P2P/CUMEM
319
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 08/0 : 4[4] -> 5[5] via P2P/CUMEM
320
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 00/0 : 6[6] -> 7[7] via P2P/CUMEM
321
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 04/0 : 7[7] -> 0[0] via P2P/CUMEM
322
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 08/0 : 1[1] -> 2[2] via P2P/CUMEM
323
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 09/0 : 4[4] -> 5[5] via P2P/CUMEM
324
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 01/0 : 6[6] -> 7[7] via P2P/CUMEM
325
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 05/0 : 7[7] -> 0[0] via P2P/CUMEM
326
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 00/0 : 3[3] -> 4[4] via P2P/CUMEM
327
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 09/0 : 1[1] -> 2[2] via P2P/CUMEM
328
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 00/0 : 0[0] -> 1[1] via P2P/CUMEM
329
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 10/0 : 4[4] -> 5[5] via P2P/CUMEM
330
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 02/0 : 6[6] -> 7[7] via P2P/CUMEM
331
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 06/0 : 7[7] -> 0[0] via P2P/CUMEM
332
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 01/0 : 3[3] -> 4[4] via P2P/CUMEM
333
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 10/0 : 1[1] -> 2[2] via P2P/CUMEM
334
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 01/0 : 0[0] -> 1[1] via P2P/CUMEM
335
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 11/0 : 4[4] -> 5[5] via P2P/CUMEM
336
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 03/0 : 6[6] -> 7[7] via P2P/CUMEM
337
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 07/0 : 7[7] -> 0[0] via P2P/CUMEM
338
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 02/0 : 3[3] -> 4[4] via P2P/CUMEM
339
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 11/0 : 1[1] -> 2[2] via P2P/CUMEM
340
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 02/0 : 0[0] -> 1[1] via P2P/CUMEM
341
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 12/0 : 4[4] -> 5[5] via P2P/CUMEM
342
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 04/0 : 6[6] -> 7[7] via P2P/CUMEM
343
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 08/0 : 7[7] -> 0[0] via P2P/CUMEM
344
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 03/0 : 3[3] -> 4[4] via P2P/CUMEM
345
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 12/0 : 1[1] -> 2[2] via P2P/CUMEM
346
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 03/0 : 0[0] -> 1[1] via P2P/CUMEM
347
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 13/0 : 4[4] -> 5[5] via P2P/CUMEM
348
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 05/0 : 6[6] -> 7[7] via P2P/CUMEM
349
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 09/0 : 7[7] -> 0[0] via P2P/CUMEM
350
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 04/0 : 3[3] -> 4[4] via P2P/CUMEM
351
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 13/0 : 1[1] -> 2[2] via P2P/CUMEM
352
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 04/0 : 0[0] -> 1[1] via P2P/CUMEM
353
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 14/0 : 4[4] -> 5[5] via P2P/CUMEM
354
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 06/0 : 6[6] -> 7[7] via P2P/CUMEM
355
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 10/0 : 7[7] -> 0[0] via P2P/CUMEM
356
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 05/0 : 3[3] -> 4[4] via P2P/CUMEM
357
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 14/0 : 1[1] -> 2[2] via P2P/CUMEM
358
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 05/0 : 0[0] -> 1[1] via P2P/CUMEM
359
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 15/0 : 4[4] -> 5[5] via P2P/CUMEM
360
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 07/0 : 6[6] -> 7[7] via P2P/CUMEM
361
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 11/0 : 7[7] -> 0[0] via P2P/CUMEM
362
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 06/0 : 3[3] -> 4[4] via P2P/CUMEM
363
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 15/0 : 1[1] -> 2[2] via P2P/CUMEM
364
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 06/0 : 0[0] -> 1[1] via P2P/CUMEM
365
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 16/0 : 4[4] -> 5[5] via P2P/CUMEM
366
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 08/0 : 6[6] -> 7[7] via P2P/CUMEM
367
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 12/0 : 7[7] -> 0[0] via P2P/CUMEM
368
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 07/0 : 3[3] -> 4[4] via P2P/CUMEM
369
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 16/0 : 1[1] -> 2[2] via P2P/CUMEM
370
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 07/0 : 0[0] -> 1[1] via P2P/CUMEM
371
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 17/0 : 4[4] -> 5[5] via P2P/CUMEM
372
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 09/0 : 6[6] -> 7[7] via P2P/CUMEM
373
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 00/0 : 5[5] -> 6[6] via P2P/CUMEM
374
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 13/0 : 7[7] -> 0[0] via P2P/CUMEM
375
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 08/0 : 3[3] -> 4[4] via P2P/CUMEM
376
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 17/0 : 1[1] -> 2[2] via P2P/CUMEM
377
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 08/0 : 0[0] -> 1[1] via P2P/CUMEM
378
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 18/0 : 4[4] -> 5[5] via P2P/CUMEM
379
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 10/0 : 6[6] -> 7[7] via P2P/CUMEM
380
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 01/0 : 5[5] -> 6[6] via P2P/CUMEM
381
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 14/0 : 7[7] -> 0[0] via P2P/CUMEM
382
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 09/0 : 3[3] -> 4[4] via P2P/CUMEM
383
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 18/0 : 1[1] -> 2[2] via P2P/CUMEM
384
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 09/0 : 0[0] -> 1[1] via P2P/CUMEM
385
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 19/0 : 4[4] -> 5[5] via P2P/CUMEM
386
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 11/0 : 6[6] -> 7[7] via P2P/CUMEM
387
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 00/0 : 2[2] -> 3[3] via P2P/CUMEM
388
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 02/0 : 5[5] -> 6[6] via P2P/CUMEM
389
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 15/0 : 7[7] -> 0[0] via P2P/CUMEM
390
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 10/0 : 3[3] -> 4[4] via P2P/CUMEM
391
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 19/0 : 1[1] -> 2[2] via P2P/CUMEM
392
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 10/0 : 0[0] -> 1[1] via P2P/CUMEM
393
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 20/0 : 4[4] -> 5[5] via P2P/CUMEM
394
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 12/0 : 6[6] -> 7[7] via P2P/CUMEM
395
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 01/0 : 2[2] -> 3[3] via P2P/CUMEM
396
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 03/0 : 5[5] -> 6[6] via P2P/CUMEM
397
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 16/0 : 7[7] -> 0[0] via P2P/CUMEM
398
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 11/0 : 3[3] -> 4[4] via P2P/CUMEM
399
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 20/0 : 1[1] -> 2[2] via P2P/CUMEM
400
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 11/0 : 0[0] -> 1[1] via P2P/CUMEM
401
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 21/0 : 4[4] -> 5[5] via P2P/CUMEM
402
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 13/0 : 6[6] -> 7[7] via P2P/CUMEM
403
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 02/0 : 2[2] -> 3[3] via P2P/CUMEM
404
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 04/0 : 5[5] -> 6[6] via P2P/CUMEM
405
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 17/0 : 7[7] -> 0[0] via P2P/CUMEM
406
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 12/0 : 3[3] -> 4[4] via P2P/CUMEM
407
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 21/0 : 1[1] -> 2[2] via P2P/CUMEM
408
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 12/0 : 0[0] -> 1[1] via P2P/CUMEM
409
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 22/0 : 4[4] -> 5[5] via P2P/CUMEM
410
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 14/0 : 6[6] -> 7[7] via P2P/CUMEM
411
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 03/0 : 2[2] -> 3[3] via P2P/CUMEM
412
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 05/0 : 5[5] -> 6[6] via P2P/CUMEM
413
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 18/0 : 7[7] -> 0[0] via P2P/CUMEM
414
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 13/0 : 3[3] -> 4[4] via P2P/CUMEM
415
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 22/0 : 1[1] -> 2[2] via P2P/CUMEM
416
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 13/0 : 0[0] -> 1[1] via P2P/CUMEM
417
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 23/0 : 4[4] -> 5[5] via P2P/CUMEM
418
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 15/0 : 6[6] -> 7[7] via P2P/CUMEM
419
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 04/0 : 2[2] -> 3[3] via P2P/CUMEM
420
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 06/0 : 5[5] -> 6[6] via P2P/CUMEM
421
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 19/0 : 7[7] -> 0[0] via P2P/CUMEM
422
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 14/0 : 3[3] -> 4[4] via P2P/CUMEM
423
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 23/0 : 1[1] -> 2[2] via P2P/CUMEM
424
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 14/0 : 0[0] -> 1[1] via P2P/CUMEM
425
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 16/0 : 6[6] -> 7[7] via P2P/CUMEM
426
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 05/0 : 2[2] -> 3[3] via P2P/CUMEM
427
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 07/0 : 5[5] -> 6[6] via P2P/CUMEM
428
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 20/0 : 7[7] -> 0[0] via P2P/CUMEM
429
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 15/0 : 3[3] -> 4[4] via P2P/CUMEM
430
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 15/0 : 0[0] -> 1[1] via P2P/CUMEM
431
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 17/0 : 6[6] -> 7[7] via P2P/CUMEM
432
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 06/0 : 2[2] -> 3[3] via P2P/CUMEM
433
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 08/0 : 5[5] -> 6[6] via P2P/CUMEM
434
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 21/0 : 7[7] -> 0[0] via P2P/CUMEM
435
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 16/0 : 3[3] -> 4[4] via P2P/CUMEM
436
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 16/0 : 0[0] -> 1[1] via P2P/CUMEM
437
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 18/0 : 6[6] -> 7[7] via P2P/CUMEM
438
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 07/0 : 2[2] -> 3[3] via P2P/CUMEM
439
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 09/0 : 5[5] -> 6[6] via P2P/CUMEM
440
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 22/0 : 7[7] -> 0[0] via P2P/CUMEM
441
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 17/0 : 3[3] -> 4[4] via P2P/CUMEM
442
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 17/0 : 0[0] -> 1[1] via P2P/CUMEM
443
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 19/0 : 6[6] -> 7[7] via P2P/CUMEM
444
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 08/0 : 2[2] -> 3[3] via P2P/CUMEM
445
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 10/0 : 5[5] -> 6[6] via P2P/CUMEM
446
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 23/0 : 7[7] -> 0[0] via P2P/CUMEM
447
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 18/0 : 3[3] -> 4[4] via P2P/CUMEM
448
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 18/0 : 0[0] -> 1[1] via P2P/CUMEM
449
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 20/0 : 6[6] -> 7[7] via P2P/CUMEM
450
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 09/0 : 2[2] -> 3[3] via P2P/CUMEM
451
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 11/0 : 5[5] -> 6[6] via P2P/CUMEM
452
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 19/0 : 3[3] -> 4[4] via P2P/CUMEM
453
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 19/0 : 0[0] -> 1[1] via P2P/CUMEM
454
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 21/0 : 6[6] -> 7[7] via P2P/CUMEM
455
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 10/0 : 2[2] -> 3[3] via P2P/CUMEM
456
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 12/0 : 5[5] -> 6[6] via P2P/CUMEM
457
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 20/0 : 3[3] -> 4[4] via P2P/CUMEM
458
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 20/0 : 0[0] -> 1[1] via P2P/CUMEM
459
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 22/0 : 6[6] -> 7[7] via P2P/CUMEM
460
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 11/0 : 2[2] -> 3[3] via P2P/CUMEM
461
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 13/0 : 5[5] -> 6[6] via P2P/CUMEM
462
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 21/0 : 3[3] -> 4[4] via P2P/CUMEM
463
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 21/0 : 0[0] -> 1[1] via P2P/CUMEM
464
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 23/0 : 6[6] -> 7[7] via P2P/CUMEM
465
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 12/0 : 2[2] -> 3[3] via P2P/CUMEM
466
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 14/0 : 5[5] -> 6[6] via P2P/CUMEM
467
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 22/0 : 3[3] -> 4[4] via P2P/CUMEM
468
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 22/0 : 0[0] -> 1[1] via P2P/CUMEM
469
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 13/0 : 2[2] -> 3[3] via P2P/CUMEM
470
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 15/0 : 5[5] -> 6[6] via P2P/CUMEM
471
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 23/0 : 3[3] -> 4[4] via P2P/CUMEM
472
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 23/0 : 0[0] -> 1[1] via P2P/CUMEM
473
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 14/0 : 2[2] -> 3[3] via P2P/CUMEM
474
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 16/0 : 5[5] -> 6[6] via P2P/CUMEM
475
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 15/0 : 2[2] -> 3[3] via P2P/CUMEM
476
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 17/0 : 5[5] -> 6[6] via P2P/CUMEM
477
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 16/0 : 2[2] -> 3[3] via P2P/CUMEM
478
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 18/0 : 5[5] -> 6[6] via P2P/CUMEM
479
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 17/0 : 2[2] -> 3[3] via P2P/CUMEM
480
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 19/0 : 5[5] -> 6[6] via P2P/CUMEM
481
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 18/0 : 2[2] -> 3[3] via P2P/CUMEM
482
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 20/0 : 5[5] -> 6[6] via P2P/CUMEM
483
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 19/0 : 2[2] -> 3[3] via P2P/CUMEM
484
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 21/0 : 5[5] -> 6[6] via P2P/CUMEM
485
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 20/0 : 2[2] -> 3[3] via P2P/CUMEM
486
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 22/0 : 5[5] -> 6[6] via P2P/CUMEM
487
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 21/0 : 2[2] -> 3[3] via P2P/CUMEM
488
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 23/0 : 5[5] -> 6[6] via P2P/CUMEM
489
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 22/0 : 2[2] -> 3[3] via P2P/CUMEM
490
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 23/0 : 2[2] -> 3[3] via P2P/CUMEM
491
+ t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
492
+ t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
493
+ t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
494
+ t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
495
+ t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
496
+ t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
497
+ t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
498
+ t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
499
+ {
500
+ "device": "cuda:0",
501
+ "rank": 0,
502
+ "world_size": 8,
503
+ "samples": "owt_cached_chunks:8734897",
504
+ "vocab_size": 50257,
505
+ "tokenizer_vocab_size": 50257,
506
+ "save_dir": "runs/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024",
507
+ "batch_size": 8,
508
+ "grad_accum": 8,
509
+ "effective_batch_size": 512,
510
+ "global_batch_size": 512,
511
+ "lr_schedule": "constant_warmup",
512
+ "optimizer": "muon",
513
+ "warmup_steps": 8531,
514
+ "min_lr": 0.0,
515
+ "weight_decay": 0.0,
516
+ "adamw_param_groups": "nanogpt",
517
+ "adam_beta1": 0.9,
518
+ "adam_beta2": 0.95,
519
+ "adam_eps": 1e-08,
520
+ "muon_momentum": 0.95,
521
+ "muon_ns_steps": 5,
522
+ "muon_update_scale": 1.0,
523
+ "ema_decay": 0.9999,
524
+ "ema_start_step": 0,
525
+ "model_type": "ddit",
526
+ "dual_t": true,
527
+ "corrupt_t_mode": "independent",
528
+ "corrupt_min_t": null,
529
+ "corrupt_max_t": null,
530
+ "prefix_block_prob": 0.0,
531
+ "prefix_block_len": 128,
532
+ "dirichlet_endpoint_mode": "categorical_dual_t",
533
+ "dirichlet_semantic_t_mode": "same",
534
+ "dirichlet_semantic_t_value": 0.0,
535
+ "categorical_wrong_from_full_vocab": true,
536
+ "categorical_wrong_from_batch_valid_tokens": false,
537
+ "mask_mixture_original_prob": 0.0,
538
+ "mask_mixture_lowk_prob": 0.0,
539
+ "mask_mixture_lowcorrupt_prob": 0.0,
540
+ "mask_mixture_block_prob": 0.0,
541
+ "mask_mixture_all_prob": 0.0,
542
+ "mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
543
+ "mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
544
+ "mask_mixture_block_tokens": "64,128",
545
+ "simplex_bridge_sampler": "dirichlet",
546
+ "logistic_normal_sigma_min": 0.18,
547
+ "logistic_normal_sigma_max": 2.2,
548
+ "logistic_normal_tau_min": 0.65,
549
+ "logistic_normal_tau_max": 1.15,
550
+ "torch_compile": false,
551
+ "compile_mode": "max-autotune",
552
+ "state_format": "prob",
553
+ "target_loss": "hard_ce",
554
+ "meanflow_weight": 0.0,
555
+ "bridge_noise_init": "logistic_normal",
556
+ "noise_sigma": -1.0,
557
+ "wrap": true,
558
+ "wrap_mode": "stream",
559
+ "wrap_record_buffer_size": 200,
560
+ "owt_cached_chunks": true,
561
+ "owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k",
562
+ "owt_chunk_cache_rebuild": false,
563
+ "owt_chunk_cache_write_batch": 4096,
564
+ "owt_exact_repeat_per_chunk": 0,
565
+ "online_chunk_shuffle": false,
566
+ "online_chunk_shuffle_buffer": 10000,
567
+ "openwebtext_split": "all",
568
+ "detokenizer": "auto",
569
+ "resolved_detokenizer": null,
570
+ "num_workers": 0,
571
+ "latest_every": 1000,
572
+ "resume_path": ""
573
+ }
574
+ t-20260513102957-v877z-worker-0:10222:10486 [0] NCCL INFO NVLS comm 0xbd43ac0 headRank 0 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
575
+ t-20260513102957-v877z-worker-0:10226:10487 [4] NCCL INFO NVLS comm 0xa1cc500 headRank 4 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
576
+ t-20260513102957-v877z-worker-0:10223:10488 [1] NCCL INFO NVLS comm 0x98a1870 headRank 1 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
577
+ t-20260513102957-v877z-worker-0:10224:10489 [2] NCCL INFO NVLS comm 0x98ac490 headRank 2 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
578
+ t-20260513102957-v877z-worker-0:10229:10490 [7] NCCL INFO NVLS comm 0xaa777e0 headRank 7 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
579
+ t-20260513102957-v877z-worker-0:10228:10491 [6] NCCL INFO NVLS comm 0xa99de10 headRank 6 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
580
+ t-20260513102957-v877z-worker-0:10227:10492 [5] NCCL INFO NVLS comm 0xa87b0d0 headRank 5 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
581
+ t-20260513102957-v877z-worker-0:10225:10493 [3] NCCL INFO NVLS comm 0x9b15700 headRank 3 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
582
+ step=50 micro_steps=400 elapsed=48.6s lr=1.195639e-05 loss_all=10.8125 acc_all=0.5523 loss_corrupt=10.8125 acc_corrupt=0.3757 corrupt_frac=0.5552 loss=10.8125 loss_recon=10.8125 loss_meanflow=0.0000 mean_model_t=0.4944 mean_corrupt_t=0.5036 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4934 init_acc_corrupt=0.4725 init_gold_top10=0.5009 init_gold_top100=0.5309
583
+ step=100 micro_steps=800 elapsed=47.4s lr=2.367835e-05 loss_all=10.8125 acc_all=0.5656 loss_corrupt=10.8125 acc_corrupt=0.3881 corrupt_frac=0.5519 loss=10.8125 loss_recon=10.8125 loss_meanflow=0.0000 mean_model_t=0.4959 mean_corrupt_t=0.5064 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4916 init_acc_corrupt=0.4749 init_gold_top10=0.5030 init_gold_top100=0.5319
584
+ step=150 micro_steps=1200 elapsed=47.3s lr=3.540030e-05 loss_all=10.7976 acc_all=0.5622 loss_corrupt=10.8031 acc_corrupt=0.3899 corrupt_frac=0.5621 loss=10.8031 loss_recon=10.8031 loss_meanflow=0.0000 mean_model_t=0.5121 mean_corrupt_t=0.5055 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4925 init_acc_corrupt=0.4729 init_gold_top10=0.5021 init_gold_top100=0.5300
585
+ step=200 micro_steps=1600 elapsed=47.3s lr=4.712226e-05 loss_all=10.7770 acc_all=0.5614 loss_corrupt=10.7891 acc_corrupt=0.3900 corrupt_frac=0.5531 loss=10.7891 loss_recon=10.7891 loss_meanflow=0.0000 mean_model_t=0.5059 mean_corrupt_t=0.5118 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4887 init_acc_corrupt=0.4772 init_gold_top10=0.5058 init_gold_top100=0.5343
586
+ step=250 micro_steps=2000 elapsed=47.7s lr=5.884422e-05 loss_all=10.7465 acc_all=0.5551 loss_corrupt=10.7697 acc_corrupt=0.3777 corrupt_frac=0.5506 loss=10.7697 loss_recon=10.7697 loss_meanflow=0.0000 mean_model_t=0.4963 mean_corrupt_t=0.5016 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5008 init_acc_corrupt=0.4645 init_gold_top10=0.4934 init_gold_top100=0.5233
587
+ step=300 micro_steps=2400 elapsed=47.4s lr=7.056617e-05 loss_all=10.7068 acc_all=0.5429 loss_corrupt=10.7407 acc_corrupt=0.3755 corrupt_frac=0.5615 loss=10.7407 loss_recon=10.7407 loss_meanflow=0.0000 mean_model_t=0.4975 mean_corrupt_t=0.5060 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4925 init_acc_corrupt=0.4735 init_gold_top10=0.5019 init_gold_top100=0.5309
588
+ step=350 micro_steps=2800 elapsed=47.2s lr=8.228813e-05 loss_all=10.6568 acc_all=0.5378 loss_corrupt=10.7063 acc_corrupt=0.3645 corrupt_frac=0.5504 loss=10.7063 loss_recon=10.7063 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.5088 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4963 init_acc_corrupt=0.4697 init_gold_top10=0.4980 init_gold_top100=0.5278
589
+ step=400 micro_steps=3200 elapsed=47.6s lr=9.401008e-05 loss_all=10.5988 acc_all=0.5316 loss_corrupt=10.6653 acc_corrupt=0.3571 corrupt_frac=0.5454 loss=10.6653 loss_recon=10.6653 loss_meanflow=0.0000 mean_model_t=0.4950 mean_corrupt_t=0.5016 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5001 init_acc_corrupt=0.4643 init_gold_top10=0.4942 init_gold_top100=0.5236
590
+ step=450 micro_steps=3600 elapsed=47.2s lr=1.057320e-04 loss_all=10.5302 acc_all=0.5254 loss_corrupt=10.6143 acc_corrupt=0.3554 corrupt_frac=0.5518 loss=10.6143 loss_recon=10.6143 loss_meanflow=0.0000 mean_model_t=0.5052 mean_corrupt_t=0.4974 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4970 init_acc_corrupt=0.4686 init_gold_top10=0.4972 init_gold_top100=0.5272
591
+ step=500 micro_steps=4000 elapsed=47.2s lr=1.174540e-04 loss_all=10.4504 acc_all=0.5179 loss_corrupt=10.5520 acc_corrupt=0.3546 corrupt_frac=0.5567 loss=10.5520 loss_recon=10.5520 loss_meanflow=0.0000 mean_model_t=0.5000 mean_corrupt_t=0.5107 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4885 init_acc_corrupt=0.4787 init_gold_top10=0.5065 init_gold_top100=0.5332
592
+ step=550 micro_steps=4400 elapsed=47.2s lr=1.291759e-04 loss_all=10.3596 acc_all=0.5119 loss_corrupt=10.4883 acc_corrupt=0.3445 corrupt_frac=0.5446 loss=10.4883 loss_recon=10.4883 loss_meanflow=0.0000 mean_model_t=0.5052 mean_corrupt_t=0.5019 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4987 init_acc_corrupt=0.4672 init_gold_top10=0.4957 init_gold_top100=0.5252
593
+ step=600 micro_steps=4800 elapsed=47.4s lr=1.408979e-04 loss_all=10.2615 acc_all=0.5024 loss_corrupt=10.4195 acc_corrupt=0.3340 corrupt_frac=0.5407 loss=10.4195 loss_recon=10.4195 loss_meanflow=0.0000 mean_model_t=0.5048 mean_corrupt_t=0.4963 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5042 init_acc_corrupt=0.4609 init_gold_top10=0.4897 init_gold_top100=0.5213
594
+ step=650 micro_steps=5200 elapsed=47.2s lr=1.526199e-04 loss_all=10.1555 acc_all=0.4913 loss_corrupt=10.3425 acc_corrupt=0.3260 corrupt_frac=0.5479 loss=10.3425 loss_recon=10.3425 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4946 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5096 init_acc_corrupt=0.4549 init_gold_top10=0.4844 init_gold_top100=0.5153
595
+ step=700 micro_steps=5600 elapsed=47.2s lr=1.643418e-04 loss_all=10.0314 acc_all=0.4856 loss_corrupt=10.2464 acc_corrupt=0.3260 corrupt_frac=0.5511 loss=10.2464 loss_recon=10.2464 loss_meanflow=0.0000 mean_model_t=0.4999 mean_corrupt_t=0.4957 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5051 init_acc_corrupt=0.4599 init_gold_top10=0.4890 init_gold_top100=0.5194
596
+ step=750 micro_steps=6000 elapsed=47.3s lr=1.760638e-04 loss_all=9.8863 acc_all=0.4836 loss_corrupt=10.1351 acc_corrupt=0.3265 corrupt_frac=0.5437 loss=10.1351 loss_recon=10.1351 loss_meanflow=0.0000 mean_model_t=0.5055 mean_corrupt_t=0.5025 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4950 init_acc_corrupt=0.4706 init_gold_top10=0.4993 init_gold_top100=0.5293
597
+ step=800 micro_steps=6400 elapsed=47.4s lr=1.877857e-04 loss_all=9.7404 acc_all=0.4758 loss_corrupt=10.0222 acc_corrupt=0.3227 corrupt_frac=0.5505 loss=10.0222 loss_recon=10.0222 loss_meanflow=0.0000 mean_model_t=0.5058 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4982 init_acc_corrupt=0.4659 init_gold_top10=0.4959 init_gold_top100=0.5264
598
+ step=850 micro_steps=6800 elapsed=47.2s lr=1.995077e-04 loss_all=9.5817 acc_all=0.4695 loss_corrupt=9.8946 acc_corrupt=0.3218 corrupt_frac=0.5568 loss=9.8946 loss_recon=9.8946 loss_meanflow=0.0000 mean_model_t=0.5045 mean_corrupt_t=0.5047 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4955 init_acc_corrupt=0.4712 init_gold_top10=0.4988 init_gold_top100=0.5291
599
+ step=900 micro_steps=7200 elapsed=47.2s lr=2.112296e-04 loss_all=9.4086 acc_all=0.4659 loss_corrupt=9.7618 acc_corrupt=0.3203 corrupt_frac=0.5578 loss=9.7618 loss_recon=9.7618 loss_meanflow=0.0000 mean_model_t=0.4962 mean_corrupt_t=0.5050 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4990 init_acc_corrupt=0.4659 init_gold_top10=0.4954 init_gold_top100=0.5254
600
+ step=950 micro_steps=7600 elapsed=47.2s lr=2.229516e-04 loss_all=9.2066 acc_all=0.4715 loss_corrupt=9.6143 acc_corrupt=0.3233 corrupt_frac=0.5466 loss=9.6143 loss_recon=9.6143 loss_meanflow=0.0000 mean_model_t=0.5056 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4970 init_acc_corrupt=0.4677 init_gold_top10=0.4974 init_gold_top100=0.5269
601
+ step=1000 micro_steps=8000 elapsed=48.0s lr=2.346735e-04 loss_all=9.0073 acc_all=0.4722 loss_corrupt=9.4652 acc_corrupt=0.3230 corrupt_frac=0.5506 loss=9.4652 loss_recon=9.4652 loss_meanflow=0.0000 mean_model_t=0.4978 mean_corrupt_t=0.4901 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5091 init_acc_corrupt=0.4545 init_gold_top10=0.4849 init_gold_top100=0.5158
602
+ step=1050 micro_steps=8400 elapsed=49.4s lr=2.463955e-04 loss_all=8.7539 acc_all=0.4867 loss_corrupt=9.2592 acc_corrupt=0.3387 corrupt_frac=0.5455 loss=9.2592 loss_recon=9.2592 loss_meanflow=0.0000 mean_model_t=0.5068 mean_corrupt_t=0.4987 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4964 init_acc_corrupt=0.4689 init_gold_top10=0.4977 init_gold_top100=0.5280
603
+ step=1100 micro_steps=8800 elapsed=47.3s lr=2.581175e-04 loss_all=8.5291 acc_all=0.4858 loss_corrupt=9.1069 acc_corrupt=0.3332 corrupt_frac=0.5487 loss=9.1069 loss_recon=9.1069 loss_meanflow=0.0000 mean_model_t=0.5076 mean_corrupt_t=0.4923 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5131 init_acc_corrupt=0.4507 init_gold_top10=0.4807 init_gold_top100=0.5131
604
+ step=1150 micro_steps=9200 elapsed=47.3s lr=2.698394e-04 loss_all=8.2256 acc_all=0.5009 loss_corrupt=8.8483 acc_corrupt=0.3518 corrupt_frac=0.5492 loss=8.8483 loss_recon=8.8483 loss_meanflow=0.0000 mean_model_t=0.4880 mean_corrupt_t=0.5105 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4939 init_acc_corrupt=0.4728 init_gold_top10=0.5006 init_gold_top100=0.5300
605
+ step=1200 micro_steps=9600 elapsed=47.2s lr=2.815614e-04 loss_all=7.9819 acc_all=0.4942 loss_corrupt=8.6724 acc_corrupt=0.3445 corrupt_frac=0.5594 loss=8.6724 loss_recon=8.6724 loss_meanflow=0.0000 mean_model_t=0.5078 mean_corrupt_t=0.4905 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5065 init_acc_corrupt=0.4569 init_gold_top10=0.4872 init_gold_top100=0.5201
606
+ step=1250 micro_steps=10000 elapsed=47.1s lr=2.932833e-04 loss_all=7.6269 acc_all=0.5052 loss_corrupt=8.4087 acc_corrupt=0.3514 corrupt_frac=0.5458 loss=8.4087 loss_recon=8.4087 loss_meanflow=0.0000 mean_model_t=0.4963 mean_corrupt_t=0.4999 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5008 init_acc_corrupt=0.4647 init_gold_top10=0.4933 init_gold_top100=0.5238
607
+ step=1300 micro_steps=10400 elapsed=47.2s lr=3.050053e-04 loss_all=7.3064 acc_all=0.5058 loss_corrupt=8.1541 acc_corrupt=0.3535 corrupt_frac=0.5543 loss=8.1541 loss_recon=8.1541 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4979 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5009 init_acc_corrupt=0.4644 init_gold_top10=0.4933 init_gold_top100=0.5236
608
+ step=1350 micro_steps=10800 elapsed=47.3s lr=3.167272e-04 loss_all=6.9403 acc_all=0.5105 loss_corrupt=7.8690 acc_corrupt=0.3577 corrupt_frac=0.5550 loss=7.8690 loss_recon=7.8690 loss_meanflow=0.0000 mean_model_t=0.4935 mean_corrupt_t=0.5024 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4980 init_acc_corrupt=0.4682 init_gold_top10=0.4961 init_gold_top100=0.5263
609
+ step=1400 micro_steps=11200 elapsed=47.3s lr=3.284492e-04 loss_all=6.6098 acc_all=0.5081 loss_corrupt=7.6173 acc_corrupt=0.3549 corrupt_frac=0.5601 loss=7.6173 loss_recon=7.6173 loss_meanflow=0.0000 mean_model_t=0.4999 mean_corrupt_t=0.4967 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5063 init_acc_corrupt=0.4582 init_gold_top10=0.4878 init_gold_top100=0.5183
610
+ step=1450 micro_steps=11600 elapsed=47.5s lr=3.401711e-04 loss_all=6.2064 acc_all=0.5188 loss_corrupt=7.2967 acc_corrupt=0.3645 corrupt_frac=0.5511 loss=7.2967 loss_recon=7.2967 loss_meanflow=0.0000 mean_model_t=0.4940 mean_corrupt_t=0.5044 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4966 init_acc_corrupt=0.4682 init_gold_top10=0.4978 init_gold_top100=0.5272
611
+ step=1500 micro_steps=12000 elapsed=47.1s lr=3.518931e-04 loss_all=5.8854 acc_all=0.5207 loss_corrupt=7.0334 acc_corrupt=0.3677 corrupt_frac=0.5555 loss=7.0334 loss_recon=7.0334 loss_meanflow=0.0000 mean_model_t=0.4951 mean_corrupt_t=0.5001 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4963 init_acc_corrupt=0.4692 init_gold_top10=0.4981 init_gold_top100=0.5273
612
+ step=1550 micro_steps=12400 elapsed=47.5s lr=3.636151e-04 loss_all=5.6304 acc_all=0.5225 loss_corrupt=6.8543 acc_corrupt=0.3653 corrupt_frac=0.5503 loss=6.8543 loss_recon=6.8543 loss_meanflow=0.0000 mean_model_t=0.4989 mean_corrupt_t=0.5031 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4987 init_acc_corrupt=0.4659 init_gold_top10=0.4958 init_gold_top100=0.5247
613
+ step=1600 micro_steps=12800 elapsed=47.3s lr=3.753370e-04 loss_all=5.4321 acc_all=0.5248 loss_corrupt=6.7023 acc_corrupt=0.3652 corrupt_frac=0.5462 loss=6.7023 loss_recon=6.7023 loss_meanflow=0.0000 mean_model_t=0.5041 mean_corrupt_t=0.4979 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5059 init_acc_corrupt=0.4579 init_gold_top10=0.4881 init_gold_top100=0.5198
614
+ step=1650 micro_steps=13200 elapsed=47.2s lr=3.870590e-04 loss_all=5.2358 acc_all=0.5320 loss_corrupt=6.4720 acc_corrupt=0.3799 corrupt_frac=0.5497 loss=6.4720 loss_recon=6.4720 loss_meanflow=0.0000 mean_model_t=0.5084 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4927 init_acc_corrupt=0.4734 init_gold_top10=0.5017 init_gold_top100=0.5309
615
+ step=1700 micro_steps=13600 elapsed=47.2s lr=3.987809e-04 loss_all=5.0956 acc_all=0.5353 loss_corrupt=6.3531 acc_corrupt=0.3809 corrupt_frac=0.5492 loss=6.3531 loss_recon=6.3531 loss_meanflow=0.0000 mean_model_t=0.4959 mean_corrupt_t=0.5046 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4967 init_acc_corrupt=0.4688 init_gold_top10=0.4975 init_gold_top100=0.5289
616
+ step=1750 micro_steps=14000 elapsed=47.1s lr=4.105029e-04 loss_all=4.9791 acc_all=0.5365 loss_corrupt=6.2240 acc_corrupt=0.3836 corrupt_frac=0.5514 loss=6.2240 loss_recon=6.2240 loss_meanflow=0.0000 mean_model_t=0.4896 mean_corrupt_t=0.5021 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4968 init_acc_corrupt=0.4682 init_gold_top10=0.4975 init_gold_top100=0.5266
617
+ step=1800 micro_steps=14400 elapsed=47.2s lr=4.222248e-04 loss_all=4.8119 acc_all=0.5441 loss_corrupt=6.0439 acc_corrupt=0.3934 corrupt_frac=0.5495 loss=6.0439 loss_recon=6.0439 loss_meanflow=0.0000 mean_model_t=0.5011 mean_corrupt_t=0.5048 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4894 init_acc_corrupt=0.4760 init_gold_top10=0.5053 init_gold_top100=0.5336
LTA_openwebtext_dualt/logs/fullycoupled_loss1mt_floor0p25_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_loss1mt_floor0p25_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260514_230726.log ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0010000_state_fromstate_t1p45.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] 2026-05-20_23:01:09 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
2
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt step=10000
3
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
6
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt", "step": 10000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 8.05236863767061, "nll_per_token": 2.085966288854589, "tokens": 7891, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 7.63913892725258, "nll_per_token": 2.0332848909696546, "tokens": 7911, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.8837622158001541, "unique_tokens": 274, "token_count": 8192, "distinct_1": 0.033447265625, "distinct_2": 0.09547244094488189, "top_token_mass": 0.3001708984375}}
7
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [watch-infer] 2026-05-20_23:01:31 done step_0010000
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0020000_state_fromstate_t1p45.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] 2026-05-21_01:03:31 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
2
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt step=20000
3
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
6
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt", "step": 20000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 43.29514508349064, "nll_per_token": 3.768040505939089, "tokens": 7277, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 44.71103365440866, "nll_per_token": 3.8002203090662934, "tokens": 7167, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.680576719579072, "unique_tokens": 989, "token_count": 8192, "distinct_1": 0.1207275390625, "distinct_2": 0.436884842519685, "top_token_mass": 0.0694580078125}}
7
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [watch-infer] 2026-05-21_01:04:00 done step_0020000
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0030000_state_fromstate_t1p45.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] 2026-05-21_03:42:01 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
2
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt step=30000
3
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
6
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt", "step": 30000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 32.519971378109084, "nll_per_token": 3.4818544045472755, "tokens": 7020, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 34.09647656081684, "nll_per_token": 3.529194052288641, "tokens": 6748, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.1030114352732854, "unique_tokens": 881, "token_count": 8192, "distinct_1": 0.1075439453125, "distinct_2": 0.3661417322834646, "top_token_mass": 0.100341796875}}
7
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [watch-infer] 2026-05-21_03:42:30 done step_0030000
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0040000_state_fromstate_t1p45.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] 2026-05-21_06:20:32 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
2
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt step=40000
3
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
6
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt", "step": 40000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 35.01775070788609, "nll_per_token": 3.555855096008144, "tokens": 7383, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 35.25691068369559, "nll_per_token": 3.5626615578078398, "tokens": 7376, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.4672587399947643, "unique_tokens": 1033, "token_count": 8192, "distinct_1": 0.1260986328125, "distinct_2": 0.42913385826771655, "top_token_mass": 0.06982421875}}
7
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [watch-infer] 2026-05-21_06:21:00 done step_0040000
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0050000_state_fromstate_t1p45.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] 2026-05-21_08:58:02 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
2
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt step=50000
3
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
4
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
5
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
6
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt", "step": 50000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 39.13239939115062, "nll_per_token": 3.666950752797587, "tokens": 5574, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 38.076086050208254, "nll_per_token": 3.6395864223457512, "tokens": 5543, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.8907815168852227, "unique_tokens": 899, "token_count": 8192, "distinct_1": 0.1097412109375, "distinct_2": 0.3246801181102362, "top_token_mass": 0.1265869140625}}
7
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [watch-infer] 2026-05-21_08:58:34 done step_0050000
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt
2
+ runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt
3
+ runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt
4
+ runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt
5
+ runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/watch_every10k_state_t1p45.nohup.log ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [watch-infer] run_dir=runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739
2
+ [watch-infer] out_base=docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45
3
+ [watch-infer] processed_file=logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt
4
+ [watch-infer] decode=dual_line_resample anchor=state final_from=state final_sample=argmax temp=1.45 steps=128 cmax=1024 n=64
5
+ [watch-infer] 2026-05-20_22:59:09 no step_*.pt yet
6
+ [watch-infer] 2026-05-20_23:00:09 no step_*.pt yet
7
+ [watch-infer] 2026-05-20_23:01:09 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
8
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt step=10000
9
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
10
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
11
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
12
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt", "step": 10000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 8.05236863767061, "nll_per_token": 2.085966288854589, "tokens": 7891, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 7.63913892725258, "nll_per_token": 2.0332848909696546, "tokens": 7911, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.8837622158001541, "unique_tokens": 274, "token_count": 8192, "distinct_1": 0.033447265625, "distinct_2": 0.09547244094488189, "top_token_mass": 0.3001708984375}}
13
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
14
+ [watch-infer] 2026-05-20_23:01:31 done step_0010000
15
+ [watch-infer] 2026-05-21_01:03:31 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
16
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt step=20000
17
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
18
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
19
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
20
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt", "step": 20000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 43.29514508349064, "nll_per_token": 3.768040505939089, "tokens": 7277, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 44.71103365440866, "nll_per_token": 3.8002203090662934, "tokens": 7167, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.680576719579072, "unique_tokens": 989, "token_count": 8192, "distinct_1": 0.1207275390625, "distinct_2": 0.436884842519685, "top_token_mass": 0.0694580078125}}
21
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
22
+ [watch-infer] 2026-05-21_01:04:00 done step_0020000
23
+ [watch-infer] 2026-05-21_03:42:01 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
24
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt step=30000
25
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
26
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
27
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
28
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt", "step": 30000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 32.519971378109084, "nll_per_token": 3.4818544045472755, "tokens": 7020, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 34.09647656081684, "nll_per_token": 3.529194052288641, "tokens": 6748, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.1030114352732854, "unique_tokens": 881, "token_count": 8192, "distinct_1": 0.1075439453125, "distinct_2": 0.3661417322834646, "top_token_mass": 0.100341796875}}
29
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
30
+ [watch-infer] 2026-05-21_03:42:30 done step_0030000
31
+ [watch-infer] 2026-05-21_06:20:32 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
32
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt step=40000
33
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
34
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
35
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
36
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt", "step": 40000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 35.01775070788609, "nll_per_token": 3.555855096008144, "tokens": 7383, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 35.25691068369559, "nll_per_token": 3.5626615578078398, "tokens": 7376, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.4672587399947643, "unique_tokens": 1033, "token_count": 8192, "distinct_1": 0.1260986328125, "distinct_2": 0.42913385826771655, "top_token_mass": 0.06982421875}}
37
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
38
+ [watch-infer] 2026-05-21_06:21:00 done step_0040000
39
+ [watch-infer] 2026-05-21_08:58:02 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
40
+ [ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt step=50000
41
+ [decode-base] n=64 max_len=128 steps=128 model_t=flow
42
+ [decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
43
+ [decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
44
+ [summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt", "step": 50000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 39.13239939115062, "nll_per_token": 3.666950752797587, "tokens": 5574, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 38.076086050208254, "nll_per_token": 3.6395864223457512, "tokens": 5543, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.8907815168852227, "unique_tokens": 899, "token_count": 8192, "distinct_1": 0.1097412109375, "distinct_2": 0.3246801181102362, "top_token_mass": 0.1265869140625}}
45
+ [done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
46
+ [watch-infer] 2026-05-21_08:58:34 done step_0050000
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2.dirichlet_resample.eval.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_maskfloor_gamma2/latest.pt step=1000
2
+ [decode-base] n=16 max_len=1024 steps=64 model_t=flow
3
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
4
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
5
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
6
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
7
+ [summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_maskfloor_gamma2/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 205.89546320407374, "nll_per_token": 5.327368579813691, "tokens": 11862, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 186.53793153722665, "nll_per_token": 5.228634604662277, "tokens": 11770, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.3015467149505735, "unique_tokens": 2801, "token_count": 16384, "distinct_1": 0.17095947265625, "distinct_2": 0.34836265884652984, "top_token_mass": 0.55841064453125}}
8
+ [done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2_dirichlet_resample.jsonl
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce.flowmap.eval.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_drop_lowt_ce/latest.pt step=1000
2
+ [decode-base] n=16 max_len=1024 steps=64 model_t=flow
3
+ [decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
4
+ [decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
5
+ [decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
6
+ [decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
7
+ [summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_drop_lowt_ce/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "flowmap", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 18.545462901559578, "nll_per_token": 2.9202251716200354, "tokens": 406, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 12.73431918253559, "nll_per_token": 2.5443006466596554, "tokens": 390, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 0.04249307349759278, "unique_tokens": 18, "token_count": 16384, "distinct_1": 0.0010986328125, "distinct_2": 0.002993646138807429, "top_token_mass": 0.9935302734375}}
8
+ [done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce_flowmap.jsonl
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor.dirichlet_resample.eval.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_linear_nomaskfloor/latest.pt step=1000
2
+ [decode-base] n=16 max_len=1024 steps=64 model_t=flow
3
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
4
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
5
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
6
+ [decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
7
+ [summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_linear_nomaskfloor/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 29.662822677002694, "nll_per_token": 3.389894499983462, "tokens": 15117, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 26.313876829298174, "nll_per_token": 3.2700964361277274, "tokens": 14711, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.398402485948885, "unique_tokens": 704, "token_count": 16384, "distinct_1": 0.04296875, "distinct_2": 0.12243401759530792, "top_token_mass": 0.28253173828125}}
8
+ [done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor_dirichlet_resample.jsonl
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/__init__.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ import logging
8
+
9
+ __version__ = '0.3.8'
10
+
11
+
12
+ class DistlibException(Exception):
13
+ pass
14
+
15
+
16
+ try:
17
+ from logging import NullHandler
18
+ except ImportError: # pragma: no cover
19
+
20
+ class NullHandler(logging.Handler):
21
+
22
+ def handle(self, record):
23
+ pass
24
+
25
+ def emit(self, record):
26
+ pass
27
+
28
+ def createLock(self):
29
+ self.lock = None
30
+
31
+
32
+ logger = logging.getLogger(__name__)
33
+ logger.addHandler(NullHandler())
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/compat.py ADDED
@@ -0,0 +1,1138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013-2017 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ from __future__ import absolute_import
8
+
9
+ import os
10
+ import re
11
+ import shutil
12
+ import sys
13
+
14
+ try:
15
+ import ssl
16
+ except ImportError: # pragma: no cover
17
+ ssl = None
18
+
19
+ if sys.version_info[0] < 3: # pragma: no cover
20
+ from StringIO import StringIO
21
+ string_types = basestring,
22
+ text_type = unicode
23
+ from types import FileType as file_type
24
+ import __builtin__ as builtins
25
+ import ConfigParser as configparser
26
+ from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit
27
+ from urllib import (urlretrieve, quote as _quote, unquote, url2pathname,
28
+ pathname2url, ContentTooShortError, splittype)
29
+
30
+ def quote(s):
31
+ if isinstance(s, unicode):
32
+ s = s.encode('utf-8')
33
+ return _quote(s)
34
+
35
+ import urllib2
36
+ from urllib2 import (Request, urlopen, URLError, HTTPError,
37
+ HTTPBasicAuthHandler, HTTPPasswordMgr, HTTPHandler,
38
+ HTTPRedirectHandler, build_opener)
39
+ if ssl:
40
+ from urllib2 import HTTPSHandler
41
+ import httplib
42
+ import xmlrpclib
43
+ import Queue as queue
44
+ from HTMLParser import HTMLParser
45
+ import htmlentitydefs
46
+ raw_input = raw_input
47
+ from itertools import ifilter as filter
48
+ from itertools import ifilterfalse as filterfalse
49
+
50
+ # Leaving this around for now, in case it needs resurrecting in some way
51
+ # _userprog = None
52
+ # def splituser(host):
53
+ # """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
54
+ # global _userprog
55
+ # if _userprog is None:
56
+ # import re
57
+ # _userprog = re.compile('^(.*)@(.*)$')
58
+
59
+ # match = _userprog.match(host)
60
+ # if match: return match.group(1, 2)
61
+ # return None, host
62
+
63
+ else: # pragma: no cover
64
+ from io import StringIO
65
+ string_types = str,
66
+ text_type = str
67
+ from io import TextIOWrapper as file_type
68
+ import builtins
69
+ import configparser
70
+ from urllib.parse import (urlparse, urlunparse, urljoin, quote, unquote,
71
+ urlsplit, urlunsplit, splittype)
72
+ from urllib.request import (urlopen, urlretrieve, Request, url2pathname,
73
+ pathname2url, HTTPBasicAuthHandler,
74
+ HTTPPasswordMgr, HTTPHandler,
75
+ HTTPRedirectHandler, build_opener)
76
+ if ssl:
77
+ from urllib.request import HTTPSHandler
78
+ from urllib.error import HTTPError, URLError, ContentTooShortError
79
+ import http.client as httplib
80
+ import urllib.request as urllib2
81
+ import xmlrpc.client as xmlrpclib
82
+ import queue
83
+ from html.parser import HTMLParser
84
+ import html.entities as htmlentitydefs
85
+ raw_input = input
86
+ from itertools import filterfalse
87
+ filter = filter
88
+
89
+ try:
90
+ from ssl import match_hostname, CertificateError
91
+ except ImportError: # pragma: no cover
92
+
93
+ class CertificateError(ValueError):
94
+ pass
95
+
96
+ def _dnsname_match(dn, hostname, max_wildcards=1):
97
+ """Matching according to RFC 6125, section 6.4.3
98
+
99
+ http://tools.ietf.org/html/rfc6125#section-6.4.3
100
+ """
101
+ pats = []
102
+ if not dn:
103
+ return False
104
+
105
+ parts = dn.split('.')
106
+ leftmost, remainder = parts[0], parts[1:]
107
+
108
+ wildcards = leftmost.count('*')
109
+ if wildcards > max_wildcards:
110
+ # Issue #17980: avoid denials of service by refusing more
111
+ # than one wildcard per fragment. A survey of established
112
+ # policy among SSL implementations showed it to be a
113
+ # reasonable choice.
114
+ raise CertificateError(
115
+ "too many wildcards in certificate DNS name: " + repr(dn))
116
+
117
+ # speed up common case w/o wildcards
118
+ if not wildcards:
119
+ return dn.lower() == hostname.lower()
120
+
121
+ # RFC 6125, section 6.4.3, subitem 1.
122
+ # The client SHOULD NOT attempt to match a presented identifier in which
123
+ # the wildcard character comprises a label other than the left-most label.
124
+ if leftmost == '*':
125
+ # When '*' is a fragment by itself, it matches a non-empty dotless
126
+ # fragment.
127
+ pats.append('[^.]+')
128
+ elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
129
+ # RFC 6125, section 6.4.3, subitem 3.
130
+ # The client SHOULD NOT attempt to match a presented identifier
131
+ # where the wildcard character is embedded within an A-label or
132
+ # U-label of an internationalized domain name.
133
+ pats.append(re.escape(leftmost))
134
+ else:
135
+ # Otherwise, '*' matches any dotless string, e.g. www*
136
+ pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
137
+
138
+ # add the remaining fragments, ignore any wildcards
139
+ for frag in remainder:
140
+ pats.append(re.escape(frag))
141
+
142
+ pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
143
+ return pat.match(hostname)
144
+
145
+ def match_hostname(cert, hostname):
146
+ """Verify that *cert* (in decoded format as returned by
147
+ SSLSocket.getpeercert()) matches the *hostname*. RFC 2818 and RFC 6125
148
+ rules are followed, but IP addresses are not accepted for *hostname*.
149
+
150
+ CertificateError is raised on failure. On success, the function
151
+ returns nothing.
152
+ """
153
+ if not cert:
154
+ raise ValueError("empty or no certificate, match_hostname needs a "
155
+ "SSL socket or SSL context with either "
156
+ "CERT_OPTIONAL or CERT_REQUIRED")
157
+ dnsnames = []
158
+ san = cert.get('subjectAltName', ())
159
+ for key, value in san:
160
+ if key == 'DNS':
161
+ if _dnsname_match(value, hostname):
162
+ return
163
+ dnsnames.append(value)
164
+ if not dnsnames:
165
+ # The subject is only checked when there is no dNSName entry
166
+ # in subjectAltName
167
+ for sub in cert.get('subject', ()):
168
+ for key, value in sub:
169
+ # XXX according to RFC 2818, the most specific Common Name
170
+ # must be used.
171
+ if key == 'commonName':
172
+ if _dnsname_match(value, hostname):
173
+ return
174
+ dnsnames.append(value)
175
+ if len(dnsnames) > 1:
176
+ raise CertificateError("hostname %r "
177
+ "doesn't match either of %s" %
178
+ (hostname, ', '.join(map(repr, dnsnames))))
179
+ elif len(dnsnames) == 1:
180
+ raise CertificateError("hostname %r "
181
+ "doesn't match %r" %
182
+ (hostname, dnsnames[0]))
183
+ else:
184
+ raise CertificateError("no appropriate commonName or "
185
+ "subjectAltName fields were found")
186
+
187
+
188
+ try:
189
+ from types import SimpleNamespace as Container
190
+ except ImportError: # pragma: no cover
191
+
192
+ class Container(object):
193
+ """
194
+ A generic container for when multiple values need to be returned
195
+ """
196
+
197
+ def __init__(self, **kwargs):
198
+ self.__dict__.update(kwargs)
199
+
200
+
201
+ try:
202
+ from shutil import which
203
+ except ImportError: # pragma: no cover
204
+ # Implementation from Python 3.3
205
+ def which(cmd, mode=os.F_OK | os.X_OK, path=None):
206
+ """Given a command, mode, and a PATH string, return the path which
207
+ conforms to the given mode on the PATH, or None if there is no such
208
+ file.
209
+
210
+ `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
211
+ of os.environ.get("PATH"), or can be overridden with a custom search
212
+ path.
213
+
214
+ """
215
+
216
+ # Check that a given file can be accessed with the correct mode.
217
+ # Additionally check that `file` is not a directory, as on Windows
218
+ # directories pass the os.access check.
219
+ def _access_check(fn, mode):
220
+ return (os.path.exists(fn) and os.access(fn, mode)
221
+ and not os.path.isdir(fn))
222
+
223
+ # If we're given a path with a directory part, look it up directly rather
224
+ # than referring to PATH directories. This includes checking relative to the
225
+ # current directory, e.g. ./script
226
+ if os.path.dirname(cmd):
227
+ if _access_check(cmd, mode):
228
+ return cmd
229
+ return None
230
+
231
+ if path is None:
232
+ path = os.environ.get("PATH", os.defpath)
233
+ if not path:
234
+ return None
235
+ path = path.split(os.pathsep)
236
+
237
+ if sys.platform == "win32":
238
+ # The current directory takes precedence on Windows.
239
+ if os.curdir not in path:
240
+ path.insert(0, os.curdir)
241
+
242
+ # PATHEXT is necessary to check on Windows.
243
+ pathext = os.environ.get("PATHEXT", "").split(os.pathsep)
244
+ # See if the given file matches any of the expected path extensions.
245
+ # This will allow us to short circuit when given "python.exe".
246
+ # If it does match, only test that one, otherwise we have to try
247
+ # others.
248
+ if any(cmd.lower().endswith(ext.lower()) for ext in pathext):
249
+ files = [cmd]
250
+ else:
251
+ files = [cmd + ext for ext in pathext]
252
+ else:
253
+ # On other platforms you don't have things like PATHEXT to tell you
254
+ # what file suffixes are executable, so just pass on cmd as-is.
255
+ files = [cmd]
256
+
257
+ seen = set()
258
+ for dir in path:
259
+ normdir = os.path.normcase(dir)
260
+ if normdir not in seen:
261
+ seen.add(normdir)
262
+ for thefile in files:
263
+ name = os.path.join(dir, thefile)
264
+ if _access_check(name, mode):
265
+ return name
266
+ return None
267
+
268
+
269
+ # ZipFile is a context manager in 2.7, but not in 2.6
270
+
271
+ from zipfile import ZipFile as BaseZipFile
272
+
273
+ if hasattr(BaseZipFile, '__enter__'): # pragma: no cover
274
+ ZipFile = BaseZipFile
275
+ else: # pragma: no cover
276
+ from zipfile import ZipExtFile as BaseZipExtFile
277
+
278
+ class ZipExtFile(BaseZipExtFile):
279
+
280
+ def __init__(self, base):
281
+ self.__dict__.update(base.__dict__)
282
+
283
+ def __enter__(self):
284
+ return self
285
+
286
+ def __exit__(self, *exc_info):
287
+ self.close()
288
+ # return None, so if an exception occurred, it will propagate
289
+
290
+ class ZipFile(BaseZipFile):
291
+
292
+ def __enter__(self):
293
+ return self
294
+
295
+ def __exit__(self, *exc_info):
296
+ self.close()
297
+ # return None, so if an exception occurred, it will propagate
298
+
299
+ def open(self, *args, **kwargs):
300
+ base = BaseZipFile.open(self, *args, **kwargs)
301
+ return ZipExtFile(base)
302
+
303
+
304
+ try:
305
+ from platform import python_implementation
306
+ except ImportError: # pragma: no cover
307
+
308
+ def python_implementation():
309
+ """Return a string identifying the Python implementation."""
310
+ if 'PyPy' in sys.version:
311
+ return 'PyPy'
312
+ if os.name == 'java':
313
+ return 'Jython'
314
+ if sys.version.startswith('IronPython'):
315
+ return 'IronPython'
316
+ return 'CPython'
317
+
318
+
319
+ import sysconfig
320
+
321
+ try:
322
+ callable = callable
323
+ except NameError: # pragma: no cover
324
+ from collections.abc import Callable
325
+
326
+ def callable(obj):
327
+ return isinstance(obj, Callable)
328
+
329
+
330
+ try:
331
+ fsencode = os.fsencode
332
+ fsdecode = os.fsdecode
333
+ except AttributeError: # pragma: no cover
334
+ # Issue #99: on some systems (e.g. containerised),
335
+ # sys.getfilesystemencoding() returns None, and we need a real value,
336
+ # so fall back to utf-8. From the CPython 2.7 docs relating to Unix and
337
+ # sys.getfilesystemencoding(): the return value is "the user’s preference
338
+ # according to the result of nl_langinfo(CODESET), or None if the
339
+ # nl_langinfo(CODESET) failed."
340
+ _fsencoding = sys.getfilesystemencoding() or 'utf-8'
341
+ if _fsencoding == 'mbcs':
342
+ _fserrors = 'strict'
343
+ else:
344
+ _fserrors = 'surrogateescape'
345
+
346
+ def fsencode(filename):
347
+ if isinstance(filename, bytes):
348
+ return filename
349
+ elif isinstance(filename, text_type):
350
+ return filename.encode(_fsencoding, _fserrors)
351
+ else:
352
+ raise TypeError("expect bytes or str, not %s" %
353
+ type(filename).__name__)
354
+
355
+ def fsdecode(filename):
356
+ if isinstance(filename, text_type):
357
+ return filename
358
+ elif isinstance(filename, bytes):
359
+ return filename.decode(_fsencoding, _fserrors)
360
+ else:
361
+ raise TypeError("expect bytes or str, not %s" %
362
+ type(filename).__name__)
363
+
364
+
365
+ try:
366
+ from tokenize import detect_encoding
367
+ except ImportError: # pragma: no cover
368
+ from codecs import BOM_UTF8, lookup
369
+
370
+ cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
371
+
372
+ def _get_normal_name(orig_enc):
373
+ """Imitates get_normal_name in tokenizer.c."""
374
+ # Only care about the first 12 characters.
375
+ enc = orig_enc[:12].lower().replace("_", "-")
376
+ if enc == "utf-8" or enc.startswith("utf-8-"):
377
+ return "utf-8"
378
+ if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
379
+ enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
380
+ return "iso-8859-1"
381
+ return orig_enc
382
+
383
+ def detect_encoding(readline):
384
+ """
385
+ The detect_encoding() function is used to detect the encoding that should
386
+ be used to decode a Python source file. It requires one argument, readline,
387
+ in the same way as the tokenize() generator.
388
+
389
+ It will call readline a maximum of twice, and return the encoding used
390
+ (as a string) and a list of any lines (left as bytes) it has read in.
391
+
392
+ It detects the encoding from the presence of a utf-8 bom or an encoding
393
+ cookie as specified in pep-0263. If both a bom and a cookie are present,
394
+ but disagree, a SyntaxError will be raised. If the encoding cookie is an
395
+ invalid charset, raise a SyntaxError. Note that if a utf-8 bom is found,
396
+ 'utf-8-sig' is returned.
397
+
398
+ If no encoding is specified, then the default of 'utf-8' will be returned.
399
+ """
400
+ try:
401
+ filename = readline.__self__.name
402
+ except AttributeError:
403
+ filename = None
404
+ bom_found = False
405
+ encoding = None
406
+ default = 'utf-8'
407
+
408
+ def read_or_stop():
409
+ try:
410
+ return readline()
411
+ except StopIteration:
412
+ return b''
413
+
414
+ def find_cookie(line):
415
+ try:
416
+ # Decode as UTF-8. Either the line is an encoding declaration,
417
+ # in which case it should be pure ASCII, or it must be UTF-8
418
+ # per default encoding.
419
+ line_string = line.decode('utf-8')
420
+ except UnicodeDecodeError:
421
+ msg = "invalid or missing encoding declaration"
422
+ if filename is not None:
423
+ msg = '{} for {!r}'.format(msg, filename)
424
+ raise SyntaxError(msg)
425
+
426
+ matches = cookie_re.findall(line_string)
427
+ if not matches:
428
+ return None
429
+ encoding = _get_normal_name(matches[0])
430
+ try:
431
+ codec = lookup(encoding)
432
+ except LookupError:
433
+ # This behaviour mimics the Python interpreter
434
+ if filename is None:
435
+ msg = "unknown encoding: " + encoding
436
+ else:
437
+ msg = "unknown encoding for {!r}: {}".format(
438
+ filename, encoding)
439
+ raise SyntaxError(msg)
440
+
441
+ if bom_found:
442
+ if codec.name != 'utf-8':
443
+ # This behaviour mimics the Python interpreter
444
+ if filename is None:
445
+ msg = 'encoding problem: utf-8'
446
+ else:
447
+ msg = 'encoding problem for {!r}: utf-8'.format(
448
+ filename)
449
+ raise SyntaxError(msg)
450
+ encoding += '-sig'
451
+ return encoding
452
+
453
+ first = read_or_stop()
454
+ if first.startswith(BOM_UTF8):
455
+ bom_found = True
456
+ first = first[3:]
457
+ default = 'utf-8-sig'
458
+ if not first:
459
+ return default, []
460
+
461
+ encoding = find_cookie(first)
462
+ if encoding:
463
+ return encoding, [first]
464
+
465
+ second = read_or_stop()
466
+ if not second:
467
+ return default, [first]
468
+
469
+ encoding = find_cookie(second)
470
+ if encoding:
471
+ return encoding, [first, second]
472
+
473
+ return default, [first, second]
474
+
475
+
476
+ # For converting & <-> &amp; etc.
477
+ try:
478
+ from html import escape
479
+ except ImportError:
480
+ from cgi import escape
481
+ if sys.version_info[:2] < (3, 4):
482
+ unescape = HTMLParser().unescape
483
+ else:
484
+ from html import unescape
485
+
486
+ try:
487
+ from collections import ChainMap
488
+ except ImportError: # pragma: no cover
489
+ from collections import MutableMapping
490
+
491
+ try:
492
+ from reprlib import recursive_repr as _recursive_repr
493
+ except ImportError:
494
+
495
+ def _recursive_repr(fillvalue='...'):
496
+ '''
497
+ Decorator to make a repr function return fillvalue for a recursive
498
+ call
499
+ '''
500
+
501
+ def decorating_function(user_function):
502
+ repr_running = set()
503
+
504
+ def wrapper(self):
505
+ key = id(self), get_ident()
506
+ if key in repr_running:
507
+ return fillvalue
508
+ repr_running.add(key)
509
+ try:
510
+ result = user_function(self)
511
+ finally:
512
+ repr_running.discard(key)
513
+ return result
514
+
515
+ # Can't use functools.wraps() here because of bootstrap issues
516
+ wrapper.__module__ = getattr(user_function, '__module__')
517
+ wrapper.__doc__ = getattr(user_function, '__doc__')
518
+ wrapper.__name__ = getattr(user_function, '__name__')
519
+ wrapper.__annotations__ = getattr(user_function,
520
+ '__annotations__', {})
521
+ return wrapper
522
+
523
+ return decorating_function
524
+
525
class ChainMap(MutableMapping):
    '''
    Layered, updateable view over several dicts (or other mappings).

    The layers live in the public list attribute *maps*; the object holds no
    other state. Reads walk the layers in order and stop at the first one
    containing the key. Writes, updates and deletions only ever touch the
    first layer.
    '''

    def __init__(self, *maps):
        '''Store the given mappings as *maps*.

        With no arguments a single empty dict is used, so there is always at
        least one layer to write to.
        '''
        layers = list(maps)
        if not layers:
            layers = [{}]
        self.maps = layers

    def __missing__(self, key):
        raise KeyError(key)

    def __getitem__(self, key):
        for layer in self.maps:
            # can't use 'key in layer' with defaultdict
            try:
                found = layer[key]
            except KeyError:
                continue
            return found
        # support subclasses that define __missing__
        return self.__missing__(key)

    def get(self, key, default=None):
        if key in self:
            return self[key]
        return default

    def __len__(self):
        # reuses stored hash values if possible
        distinct = set().union(*self.maps)
        return len(distinct)

    def __iter__(self):
        distinct = set().union(*self.maps)
        return iter(distinct)

    def __contains__(self, key):
        for layer in self.maps:
            if key in layer:
                return True
        return False

    def __bool__(self):
        return any(self.maps)

    @_recursive_repr()
    def __repr__(self):
        rendered = ', '.join(repr(layer) for layer in self.maps)
        return '{0.__class__.__name__}({1})'.format(self, rendered)

    @classmethod
    def fromkeys(cls, iterable, *args):
        'Create a ChainMap whose only layer is dict.fromkeys(iterable, *args).'
        only_layer = dict.fromkeys(iterable, *args)
        return cls(only_layer)

    def copy(self):
        'New instance of the same class: maps[0] is copied, maps[1:] are shared.'
        first_copy = self.maps[0].copy()
        return self.__class__(first_copy, *self.maps[1:])

    __copy__ = copy

    def new_child(self):  # like Django's Context.push()
        'New instance with a fresh empty dict in front of all current maps.'
        return self.__class__({}, *self.maps)

    @property
    def parents(self):  # like Django's Context.pop()
        'New instance built from maps[1:].'
        remaining = self.maps[1:]
        return self.__class__(*remaining)

    def __setitem__(self, key, value):
        self.maps[0][key] = value

    def __delitem__(self, key):
        try:
            del self.maps[0][key]
        except KeyError:
            raise KeyError(
                'Key not found in the first mapping: {!r}'.format(key))

    def popitem(self):
        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
        try:
            return self.maps[0].popitem()
        except KeyError:
            raise KeyError('No keys found in the first mapping.')

    def pop(self, key, *args):
        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* is not in maps[0] and no default was given.'
        try:
            return self.maps[0].pop(key, *args)
        except KeyError:
            raise KeyError(
                'Key not found in the first mapping: {!r}'.format(key))

    def clear(self):
        'Empty maps[0]; maps[1:] are left untouched.'
        self.maps[0].clear()
627
+
628
+
629
+ try:
630
+ from importlib.util import cache_from_source # Python >= 3.4
631
+ except ImportError: # pragma: no cover
632
+
633
def cache_from_source(path, debug_override=None):
    """
    Return the legacy byte-code file name for the source file *path*.

    *path* must end in '.py'. The result is *path* with 'c' appended when
    debugging is considered on and 'o' otherwise. *debug_override* decides
    that when given; when it is None the interpreter's ``__debug__`` is used.
    """
    assert path.endswith('.py')
    debug_on = __debug__ if debug_override is None else debug_override
    return path + ('c' if debug_on else 'o')
642
+
643
+
644
+ try:
645
+ from collections import OrderedDict
646
+ except ImportError: # pragma: no cover
647
+ # {{{ http://code.activestate.com/recipes/576693/ (r9)
648
+ # Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
649
+ # Passes Python2.7's test suite and incorporates all the latest updates.
650
+ try:
651
+ from thread import get_ident as _get_ident
652
+ except ImportError:
653
+ from dummy_thread import get_ident as _get_ident
654
+
655
+ try:
656
+ from _abcoll import KeysView, ValuesView, ItemsView
657
+ except ImportError:
658
+ pass
659
+
660
class OrderedDict(dict):
    'Dictionary that remembers insertion order'

    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].

    # Shared registry of (id(obj), thread id) keys whose repr is in progress.
    # It has to outlive a single __repr__ call, otherwise a nested call for
    # the same object can never notice that it is recursing.
    __repr_guard = {}

    def __init__(self, *args, **kwds):
        '''Initialize an ordered dictionary.  Signature is the same as for
        regular dictionaries, but keyword arguments are not recommended
        because their insertion order is arbitrary.

        Raises TypeError when more than one positional argument is given.
        '''
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' %
                            len(args))
        try:
            # Only build the linked list once, even if __init__ runs again.
            self.__root
        except AttributeError:
            self.__root = root = []  # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        'od.__delitem__(y) <==> del od[y]'
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        'od.__reversed__() <==> reversed(od)'
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        'od.clear() -> None.  Remove all items from od.'
        try:
            # values() exists on both Python 2 and 3. The former itervalues()
            # raised AttributeError on Python 3, which the handler below then
            # swallowed, leaving stale links behind after a clear().
            for node in self.__map.values():
                del node[:]
            root = self.__root
            root[:] = [root, root, None]
            self.__map.clear()
        except AttributeError:
            # The link structures do not exist yet (clear() before __init__).
            pass
        dict.clear(self)

    def popitem(self, last=True):
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
        Pairs are returned in LIFO order if last is true or FIFO order if false.

        Raises KeyError when the dictionary is empty.
        '''
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        'od.keys() -> list of keys in od'
        return list(self)

    def values(self):
        'od.values() -> list of values in od'
        return [self[key] for key in self]

    def items(self):
        'od.items() -> list of (key, value) pairs in od'
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        'od.iterkeys() -> an iterator over the keys in od'
        return iter(self)

    def itervalues(self):
        'od.itervalues -> an iterator over the values in od'
        for k in self:
            yield self[k]

    def iteritems(self):
        'od.iteritems -> an iterator over the (key, value) items in od'
        for k in self:
            yield (k, self[k])

    def update(*args, **kwds):
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        '''
        # self is taken from *args so that a keyword argument named 'self'
        # can still be passed through **kwds.
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args), ))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    __marker = object()

    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.

        '''
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running=None):
        'od.__repr__() <==> repr(od)'
        if _repr_running is None:
            # Use the shared registry so a nested repr of the same object on
            # this thread is detected and rendered as '...'.
            _repr_running = self.__repr_guard
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__, )
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        'Return state information for pickling'
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the attributes every instance has (the link structures); only
        # extra, user-set attributes are kept as pickled state.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items, ), inst_dict)
        return self.__class__, (items, )

    def copy(self):
        'od.copy() -> a shallow copy of od'
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        '''
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.

        '''
        if isinstance(other, OrderedDict):
            return len(self) == len(
                other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        "od.viewkeys() -> a set-like object providing a view on od's keys"
        return KeysView(self)

    def viewvalues(self):
        "od.viewvalues() -> an object providing a view on od's values"
        return ValuesView(self)

    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        return ItemsView(self)
909
+
910
+
911
+ try:
912
+ from logging.config import BaseConfigurator, valid_ident
913
+ except ImportError: # pragma: no cover
914
# Case-insensitive pattern: a letter or underscore followed by letters,
# digits or underscores.
IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I)


def valid_ident(s):
    """
    Return True when *s* matches IDENTIFIER.

    Raises ValueError (rather than returning False) when it does not.
    """
    if IDENTIFIER.match(s) is None:
        raise ValueError('Not a valid Python identifier: %r' % s)
    return True
921
+
922
+ # The ConvertingXXX classes are wrappers around standard Python containers,
923
+ # and they serve to convert any suitable values in the container. The
924
+ # conversion converts base dicts, lists and tuples to their wrapped
925
+ # equivalents, whereas strings which match a conversion format are converted
926
+ # appropriately.
927
+ #
928
+ # Each wrapper should have a configurator attribute holding the actual
929
+ # configurator to use for conversion.
930
+
931
class ConvertingDict(dict):
    """
    Dict wrapper that passes every value it hands out through
    ``self.configurator.convert``.

    The ``configurator`` attribute is not set here; it must be assigned by
    whoever creates the instance.
    """

    def __getitem__(self, key):
        raw = dict.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The conversion produced a new object: store it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            # Let the nested wrapper know where it lives.
            converted.parent = self
            converted.key = key
        return converted

    def get(self, key, default=None):
        raw = dict.get(self, key, default)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The conversion produced a new object: store it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted

    def pop(self, key, default=None):
        raw = dict.pop(self, key, default)
        converted = self.configurator.convert(raw)
        # Nothing is stored back: the entry has just been removed.
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted
967
+
968
class ConvertingList(list):
    """
    List wrapper that passes every item it hands out through
    ``self.configurator.convert``.

    The ``configurator`` attribute is not set here; it must be assigned by
    whoever creates the instance.
    """

    def __getitem__(self, key):
        raw = list.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The conversion produced a new object: store it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted

    def pop(self, idx=-1):
        raw = list.pop(self, idx)
        converted = self.configurator.convert(raw)
        # The popped item no longer has an index, so only parent is recorded.
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            converted.parent = self
        return converted
991
+
992
class ConvertingTuple(tuple):
    """
    Tuple wrapper that passes every item it hands out through
    ``self.configurator.convert``.

    The ``configurator`` attribute is not set here; it must be assigned by
    whoever creates the instance.
    """

    def __getitem__(self, key):
        raw = tuple.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        # A tuple is immutable, so the converted item cannot be stored back;
        # a nested wrapper is only told where it came from.
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted
1004
+
1005
class BaseConfigurator(object):
    """
    Base class for configurators: wraps a configuration dict and supplies
    the default ``ext://`` and ``cfg://`` value conversions.
    """

    # '<prefix>://<suffix>' strings are candidates for conversion.
    CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$')

    # Pieces of a cfg:// path: leading word, '.name' step, '[index]' step.
    WORD_PATTERN = re.compile(r'^\s*(\w+)\s*')
    DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*')
    INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*')
    DIGIT_PATTERN = re.compile(r'^\d+$')

    # Maps a conversion prefix to the name of the method handling it.
    value_converters = {
        'ext': 'ext_convert',
        'cfg': 'cfg_convert',
    }

    # We might want to use a different one, e.g. importlib
    importer = staticmethod(__import__)

    def __init__(self, config):
        wrapped = ConvertingDict(config)
        wrapped.configurator = self
        self.config = wrapped

    def resolve(self, s):
        """
        Turn a dotted name into the object it refers to.

        The first component is imported; each further component is looked up
        as an attribute, importing the longer dotted name first when the
        attribute is not there yet. An ImportError is re-raised as ValueError.
        """
        parts = s.split('.')
        used = parts.pop(0)
        try:
            found = self.importer(used)
            for frag in parts:
                used = used + '.' + frag
                try:
                    found = getattr(found, frag)
                except AttributeError:
                    # Possibly a submodule that has not been imported yet.
                    self.importer(used)
                    found = getattr(found, frag)
            return found
        except ImportError:
            _, e, tb = sys.exc_info()
            v = ValueError('Cannot resolve %r: %s' % (s, e))
            v.__cause__, v.__traceback__ = e, tb
            raise v

    def ext_convert(self, value):
        """Default converter for the ext:// protocol: resolve a dotted name."""
        return self.resolve(value)

    def cfg_convert(self, value):
        """
        Default converter for the cfg:// protocol.

        *value* is a path into ``self.config``: a leading word followed by
        any number of ``.name`` or ``[index]`` steps. Raises ValueError when
        the path cannot be parsed.
        """
        head = self.WORD_PATTERN.match(value)
        if head is None:
            raise ValueError("Unable to convert %r" % value)
        rest = value[head.end():]
        d = self.config[head.group(1)]
        while rest:
            m = self.DOT_PATTERN.match(rest)
            if m:
                d = d[m.group(1)]
            else:
                m = self.INDEX_PATTERN.match(rest)
                if not m:
                    raise ValueError('Unable to convert '
                                     '%r at %r' % (value, rest))
                idx = m.group(1)
                if self.DIGIT_PATTERN.match(idx):
                    try:
                        # try as number first (most likely)
                        d = d[int(idx)]
                    except TypeError:
                        d = d[idx]
                else:
                    d = d[idx]
            rest = rest[m.end():]
        # rest is empty here
        return d

    def convert(self, value):
        """
        Return *value* in its converted form.

        Plain dicts, lists and tuples are wrapped in their Converting*
        counterparts (with this object as configurator). A string of the form
        ``prefix://suffix`` whose prefix is in ``value_converters`` is replaced
        by the result of that converter. Anything else is returned unchanged.
        """
        if isinstance(value, dict) and not isinstance(value, ConvertingDict):
            value = ConvertingDict(value)
            value.configurator = self
        elif isinstance(value, list) and not isinstance(value, ConvertingList):
            value = ConvertingList(value)
            value.configurator = self
        elif isinstance(value, tuple) and not isinstance(value,
                                                         ConvertingTuple):
            value = ConvertingTuple(value)
            value.configurator = self
        elif isinstance(value, string_types):
            m = self.CONVERT_PATTERN.match(value)
            if m:
                parts = m.groupdict()
                method_name = self.value_converters.get(parts['prefix'], None)
                if method_name:
                    value = getattr(self, method_name)(parts['suffix'])
        return value

    def configure_custom(self, config):
        """
        Build an object from a user-supplied factory.

        ``config['()']`` is the factory (or a dotted name resolving to it),
        ``config['.']`` optionally holds attributes to set on the result, and
        every remaining key is passed to the factory as a keyword argument.
        Both special keys are removed from *config*.
        """
        factory = config.pop('()')
        if not callable(factory):
            factory = self.resolve(factory)
        props = config.pop('.', None)
        # Check for valid identifiers
        kwargs = {k: config[k] for k in config if valid_ident(k)}
        result = factory(**kwargs)
        if props:
            for attr, attr_value in props.items():
                setattr(result, attr, attr_value)
        return result

    def as_tuple(self, value):
        """Return a list converted to a tuple; anything else unchanged."""
        return tuple(value) if isinstance(value, list) else value
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/database.py ADDED
@@ -0,0 +1,1359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 The Python Software Foundation.
4
+ # See LICENSE.txt and CONTRIBUTORS.txt.
5
+ #
6
+ """PEP 376 implementation."""
7
+
8
+ from __future__ import unicode_literals
9
+
10
+ import base64
11
+ import codecs
12
+ import contextlib
13
+ import hashlib
14
+ import logging
15
+ import os
16
+ import posixpath
17
+ import sys
18
+ import zipimport
19
+
20
+ from . import DistlibException, resources
21
+ from .compat import StringIO
22
+ from .version import get_scheme, UnsupportedVersionError
23
+ from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME,
24
+ LEGACY_METADATA_FILENAME)
25
+ from .util import (parse_requirement, cached_property, parse_name_and_version,
26
+ read_exports, write_exports, CSVReader, CSVWriter)
27
+
28
# Names exported by ``from <this module> import *``.
__all__ = [
    'Distribution', 'BaseInstalledDistribution', 'InstalledDistribution',
    'EggInfoDistribution', 'DistributionPath'
]

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# NOTE(review): presumably the names of JSON files kept alongside a
# distribution's metadata -- confirm against their use later in the module.
EXPORTS_FILENAME = 'pydist-exports.json'
COMMANDS_FILENAME = 'pydist-commands.json'

# NOTE(review): looks like the set of file names expected inside a .dist-info
# directory -- confirm against their use later in the module.
DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED',
              'RESOURCES', EXPORTS_FILENAME, 'SHARED')

# Suffix that marks a directory entry as a .dist-info metadata directory.
DISTINFO_EXT = '.dist-info'
42
+
43
+
44
+ class _Cache(object):
45
+ """
46
+ A simple cache mapping names and .dist-info paths to distributions
47
+ """
48
+
49
+ def __init__(self):
50
+ """
51
+ Initialise an instance. There is normally one for each DistributionPath.
52
+ """
53
+ self.name = {}
54
+ self.path = {}
55
+ self.generated = False
56
+
57
+ def clear(self):
58
+ """
59
+ Clear the cache, setting it to its initial state.
60
+ """
61
+ self.name.clear()
62
+ self.path.clear()
63
+ self.generated = False
64
+
65
+ def add(self, dist):
66
+ """
67
+ Add a distribution to the cache.
68
+ :param dist: The distribution to add.
69
+ """
70
+ if dist.path not in self.path:
71
+ self.path[dist.path] = dist
72
+ self.name.setdefault(dist.key, []).append(dist)
73
+
74
+
75
+ class DistributionPath(object):
76
+ """
77
+ Represents a set of distributions installed on a path (typically sys.path).
78
+ """
79
+
80
+ def __init__(self, path=None, include_egg=False):
81
+ """
82
+ Create an instance from a path, optionally including legacy (distutils/
83
+ setuptools/distribute) distributions.
84
+ :param path: The path to use, as a list of directories. If not specified,
85
+ sys.path is used.
86
+ :param include_egg: If True, this instance will look for and return legacy
87
+ distributions as well as those based on PEP 376.
88
+ """
89
+ if path is None:
90
+ path = sys.path
91
+ self.path = path
92
+ self._include_dist = True
93
+ self._include_egg = include_egg
94
+
95
+ self._cache = _Cache()
96
+ self._cache_egg = _Cache()
97
+ self._cache_enabled = True
98
+ self._scheme = get_scheme('default')
99
+
100
+ def _get_cache_enabled(self):
101
+ return self._cache_enabled
102
+
103
+ def _set_cache_enabled(self, value):
104
+ self._cache_enabled = value
105
+
106
+ cache_enabled = property(_get_cache_enabled, _set_cache_enabled)
107
+
108
+ def clear_cache(self):
109
+ """
110
+ Clears the internal cache.
111
+ """
112
+ self._cache.clear()
113
+ self._cache_egg.clear()
114
+
115
+ def _yield_distributions(self):
116
+ """
117
+ Yield .dist-info and/or .egg(-info) distributions.
118
+ """
119
+ # We need to check if we've seen some resources already, because on
120
+ # some Linux systems (e.g. some Debian/Ubuntu variants) there are
121
+ # symlinks which alias other files in the environment.
122
+ seen = set()
123
+ for path in self.path:
124
+ finder = resources.finder_for_path(path)
125
+ if finder is None:
126
+ continue
127
+ r = finder.find('')
128
+ if not r or not r.is_container:
129
+ continue
130
+ rset = sorted(r.resources)
131
+ for entry in rset:
132
+ r = finder.find(entry)
133
+ if not r or r.path in seen:
134
+ continue
135
+ try:
136
+ if self._include_dist and entry.endswith(DISTINFO_EXT):
137
+ possible_filenames = [
138
+ METADATA_FILENAME, WHEEL_METADATA_FILENAME,
139
+ LEGACY_METADATA_FILENAME
140
+ ]
141
+ for metadata_filename in possible_filenames:
142
+ metadata_path = posixpath.join(
143
+ entry, metadata_filename)
144
+ pydist = finder.find(metadata_path)
145
+ if pydist:
146
+ break
147
+ else:
148
+ continue
149
+
150
+ with contextlib.closing(pydist.as_stream()) as stream:
151
+ metadata = Metadata(fileobj=stream,
152
+ scheme='legacy')
153
+ logger.debug('Found %s', r.path)
154
+ seen.add(r.path)
155
+ yield new_dist_class(r.path,
156
+ metadata=metadata,
157
+ env=self)
158
+ elif self._include_egg and entry.endswith(
159
+ ('.egg-info', '.egg')):
160
+ logger.debug('Found %s', r.path)
161
+ seen.add(r.path)
162
+ yield old_dist_class(r.path, self)
163
+ except Exception as e:
164
+ msg = 'Unable to read distribution at %s, perhaps due to bad metadata: %s'
165
+ logger.warning(msg, r.path, e)
166
+ import warnings
167
+ warnings.warn(msg % (r.path, e), stacklevel=2)
168
+
169
+ def _generate_cache(self):
170
+ """
171
+ Scan the path for distributions and populate the cache with
172
+ those that are found.
173
+ """
174
+ gen_dist = not self._cache.generated
175
+ gen_egg = self._include_egg and not self._cache_egg.generated
176
+ if gen_dist or gen_egg:
177
+ for dist in self._yield_distributions():
178
+ if isinstance(dist, InstalledDistribution):
179
+ self._cache.add(dist)
180
+ else:
181
+ self._cache_egg.add(dist)
182
+
183
+ if gen_dist:
184
+ self._cache.generated = True
185
+ if gen_egg:
186
+ self._cache_egg.generated = True
187
+
188
+ @classmethod
189
+ def distinfo_dirname(cls, name, version):
190
+ """
191
+ The *name* and *version* parameters are converted into their
192
+ filename-escaped form, i.e. any ``'-'`` characters are replaced
193
+ with ``'_'`` other than the one in ``'dist-info'`` and the one
194
+ separating the name from the version number.
195
+
196
+ :parameter name: is converted to a standard distribution name by replacing
197
+ any runs of non- alphanumeric characters with a single
198
+ ``'-'``.
199
+ :type name: string
200
+ :parameter version: is converted to a standard version string. Spaces
201
+ become dots, and all other non-alphanumeric characters
202
+ (except dots) become dashes, with runs of multiple
203
+ dashes condensed to a single dash.
204
+ :type version: string
205
+ :returns: directory name
206
+ :rtype: string"""
207
+ name = name.replace('-', '_')
208
+ return '-'.join([name, version]) + DISTINFO_EXT
209
+
210
+ def get_distributions(self):
211
+ """
212
+ Provides an iterator that looks for distributions and returns
213
+ :class:`InstalledDistribution` or
214
+ :class:`EggInfoDistribution` instances for each one of them.
215
+
216
+ :rtype: iterator of :class:`InstalledDistribution` and
217
+ :class:`EggInfoDistribution` instances
218
+ """
219
+ if not self._cache_enabled:
220
+ for dist in self._yield_distributions():
221
+ yield dist
222
+ else:
223
+ self._generate_cache()
224
+
225
+ for dist in self._cache.path.values():
226
+ yield dist
227
+
228
+ if self._include_egg:
229
+ for dist in self._cache_egg.path.values():
230
+ yield dist
231
+
232
+ def get_distribution(self, name):
233
+ """
234
+ Looks for a named distribution on the path.
235
+
236
+ This function only returns the first result found, as no more than one
237
+ value is expected. If nothing is found, ``None`` is returned.
238
+
239
+ :rtype: :class:`InstalledDistribution`, :class:`EggInfoDistribution`
240
+ or ``None``
241
+ """
242
+ result = None
243
+ name = name.lower()
244
+ if not self._cache_enabled:
245
+ for dist in self._yield_distributions():
246
+ if dist.key == name:
247
+ result = dist
248
+ break
249
+ else:
250
+ self._generate_cache()
251
+
252
+ if name in self._cache.name:
253
+ result = self._cache.name[name][0]
254
+ elif self._include_egg and name in self._cache_egg.name:
255
+ result = self._cache_egg.name[name][0]
256
+ return result
257
+
258
+ def provides_distribution(self, name, version=None):
259
+ """
260
+ Iterates over all distributions to find which distributions provide *name*.
261
+ If a *version* is provided, it will be used to filter the results.
262
+
263
+ This function only returns the first result found, since no more than
264
+ one values are expected. If the directory is not found, returns ``None``.
265
+
266
+ :parameter version: a version specifier that indicates the version
267
+ required, conforming to the format in ``PEP-345``
268
+
269
+ :type name: string
270
+ :type version: string
271
+ """
272
+ matcher = None
273
+ if version is not None:
274
+ try:
275
+ matcher = self._scheme.matcher('%s (%s)' % (name, version))
276
+ except ValueError:
277
+ raise DistlibException('invalid name or version: %r, %r' %
278
+ (name, version))
279
+
280
+ for dist in self.get_distributions():
281
+ # We hit a problem on Travis where enum34 was installed and doesn't
282
+ # have a provides attribute ...
283
+ if not hasattr(dist, 'provides'):
284
+ logger.debug('No "provides": %s', dist)
285
+ else:
286
+ provided = dist.provides
287
+
288
+ for p in provided:
289
+ p_name, p_ver = parse_name_and_version(p)
290
+ if matcher is None:
291
+ if p_name == name:
292
+ yield dist
293
+ break
294
+ else:
295
+ if p_name == name and matcher.match(p_ver):
296
+ yield dist
297
+ break
298
+
299
def get_file_path(self, name, relative_path):
    """
    Return the path to a resource file.

    The distribution called *name* is looked up with
    :meth:`get_distribution` and asked for the resource at
    *relative_path*.

    :raises LookupError: if no distribution named *name* is found.
    """
    found = self.get_distribution(name)
    if found is not None:
        return found.get_resource_path(relative_path)
    raise LookupError('no distribution named %r found' % name)
307
+
308
def get_exported_entries(self, category, name=None):
    """
    Yield the exported entries in a particular category, across all
    distributions returned by :meth:`get_distributions`.

    :param category: The category to search for entries.
    :param name: If specified, only entries with that name are yielded;
                 otherwise every entry in the category is yielded.
    """
    for dist in self.get_distributions():
        exported = dist.exports
        if category not in exported:
            continue
        entries = exported[category]
        if name is None:
            for entry in entries.values():
                yield entry
        elif name in entries:
            yield entries[name]
325
+
326
+
327
+ class Distribution(object):
328
+ """
329
+ A base class for distributions, whether installed or from indexes.
330
+ Either way, it must have some metadata, so that's all that's needed
331
+ for construction.
332
+ """
333
+
334
+ build_time_dependency = False
335
+ """
336
+ Set to True if it's known to be only a build-time dependency (i.e.
337
+ not needed after installation).
338
+ """
339
+
340
+ requested = False
341
+ """A boolean that indicates whether the ``REQUESTED`` metadata file is
342
+ present (in other words, whether the package was installed by user
343
+ request or it was installed as a dependency)."""
344
+
345
+ def __init__(self, metadata):
346
+ """
347
+ Initialise an instance.
348
+ :param metadata: The instance of :class:`Metadata` describing this
349
+ distribution.
350
+ """
351
+ self.metadata = metadata
352
+ self.name = metadata.name
353
+ self.key = self.name.lower() # for case-insensitive comparisons
354
+ self.version = metadata.version
355
+ self.locator = None
356
+ self.digest = None
357
+ self.extras = None # additional features requested
358
+ self.context = None # environment marker overrides
359
+ self.download_urls = set()
360
+ self.digests = {}
361
+
362
+ @property
363
+ def source_url(self):
364
+ """
365
+ The source archive download URL for this distribution.
366
+ """
367
+ return self.metadata.source_url
368
+
369
+ download_url = source_url # Backward compatibility
370
+
371
+ @property
372
+ def name_and_version(self):
373
+ """
374
+ A utility property which displays the name and version in parentheses.
375
+ """
376
+ return '%s (%s)' % (self.name, self.version)
377
+
378
+ @property
379
+ def provides(self):
380
+ """
381
+ A set of distribution names and versions provided by this distribution.
382
+ :return: A set of "name (version)" strings.
383
+ """
384
+ plist = self.metadata.provides
385
+ s = '%s (%s)' % (self.name, self.version)
386
+ if s not in plist:
387
+ plist.append(s)
388
+ return plist
389
+
390
+ def _get_requirements(self, req_attr):
391
+ md = self.metadata
392
+ reqts = getattr(md, req_attr)
393
+ logger.debug('%s: got requirements %r from metadata: %r', self.name,
394
+ req_attr, reqts)
395
+ return set(
396
+ md.get_requirements(reqts, extras=self.extras, env=self.context))
397
+
398
+ @property
399
+ def run_requires(self):
400
+ return self._get_requirements('run_requires')
401
+
402
+ @property
403
+ def meta_requires(self):
404
+ return self._get_requirements('meta_requires')
405
+
406
+ @property
407
+ def build_requires(self):
408
+ return self._get_requirements('build_requires')
409
+
410
+ @property
411
+ def test_requires(self):
412
+ return self._get_requirements('test_requires')
413
+
414
+ @property
415
+ def dev_requires(self):
416
+ return self._get_requirements('dev_requires')
417
+
418
+ def matches_requirement(self, req):
419
+ """
420
+ Say if this instance matches (fulfills) a requirement.
421
+ :param req: The requirement to match.
422
+ :rtype req: str
423
+ :return: True if it matches, else False.
424
+ """
425
+ # Requirement may contain extras - parse to lose those
426
+ # from what's passed to the matcher
427
+ r = parse_requirement(req)
428
+ scheme = get_scheme(self.metadata.scheme)
429
+ try:
430
+ matcher = scheme.matcher(r.requirement)
431
+ except UnsupportedVersionError:
432
+ # XXX compat-mode if cannot read the version
433
+ logger.warning('could not read version %r - using name only', req)
434
+ name = req.split()[0]
435
+ matcher = scheme.matcher(name)
436
+
437
+ name = matcher.key # case-insensitive
438
+
439
+ result = False
440
+ for p in self.provides:
441
+ p_name, p_ver = parse_name_and_version(p)
442
+ if p_name != name:
443
+ continue
444
+ try:
445
+ result = matcher.match(p_ver)
446
+ break
447
+ except UnsupportedVersionError:
448
+ pass
449
+ return result
450
+
451
+ def __repr__(self):
452
+ """
453
+ Return a textual representation of this instance,
454
+ """
455
+ if self.source_url:
456
+ suffix = ' [%s]' % self.source_url
457
+ else:
458
+ suffix = ''
459
+ return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix)
460
+
461
+ def __eq__(self, other):
462
+ """
463
+ See if this distribution is the same as another.
464
+ :param other: The distribution to compare with. To be equal to one
465
+ another. distributions must have the same type, name,
466
+ version and source_url.
467
+ :return: True if it is the same, else False.
468
+ """
469
+ if type(other) is not type(self):
470
+ result = False
471
+ else:
472
+ result = (self.name == other.name and self.version == other.version
473
+ and self.source_url == other.source_url)
474
+ return result
475
+
476
+ def __hash__(self):
477
+ """
478
+ Compute hash in a way which matches the equality test.
479
+ """
480
+ return hash(self.name) + hash(self.version) + hash(self.source_url)
481
+
482
+
483
class BaseInstalledDistribution(Distribution):
    """
    This is the base class for installed distributions (whether PEP 376 or
    legacy).
    """

    # Name of the default hashlib algorithm used by get_hash(); subclasses
    # may override it. ``None`` means "MD5, without a prefix".
    hasher = None

    def __init__(self, metadata, path, env=None):
        """
        Initialise an instance.

        :param metadata: An instance of :class:`Metadata` which describes the
                         distribution. This will normally have been initialised
                         from a metadata file in the ``path``.
        :param path:     The path of the ``.dist-info`` or ``.egg-info``
                         directory for the distribution.
        :param env:      This is normally the :class:`DistributionPath`
                         instance where this distribution was found.
        """
        super(BaseInstalledDistribution, self).__init__(metadata)
        self.path = path
        self.dist_path = env

    def get_hash(self, data, hasher=None):
        """
        Get the hash of some data, using a particular hash algorithm, if
        specified.

        :param data: The data to be hashed.
        :type data: bytes
        :param hasher: The name of a hash implementation, supported by hashlib,
                       or ``None``. Examples of valid values are ``'sha1'``,
                       ``'sha224'``, ``'sha384'``, ``'sha256'``, ``'md5'`` and
                       ``'sha512'``. If no hasher is specified, the ``hasher``
                       attribute of the instance is used. If the hasher is
                       determined to be ``None``, MD5 is used as the hashing
                       algorithm.
        :returns: The URL-safe base64 encoding of the digest, with trailing
                  ``'='`` padding removed. Unless the MD5 fallback was used,
                  the value is prefixed with the name of the algorithm that
                  was actually used, followed by ``'='``.
        :rtype: str
        :raises AttributeError: if *hasher* does not name an attribute of
                                :mod:`hashlib`.
        """
        if hasher is None:
            hasher = self.hasher
        if hasher is None:
            hash_factory = hashlib.md5
            prefix = ''
        else:
            hash_factory = getattr(hashlib, hasher)
            # Label the digest with the algorithm that produced it. Using
            # self.hasher here would mislabel a digest computed with an
            # explicitly requested algorithm (e.g. 'sha256=<md5 digest>').
            prefix = '%s=' % hasher
        digest = hash_factory(data).digest()
        digest = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
        return '%s%s' % (prefix, digest)
536
+
537
+
538
class InstalledDistribution(BaseInstalledDistribution):
    """
    Created with the *path* of the ``.dist-info`` directory provided to the
    constructor. Unless a Metadata instance is passed in (useful for when
    dry-run mode is being used) or one is available from the environment's
    cache, the metadata is read from the first of ``METADATA_FILENAME``,
    ``WHEEL_METADATA_FILENAME`` and ``LEGACY_METADATA_FILENAME`` found in
    that directory.
    """

    # Default algorithm name passed to hashlib by get_hash().
    hasher = 'sha256'

    def __init__(self, path, metadata=None, env=None):
        """
        Initialise an instance.

        :param path:     The path of the ``.dist-info`` directory.
        :param metadata: An optional metadata instance. It is ignored when
                         *env* has caching enabled and already holds an
                         entry for *path*.
        :param env:      Optional environment object; its ``_cache`` is
                         consulted and updated when ``_cache_enabled`` is
                         true.
        :raises ValueError: if no finder is available for *path*, or if
                            none of the metadata files can be found.
        """
        self.modules = []
        self.finder = finder = resources.finder_for_path(path)
        if finder is None:
            raise ValueError('finder unavailable for %s' % path)
        if env and env._cache_enabled and path in env._cache.path:
            # Re-use the metadata of the cached distribution for this path.
            metadata = env._cache.path[path].metadata
        elif metadata is None:
            r = finder.find(METADATA_FILENAME)
            # Temporary - for Wheel 0.23 support
            if r is None:
                r = finder.find(WHEEL_METADATA_FILENAME)
            # Temporary - for legacy support
            if r is None:
                r = finder.find(LEGACY_METADATA_FILENAME)
            if r is None:
                raise ValueError('no %s found in %s' %
                                 (METADATA_FILENAME, path))
            with contextlib.closing(r.as_stream()) as stream:
                metadata = Metadata(fileobj=stream, scheme='legacy')

        super(InstalledDistribution, self).__init__(metadata, path, env)

        # Caching happens after the base initialiser has run, so name, key
        # and version are already set on the instance being added.
        if env and env._cache_enabled:
            env._cache.add(self)

        # The mere presence of a REQUESTED resource marks the distribution
        # as explicitly requested.
        r = finder.find('REQUESTED')
        self.requested = r is not None
        p = os.path.join(path, 'top_level.txt')
        if os.path.exists(p):
            with open(p, 'rb') as f:
                data = f.read().decode('utf-8')
            self.modules = data.splitlines()

    def __repr__(self):
        """Return a debugging representation including name, version and path."""
        return '<InstalledDistribution %r %s at %r>' % (
            self.name, self.version, self.path)

    def __str__(self):
        """Return the name and version separated by a space."""
        return "%s %s" % (self.name, self.version)

    def _get_records(self):
        """
        Get the list of installed files for the distribution
        :return: A list of tuples of path, hash and size. Note that hash and
                 size might be ``None`` for some entries. The path is exactly
                 as stored in the file (which is as in PEP 376).
        """
        results = []
        r = self.get_distinfo_resource('RECORD')
        with contextlib.closing(r.as_stream()) as stream:
            with CSVReader(stream=stream) as record_reader:
                # Base location is parent dir of .dist-info dir
                # base_location = os.path.dirname(self.path)
                # base_location = os.path.abspath(base_location)
                for row in record_reader:
                    # Pad short rows with None so that the three-way
                    # unpacking below always succeeds.
                    missing = [None for i in range(len(row), 3)]
                    path, checksum, size = row + missing
                    # if not os.path.isabs(path):
                    #     path = path.replace('/', os.sep)
                    #     path = os.path.join(base_location, path)
                    results.append((path, checksum, size))
        return results

    @cached_property
    def exports(self):
        """
        Return the information exported by this distribution.
        :return: A dictionary of exports, mapping an export category to a dict
                 of :class:`ExportEntry` instances describing the individual
                 export entries, and keyed by name. An empty dictionary is
                 returned when the exports resource is not found.
        """
        result = {}
        r = self.get_distinfo_resource(EXPORTS_FILENAME)
        if r:
            result = self.read_exports()
        return result

    def read_exports(self):
        """
        Read exports data from a file in .ini format.

        :return: Whatever the module-level ``read_exports`` produces for the
                 exports resource (presumably a dictionary mapping an export
                 category to :class:`ExportEntry` instances - confirm against
                 that helper), or an empty dictionary when the resource is
                 not found.
        """
        result = {}
        r = self.get_distinfo_resource(EXPORTS_FILENAME)
        if r:
            with contextlib.closing(r.as_stream()) as stream:
                result = read_exports(stream)
        return result

    def write_exports(self, exports):
        """
        Write a dictionary of exports to a file in .ini format.
        :param exports: A dictionary of exports, mapping an export category to
                        a list of :class:`ExportEntry` instances describing the
                        individual export entries.
        """
        rf = self.get_distinfo_file(EXPORTS_FILENAME)
        with open(rf, 'w') as f:
            write_exports(exports, f)

    def get_resource_path(self, relative_path):
        """
        NOTE: This API may change in the future.

        Return the absolute path to a resource file with the given relative
        path.

        :param relative_path: The path, relative to .dist-info, of the resource
                              of interest.
        :return: The destination recorded for that resource in the
                 ``RESOURCES`` file.
        :raises KeyError: if no row of ``RESOURCES`` has *relative_path* as
                          its first field.
        """
        r = self.get_distinfo_resource('RESOURCES')
        with contextlib.closing(r.as_stream()) as stream:
            with CSVReader(stream=stream) as resources_reader:
                for relative, destination in resources_reader:
                    if relative == relative_path:
                        return destination
        raise KeyError('no resource file with relative path %r '
                       'is installed' % relative_path)

    def list_installed_files(self):
        """
        Iterates over the ``RECORD`` entries and returns a tuple
        ``(path, hash, size)`` for each line.

        :returns: iterator of (path, hash, size)
        """
        for result in self._get_records():
            yield result

    def write_installed_files(self, paths, prefix, dry_run=False):
        """
        Writes the ``RECORD`` file, using the ``paths`` iterable passed in. Any
        existing ``RECORD`` file is silently overwritten.

        prefix is used to determine when to write absolute paths.

        :return: ``None`` in dry-run mode; otherwise the path of the
                 ``RECORD`` file, made relative to the parent of the
                 ``.dist-info`` directory when it lies under it.
        """
        # Joining with '' appends a trailing separator, so the startswith()
        # tests below only match on whole path components.
        prefix = os.path.join(prefix, '')
        base = os.path.dirname(self.path)
        base_under_prefix = base.startswith(prefix)
        base = os.path.join(base, '')
        record_path = self.get_distinfo_file('RECORD')
        logger.info('creating %s', record_path)
        if dry_run:
            return None
        with CSVWriter(record_path) as writer:
            for path in paths:
                if os.path.isdir(path) or path.endswith(('.pyc', '.pyo')):
                    # do not put size and hash, as in PEP-376
                    hash_value = size = ''
                else:
                    size = '%d' % os.path.getsize(path)
                    with open(path, 'rb') as fp:
                        hash_value = self.get_hash(fp.read())
                if path.startswith(base) or (base_under_prefix
                                             and path.startswith(prefix)):
                    path = os.path.relpath(path, base)
                writer.writerow((path, hash_value, size))

            # add the RECORD file itself
            if record_path.startswith(base):
                record_path = os.path.relpath(record_path, base)
            writer.writerow((record_path, '', ''))
        return record_path

    def check_installed_files(self):
        """
        Checks that the hashes and sizes of the files in ``RECORD`` are
        matched by the files themselves. Returns a (possibly empty) list of
        mismatches. Each entry in the mismatch list will be a tuple consisting
        of the path, 'exists', 'size' or 'hash' according to what didn't match
        (existence is checked first, then size, then hash), the expected
        value and the actual value.
        """
        mismatches = []
        base = os.path.dirname(self.path)
        record_path = self.get_distinfo_file('RECORD')
        for path, hash_value, size in self.list_installed_files():
            if not os.path.isabs(path):
                path = os.path.join(base, path)
            # The RECORD file itself is not checked.
            if path == record_path:
                continue
            if not os.path.exists(path):
                mismatches.append((path, 'exists', True, False))
            elif os.path.isfile(path):
                actual_size = str(os.path.getsize(path))
                if size and actual_size != size:
                    mismatches.append((path, 'size', size, actual_size))
                elif hash_value:
                    # A recorded value of the form "<algorithm>=<digest>"
                    # names the algorithm to recompute the hash with.
                    if '=' in hash_value:
                        hasher = hash_value.split('=', 1)[0]
                    else:
                        hasher = None

                    with open(path, 'rb') as f:
                        actual_hash = self.get_hash(f.read(), hasher)
                        if actual_hash != hash_value:
                            mismatches.append(
                                (path, 'hash', hash_value, actual_hash))
        return mismatches

    @cached_property
    def shared_locations(self):
        """
        A dictionary of shared locations whose keys are in the set 'prefix',
        'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
        The corresponding value is the absolute path of that category for
        this distribution, and takes into account any paths selected by the
        user at installation time (e.g. via command-line arguments). In the
        case of the 'namespace' key, this would be a list of absolute paths
        for the roots of namespace packages in this distribution.

        The first time this property is accessed, the relevant information is
        read from the SHARED file in the .dist-info directory. If that file
        does not exist, the dictionary is empty.
        """
        result = {}
        shared_path = os.path.join(self.path, 'SHARED')
        if os.path.isfile(shared_path):
            with codecs.open(shared_path, 'r', encoding='utf-8') as f:
                lines = f.read().splitlines()
            for line in lines:
                # Each line has the form "key=value"; only the first '='
                # separates the two.
                key, value = line.split('=', 1)
                if key == 'namespace':
                    result.setdefault(key, []).append(value)
                else:
                    result[key] = value
        return result

    def write_shared_locations(self, paths, dry_run=False):
        """
        Write shared location information to the SHARED file in .dist-info.
        :param paths: A dictionary as described in the documentation for
                      :meth:`shared_locations`. The keys 'prefix', 'lib',
                      'headers', 'scripts' and 'data' are all looked up and
                      must be present; 'namespace' is optional.
        :param dry_run: If True, the action is logged but no file is actually
                        written.
        :return: The path of the file written to, or ``None`` in dry-run
                 mode.
        """
        shared_path = os.path.join(self.path, 'SHARED')
        logger.info('creating %s', shared_path)
        if dry_run:
            return None
        lines = []
        for key in ('prefix', 'lib', 'headers', 'scripts', 'data'):
            path = paths[key]
            # Only locations which exist as directories are recorded.
            if os.path.isdir(paths[key]):
                lines.append('%s=%s' % (key, path))
        for ns in paths.get('namespace', ()):
            lines.append('namespace=%s' % ns)

        with codecs.open(shared_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(lines))
        return shared_path

    def get_distinfo_resource(self, path):
        """
        Return the resource for the dist-info file *path*, as found by a
        finder for the ``.dist-info`` directory.

        :param path: A name which must be one of ``DIST_FILES``.
        :return: The result of the finder's ``find`` call (callers in this
                 class treat a false value as "not present").
        :raises DistlibException: if *path* is not in ``DIST_FILES`` or no
                                  finder is available for the directory.
        """
        if path not in DIST_FILES:
            raise DistlibException('invalid path for a dist-info file: '
                                   '%r at %r' % (path, self.path))
        finder = resources.finder_for_path(self.path)
        if finder is None:
            raise DistlibException('Unable to get a finder for %s' % self.path)
        return finder.find(path)

    def get_distinfo_file(self, path):
        """
        Returns a path located under the ``.dist-info`` directory. Returns a
        string representing the path.

        :parameter path: a ``'/'``-separated path relative to the
                         ``.dist-info`` directory or an absolute path;
                         If *path* is an absolute path and doesn't start
                         with the ``.dist-info`` directory path,
                         a :class:`DistlibException` is raised
        :type path: str
        :rtype: str
        """
        # Check if it is an absolute path  # XXX use relpath, add tests
        if path.find(os.sep) >= 0:
            # it's an absolute path?
            # Only the last two components are examined: the containing
            # directory name and the file name.
            distinfo_dirname, path = path.split(os.sep)[-2:]
            if distinfo_dirname != self.path.split(os.sep)[-1]:
                raise DistlibException(
                    'dist-info file %r does not belong to the %r %s '
                    'distribution' % (path, self.name, self.version))

        # The file must be relative
        if path not in DIST_FILES:
            raise DistlibException('invalid path for a dist-info file: '
                                   '%r at %r' % (path, self.path))

        return os.path.join(self.path, path)

    def list_distinfo_files(self):
        """
        Iterates over the ``RECORD`` entries and returns paths for each line if
        the path is pointing to a file located in the ``.dist-info`` directory
        or one of its subdirectories.

        :returns: iterator of paths
        """
        base = os.path.dirname(self.path)
        for path, checksum, size in self._get_records():
            # XXX add separator or use real relpath algo
            if not os.path.isabs(path):
                path = os.path.join(base, path)
            # NOTE: this is a plain string-prefix test, so a sibling path
            # which merely starts with the same characters also matches.
            if path.startswith(self.path):
                yield path

    def __eq__(self, other):
        """Two installed distributions are equal when their paths are equal."""
        return (isinstance(other, InstalledDistribution)
                and self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
865
+
866
+
867
class EggInfoDistribution(BaseInstalledDistribution):
    """Created with the *path* of the ``.egg-info`` directory or file provided
    to the constructor. It reads the metadata contained in the file itself, or
    if the given path happens to be a directory, the metadata is read from the
    file ``PKG-INFO`` under that directory. Paths ending in ``.egg`` (either
    a directory or a zipped egg) are also accepted."""

    requested = True  # as we have no way of knowing, assume it was
    shared_locations = {}

    def __init__(self, path, env=None):
        """
        Initialise an instance.

        :param path: The path of the ``.egg-info`` file or directory, or of
                     an ``.egg`` directory or zip file.
        :param env:  Optional environment object; its ``_cache_egg`` is
                     consulted and updated when ``_cache_enabled`` is true.
        :raises DistlibException: if *path* has neither suffix.
        """

        def set_name_and_version(s, n, v):
            s.name = n
            s.key = n.lower()  # for case-insensitive comparisons
            s.version = v

        self.path = path
        self.dist_path = env
        if env and env._cache_enabled and path in env._cache_egg.path:
            cached = env._cache_egg.path[path]
            metadata = cached.metadata
            # _get_metadata() is skipped on a cache hit, so take the
            # top-level module list from the cached distribution instead of
            # leaving the attribute unset.
            self.modules = list(getattr(cached, 'modules', []))
            set_name_and_version(self, metadata.name, metadata.version)
        else:
            metadata = self._get_metadata(path)

            # Need to be set before caching
            set_name_and_version(self, metadata.name, metadata.version)

            if env and env._cache_enabled:
                env._cache_egg.add(self)
        super(EggInfoDistribution, self).__init__(metadata, path, env)

    def _get_metadata(self, path):
        """
        Load the metadata for the egg at *path* and record its top-level
        modules in ``self.modules``.

        Requirements found in a ``requires.txt`` file are added to the
        returned metadata.

        :param path: A path ending in ``.egg`` or ``.egg-info``.
        :return: The metadata instance.
        :raises DistlibException: if *path* has neither suffix.
        """
        requires = None

        def parse_requires_data(data):
            """Create a list of dependencies from a requires.txt file.

            *data*: the contents of a setuptools-produced requires.txt file.
            """
            reqs = []
            lines = data.splitlines()
            for line in lines:
                line = line.strip()
                # sectioned files have bare newlines (separating sections)
                if not line:  # pragma: no cover
                    continue
                if line.startswith('['):  # pragma: no cover
                    logger.warning(
                        'Unexpected line: quitting requirement scan: %r', line)
                    break
                r = parse_requirement(line)
                if not r:  # pragma: no cover
                    logger.warning('Not recognised as a requirement: %r', line)
                    continue
                if r.extras:  # pragma: no cover
                    logger.warning('extra requirements in requires.txt are '
                                   'not supported')
                if not r.constraints:
                    reqs.append(r.name)
                else:
                    cons = ', '.join('%s%s' % c for c in r.constraints)
                    reqs.append('%s (%s)' % (r.name, cons))
            return reqs

        def parse_requires_path(req_path):
            """Create a list of dependencies from a requires.txt file.

            *req_path*: the path to a setuptools-produced requires.txt file.
            An unreadable or missing file gives an empty list.
            """

            reqs = []
            try:
                with codecs.open(req_path, 'r', 'utf-8') as fp:
                    reqs = parse_requires_data(fp.read())
            except IOError:
                pass
            return reqs

        tl_path = tl_data = None
        if path.endswith('.egg'):
            if os.path.isdir(path):
                p = os.path.join(path, 'EGG-INFO')
                meta_path = os.path.join(p, 'PKG-INFO')
                metadata = Metadata(path=meta_path, scheme='legacy')
                req_path = os.path.join(p, 'requires.txt')
                tl_path = os.path.join(p, 'top_level.txt')
                requires = parse_requires_path(req_path)
            else:
                # FIXME handle the case where zipfile is not available
                zipf = zipimport.zipimporter(path)
                fileobj = StringIO(
                    zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
                metadata = Metadata(fileobj=fileobj, scheme='legacy')
                # requires.txt and top_level.txt are both optional and are
                # read independently, so that the absence of one does not
                # discard the contents of the other.
                try:
                    data = zipf.get_data('EGG-INFO/requires.txt')
                    requires = parse_requires_data(data.decode('utf-8'))
                except IOError:
                    requires = None
                try:
                    tl_data = zipf.get_data('EGG-INFO/top_level.txt').decode(
                        'utf-8')
                except IOError:
                    tl_data = None
        elif path.endswith('.egg-info'):
            if os.path.isdir(path):
                req_path = os.path.join(path, 'requires.txt')
                requires = parse_requires_path(req_path)
                # Must be computed before *path* is repointed at PKG-INFO:
                # top_level.txt lives in the directory, next to PKG-INFO.
                tl_path = os.path.join(path, 'top_level.txt')
                path = os.path.join(path, 'PKG-INFO')
            metadata = Metadata(path=path, scheme='legacy')
        else:
            raise DistlibException('path must end with .egg-info or .egg, '
                                   'got %r' % path)

        if requires:
            metadata.add_requirements(requires)
        # look for top-level modules in top_level.txt, if present
        if tl_data is None:
            if tl_path is not None and os.path.exists(tl_path):
                with open(tl_path, 'rb') as f:
                    tl_data = f.read().decode('utf-8')
        if not tl_data:
            tl_data = []
        else:
            tl_data = tl_data.splitlines()
        self.modules = tl_data
        return metadata

    def __repr__(self):
        """Return a debugging representation including name, version and path."""
        return '<EggInfoDistribution %r %s at %r>' % (self.name, self.version,
                                                      self.path)

    def __str__(self):
        """Return the name and version separated by a space."""
        return "%s %s" % (self.name, self.version)

    def check_installed_files(self):
        """
        Checks that the files listed in ``installed-files.txt`` exist.

        Only existence is checked here; sizes and hashes are not compared.
        Returns a (possibly empty) list of mismatches, each a tuple
        ``(path, 'exists', True, False)``. If there is no
        ``installed-files.txt``, the list is empty.
        """
        mismatches = []
        record_path = os.path.join(self.path, 'installed-files.txt')
        if os.path.exists(record_path):
            for path, _, _ in self.list_installed_files():
                if path == record_path:
                    continue
                if not os.path.exists(path):
                    mismatches.append((path, 'exists', True, False))
        return mismatches

    def list_installed_files(self):
        """
        Reads the ``installed-files.txt`` entries and returns a tuple
        ``(path, hash, size)`` for each listed non-directory path, where the
        hash is an MD5 hex digest. A final ``(record_path, None, None)``
        entry stands for ``installed-files.txt`` itself.

        Missing ``.pyc``/``.pyo`` files are skipped with a warning; any other
        missing file causes the error raised on opening it to propagate.

        :returns: a list of (path, hash, size); empty when there is no
                  ``installed-files.txt``
        """

        def _md5(path):
            with open(path, 'rb') as f:
                content = f.read()
            return hashlib.md5(content).hexdigest()

        def _size(path):
            return os.stat(path).st_size

        record_path = os.path.join(self.path, 'installed-files.txt')
        result = []
        if os.path.exists(record_path):
            with codecs.open(record_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    p = os.path.normpath(os.path.join(self.path, line))
                    # "./" is present as a marker between installed files
                    # and installation metadata files
                    if not os.path.exists(p):
                        logger.warning('Non-existent file: %s', p)
                        if p.endswith(('.pyc', '.pyo')):
                            continue
                        # otherwise fall through and fail
                    if not os.path.isdir(p):
                        result.append((p, _md5(p), _size(p)))
            result.append((record_path, None, None))
        return result

    def list_distinfo_files(self, absolute=False):
        """
        Iterates over the ``installed-files.txt`` entries and returns paths for
        each line if the path is pointing to a file located in the
        ``.egg-info`` directory or one of its subdirectories. Only lines
        after the ``./`` marker line are considered.

        :parameter absolute: If *absolute* is ``True``, each returned path is
                          transformed into a local absolute path. Otherwise the
                          raw value from ``installed-files.txt`` is returned.
        :type absolute: boolean
        :returns: iterator of paths
        """
        record_path = os.path.join(self.path, 'installed-files.txt')
        if os.path.exists(record_path):
            skip = True
            with codecs.open(record_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    if line == './':
                        skip = False
                        continue
                    if not skip:
                        p = os.path.normpath(os.path.join(self.path, line))
                        if p.startswith(self.path):
                            if absolute:
                                yield p
                            else:
                                yield line

    def __eq__(self, other):
        """Two egg-info distributions are equal when their paths are equal."""
        return (isinstance(other, EggInfoDistribution)
                and self.path == other.path)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    __hash__ = object.__hash__
1091
+
1092
+
1093
# Module-level aliases for the two concrete installed-distribution classes:
# the ``.dist-info`` based one and the ``.egg-info`` based one.
new_dist_class = InstalledDistribution
old_dist_class = EggInfoDistribution
1095
+
1096
+
1097
+ class DependencyGraph(object):
1098
+ """
1099
+ Represents a dependency graph between distributions.
1100
+
1101
+ The dependency relationships are stored in an ``adjacency_list`` that maps
1102
+ distributions to a list of ``(other, label)`` tuples where ``other``
1103
+ is a distribution and the edge is labeled with ``label`` (i.e. the version
1104
+ specifier, if such was provided). Also, for more efficient traversal, for
1105
+ every distribution ``x``, a list of predecessors is kept in
1106
+ ``reverse_list[x]``. An edge from distribution ``a`` to
1107
+ distribution ``b`` means that ``a`` depends on ``b``. If any missing
1108
+ dependencies are found, they are stored in ``missing``, which is a
1109
+ dictionary that maps distributions to a list of requirements that were not
1110
+ provided by any other distributions.
1111
+ """
1112
+
1113
+ def __init__(self):
1114
+ self.adjacency_list = {}
1115
+ self.reverse_list = {}
1116
+ self.missing = {}
1117
+
1118
+ def add_distribution(self, distribution):
1119
+ """Add the *distribution* to the graph.
1120
+
1121
+ :type distribution: :class:`distutils2.database.InstalledDistribution`
1122
+ or :class:`distutils2.database.EggInfoDistribution`
1123
+ """
1124
+ self.adjacency_list[distribution] = []
1125
+ self.reverse_list[distribution] = []
1126
+ # self.missing[distribution] = []
1127
+
1128
+ def add_edge(self, x, y, label=None):
1129
+ """Add an edge from distribution *x* to distribution *y* with the given
1130
+ *label*.
1131
+
1132
+ :type x: :class:`distutils2.database.InstalledDistribution` or
1133
+ :class:`distutils2.database.EggInfoDistribution`
1134
+ :type y: :class:`distutils2.database.InstalledDistribution` or
1135
+ :class:`distutils2.database.EggInfoDistribution`
1136
+ :type label: ``str`` or ``None``
1137
+ """
1138
+ self.adjacency_list[x].append((y, label))
1139
+ # multiple edges are allowed, so be careful
1140
+ if x not in self.reverse_list[y]:
1141
+ self.reverse_list[y].append(x)
1142
+
1143
+ def add_missing(self, distribution, requirement):
1144
+ """
1145
+ Add a missing *requirement* for the given *distribution*.
1146
+
1147
+ :type distribution: :class:`distutils2.database.InstalledDistribution`
1148
+ or :class:`distutils2.database.EggInfoDistribution`
1149
+ :type requirement: ``str``
1150
+ """
1151
+ logger.debug('%s missing %r', distribution, requirement)
1152
+ self.missing.setdefault(distribution, []).append(requirement)
1153
+
1154
+ def _repr_dist(self, dist):
1155
+ return '%s %s' % (dist.name, dist.version)
1156
+
1157
+ def repr_node(self, dist, level=1):
1158
+ """Prints only a subgraph"""
1159
+ output = [self._repr_dist(dist)]
1160
+ for other, label in self.adjacency_list[dist]:
1161
+ dist = self._repr_dist(other)
1162
+ if label is not None:
1163
+ dist = '%s [%s]' % (dist, label)
1164
+ output.append(' ' * level + str(dist))
1165
+ suboutput = self.repr_node(other, level + 1)
1166
+ subs = suboutput.split('\n')
1167
+ output.extend(subs[1:])
1168
+ return '\n'.join(output)
1169
+
1170
def to_dot(self, f, skip_disconnected=True):
    """Write the graph to *f* in DOT format.

    Every edge becomes one ``"a" -> "b"`` line, with a ``label`` attribute
    when the edge carries one. If *skip_disconnected* is ``False``, the
    distributions without any outgoing edge are additionally listed in a
    ``disconnected`` subgraph; otherwise they are left out.

    :type f: has to support ``file``-like operations (only ``write`` is used)
    :type skip_disconnected: ``bool``
    """
    isolated = []

    f.write("digraph dependencies {\n")
    for node, edges in self.adjacency_list.items():
        if not skip_disconnected and len(edges) == 0:
            isolated.append(node)
        for target, label in edges:
            if label is None:
                f.write('"%s" -> "%s"\n' % (node.name, target.name))
            else:
                f.write('"%s" -> "%s" [label="%s"]\n' %
                        (node.name, target.name, label))
    if len(isolated) > 0 and not skip_disconnected:
        f.write('subgraph disconnected {\n')
        f.write('label = "Disconnected"\n')
        f.write('bgcolor = red\n')

        for node in isolated:
            f.write('"%s"' % node.name)
            f.write('\n')
        f.write('}\n')
    f.write('}\n')
1201
+
1202
def topological_sort(self):
    """
    Perform a topological sort of the graph.

    Distributions are emitted in rounds: each round removes every node that
    has no remaining outgoing edges, then strips edges pointing at the
    removed nodes. ``self.adjacency_list`` itself is not modified.

    :return: A tuple, the first element of which is a topologically sorted
             list of distributions, and the second element of which is a
             list of distributions that cannot be sorted because they have
             circular dependencies and so form a cycle.
    """
    result = []
    # Work on a copy (fresh dict, fresh edge lists) of the adjacency list.
    alist = {k: v[:] for k, v in self.adjacency_list.items()}
    while True:
        # See what we can remove in this run
        to_remove = [k for k, v in alist.items() if not v]
        if not to_remove:
            # What's left in alist (if anything) is a cycle.
            break
        for k in to_remove:
            del alist[k]
        # Set membership keeps the edge filter linear in the number of
        # edges; nodes are dict keys already, so they are hashable.
        removed = set(to_remove)
        # Remove from the adjacency list of others
        for k, v in alist.items():
            alist[k] = [(d, r) for d, r in v if d not in removed]
        logger.debug('Moving to result: %s',
                     ['%s (%s)' % (d.name, d.version) for d in to_remove])
        result.extend(to_remove)
    return result, list(alist.keys())
1232
+
1233
def __repr__(self):
    """Return the ``repr_node`` rendering of every distribution in the graph, one after another."""
    return '\n'.join(self.repr_node(dist) for dist in self.adjacency_list)
1239
+
1240
+
1241
def make_graph(dists, scheme='default'):
    """Build a dependency graph for the given distributions.

    Each distribution becomes a node. For every requirement of a
    distribution, an edge is added to the first distribution whose
    ``provides`` entry satisfies it; requirements nobody satisfies are
    recorded through ``add_missing``.

    :parameter dists: a list of distributions
    :type dists: list of :class:`distutils2.database.InstalledDistribution` and
                 :class:`distutils2.database.EggInfoDistribution` instances
    :parameter scheme: name of the version scheme handed to ``get_scheme``
    :rtype: a :class:`DependencyGraph` instance
    """
    scheme = get_scheme(scheme)
    graph = DependencyGraph()
    provided = {}  # maps names to lists of (version, dist) tuples

    # Pass 1: register the nodes and index what each one provides.
    for dist in dists:
        graph.add_distribution(dist)

        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            if name not in provided:
                provided[name] = []
            provided[name].append((version, dist))

    # Pass 2: turn requirements into edges (or missing entries).
    for dist in dists:
        requires = (dist.run_requires | dist.meta_requires
                    | dist.build_requires | dist.dev_requires)
        for req in requires:
            try:
                matcher = scheme.matcher(req)
            except UnsupportedVersionError:
                # XXX compat-mode if cannot read the version
                logger.warning('could not read version %r - using name only',
                               req)
                name = req.split()[0]
                matcher = scheme.matcher(name)

            name = matcher.key  # case-insensitive

            for version, provider in provided.get(name, ()):
                try:
                    satisfied = matcher.match(version)
                except UnsupportedVersionError:
                    satisfied = False

                if satisfied:
                    graph.add_edge(dist, provider, req)
                    break
            else:
                # No provider matched (or nothing provides this name at all).
                graph.add_missing(dist, req)
    return graph
1293
+
1294
+
1295
def get_dependent_dists(dists, dist):
    """Recursively generate a list of distributions from *dists* that are
    dependent on *dist*.

    Each dependent distribution appears exactly once in the result, even
    when it is reachable through several dependency paths.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
    :return: a list of the distributions that directly or indirectly
             depend on *dist*
    :raises DistlibException: if *dist* is not a member of *dists*
    """
    if dist not in dists:
        raise DistlibException('given distribution %r is not a member '
                               'of the list' % dist.name)
    graph = make_graph(dists)

    dep = [dist]  # dependent distributions
    # Copy, so the traversal does not consume the graph's own list.
    todo = list(graph.reverse_list[dist])  # list of nodes we should inspect
    # Everything that has ever been queued. Checking only ``dep`` (what has
    # been popped so far) lets a node be queued twice before its first pop
    # and then be reported twice; get_required_dists() uses the same guard.
    seen = set(todo)
    seen.add(dist)

    while todo:
        d = todo.pop()
        dep.append(d)
        for succ in graph.reverse_list[d]:
            if succ not in seen:
                seen.add(succ)
                todo.append(succ)

    dep.pop(0)  # remove dist from dep, was there to prevent infinite loops
    return dep
1319
+
1320
+
1321
def get_required_dists(dists, dist):
    """Recursively collect the distributions from *dists* that *dist*
    requires, directly or through other distributions.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
                 in finding the dependencies.
    :return: a set of the required distributions
    :raises DistlibException: if *dist* is not a member of *dists*
    """
    if dist not in dists:
        raise DistlibException('given distribution %r is not a member '
                               'of the list' % dist.name)
    graph = make_graph(dists)

    required = set()
    # Edges still to follow. This is the graph's own edge list for *dist*;
    # the graph was built for this call only, so consuming it is harmless.
    pending = graph.adjacency_list[dist]
    queued = {edge[0] for edge in pending}  # targets already put on pending

    while pending:
        current = pending.pop()[0]
        required.add(current)
        for edge in graph.adjacency_list[current]:
            target = edge[0]
            if target in required or target in queued:
                continue
            queued.add(target)
            pending.append(edge)
    return required
1348
+
1349
+
1350
def make_dist(name, version, **kwargs):
    """
    Convenience factory: build a dist from just a name and a version.

    A ``summary`` keyword is taken out of *kwargs* (a placeholder is used
    when it is absent or empty); the remaining keywords go to ``Metadata``.
    """
    placeholder = 'Placeholder for summary'
    summary = kwargs.pop('summary', placeholder)
    md = Metadata(**kwargs)
    md.name = name
    md.version = version
    if not summary:
        summary = placeholder
    md.summary = summary
    return Distribution(md)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/index.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ import hashlib
8
+ import logging
9
+ import os
10
+ import shutil
11
+ import subprocess
12
+ import tempfile
13
+ try:
14
+ from threading import Thread
15
+ except ImportError: # pragma: no cover
16
+ from dummy_threading import Thread
17
+
18
+ from . import DistlibException
19
+ from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
20
+ urlparse, build_opener, string_types)
21
+ from .util import zip_dir, ServerProxy
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ DEFAULT_INDEX = 'https://pypi.org/pypi'
26
+ DEFAULT_REALM = 'pypi'
27
+
28
+
29
class PackageIndex(object):
    """
    This class represents a package index compatible with PyPI, the Python
    Package Index.
    """

    # Multipart boundary used by encode_request() to delimit form parts.
    boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'

    def __init__(self, url=None):
        """
        Initialise an instance.

        :param url: The URL of the index. If not specified, the URL for PyPI is
                    used.
        :raises DistlibException: if the resulting URL has params, a query or
                                  a fragment, or is not ``http``/``https``.
        """
        self.url = url or DEFAULT_INDEX
        # May replace self.url with a repository taken from the configuration.
        self.read_configuration()
        scheme, netloc, path, params, query, frag = urlparse(self.url)
        if params or query or frag or scheme not in ('http', 'https'):
            raise DistlibException('invalid repository: %s' % self.url)
        self.password_handler = None
        self.ssl_verifier = None
        self.gpg = None
        self.gpg_home = None
        with open(os.devnull, 'w') as sink:
            # Use gpg by default rather than gpg2, as gpg2 insists on
            # prompting for passwords
            for s in ('gpg', 'gpg2'):
                try:
                    # call(), not check_call(): a non-zero exit status must
                    # only disqualify this candidate instead of raising
                    # CalledProcessError out of the constructor.
                    rc = subprocess.call([s, '--version'], stdout=sink,
                                         stderr=sink)
                except OSError:
                    # Program could not be started - try the next candidate.
                    continue
                if rc == 0:
                    self.gpg = s
                    break

    def _get_pypirc_command(self):
        """
        Get the distutils command for interacting with PyPI configurations.
        :return: the command.
        """
        from .util import _get_pypirc_command as cmd
        return cmd()

    def read_configuration(self):
        """
        Read the PyPI access configuration as supported by distutils. This populates
        ``username``, ``password``, ``realm`` and ``url`` attributes from the
        configuration.
        """
        from .util import _load_pypirc
        cfg = _load_pypirc(self)
        self.username = cfg.get('username')
        self.password = cfg.get('password')
        self.realm = cfg.get('realm', 'pypi')
        # Keep the current URL unless the configuration names a repository.
        self.url = cfg.get('repository', self.url)

    def save_configuration(self):
        """
        Save the PyPI access configuration. You must have set ``username`` and
        ``password`` attributes before calling this method.
        """
        self.check_credentials()
        from .util import _store_pypirc
        _store_pypirc(self)

    def check_credentials(self):
        """
        Check that ``username`` and ``password`` have been set, and raise an
        exception if not.

        On success, ``password_handler`` is set to a basic-auth handler for
        the index host, which send_request() then uses.

        :raises DistlibException: if either credential is ``None``.
        """
        if self.username is None or self.password is None:
            raise DistlibException('username and password must be set')
        pm = HTTPPasswordMgr()
        _, netloc, _, _, _, _ = urlparse(self.url)
        pm.add_password(self.realm, netloc, self.username, self.password)
        self.password_handler = HTTPBasicAuthHandler(pm)

    def register(self, metadata):  # pragma: no cover
        """
        Register a distribution on PyPI, using the provided metadata.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the distribution to be
                         registered.
        :return: The HTTP response received from PyPI upon submission of the
                request.
        """
        self.check_credentials()
        metadata.validate()
        d = metadata.todict()
        # Two requests with the same fields: first 'verify', then 'submit'.
        d[':action'] = 'verify'
        request = self.encode_request(d.items(), [])
        self.send_request(request)
        d[':action'] = 'submit'
        request = self.encode_request(d.items(), [])
        return self.send_request(request)

    def _reader(self, name, stream, outbuf):
        """
        Thread runner for reading lines from a subprocess into a buffer.

        :param name: The logical name of the stream (used for logging only).
        :param stream: The stream to read from. This will typically be a pipe
                       connected to the output stream of a subprocess.
        :param outbuf: The list to append the read lines to.
        """
        while True:
            s = stream.readline()
            if not s:
                break
            s = s.decode('utf-8').rstrip()
            outbuf.append(s)
            # Pass arguments separately so formatting only happens when
            # debug logging is enabled.
            logger.debug('%s: %s', name, s)
        stream.close()

    def get_sign_command(self, filename, signer, sign_password, keystore=None):  # pragma: no cover
        """
        Return a suitable command for signing a file.

        :param filename: The pathname to the file to be signed.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing. If not ``None``,
                              the command is set up to read it from stdin.
        :param keystore: The path to a directory which contains the keys
                         used in signing. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: A ``(cmd, sig_file)`` tuple: the signing command as a list
                 suitable to be passed to :class:`subprocess.Popen`, and the
                 pathname the signature will be written to (inside a newly
                 created temporary directory).
        """
        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
        if keystore is None:
            keystore = self.gpg_home
        if keystore:
            cmd.extend(['--homedir', keystore])
        if sign_password is not None:
            cmd.extend(['--batch', '--passphrase-fd', '0'])
        td = tempfile.mkdtemp()
        sf = os.path.join(td, os.path.basename(filename) + '.asc')
        cmd.extend(['--detach-sign', '--armor', '--local-user',
                    signer, '--output', sf, filename])
        logger.debug('invoking: %s', ' '.join(cmd))
        return cmd, sf

    def run_command(self, cmd, input_data=None):
        """
        Run a command in a child process, passing it any input data specified.

        :param cmd: The command to run.
        :param input_data: If specified, this must be a byte string containing
                           data to be sent to the child process.
        :return: A tuple consisting of the subprocess' exit code, a list of
                 lines read from the subprocess' ``stdout``, and a list of
                 lines read from the subprocess' ``stderr``.
        """
        kwargs = {
            'stdout': subprocess.PIPE,
            'stderr': subprocess.PIPE,
        }
        if input_data is not None:
            kwargs['stdin'] = subprocess.PIPE
        stdout = []
        stderr = []
        p = subprocess.Popen(cmd, **kwargs)
        # We don't use communicate() here because we may need to
        # get clever with interacting with the command
        t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
        t1.start()
        t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
        t2.start()
        if input_data is not None:
            p.stdin.write(input_data)
            p.stdin.close()

        p.wait()
        t1.join()
        t2.join()
        return p.returncode, stdout, stderr

    def sign_file(self, filename, signer, sign_password, keystore=None):  # pragma: no cover
        """
        Sign a file.

        :param filename: The pathname to the file to be signed.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing, or ``None`` if
                              no passphrase is to be supplied.
        :param keystore: The path to a directory which contains the keys
                         used in signing. If not specified, the instance's
                         ``gpg_home`` attribute is used instead.
        :return: The absolute pathname of the file where the signature is
                 stored.
        :raises DistlibException: if the signing command exits with a
                                  non-zero status.
        """
        cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
                                              keystore)
        # get_sign_command() only makes the command read a passphrase from
        # stdin when one is given, so only send input in that case.
        input_data = None
        if sign_password is not None:
            input_data = sign_password.encode('utf-8')
        rc, stdout, stderr = self.run_command(cmd, input_data)
        if rc != 0:
            raise DistlibException('sign command failed with error '
                                   'code %s' % rc)
        return sig_file

    def upload_file(self, metadata, filename, signer=None, sign_password=None,
                    filetype='sdist', pyversion='source', keystore=None):
        """
        Upload a release file to the index.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the file to be uploaded.
        :param filename: The pathname of the file to be uploaded.
        :param signer: The identifier of the signer of the file.
        :param sign_password: The passphrase for the signer's
                              private key used for signing.
        :param filetype: The type of the file being uploaded. This is the
                        distutils command which produced that file, e.g.
                        ``sdist`` or ``bdist_wheel``.
        :param pyversion: The version of Python which the release relates
                          to. For code compatible with any Python, this would
                          be ``source``, otherwise it would be e.g. ``3.2``.
        :param keystore: The path to a directory which contains the keys
                         used in signing. If not specified, the instance's
                         ``gpg_home`` attribute is used instead.
        :return: The HTTP response received from PyPI upon submission of the
                request.
        :raises DistlibException: if credentials are missing or *filename*
                                  does not exist.
        """
        self.check_credentials()
        if not os.path.exists(filename):
            raise DistlibException('not found: %s' % filename)
        metadata.validate()
        d = metadata.todict()
        sig_file = None
        if signer:
            if not self.gpg:
                logger.warning('no signing program available - not signed')
            else:
                sig_file = self.sign_file(filename, signer, sign_password,
                                          keystore)
        with open(filename, 'rb') as f:
            file_data = f.read()
        md5_digest = hashlib.md5(file_data).hexdigest()
        sha256_digest = hashlib.sha256(file_data).hexdigest()
        d.update({
            ':action': 'file_upload',
            'protocol_version': '1',
            'filetype': filetype,
            'pyversion': pyversion,
            'md5_digest': md5_digest,
            'sha256_digest': sha256_digest,
        })
        files = [('content', os.path.basename(filename), file_data)]
        if sig_file:
            try:
                with open(sig_file, 'rb') as f:
                    sig_data = f.read()
            finally:
                # The signature sits in a temporary directory made by
                # get_sign_command(); remove it even if reading failed.
                shutil.rmtree(os.path.dirname(sig_file))
            files.append(('gpg_signature', os.path.basename(sig_file),
                          sig_data))
        request = self.encode_request(d.items(), files)
        return self.send_request(request)

    def upload_documentation(self, metadata, doc_dir):  # pragma: no cover
        """
        Upload documentation to the index.

        :param metadata: A :class:`Metadata` instance defining at least a name
                         and version number for the documentation to be
                         uploaded.
        :param doc_dir: The pathname of the directory which contains the
                        documentation. This should be the directory that
                        contains the ``index.html`` for the documentation.
        :return: The HTTP response received from PyPI upon submission of the
                request.
        :raises DistlibException: if *doc_dir* is not a directory or has no
                                  ``index.html``.
        """
        self.check_credentials()
        if not os.path.isdir(doc_dir):
            raise DistlibException('not a directory: %r' % doc_dir)
        fn = os.path.join(doc_dir, 'index.html')
        if not os.path.exists(fn):
            raise DistlibException('not found: %r' % fn)
        metadata.validate()
        name, version = metadata.name, metadata.version
        zip_data = zip_dir(doc_dir).getvalue()
        fields = [(':action', 'doc_upload'),
                  ('name', name), ('version', version)]
        files = [('content', name, zip_data)]
        request = self.encode_request(fields, files)
        return self.send_request(request)

    def get_verify_command(self, signature_filename, data_filename,
                           keystore=None):
        """
        Return a suitable command for verifying a file.

        :param signature_filename: The pathname to the file containing the
                                   signature.
        :param data_filename: The pathname to the file containing the
                              signed data.
        :param keystore: The path to a directory which contains the keys
                         used in verification. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: The verifying command as a list suitable to be
                 passed to :class:`subprocess.Popen`.
        """
        cmd = [self.gpg, '--status-fd', '2', '--no-tty']
        if keystore is None:
            keystore = self.gpg_home
        if keystore:
            cmd.extend(['--homedir', keystore])
        cmd.extend(['--verify', signature_filename, data_filename])
        logger.debug('invoking: %s', ' '.join(cmd))
        return cmd

    def verify_signature(self, signature_filename, data_filename,
                         keystore=None):
        """
        Verify a signature for a file.

        :param signature_filename: The pathname to the file containing the
                                   signature.
        :param data_filename: The pathname to the file containing the
                              signed data.
        :param keystore: The path to a directory which contains the keys
                         used in verification. If not specified, the
                         instance's ``gpg_home`` attribute is used instead.
        :return: True if the signature was verified, else False.
        :raises DistlibException: if no gpg program is available, or the
                                  verify command exits with a status other
                                  than 0 or 1.
        """
        if not self.gpg:
            raise DistlibException('verification unavailable because gpg '
                                   'unavailable')
        cmd = self.get_verify_command(signature_filename, data_filename,
                                      keystore)
        rc, stdout, stderr = self.run_command(cmd)
        if rc not in (0, 1):
            raise DistlibException('verify command failed with error code %s' % rc)
        return rc == 0

    def download_file(self, url, destfile, digest=None, reporthook=None):
        """
        This is a convenience method for downloading a file from an URL.
        Normally, this will be a file from the index, though currently
        no check is made for this (i.e. a file can be downloaded from
        anywhere).

        The method is just like the :func:`urlretrieve` function in the
        standard library, except that it allows digest computation to be
        done during download and checking that the downloaded data
        matched any expected value.

        :param url: The URL of the file to be downloaded (assumed to be
                    available via an HTTP GET request).
        :param destfile: The pathname where the downloaded file is to be
                         saved.
        :param digest: If specified, this must be a (hasher, value)
                       tuple, where hasher is the algorithm used (e.g.
                       ``'md5'``) and ``value`` is the expected value.
                       A bare value is also accepted and is checked
                       as an ``md5`` digest.
        :param reporthook: The same as for :func:`urlretrieve` in the
                           standard library.
        :raises DistlibException: if fewer bytes than announced were
                                  received, or the digest does not match.
        """
        if digest is None:
            digester = None
            logger.debug('No digest specified')
        else:
            if isinstance(digest, (list, tuple)):
                hasher, digest = digest
            else:
                hasher = 'md5'
            digester = getattr(hashlib, hasher)()
            logger.debug('Digest specified: %s', digest)
        # The following code is equivalent to urlretrieve.
        # We need to do it this way so that we can compute the
        # digest of the file as we go.
        with open(destfile, 'wb') as dfp:
            # addinfourl is not a context manager on 2.x
            # so we have to use try/finally
            sfp = self.send_request(Request(url))
            try:
                headers = sfp.info()
                blocksize = 8192
                size = -1  # stays -1 when the server sends no length
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, blocksize, size)
                while True:
                    block = sfp.read(blocksize)
                    if not block:
                        break
                    read += len(block)
                    dfp.write(block)
                    if digester:
                        digester.update(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, blocksize, size)
            finally:
                sfp.close()

        # check that we got the whole file, if we can
        if size >= 0 and read < size:
            raise DistlibException(
                'retrieval incomplete: got only %d out of %d bytes'
                % (read, size))
        # if we have a digest, it must match.
        if digester:
            actual = digester.hexdigest()
            if digest != actual:
                raise DistlibException('%s digest mismatch for %s: expected '
                                       '%s, got %s' % (hasher, destfile,
                                                       digest, actual))
            logger.debug('Digest verified: %s', digest)

    def send_request(self, req):
        """
        Send a standard library :class:`Request` to PyPI and return its
        response.

        The opener is built with ``password_handler`` and ``ssl_verifier``
        when those attributes are set.

        :param req: The request to send.
        :return: The HTTP response from PyPI (a standard library HTTPResponse).
        """
        handlers = []
        if self.password_handler:
            handlers.append(self.password_handler)
        if self.ssl_verifier:
            handlers.append(self.ssl_verifier)
        opener = build_opener(*handlers)
        return opener.open(req)

    def encode_request(self, fields, files):
        """
        Encode fields and files for posting to an HTTP server.

        :param fields: The fields to send as a list of (fieldname, value)
                       tuples. A value may itself be a list or tuple, in
                       which case one part is emitted per element.
        :param files: The files to send as a list of (fieldname, filename,
                      file_bytes) tuple.
        :return: A :class:`Request` for ``self.url`` carrying the
                 ``multipart/form-data`` body and matching headers.
        """
        # Adapted from packaging, which in turn was adapted from
        # http://code.activestate.com/recipes/146306

        parts = []
        boundary = self.boundary
        for k, values in fields:
            if not isinstance(values, (list, tuple)):
                values = [values]

            for v in values:
                parts.extend((
                    b'--' + boundary,
                    ('Content-Disposition: form-data; name="%s"' %
                     k).encode('utf-8'),
                    b'',
                    v.encode('utf-8')))
        for key, filename, value in files:
            parts.extend((
                b'--' + boundary,
                ('Content-Disposition: form-data; name="%s"; filename="%s"' %
                 (key, filename)).encode('utf-8'),
                b'',
                value))

        # Closing delimiter, plus an empty part so the body ends with CRLF.
        parts.extend((b'--' + boundary + b'--', b''))

        body = b'\r\n'.join(parts)
        ct = b'multipart/form-data; boundary=' + boundary
        headers = {
            'Content-type': ct,
            'Content-length': str(len(body))
        }
        return Request(self.url, body, headers)

    def search(self, terms, operator=None):  # pragma: no cover
        """
        Search the index through its RPC interface.

        :param terms: Either a dict of search terms, or a string, which is
                      treated as a search on ``name``.
        :param operator: The operator passed along with the terms; ``'and'``
                         is used when it is not given.
        :return: Whatever the index's ``search`` call returns.
        """
        if isinstance(terms, string_types):
            terms = {'name': terms}
        rpc_proxy = ServerProxy(self.url, timeout=3.0)
        try:
            return rpc_proxy.search(terms, operator or 'and')
        finally:
            rpc_proxy('close')()
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/locators.py ADDED
@@ -0,0 +1,1303 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+
8
+ import gzip
9
+ from io import BytesIO
10
+ import json
11
+ import logging
12
+ import os
13
+ import posixpath
14
+ import re
15
+ try:
16
+ import threading
17
+ except ImportError: # pragma: no cover
18
+ import dummy_threading as threading
19
+ import zlib
20
+
21
+ from . import DistlibException
22
+ from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
23
+ queue, quote, unescape, build_opener,
24
+ HTTPRedirectHandler as BaseRedirectHandler, text_type,
25
+ Request, HTTPError, URLError)
26
+ from .database import Distribution, DistributionPath, make_dist
27
+ from .metadata import Metadata, MetadataInvalidError
28
+ from .util import (cached_property, ensure_slash, split_filename, get_project_data,
29
+ parse_requirement, parse_name_and_version, ServerProxy,
30
+ normalize_name)
31
+ from .version import get_scheme, UnsupportedVersionError
32
+ from .wheel import Wheel, is_compatible
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+ HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)')
37
+ CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
38
+ HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
39
+ DEFAULT_INDEX = 'https://pypi.org/pypi'
40
+
41
+
42
def get_all_distribution_names(url=None):
    """
    Ask an index for the names of all distributions it knows.

    :param url: The URL of the index; ``DEFAULT_INDEX`` is used when it is
                ``None``.
    :return: A list of all known distribution names.
    """
    index_url = DEFAULT_INDEX if url is None else url
    client = ServerProxy(index_url, timeout=3.0)
    try:
        names = client.list_packages()
    finally:
        # Always shut the proxy down, whether or not the call succeeded.
        client('close')()
    return names
55
+
56
+
57
class RedirectHandler(BaseRedirectHandler):
    """
    A class to work around a bug in some Python 3.2.x releases.
    """
    # There's a bug in the base version for some 3.2.x
    # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header
    # returns e.g. /abc, it bails because it says the scheme ''
    # is bogus, when actually it should use the request's
    # URL for the scheme. See Python issue #13696.
    def http_error_302(self, req, fp, code, msg, headers):
        """
        Make a scheme-less redirect target absolute, then defer to the base
        handler.

        Returns ``None`` without redirecting when neither a ``location``
        nor a ``uri`` header is present.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        key = next((k for k in ('location', 'uri') if k in headers), None)
        newurl = None if key is None else headers[key]
        if newurl is None:  # pragma: no cover
            return
        if urlparse(newurl).scheme == '':
            # Resolve against the request URL and store the absolute URL
            # back in the header the base implementation will read.
            newurl = urljoin(req.get_full_url(), newurl)
            if hasattr(headers, 'replace_header'):
                headers.replace_header(key, newurl)
            else:
                headers[key] = newurl
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
87
+
88
+
89
class Locator(object):
    """
    A base class for locators - things that locate distributions.

    Subclasses provide :meth:`_get_project` and
    :meth:`get_distribution_names`; this class adds a per-name cache, URL
    scoring/selection helpers and requirement matching (:meth:`locate`).
    """
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    binary_extensions = ('.egg', '.exe', '.whl')
    excluded_extensions = ('.pdf',)

    # A list of tags indicating which wheels you want to match. The default
    # value of None matches against the tags compatible with the running
    # Python. If you want to match other values, set wheel_tags on a locator
    # instance to a list of tuples (pyver, abi, arch) which you want to match.
    wheel_tags = None

    downloadable_extensions = source_extensions + ('.whl',)

    def __init__(self, scheme='default'):
        """
        Initialise an instance.
        :param scheme: Because locators look for most recent versions, they
                       need to know the version scheme to use. This specifies
                       the current PEP-recommended scheme - use ``'legacy'``
                       if you need to support existing distributions on PyPI.
        """
        self._cache = {}
        self.scheme = scheme
        # Because of bugs in some of the handlers on some of the platforms,
        # we use our own opener rather than just using urlopen.
        self.opener = build_opener(RedirectHandler())
        # If get_project() is called from locate(), the matcher instance
        # is set from the requirement passed to locate(). See issue #18 for
        # why this can be useful to know.
        self.matcher = None
        self.errors = queue.Queue()

    def get_errors(self):
        """
        Return any errors which have occurred.

        The error queue is drained as a side effect.

        :return: A list of the queued error values, oldest first.
        """
        result = []
        while not self.errors.empty():  # pragma: no cover
            try:
                e = self.errors.get(False)
                result.append(e)
            except queue.Empty:
                # Another consumer emptied the queue between the empty()
                # check and the get(). The exception class lives on the
                # queue module, not on Queue instances.
                continue
            self.errors.task_done()
        return result

    def clear_errors(self):
        """
        Clear any errors which may have been logged.
        """
        # Just get the errors and throw them away
        self.get_errors()

    def clear_cache(self):
        """
        Forget all cached :meth:`get_project` results.
        """
        self._cache.clear()

    def _get_scheme(self):
        return self._scheme

    def _set_scheme(self, value):
        self._scheme = value

    # The name of the version scheme used by this locator.
    scheme = property(_get_scheme, _set_scheme)

    def _get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This should be implemented in subclasses.

        If called from a locate() request, self.matcher will be set to a
        matcher for the requirement to satisfy, otherwise it will be None.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This calls _get_project to do all the work, and just implements a caching layer on top.
        """
        if self._cache is None:  # pragma: no cover
            # Caching disabled: always ask the subclass.
            result = self._get_project(name)
        elif name in self._cache:
            result = self._cache[name]
        else:
            self.clear_errors()
            result = self._get_project(name)
            self._cache[name] = result
        return result

    def score_url(self, url):
        """
        Give an url a score which can be used to choose preferred URLs
        for a given project release.

        :return: A tuple which sorts higher for more preferable URLs; its
                 elements are (is https, host contains 'pypi.org',
                 has a downloadable extension, is a wheel, wheel is
                 compatible, basename).
        """
        t = urlparse(url)
        basename = posixpath.basename(t.path)
        compatible = True
        is_wheel = basename.endswith('.whl')
        is_downloadable = basename.endswith(self.downloadable_extensions)
        if is_wheel:
            compatible = is_compatible(Wheel(basename), self.wheel_tags)
        return (t.scheme == 'https', 'pypi.org' in t.netloc,
                is_downloadable, is_wheel, compatible, basename)

    def prefer_url(self, url1, url2):
        """
        Choose one of two URLs where both are candidates for distribution
        archives for the same version of a distribution (for example,
        .tar.gz vs. zip).

        The current implementation favours https:// URLs over http://, archives
        from PyPI over those from other locations, wheel compatibility (if a
        wheel) and then the archive name.

        :param url1: The currently held URL; may be empty/``None``, in which
                     case ``url2`` is returned.
        :param url2: The candidate replacement URL.
        """
        result = url2
        if url1:
            s1 = self.score_url(url1)
            s2 = self.score_url(url2)
            if s1 > s2:
                result = url1
            if result != url2:
                logger.debug('Not replacing %r with %r', url1, url2)
            else:
                logger.debug('Replacing %r with %r', url1, url2)
        return result

    def split_filename(self, filename, project_name):
        """
        Attempt to split a filename in project name, version and Python version.
        """
        return split_filename(filename, project_name)

    def convert_url_to_download_info(self, url, project_name):
        """
        See if a URL is a candidate for a download URL for a project (the URL
        has typically been scraped from an HTML page).

        If it is, a dictionary is returned with keys "name", "version",
        "filename" and "url"; otherwise, None is returned.

        A "python-version" key may also be present, and an
        ``<algo>_digest`` key is added when the URL fragment carries a hash.

        :param url: The URL to examine.
        :param project_name: The project the URL must belong to, or ``None``
                             to accept any project.
        """
        def same_project(name1, name2):
            return normalize_name(name1) == normalize_name(name2)

        result = None
        scheme, netloc, path, params, query, frag = urlparse(url)
        if frag.lower().startswith('egg='):  # pragma: no cover
            logger.debug('%s: version hint in fragment: %r',
                         project_name, frag)
        m = HASHER_HASH.match(frag)
        if m:
            algo, digest = m.groups()
        else:
            algo, digest = None, None
        # Keep the untouched path for the returned URL; 'path' is trimmed
        # below purely for filename analysis.
        origpath = path
        if path and path[-1] == '/':  # pragma: no cover
            path = path[:-1]
        if path.endswith('.whl'):
            try:
                wheel = Wheel(path)
                if not is_compatible(wheel, self.wheel_tags):
                    logger.debug('Wheel not compatible: %s', path)
                else:
                    if project_name is None:
                        include = True
                    else:
                        include = same_project(wheel.name, project_name)
                    if include:
                        result = {
                            'name': wheel.name,
                            'version': wheel.version,
                            'filename': wheel.filename,
                            'url': urlunparse((scheme, netloc, origpath,
                                               params, query, '')),
                            'python-version': ', '.join(
                                ['.'.join(list(v[2:])) for v in wheel.pyver]),
                        }
            except Exception:  # pragma: no cover
                logger.warning('invalid path for wheel: %s', path)
        elif not path.endswith(self.downloadable_extensions):  # pragma: no cover
            logger.debug('Not downloadable: %s', path)
        else:  # downloadable extension
            path = filename = posixpath.basename(path)
            for ext in self.downloadable_extensions:
                if path.endswith(ext):
                    path = path[:-len(ext)]
                    t = self.split_filename(path, project_name)
                    if not t:  # pragma: no cover
                        logger.debug('No match for project/version: %s', path)
                    else:
                        name, version, pyver = t
                        if not project_name or same_project(project_name, name):
                            result = {
                                'name': name,
                                'version': version,
                                'filename': filename,
                                'url': urlunparse((scheme, netloc, origpath,
                                                   params, query, '')),
                            }
                            if pyver:  # pragma: no cover
                                result['python-version'] = pyver
                    break
        if result and algo:
            result['%s_digest' % algo] = digest
        return result

    def _get_digest(self, info):
        """
        Get a digest from a dictionary by looking at a "digests" dictionary
        or keys of the form 'algo_digest'.

        Returns a 2-tuple (algo, digest) if found, else None. Currently
        looks only for SHA256, then MD5.
        """
        result = None
        if 'digests' in info:
            digests = info['digests']
            for algo in ('sha256', 'md5'):
                if algo in digests:
                    result = (algo, digests[algo])
                    break
        if not result:
            for algo in ('sha256', 'md5'):
                key = '%s_digest' % algo
                if key in info:
                    result = (algo, info[key])
                    break
        return result

    def _update_version_data(self, result, info):
        """
        Update a result dictionary (the final result from _get_project) with a
        dictionary for a specific version, which typically holds information
        gleaned from a filename or URL for an archive for the distribution.

        Note that the 'name' and 'version' keys are popped from ``info``.
        """
        name = info.pop('name')
        version = info.pop('version')
        if version in result:
            dist = result[version]
            md = dist.metadata
        else:
            dist = make_dist(name, version, scheme=self.scheme)
            md = dist.metadata
        dist.digest = digest = self._get_digest(info)
        url = info['url']
        result['digests'][url] = digest
        if md.source_url != info['url']:
            md.source_url = self.prefer_url(md.source_url, url)
            result['urls'].setdefault(version, set()).add(url)
        dist.locator = self
        result[version] = dist

    def locate(self, requirement, prereleases=False):
        """
        Find the most recent distribution which matches the given
        requirement.

        :param requirement: A requirement of the form 'foo (1.0)' or perhaps
                            'foo (>= 1.0, < 2.0, != 1.3)'
        :param prereleases: If ``True``, allow pre-release versions
                            to be located. Otherwise, pre-release versions
                            are not returned.
        :return: A :class:`Distribution` instance, or ``None`` if no such
                 distribution could be located.
        :raises DistlibException: If ``requirement`` cannot be parsed.
        """
        result = None
        r = parse_requirement(requirement)
        if r is None:  # pragma: no cover
            raise DistlibException('Not a valid requirement: %r' % requirement)
        scheme = get_scheme(self.scheme)
        self.matcher = matcher = scheme.matcher(r.requirement)
        logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
        versions = self.get_project(r.name)
        if len(versions) > 2:   # urls and digests keys are present
            # sometimes, versions are invalid
            slist = []
            vcls = matcher.version_class
            for k in versions:
                if k in ('urls', 'digests'):
                    continue
                try:
                    if not matcher.match(k):
                        pass  # logger.debug('%s did not match %r', matcher, k)
                    else:
                        if prereleases or not vcls(k).is_prerelease:
                            slist.append(k)
                except Exception:  # pragma: no cover
                    logger.warning('error matching %s with %r', matcher, k)
                    pass  # slist.append(k)
            if len(slist) > 1:
                slist = sorted(slist, key=scheme.key)
            if slist:
                logger.debug('sorted list: %s', slist)
                version = slist[-1]
                result = versions[version]
        if result:
            if r.extras:
                result.extras = r.extras
            result.download_urls = versions.get('urls', {}).get(version, set())
            d = {}
            sd = versions.get('digests', {})
            for url in result.download_urls:
                if url in sd:  # pragma: no cover
                    d[url] = sd[url]
            result.digests = d
        # The matcher is only meaningful for the duration of this call.
        self.matcher = None
        return result
408
+
409
+
410
class PyPIRPCLocator(Locator):
    """
    A locator which talks to an index over XML-RPC. Because it needs the
    RPC interface, it cannot be used with simple mirrors (that only mirror
    file content).
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The URL to use for XML-RPC.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIRPCLocator, self).__init__(**kwargs)
        self.base_url = url
        self.client = ServerProxy(url, timeout=3.0)

    def get_distribution_names(self):
        """
        Return the set of all package names reported by the server.
        """
        return set(self.client.list_packages())

    def _get_project(self, name):
        """
        Query the server for every release of ``name`` and build the
        version -> Distribution mapping (plus 'urls' and 'digests').

        Releases for which the server reports no URLs are left out.
        """
        project = {'urls': {}, 'digests': {}}
        for release in self.client.package_releases(name, True):
            file_infos = self.client.release_urls(name, release)
            details = self.client.release_data(name, release)
            md = Metadata(scheme=self.scheme)
            md.name = details['name']
            md.version = details['version']
            md.license = details.get('license')
            md.keywords = details.get('keywords', [])
            md.summary = details.get('summary')
            dist = Distribution(md)
            if not file_infos:
                continue
            # The first reported file supplies the source URL and digest.
            first = file_infos[0]
            md.source_url = first['url']
            dist.digest = self._get_digest(first)
            dist.locator = self
            project[release] = dist
            for file_info in file_infos:
                link = file_info['url']
                checksum = self._get_digest(file_info)
                project['urls'].setdefault(release, set()).add(link)
                project['digests'][link] = checksum
        return project
457
+
458
+
459
class PyPIJSONLocator(Locator):
    """
    A locator built on PyPI's JSON interface. It's very limited in
    functionality and probably not worth using.
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The base URL of the JSON interface.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIJSONLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Fetch ``<base_url>/<name>/json`` and build the version ->
        Distribution mapping (plus 'urls' and 'digests') from it.

        Any failure is queued on ``self.errors`` and logged; whatever was
        collected up to that point is returned.
        """
        result = {'urls': {}, 'digests': {}}
        json_url = urljoin(self.base_url, '%s/json' % quote(name))

        def register(dist, version, infos):
            # Record every file of one release on both the distribution
            # and the shared result dictionary.
            for info in infos:
                link = info['url']
                dist.download_urls.add(link)
                checksum = self._get_digest(info)
                dist.digests[link] = checksum
                result['urls'].setdefault(version, set()).add(link)
                result['digests'][link] = checksum

        try:
            resp = self.opener.open(json_url)
            payload = json.loads(resp.read().decode())  # for now
            details = payload['info']
            md = Metadata(scheme=self.scheme)
            md.name = details['name']
            md.version = details['version']
            md.license = details.get('license')
            md.keywords = details.get('keywords', [])
            md.summary = details.get('summary')
            dist = Distribution(md)
            dist.locator = self
            result[md.version] = dist
            register(dist, md.version, payload['urls'])
            # Now get other releases
            for version, infos in payload['releases'].items():
                if version == md.version:
                    continue    # already done
                omd = Metadata(scheme=self.scheme)
                omd.name = md.name
                omd.version = version
                odist = Distribution(omd)
                odist.locator = self
                result[version] = odist
                register(odist, version, infos)
        except Exception as e:
            self.errors.put(text_type(e))
            logger.exception('JSON fetch failed: %s', e)
        return result
526
+
527
+
528
class Page(object):
    """
    This class represents a scraped HTML page.
    """
    # The following slightly hairy-looking regex just looks for the contents of
    # an anchor link, which has an attribute "href" either immediately preceded
    # or immediately followed by a "rel" attribute. The attribute values can be
    # declared with double quotes, single quotes or no quotes - which leads to
    # the length of the expression.
    _href = re.compile("""
(rel\\s*=\\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\\s\n]*))\\s+)?
href\\s*=\\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\\s\n]*))
(\\s+rel\\s*=\\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\\s\n]*)))?
""", re.I | re.S | re.X)
    _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)

    def __init__(self, data, url):
        """
        Initialise an instance with the Unicode page contents and the URL they
        came from.

        If the page declares a ``<base href=...>``, that value replaces
        ``url`` as the base for resolving relative links.
        """
        self.data = data
        self.base_url = self.url = url
        m = self._base.search(self.data)
        if m:
            self.base_url = m.group(1)

    # Matches every character which must be percent-escaped in a link.
    _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)

    @cached_property
    def links(self):
        """
        Return the URLs of all the links on a page together with information
        about their "rel" attribute, for determining which ones to treat as
        downloads and which ones to queue for further scraping.

        :return: A list of ``(url, rel)`` tuples, sorted by URL in
                 descending order; ``rel`` is ``''`` when absent.
        """
        result = set()
        for match in self._href.finditer(self.data):
            d = match.groupdict('')
            rel = (d['rel1'] or d['rel2'] or d['rel3'] or
                   d['rel4'] or d['rel5'] or d['rel6'])
            url = d['url1'] or d['url2'] or d['url3']
            url = urljoin(self.base_url, url)
            url = unescape(url)
            # Zero-pad to two hex digits: a space-padded '%2x' would turn
            # code points below 0x10 into an invalid escape such as '% 1'.
            url = self._clean_re.sub(lambda m: '%%%02x' % ord(m.group(0)), url)
            result.add((url, rel))
        # We sort the result, hoping to bring the most recent versions
        # to the front
        result = sorted(result, key=lambda t: t[0], reverse=True)
        return result
584
+
585
+
586
class SimpleScrapingLocator(Locator):
    """
    A locator which scrapes HTML pages to locate downloads for a distribution.
    This runs multiple threads to do the I/O; performance is at least as good
    as pip's PackageFinder, which works in an analogous fashion.
    """

    # These are used to deal with various Content-Encoding schemes.
    decoders = {
        'deflate': zlib.decompress,
        'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(),
        'none': lambda b: b,
    }

    def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O,
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self.num_workers = num_workers
        self.skip_externals = False
        self.platform_check = False     # See issue #112
        # Scraping state shared with the worker threads.
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock()

    def _prepare_threads(self):
        """
        Start ``num_workers`` daemon threads, each running :meth:`_fetch`.

        Threads are created only when get_project is called, and terminate
        before it returns. They are there primarily to parallelise I/O (i.e.
        fetching web pages).
        """
        self._threads = []
        for _ in range(self.num_workers):
            worker = threading.Thread(target=self._fetch)
            worker.daemon = True
            worker.start()
            self._threads.append(worker)

    def _wait_threads(self):
        """
        Tell all the threads to terminate (by sending a sentinel value) and
        wait for them to do so.
        """
        # Note that you need two loops, since you can't say which
        # thread will get each sentinel
        for _ in self._threads:
            self._to_fetch.put(None)    # sentinel
        for worker in self._threads:
            worker.join()
        self._threads = []

    def _get_project(self, name):
        """
        Scrape the project's index page (and any pages queued from it) and
        return the version -> Distribution mapping gathered by the workers.
        """
        result = {'urls': {}, 'digests': {}}
        with self._gplock:
            self.result = result
            self.project_name = name
            start_url = urljoin(self.base_url, '%s/' % quote(name))
            self._seen.clear()
            self._page_cache.clear()
            self._prepare_threads()
            try:
                logger.debug('Queueing %s', start_url)
                self._to_fetch.put(start_url)
                self._to_fetch.join()
            finally:
                self._wait_threads()
            del self.result
        return result

    platform_dependent = re.compile(r'\b(linux_(i\d86|x86_64|arm\w+)|'
                                    r'win(32|_amd64)|macosx_?\d+)\b', re.I)

    def _is_platform_dependent(self, url):
        """
        Does an URL refer to a platform-specific download?

        Returns the regex match (truthy) if so, else ``None``.
        """
        return self.platform_dependent.search(url)

    def _process_download(self, url):
        """
        See if an URL is a suitable download for a project.

        If it is, register information in the result dictionary (for
        _get_project) about the specific version it's for.

        Note that the return value isn't actually used other than as a boolean
        value.
        """
        skip = self.platform_check and self._is_platform_dependent(url)
        info = (None if skip else
                self.convert_url_to_download_info(url, self.project_name))
        logger.debug('process_download: %s -> %s', url, info)
        if info:
            with self._lock:    # needed because self.result is shared
                self._update_version_data(self.result, info)
        return info

    def _should_queue(self, link, referrer, rel):
        """
        Determine whether a link URL from a referring page and with a
        particular "rel" attribute should be queued for scraping.
        """
        scheme, netloc, path, _, _, _ = urlparse(link)
        archive_suffixes = (self.source_extensions + self.binary_extensions +
                            self.excluded_extensions)
        # Each clause is a reason NOT to follow the link; they are checked
        # in order and the first one that holds decides.
        rejected = (
            path.endswith(archive_suffixes) or
            (self.skip_externals and not link.startswith(self.base_url)) or
            not referrer.startswith(self.base_url) or
            rel not in ('homepage', 'download') or
            scheme not in ('http', 'https', 'ftp') or
            self._is_platform_dependent(link) or
            netloc.split(':', 1)[0].lower() == 'localhost'
        )
        result = not rejected
        logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
                     referrer, result)
        return result

    def _fetch(self):
        """
        Get a URL to fetch from the work queue, get the HTML page, examine its
        links for download candidates and candidates for further scraping.

        This is a handy method to run in a thread. It loops until it takes a
        sentinel (falsy) value off the queue.
        """
        while True:
            url = self._to_fetch.get()
            try:
                if url:
                    page = self.get_page(url)
                    if page is None:    # e.g. after an error
                        continue
                    for link, rel in page.links:
                        if link in self._seen:
                            continue
                        try:
                            self._seen.add(link)
                            is_download = self._process_download(link)
                            if (not is_download and
                                    self._should_queue(link, url, rel)):
                                logger.debug('Queueing %s from %s', link, url)
                                self._to_fetch.put(link)
                        except MetadataInvalidError:  # e.g. invalid versions
                            pass
            except Exception as e:  # pragma: no cover
                self.errors.put(text_type(e))
            finally:
                # always do this, to avoid hangs :-)
                self._to_fetch.task_done()
            if not url:
                # logger.debug('Sentinel seen, quitting.')
                break

    def get_page(self, url):
        """
        Get the HTML for an URL, possibly from an in-memory cache.

        The cache is emptied by _get_project at the start of each project
        lookup; direct callers see whatever is currently cached. Failed
        fetches are cached as ``None``.

        :return: A :class:`Page`, or ``None`` if the page could not be
                 fetched, is not HTML, or is on a known-bad host.
        """
        # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
        scheme, netloc, path, _, _, _ = urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            url = urljoin(ensure_slash(url), 'index.html')

        if url in self._page_cache:
            cached = self._page_cache[url]
            logger.debug('Returning %s from cache: %s', url, cached)
            return cached

        host = netloc.split(':', 1)[0]
        if host in self._bad_hosts:
            logger.debug('Skipping %s due to bad host %s', url, host)
            return None

        page = None
        req = Request(url, headers={'Accept-encoding': 'identity'})
        try:
            logger.debug('Fetching %s', url)
            resp = self.opener.open(req, timeout=self.timeout)
            logger.debug('Fetched %s', url)
            headers = resp.info()
            content_type = headers.get('Content-Type', '')
            if HTML_CONTENT_TYPE.match(content_type):
                final_url = resp.geturl()
                data = resp.read()
                content_encoding = headers.get('Content-Encoding')
                if content_encoding:
                    decoder = self.decoders[content_encoding]   # fail if not found
                    data = decoder(data)
                charset = 'utf-8'
                m = CHARSET.search(content_type)
                if m:
                    charset = m.group(1)
                try:
                    data = data.decode(charset)
                except UnicodeError:  # pragma: no cover
                    data = data.decode('latin-1')    # fallback
                page = Page(data, final_url)
                self._page_cache[final_url] = page
        except HTTPError as e:
            if e.code != 404:
                logger.exception('Fetch failed: %s: %s', url, e)
        except URLError as e:  # pragma: no cover
            logger.exception('Fetch failed: %s: %s', url, e)
            with self._lock:
                self._bad_hosts.add(host)
        except Exception as e:  # pragma: no cover
            logger.exception('Fetch failed: %s: %s', url, e)
        finally:
            self._page_cache[url] = page   # even if None (failure)
        return page

    _distname_re = re.compile('<a href=[^>]*>([^<]+)<')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator, i.e. the
        anchor texts found on the page at ``base_url``.

        :raises DistlibException: If that page cannot be fetched.
        """
        page = self.get_page(self.base_url)
        if not page:
            raise DistlibException('Unable to get %s' % self.base_url)
        return set(m.group(1) for m in self._distname_re.finditer(page.data))
834
+
835
+
836
class DirectoryLocator(Locator):
    """
    A locator which finds distribution archives in a directory tree.
    """

    def __init__(self, path, **kwargs):
        """
        Initialise an instance.
        :param path: The root of the directory tree to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * recursive - if True (the default), subdirectories are
                         recursed into. If False, only the top-level directory
                         is searched,
        :raises DistlibException: If ``path`` is not a directory.
        """
        self.recursive = kwargs.pop('recursive', True)
        super(DirectoryLocator, self).__init__(**kwargs)
        root = os.path.abspath(path)
        if not os.path.isdir(root):  # pragma: no cover
            raise DistlibException('Not a directory: %r' % root)
        self.base_dir = root

    def should_include(self, filename, parent):
        """
        Should a filename be considered as a candidate for a distribution
        archive? As well as the filename, the directory which contains it
        is provided, though not used by the current implementation.
        """
        return filename.endswith(self.downloadable_extensions)

    def _get_project(self, name):
        """
        Walk the tree and collect every archive which belongs to ``name``
        into a version -> Distribution mapping (plus 'urls' and 'digests').
        """
        found = {'urls': {}, 'digests': {}}
        for folder, _subdirs, filenames in os.walk(self.base_dir):
            for filename in filenames:
                if not self.should_include(filename, folder):
                    continue
                full_path = os.path.join(folder, filename)
                file_url = urlunparse(('file', '',
                                       pathname2url(os.path.abspath(full_path)),
                                       '', '', ''))
                info = self.convert_url_to_download_info(file_url, name)
                if info:
                    self._update_version_data(found, info)
            if not self.recursive:
                # os.walk yields the top-level directory first; stop there.
                break
        return found

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        names = set()
        for folder, _subdirs, filenames in os.walk(self.base_dir):
            for filename in filenames:
                if not self.should_include(filename, folder):
                    continue
                full_path = os.path.join(folder, filename)
                file_url = urlunparse(('file', '',
                                       pathname2url(os.path.abspath(full_path)),
                                       '', '', ''))
                info = self.convert_url_to_download_info(file_url, None)
                if info:
                    names.add(info['name'])
            if not self.recursive:
                break
        return names
900
+
901
+
902
class JSONLocator(Locator):
    """
    This locator uses special extended metadata (not available on PyPI) and is
    the basis of performant dependency resolution in distlib. Other locators
    require archive downloads before dependencies can be determined! As you
    might imagine, that can be slow.
    """
    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Build the version -> Distribution mapping (plus 'urls') from the
        data returned by ``get_project_data(name)``. Only files marked as
        source sdists are considered.
        """
        result = {'urls': {}, 'digests': {}}
        data = get_project_data(name)
        if not data:
            return result
        for info in data.get('files', []):
            if not (info['ptype'] == 'sdist' and info['pyversion'] == 'source'):
                continue
            # We don't store summary in project metadata as it makes
            # the data bigger for no benefit during dependency
            # resolution
            summary = data.get('summary', 'Placeholder for summary')
            dist = make_dist(data['name'], info['version'],
                             summary=summary, scheme=self.scheme)
            md = dist.metadata
            md.source_url = info['url']
            # TODO SHA256 digest
            md5_value = info.get('digest')
            if md5_value:
                dist.digest = ('md5', md5_value)
            md.dependencies = info.get('requirements', {})
            dist.exports = info.get('exports', {})
            result[dist.version] = dist
            result['urls'].setdefault(dist.version, set()).add(info['url'])
        return result
939
+
940
+
941
class DistPathLocator(Locator):
    """
    A locator for distributions which are already installed on a path. It
    can be useful for adding to an :class:`AggregatingLocator`.
    """
    def __init__(self, distpath, **kwargs):
        """
        Initialise an instance.

        :param distpath: A :class:`DistributionPath` instance to search.
        :param kwargs: Passed to the superclass constructor.
        """
        super(DistPathLocator, self).__init__(**kwargs)
        assert isinstance(distpath, DistributionPath)
        self.distpath = distpath

    def _get_project(self, name):
        """
        Return a mapping holding the installed distribution called ``name``
        (if there is one) under its version, plus 'urls' and 'digests'.
        """
        dist = self.distpath.get_distribution(name)
        if dist is None:
            return {'urls': {}, 'digests': {}}
        # NOTE(review): 'digests' is keyed by version here, whereas the other
        # locators in this module key it by URL - confirm this is intended.
        return {
            dist.version: dist,
            'urls': {dist.version: {dist.source_url}},
            'digests': {dist.version: {None}},
        }
967
+
968
+
969
+ class AggregatingLocator(Locator):
970
+ """
971
+ This class allows you to chain and/or merge a list of locators.
972
+ """
973
+ def __init__(self, *locators, **kwargs):
974
+ """
975
+ Initialise an instance.
976
+
977
+ :param locators: The list of locators to search.
978
+ :param kwargs: Passed to the superclass constructor,
979
+ except for:
980
+ * merge - if False (the default), the first successful
981
+ search from any of the locators is returned. If True,
982
+ the results from all locators are merged (this can be
983
+ slow).
984
+ """
985
+ self.merge = kwargs.pop('merge', False)
986
+ self.locators = locators
987
+ super(AggregatingLocator, self).__init__(**kwargs)
988
+
989
+ def clear_cache(self):
990
+ super(AggregatingLocator, self).clear_cache()
991
+ for locator in self.locators:
992
+ locator.clear_cache()
993
+
994
+ def _set_scheme(self, value):
995
+ self._scheme = value
996
+ for locator in self.locators:
997
+ locator.scheme = value
998
+
999
+ scheme = property(Locator.scheme.fget, _set_scheme)
1000
+
1001
+ def _get_project(self, name):
1002
+ result = {}
1003
+ for locator in self.locators:
1004
+ d = locator.get_project(name)
1005
+ if d:
1006
+ if self.merge:
1007
+ files = result.get('urls', {})
1008
+ digests = result.get('digests', {})
1009
+ # next line could overwrite result['urls'], result['digests']
1010
+ result.update(d)
1011
+ df = result.get('urls')
1012
+ if files and df:
1013
+ for k, v in files.items():
1014
+ if k in df:
1015
+ df[k] |= v
1016
+ else:
1017
+ df[k] = v
1018
+ dd = result.get('digests')
1019
+ if digests and dd:
1020
+ dd.update(digests)
1021
+ else:
1022
+ # See issue #18. If any dists are found and we're looking
1023
+ # for specific constraints, we only return something if
1024
+ # a match is found. For example, if a DirectoryLocator
1025
+ # returns just foo (1.0) while we're looking for
1026
+ # foo (>= 2.0), we'll pretend there was nothing there so
1027
+ # that subsequent locators can be queried. Otherwise we
1028
+ # would just return foo (1.0) which would then lead to a
1029
+ # failure to find foo (>= 2.0), because other locators
1030
+ # weren't searched. Note that this only matters when
1031
+ # merge=False.
1032
+ if self.matcher is None:
1033
+ found = True
1034
+ else:
1035
+ found = False
1036
+ for k in d:
1037
+ if self.matcher.match(k):
1038
+ found = True
1039
+ break
1040
+ if found:
1041
+ result = d
1042
+ break
1043
+ return result
1044
+
1045
def get_distribution_names(self):
    """
    Return the union of the distribution names reported by the
    aggregated locators.

    Locators whose ``get_distribution_names`` raises
    ``NotImplementedError`` are skipped.
    """
    names = set()
    for child in self.locators:
        try:
            reported = child.get_distribution_names()
        except NotImplementedError:
            continue
        names |= reported
    return names
1056
+
1057
+
1058
# We use a legacy scheme simply because most of the dists on PyPI use legacy
# versions which don't conform to PEP 440.
# Module-level default locator: an AggregatingLocator wrapping a single
# SimpleScrapingLocator for the PyPI "simple" index (timeout=3.0).
default_locator = AggregatingLocator(
    # JSONLocator(), # don't use as PEP 426 is withdrawn
    SimpleScrapingLocator('https://pypi.org/simple/',
                          timeout=3.0),
    scheme='legacy')

# Module-level shortcut bound to the default locator's ``locate`` method.
locate = default_locator.locate
1067
+
1068
+
1069
class DependencyFinder(object):
    """
    Locate a distribution and the distributions it depends on.
    """

    def __init__(self, locator=None):
        """
        Create a finder.

        :param locator: The locator used to look up distributions. A
                        falsy value selects the module-level
                        ``default_locator``.
        """
        if not locator:
            locator = default_locator
        self.locator = locator
        self.scheme = get_scheme(locator.scheme)

    def add_distribution(self, dist):
        """
        Register a distribution with the finder, recording it by name, by
        (name, version) and under everything it provides.

        :param dist: The distribution to add.
        """
        logger.debug('adding distribution %s', dist)
        key = dist.key
        self.dists_by_name[key] = dist
        self.dists[(key, dist.version)] = dist
        for provision in dist.provides:
            pname, pversion = parse_name_and_version(provision)
            logger.debug('Add to provided: %s, %s, %s', pname, pversion, dist)
            self.provided.setdefault(pname, set()).add((pversion, dist))

    def remove_distribution(self, dist):
        """
        Unregister a distribution from the finder, undoing what
        :meth:`add_distribution` recorded for it.

        :param dist: The distribution to remove.
        """
        logger.debug('removing distribution %s', dist)
        key = dist.key
        del self.dists_by_name[key]
        del self.dists[(key, dist.version)]
        for provision in dist.provides:
            pname, pversion = parse_name_and_version(provision)
            logger.debug('Remove from provided: %s, %s, %s', pname, pversion, dist)
            entries = self.provided[pname]
            entries.remove((pversion, dist))
            # Drop the name entirely once nothing provides it any more.
            if not entries:
                del self.provided[pname]

    def get_matcher(self, reqt):
        """
        Build a version matcher for a requirement using this finder's
        scheme.

        :param reqt: The requirement
        :type reqt: str
        :return: A version matcher (an instance of
                 :class:`distlib.version.Matcher`).
        """
        scheme = self.scheme
        try:
            return scheme.matcher(reqt)
        except UnsupportedVersionError:  # pragma: no cover
            # XXX compat-mode if cannot read the version: fall back to a
            # matcher built from the first word of the requirement only.
            return scheme.matcher(reqt.split()[0])

    def find_providers(self, reqt):
        """
        Look among the already-registered providers for one that fulfils a
        requirement.

        :param reqt: The requirement.
        :type reqt: str
        :return: A set holding the first matching provider found, or an
                 empty set if none matches.
        """
        matcher = self.get_matcher(reqt)
        found = set()
        # matcher.key is the case-insensitive name
        for version, provider in self.provided.get(matcher.key, ()):
            try:
                matched = matcher.match(version)
            except UnsupportedVersionError:
                matched = False
            if matched:
                found.add(provider)
                break
        return found

    def try_to_replace(self, provider, other, problems):
        """
        Attempt to replace one provider with another. This is typically used
        when resolving dependencies from multiple sources, e.g. A requires
        (B >= 1.0) while C requires (B >= 1.1).

        For successful replacement, ``provider`` must meet all the requirements
        which ``other`` fulfills.

        :param provider: The provider we are trying to replace with.
        :param other: The provider we're trying to replace.
        :param problems: A set which, when False is returned, gains a tuple
                         of the literal string 'cantreplace',
                         ``provider``, ``other`` and the frozenset of
                         requirements that ``provider`` couldn't fulfill.
        :return: True if we can replace ``other`` with ``provider``, else
                 False.
        """
        requirements = self.reqts[other]
        failed = set()
        for reqt in requirements:
            if not self.get_matcher(reqt).match(provider.version):
                failed.add(reqt)

        if failed:
            # can't replace other with provider
            problems.add(('cantreplace', provider, other,
                          frozenset(failed)))
            return False

        # can replace other with provider: move the bookkeeping across
        self.remove_distribution(other)
        del self.reqts[other]
        for reqt in requirements:
            self.reqts.setdefault(provider, set()).add(reqt)
        self.add_distribution(provider)
        return True

    def find(self, requirement, meta_extras=None, prereleases=False):
        """
        Find a distribution and all distributions it depends on.

        :param requirement: The requirement specifying the distribution to
                            find, or a Distribution instance.
        :param meta_extras: A list of meta extras such as :test:, :build: and
                            so on. The special value ``:*:`` is expanded to
                            ``:test:``, ``:build:`` and ``:dev:``.
        :param prereleases: If ``True``, allow pre-release versions to be
                            returned - otherwise, don't return prereleases
                            unless they're all that's available.

        Return a set of :class:`Distribution` instances and a set of
        problems.

        The distribution resolved from ``requirement`` gets its
        ``requested`` attribute set to ``True``. Every returned
        distribution gets ``build_time_dependency`` set to ``True`` unless
        it is in the set of distributions to install (the root plus the
        providers of run/meta requirements reached from it).

        Problems are tuples: ``('unsatisfied', requirement)`` when the
        locator finds nothing for a requirement, or the ``'cantreplace'``
        tuples recorded by :meth:`try_to_replace`.

        :raises DistlibException: if ``requirement`` is not a Distribution
                                  and the locator cannot locate it.
        """
        # Start from a clean slate on every call.
        self.provided = {}
        self.dists = {}
        self.dists_by_name = {}
        self.reqts = {}

        extras = set(meta_extras or [])
        if ':*:' in extras:
            extras.remove(':*:')
            # :meta: and :run: are implicitly included
            extras |= set([':test:', ':build:', ':dev:'])

        if isinstance(requirement, Distribution):
            root = requirement
            logger.debug('passed %s as requirement', root)
        else:
            root = self.locator.locate(requirement,
                                       prereleases=prereleases)
            if root is None:
                raise DistlibException('Unable to locate %r' % requirement)
            logger.debug('located %s', root)
        root.requested = True

        problems = set()
        pending = set([root])
        install_dists = set([root])
        while pending:
            current = pending.pop()
            key = current.key  # case-insensitive
            if key in self.dists_by_name:
                existing = self.dists_by_name[key]
                if existing != current:
                    self.try_to_replace(current, existing, problems)
            else:
                self.add_distribution(current)

            ireqts = current.run_requires | current.meta_requires
            sreqts = current.build_requires
            ereqts = set()
            if extras and current in install_dists:
                for kind in ('test', 'build', 'dev'):
                    if ':%s:' % kind in extras:
                        ereqts |= getattr(current, '%s_requires' % kind)

            for reqt in ireqts | sreqts | ereqts:
                providers = self.find_providers(reqt)
                if not providers:
                    logger.debug('No providers found for %r', reqt)
                    located = self.locator.locate(reqt,
                                                  prereleases=prereleases)
                    # If no provider is found and we didn't consider
                    # prereleases, consider them now.
                    if located is None and not prereleases:
                        located = self.locator.locate(reqt, prereleases=True)
                    if located is None:
                        logger.debug('Cannot satisfy %r', reqt)
                        problems.add(('unsatisfied', reqt))
                    else:
                        ident = (located.key, located.version)
                        if ident not in self.dists:
                            pending.add(located)
                        providers.add(located)
                        if reqt in ireqts and current in install_dists:
                            install_dists.add(located)
                            logger.debug('Adding %s to install_dists',
                                         located.name_and_version)

                for candidate in providers:
                    ckey = candidate.key
                    if ckey not in self.dists_by_name:
                        self.reqts.setdefault(candidate, set()).add(reqt)
                        continue
                    existing = self.dists_by_name[ckey]
                    if existing != candidate:
                        # see if existing can be replaced by candidate
                        self.try_to_replace(candidate, existing, problems)

        found = set(self.dists.values())
        for member in found:
            member.build_time_dependency = member not in install_dists
            if member.build_time_dependency:
                logger.debug('%s is a build-time dependency only.',
                             member.name_and_version)
        logger.debug('find done for %s', root)
        return found, problems
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/manifest.py ADDED
@@ -0,0 +1,384 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 Python Software Foundation.
4
+ # See LICENSE.txt and CONTRIBUTORS.txt.
5
+ #
6
+ """
7
+ Class representing the list of files in a distribution.
8
+
9
+ Equivalent to distutils.filelist, but fixes some problems.
10
+ """
11
+ import fnmatch
12
+ import logging
13
+ import os
14
+ import re
15
+ import sys
16
+
17
+ from . import DistlibException
18
+ from .compat import fsdecode
19
+ from .util import convert_path
20
+
21
+
22
+ __all__ = ['Manifest']
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
# A backslash followed by optional spaces/tabs and then EOL (a line
# continuation). The previous pattern matched a backslash followed by
# literal 'w' characters, which contradicted this description.
_COLLAPSE_PATTERN = re.compile(r'\\[ \t]*\n', re.M)
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
29
+
30
+ #
31
+ # Due to the different results returned by fnmatch.translate, we need
32
+ # to do slightly different processing for Python 2.7 and 3.2 ... this needed
33
+ # to be brought in for Python 3.6 onwards.
34
+ #
35
+ _PYTHON_VERSION = sys.version_info[:2]
36
+
37
+
38
class Manifest(object):
    """
    A list of files built by exploring the filesystem and filtered by applying various
    patterns to what we find there.
    """

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The base directory to explore under. A falsy value
                     selects the current working directory.
        """
        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
        self.prefix = self.base + os.sep
        # ``allfiles`` stays None until findall() runs (it is loaded
        # lazily by _include_pattern()).
        self.allfiles = None
        self.files = set()

    #
    # Public API
    #

    def findall(self):
        """Find all files under the base and set ``allfiles`` to the absolute
        pathnames of files found.
        """
        from stat import S_ISREG, S_ISDIR, S_ISLNK

        self.allfiles = allfiles = []
        root = self.base
        stack = [root]
        pop = stack.pop
        push = stack.append

        # Iterative traversal using an explicit stack of directories.
        while stack:
            root = pop()
            names = os.listdir(root)

            for name in names:
                fullname = os.path.join(root, name)

                # Avoid excess stat calls -- just one will do, thank you!
                # NOTE(review): os.stat() follows symbolic links, so the
                # S_ISLNK test below is not expected to ever be true --
                # confirm before relying on it to skip linked directories.
                stat = os.stat(fullname)
                mode = stat.st_mode
                if S_ISREG(mode):
                    allfiles.append(fsdecode(fullname))
                elif S_ISDIR(mode) and not S_ISLNK(mode):
                    push(fullname)

    def add(self, item):
        """
        Add a file to the manifest.

        :param item: The pathname to add. This can be relative to the base.
        """
        if not item.startswith(self.prefix):
            item = os.path.join(self.base, item)
        self.files.add(os.path.normpath(item))

    def add_many(self, items):
        """
        Add a list of files to the manifest.

        :param items: The pathnames to add. These can be relative to the base.
        """
        for item in items:
            self.add(item)

    def sorted(self, wantdirs=False):
        """
        Return sorted files in directory order

        :param wantdirs: If true, also include the directory of each file
                         and every ancestor directory up to the base.
        :return: A list of pathnames sorted by (directory, name).
        """

        def add_dir(dirs, d):
            # Record d and, recursively, its parents up to self.base.
            dirs.add(d)
            logger.debug('add_dir added %s', d)
            if d != self.base:
                parent, _ = os.path.split(d)
                assert parent not in ('', '/')
                add_dir(dirs, parent)

        result = set(self.files)    # make a copy!
        if wantdirs:
            dirs = set()
            for f in result:
                add_dir(dirs, os.path.dirname(f))
            result |= dirs
        return [os.path.join(*path_tuple) for path_tuple in
                sorted(os.path.split(path) for path in result)]

    def clear(self):
        """Clear all collected files."""
        self.files = set()
        self.allfiles = []

    def process_directive(self, directive):
        """
        Process a directive which either adds some files from ``allfiles`` to
        ``files``, or removes some files from ``files``.

        :param directive: The directive to process. This should be in a format
                     compatible with distutils ``MANIFEST.in`` files:

                     http://docs.python.org/distutils/sourcedist.html#commands
        :raises DistlibException: if the directive is malformed (raised by
                                  ``_parse_directive``).
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dirpattern).
        action, patterns, thedir, dirpattern = self._parse_directive(directive)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning('no files found matching %r', pattern)

        elif action == 'exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=True)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning('no files found matching %r '
                                   'anywhere in distribution', pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=False)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=thedir):
                    logger.warning('no files found matching %r '
                                   'under directory %r', pattern, thedir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, prefix=thedir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dirpattern):
                logger.warning('no directories found matching %r',
                               dirpattern)

        elif action == 'prune':
            if not self._exclude_pattern(None, prefix=dirpattern):
                logger.warning('no previously-included directories found '
                               'matching %r', dirpattern)
        else:   # pragma: no cover
            # This should never happen, as it should be caught in
            # _parse_directive
            raise DistlibException(
                'invalid action %r' % action)

    #
    # Private API
    #

    def _parse_directive(self, directive):
        """
        Validate a directive.

        :param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_patterns. Entries
                 that do not apply to the action are None.
        :raises DistlibException: if the directive is empty, names an
                                  unknown action, or has the wrong number
                                  of words for its action.
        """
        words = directive.split()
        if not words:
            # Previously this fell through to words[0] and raised a bare
            # IndexError; report it like every other malformed directive.
            raise DistlibException('empty directive')

        if len(words) == 1 and words[0] not in ('include', 'exclude',
                                                'global-include',
                                                'global-exclude',
                                                'recursive-include',
                                                'recursive-exclude',
                                                'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = thedir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistlibException(
                    '%r expects <pattern1> <pattern2> ...' % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistlibException(
                    '%r expects <dir> <pattern1> <pattern2> ...' % action)

            thedir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistlibException(
                    '%r expects a single <dir_pattern>' % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistlibException('unknown action %r' % action)

        return action, patterns, thedir, dir_pattern

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?'  match non-special characters, where "special"
        is platform-dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files.

        Return True if files are found.
        """
        # XXX docstring lying about what the special chars are?
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.files.add(name)
                found = True
        return found

    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for '_include_pattern()', above.
        The set 'self.files' is modified in place. Return True if files are
        found.

        This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
        packaging source distributions
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
        # Iterate over a copy because matching entries are removed.
        for f in list(self.files):
            if pattern_re.search(f):
                self.files.remove(f)
                found = True
        return found

    def _translate_pattern(self, pattern, anchor=True, prefix=None,
                           is_regex=False):
        """Translate a shell-like wildcard pattern to a compiled regular
        expression.

        Return the compiled regex.  If 'is_regex' true,
        then 'pattern' is directly compiled to a regex (if it's a string)
        or just returned as-is (assumes it's a regex object).
        """
        if is_regex:
            if isinstance(pattern, str):
                return re.compile(pattern)
            else:
                return pattern

        if _PYTHON_VERSION > (3, 2):
            # ditch start and end characters: translate a one-character
            # glob and keep whatever the translation wraps around it
            start, _, end = self._glob_to_re('_').partition('_')

        if pattern:
            pattern_re = self._glob_to_re(pattern)
            if _PYTHON_VERSION > (3, 2):
                assert pattern_re.startswith(start) and pattern_re.endswith(end)
        else:
            pattern_re = ''

        # Regex-escaped base directory, including its trailing separator.
        base = re.escape(os.path.join(self.base, ''))
        if prefix is not None:
            # ditch end of pattern character
            if _PYTHON_VERSION <= (3, 2):
                empty_pattern = self._glob_to_re('')
                prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
            else:
                prefix_re = self._glob_to_re(prefix)
                assert prefix_re.startswith(start) and prefix_re.endswith(end)
                prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
            sep = os.sep
            if os.sep == '\\':
                sep = r'\\'
            if _PYTHON_VERSION <= (3, 2):
                pattern_re = '^' + base + sep.join((prefix_re,
                                                    '.*' + pattern_re))
            else:
                pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
                pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
                                                  pattern_re, end)
        else:  # no prefix -- respect anchor flag
            if anchor:
                if _PYTHON_VERSION <= (3, 2):
                    pattern_re = '^' + base + pattern_re
                else:
                    pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])

        return re.compile(pattern_re)

    def _glob_to_re(self, pattern):
        """Translate a shell-like glob pattern to a regular expression.

        Return a string containing the regex.  Differs from
        'fnmatch.translate()' in that '*' does not match "special characters"
        (which are platform-specific).
        """
        pattern_re = fnmatch.translate(pattern)

        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
        # and by extension they shouldn't match such "special characters" under
        # any OS.  So change all non-escaped dots in the RE to match any
        # character except the special characters (currently: just os.sep).
        sep = os.sep
        if os.sep == '\\':
            # we're using a regex to manipulate a regex, so we need
            # to escape the backslash twice
            sep = r'\\\\'
        escaped = r'\1[^%s]' % sep
        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
        return pattern_re
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/markers.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ """
8
+ Parser for the environment markers micro-language defined in PEP 508.
9
+ """
10
+
11
+ # Note: In PEP 345, the micro-language was Python compatible, so the ast
12
+ # module could be used to parse it. However, PEP 508 introduced operators such
13
+ # as ~= and === which aren't in Python, necessitating a different approach.
14
+
15
+ import os
16
+ import re
17
+ import sys
18
+ import platform
19
+
20
+ from .compat import string_types
21
+ from .util import in_venv, parse_marker
22
+ from .version import LegacyVersion as LV
23
+
24
+ __all__ = ['interpret']
25
+
26
# Matches a dotted run of digits with an optional trailing run of word
# characters, either bare or wrapped in single or double quotes.
_VERSION_PATTERN = re.compile(
    r'((\d+(\.\d+)*\w*)|\'(\d+(\.\d+)*\w*)\'|\"(\d+(\.\d+)*\w*)\")')
# Marker variable names that _is_version_marker() recognises.
_VERSION_MARKERS = {'python_version', 'python_full_version'}
29
+
30
+
31
def _is_version_marker(s):
    """Return True if ``s`` is a string naming one of ``_VERSION_MARKERS``."""
    if not isinstance(s, string_types):
        return False
    return s in _VERSION_MARKERS
33
+
34
+
35
def _is_literal(o):
    """Return True if ``o`` is a non-empty string that starts with a quote
    character (single or double)."""
    is_text = isinstance(o, string_types)
    if is_text and o:
        return o[0] in '\'"'
    return False
39
+
40
+
41
def _get_versions(s):
    """Return the set of ``LV`` objects built from every
    ``_VERSION_PATTERN`` match found in ``s``."""
    versions = set()
    for found in _VERSION_PATTERN.finditer(s):
        versions.add(LV(found.group(1)))
    return versions
43
+
44
+
45
class Evaluator(object):
    """
    This class is used to evaluate marker expressions.
    """

    # Maps each supported operator token to a two-argument callable.
    # NOTE(review): '~=' is implemented here as "equal or greater", not as
    # the PEP 440 compatible-release check -- confirm this is intended.
    operations = {
        '==': lambda x, y: x == y,
        '===': lambda x, y: x == y,
        '~=': lambda x, y: x == y or x > y,
        '!=': lambda x, y: x != y,
        '<': lambda x, y: x < y,
        '<=': lambda x, y: x == y or x < y,
        '>': lambda x, y: x > y,
        '>=': lambda x, y: x == y or x > y,
        'and': lambda x, y: x and y,
        'or': lambda x, y: x or y,
        'in': lambda x, y: x in y,
        'not in': lambda x, y: x not in y,
    }

    def evaluate(self, expr, context):
        """
        Evaluate a marker expression returned by the :func:`parse_requirement`
        function in the specified context.

        :param expr: Either a string (a quoted literal or a variable name)
                     or a dict with ``'op'``, ``'lhs'`` and ``'rhs'`` keys.
        :param context: A mapping of variable names to values.
        :return: The literal's contents, the variable's value, or the
                 result of applying the operator to both operands.
        :raises SyntaxError: for an unknown variable (including an empty
                             string) or a comparison of two literals.
        :raises NotImplementedError: for an operator not in ``operations``.
        """
        if isinstance(expr, string_types):
            # _is_literal() also copes with an empty string, which used to
            # raise IndexError on expr[0]; it is now reported as an unknown
            # variable like any other unresolvable name.
            if _is_literal(expr):
                # strip the surrounding quotes
                return expr[1:-1]
            if expr not in context:
                raise SyntaxError('unknown variable: %s' % expr)
            return context[expr]

        assert isinstance(expr, dict)
        op = expr['op']
        if op not in self.operations:
            raise NotImplementedError('op not implemented: %s' % op)
        elhs = expr['lhs']
        erhs = expr['rhs']
        if _is_literal(elhs) and _is_literal(erhs):
            raise SyntaxError('invalid comparison: %s %s %s' %
                              (elhs, op, erhs))

        lhs = self.evaluate(elhs, context)
        rhs = self.evaluate(erhs, context)
        # When a version marker is involved, compare as versions rather
        # than as plain strings.
        if ((_is_version_marker(elhs) or _is_version_marker(erhs))
                and op in ('<', '<=', '>', '>=', '===', '==', '!=', '~=')):
            lhs = LV(lhs)
            rhs = LV(rhs)
        elif _is_version_marker(elhs) and op in ('in', 'not in'):
            lhs = LV(lhs)
            rhs = _get_versions(rhs)
        return self.operations[op](lhs, rhs)
99
+
100
+
101
+ _DIGITS = re.compile(r'\d+\.\d+')
102
+
103
+
104
def default_context():
    """
    Build the dictionary of marker variables describing the running
    interpreter and platform.
    """

    def format_full_version(info):
        # 'major.minor.micro', plus the first letter of the release level
        # and the serial number for non-final releases.
        base = '%s.%s.%s' % (info.major, info.minor, info.micro)
        level = info.releaselevel
        if level == 'final':
            return base
        return base + level[0] + str(info.serial)

    if not hasattr(sys, 'implementation'):
        implementation_name = ''
        implementation_version = '0'
    else:
        implementation_name = sys.implementation.name
        implementation_version = format_full_version(
            sys.implementation.version)

    full_version = platform.python_version()
    # Leading 'X.Y' part of the full version string.
    short_version = _DIGITS.match(full_version).group(0)
    return {
        'implementation_name': implementation_name,
        'implementation_version': implementation_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'platform_in_venv': str(in_venv()),
        'python_full_version': full_version,
        'python_version': short_version,
        'sys_platform': sys.platform,
    }
139
+
140
+
141
+ DEFAULT_CONTEXT = default_context()
142
+ del default_context
143
+
144
+ evaluator = Evaluator()
145
+
146
+
147
def interpret(marker, execution_context=None):
    """
    Interpret a marker and return a result depending on environment.

    :param marker: The marker to interpret.
    :type marker: str
    :param execution_context: Extra names which are layered over a copy of
                              ``DEFAULT_CONTEXT`` for name lookup.
    :type execution_context: mapping
    :raises SyntaxError: if the marker cannot be parsed, or if anything
                         other than a ``#`` comment follows it.
    """
    try:
        parsed = parse_marker(marker)
    except Exception as e:
        raise SyntaxError('Unable to interpret marker syntax: %s: %s' %
                          (marker, e))
    expr, rest = parsed

    trailing_is_ok = not rest or rest[0] == '#'
    if not trailing_is_ok:
        raise SyntaxError('unexpected trailing data in marker: %s: %s' %
                          (marker, rest))

    # Work on a copy so the module-level defaults are never modified.
    names = dict(DEFAULT_CONTEXT)
    if execution_context:
        names.update(execution_context)
    return evaluator.evaluate(expr, names)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/metadata.py ADDED
@@ -0,0 +1,1068 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012 The Python Software Foundation.
4
+ # See LICENSE.txt and CONTRIBUTORS.txt.
5
+ #
6
+ """Implementation of the Metadata for Python packages PEPs.
7
+
8
+ Supports all metadata formats (1.0, 1.1, 1.2, 1.3/2.1 and 2.2).
9
+ """
10
+ from __future__ import unicode_literals
11
+
12
+ import codecs
13
+ from email import message_from_file
14
+ import json
15
+ import logging
16
+ import re
17
+
18
+
19
+ from . import DistlibException, __version__
20
+ from .compat import StringIO, string_types, text_type
21
+ from .markers import interpret
22
+ from .util import extract_by_key, get_extras
23
+ from .version import get_scheme, PEP440_VERSION_RE
24
+
25
+ logger = logging.getLogger(__name__)
26
+
27
+
28
+ class MetadataMissingError(DistlibException):
29
+ """A required metadata is missing"""
30
+
31
+
32
+ class MetadataConflictError(DistlibException):
33
+ """Attempt to read or write metadata fields that are conflictual."""
34
+
35
+
36
+ class MetadataUnrecognizedVersionError(DistlibException):
37
+ """Unknown metadata version number."""
38
+
39
+
40
+ class MetadataInvalidError(DistlibException):
41
+ """A metadata value is invalid"""
42
+
43
+ # public API of this module
44
+ __all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION']
45
+
46
+ # Encoding used for the PKG-INFO files
47
+ PKG_INFO_ENCODING = 'utf-8'
48
+
49
+ # preferred version. Hopefully will be changed
50
+ # to 1.2 once PEP 345 is supported everywhere
51
+ PKG_INFO_PREFERRED_VERSION = '1.1'
52
+
53
+ _LINE_PREFIX_1_2 = re.compile('\n \\|')
54
+ _LINE_PREFIX_PRE_1_2 = re.compile('\n ')
55
+ _241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
56
+ 'Summary', 'Description',
57
+ 'Keywords', 'Home-page', 'Author', 'Author-email',
58
+ 'License')
59
+
60
+ _314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
61
+ 'Supported-Platform', 'Summary', 'Description',
62
+ 'Keywords', 'Home-page', 'Author', 'Author-email',
63
+ 'License', 'Classifier', 'Download-URL', 'Obsoletes',
64
+ 'Provides', 'Requires')
65
+
66
+ _314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier',
67
+ 'Download-URL')
68
+
69
+ _345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
70
+ 'Supported-Platform', 'Summary', 'Description',
71
+ 'Keywords', 'Home-page', 'Author', 'Author-email',
72
+ 'Maintainer', 'Maintainer-email', 'License',
73
+ 'Classifier', 'Download-URL', 'Obsoletes-Dist',
74
+ 'Project-URL', 'Provides-Dist', 'Requires-Dist',
75
+ 'Requires-Python', 'Requires-External')
76
+
77
+ _345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python',
78
+ 'Obsoletes-Dist', 'Requires-External', 'Maintainer',
79
+ 'Maintainer-email', 'Project-URL')
80
+
81
+ _426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
82
+ 'Supported-Platform', 'Summary', 'Description',
83
+ 'Keywords', 'Home-page', 'Author', 'Author-email',
84
+ 'Maintainer', 'Maintainer-email', 'License',
85
+ 'Classifier', 'Download-URL', 'Obsoletes-Dist',
86
+ 'Project-URL', 'Provides-Dist', 'Requires-Dist',
87
+ 'Requires-Python', 'Requires-External', 'Private-Version',
88
+ 'Obsoleted-By', 'Setup-Requires-Dist', 'Extension',
89
+ 'Provides-Extra')
90
+
91
+ _426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By',
92
+ 'Setup-Requires-Dist', 'Extension')
93
+
94
+ # See issue #106: Sometimes 'Requires' and 'Provides' occur wrongly in
95
+ # the metadata. Include them in the tuple literal below to allow them
96
+ # (for now).
97
+ # Ditto for Obsoletes - see issue #140.
98
+ _566_FIELDS = _426_FIELDS + ('Description-Content-Type',
99
+ 'Requires', 'Provides', 'Obsoletes')
100
+
101
+ _566_MARKERS = ('Description-Content-Type',)
102
+
103
+ _643_MARKERS = ('Dynamic', 'License-File')
104
+
105
+ _643_FIELDS = _566_FIELDS + _643_MARKERS
106
+
107
+ _ALL_FIELDS = set()
108
+ _ALL_FIELDS.update(_241_FIELDS)
109
+ _ALL_FIELDS.update(_314_FIELDS)
110
+ _ALL_FIELDS.update(_345_FIELDS)
111
+ _ALL_FIELDS.update(_426_FIELDS)
112
+ _ALL_FIELDS.update(_566_FIELDS)
113
+ _ALL_FIELDS.update(_643_FIELDS)
114
+
115
+ EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''')
116
+
117
+
118
+ def _version2fieldlist(version):
119
+ if version == '1.0':
120
+ return _241_FIELDS
121
+ elif version == '1.1':
122
+ return _314_FIELDS
123
+ elif version == '1.2':
124
+ return _345_FIELDS
125
+ elif version in ('1.3', '2.1'):
126
+ # avoid adding field names if already there
127
+ return _345_FIELDS + tuple(f for f in _566_FIELDS if f not in _345_FIELDS)
128
+ elif version == '2.0':
129
+ raise ValueError('Metadata 2.0 is withdrawn and not supported')
130
+ # return _426_FIELDS
131
+ elif version == '2.2':
132
+ return _643_FIELDS
133
+ raise MetadataUnrecognizedVersionError(version)
134
+
135
+
136
+ def _best_version(fields):
137
+ """Detect the best version depending on the fields used."""
138
+ def _has_marker(keys, markers):
139
+ return any(marker in keys for marker in markers)
140
+
141
+ keys = [key for key, value in fields.items() if value not in ([], 'UNKNOWN', None)]
142
+ possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.1', '2.2'] # 2.0 removed
143
+
144
+ # first let's try to see if a field is not part of one of the version
145
+ for key in keys:
146
+ if key not in _241_FIELDS and '1.0' in possible_versions:
147
+ possible_versions.remove('1.0')
148
+ logger.debug('Removed 1.0 due to %s', key)
149
+ if key not in _314_FIELDS and '1.1' in possible_versions:
150
+ possible_versions.remove('1.1')
151
+ logger.debug('Removed 1.1 due to %s', key)
152
+ if key not in _345_FIELDS and '1.2' in possible_versions:
153
+ possible_versions.remove('1.2')
154
+ logger.debug('Removed 1.2 due to %s', key)
155
+ if key not in _566_FIELDS and '1.3' in possible_versions:
156
+ possible_versions.remove('1.3')
157
+ logger.debug('Removed 1.3 due to %s', key)
158
+ if key not in _566_FIELDS and '2.1' in possible_versions:
159
+ if key != 'Description': # In 2.1, description allowed after headers
160
+ possible_versions.remove('2.1')
161
+ logger.debug('Removed 2.1 due to %s', key)
162
+ if key not in _643_FIELDS and '2.2' in possible_versions:
163
+ possible_versions.remove('2.2')
164
+ logger.debug('Removed 2.2 due to %s', key)
165
+ # if key not in _426_FIELDS and '2.0' in possible_versions:
166
+ # possible_versions.remove('2.0')
167
+ # logger.debug('Removed 2.0 due to %s', key)
168
+
169
+ # possible_version contains qualified versions
170
+ if len(possible_versions) == 1:
171
+ return possible_versions[0] # found !
172
+ elif len(possible_versions) == 0:
173
+ logger.debug('Out of options - unknown metadata set: %s', fields)
174
+ raise MetadataConflictError('Unknown metadata set')
175
+
176
+ # let's see if one unique marker is found
177
+ is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS)
178
+ is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS)
179
+ is_2_1 = '2.1' in possible_versions and _has_marker(keys, _566_MARKERS)
180
+ # is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS)
181
+ is_2_2 = '2.2' in possible_versions and _has_marker(keys, _643_MARKERS)
182
+ if int(is_1_1) + int(is_1_2) + int(is_2_1) + int(is_2_2) > 1:
183
+ raise MetadataConflictError('You used incompatible 1.1/1.2/2.1/2.2 fields')
184
+
185
+ # we have the choice, 1.0, or 1.2, 2.1 or 2.2
186
+ # - 1.0 has a broken Summary field but works with all tools
187
+ # - 1.1 is to avoid
188
+ # - 1.2 fixes Summary but has little adoption
189
+ # - 2.1 adds more features
190
+ # - 2.2 is the latest
191
+ if not is_1_1 and not is_1_2 and not is_2_1 and not is_2_2:
192
+ # we couldn't find any specific marker
193
+ if PKG_INFO_PREFERRED_VERSION in possible_versions:
194
+ return PKG_INFO_PREFERRED_VERSION
195
+ if is_1_1:
196
+ return '1.1'
197
+ if is_1_2:
198
+ return '1.2'
199
+ if is_2_1:
200
+ return '2.1'
201
+ # if is_2_2:
202
+ # return '2.2'
203
+
204
+ return '2.2'
205
+
206
+ # This follows the rules about transforming keys as described in
207
+ # https://www.python.org/dev/peps/pep-0566/#id17
208
+ _ATTR2FIELD = {
209
+ name.lower().replace("-", "_"): name for name in _ALL_FIELDS
210
+ }
211
+ _FIELD2ATTR = {field: attr for attr, field in _ATTR2FIELD.items()}
212
+
213
+ _PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist')
214
+ _VERSIONS_FIELDS = ('Requires-Python',)
215
+ _VERSION_FIELDS = ('Version',)
216
+ _LISTFIELDS = ('Platform', 'Classifier', 'Obsoletes',
217
+ 'Requires', 'Provides', 'Obsoletes-Dist',
218
+ 'Provides-Dist', 'Requires-Dist', 'Requires-External',
219
+ 'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist',
220
+ 'Provides-Extra', 'Extension', 'License-File')
221
+ _LISTTUPLEFIELDS = ('Project-URL',)
222
+
223
+ _ELEMENTSFIELD = ('Keywords',)
224
+
225
+ _UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description')
226
+
227
+ _MISSING = object()
228
+
229
+ _FILESAFE = re.compile('[^A-Za-z0-9.]+')
230
+
231
+
232
+ def _get_name_and_version(name, version, for_filename=False):
233
+ """Return the distribution name with version.
234
+
235
+ If for_filename is true, return a filename-escaped form."""
236
+ if for_filename:
237
+ # For both name and version any runs of non-alphanumeric or '.'
238
+ # characters are replaced with a single '-'. Additionally any
239
+ # spaces in the version string become '.'
240
+ name = _FILESAFE.sub('-', name)
241
+ version = _FILESAFE.sub('-', version.replace(' ', '.'))
242
+ return '%s-%s' % (name, version)
243
+
244
+
245
+ class LegacyMetadata(object):
246
+ """The legacy metadata of a release.
247
+
248
+ Supports versions 1.0, 1.1, 1.2, 2.0 and 1.3/2.1 (auto-detected). You can
249
+ instantiate the class with one of these arguments (or none):
250
+ - *path*, the path to a metadata file
251
+ - *fileobj* give a file-like object with metadata as content
252
+ - *mapping* is a dict-like object
253
+ - *scheme* is a version scheme name
254
+ """
255
+ # TODO document the mapping API and UNKNOWN default key
256
+
257
+ def __init__(self, path=None, fileobj=None, mapping=None,
258
+ scheme='default'):
259
+ if [path, fileobj, mapping].count(None) < 2:
260
+ raise TypeError('path, fileobj and mapping are exclusive')
261
+ self._fields = {}
262
+ self.requires_files = []
263
+ self._dependencies = None
264
+ self.scheme = scheme
265
+ if path is not None:
266
+ self.read(path)
267
+ elif fileobj is not None:
268
+ self.read_file(fileobj)
269
+ elif mapping is not None:
270
+ self.update(mapping)
271
+ self.set_metadata_version()
272
+
273
+ def set_metadata_version(self):
274
+ self._fields['Metadata-Version'] = _best_version(self._fields)
275
+
276
+ def _write_field(self, fileobj, name, value):
277
+ fileobj.write('%s: %s\n' % (name, value))
278
+
279
+ def __getitem__(self, name):
280
+ return self.get(name)
281
+
282
+ def __setitem__(self, name, value):
283
+ return self.set(name, value)
284
+
285
+ def __delitem__(self, name):
286
+ field_name = self._convert_name(name)
287
+ try:
288
+ del self._fields[field_name]
289
+ except KeyError:
290
+ raise KeyError(name)
291
+
292
+ def __contains__(self, name):
293
+ return (name in self._fields or
294
+ self._convert_name(name) in self._fields)
295
+
296
+ def _convert_name(self, name):
297
+ if name in _ALL_FIELDS:
298
+ return name
299
+ name = name.replace('-', '_').lower()
300
+ return _ATTR2FIELD.get(name, name)
301
+
302
+ def _default_value(self, name):
303
+ if name in _LISTFIELDS or name in _ELEMENTSFIELD:
304
+ return []
305
+ return 'UNKNOWN'
306
+
307
+ def _remove_line_prefix(self, value):
308
+ if self.metadata_version in ('1.0', '1.1'):
309
+ return _LINE_PREFIX_PRE_1_2.sub('\n', value)
310
+ else:
311
+ return _LINE_PREFIX_1_2.sub('\n', value)
312
+
313
+ def __getattr__(self, name):
314
+ if name in _ATTR2FIELD:
315
+ return self[name]
316
+ raise AttributeError(name)
317
+
318
+ #
319
+ # Public API
320
+ #
321
+
322
+ # dependencies = property(_get_dependencies, _set_dependencies)
323
+
324
+ def get_fullname(self, filesafe=False):
325
+ """Return the distribution name with version.
326
+
327
+ If filesafe is true, return a filename-escaped form."""
328
+ return _get_name_and_version(self['Name'], self['Version'], filesafe)
329
+
330
+ def is_field(self, name):
331
+ """return True if name is a valid metadata key"""
332
+ name = self._convert_name(name)
333
+ return name in _ALL_FIELDS
334
+
335
+ def is_multi_field(self, name):
336
+ name = self._convert_name(name)
337
+ return name in _LISTFIELDS
338
+
339
+ def read(self, filepath):
340
+ """Read the metadata values from a file path."""
341
+ fp = codecs.open(filepath, 'r', encoding='utf-8')
342
+ try:
343
+ self.read_file(fp)
344
+ finally:
345
+ fp.close()
346
+
347
+ def read_file(self, fileob):
348
+ """Read the metadata values from a file object."""
349
+ msg = message_from_file(fileob)
350
+ self._fields['Metadata-Version'] = msg['metadata-version']
351
+
352
+ # When reading, get all the fields we can
353
+ for field in _ALL_FIELDS:
354
+ if field not in msg:
355
+ continue
356
+ if field in _LISTFIELDS:
357
+ # we can have multiple lines
358
+ values = msg.get_all(field)
359
+ if field in _LISTTUPLEFIELDS and values is not None:
360
+ values = [tuple(value.split(',')) for value in values]
361
+ self.set(field, values)
362
+ else:
363
+ # single line
364
+ value = msg[field]
365
+ if value is not None and value != 'UNKNOWN':
366
+ self.set(field, value)
367
+
368
+ # PEP 566 specifies that the body be used for the description, if
369
+ # available
370
+ body = msg.get_payload()
371
+ self["Description"] = body if body else self["Description"]
372
+ # logger.debug('Attempting to set metadata for %s', self)
373
+ # self.set_metadata_version()
374
+
375
+ def write(self, filepath, skip_unknown=False):
376
+ """Write the metadata fields to filepath."""
377
+ fp = codecs.open(filepath, 'w', encoding='utf-8')
378
+ try:
379
+ self.write_file(fp, skip_unknown)
380
+ finally:
381
+ fp.close()
382
+
383
+ def write_file(self, fileobject, skip_unknown=False):
384
+ """Write the PKG-INFO format data to a file object."""
385
+ self.set_metadata_version()
386
+
387
+ for field in _version2fieldlist(self['Metadata-Version']):
388
+ values = self.get(field)
389
+ if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']):
390
+ continue
391
+ if field in _ELEMENTSFIELD:
392
+ self._write_field(fileobject, field, ','.join(values))
393
+ continue
394
+ if field not in _LISTFIELDS:
395
+ if field == 'Description':
396
+ if self.metadata_version in ('1.0', '1.1'):
397
+ values = values.replace('\n', '\n ')
398
+ else:
399
+ values = values.replace('\n', '\n |')
400
+ values = [values]
401
+
402
+ if field in _LISTTUPLEFIELDS:
403
+ values = [','.join(value) for value in values]
404
+
405
+ for value in values:
406
+ self._write_field(fileobject, field, value)
407
+
408
+ def update(self, other=None, **kwargs):
409
+ """Set metadata values from the given iterable `other` and kwargs.
410
+
411
+ Behavior is like `dict.update`: If `other` has a ``keys`` method,
412
+ they are looped over and ``self[key]`` is assigned ``other[key]``.
413
+ Else, ``other`` is an iterable of ``(key, value)`` iterables.
414
+
415
+ Keys that don't match a metadata field or that have an empty value are
416
+ dropped.
417
+ """
418
+ def _set(key, value):
419
+ if key in _ATTR2FIELD and value:
420
+ self.set(self._convert_name(key), value)
421
+
422
+ if not other:
423
+ # other is None or empty container
424
+ pass
425
+ elif hasattr(other, 'keys'):
426
+ for k in other.keys():
427
+ _set(k, other[k])
428
+ else:
429
+ for k, v in other:
430
+ _set(k, v)
431
+
432
+ if kwargs:
433
+ for k, v in kwargs.items():
434
+ _set(k, v)
435
+
436
+ def set(self, name, value):
437
+ """Control then set a metadata field."""
438
+ name = self._convert_name(name)
439
+
440
+ if ((name in _ELEMENTSFIELD or name == 'Platform') and
441
+ not isinstance(value, (list, tuple))):
442
+ if isinstance(value, string_types):
443
+ value = [v.strip() for v in value.split(',')]
444
+ else:
445
+ value = []
446
+ elif (name in _LISTFIELDS and
447
+ not isinstance(value, (list, tuple))):
448
+ if isinstance(value, string_types):
449
+ value = [value]
450
+ else:
451
+ value = []
452
+
453
+ if logger.isEnabledFor(logging.WARNING):
454
+ project_name = self['Name']
455
+
456
+ scheme = get_scheme(self.scheme)
457
+ if name in _PREDICATE_FIELDS and value is not None:
458
+ for v in value:
459
+ # check that the values are valid
460
+ if not scheme.is_valid_matcher(v.split(';')[0]):
461
+ logger.warning(
462
+ "'%s': '%s' is not valid (field '%s')",
463
+ project_name, v, name)
464
+ # FIXME this rejects UNKNOWN, is that right?
465
+ elif name in _VERSIONS_FIELDS and value is not None:
466
+ if not scheme.is_valid_constraint_list(value):
467
+ logger.warning("'%s': '%s' is not a valid version (field '%s')",
468
+ project_name, value, name)
469
+ elif name in _VERSION_FIELDS and value is not None:
470
+ if not scheme.is_valid_version(value):
471
+ logger.warning("'%s': '%s' is not a valid version (field '%s')",
472
+ project_name, value, name)
473
+
474
+ if name in _UNICODEFIELDS:
475
+ if name == 'Description':
476
+ value = self._remove_line_prefix(value)
477
+
478
+ self._fields[name] = value
479
+
480
+ def get(self, name, default=_MISSING):
481
+ """Get a metadata field."""
482
+ name = self._convert_name(name)
483
+ if name not in self._fields:
484
+ if default is _MISSING:
485
+ default = self._default_value(name)
486
+ return default
487
+ if name in _UNICODEFIELDS:
488
+ value = self._fields[name]
489
+ return value
490
+ elif name in _LISTFIELDS:
491
+ value = self._fields[name]
492
+ if value is None:
493
+ return []
494
+ res = []
495
+ for val in value:
496
+ if name not in _LISTTUPLEFIELDS:
497
+ res.append(val)
498
+ else:
499
+ # That's for Project-URL
500
+ res.append((val[0], val[1]))
501
+ return res
502
+
503
+ elif name in _ELEMENTSFIELD:
504
+ value = self._fields[name]
505
+ if isinstance(value, string_types):
506
+ return value.split(',')
507
+ return self._fields[name]
508
+
509
+ def check(self, strict=False):
510
+ """Check if the metadata is compliant. If strict is True then raise if
511
+ no Name or Version are provided"""
512
+ self.set_metadata_version()
513
+
514
+ # XXX should check the versions (if the file was loaded)
515
+ missing, warnings = [], []
516
+
517
+ for attr in ('Name', 'Version'): # required by PEP 345
518
+ if attr not in self:
519
+ missing.append(attr)
520
+
521
+ if strict and missing != []:
522
+ msg = 'missing required metadata: %s' % ', '.join(missing)
523
+ raise MetadataMissingError(msg)
524
+
525
+ for attr in ('Home-page', 'Author'):
526
+ if attr not in self:
527
+ missing.append(attr)
528
+
529
+ # checking metadata 1.2 (XXX needs to check 1.1, 1.0)
530
+ if self['Metadata-Version'] != '1.2':
531
+ return missing, warnings
532
+
533
+ scheme = get_scheme(self.scheme)
534
+
535
+ def are_valid_constraints(value):
536
+ for v in value:
537
+ if not scheme.is_valid_matcher(v.split(';')[0]):
538
+ return False
539
+ return True
540
+
541
+ for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints),
542
+ (_VERSIONS_FIELDS,
543
+ scheme.is_valid_constraint_list),
544
+ (_VERSION_FIELDS,
545
+ scheme.is_valid_version)):
546
+ for field in fields:
547
+ value = self.get(field, None)
548
+ if value is not None and not controller(value):
549
+ warnings.append("Wrong value for '%s': %s" % (field, value))
550
+
551
+ return missing, warnings
552
+
553
+ def todict(self, skip_missing=False):
554
+ """Return fields as a dict.
555
+
556
+ Field names will be converted to use the underscore-lowercase style
557
+ instead of hyphen-mixed case (i.e. home_page instead of Home-page).
558
+ This is as per https://www.python.org/dev/peps/pep-0566/#id17.
559
+ """
560
+ self.set_metadata_version()
561
+
562
+ fields = _version2fieldlist(self['Metadata-Version'])
563
+
564
+ data = {}
565
+
566
+ for field_name in fields:
567
+ if not skip_missing or field_name in self._fields:
568
+ key = _FIELD2ATTR[field_name]
569
+ if key != 'project_url':
570
+ data[key] = self[field_name]
571
+ else:
572
+ data[key] = [','.join(u) for u in self[field_name]]
573
+
574
+ return data
575
+
576
+ def add_requirements(self, requirements):
577
+ if self['Metadata-Version'] == '1.1':
578
+ # we can't have 1.1 metadata *and* Setuptools requires
579
+ for field in ('Obsoletes', 'Requires', 'Provides'):
580
+ if field in self:
581
+ del self[field]
582
+ self['Requires-Dist'] += requirements
583
+
584
+ # Mapping API
585
+ # TODO could add iter* variants
586
+
587
+ def keys(self):
588
+ return list(_version2fieldlist(self['Metadata-Version']))
589
+
590
+ def __iter__(self):
591
+ for key in self.keys():
592
+ yield key
593
+
594
+ def values(self):
595
+ return [self[key] for key in self.keys()]
596
+
597
+ def items(self):
598
+ return [(key, self[key]) for key in self.keys()]
599
+
600
+ def __repr__(self):
601
+ return '<%s %s %s>' % (self.__class__.__name__, self.name,
602
+ self.version)
603
+
604
+
605
+ METADATA_FILENAME = 'pydist.json'
606
+ WHEEL_METADATA_FILENAME = 'metadata.json'
607
+ LEGACY_METADATA_FILENAME = 'METADATA'
608
+
609
+
610
+ class Metadata(object):
611
+ """
612
+ The metadata of a release. This implementation uses 2.1
613
+ metadata where possible. If not possible, it wraps a LegacyMetadata
614
+ instance which handles the key-value metadata format.
615
+ """
616
+
617
+ METADATA_VERSION_MATCHER = re.compile(r'^\d+(\.\d+)*$')
618
+
619
+ NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I)
620
+
621
+ FIELDNAME_MATCHER = re.compile('^[A-Z]([0-9A-Z-]*[0-9A-Z])?$', re.I)
622
+
623
+ VERSION_MATCHER = PEP440_VERSION_RE
624
+
625
+ SUMMARY_MATCHER = re.compile('.{1,2047}')
626
+
627
+ METADATA_VERSION = '2.0'
628
+
629
+ GENERATOR = 'distlib (%s)' % __version__
630
+
631
+ MANDATORY_KEYS = {
632
+ 'name': (),
633
+ 'version': (),
634
+ 'summary': ('legacy',),
635
+ }
636
+
637
+ INDEX_KEYS = ('name version license summary description author '
638
+ 'author_email keywords platform home_page classifiers '
639
+ 'download_url')
640
+
641
+ DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires '
642
+ 'dev_requires provides meta_requires obsoleted_by '
643
+ 'supports_environments')
644
+
645
+ SYNTAX_VALIDATORS = {
646
+ 'metadata_version': (METADATA_VERSION_MATCHER, ()),
647
+ 'name': (NAME_MATCHER, ('legacy',)),
648
+ 'version': (VERSION_MATCHER, ('legacy',)),
649
+ 'summary': (SUMMARY_MATCHER, ('legacy',)),
650
+ 'dynamic': (FIELDNAME_MATCHER, ('legacy',)),
651
+ }
652
+
653
+ __slots__ = ('_legacy', '_data', 'scheme')
654
+
655
+ def __init__(self, path=None, fileobj=None, mapping=None,
656
+ scheme='default'):
657
+ if [path, fileobj, mapping].count(None) < 2:
658
+ raise TypeError('path, fileobj and mapping are exclusive')
659
+ self._legacy = None
660
+ self._data = None
661
+ self.scheme = scheme
662
+ #import pdb; pdb.set_trace()
663
+ if mapping is not None:
664
+ try:
665
+ self._validate_mapping(mapping, scheme)
666
+ self._data = mapping
667
+ except MetadataUnrecognizedVersionError:
668
+ self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme)
669
+ self.validate()
670
+ else:
671
+ data = None
672
+ if path:
673
+ with open(path, 'rb') as f:
674
+ data = f.read()
675
+ elif fileobj:
676
+ data = fileobj.read()
677
+ if data is None:
678
+ # Initialised with no args - to be added
679
+ self._data = {
680
+ 'metadata_version': self.METADATA_VERSION,
681
+ 'generator': self.GENERATOR,
682
+ }
683
+ else:
684
+ if not isinstance(data, text_type):
685
+ data = data.decode('utf-8')
686
+ try:
687
+ self._data = json.loads(data)
688
+ self._validate_mapping(self._data, scheme)
689
+ except ValueError:
690
+ # Note: MetadataUnrecognizedVersionError does not
691
+ # inherit from ValueError (it's a DistlibException,
692
+ # which should not inherit from ValueError).
693
+ # The ValueError comes from the json.load - if that
694
+ # succeeds and we get a validation error, we want
695
+ # that to propagate
696
+ self._legacy = LegacyMetadata(fileobj=StringIO(data),
697
+ scheme=scheme)
698
+ self.validate()
699
+
700
+ common_keys = set(('name', 'version', 'license', 'keywords', 'summary'))
701
+
702
+ none_list = (None, list)
703
+ none_dict = (None, dict)
704
+
705
+ mapped_keys = {
706
+ 'run_requires': ('Requires-Dist', list),
707
+ 'build_requires': ('Setup-Requires-Dist', list),
708
+ 'dev_requires': none_list,
709
+ 'test_requires': none_list,
710
+ 'meta_requires': none_list,
711
+ 'extras': ('Provides-Extra', list),
712
+ 'modules': none_list,
713
+ 'namespaces': none_list,
714
+ 'exports': none_dict,
715
+ 'commands': none_dict,
716
+ 'classifiers': ('Classifier', list),
717
+ 'source_url': ('Download-URL', None),
718
+ 'metadata_version': ('Metadata-Version', None),
719
+ }
720
+
721
+ del none_list, none_dict
722
+
723
+ def __getattribute__(self, key):
724
+ common = object.__getattribute__(self, 'common_keys')
725
+ mapped = object.__getattribute__(self, 'mapped_keys')
726
+ if key in mapped:
727
+ lk, maker = mapped[key]
728
+ if self._legacy:
729
+ if lk is None:
730
+ result = None if maker is None else maker()
731
+ else:
732
+ result = self._legacy.get(lk)
733
+ else:
734
+ value = None if maker is None else maker()
735
+ if key not in ('commands', 'exports', 'modules', 'namespaces',
736
+ 'classifiers'):
737
+ result = self._data.get(key, value)
738
+ else:
739
+ # special cases for PEP 459
740
+ sentinel = object()
741
+ result = sentinel
742
+ d = self._data.get('extensions')
743
+ if d:
744
+ if key == 'commands':
745
+ result = d.get('python.commands', value)
746
+ elif key == 'classifiers':
747
+ d = d.get('python.details')
748
+ if d:
749
+ result = d.get(key, value)
750
+ else:
751
+ d = d.get('python.exports')
752
+ if not d:
753
+ d = self._data.get('python.exports')
754
+ if d:
755
+ result = d.get(key, value)
756
+ if result is sentinel:
757
+ result = value
758
+ elif key not in common:
759
+ result = object.__getattribute__(self, key)
760
+ elif self._legacy:
761
+ result = self._legacy.get(key)
762
+ else:
763
+ result = self._data.get(key)
764
+ return result
765
+
766
+ def _validate_value(self, key, value, scheme=None):
767
+ if key in self.SYNTAX_VALIDATORS:
768
+ pattern, exclusions = self.SYNTAX_VALIDATORS[key]
769
+ if (scheme or self.scheme) not in exclusions:
770
+ m = pattern.match(value)
771
+ if not m:
772
+ raise MetadataInvalidError("'%s' is an invalid value for "
773
+ "the '%s' property" % (value,
774
+ key))
775
+
776
+ def __setattr__(self, key, value):
777
+ self._validate_value(key, value)
778
+ common = object.__getattribute__(self, 'common_keys')
779
+ mapped = object.__getattribute__(self, 'mapped_keys')
780
+ if key in mapped:
781
+ lk, _ = mapped[key]
782
+ if self._legacy:
783
+ if lk is None:
784
+ raise NotImplementedError
785
+ self._legacy[lk] = value
786
+ elif key not in ('commands', 'exports', 'modules', 'namespaces',
787
+ 'classifiers'):
788
+ self._data[key] = value
789
+ else:
790
+ # special cases for PEP 459
791
+ d = self._data.setdefault('extensions', {})
792
+ if key == 'commands':
793
+ d['python.commands'] = value
794
+ elif key == 'classifiers':
795
+ d = d.setdefault('python.details', {})
796
+ d[key] = value
797
+ else:
798
+ d = d.setdefault('python.exports', {})
799
+ d[key] = value
800
+ elif key not in common:
801
+ object.__setattr__(self, key, value)
802
+ else:
803
+ if key == 'keywords':
804
+ if isinstance(value, string_types):
805
+ value = value.strip()
806
+ if value:
807
+ value = value.split()
808
+ else:
809
+ value = []
810
+ if self._legacy:
811
+ self._legacy[key] = value
812
+ else:
813
+ self._data[key] = value
814
+
815
+ @property
816
+ def name_and_version(self):
817
+ return _get_name_and_version(self.name, self.version, True)
818
+
819
+ @property
820
+ def provides(self):
821
+ if self._legacy:
822
+ result = self._legacy['Provides-Dist']
823
+ else:
824
+ result = self._data.setdefault('provides', [])
825
+ s = '%s (%s)' % (self.name, self.version)
826
+ if s not in result:
827
+ result.append(s)
828
+ return result
829
+
830
+ @provides.setter
831
+ def provides(self, value):
832
+ if self._legacy:
833
+ self._legacy['Provides-Dist'] = value
834
+ else:
835
+ self._data['provides'] = value
836
+
837
+ def get_requirements(self, reqts, extras=None, env=None):
838
+ """
839
+ Base method to get dependencies, given a set of extras
840
+ to satisfy and an optional environment context.
841
+ :param reqts: A list of sometimes-wanted dependencies,
842
+ perhaps dependent on extras and environment.
843
+ :param extras: A list of optional components being requested.
844
+ :param env: An optional environment for marker evaluation.
845
+ """
846
+ if self._legacy:
847
+ result = reqts
848
+ else:
849
+ result = []
850
+ extras = get_extras(extras or [], self.extras)
851
+ for d in reqts:
852
+ if 'extra' not in d and 'environment' not in d:
853
+ # unconditional
854
+ include = True
855
+ else:
856
+ if 'extra' not in d:
857
+ # Not extra-dependent - only environment-dependent
858
+ include = True
859
+ else:
860
+ include = d.get('extra') in extras
861
+ if include:
862
+ # Not excluded because of extras, check environment
863
+ marker = d.get('environment')
864
+ if marker:
865
+ include = interpret(marker, env)
866
+ if include:
867
+ result.extend(d['requires'])
868
+ for key in ('build', 'dev', 'test'):
869
+ e = ':%s:' % key
870
+ if e in extras:
871
+ extras.remove(e)
872
+ # A recursive call, but it should terminate since 'test'
873
+ # has been removed from the extras
874
+ reqts = self._data.get('%s_requires' % key, [])
875
+ result.extend(self.get_requirements(reqts, extras=extras,
876
+ env=env))
877
+ return result
878
+
879
+ @property
880
+ def dictionary(self):
881
+ if self._legacy:
882
+ return self._from_legacy()
883
+ return self._data
884
+
885
+ @property
886
+ def dependencies(self):
887
+ if self._legacy:
888
+ raise NotImplementedError
889
+ else:
890
+ return extract_by_key(self._data, self.DEPENDENCY_KEYS)
891
+
892
+ @dependencies.setter
893
+ def dependencies(self, value):
894
+ if self._legacy:
895
+ raise NotImplementedError
896
+ else:
897
+ self._data.update(value)
898
+
899
+ def _validate_mapping(self, mapping, scheme):
900
+ if mapping.get('metadata_version') != self.METADATA_VERSION:
901
+ raise MetadataUnrecognizedVersionError()
902
+ missing = []
903
+ for key, exclusions in self.MANDATORY_KEYS.items():
904
+ if key not in mapping:
905
+ if scheme not in exclusions:
906
+ missing.append(key)
907
+ if missing:
908
+ msg = 'Missing metadata items: %s' % ', '.join(missing)
909
+ raise MetadataMissingError(msg)
910
+ for k, v in mapping.items():
911
+ self._validate_value(k, v, scheme)
912
+
913
+ def validate(self):
914
+ if self._legacy:
915
+ missing, warnings = self._legacy.check(True)
916
+ if missing or warnings:
917
+ logger.warning('Metadata: missing: %s, warnings: %s',
918
+ missing, warnings)
919
+ else:
920
+ self._validate_mapping(self._data, self.scheme)
921
+
922
+ def todict(self):
923
+ if self._legacy:
924
+ return self._legacy.todict(True)
925
+ else:
926
+ result = extract_by_key(self._data, self.INDEX_KEYS)
927
+ return result
928
+
929
+ def _from_legacy(self):
930
+ assert self._legacy and not self._data
931
+ result = {
932
+ 'metadata_version': self.METADATA_VERSION,
933
+ 'generator': self.GENERATOR,
934
+ }
935
+ lmd = self._legacy.todict(True) # skip missing ones
936
+ for k in ('name', 'version', 'license', 'summary', 'description',
937
+ 'classifier'):
938
+ if k in lmd:
939
+ if k == 'classifier':
940
+ nk = 'classifiers'
941
+ else:
942
+ nk = k
943
+ result[nk] = lmd[k]
944
+ kw = lmd.get('Keywords', [])
945
+ if kw == ['']:
946
+ kw = []
947
+ result['keywords'] = kw
948
+ keys = (('requires_dist', 'run_requires'),
949
+ ('setup_requires_dist', 'build_requires'))
950
+ for ok, nk in keys:
951
+ if ok in lmd and lmd[ok]:
952
+ result[nk] = [{'requires': lmd[ok]}]
953
+ result['provides'] = self.provides
954
+ author = {}
955
+ maintainer = {}
956
+ return result
957
+
958
+ LEGACY_MAPPING = {
959
+ 'name': 'Name',
960
+ 'version': 'Version',
961
+ ('extensions', 'python.details', 'license'): 'License',
962
+ 'summary': 'Summary',
963
+ 'description': 'Description',
964
+ ('extensions', 'python.project', 'project_urls', 'Home'): 'Home-page',
965
+ ('extensions', 'python.project', 'contacts', 0, 'name'): 'Author',
966
+ ('extensions', 'python.project', 'contacts', 0, 'email'): 'Author-email',
967
+ 'source_url': 'Download-URL',
968
+ ('extensions', 'python.details', 'classifiers'): 'Classifier',
969
+ }
970
+
971
+ def _to_legacy(self):
972
+ def process_entries(entries):
973
+ reqts = set()
974
+ for e in entries:
975
+ extra = e.get('extra')
976
+ env = e.get('environment')
977
+ rlist = e['requires']
978
+ for r in rlist:
979
+ if not env and not extra:
980
+ reqts.add(r)
981
+ else:
982
+ marker = ''
983
+ if extra:
984
+ marker = 'extra == "%s"' % extra
985
+ if env:
986
+ if marker:
987
+ marker = '(%s) and %s' % (env, marker)
988
+ else:
989
+ marker = env
990
+ reqts.add(';'.join((r, marker)))
991
+ return reqts
992
+
993
+ assert self._data and not self._legacy
994
+ result = LegacyMetadata()
995
+ nmd = self._data
996
+ # import pdb; pdb.set_trace()
997
+ for nk, ok in self.LEGACY_MAPPING.items():
998
+ if not isinstance(nk, tuple):
999
+ if nk in nmd:
1000
+ result[ok] = nmd[nk]
1001
+ else:
1002
+ d = nmd
1003
+ found = True
1004
+ for k in nk:
1005
+ try:
1006
+ d = d[k]
1007
+ except (KeyError, IndexError):
1008
+ found = False
1009
+ break
1010
+ if found:
1011
+ result[ok] = d
1012
+ r1 = process_entries(self.run_requires + self.meta_requires)
1013
+ r2 = process_entries(self.build_requires + self.dev_requires)
1014
+ if self.extras:
1015
+ result['Provides-Extra'] = sorted(self.extras)
1016
+ result['Requires-Dist'] = sorted(r1)
1017
+ result['Setup-Requires-Dist'] = sorted(r2)
1018
+ # TODO: any other fields wanted
1019
+ return result
1020
+
1021
+ def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True):
1022
+ if [path, fileobj].count(None) != 1:
1023
+ raise ValueError('Exactly one of path and fileobj is needed')
1024
+ self.validate()
1025
+ if legacy:
1026
+ if self._legacy:
1027
+ legacy_md = self._legacy
1028
+ else:
1029
+ legacy_md = self._to_legacy()
1030
+ if path:
1031
+ legacy_md.write(path, skip_unknown=skip_unknown)
1032
+ else:
1033
+ legacy_md.write_file(fileobj, skip_unknown=skip_unknown)
1034
+ else:
1035
+ if self._legacy:
1036
+ d = self._from_legacy()
1037
+ else:
1038
+ d = self._data
1039
+ if fileobj:
1040
+ json.dump(d, fileobj, ensure_ascii=True, indent=2,
1041
+ sort_keys=True)
1042
+ else:
1043
+ with codecs.open(path, 'w', 'utf-8') as f:
1044
+ json.dump(d, f, ensure_ascii=True, indent=2,
1045
+ sort_keys=True)
1046
+
1047
+ def add_requirements(self, requirements):
1048
+ if self._legacy:
1049
+ self._legacy.add_requirements(requirements)
1050
+ else:
1051
+ run_requires = self._data.setdefault('run_requires', [])
1052
+ always = None
1053
+ for entry in run_requires:
1054
+ if 'environment' not in entry and 'extra' not in entry:
1055
+ always = entry
1056
+ break
1057
+ if always is None:
1058
+ always = { 'requires': requirements }
1059
+ run_requires.insert(0, always)
1060
+ else:
1061
+ rset = set(always['requires']) | set(requirements)
1062
+ always['requires'] = sorted(rset)
1063
+
1064
+ def __repr__(self):
1065
+ name = self.name or '(no name)'
1066
+ version = self.version or 'no version'
1067
+ return '<%s %s %s (%s)>' % (self.__class__.__name__,
1068
+ self.metadata_version, name, version)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/resources.py ADDED
@@ -0,0 +1,358 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013-2017 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ from __future__ import unicode_literals
8
+
9
+ import bisect
10
+ import io
11
+ import logging
12
+ import os
13
+ import pkgutil
14
+ import sys
15
+ import types
16
+ import zipimport
17
+
18
+ from . import DistlibException
19
+ from .util import cached_property, get_cache_base, Cache
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ cache = None # created when needed
25
+
26
+
27
class ResourceCache(Cache):
    """
    A cache which materialises resources as files under a base directory.
    """

    def __init__(self, base=None):
        """
        :param base: The cache directory. When omitted, a
                     ``resource-cache`` directory under the value returned
                     by ``get_cache_base()`` is used.
        """
        if base is None:
            # Use native string to avoid issues on 2.x: see Python #20140.
            base = os.path.join(get_cache_base(), str('resource-cache'))
        super(ResourceCache, self).__init__(base)

    def is_stale(self, resource, path):
        """
        Is the cache stale for the given resource?

        :param resource: The :class:`Resource` being cached.
        :param path: The path of the resource in the cache.
        :return: True if the cache is stale.
        """
        # Cache invalidation is a hard problem :-)
        return True

    def get(self, resource):
        """
        Get a resource into the cache.

        :param resource: A :class:`Resource` instance.
        :return: The pathname of the resource in the cache.
        """
        prefix, path = resource.finder.get_cache_info(resource)
        if prefix is None:
            # The finder reports no prefix: its path is returned as-is and
            # nothing is written.
            return path

        cached = os.path.join(self.base, self.prefix_to_dir(prefix), path)
        parent = os.path.dirname(cached)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        if not os.path.exists(cached) or self.is_stale(resource, path):
            # write the bytes of the resource to the cache location
            with open(cached, 'wb') as out:
                out.write(resource.bytes)
        return cached
69
+
70
+
71
class ResourceBase(object):
    """
    Common base for resources: records the finder that produced the
    resource and the resource's name.
    """

    def __init__(self, finder, name):
        self.finder, self.name = finder, name
75
+
76
+
77
class Resource(ResourceBase):
    """
    A class representing an in-package resource, such as a data file. This is
    not normally instantiated by user code, but rather by a
    :class:`ResourceFinder` which manages the resource.
    """
    is_container = False  # Backwards compatibility

    def as_stream(self):
        """
        Get the resource as a stream.

        This is a method rather than a property to make it obvious that the
        finder is asked for a stream on every call.
        """
        return self.finder.get_stream(self)

    @cached_property
    def file_path(self):
        """Path of this resource in the module-level resource cache."""
        global cache
        # The shared cache is created on first use.
        cache = ResourceCache() if cache is None else cache
        return cache.get(self)

    @cached_property
    def bytes(self):
        """The resource's content, as returned by the finder."""
        return self.finder.get_bytes(self)

    @cached_property
    def size(self):
        """The resource's size, as returned by the finder."""
        return self.finder.get_size(self)
108
+
109
+
110
class ResourceContainer(ResourceBase):
    """
    A resource which contains other resources.
    """
    is_container = True  # Backwards compatibility

    @cached_property
    def resources(self):
        """The contained resource names, as returned by the finder."""
        return self.finder.get_resources(self)
116
+
117
+
118
class ResourceFinder(object):
    """
    Resource finder for file system resources.
    """

    # Compiled artefacts which are never reported as resources.
    skipped_extensions = ('.pyc', '.pyo')
    if sys.platform.startswith('java'):
        skipped_extensions += ('.class',)

    def __init__(self, module):
        """
        :param module: The module whose directory (taken from its
                       ``__file__``) is the base for resource lookups.
        """
        self.module = module
        self.loader = getattr(module, '__loader__', None)
        self.base = os.path.dirname(getattr(module, '__file__', ''))

    def _adjust_path(self, path):
        """Return the canonical form of *path* (symlinks resolved)."""
        return os.path.realpath(path)

    def _make_path(self, resource_name):
        """Convert a '/'-separated resource name to an adjusted path."""
        # Issue #50: need to preserve type of path on Python 2.x
        # like os.path._get_sep
        # bytes names should only happen on 2.x
        sep = b'/' if isinstance(resource_name, bytes) else '/'
        joined = os.path.join(self.base, *resource_name.split(sep))
        return self._adjust_path(joined)

    def _find(self, path):
        """Return True if *path* exists."""
        return os.path.exists(path)

    def get_cache_info(self, resource):
        """Return a ``(prefix, path)`` pair; the prefix is always None."""
        return None, resource.path

    def find(self, resource_name):
        """
        Find a resource by name.

        :param resource_name: The '/'-separated name of the resource.
        :return: A :class:`ResourceContainer` for a directory, a
                 :class:`Resource` for anything else that exists, or None
                 if nothing is found. The result has its ``path`` set.
        """
        path = self._make_path(resource_name)
        if not self._find(path):
            return None
        kind = ResourceContainer if self._is_directory(path) else Resource
        found = kind(self, resource_name)
        found.path = path
        return found

    def get_stream(self, resource):
        """Return a new binary stream opened on the resource's path."""
        return open(resource.path, 'rb')

    def get_bytes(self, resource):
        """Return the whole content of the resource as bytes."""
        with open(resource.path, 'rb') as stream:
            return stream.read()

    def get_size(self, resource):
        """Return the size of the resource's file."""
        return os.path.getsize(resource.path)

    def get_resources(self, resource):
        """
        Return the set of entry names in a container, leaving out
        ``__pycache__`` and names ending in a skipped extension.
        """
        skipped = self.skipped_extensions
        return {entry for entry in os.listdir(resource.path)
                if entry != '__pycache__' and not entry.endswith(skipped)}

    def is_container(self, resource):
        """Return True if the resource's path is a directory."""
        return self._is_directory(resource.path)

    _is_directory = staticmethod(os.path.isdir)

    def iterator(self, resource_name):
        """
        Yield the named resource followed by the resources beneath it.

        Containers are queued and yielded when they are taken off the
        queue; non-container children are yielded as soon as they are seen.
        Nothing is yielded if the named resource cannot be found.
        """
        root = self.find(resource_name)
        if root is None:
            return
        pending = [root]
        while pending:
            current = pending.pop(0)
            yield current
            if not current.is_container:
                continue
            parent_name = current.name
            for child_name in current.resources:
                if parent_name:
                    full_name = '/'.join([parent_name, child_name])
                else:
                    full_name = child_name
                child = self.find(full_name)
                if child.is_container:
                    pending.append(child)
                else:
                    yield child
206
+
207
+
208
class ZipResourceFinder(ResourceFinder):
    """
    Resource finder for resources in .zip files.
    """

    def __init__(self, module):
        """
        :param module: A module whose ``__loader__`` exposes ``archive``
                       (and is used for ``get_data`` and ``prefix``).
        """
        super(ZipResourceFinder, self).__init__(module)
        archive = self.loader.archive
        # Number of leading characters (archive path plus one separator) to
        # drop from a full path to obtain the name inside the archive.
        self.prefix_len = 1 + len(archive)
        # PyPy doesn't have a _files attr on zipimporter, and you can't set one
        if hasattr(self.loader, '_files'):
            self._files = self.loader._files
        else:
            self._files = zipimport._zip_directory_cache[archive]
        # Sorted archive entry names, searched with bisect.
        self.index = sorted(self._files)

    def _adjust_path(self, path):
        """Return *path* unchanged."""
        return path

    def _find(self, path):
        """
        Return True if *path* names an archive entry, or is a prefix
        (directory) of at least one archive entry.
        """
        path = path[self.prefix_len:]
        found = path in self._files
        if not found:
            if path and path[-1] != os.sep:
                path = path + os.sep
            pos = bisect.bisect(self.index, path)
            found = (pos < len(self.index) and
                     self.index[pos].startswith(path))
        if found:
            logger.debug('_find worked: %r %r', path, self.loader.prefix)
        else:
            logger.debug('_find failed: %r %r', path, self.loader.prefix)
        return found

    def get_cache_info(self, resource):
        """Return ``(archive, path of the resource within the archive)``."""
        archive = self.loader.archive
        return archive, resource.path[1 + len(archive):]

    def get_bytes(self, resource):
        """Return the resource's content via the loader's ``get_data``."""
        return self.loader.get_data(resource.path)

    def get_stream(self, resource):
        """Return an in-memory binary stream over the resource's content."""
        return io.BytesIO(self.get_bytes(resource))

    def get_size(self, resource):
        """Return element 3 of the archive directory entry for the resource."""
        inner = resource.path[self.prefix_len:]
        return self._files[inner][3]

    def get_resources(self, resource):
        """Return the set of names of the container's immediate children."""
        path = resource.path[self.prefix_len:]
        if path and path[-1] != os.sep:
            path += os.sep
        skip = len(path)
        children = set()
        start = bisect.bisect(self.index, path)
        for pos in range(start, len(self.index)):
            entry = self.index[pos]
            if not entry.startswith(path):
                break
            # only immediate children
            children.add(entry[skip:].split(os.sep, 1)[0])
        return children

    def _is_directory(self, path):
        """Return True if some archive entry lies beneath *path*."""
        path = path[self.prefix_len:]
        if path and path[-1] != os.sep:
            path += os.sep
        pos = bisect.bisect(self.index, path)
        return pos < len(self.index) and self.index[pos].startswith(path)
284
+
285
+
286
# Maps the type of a module's loader to the finder class (or factory) used
# to locate resources for modules loaded by it. type(None) covers modules
# which have no __loader__ attribute.
_finder_registry = {
    type(None): ResourceFinder,
    zipimport.zipimporter: ZipResourceFinder
}

# Register the import system's own file-based loaders where available; if
# neither module can be imported, or an expected attribute is missing, the
# registry is left with whatever was added up to that point.
try:
    # In Python 3.6, _frozen_importlib -> _frozen_importlib_external
    try:
        import _frozen_importlib_external as _fi
    except ImportError:
        import _frozen_importlib as _fi
    _finder_registry[_fi.SourceFileLoader] = ResourceFinder
    _finder_registry[_fi.FileFinder] = ResourceFinder
    # See issue #146
    _finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
    # Don't leave the temporary alias in the module namespace.
    del _fi
except (ImportError, AttributeError):
    pass
304
+
305
+
306
def register_finder(loader, finder_maker):
    """
    Register a finder factory for loaders of the same type as *loader*.

    :param loader: A loader instance; only its type is recorded.
    :param finder_maker: A callable which is passed a module and returns a
                         finder for it.
    """
    loader_type = type(loader)
    _finder_registry[loader_type] = finder_maker
308
+
309
+
310
+ _finder_cache = {}
311
+
312
+
313
def finder(package):
    """
    Return a resource finder for a package.

    Finders are cached per package name, so repeated calls return the same
    instance. The package is imported if it is not already loaded.

    :param package: The name of the package.
    :return: A :class:`ResourceFinder` instance for the package.
    :raises DistlibException: If the name refers to a plain module rather
                              than a package, or no finder is registered
                              for the type of the package's loader.
    """
    if package in _finder_cache:
        return _finder_cache[package]

    if package not in sys.modules:
        __import__(package)
    module = sys.modules[package]
    # Only packages have a __path__ attribute.
    if getattr(module, '__path__', None) is None:
        raise DistlibException('You cannot get a finder for a module, '
                               'only for a package')
    loader = getattr(module, '__loader__', None)
    maker = _finder_registry.get(type(loader))
    if maker is None:
        raise DistlibException('Unable to locate finder for %r' % package)
    made = maker(module)
    _finder_cache[package] = made
    return made
336
+
337
+
338
+ _dummy_module = types.ModuleType(str('__dummy__'))
339
+
340
+
341
def finder_for_path(path):
    """
    Return a resource finder for a path, which should represent a container.

    :param path: The path.
    :return: A :class:`ResourceFinder` instance for the path, or None if no
             finder is registered for the type of the path's importer.
    """
    # calls any path hooks, gets importer into cache
    pkgutil.get_importer(path)
    loader = sys.path_importer_cache.get(path)
    maker = _finder_registry.get(type(loader))
    if not maker:
        return None
    # The shared dummy module is pointed at the path before being handed
    # to the finder factory.
    _dummy_module.__file__ = os.path.join(path, '')
    _dummy_module.__loader__ = loader
    return maker(_dummy_module)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/scripts.py ADDED
@@ -0,0 +1,452 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ from io import BytesIO
8
+ import logging
9
+ import os
10
+ import re
11
+ import struct
12
+ import sys
13
+ import time
14
+ from zipfile import ZipInfo
15
+
16
+ from .compat import sysconfig, detect_encoding, ZipFile
17
+ from .resources import finder
18
+ from .util import (FileOperator, get_export_entry, convert_path,
19
+ get_executable, get_platform, in_venv)
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ _DEFAULT_MANIFEST = '''
24
+ <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
25
+ <assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
26
+ <assemblyIdentity version="1.0.0.0"
27
+ processorArchitecture="X86"
28
+ name="%s"
29
+ type="win32"/>
30
+
31
+ <!-- Identify the application security requirements. -->
32
+ <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
33
+ <security>
34
+ <requestedPrivileges>
35
+ <requestedExecutionLevel level="asInvoker" uiAccess="false"/>
36
+ </requestedPrivileges>
37
+ </security>
38
+ </trustInfo>
39
+ </assembly>'''.strip()
40
+
41
+ # check if Python is called on the first line with this expression
42
+ FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
43
+ SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*-
44
+ import re
45
+ import sys
46
+ from %(module)s import %(import_name)s
47
+ if __name__ == '__main__':
48
+ sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
49
+ sys.exit(%(func)s())
50
+ '''
51
+
52
+
53
def enquote_executable(executable):
    """
    Wrap an executable path in double quotes if it contains spaces.

    For a ``/usr/bin/env <program>`` value only the program part is
    quoted, e.g. ``/usr/bin/env "/dir with spaces/bin/jython"`` rather than
    ``"/usr/bin/env /dir with spaces/bin/jython"``. Values without spaces,
    or whose relevant part already starts with a double quote, are returned
    unchanged.

    :param executable: The executable, as a string.
    :return: The possibly quoted executable string.
    """
    if ' ' not in executable:
        return executable

    if executable.startswith('/usr/bin/env '):
        # make sure we quote only the executable in case of env
        env, _, target = executable.partition(' ')
        if ' ' in target and not target.startswith('"'):
            return '%s "%s"' % (env, target)
        return executable

    if executable.startswith('"'):
        return executable
    return '"%s"' % executable
67
+
68
+
69
+ # Keep the old name around (for now), as there is at least one project using it!
70
+ _enquote_executable = enquote_executable
71
+
72
+
73
class ScriptMaker(object):
    """
    A class to copy or create scripts from source scripts or callable
    specifications.
    """
    # Template for generated scripts; formatted with the keys ``module``,
    # ``import_name`` and ``func``.
    script_template = SCRIPT_TEMPLATE

    executable = None  # for shebangs

    def __init__(self,
                 source_dir,
                 target_dir,
                 add_launchers=True,
                 dry_run=False,
                 fileop=None):
        """
        :param source_dir: Directory against which script filenames passed
                           to :meth:`make` are resolved.
        :param target_dir: Directory into which scripts are written.
        :param add_launchers: Whether to wrap scripts in executable
                              launchers (only takes effect on Windows).
        :param dry_run: Passed to the default ``FileOperator``.
        :param fileop: The object used for file operations; a new
                       ``FileOperator(dry_run)`` is created if omitted.
        """
        self.source_dir = source_dir
        self.target_dir = target_dir
        self.add_launchers = add_launchers
        self.force = False
        self.clobber = False
        # It only makes sense to set mode bits on POSIX.
        self.set_mode = (os.name == 'posix') or (os.name == 'java'
                                                 and os._name == 'posix')
        self.variants = set(('', 'X.Y'))
        self._fileop = fileop or FileOperator(dry_run)

        self._is_nt = os.name == 'nt' or (os.name == 'java'
                                          and os._name == 'nt')
        self.version_info = sys.version_info

    def _get_alternate_executable(self, executable, options):
        """
        Return the executable to use given *options*: for GUI scripts on
        Windows, 'python' in the file name is replaced with 'pythonw'.
        """
        if options.get('gui', False) and self._is_nt:  # pragma: no cover
            dn, fn = os.path.split(executable)
            fn = fn.replace('python', 'pythonw')
            executable = os.path.join(dn, fn)
        return executable

    if sys.platform.startswith('java'):  # pragma: no cover

        def _is_shell(self, executable):
            """
            Determine if the specified executable is a script
            (contains a #! line)
            """
            try:
                with open(executable) as fp:
                    return fp.read(2) == '#!'
            except (OSError, IOError):
                logger.warning('Failed to open %s', executable)
                return False

        def _fix_jython_executable(self, executable):
            """
            Return *executable*, prefixed with ``/usr/bin/env`` unless it
            is a script on Linux or ends with ``jython.exe``.
            """
            if self._is_shell(executable):
                # Workaround for Jython is not needed on Linux systems.
                import java

                if java.lang.System.getProperty('os.name') == 'Linux':
                    return executable
            elif executable.lower().endswith('jython.exe'):
                # Use wrapper exe for Jython on Windows
                return executable
            return '/usr/bin/env %s' % executable

    def _build_shebang(self, executable, post_interp):
        """
        Build a shebang line. In the simple case (on Windows, or a shebang line
        which is not too long or contains spaces) use a simple formulation for
        the shebang. Otherwise, use /bin/sh as the executable, with a contrived
        shebang which allows the script to run either under Python or sh, using
        suitable quoting. Thanks to Harald Nordgren for his input.

        See also: http://www.in-ulm.de/~mascheck/various/shebang/#length
                  https://hg.mozilla.org/mozilla-central/file/tip/mach

        :param executable: The interpreter path, as bytes.
        :param post_interp: Interpreter arguments, as bytes (may be empty).
        :return: The shebang, as bytes.
        """
        if os.name != 'posix':
            simple_shebang = True
        else:
            # Add 3 for '#!' prefix and newline suffix.
            shebang_length = len(executable) + len(post_interp) + 3
            if sys.platform == 'darwin':
                max_shebang_length = 512
            else:
                max_shebang_length = 127
            simple_shebang = ((b' ' not in executable)
                              and (shebang_length <= max_shebang_length))

        if simple_shebang:
            result = b'#!' + executable + post_interp + b'\n'
        else:
            result = b'#!/bin/sh\n'
            result += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n'
            result += b"' '''"
        return result

    def _get_shebang(self, encoding, post_interp=b'', options=None):
        """
        Work out the interpreter to use and return the shebang for it.

        :param encoding: The encoding of the script the shebang is for; the
                         shebang must be decodable using it.
        :param post_interp: Interpreter arguments, as bytes.
        :param options: Optional dictionary of options, passed to
                        :meth:`_get_alternate_executable`.
        :return: The shebang, as bytes.
        :raises ValueError: If the shebang cannot be decoded from UTF-8 or
                            from *encoding*.
        """
        enquote = True
        if self.executable:
            executable = self.executable
            enquote = False  # assume this will be taken care of
        elif not sysconfig.is_python_build():
            executable = get_executable()
        elif in_venv():  # pragma: no cover
            executable = os.path.join(
                sysconfig.get_path('scripts'),
                'python%s' % sysconfig.get_config_var('EXE'))
        else:  # pragma: no cover
            if os.name == 'nt':
                # for Python builds from source on Windows, no Python executables with
                # a version suffix are created, so we use python.exe
                executable = os.path.join(
                    sysconfig.get_config_var('BINDIR'),
                    'python%s' % (sysconfig.get_config_var('EXE')))
            else:
                executable = os.path.join(
                    sysconfig.get_config_var('BINDIR'),
                    'python%s%s' % (sysconfig.get_config_var('VERSION'),
                                    sysconfig.get_config_var('EXE')))
        if options:
            executable = self._get_alternate_executable(executable, options)

        if sys.platform.startswith('java'):  # pragma: no cover
            executable = self._fix_jython_executable(executable)

        # Normalise case for Windows - COMMENTED OUT
        # executable = os.path.normcase(executable)
        # N.B. The normalising operation above has been commented out: See
        # issue #124. Although paths in Windows are generally case-insensitive,
        # they aren't always. For example, a path containing a ẞ (which is a
        # LATIN CAPITAL LETTER SHARP S - U+1E9E) is normcased to ß (which is a
        # LATIN SMALL LETTER SHARP S' - U+00DF). The two are not considered by
        # Windows as equivalent in path names.

        # If the user didn't specify an executable, it may be necessary to
        # cater for executable paths with spaces (not uncommon on Windows)
        if enquote:
            executable = enquote_executable(executable)
        # Issue #51: don't use fsencode, since we later try to
        # check that the shebang is decodable using utf-8.
        executable = executable.encode('utf-8')
        # in case of IronPython, play safe and enable frames support.
        # post_interp is bytes, so the flags must be bytes too: testing a
        # str for membership in bytes raises TypeError on Python 3.
        if (sys.platform == 'cli' and b'-X:Frames' not in post_interp
                and b'-X:FullFrames' not in post_interp):  # pragma: no cover
            post_interp += b' -X:Frames'
        shebang = self._build_shebang(executable, post_interp)
        # Python parser starts to read a script using UTF-8 until
        # it gets a #coding:xxx cookie. The shebang has to be the
        # first line of a file, the #coding:xxx cookie cannot be
        # written before. So the shebang has to be decodable from
        # UTF-8.
        try:
            shebang.decode('utf-8')
        except UnicodeDecodeError:  # pragma: no cover
            raise ValueError('The shebang (%r) is not decodable from utf-8' %
                             shebang)
        # If the script is encoded to a custom encoding (use a
        # #coding:xxx cookie), the shebang has to be decodable from
        # the script encoding too.
        if encoding != 'utf-8':
            try:
                shebang.decode(encoding)
            except UnicodeDecodeError:  # pragma: no cover
                raise ValueError('The shebang (%r) is not decodable '
                                 'from the script encoding (%r)' %
                                 (shebang, encoding))
        return shebang

    def _get_script_text(self, entry):
        """
        Return the text of a generated script for an export entry, by
        filling in :attr:`script_template` from the entry's ``prefix`` and
        ``suffix``.
        """
        return self.script_template % dict(
            module=entry.prefix,
            import_name=entry.suffix.split('.')[0],
            func=entry.suffix)

    # Manifest template; formatted with the executable's base name.
    manifest = _DEFAULT_MANIFEST

    def get_manifest(self, exename):
        """Return the manifest text for the executable named *exename*."""
        base = os.path.basename(exename)
        return self.manifest % base

    def _write_script(self, names, shebang, script_bytes, filenames, ext):
        """
        Write a script under each of the given names in the target
        directory.

        :param names: The file names to write.
        :param shebang: The shebang, as bytes.
        :param script_bytes: The script content, as bytes.
        :param filenames: A list to which each path written is appended.
        :param ext: 'py' or 'pyw'; selects the launcher kind and, on
                    Windows without launchers, the file extension.
        """
        use_launcher = self.add_launchers and self._is_nt
        linesep = os.linesep.encode('utf-8')
        if not shebang.endswith(linesep):
            shebang += linesep
        if not use_launcher:
            script_bytes = shebang + script_bytes
        else:  # pragma: no cover
            if ext == 'py':
                launcher = self._get_launcher('t')
            else:
                launcher = self._get_launcher('w')
            # The executable is the launcher followed by the shebang and a
            # zip archive holding the script as __main__.py.
            stream = BytesIO()
            with ZipFile(stream, 'w') as zf:
                source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
                if source_date_epoch:
                    # Use the given timestamp for the archive member
                    # instead of the current time.
                    date_time = time.gmtime(int(source_date_epoch))[:6]
                    zinfo = ZipInfo(filename='__main__.py',
                                    date_time=date_time)
                    zf.writestr(zinfo, script_bytes)
                else:
                    zf.writestr('__main__.py', script_bytes)
            zip_data = stream.getvalue()
            script_bytes = launcher + shebang + zip_data
        for name in names:
            outname = os.path.join(self.target_dir, name)
            if use_launcher:  # pragma: no cover
                n, e = os.path.splitext(outname)
                if e.startswith('.py'):
                    outname = n
                outname = '%s.exe' % outname
                try:
                    self._fileop.write_binary_file(outname, script_bytes)
                except Exception:
                    # Failed writing an executable - it might be in use.
                    logger.warning('Failed to write executable - trying to '
                                   'use .deleteme logic')
                    dfname = '%s.deleteme' % outname
                    if os.path.exists(dfname):
                        os.remove(dfname)  # Not allowed to fail here
                    os.rename(outname, dfname)  # nor here
                    self._fileop.write_binary_file(outname, script_bytes)
                    logger.debug('Able to replace executable using '
                                 '.deleteme logic')
                    try:
                        os.remove(dfname)
                    except Exception:
                        pass  # still in use - ignore error
            else:
                if self._is_nt and not outname.endswith(
                        '.' + ext):  # pragma: no cover
                    outname = '%s.%s' % (outname, ext)
                if os.path.exists(outname) and not self.clobber:
                    logger.warning('Skipping existing file %s', outname)
                    continue
                self._fileop.write_binary_file(outname, script_bytes)
                if self.set_mode:
                    self._fileop.set_executable_mode([outname])
            filenames.append(outname)

    # Placed between the script name and the version in the 'X.Y' variant.
    variant_separator = '-'

    def get_script_filenames(self, name):
        """
        Return the set of file names to write for a script called *name*,
        according to :attr:`variants`: '' gives the plain name, 'X' the
        name followed by the major version, and 'X.Y' the name, the variant
        separator and the major.minor version.
        """
        result = set()
        if '' in self.variants:
            result.add(name)
        if 'X' in self.variants:
            result.add('%s%s' % (name, self.version_info[0]))
        if 'X.Y' in self.variants:
            result.add('%s%s%s.%s' %
                       (name, self.variant_separator, self.version_info[0],
                        self.version_info[1]))
        return result

    def _make_script(self, entry, filenames, options=None):
        """
        Generate a script for an export entry and write all its variants.

        :param entry: The export entry.
        :param filenames: A list to which each path written is appended.
        :param options: Optional dictionary; ``interpreter_args`` and
                        ``gui`` are read here.
        """
        post_interp = b''
        if options:
            args = options.get('interpreter_args', [])
            if args:
                args = ' %s' % ' '.join(args)
                post_interp = args.encode('utf-8')
        shebang = self._get_shebang('utf-8', post_interp, options=options)
        script = self._get_script_text(entry).encode('utf-8')
        scriptnames = self.get_script_filenames(entry.name)
        if options and options.get('gui', False):
            ext = 'pyw'
        else:
            ext = 'py'
        self._write_script(scriptnames, shebang, script, filenames, ext)

    def _copy_script(self, script, filenames):
        """
        Copy a script from the source directory to the target directory,
        replacing its first line with a new shebang if that line invokes
        Python.

        :param script: The script's path relative to the source directory.
        :param filenames: A list to which each path written is appended.
        """
        adjust = False
        script = os.path.join(self.source_dir, convert_path(script))
        outname = os.path.join(self.target_dir, os.path.basename(script))
        if not self.force and not self._fileop.newer(script, outname):
            logger.debug('not copying %s (up-to-date)', script)
            return

        # Always open the file, but ignore failures in dry-run mode --
        # that way, we'll get accurate feedback if we can read the
        # script.
        try:
            f = open(script, 'rb')
        except IOError:  # pragma: no cover
            if not self.dry_run:
                raise
            f = None

        # The finally clause closes the file on every exit path, including
        # the empty-file early return and any exception raised while
        # copying or rewriting the script.
        try:
            if f is not None:
                first_line = f.readline()
                if not first_line:  # pragma: no cover
                    logger.warning('%s is an empty file (skipping)', script)
                    return

                match = FIRST_LINE_RE.match(
                    first_line.replace(b'\r\n', b'\n'))
                if match:
                    adjust = True
                    post_interp = match.group(1) or b''

            if not adjust:
                # Close the source before it is copied.
                if f:
                    f.close()
                self._fileop.copy_file(script, outname)
                if self.set_mode:
                    self._fileop.set_executable_mode([outname])
                filenames.append(outname)
            else:
                logger.info('copying and adjusting %s -> %s', script,
                            self.target_dir)
                if not self._fileop.dry_run:
                    encoding, lines = detect_encoding(f.readline)
                    f.seek(0)
                    shebang = self._get_shebang(encoding, post_interp)
                    if b'pythonw' in first_line:  # pragma: no cover
                        ext = 'pyw'
                    else:
                        ext = 'py'
                    n = os.path.basename(outname)
                    self._write_script([n], shebang, f.read(), filenames,
                                       ext)
        finally:
            if f:
                f.close()

    @property
    def dry_run(self):
        """Whether the underlying file operator is in dry-run mode."""
        return self._fileop.dry_run

    @dry_run.setter
    def dry_run(self, value):
        self._fileop.dry_run = value

    if os.name == 'nt' or (os.name == 'java'
                           and os._name == 'nt'):  # pragma: no cover
        # Executable launcher support.
        # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/

        def _get_launcher(self, kind):
            """
            Return the bytes of the launcher executable of the given kind
            ('t' or 'w') for the running interpreter's pointer size and
            platform.

            :raises ValueError: If the launcher resource cannot be found.
            """
            if struct.calcsize('P') == 8:  # 64-bit
                bits = '64'
            else:
                bits = '32'
            platform_suffix = '-arm' if get_platform() == 'win-arm64' else ''
            name = '%s%s%s.exe' % (kind, bits, platform_suffix)
            # Issue 31: don't hardcode an absolute package name, but
            # determine it relative to the current package
            distlib_package = __name__.rsplit('.', 1)[0]
            resource = finder(distlib_package).find(name)
            if not resource:
                msg = ('Unable to find resource %s in package %s' %
                       (name, distlib_package))
                raise ValueError(msg)
            return resource.bytes

    # Public API follows

    def make(self, specification, options=None):
        """
        Make a script.

        :param specification: The specification, which is either a valid export
                              entry specification (to make a script from a
                              callable) or a filename (to make a script by
                              copying from a source location).
        :param options: A dictionary of options controlling script generation.
        :return: A list of all absolute pathnames written to.
        """
        filenames = []
        entry = get_export_entry(specification)
        if entry is None:
            self._copy_script(specification, filenames)
        else:
            self._make_script(entry, filenames, options=options)
        return filenames

    def make_multiple(self, specifications, options=None):
        """
        Take a list of specifications and make scripts from them,
        :param specifications: A list of specifications.
        :param options: A dictionary of options, passed to :meth:`make` for
                        each specification.
        :return: A list of all absolute pathnames written to,
        """
        filenames = []
        for specification in specifications:
            filenames.extend(self.make(specification, options))
        return filenames
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/util.py ADDED
@@ -0,0 +1,2025 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright (C) 2012-2023 The Python Software Foundation.
3
+ # See LICENSE.txt and CONTRIBUTORS.txt.
4
+ #
5
+ import codecs
6
+ from collections import deque
7
+ import contextlib
8
+ import csv
9
+ from glob import iglob as std_iglob
10
+ import io
11
+ import json
12
+ import logging
13
+ import os
14
+ import py_compile
15
+ import re
16
+ import socket
17
+ try:
18
+ import ssl
19
+ except ImportError: # pragma: no cover
20
+ ssl = None
21
+ import subprocess
22
+ import sys
23
+ import tarfile
24
+ import tempfile
25
+ import textwrap
26
+
27
+ try:
28
+ import threading
29
+ except ImportError: # pragma: no cover
30
+ import dummy_threading as threading
31
+ import time
32
+
33
+ from . import DistlibException
34
+ from .compat import (string_types, text_type, shutil, raw_input, StringIO,
35
+ cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
36
+ HTTPHandler, BaseConfigurator, valid_ident,
37
+ Container, configparser, URLError, ZipFile, fsdecode,
38
+ unquote, urlparse)
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ #
43
+ # Requirement parsing code as per PEP 508
44
+ #
45
+
46
+ IDENTIFIER = re.compile(r'^([\w\.-]+)\s*')
47
+ VERSION_IDENTIFIER = re.compile(r'^([\w\.*+-]+)\s*')
48
+ COMPARE_OP = re.compile(r'^(<=?|>=?|={2,3}|[~!]=)\s*')
49
+ MARKER_OP = re.compile(r'^((<=?)|(>=?)|={2,3}|[~!]=|in|not\s+in)\s*')
50
+ OR = re.compile(r'^or\b\s*')
51
+ AND = re.compile(r'^and\b\s*')
52
+ NON_SPACE = re.compile(r'(\S+)\s*')
53
+ STRING_CHUNK = re.compile(r'([\s\w\.{}()*+#:;,/?!~`@$%^&=|<>\[\]-]+)')
54
+
55
+
56
+ def parse_marker(marker_string):
57
+ """
58
+ Parse a marker string and return a dictionary containing a marker expression.
59
+
60
+ The dictionary will contain keys "op", "lhs" and "rhs" for non-terminals in
61
+ the expression grammar, or strings. A string contained in quotes is to be
62
+ interpreted as a literal string, and a string not contained in quotes is a
63
+ variable (such as os_name).
64
+ """
65
+
66
+ def marker_var(remaining):
67
+ # either identifier, or literal string
68
+ m = IDENTIFIER.match(remaining)
69
+ if m:
70
+ result = m.groups()[0]
71
+ remaining = remaining[m.end():]
72
+ elif not remaining:
73
+ raise SyntaxError('unexpected end of input')
74
+ else:
75
+ q = remaining[0]
76
+ if q not in '\'"':
77
+ raise SyntaxError('invalid expression: %s' % remaining)
78
+ oq = '\'"'.replace(q, '')
79
+ remaining = remaining[1:]
80
+ parts = [q]
81
+ while remaining:
82
+ # either a string chunk, or oq, or q to terminate
83
+ if remaining[0] == q:
84
+ break
85
+ elif remaining[0] == oq:
86
+ parts.append(oq)
87
+ remaining = remaining[1:]
88
+ else:
89
+ m = STRING_CHUNK.match(remaining)
90
+ if not m:
91
+ raise SyntaxError('error in string literal: %s' %
92
+ remaining)
93
+ parts.append(m.groups()[0])
94
+ remaining = remaining[m.end():]
95
+ else:
96
+ s = ''.join(parts)
97
+ raise SyntaxError('unterminated string: %s' % s)
98
+ parts.append(q)
99
+ result = ''.join(parts)
100
+ remaining = remaining[1:].lstrip() # skip past closing quote
101
+ return result, remaining
102
+
103
+ def marker_expr(remaining):
104
+ if remaining and remaining[0] == '(':
105
+ result, remaining = marker(remaining[1:].lstrip())
106
+ if remaining[0] != ')':
107
+ raise SyntaxError('unterminated parenthesis: %s' % remaining)
108
+ remaining = remaining[1:].lstrip()
109
+ else:
110
+ lhs, remaining = marker_var(remaining)
111
+ while remaining:
112
+ m = MARKER_OP.match(remaining)
113
+ if not m:
114
+ break
115
+ op = m.groups()[0]
116
+ remaining = remaining[m.end():]
117
+ rhs, remaining = marker_var(remaining)
118
+ lhs = {'op': op, 'lhs': lhs, 'rhs': rhs}
119
+ result = lhs
120
+ return result, remaining
121
+
122
+ def marker_and(remaining):
123
+ lhs, remaining = marker_expr(remaining)
124
+ while remaining:
125
+ m = AND.match(remaining)
126
+ if not m:
127
+ break
128
+ remaining = remaining[m.end():]
129
+ rhs, remaining = marker_expr(remaining)
130
+ lhs = {'op': 'and', 'lhs': lhs, 'rhs': rhs}
131
+ return lhs, remaining
132
+
133
+ def marker(remaining):
134
+ lhs, remaining = marker_and(remaining)
135
+ while remaining:
136
+ m = OR.match(remaining)
137
+ if not m:
138
+ break
139
+ remaining = remaining[m.end():]
140
+ rhs, remaining = marker_and(remaining)
141
+ lhs = {'op': 'or', 'lhs': lhs, 'rhs': rhs}
142
+ return lhs, remaining
143
+
144
+ return marker(marker_string)
145
+
146
+
147
+ def parse_requirement(req):
148
+ """
149
+ Parse a requirement passed in as a string. Return a Container
150
+ whose attributes contain the various parts of the requirement.
151
+ """
152
+ remaining = req.strip()
153
+ if not remaining or remaining.startswith('#'):
154
+ return None
155
+ m = IDENTIFIER.match(remaining)
156
+ if not m:
157
+ raise SyntaxError('name expected: %s' % remaining)
158
+ distname = m.groups()[0]
159
+ remaining = remaining[m.end():]
160
+ extras = mark_expr = versions = uri = None
161
+ if remaining and remaining[0] == '[':
162
+ i = remaining.find(']', 1)
163
+ if i < 0:
164
+ raise SyntaxError('unterminated extra: %s' % remaining)
165
+ s = remaining[1:i]
166
+ remaining = remaining[i + 1:].lstrip()
167
+ extras = []
168
+ while s:
169
+ m = IDENTIFIER.match(s)
170
+ if not m:
171
+ raise SyntaxError('malformed extra: %s' % s)
172
+ extras.append(m.groups()[0])
173
+ s = s[m.end():]
174
+ if not s:
175
+ break
176
+ if s[0] != ',':
177
+ raise SyntaxError('comma expected in extras: %s' % s)
178
+ s = s[1:].lstrip()
179
+ if not extras:
180
+ extras = None
181
+ if remaining:
182
+ if remaining[0] == '@':
183
+ # it's a URI
184
+ remaining = remaining[1:].lstrip()
185
+ m = NON_SPACE.match(remaining)
186
+ if not m:
187
+ raise SyntaxError('invalid URI: %s' % remaining)
188
+ uri = m.groups()[0]
189
+ t = urlparse(uri)
190
+ # there are issues with Python and URL parsing, so this test
191
+ # is a bit crude. See bpo-20271, bpo-23505. Python doesn't
192
+ # always parse invalid URLs correctly - it should raise
193
+ # exceptions for malformed URLs
194
+ if not (t.scheme and t.netloc):
195
+ raise SyntaxError('Invalid URL: %s' % uri)
196
+ remaining = remaining[m.end():].lstrip()
197
+ else:
198
+
199
+ def get_versions(ver_remaining):
200
+ """
201
+ Return a list of operator, version tuples if any are
202
+ specified, else None.
203
+ """
204
+ m = COMPARE_OP.match(ver_remaining)
205
+ versions = None
206
+ if m:
207
+ versions = []
208
+ while True:
209
+ op = m.groups()[0]
210
+ ver_remaining = ver_remaining[m.end():]
211
+ m = VERSION_IDENTIFIER.match(ver_remaining)
212
+ if not m:
213
+ raise SyntaxError('invalid version: %s' %
214
+ ver_remaining)
215
+ v = m.groups()[0]
216
+ versions.append((op, v))
217
+ ver_remaining = ver_remaining[m.end():]
218
+ if not ver_remaining or ver_remaining[0] != ',':
219
+ break
220
+ ver_remaining = ver_remaining[1:].lstrip()
221
+ # Some packages have a trailing comma which would break things
222
+ # See issue #148
223
+ if not ver_remaining:
224
+ break
225
+ m = COMPARE_OP.match(ver_remaining)
226
+ if not m:
227
+ raise SyntaxError('invalid constraint: %s' %
228
+ ver_remaining)
229
+ if not versions:
230
+ versions = None
231
+ return versions, ver_remaining
232
+
233
+ if remaining[0] != '(':
234
+ versions, remaining = get_versions(remaining)
235
+ else:
236
+ i = remaining.find(')', 1)
237
+ if i < 0:
238
+ raise SyntaxError('unterminated parenthesis: %s' %
239
+ remaining)
240
+ s = remaining[1:i]
241
+ remaining = remaining[i + 1:].lstrip()
242
+ # As a special diversion from PEP 508, allow a version number
243
+ # a.b.c in parentheses as a synonym for ~= a.b.c (because this
244
+ # is allowed in earlier PEPs)
245
+ if COMPARE_OP.match(s):
246
+ versions, _ = get_versions(s)
247
+ else:
248
+ m = VERSION_IDENTIFIER.match(s)
249
+ if not m:
250
+ raise SyntaxError('invalid constraint: %s' % s)
251
+ v = m.groups()[0]
252
+ s = s[m.end():].lstrip()
253
+ if s:
254
+ raise SyntaxError('invalid constraint: %s' % s)
255
+ versions = [('~=', v)]
256
+
257
+ if remaining:
258
+ if remaining[0] != ';':
259
+ raise SyntaxError('invalid requirement: %s' % remaining)
260
+ remaining = remaining[1:].lstrip()
261
+
262
+ mark_expr, remaining = parse_marker(remaining)
263
+
264
+ if remaining and remaining[0] != '#':
265
+ raise SyntaxError('unexpected trailing data: %s' % remaining)
266
+
267
+ if not versions:
268
+ rs = distname
269
+ else:
270
+ rs = '%s %s' % (distname, ', '.join(
271
+ ['%s %s' % con for con in versions]))
272
+ return Container(name=distname,
273
+ extras=extras,
274
+ constraints=versions,
275
+ marker=mark_expr,
276
+ url=uri,
277
+ requirement=rs)
278
+
279
+
280
+ def get_resources_dests(resources_root, rules):
281
+ """Find destinations for resources files"""
282
+
283
+ def get_rel_path(root, path):
284
+ # normalizes and returns a lstripped-/-separated path
285
+ root = root.replace(os.path.sep, '/')
286
+ path = path.replace(os.path.sep, '/')
287
+ assert path.startswith(root)
288
+ return path[len(root):].lstrip('/')
289
+
290
+ destinations = {}
291
+ for base, suffix, dest in rules:
292
+ prefix = os.path.join(resources_root, base)
293
+ for abs_base in iglob(prefix):
294
+ abs_glob = os.path.join(abs_base, suffix)
295
+ for abs_path in iglob(abs_glob):
296
+ resource_file = get_rel_path(resources_root, abs_path)
297
+ if dest is None: # remove the entry if it was here
298
+ destinations.pop(resource_file, None)
299
+ else:
300
+ rel_path = get_rel_path(abs_base, abs_path)
301
+ rel_dest = dest.replace(os.path.sep, '/').rstrip('/')
302
+ destinations[resource_file] = rel_dest + '/' + rel_path
303
+ return destinations
304
+
305
+
306
+ def in_venv():
307
+ if hasattr(sys, 'real_prefix'):
308
+ # virtualenv venvs
309
+ result = True
310
+ else:
311
+ # PEP 405 venvs
312
+ result = sys.prefix != getattr(sys, 'base_prefix', sys.prefix)
313
+ return result
314
+
315
+
316
+ def get_executable():
317
+ # The __PYVENV_LAUNCHER__ dance is apparently no longer needed, as
318
+ # changes to the stub launcher mean that sys.executable always points
319
+ # to the stub on OS X
320
+ # if sys.platform == 'darwin' and ('__PYVENV_LAUNCHER__'
321
+ # in os.environ):
322
+ # result = os.environ['__PYVENV_LAUNCHER__']
323
+ # else:
324
+ # result = sys.executable
325
+ # return result
326
+ # Avoid normcasing: see issue #143
327
+ # result = os.path.normcase(sys.executable)
328
+ result = sys.executable
329
+ if not isinstance(result, text_type):
330
+ result = fsdecode(result)
331
+ return result
332
+
333
+
334
+ def proceed(prompt, allowed_chars, error_prompt=None, default=None):
335
+ p = prompt
336
+ while True:
337
+ s = raw_input(p)
338
+ p = prompt
339
+ if not s and default:
340
+ s = default
341
+ if s:
342
+ c = s[0].lower()
343
+ if c in allowed_chars:
344
+ break
345
+ if error_prompt:
346
+ p = '%c: %s\n%s' % (c, error_prompt, prompt)
347
+ return c
348
+
349
+
350
+ def extract_by_key(d, keys):
351
+ if isinstance(keys, string_types):
352
+ keys = keys.split()
353
+ result = {}
354
+ for key in keys:
355
+ if key in d:
356
+ result[key] = d[key]
357
+ return result
358
+
359
+
360
+ def read_exports(stream):
361
+ if sys.version_info[0] >= 3:
362
+ # needs to be a text stream
363
+ stream = codecs.getreader('utf-8')(stream)
364
+ # Try to load as JSON, falling back on legacy format
365
+ data = stream.read()
366
+ stream = StringIO(data)
367
+ try:
368
+ jdata = json.load(stream)
369
+ result = jdata['extensions']['python.exports']['exports']
370
+ for group, entries in result.items():
371
+ for k, v in entries.items():
372
+ s = '%s = %s' % (k, v)
373
+ entry = get_export_entry(s)
374
+ assert entry is not None
375
+ entries[k] = entry
376
+ return result
377
+ except Exception:
378
+ stream.seek(0, 0)
379
+
380
+ def read_stream(cp, stream):
381
+ if hasattr(cp, 'read_file'):
382
+ cp.read_file(stream)
383
+ else:
384
+ cp.readfp(stream)
385
+
386
+ cp = configparser.ConfigParser()
387
+ try:
388
+ read_stream(cp, stream)
389
+ except configparser.MissingSectionHeaderError:
390
+ stream.close()
391
+ data = textwrap.dedent(data)
392
+ stream = StringIO(data)
393
+ read_stream(cp, stream)
394
+
395
+ result = {}
396
+ for key in cp.sections():
397
+ result[key] = entries = {}
398
+ for name, value in cp.items(key):
399
+ s = '%s = %s' % (name, value)
400
+ entry = get_export_entry(s)
401
+ assert entry is not None
402
+ # entry.dist = self
403
+ entries[name] = entry
404
+ return result
405
+
406
+
407
+ def write_exports(exports, stream):
408
+ if sys.version_info[0] >= 3:
409
+ # needs to be a text stream
410
+ stream = codecs.getwriter('utf-8')(stream)
411
+ cp = configparser.ConfigParser()
412
+ for k, v in exports.items():
413
+ # TODO check k, v for valid values
414
+ cp.add_section(k)
415
+ for entry in v.values():
416
+ if entry.suffix is None:
417
+ s = entry.prefix
418
+ else:
419
+ s = '%s:%s' % (entry.prefix, entry.suffix)
420
+ if entry.flags:
421
+ s = '%s [%s]' % (s, ', '.join(entry.flags))
422
+ cp.set(k, entry.name, s)
423
+ cp.write(stream)
424
+
425
+
426
+ @contextlib.contextmanager
427
+ def tempdir():
428
+ td = tempfile.mkdtemp()
429
+ try:
430
+ yield td
431
+ finally:
432
+ shutil.rmtree(td)
433
+
434
+
435
+ @contextlib.contextmanager
436
+ def chdir(d):
437
+ cwd = os.getcwd()
438
+ try:
439
+ os.chdir(d)
440
+ yield
441
+ finally:
442
+ os.chdir(cwd)
443
+
444
+
445
+ @contextlib.contextmanager
446
+ def socket_timeout(seconds=15):
447
+ cto = socket.getdefaulttimeout()
448
+ try:
449
+ socket.setdefaulttimeout(seconds)
450
+ yield
451
+ finally:
452
+ socket.setdefaulttimeout(cto)
453
+
454
+
455
+ class cached_property(object):
456
+
457
+ def __init__(self, func):
458
+ self.func = func
459
+ # for attr in ('__name__', '__module__', '__doc__'):
460
+ # setattr(self, attr, getattr(func, attr, None))
461
+
462
+ def __get__(self, obj, cls=None):
463
+ if obj is None:
464
+ return self
465
+ value = self.func(obj)
466
+ object.__setattr__(obj, self.func.__name__, value)
467
+ # obj.__dict__[self.func.__name__] = value = self.func(obj)
468
+ return value
469
+
470
+
471
+ def convert_path(pathname):
472
+ """Return 'pathname' as a name that will work on the native filesystem.
473
+
474
+ The path is split on '/' and put back together again using the current
475
+ directory separator. Needed because filenames in the setup script are
476
+ always supplied in Unix style, and have to be converted to the local
477
+ convention before we can actually use them in the filesystem. Raises
478
+ ValueError on non-Unix-ish systems if 'pathname' either starts or
479
+ ends with a slash.
480
+ """
481
+ if os.sep == '/':
482
+ return pathname
483
+ if not pathname:
484
+ return pathname
485
+ if pathname[0] == '/':
486
+ raise ValueError("path '%s' cannot be absolute" % pathname)
487
+ if pathname[-1] == '/':
488
+ raise ValueError("path '%s' cannot end with '/'" % pathname)
489
+
490
+ paths = pathname.split('/')
491
+ while os.curdir in paths:
492
+ paths.remove(os.curdir)
493
+ if not paths:
494
+ return os.curdir
495
+ return os.path.join(*paths)
496
+
497
+
498
+ class FileOperator(object):
499
+
500
+ def __init__(self, dry_run=False):
501
+ self.dry_run = dry_run
502
+ self.ensured = set()
503
+ self._init_record()
504
+
505
+ def _init_record(self):
506
+ self.record = False
507
+ self.files_written = set()
508
+ self.dirs_created = set()
509
+
510
+ def record_as_written(self, path):
511
+ if self.record:
512
+ self.files_written.add(path)
513
+
514
+ def newer(self, source, target):
515
+ """Tell if the target is newer than the source.
516
+
517
+ Returns true if 'source' exists and is more recently modified than
518
+ 'target', or if 'source' exists and 'target' doesn't.
519
+
520
+ Returns false if both exist and 'target' is the same age or younger
521
+ than 'source'. Raise PackagingFileError if 'source' does not exist.
522
+
523
+ Note that this test is not very accurate: files created in the same
524
+ second will have the same "age".
525
+ """
526
+ if not os.path.exists(source):
527
+ raise DistlibException("file '%r' does not exist" %
528
+ os.path.abspath(source))
529
+ if not os.path.exists(target):
530
+ return True
531
+
532
+ return os.stat(source).st_mtime > os.stat(target).st_mtime
533
+
534
+ def copy_file(self, infile, outfile, check=True):
535
+ """Copy a file respecting dry-run and force flags.
536
+ """
537
+ self.ensure_dir(os.path.dirname(outfile))
538
+ logger.info('Copying %s to %s', infile, outfile)
539
+ if not self.dry_run:
540
+ msg = None
541
+ if check:
542
+ if os.path.islink(outfile):
543
+ msg = '%s is a symlink' % outfile
544
+ elif os.path.exists(outfile) and not os.path.isfile(outfile):
545
+ msg = '%s is a non-regular file' % outfile
546
+ if msg:
547
+ raise ValueError(msg + ' which would be overwritten')
548
+ shutil.copyfile(infile, outfile)
549
+ self.record_as_written(outfile)
550
+
551
+ def copy_stream(self, instream, outfile, encoding=None):
552
+ assert not os.path.isdir(outfile)
553
+ self.ensure_dir(os.path.dirname(outfile))
554
+ logger.info('Copying stream %s to %s', instream, outfile)
555
+ if not self.dry_run:
556
+ if encoding is None:
557
+ outstream = open(outfile, 'wb')
558
+ else:
559
+ outstream = codecs.open(outfile, 'w', encoding=encoding)
560
+ try:
561
+ shutil.copyfileobj(instream, outstream)
562
+ finally:
563
+ outstream.close()
564
+ self.record_as_written(outfile)
565
+
566
+ def write_binary_file(self, path, data):
567
+ self.ensure_dir(os.path.dirname(path))
568
+ if not self.dry_run:
569
+ if os.path.exists(path):
570
+ os.remove(path)
571
+ with open(path, 'wb') as f:
572
+ f.write(data)
573
+ self.record_as_written(path)
574
+
575
+ def write_text_file(self, path, data, encoding):
576
+ self.write_binary_file(path, data.encode(encoding))
577
+
578
+ def set_mode(self, bits, mask, files):
579
+ if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
580
+ # Set the executable bits (owner, group, and world) on
581
+ # all the files specified.
582
+ for f in files:
583
+ if self.dry_run:
584
+ logger.info("changing mode of %s", f)
585
+ else:
586
+ mode = (os.stat(f).st_mode | bits) & mask
587
+ logger.info("changing mode of %s to %o", f, mode)
588
+ os.chmod(f, mode)
589
+
590
+ set_executable_mode = lambda s, f: s.set_mode(0o555, 0o7777, f)
591
+
592
+ def ensure_dir(self, path):
593
+ path = os.path.abspath(path)
594
+ if path not in self.ensured and not os.path.exists(path):
595
+ self.ensured.add(path)
596
+ d, f = os.path.split(path)
597
+ self.ensure_dir(d)
598
+ logger.info('Creating %s' % path)
599
+ if not self.dry_run:
600
+ os.mkdir(path)
601
+ if self.record:
602
+ self.dirs_created.add(path)
603
+
604
+ def byte_compile(self,
605
+ path,
606
+ optimize=False,
607
+ force=False,
608
+ prefix=None,
609
+ hashed_invalidation=False):
610
+ dpath = cache_from_source(path, not optimize)
611
+ logger.info('Byte-compiling %s to %s', path, dpath)
612
+ if not self.dry_run:
613
+ if force or self.newer(path, dpath):
614
+ if not prefix:
615
+ diagpath = None
616
+ else:
617
+ assert path.startswith(prefix)
618
+ diagpath = path[len(prefix):]
619
+ compile_kwargs = {}
620
+ if hashed_invalidation and hasattr(py_compile,
621
+ 'PycInvalidationMode'):
622
+ compile_kwargs[
623
+ 'invalidation_mode'] = py_compile.PycInvalidationMode.CHECKED_HASH
624
+ py_compile.compile(path, dpath, diagpath, True,
625
+ **compile_kwargs) # raise error
626
+ self.record_as_written(dpath)
627
+ return dpath
628
+
629
+ def ensure_removed(self, path):
630
+ if os.path.exists(path):
631
+ if os.path.isdir(path) and not os.path.islink(path):
632
+ logger.debug('Removing directory tree at %s', path)
633
+ if not self.dry_run:
634
+ shutil.rmtree(path)
635
+ if self.record:
636
+ if path in self.dirs_created:
637
+ self.dirs_created.remove(path)
638
+ else:
639
+ if os.path.islink(path):
640
+ s = 'link'
641
+ else:
642
+ s = 'file'
643
+ logger.debug('Removing %s %s', s, path)
644
+ if not self.dry_run:
645
+ os.remove(path)
646
+ if self.record:
647
+ if path in self.files_written:
648
+ self.files_written.remove(path)
649
+
650
+ def is_writable(self, path):
651
+ result = False
652
+ while not result:
653
+ if os.path.exists(path):
654
+ result = os.access(path, os.W_OK)
655
+ break
656
+ parent = os.path.dirname(path)
657
+ if parent == path:
658
+ break
659
+ path = parent
660
+ return result
661
+
662
+ def commit(self):
663
+ """
664
+ Commit recorded changes, turn off recording, return
665
+ changes.
666
+ """
667
+ assert self.record
668
+ result = self.files_written, self.dirs_created
669
+ self._init_record()
670
+ return result
671
+
672
+ def rollback(self):
673
+ if not self.dry_run:
674
+ for f in list(self.files_written):
675
+ if os.path.exists(f):
676
+ os.remove(f)
677
+ # dirs should all be empty now, except perhaps for
678
+ # __pycache__ subdirs
679
+ # reverse so that subdirs appear before their parents
680
+ dirs = sorted(self.dirs_created, reverse=True)
681
+ for d in dirs:
682
+ flist = os.listdir(d)
683
+ if flist:
684
+ assert flist == ['__pycache__']
685
+ sd = os.path.join(d, flist[0])
686
+ os.rmdir(sd)
687
+ os.rmdir(d) # should fail if non-empty
688
+ self._init_record()
689
+
690
+
691
+ def resolve(module_name, dotted_path):
692
+ if module_name in sys.modules:
693
+ mod = sys.modules[module_name]
694
+ else:
695
+ mod = __import__(module_name)
696
+ if dotted_path is None:
697
+ result = mod
698
+ else:
699
+ parts = dotted_path.split('.')
700
+ result = getattr(mod, parts.pop(0))
701
+ for p in parts:
702
+ result = getattr(result, p)
703
+ return result
704
+
705
+
706
+ class ExportEntry(object):
707
+
708
+ def __init__(self, name, prefix, suffix, flags):
709
+ self.name = name
710
+ self.prefix = prefix
711
+ self.suffix = suffix
712
+ self.flags = flags
713
+
714
+ @cached_property
715
+ def value(self):
716
+ return resolve(self.prefix, self.suffix)
717
+
718
+ def __repr__(self): # pragma: no cover
719
+ return '<ExportEntry %s = %s:%s %s>' % (self.name, self.prefix,
720
+ self.suffix, self.flags)
721
+
722
+ def __eq__(self, other):
723
+ if not isinstance(other, ExportEntry):
724
+ result = False
725
+ else:
726
+ result = (self.name == other.name and self.prefix == other.prefix
727
+ and self.suffix == other.suffix
728
+ and self.flags == other.flags)
729
+ return result
730
+
731
+ __hash__ = object.__hash__
732
+
733
+
734
+ ENTRY_RE = re.compile(
735
+ r'''(?P<name>([^\[]\S*))
736
+ \s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
737
+ \s*(\[\s*(?P<flags>[\w-]+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
738
+ ''', re.VERBOSE)
739
+
740
+
741
+ def get_export_entry(specification):
742
+ m = ENTRY_RE.search(specification)
743
+ if not m:
744
+ result = None
745
+ if '[' in specification or ']' in specification:
746
+ raise DistlibException("Invalid specification "
747
+ "'%s'" % specification)
748
+ else:
749
+ d = m.groupdict()
750
+ name = d['name']
751
+ path = d['callable']
752
+ colons = path.count(':')
753
+ if colons == 0:
754
+ prefix, suffix = path, None
755
+ else:
756
+ if colons != 1:
757
+ raise DistlibException("Invalid specification "
758
+ "'%s'" % specification)
759
+ prefix, suffix = path.split(':')
760
+ flags = d['flags']
761
+ if flags is None:
762
+ if '[' in specification or ']' in specification:
763
+ raise DistlibException("Invalid specification "
764
+ "'%s'" % specification)
765
+ flags = []
766
+ else:
767
+ flags = [f.strip() for f in flags.split(',')]
768
+ result = ExportEntry(name, prefix, suffix, flags)
769
+ return result
770
+
771
+
772
+ def get_cache_base(suffix=None):
773
+ """
774
+ Return the default base location for distlib caches. If the directory does
775
+ not exist, it is created. Use the suffix provided for the base directory,
776
+ and default to '.distlib' if it isn't provided.
777
+
778
+ On Windows, if LOCALAPPDATA is defined in the environment, then it is
779
+ assumed to be a directory, and will be the parent directory of the result.
780
+ On POSIX, and on Windows if LOCALAPPDATA is not defined, the user's home
781
+ directory - using os.expanduser('~') - will be the parent directory of
782
+ the result.
783
+
784
+ The result is just the directory '.distlib' in the parent directory as
785
+ determined above, or with the name specified with ``suffix``.
786
+ """
787
+ if suffix is None:
788
+ suffix = '.distlib'
789
+ if os.name == 'nt' and 'LOCALAPPDATA' in os.environ:
790
+ result = os.path.expandvars('$localappdata')
791
+ else:
792
+ # Assume posix, or old Windows
793
+ result = os.path.expanduser('~')
794
+ # we use 'isdir' instead of 'exists', because we want to
795
+ # fail if there's a file with that name
796
+ if os.path.isdir(result):
797
+ usable = os.access(result, os.W_OK)
798
+ if not usable:
799
+ logger.warning('Directory exists but is not writable: %s', result)
800
+ else:
801
+ try:
802
+ os.makedirs(result)
803
+ usable = True
804
+ except OSError:
805
+ logger.warning('Unable to create %s', result, exc_info=True)
806
+ usable = False
807
+ if not usable:
808
+ result = tempfile.mkdtemp()
809
+ logger.warning('Default location unusable, using %s', result)
810
+ return os.path.join(result, suffix)
811
+
812
+
813
+ def path_to_cache_dir(path):
814
+ """
815
+ Convert an absolute path to a directory name for use in a cache.
816
+
817
+ The algorithm used is:
818
+
819
+ #. On Windows, any ``':'`` in the drive is replaced with ``'---'``.
820
+ #. Any occurrence of ``os.sep`` is replaced with ``'--'``.
821
+ #. ``'.cache'`` is appended.
822
+ """
823
+ d, p = os.path.splitdrive(os.path.abspath(path))
824
+ if d:
825
+ d = d.replace(':', '---')
826
+ p = p.replace(os.sep, '--')
827
+ return d + p + '.cache'
828
+
829
+
830
+ def ensure_slash(s):
831
+ if not s.endswith('/'):
832
+ return s + '/'
833
+ return s
834
+
835
+
836
+ def parse_credentials(netloc):
837
+ username = password = None
838
+ if '@' in netloc:
839
+ prefix, netloc = netloc.rsplit('@', 1)
840
+ if ':' not in prefix:
841
+ username = prefix
842
+ else:
843
+ username, password = prefix.split(':', 1)
844
+ if username:
845
+ username = unquote(username)
846
+ if password:
847
+ password = unquote(password)
848
+ return username, password, netloc
849
+
850
+
851
+ def get_process_umask():
852
+ result = os.umask(0o22)
853
+ os.umask(result)
854
+ return result
855
+
856
+
857
+ def is_string_sequence(seq):
858
+ result = True
859
+ i = None
860
+ for i, s in enumerate(seq):
861
+ if not isinstance(s, string_types):
862
+ result = False
863
+ break
864
+ assert i is not None
865
+ return result
866
+
867
+
868
+ PROJECT_NAME_AND_VERSION = re.compile(
869
+ '([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
870
+ '([a-z0-9_.+-]+)', re.I)
871
+ PYTHON_VERSION = re.compile(r'-py(\d\.?\d?)')
872
+
873
+
874
+ def split_filename(filename, project_name=None):
875
+ """
876
+ Extract name, version, python version from a filename (no extension)
877
+
878
+ Return name, version, pyver or None
879
+ """
880
+ result = None
881
+ pyver = None
882
+ filename = unquote(filename).replace(' ', '-')
883
+ m = PYTHON_VERSION.search(filename)
884
+ if m:
885
+ pyver = m.group(1)
886
+ filename = filename[:m.start()]
887
+ if project_name and len(filename) > len(project_name) + 1:
888
+ m = re.match(re.escape(project_name) + r'\b', filename)
889
+ if m:
890
+ n = m.end()
891
+ result = filename[:n], filename[n + 1:], pyver
892
+ if result is None:
893
+ m = PROJECT_NAME_AND_VERSION.match(filename)
894
+ if m:
895
+ result = m.group(1), m.group(3), pyver
896
+ return result
897
+
898
+
899
+ # Allow spaces in name because of legacy dists like "Twisted Core"
900
+ NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*'
901
+ r'\(\s*(?P<ver>[^\s)]+)\)$')
902
+
903
+
904
+ def parse_name_and_version(p):
905
+ """
906
+ A utility method used to get name and version from a string.
907
+
908
+ From e.g. a Provides-Dist value.
909
+
910
+ :param p: A value in a form 'foo (1.0)'
911
+ :return: The name and version as a tuple.
912
+ """
913
+ m = NAME_VERSION_RE.match(p)
914
+ if not m:
915
+ raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
916
+ d = m.groupdict()
917
+ return d['name'].strip().lower(), d['ver']
918
+
919
+
920
+ def get_extras(requested, available):
921
+ result = set()
922
+ requested = set(requested or [])
923
+ available = set(available or [])
924
+ if '*' in requested:
925
+ requested.remove('*')
926
+ result |= available
927
+ for r in requested:
928
+ if r == '-':
929
+ result.add(r)
930
+ elif r.startswith('-'):
931
+ unwanted = r[1:]
932
+ if unwanted not in available:
933
+ logger.warning('undeclared extra: %s' % unwanted)
934
+ if unwanted in result:
935
+ result.remove(unwanted)
936
+ else:
937
+ if r not in available:
938
+ logger.warning('undeclared extra: %s' % r)
939
+ result.add(r)
940
+ return result
941
+
942
+
943
+ #
944
+ # Extended metadata functionality
945
+ #
946
+
947
+
948
+ def _get_external_data(url):
949
+ result = {}
950
+ try:
951
+ # urlopen might fail if it runs into redirections,
952
+ # because of Python issue #13696. Fixed in locators
953
+ # using a custom redirect handler.
954
+ resp = urlopen(url)
955
+ headers = resp.info()
956
+ ct = headers.get('Content-Type')
957
+ if not ct.startswith('application/json'):
958
+ logger.debug('Unexpected response for JSON request: %s', ct)
959
+ else:
960
+ reader = codecs.getreader('utf-8')(resp)
961
+ # data = reader.read().decode('utf-8')
962
+ # result = json.loads(data)
963
+ result = json.load(reader)
964
+ except Exception as e:
965
+ logger.exception('Failed to get external data for %s: %s', url, e)
966
+ return result
967
+
968
+
969
+ _external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
970
+
971
+
972
+ def get_project_data(name):
973
+ url = '%s/%s/project.json' % (name[0].upper(), name)
974
+ url = urljoin(_external_data_base_url, url)
975
+ result = _get_external_data(url)
976
+ return result
977
+
978
+
979
+ def get_package_data(name, version):
980
+ url = '%s/%s/package-%s.json' % (name[0].upper(), name, version)
981
+ url = urljoin(_external_data_base_url, url)
982
+ return _get_external_data(url)
983
+
984
+
985
class Cache(object):
    """
    File-system backed cache for resources which have to exist as real files,
    e.g. shared libraries. It lives here rather than in the resources module
    so that other modules (the wheel module, for instance) can use it too.
    """

    def __init__(self, base):
        """
        Set up a cache rooted at *base*, creating the directory if needed.

        :param base: The base directory where the cache should be located.
        """
        # 'isdir' rather than 'exists' is deliberate: if a plain file already
        # has that name, makedirs fails, which is the outcome we want.
        if not os.path.isdir(base):  # pragma: no cover
            os.makedirs(base)
        group_and_other_bits = os.stat(base).st_mode & 0o77
        if group_and_other_bits != 0:
            logger.warning('Directory \'%s\' is not private', base)
        self.base = os.path.abspath(os.path.normpath(base))

    def prefix_to_dir(self, prefix):
        """
        Map a resource prefix to a cache directory name by delegating to
        ``path_to_cache_dir``.
        """
        return path_to_cache_dir(prefix)

    def clear(self):
        """
        Delete every entry found directly under the cache's base directory.

        Links and regular files are removed with ``os.remove`` and
        directories with ``shutil.rmtree``.

        :return: A list of the full paths which could not be removed.
        """
        failures = []
        for entry in os.listdir(self.base):
            target = os.path.join(self.base, entry)
            try:
                if os.path.islink(target) or os.path.isfile(target):
                    os.remove(target)
                elif os.path.isdir(target):
                    shutil.rmtree(target)
            except Exception:
                # best effort: remember the failure and carry on
                failures.append(target)
        return failures
1027
+
1028
+
1029
class EventMixin(object):
    """
    A minimal publish/subscribe facility: subscribers are callables kept in
    a per-event deque and invoked in order when the event is published.
    """

    def __init__(self):
        # event name -> deque of subscriber callables
        self._subscribers = {}

    def add(self, event, subscriber, append=True):
        """
        Register a subscriber for an event.

        :param event: The name of an event.
        :param subscriber: The subscriber to be added (and called when the
                           event is published).
        :param append: If true, the subscriber goes at the end of the
                       event's existing subscriber list, otherwise at the
                       front.
        """
        queue = self._subscribers.get(event)
        if queue is None:
            self._subscribers[event] = deque([subscriber])
        elif append:
            queue.append(subscriber)
        else:
            queue.appendleft(subscriber)

    def remove(self, event, subscriber):
        """
        Unregister a subscriber from an event.

        :param event: The name of an event.
        :param subscriber: The subscriber to be removed.
        :raises ValueError: If the event has never had a subscriber added, or
                            if *subscriber* is not in the event's list.
        """
        registry = self._subscribers
        if event not in registry:
            raise ValueError('No subscribers: %r' % event)
        registry[event].remove(subscriber)

    def get_subscribers(self, event):
        """
        Return an iterator over the subscribers for an event (an empty
        iterator if there are none).

        :param event: The event to return subscribers for.
        """
        registered = self._subscribers.get(event, ())
        return iter(registered)

    def publish(self, event, *args, **kwargs):
        """
        Publish an event and collect what its subscribers return.

        Each subscriber is called as ``subscriber(event, *args, **kwargs)``.
        A subscriber which raises has the exception logged and contributes
        ``None`` to the result.

        :param event: The event to publish.
        :param args: The positional arguments to pass to the event's
                     subscribers.
        :param kwargs: The keyword arguments to pass to the event's
                       subscribers.
        :return: A list of the subscribers' return values, in call order.
        """
        outcomes = []
        for handler in self.get_subscribers(event):
            try:
                returned = handler(event, *args, **kwargs)
            except Exception:
                logger.exception('Exception during event publication')
                returned = None
            outcomes.append(returned)
        logger.debug('publish %s: args = %s, kwargs = %s, result = %s', event,
                     args, kwargs, outcomes)
        return outcomes
1098
+
1099
+
1100
+ #
1101
+ # Simple sequencing
1102
+ #
1103
class Sequencer(object):
    """
    Record "pred comes before succ" constraints between hashable steps and
    answer ordering questions about the resulting directed graph.
    """

    def __init__(self):
        self._preds = {}  # step -> set of its direct predecessors
        self._succs = {}  # step -> set of its direct successors
        self._nodes = set()  # nodes with no preds/succs

    def add_node(self, node):
        """Register *node* as a known step without adding any edge."""
        self._nodes.add(node)

    def remove_node(self, node, edges=False):
        """
        Forget a node previously registered with :meth:`add_node`.

        :param node: The node to remove; it is not an error if it was never
                     registered.
        :param edges: If true, also remove every edge into or out of *node*
                      and then drop any predecessor/successor entries which
                      have become empty.
        """
        self._nodes.discard(node)
        if edges:
            # iterate over copies: remove() mutates the underlying sets
            for p in set(self._preds.get(node, ())):
                self.remove(p, node)
            for s in set(self._succs.get(node, ())):
                self.remove(node, s)
            # Remove empties
            for k, v in list(self._preds.items()):
                if not v:
                    del self._preds[k]
            for k, v in list(self._succs.items()):
                if not v:
                    del self._succs[k]

    def add(self, pred, succ):
        """Record that *pred* must come before *succ*."""
        assert pred != succ
        self._preds.setdefault(succ, set()).add(pred)
        self._succs.setdefault(pred, set()).add(succ)

    def remove(self, pred, succ):
        """
        Remove the edge from *pred* to *succ*.

        :raises ValueError: If no such edge is recorded.
        """
        assert pred != succ
        try:
            preds = self._preds[succ]
            succs = self._succs[pred]
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of anything' % succ)
        try:
            preds.remove(pred)
            succs.remove(succ)
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of %r' % (succ, pred))

    def is_step(self, step):
        """Return whether *step* is known, either as a node or via an edge."""
        return (step in self._preds or step in self._succs
                or step in self._nodes)

    def get_steps(self, final):
        """
        Return an iterator over the steps leading up to *final*.

        The graph is walked breadth-first from *final* through predecessor
        edges; the visit order is then reversed so that *final* comes last.

        :param final: The step to finish with.
        :raises ValueError: If *final* is not a known step.
        """
        if not self.is_step(final):
            raise ValueError('Unknown: %r' % final)
        result = []
        seen = set()
        # a deque gives O(1) pops from the front of the work queue
        todo = deque([final])
        while todo:
            step = todo.popleft()
            if step in seen:
                # if a step was already seen,
                # move it to the end (so it will appear earlier
                # when reversed on return) ... but not for the
                # final step, as that would be confusing for
                # users
                if step != final:
                    result.remove(step)
                    result.append(step)
            else:
                seen.add(step)
                result.append(step)
                todo.extend(self._preds.get(step, ()))
        return reversed(result)

    @property
    def strong_connections(self):
        """
        The strongly connected components of the successor graph, as a list
        of tuples of steps (Tarjan's algorithm, implemented recursively).
        """
        # http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
        index_counter = [0]  # one-element list so the closure can update it
        stack = []
        on_stack = set()  # mirrors 'stack' for O(1) membership tests
        lowlinks = {}
        index = {}
        result = []

        graph = self._succs

        def strongconnect(node):
            # set the depth index for this node to the smallest unused index
            index[node] = index_counter[0]
            lowlinks[node] = index_counter[0]
            index_counter[0] += 1
            stack.append(node)
            on_stack.add(node)

            # Consider successors (a node with none has no entry in graph)
            for successor in graph.get(node, ()):
                if successor not in lowlinks:
                    # Successor has not yet been visited
                    strongconnect(successor)
                    lowlinks[node] = min(lowlinks[node], lowlinks[successor])
                elif successor in on_stack:
                    # the successor is in the stack and hence in the current
                    # strongly connected component (SCC)
                    lowlinks[node] = min(lowlinks[node], index[successor])

            # If `node` is a root node, pop the stack and generate an SCC
            if lowlinks[node] == index[node]:
                connected_component = []

                while True:
                    successor = stack.pop()
                    on_stack.discard(successor)
                    connected_component.append(successor)
                    if successor == node:
                        break
                # storing the result
                result.append(tuple(connected_component))

        for node in graph:
            if node not in lowlinks:
                strongconnect(node)

        return result

    @property
    def dot(self):
        """The graph rendered as Graphviz ``digraph`` source text."""
        result = ['digraph G {']
        for succ in self._preds:
            for pred in self._preds[succ]:
                result.append(' %s -> %s;' % (pred, succ))
        for node in self._nodes:
            result.append(' %s;' % node)
        result.append('}')
        return '\n'.join(result)
1239
+
1240
+
1241
+ #
1242
+ # Unarchiving functionality for zip, tar, tgz, tbz, whl
1243
+ #
1244
+
1245
# File name suffixes of the archive types named in the section header above
# (zip, tar, tgz, tbz, whl).
ARCHIVE_EXTENSIONS = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz',
                      '.whl')
1247
+
1248
+
1249
def unarchive(archive_filename, dest_dir, format=None, check=True):
    """
    Extract an archive into a directory.

    :param archive_filename: Path of the archive to extract.
    :param dest_dir: Directory to extract into.
    :param format: One of ``'zip'``, ``'tgz'``, ``'tbz'`` or ``'tar'``. If
                   ``None``, the format is deduced from the file name
                   extension.
    :param check: If true, every member name is checked before extraction
                  and extraction is refused if any member would land outside
                  *dest_dir*.
    :raises ValueError: If the format is unknown, if a member path lies
                        outside *dest_dir*, or if the tarfile extraction
                        filter rejects a member.
    """
    # tarfile.open() mode for each supported tar flavour. Looking the mode up
    # from the format means an explicitly passed format works as well as a
    # deduced one.
    tar_modes = {'tgz': 'r:gz', 'tbz': 'r:bz2', 'tar': 'r'}

    dest_dir = os.path.abspath(dest_dir)
    # Every acceptable member path is either dest_dir itself or starts with
    # dest_dir followed by a separator. Appending the separator only when it
    # is missing keeps this correct when dest_dir is the filesystem root.
    if dest_dir.endswith(os.sep):
        dest_prefix = dest_dir
    else:
        dest_prefix = dest_dir + os.sep

    def check_path(path):
        if not isinstance(path, text_type):
            path = path.decode('utf-8')
        p = os.path.abspath(os.path.join(dest_dir, path))
        if p != dest_dir and not p.startswith(dest_prefix):
            raise ValueError('path outside destination: %r' % p)

    archive = None
    if format is None:
        if archive_filename.endswith(('.zip', '.whl')):
            format = 'zip'
        elif archive_filename.endswith(('.tar.gz', '.tgz')):
            format = 'tgz'
        elif archive_filename.endswith(('.tar.bz2', '.tbz')):
            format = 'tbz'
        elif archive_filename.endswith('.tar'):
            format = 'tar'
        else:  # pragma: no cover
            raise ValueError('Unknown format for %r' % archive_filename)
    elif format != 'zip' and format not in tar_modes:
        raise ValueError('Unknown format: %r' % format)
    try:
        if format == 'zip':
            archive = ZipFile(archive_filename, 'r')
            if check:
                for name in archive.namelist():
                    check_path(name)
        else:
            archive = tarfile.open(archive_filename, tar_modes[format])
            if check:
                for name in archive.getnames():
                    check_path(name)
        if format != 'zip' and sys.version_info[0] < 3:
            # See Python issue 17153. If the dest path contains Unicode,
            # tarfile extraction fails on Python 2.x if a member path name
            # contains non-ASCII characters - it leads to an implicit
            # bytes -> unicode conversion using ASCII to decode.
            for tarinfo in archive.getmembers():
                if not isinstance(tarinfo.name, text_type):
                    tarinfo.name = tarinfo.name.decode('utf-8')

        # Limit extraction of dangerous items, if this Python
        # allows it easily. If not, just trust the input.
        # See: https://docs.python.org/3/library/tarfile.html#extraction-filters
        def extraction_filter(member, path):
            """Run tarfile.tar_filter, but raise the expected ValueError"""
            # This is only called if the current Python has tarfile filters
            try:
                return tarfile.tar_filter(member, path)
            except tarfile.FilterError as exc:
                raise ValueError(str(exc))

        if format != 'zip':
            archive.extraction_filter = extraction_filter

        archive.extractall(dest_dir)

    finally:
        if archive is not None:
            archive.close()
1315
+
1316
+
1317
def zip_dir(directory):
    """
    Zip the files of a directory tree into an in-memory buffer.

    Each file found by walking *directory* is stored under its walk root
    with the leading *directory* text sliced off, joined with the file name.

    :param directory: The root of the tree to archive.
    :return: An ``io.BytesIO`` holding the zip archive.
    """
    buffer = io.BytesIO()
    prefix_length = len(directory)
    with ZipFile(buffer, "w") as archive:
        for folder, _subdirs, filenames in os.walk(directory):
            # part of the current folder's path below 'directory'
            relative_folder = folder[prefix_length:]
            for filename in filenames:
                archive.write(os.path.join(folder, filename),
                              os.path.join(relative_folder, filename))
    return buffer
1329
+
1330
+
1331
+ #
1332
+ # Simple progress bar
1333
+ #
1334
+
1335
# Unit prefixes, one per factor of 1000, used by Progress.speed when it
# formats a rate as '<n> <prefix>B/s'.
UNITS = ('', 'K', 'M', 'G', 'T', 'P')
1336
+
1337
+
1338
class Progress(object):
    """
    Track a value moving from a minimum towards an optional maximum and
    provide text renderings (percentage, ETA, speed) of the progress made.
    """
    unknown = 'UNKNOWN'

    def __init__(self, minval=0, maxval=100):
        """
        :param minval: The starting value.
        :param maxval: The value at completion, or ``None`` if not known.
        """
        assert maxval is None or maxval >= minval
        self.min = self.cur = minval
        self.max = maxval
        self.started = None  # time.time() of the first update
        self.elapsed = 0  # seconds between the first and the latest update
        self.done = False

    def update(self, curval):
        """Set the current value and refresh the elapsed time."""
        assert self.min <= curval
        assert self.max is None or curval <= self.max
        self.cur = curval
        now = time.time()
        if self.started is None:
            self.started = now
        else:
            self.elapsed = now - self.started

    def increment(self, incr):
        """Advance the current value by the non-negative amount *incr*."""
        assert incr >= 0
        self.update(self.cur + incr)

    def start(self):
        """Reset the current value to the minimum and return ``self``."""
        self.update(self.min)
        return self

    def stop(self):
        """Mark as done, moving to the maximum first if one is known."""
        if self.max is not None:
            self.update(self.max)
        self.done = True

    @property
    def maximum(self):
        """The maximum value, or :attr:`unknown` if there is none."""
        return self.unknown if self.max is None else self.max

    @property
    def percentage(self):
        """The progress made, rendered as a five-character percentage."""
        if self.done:
            result = '100 %'
        elif self.max is None:
            result = ' ?? %'
        elif self.max == self.min:
            # Empty range: there is nothing left to do, and the general
            # formula below would divide by zero.
            result = '100 %'
        else:
            v = 100.0 * (self.cur - self.min) / (self.max - self.min)
            result = '%3d %%' % v
        return result

    def format_duration(self, duration):
        """
        Render *duration* (in seconds) as ``HH:MM:SS``, or as ``??:??:??``
        when no progress has been made yet, or when the duration is not
        positive and the maximum is unknown.
        """
        no_estimate = (duration <= 0) and self.max is None
        if no_estimate or self.cur == self.min:
            result = '??:??:??'
        else:
            result = time.strftime('%H:%M:%S', time.gmtime(duration))
        return result

    @property
    def ETA(self):
        """
        ``'Done: <elapsed>'`` once stopped, otherwise ``'ETA : <remaining>'``
        with the remaining time extrapolated linearly from progress so far.
        """
        if self.done:
            prefix = 'Done'
            t = self.elapsed
        else:
            prefix = 'ETA '
            if self.max is None:
                t = -1
            elif self.elapsed == 0 or (self.cur == self.min):
                t = 0
            else:
                # (total / done - 1) * elapsed == time still needed at the
                # average rate seen so far
                t = float(self.max - self.min)
                t /= self.cur - self.min
                t = (t - 1) * self.elapsed
        return '%s: %s' % (prefix, self.format_duration(t))

    @property
    def speed(self):
        """The average rate so far, rendered as ``'<n> <prefix>B/s'``."""
        if self.elapsed == 0:
            result = 0.0
        else:
            result = (self.cur - self.min) / self.elapsed
        for unit in UNITS:
            if result < 1000:
                break
            result /= 1000.0
        return '%d %sB/s' % (result, unit)
1426
+
1427
+
1428
+ #
1429
+ # Glob functionality
1430
+ #
1431
+
1432
# Matches one brace-delimited set such as '{a,b}', capturing what is between
# the braces.
RICH_GLOB = re.compile(r'\{([^}]*)\}')
# Matches a '**' which has, directly before or after it, a character other
# than a path separator, a comma or the adjoining brace.
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
# Matches a '}' with no '{' before it, or a '{' with no '}' after it.
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')
1435
+
1436
+
1437
def iglob(path_glob):
    """
    Extended globbing function that supports ** and {opt1,opt2,opt3}.

    The pattern is validated first, then expansion is left to ``_iglob``.

    :param path_glob: The glob pattern to expand.
    :return: An iterator over the matching paths.
    :raises ValueError: If ``**`` is not used on its own, or if the set
                        markers ``{`` and ``}`` are mismatched.
    """
    sanity_checks = (
        (_CHECK_RECURSIVE_GLOB,
         'invalid glob %r: recursive glob "**" must be used alone'),
        (_CHECK_MISMATCH_SET,
         "invalid glob %r: mismatching set marker '{' or '}'"),
    )
    for pattern, message in sanity_checks:
        if pattern.search(path_glob):
            raise ValueError(message % path_glob)
    return _iglob(path_glob)
1446
+
1447
+
1448
def _iglob(path_glob):
    """
    Generator behind :func:`iglob`; expects an already validated pattern.

    The first ``{...}`` set is expanded by recursing once per comma-separated
    alternative. A pattern with no set and no ``**`` goes straight to the
    standard library's iglob. Otherwise the pattern is split at the first
    ``**``, the part before it is walked with ``os.walk``, and the remainder
    is globbed recursively in every directory visited.
    """
    pieces = RICH_GLOB.split(path_glob, 1)
    if len(pieces) > 1:
        assert len(pieces) == 3, pieces
        head, alternatives, tail = pieces
        for alternative in alternatives.split(','):
            for match in _iglob(''.join((head, alternative, tail))):
                yield match
        return

    if '**' not in path_glob:
        for match in std_iglob(path_glob):
            yield match
        return

    start_dir, remainder = path_glob.split('**', 1)
    if start_dir == '':
        start_dir = '.'
    if remainder == '':
        remainder = '*'
    else:
        # we support both kinds of separator
        remainder = remainder.lstrip('/')
        remainder = remainder.lstrip('\\')
    for current, _subdirs, _filenames in os.walk(start_dir):
        current = os.path.normpath(current)
        for match in _iglob(os.path.join(current, remainder)):
            yield match
1474
+
1475
+
1476
# NOTE(review): the block nesting below was reconstructed from a flattened
# listing; confirm it against the upstream file before relying on it.
if ssl:
    from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
                         CertificateError)

    #
    # HTTPSConnection which verifies certificates/matches domains
    #

    class HTTPSConnection(httplib.HTTPSConnection):
        """
        HTTPS connection which requires a verifiable server certificate when
        ``ca_certs`` is set and, if ``check_domain`` is also true, checks
        that the certificate matches the host connected to.
        """
        ca_certs = None  # set this to the path to the certs file (.pem)
        check_domain = True  # only used if ca_certs is not None

        # noinspection PyPropertyAccess
        def connect(self):
            """
            Open the TCP connection, wrap it in TLS and, depending on
            ``ca_certs`` and ``check_domain``, verify the peer.
            """
            sock = socket.create_connection((self.host, self.port),
                                            self.timeout)
            # set up any proxy tunnel before TLS is layered on top
            if getattr(self, '_tunnel_host', False):
                self.sock = sock
                self._tunnel()

            context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
            if hasattr(ssl, 'OP_NO_SSLv2'):
                context.options |= ssl.OP_NO_SSLv2
            # optional client certificate
            if getattr(self, 'cert_file', None):
                context.load_cert_chain(self.cert_file, self.key_file)
            kwargs = {}
            # the certificate is only required and verified when a CA bundle
            # has been supplied
            if self.ca_certs:
                context.verify_mode = ssl.CERT_REQUIRED
                context.load_verify_locations(cafile=self.ca_certs)
                if getattr(ssl, 'HAS_SNI', False):
                    kwargs['server_hostname'] = self.host

            self.sock = context.wrap_socket(sock, **kwargs)
            if self.ca_certs and self.check_domain:
                try:
                    match_hostname(self.sock.getpeercert(), self.host)
                    logger.debug('Host verified: %s', self.host)
                except CertificateError:  # pragma: no cover
                    # do not leave a connection to a mismatched host open
                    self.sock.shutdown(socket.SHUT_RDWR)
                    self.sock.close()
                    raise

    class HTTPSHandler(BaseHTTPSHandler):
        """
        URL handler which opens HTTPS URLs through the verifying
        HTTPSConnection defined above.
        """

        def __init__(self, ca_certs, check_domain=True):
            """
            :param ca_certs: Path to the CA certificates file, copied onto
                             each connection made (if true).
            :param check_domain: Copied onto each connection made when
                                 *ca_certs* is true.
            """
            BaseHTTPSHandler.__init__(self)
            self.ca_certs = ca_certs
            self.check_domain = check_domain

        def _conn_maker(self, *args, **kwargs):
            """
            This is called to create a connection instance. Normally you'd
            pass a connection class to do_open, but it doesn't actually check for
            a class, and just expects a callable. As long as we behave just as a
            constructor would have, we should be OK. If it ever changes so that
            we *must* pass a class, we'll create an UnsafeHTTPSConnection class
            which just sets check_domain to False in the class definition, and
            choose which one to pass to do_open.
            """
            result = HTTPSConnection(*args, **kwargs)
            if self.ca_certs:
                result.ca_certs = self.ca_certs
                result.check_domain = self.check_domain
            return result

        def https_open(self, req):
            """
            Open *req*, turning a URLError whose reason mentions a failed
            certificate verification into a CertificateError.
            """
            try:
                return self.do_open(self._conn_maker, req)
            except URLError as e:
                if 'certificate verify failed' in str(e.reason):
                    raise CertificateError(
                        'Unable to verify server certificate '
                        'for %s' % req.host)
                else:
                    raise

    #
    # To guard against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
    # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
    # HTML containing a http://xyz link when it should be https://xyz),
    # you can use the following handler class, which does not allow HTTP traffic.
    #
    # It works by inheriting from HTTPHandler - so build_opener won't add a
    # handler for HTTP itself.
    #
    class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
        """HTTPS handler which refuses to open plain HTTP URLs."""

        def http_open(self, req):
            """Always raise URLError: HTTP is not allowed by this handler."""
            raise URLError(
                'Unexpected HTTP request on what should be a secure '
                'connection: %s' % req)
1567
+
1568
+
1569
+ #
1570
+ # XML-RPC with timeouts
1571
+ #
1572
class Transport(xmlrpclib.Transport):
    """XML-RPC transport over plain HTTP which applies a socket timeout."""

    def __init__(self, timeout, use_datetime=0):
        """
        :param timeout: Timeout, in seconds, for connections made by this
                        transport.
        :param use_datetime: Passed through to ``xmlrpclib.Transport``.
        """
        self.timeout = timeout
        xmlrpclib.Transport.__init__(self, use_datetime)

    def make_connection(self, host):
        """
        Return the HTTP connection for *host*, creating one (with the
        configured timeout) unless the cached connection is for that host.
        """
        h, eh, _x509 = self.get_host_info(host)
        if not self._connection or host != self._connection[0]:
            self._extra_headers = eh
            # Pass the timeout on: without it the connection would block
            # indefinitely, which defeats the purpose of this class.
            self._connection = host, httplib.HTTPConnection(
                h, timeout=self.timeout)
        return self._connection[1]
1584
+
1585
+
1586
if ssl:

    class SafeTransport(xmlrpclib.SafeTransport):
        """XML-RPC transport over HTTPS which applies a socket timeout."""

        def __init__(self, timeout, use_datetime=0):
            """
            :param timeout: Timeout, in seconds, for connections made by
                            this transport.
            :param use_datetime: Passed through to
                                 ``xmlrpclib.SafeTransport``.
            """
            self.timeout = timeout
            xmlrpclib.SafeTransport.__init__(self, use_datetime)

        def make_connection(self, host):
            """
            Return the HTTPS connection for *host*, creating one (with the
            configured timeout) unless the cached connection is for that
            host.
            """
            netloc, extra_headers, options = self.get_host_info(host)
            if not options:
                options = {}
            options['timeout'] = self.timeout
            cached = self._connection
            if not cached or host != cached[0]:
                self._extra_headers = extra_headers
                fresh = httplib.HTTPSConnection(netloc, None, **options)
                self._connection = host, fresh
            return self._connection[1]
1604
+
1605
+
1606
class ServerProxy(xmlrpclib.ServerProxy):
    """XML-RPC server proxy which accepts an optional ``timeout`` keyword."""

    def __init__(self, uri, **kwargs):
        """
        :param uri: The URI of the XML-RPC server.
        :param kwargs: Keyword arguments for ``xmlrpclib.ServerProxy``, plus
                       an optional ``timeout``. When a timeout is given, a
                       timeout-aware transport matching the URI scheme is
                       created and passed on as ``transport``.
        """
        timeout = kwargs.pop('timeout', None)
        self.timeout = timeout
        # The timeout-aware transports only come into play if a timeout
        # is specified
        if timeout is not None:
            # urlparse rather than splittype, which is deprecated as of
            # Python 3.8
            is_https = urlparse(uri)[0] == 'https'
            datetime_flag = kwargs.get('use_datetime', 0)
            transport_class = SafeTransport if is_https else Transport
            transport = transport_class(timeout, use_datetime=datetime_flag)
            kwargs['transport'] = transport
            self.transport = transport
        xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
1623
+
1624
+
1625
+ #
1626
+ # CSV functionality. This is provided because on 2.x, the csv module can't
1627
+ # handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
1628
+ #
1629
+
1630
+
1631
+ def _csv_open(fn, mode, **kwargs):
1632
+ if sys.version_info[0] < 3:
1633
+ mode += 'b'
1634
+ else:
1635
+ kwargs['newline'] = ''
1636
+ # Python 3 determines encoding from locale. Force 'utf-8'
1637
+ # file encoding to match other forced utf-8 encoding
1638
+ kwargs['encoding'] = 'utf-8'
1639
+ return open(fn, mode, **kwargs)
1640
+
1641
+
1642
class CSVBase(object):
    """
    Shared base of the CSV reader and writer: holds the dialect settings and
    makes instances usable as context managers which close ``self.stream``.
    """

    # Native str values are used because that is what the csv API needs
    # (on 2.x it won't take Unicode).
    defaults = dict(
        delimiter=str(','),
        quotechar=str('"'),
        lineterminator=str('\n'),
    )

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.stream.close()
1654
+
1655
+
1656
class CSVReader(CSVBase):
    """Iterator over the rows of a CSV file, yielding lists of text."""

    def __init__(self, **kwargs):
        """
        :param kwargs: Either ``stream``, an open stream to read from (on
                       Python 3 it is wrapped in a UTF-8 decoding reader),
                       or ``path``, the name of a file to open.
        """
        if 'stream' not in kwargs:
            self.stream = _csv_open(kwargs['path'], 'r')
        else:
            source = kwargs['stream']
            if sys.version_info[0] >= 3:
                # needs to be a text stream
                source = codecs.getreader('utf-8')(source)
            self.stream = source
        self.reader = csv.reader(self.stream, **self.defaults)

    def __iter__(self):
        return self

    def next(self):
        """Return the next row; on Python 2 byte fields are UTF-8 decoded."""
        row = next(self.reader)
        if sys.version_info[0] < 3:
            for position, field in enumerate(row):
                if not isinstance(field, text_type):
                    row[position] = field.decode('utf-8')
        return row

    __next__ = next
1681
+
1682
+
1683
class CSVWriter(CSVBase):
    """Writer of CSV rows to a file opened via ``_csv_open``."""

    def __init__(self, fn, **kwargs):
        """
        :param fn: Path of the file to write.
        :param kwargs: Accepted but not used.
        """
        self.stream = _csv_open(fn, 'w')
        self.writer = csv.writer(self.stream, **self.defaults)

    def writerow(self, row):
        """Write one row; on Python 2 text fields are UTF-8 encoded first."""
        if sys.version_info[0] < 3:
            encoded = []
            for field in row:
                if isinstance(field, text_type):
                    field = field.encode('utf-8')
                encoded.append(field)
            row = encoded
        self.writer.writerow(row)
1698
+
1699
+
1700
+ #
1701
+ # Configurator functionality
1702
+ #
1703
+
1704
+
1705
class Configurator(BaseConfigurator):
    """
    Configurator which adds an ``inc`` value converter (loading JSON from a
    file) and builds custom objects from dictionaries with a ``'()'`` key.
    """

    value_converters = dict(BaseConfigurator.value_converters,
                            inc='inc_convert')

    def __init__(self, config, base=None):
        """
        :param config: The configuration dictionary.
        :param base: Directory which relative ``inc`` paths are resolved
                     against; the current directory if not given.
        """
        super(Configurator, self).__init__(config)
        self.base = base if base else os.getcwd()

    def configure_custom(self, config):
        """
        Build an object from *config*.

        The ``'()'`` entry is the factory (resolved first if not callable),
        ``'[]'`` holds positional arguments and ``'.'`` holds attributes to
        set on the result. The remaining keys, checked with ``valid_ident``,
        become keyword arguments. All three special keys are popped from
        *config*.
        """

        def convert(value):
            # lists/tuples and plain dicts are converted element by element;
            # dicts with a '()' key are themselves built into objects
            if isinstance(value, (list, tuple)):
                return type(value)([convert(member) for member in value])
            if not isinstance(value, dict):
                return self.convert(value)
            if '()' in value:
                return self.configure_custom(value)
            converted = {}
            for key in value:
                converted[key] = convert(value[key])
            return converted

        factory = config.pop('()')
        if not callable(factory):
            factory = self.resolve(factory)
        props = config.pop('.', None)
        positional = config.pop('[]', ())
        if positional:
            positional = tuple([convert(item) for item in positional])
        # Check for valid identifiers
        keywords = dict(
            [(key, convert(config[key])) for key in config
             if valid_ident(key)])
        instance = factory(*positional, **keywords)
        if props:
            for attr_name, attr_value in props.items():
                setattr(instance, attr_name, convert(attr_value))
        return instance

    def __getitem__(self, key):
        """
        Return the configuration entry for *key*. An entry which describes a
        custom object is built on first access and the built object is
        stored in its place.
        """
        entry = self.config[key]
        if isinstance(entry, dict) and '()' in entry:
            entry = self.configure_custom(entry)
            self.config[key] = entry
        return entry

    def inc_convert(self, value):
        """Default converter for the inc:// protocol."""
        if os.path.isabs(value):
            path = value
        else:
            path = os.path.join(self.base, value)
        with codecs.open(path, 'r', encoding='utf-8') as stream:
            return json.load(stream)
1759
+
1760
+
1761
class SubprocessMixin(object):
    """
    Mixin which runs a subprocess and relays its output as it is produced.
    """

    def __init__(self, verbose=False, progress=None):
        """
        :param verbose: With no *progress* callable, whether to echo the
                        output lines to sys.stderr (true) or to write a dot
                        per line (false).
        :param progress: Optional callable invoked as
                         ``progress(line, context)``.
        """
        self.verbose = verbose
        self.progress = progress

    def reader(self, stream, context):
        """
        Read lines from a subprocess' output stream and either pass to a progress
        callable (if specified) or write progress information to sys.stderr.
        The stream is closed once it is exhausted.
        """
        callback = self.progress
        echo = self.verbose
        line = stream.readline()
        while line:
            if callback is not None:
                callback(line, context)
            else:
                if echo:
                    sys.stderr.write(line.decode('utf-8'))
                else:
                    sys.stderr.write('.')
                sys.stderr.flush()
            line = stream.readline()
        stream.close()

    def run_command(self, cmd, **kwargs):
        """
        Run *cmd* to completion, relaying its stdout and stderr via
        :meth:`reader` on two threads.

        :param cmd: The command, as accepted by ``subprocess.Popen``.
        :param kwargs: Further keyword arguments for ``subprocess.Popen``.
        :return: The finished ``Popen`` object.
        """
        process = subprocess.Popen(cmd,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   **kwargs)
        out_thread = threading.Thread(target=self.reader,
                                      args=(process.stdout, 'stdout'))
        out_thread.start()
        err_thread = threading.Thread(target=self.reader,
                                      args=(process.stderr, 'stderr'))
        err_thread.start()
        process.wait()
        out_thread.join()
        err_thread.join()
        if self.progress is not None:
            self.progress('done.', 'main')
        elif self.verbose:
            sys.stderr.write('done.\n')
        return process
1808
+
1809
+
1810
def normalize_name(name):
    """
    Normalize a python package name a la PEP 503: every run of ``-``, ``_``
    and ``.`` becomes a single ``-`` and the result is lower-cased.
    """
    # https://www.python.org/dev/peps/pep-0503/#normalized-names
    collapsed = re.sub('[-_.]+', '-', name)
    return collapsed.lower()
1814
+
1815
+
1816
+ # def _get_pypirc_command():
1817
+ # """
1818
+ # Get the distutils command for interacting with PyPI configurations.
1819
+ # :return: the command.
1820
+ # """
1821
+ # from distutils.core import Distribution
1822
+ # from distutils.config import PyPIRCCommand
1823
+ # d = Distribution()
1824
+ # return PyPIRCCommand(d)
1825
+
1826
+
1827
class PyPIRCFile(object):
    """Reader and writer for a ``.pypirc`` configuration file."""

    DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/'
    DEFAULT_REALM = 'pypi'

    def __init__(self, fn=None, url=None):
        """
        :param fn: Path of the file; ``~/.pypirc`` if not given.
        :param url: The repository (URL or server name) whose settings
                    :meth:`read` should look for; the default repository if
                    not given.
        """
        if fn is None:
            fn = os.path.join(os.path.expanduser('~'), '.pypirc')
        self.filename = fn
        self.url = url

    def read(self):
        """
        Return the settings of the server entry matching the repository.

        :return: A dict with the keys ``server``, ``username``,
                 ``password``, ``repository`` and ``realm``, or an empty
                 dict if the file does not exist or nothing matches.
        """
        result = {}
        if not os.path.exists(self.filename):
            return result

        repository = self.url or self.DEFAULT_REPOSITORY

        config = configparser.RawConfigParser()
        config.read(self.filename)
        sections = config.sections()
        if 'distutils' in sections:
            # let's get the list of servers
            index_servers = config.get('distutils', 'index-servers')
            servers = [
                line.strip() for line in index_servers.split('\n')
                if line.strip() != ''
            ]
            if not servers and 'pypi' in sections:
                # nothing set, let's try to get the default pypi
                servers = ['pypi']
            for server in servers:
                candidate = {'server': server}
                candidate['username'] = config.get(server, 'username')

                # optional params
                for key, default in (('repository',
                                      self.DEFAULT_REPOSITORY),
                                     ('realm', self.DEFAULT_REALM),
                                     ('password', None)):
                    if config.has_option(server, key):
                        candidate[key] = config.get(server, key)
                    else:
                        candidate[key] = default

                # work around people having "repository" for the "pypi"
                # section of their config set to the HTTP (rather than
                # HTTPS) URL
                if (server == 'pypi' and repository
                        in (self.DEFAULT_REPOSITORY, 'pypi')):
                    candidate['repository'] = self.DEFAULT_REPOSITORY
                    result = candidate
                    break
                if (candidate['server'] == repository
                        or candidate['repository'] == repository):
                    result = candidate
                    # Stop at the first match so that a later, non-matching
                    # server entry cannot discard it.
                    break
        elif 'server-login' in sections:
            # old format
            server = 'server-login'
            if config.has_option(server, 'repository'):
                repository = config.get(server, 'repository')
            else:
                repository = self.DEFAULT_REPOSITORY
            result = {
                'username': config.get(server, 'username'),
                'password': config.get(server, 'password'),
                'repository': repository,
                'server': server,
                'realm': self.DEFAULT_REALM
            }
        return result

    def update(self, username, password):
        """
        Store *username* and *password* in the ``pypi`` section of the file,
        creating the section (and the file) if necessary.
        """
        config = configparser.RawConfigParser()
        fn = self.filename
        config.read(fn)
        if not config.has_section('pypi'):
            config.add_section('pypi')
        config.set('pypi', 'username', username)
        config.set('pypi', 'password', password)
        # The file holds credentials: if it has to be created, make it
        # readable and writable by its owner only.
        fd = os.open(fn, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, 'w') as f:
            config.write(f)
1909
+
1910
+
1911
def _load_pypirc(index):
    """
    Read the PyPI access configuration as supported by distutils.

    :param index: An object whose ``url`` attribute names the repository to
                  look for.
    :return: The result of ``PyPIRCFile.read()`` for that repository.
    """
    rc_file = PyPIRCFile(url=index.url)
    return rc_file.read()
1916
+
1917
+
1918
def _store_pypirc(index):
    """
    Write the ``username`` and ``password`` attributes of *index* to the
    default ``PyPIRCFile`` via its ``update()`` method.
    """
    rc_file = PyPIRCFile()
    rc_file.update(index.username, index.password)
1920
+
1921
+
1922
+ #
1923
+ # get_platform()/get_host_platform() copied from Python 3.10.a0 source, with some minor
1924
+ # tweaks
1925
+ #
1926
+
1927
+
1928
def get_host_platform():
    """Return a string that identifies the current platform.

    On Windows the result is 'win-amd64', 'win-arm32' or 'win-arm64' when
    the matching marker is found in sys.version, otherwise sys.platform.

    Elsewhere, the _PYTHON_HOST_PLATFORM environment variable (set for cross
    builds) wins if present. Systems which are not POSIX, or which lack
    os.uname(), give sys.platform. Otherwise the result is built from
    os.uname(): 'linux-<machine>' on Linux, and on the other Unix flavours
    either the value of a platform-specific helper or
    '<osname>-<release>-<machine>'.
    """
    if os.name == 'nt':
        version_text = sys.version.lower()
        for marker, platform_tag in (('amd64', 'win-amd64'),
                                     ('(arm)', 'win-arm32'),
                                     ('(arm64)', 'win-arm64')):
            if marker in version_text:
                return platform_tag
        return sys.platform

    # Set for cross builds explicitly
    if "_PYTHON_HOST_PLATFORM" in os.environ:
        return os.environ["_PYTHON_HOST_PLATFORM"]

    if os.name != 'posix' or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters, and translate
    # spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith('linux'):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)

    if osname.startswith('aix'):
        from _aix_support import aix_platform
        return aix_platform()

    if osname.startswith('sunos'):
        if release[0] >= '5':  # SunOS 5 == Solaris 2
            osname = 'solaris'
            release = '%d.%s' % (int(release[0]) - 3, release[2:])
            # We can't use 'platform.architecture()[0]' because of a
            # bootstrap problem. We use a dict to get an error
            # if something suspicious happens.
            bitness = {2147483647: '32bit', 9223372036854775807: '64bit'}
            machine += '.%s' % bitness[sys.maxsize]
        # otherwise use the standard osname-release-machine representation
    elif osname.startswith('cygwin'):
        osname = 'cygwin'
        leading_version = re.compile(r'[\d.]+', re.ASCII).match(release)
        if leading_version:
            release = leading_version.group()
    elif osname.startswith('darwin'):
        import _osx_support
        try:
            from distutils import sysconfig
        except ImportError:
            import sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
            sysconfig.get_config_vars(), osname, release, machine)

    return '%s-%s-%s' % (osname, release, machine)
2010
+
2011
+
2012
# Maps a value of the VSCMD_ARG_TGT_ARCH environment variable (consulted by
# get_platform() on Windows) to the platform string to report.
_TARGET_TO_PLAT = {
    'x86': 'win32',
    'x64': 'win-amd64',
    'arm': 'win-arm32',
}
2017
+
2018
+
2019
def get_platform():
    """
    Return the platform string to use for builds.

    On Windows, a VSCMD_ARG_TGT_ARCH environment variable naming a known
    cross-compilation target selects the matching entry of
    ``_TARGET_TO_PLAT``. In every other case the result is that of
    ``get_host_platform()``.
    """
    if os.name == 'nt':
        target_arch = os.environ.get('VSCMD_ARG_TGT_ARCH')
        if target_arch in _TARGET_TO_PLAT:
            return _TARGET_TO_PLAT[target_arch]
    return get_host_platform()
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/version.py ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2012-2023 The Python Software Foundation.
4
+ # See LICENSE.txt and CONTRIBUTORS.txt.
5
+ #
6
+ """
7
+ Implementation of a flexible versioning scheme providing support for PEP-440,
8
+ setuptools-compatible and semantic versioning.
9
+ """
10
+
11
+ import logging
12
+ import re
13
+
14
+ from .compat import string_types
15
+ from .util import parse_requirement
16
+
17
+ __all__ = ['NormalizedVersion', 'NormalizedMatcher',
18
+ 'LegacyVersion', 'LegacyMatcher',
19
+ 'SemanticVersion', 'SemanticMatcher',
20
+ 'UnsupportedVersionError', 'get_scheme']
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class UnsupportedVersionError(ValueError):
    """Raised when a version string cannot be parsed by a version scheme."""
28
+
29
+
30
class Version(object):
    """Base class for parsed, comparable version objects.

    Subclasses implement :meth:`parse`, which must turn the stripped version
    string into a non-empty tuple.  That tuple is stored as ``_parts`` and
    drives equality, ordering and hashing.  Comparing instances of two
    different classes raises ``TypeError``.
    """

    def __init__(self, s):
        text = s.strip()
        self._string = text
        parsed = self.parse(text)
        self._parts = parsed
        assert isinstance(parsed, tuple)
        assert len(parsed) > 0

    def parse(self, s):
        """Return the comparison tuple for *s*; subclasses must override."""
        raise NotImplementedError('please implement in a subclass')

    def _check_compatible(self, other):
        # Only instances of exactly the same class may be compared.
        if type(self) is not type(other):
            raise TypeError('cannot compare %r and %r' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        self._check_compatible(other)
        return self._parts < other._parts

    def __gt__(self, other):
        if self.__lt__(other):
            return False
        return not self.__eq__(other)

    def __le__(self, other):
        if self.__lt__(other):
            return True
        return self.__eq__(other)

    def __ge__(self, other):
        if self.__gt__(other):
            return True
        return self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self._parts)

    def __repr__(self):
        return "%s('%s')" % (type(self).__name__, self._string)

    def __str__(self):
        return self._string

    @property
    def is_prerelease(self):
        """Whether this is a pre-release; subclasses must override."""
        raise NotImplementedError('Please implement in subclasses.')
77
+
78
+
79
class Matcher(object):
    """Base class for matching versions against a requirement string.

    The requirement is parsed into a name plus a tuple of
    ``(operator, constraint, prefix)`` triples stored in ``_parts``.
    ``constraint`` is a ``version_class`` instance, or the bare string that
    preceded a trailing ``.*`` (in which case ``prefix`` is true).
    Subclasses must set ``version_class``.
    """

    version_class = None

    # value is either a callable or the name of a method
    _operators = {
        '<': lambda v, c, p: v < c,
        '>': lambda v, c, p: v > c,
        '<=': lambda v, c, p: v == c or v < c,
        '>=': lambda v, c, p: v == c or v > c,
        '==': lambda v, c, p: v == c,
        '===': lambda v, c, p: v == c,
        # by default, compatible => >=.
        '~=': lambda v, c, p: v == c or v > c,
        '!=': lambda v, c, p: v != c,
    }

    # this is a method only to support alternative implementations
    # via overriding
    def parse_requirement(self, s):
        return parse_requirement(s)

    def __init__(self, s):
        if self.version_class is None:
            raise ValueError('Please specify a version class')
        text = s.strip()
        self._string = text
        requirement = self.parse_requirement(text)
        if not requirement:
            raise ValueError('Not valid: %r' % text)
        self.name = requirement.name
        self.key = self.name.lower()  # for case-insensitive comparisons
        parsed = []
        for op, spec in requirement.constraints or ():
            if not spec.endswith('.*'):
                # Should parse as a version, so we can create an
                # instance for the comparison
                parsed.append((op, self.version_class(spec), False))
                continue
            if op not in ('==', '!='):
                raise ValueError('\'.*\' not allowed for '
                                 '%r constraints' % op)
            # Could be a partial version (e.g. for '2.*') which is kept as
            # a string; constructing a version only validates it.
            stem = spec[:-2]
            self.version_class(stem)
            parsed.append((op, stem, True))
        self._parts = tuple(parsed)

    def match(self, version):
        """
        Check if the provided version matches the constraints.

        :param version: The version to match against this instance.
        :type version: String or :class:`Version` instance.
        """
        if isinstance(version, string_types):
            version = self.version_class(version)
        for operator, constraint, prefix in self._parts:
            check = self._operators.get(operator)
            if isinstance(check, string_types):
                # A string names a method on this matcher.
                check = getattr(self, check)
            if not check:
                raise NotImplementedError(
                    '%r not implemented '
                    'for %s' % (operator, self.__class__.__name__))
            if not check(version, constraint, prefix):
                return False
        return True

    @property
    def exact_version(self):
        """The pinned constraint if there is exactly one ``==``/``===``."""
        if len(self._parts) != 1:
            return None
        operator, constraint = self._parts[0][0], self._parts[0][1]
        if operator in ('==', '==='):
            return constraint
        return None

    def _check_compatible(self, other):
        # Matchers compare only with the same class and the same name.
        if type(self) is not type(other) or self.name != other.name:
            raise TypeError('cannot compare %s and %s' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        if self.key != other.key:
            return False
        return self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.key) + hash(self._parts)

    def __repr__(self):
        return "%s(%r)" % (type(self).__name__, self._string)

    def __str__(self):
        return self._string
177
+
178
+
179
# Grammar for PEP 440 style versions.  Group numbering is relied upon by
# the key function below: 1 = epoch (with '!'), 2 = release clause,
# 5/6 = pre-release tag/number, 8/9 = post-release tag/number,
# 11/12 = 'dev'/number, 14 = local version label (without the '+').
# The local label may contain any number of dot-separated alphanumeric
# segments (e.g. '1.0+ubuntu.1.2'), as PEP 440 allows.
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|alpha|b|beta|c|rc|pre|preview)(\d+)?)?'
                               r'(\.(post|r|rev)(\d+)?)?([._-]?(dev)(\d+)?)?'
                               r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)*))?$', re.I)
182
+
183
+
184
def _pep_440_key(s):
    """Return a sortable key for a version matching ``PEP440_VERSION_RE``.

    The key is the tuple ``(epoch, release, pre, post, dev, local)``.
    Trailing zeros are dropped from the release numbers, and absent
    pre/post/dev segments are replaced by sentinel tuples chosen so that
    keys sort in the intended order.

    Raises ``UnsupportedVersionError`` if *s* does not match the pattern.
    """
    s = s.strip()
    found = PEP440_VERSION_RE.match(s)
    if not found:
        raise UnsupportedVersionError('Not a valid version: %s' % s)
    groups = found.groups()

    def tagged(pair):
        # (tag, digits) as captured -> () when the segment is absent,
        # otherwise (tag, number) with a missing number read as 0.
        if pair == (None, None):
            return ()
        tag, digits = pair
        return tag, (0 if digits is None else int(digits))

    release = tuple(int(v) for v in groups[1].split('.'))
    while len(release) > 1 and release[-1] == 0:
        release = release[:-1]

    # The epoch group still carries its trailing '!'.
    epoch = int(groups[0][:-1]) if groups[0] else 0
    pre = tagged(groups[4:6])
    post = tagged(groups[7:9])
    dev = tagged(groups[10:12])

    local = groups[13]
    if local is None:
        local = ()
    else:
        # to ensure that numeric compares as > lexicographic, avoid
        # comparing them directly, but encode a tuple which ensures
        # correct sorting
        local = tuple((1, int(part)) if part.isdigit() else (0, part)
                      for part in local.split('.'))

    if not pre:
        # either before pre-release, or final release and after
        if dev and not post:
            pre = ('a', -1)  # before pre-release: sort before a0
        else:
            pre = ('z',)  # to sort after all pre-releases
    # now look at the state of post and dev.
    if not post:
        post = ('_',)  # sort before 'a'
    if not dev:
        dev = ('final',)

    return epoch, release, pre, post, dev, local


_normalized_key = _pep_440_key
254
+
255
+
256
class NormalizedVersion(Version):
    """A version that matches the ``PEP440_VERSION_RE`` grammar.

    Accepted, for example:
        1
        1.2         # compares equal to "1.2.0"
        1.2.0
        1.2a1
        1.2a        # a missing pre-release number is read as 0
        1.2.3b1
        1.2.3.4
        1!2.0       # with an epoch

    Rejected, for example:
        1.2.
        1.2-beta
    """
    def parse(self, s):
        result = _normalized_key(s)
        # _normalized_key loses trailing zeroes in the release
        # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
        # However, PEP 440 prefix matching needs it: for example,
        # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
        m = PEP440_VERSION_RE.match(s)  # must succeed
        groups = m.groups()
        self._release_clause = tuple(int(v) for v in groups[1].split('.'))
        return result

    PREREL_TAGS = set(['a', 'b', 'c', 'rc', 'dev'])

    @property
    def is_prerelease(self):
        """True if any segment of the parsed key carries a pre-release tag."""
        # _parts is (epoch, release, pre, post, dev, local).  The epoch is a
        # plain int, so only tuple segments may be subscripted; otherwise a
        # non-zero epoch such as '1!1.0' raises TypeError.
        return any(t[0] in self.PREREL_TAGS
                   for t in self._parts
                   if isinstance(t, tuple) and t)
290
+
291
+
292
+ def _match_prefix(x, y):
293
+ x = str(x)
294
+ y = str(y)
295
+ if x == y:
296
+ return True
297
+ if not x.startswith(y):
298
+ return False
299
+ n = len(y)
300
+ return x[n] == '.'
301
+
302
+
303
class NormalizedMatcher(Matcher):
    """Matcher for :class:`NormalizedVersion`, one method per operator."""

    version_class = NormalizedVersion

    # value is either a callable or the name of a method
    _operators = {
        '~=': '_match_compatible',
        '<': '_match_lt',
        '>': '_match_gt',
        '<=': '_match_le',
        '>=': '_match_ge',
        '==': '_match_eq',
        '===': '_match_arbitrary',
        '!=': '_match_ne',
    }

    def _adjust_local(self, version, constraint, prefix):
        """Drop the version's local part when the constraint has none."""
        if prefix:
            # The constraint is a plain string here.
            constraint_is_public = '+' not in constraint
        else:
            # both constraint and version are
            # NormalizedVersion instances.
            constraint_is_public = not constraint._parts[-1]
        if constraint_is_public and version._parts[-1]:
            public = version._string.split('+', 1)[0]
            version = self.version_class(public)
        return version, constraint

    def _match_lt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version >= constraint:
            return False
        release = '.'.join(str(n) for n in constraint._release_clause)
        return not _match_prefix(version, release)

    def _match_gt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version <= constraint:
            return False
        release = '.'.join(str(n) for n in constraint._release_clause)
        return not _match_prefix(version, release)

    def _match_le(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version <= constraint

    def _match_ge(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version >= constraint

    def _match_eq(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if prefix:
            return _match_prefix(version, constraint)
        return version == constraint

    def _match_arbitrary(self, version, constraint, prefix):
        return str(version) == str(constraint)

    def _match_ne(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if prefix:
            return not _match_prefix(version, constraint)
        return version != constraint

    def _match_compatible(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version == constraint:
            return True
        if version < constraint:
            return False
        # Compare against the constraint's release clause minus its last
        # component (unless it has only one).
        release = constraint._release_clause
        if len(release) > 1:
            release = release[:-1]
        return _match_prefix(version, '.'.join(str(n) for n in release))
388
+
389
+
390
+ _REPLACEMENTS = (
391
+ (re.compile('[.+-]$'), ''), # remove trailing puncts
392
+ (re.compile(r'^[.](\d)'), r'0.\1'), # .N -> 0.N at start
393
+ (re.compile('^[.-]'), ''), # remove leading puncts
394
+ (re.compile(r'^\((.*)\)$'), r'\1'), # remove parentheses
395
+ (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
396
+ (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'), # remove leading v(ersion)
397
+ (re.compile('[.]{2,}'), '.'), # multiple runs of '.'
398
+ (re.compile(r'\b(alfa|apha)\b'), 'alpha'), # misspelt alpha
399
+ (re.compile(r'\b(pre-alpha|prealpha)\b'),
400
+ 'pre.alpha'), # standardise
401
+ (re.compile(r'\(beta\)$'), 'beta'), # remove parentheses
402
+ )
403
+
404
+ _SUFFIX_REPLACEMENTS = (
405
+ (re.compile('^[:~._+-]+'), ''), # remove leading puncts
406
+ (re.compile('[,*")([\\]]'), ''), # remove unwanted chars
407
+ (re.compile('[~:+_ -]'), '.'), # replace illegal chars
408
+ (re.compile('[.]{2,}'), '.'), # multiple runs of '.'
409
+ (re.compile(r'\.$'), ''), # trailing '.'
410
+ )
411
+
412
+ _NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
413
+
414
+
415
def _suggest_semantic_version(s):
    """
    Try to derive a semantic version string from an arbitrary version.

    The input is stripped, lower-cased and cleaned with ``_REPLACEMENTS``;
    its numeric prefix is padded or truncated to three components and the
    remainder is cleaned with ``_SUFFIX_REPLACEMENTS`` and attached with
    '-' (if it mentions 'dev') or '+'.  Returns None when the result still
    does not satisfy ``is_semver``.
    """
    candidate = s.strip().lower()
    for pattern, replacement in _REPLACEMENTS:
        candidate = pattern.sub(replacement, candidate)
    if not candidate:
        candidate = '0.0.0'

    # Now look for numeric prefix, and separate it out from
    # the rest.
    found = _NUMERIC_PREFIX.match(candidate)
    if found is None:
        prefix, suffix = '0.0.0', candidate
    else:
        numbers = [int(n) for n in found.groups()[0].split('.')]
        # Pad up to three components (no-op when already three or more).
        numbers.extend([0] * (3 - len(numbers)))
        tail = candidate[found.end():]
        if len(numbers) > 3:
            # Surplus numeric components move to the front of the suffix.
            tail = '.'.join(str(n) for n in numbers[3:]) + tail
            numbers = numbers[:3]
        prefix = '.'.join(str(n) for n in numbers)
        suffix = tail.strip()

    if suffix:
        # massage the suffix.
        for pattern, replacement in _SUFFIX_REPLACEMENTS:
            suffix = pattern.sub(replacement, suffix)

    if suffix:
        separator = '-' if 'dev' in suffix else '+'
        result = prefix + separator + suffix
    else:
        result = prefix
    return result if is_semver(result) else None
459
+
460
+
461
def _suggest_normalized_version(s):
    """Suggest a normalized version close to the given version string.

    If ``_normalized_key`` already accepts *s*, it is returned unchanged.
    Otherwise a fixed sequence of textual clean-ups is applied to the
    lower-cased string and the result is returned if ``_normalized_key``
    then accepts it.

    @param s {str} An irrational version string.
    @returns A rational version string, or None, if couldn't determine one.
    """
    try:
        _normalized_key(s)
    except UnsupportedVersionError:
        pass
    else:
        return s  # already rational

    candidate = s.lower()

    # Literal substitutions, applied in this order.
    literal_swaps = (
        ('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
        ('beta', 'b'), ('rc', 'c'), ('-final', ''),
        ('-pre', 'c'),
        ('-release', ''), ('.release', ''), ('-stable', ''),
        ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
        ('final', ''),
    )
    for old, new in literal_swaps:
        candidate = candidate.replace(old, new)

    # Regex rewrites that run before a leading 'v' is removed.
    early_rewrites = (
        # if something ends with dev or pre, we add a 0
        (r"pre$", r"pre0"),
        (r"dev$", r"dev0"),
        # "b-2" or "a.2" at the end is probably beta, alpha, etc:
        # remove the dash or dot
        (r"([abc]|rc)[\-\.](\d+)$", r"\1\2"),
        # 1.0-dev-r371 -> 1.0.dev371
        # 0.1-dev-r79 -> 0.1.dev79
        (r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2"),
        # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
        (r"[.~]?([abc])\.?", r"\1"),
    )
    for pattern, replacement in early_rewrites:
        candidate = re.sub(pattern, replacement, candidate)

    # Clean: v0.3, v1.0
    if candidate.startswith('v'):
        candidate = candidate[1:]

    late_rewrites = (
        # Clean leading '0's on numbers.
        # TODO: unintended side-effect on, e.g., "2003.05.09"
        (r"\b0+(\d+)(?!\d)", r"\1"),
        # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0".
        (r"(\d+[abc])$", r"\g<1>0"),
        # the 'dev-rNNN' tag is a dev tag
        (r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2"),
        # clean the - when used as a pre delimiter
        (r"-(a|b|c)(\d+)$", r"\1\2"),
        # a terminal "dev" or "devel" can be changed into ".dev0"
        (r"[\.\-](dev|devel)$", r".dev0"),
        # a terminal "dev" can be changed into ".dev0"
        (r"(?![\.\-])dev$", r".dev0"),
        # a terminal "final" or "stable" can be removed
        (r"(final|stable)$", ""),
        # The 'r' and the '-' tags are post release tags
        #   0.4a1.r10       ->  0.4a1.post10
        #   0.9.33-17222    ->  0.9.33.post17222
        #   0.9.33-r17222   ->  0.9.33.post17222
        (r"\.?(r|-|-r)\.?(\d+)$", r".post\2"),
        # Clean 'r' instead of 'dev' usage:
        #   0.9.33+r17222   ->  0.9.33.dev17222
        #   1.0dev123       ->  1.0.dev123
        #   1.0.git123      ->  1.0.dev123
        #   1.0.bzr123      ->  1.0.dev123
        #   0.1a0dev.123    ->  0.1a0.dev123
        (r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2"),
        # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
        #   0.2.pre1        ->  0.2c1
        #   0.2-c1          ->  0.2c1
        #   1.0preview123   ->  1.0c123
        (r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>"),
        # Tcl/Tk uses "px" for their post release markers
        (r"p(\d+)$", r".post\1"),
    )
    for pattern, replacement in late_rewrites:
        candidate = re.sub(pattern, replacement, candidate)

    try:
        _normalized_key(candidate)
    except UnsupportedVersionError:
        return None
    return candidate
570
+
571
+ #
572
+ # Legacy version processing (distribute-compatible)
573
+ #
574
+
575
+
576
+ _VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)
577
+ _VERSION_REPLACE = {
578
+ 'pre': 'c',
579
+ 'preview': 'c',
580
+ '-': 'final-',
581
+ 'rc': 'c',
582
+ 'dev': '@',
583
+ '': None,
584
+ '.': None,
585
+ }
586
+
587
+
588
def _legacy_key(s):
    """Return the comparison key (a tuple of strings) for a legacy version.

    Numeric tokens are zero-filled to eight characters and every other
    token is prefixed with '*'; a final '*final' marker is always added.
    Trailing '00000000' tokens are removed before any '*' token, and
    trailing '*final-' tokens are removed before a '*' token that sorts
    below '*final'.
    """
    def tokens(text):
        for piece in _VERSION_PART.split(text.lower()):
            piece = _VERSION_REPLACE.get(piece, piece)
            if not piece:
                continue
            if '0' <= piece[:1] <= '9':
                yield piece.zfill(8)
            else:
                yield '*' + piece
        yield '*final'

    key = []
    for token in tokens(s):
        if token.startswith('*'):
            if token < '*final':
                while key and key[-1] == '*final-':
                    key.pop()
            while key and key[-1] == '00000000':
                key.pop()
        key.append(token)
    return tuple(key)
612
+
613
+
614
class LegacyVersion(Version):
    """Version whose comparison key is produced by ``_legacy_key``."""

    def parse(self, s):
        return _legacy_key(s)

    @property
    def is_prerelease(self):
        """True if any '*'-prefixed key part sorts before '*final'."""
        return any(
            isinstance(part, string_types)
            and part.startswith('*')
            and part < '*final'
            for part in self._parts
        )
627
+
628
+
629
class LegacyMatcher(Matcher):
    """Matcher for :class:`LegacyVersion` with a prefix-based ``~=``."""

    version_class = LegacyVersion

    # Same operators as the base class, except for '~='.
    _operators = dict(Matcher._operators, **{'~=': '_match_compatible'})

    numeric_re = re.compile(r'^(\d+(\.\d+)*)')

    def _match_compatible(self, version, constraint, prefix):
        if version < constraint:
            return False
        found = self.numeric_re.match(str(constraint))
        if not found:
            logger.warning('Cannot compute compatible match for version %s '
                           ' and constraint %s', version, constraint)
            return True
        # Drop the last numeric component, if there is more than one
        # (rsplit leaves a dot-free string unchanged).
        stem = found.groups()[0].rsplit('.', 1)[0]
        return _match_prefix(version, stem)
649
+
650
+ #
651
+ # Semantic versioning
652
+ #
653
+
654
+
655
# major.minor.patch, then an optional '-' pre-release part and an optional
# '+' build part, each a dot-separated run of alphanumerics.
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
                        r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
                        r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.IGNORECASE)


def is_semver(s):
    """Return the ``_SEMVER_RE`` match object for *s*, or None."""
    outcome = _SEMVER_RE.match(s)
    return outcome
662
+
663
+
664
def _semantic_key(s):
    """Return the key ``((major, minor, patch), pre, build)`` for *s*.

    Raises ``UnsupportedVersionError`` if ``is_semver`` rejects *s*.
    """
    def segment(text, placeholder):
        # An absent segment becomes a one-element tuple holding the
        # placeholder character.
        if text is None:
            return (placeholder,)
        # Skip the leading '-' or '+'.  We can't compare ints and strings
        # on Python 3, so numeric fields are zero-filled to simulate a
        # numeric comparison.
        return tuple([field.zfill(8) if field.isdigit() else field
                      for field in text[1:].split('.')])

    found = is_semver(s)
    if not found:
        raise UnsupportedVersionError(s)
    groups = found.groups()
    release = tuple(int(number) for number in groups[:3])
    # choose the '|' and '*' so that versions sort correctly
    return release, segment(groups[3], '|'), segment(groups[5], '*')
683
+
684
+
685
class SemanticVersion(Version):
    """Version whose comparison key is produced by ``_semantic_key``."""

    def parse(self, s):
        return _semantic_key(s)

    @property
    def is_prerelease(self):
        """True unless the pre-release segment is the '|' placeholder."""
        pre_release = self._parts[1]
        return pre_release[0] != '|'
692
+
693
+
694
class SemanticMatcher(Matcher):
    """Matcher using the base-class operators on ``SemanticVersion``."""

    version_class = SemanticVersion
696
+
697
+
698
class VersionScheme(object):
    """Bundle of a key function, a matcher class and an optional suggester.

    ``suggester`` is called with a single version string by :meth:`suggest`.
    """

    def __init__(self, key, matcher, suggester=None):
        self.key = key
        self.matcher = matcher
        self.suggester = suggester

    def is_valid_version(self, s):
        """Return True if the matcher's version class accepts *s*."""
        try:
            self.matcher.version_class(s)
        except UnsupportedVersionError:
            return False
        return True

    def is_valid_matcher(self, s):
        """Return True if the matcher class accepts the requirement *s*."""
        try:
            self.matcher(s)
        except UnsupportedVersionError:
            return False
        return True

    def is_valid_constraint_list(self, s):
        """
        Used for processing some metadata fields
        """
        # See issue #140. Be tolerant of a single trailing comma.
        constraints = s[:-1] if s.endswith(',') else s
        return self.is_valid_matcher('dummy_name (%s)' % constraints)

    def suggest(self, s):
        """Return the suggester's result for *s*, or None without one."""
        if self.suggester is None:
            return None
        return self.suggester(s)
735
+
736
+
737
# Registry of the available version schemes, looked up by get_scheme().
_SCHEMES = {
    'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
                                _suggest_normalized_version),
    # VersionScheme.suggest() calls the suggester with the version string
    # only, so the identity suggester must take exactly one argument; a
    # two-argument ``lambda self, s`` raises TypeError on every call.
    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda s: s),
    'semantic': VersionScheme(_semantic_key, SemanticMatcher,
                              _suggest_semantic_version),
}

_SCHEMES['default'] = _SCHEMES['normalized']
746
+
747
+
748
def get_scheme(name):
    """Return the scheme registered under *name* in ``_SCHEMES``.

    Raises ``ValueError`` for an unknown name.
    """
    scheme = _SCHEMES.get(name)
    if scheme is None:
        raise ValueError('unknown scheme name: %r' % name)
    return scheme
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/wheel.py ADDED
@@ -0,0 +1,1099 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # Copyright (C) 2013-2023 Vinay Sajip.
4
+ # Licensed to the Python Software Foundation under a contributor agreement.
5
+ # See LICENSE.txt and CONTRIBUTORS.txt.
6
+ #
7
+ from __future__ import unicode_literals
8
+
9
+ import base64
10
+ import codecs
11
+ import datetime
12
+ from email import message_from_file
13
+ import hashlib
14
+ import json
15
+ import logging
16
+ import os
17
+ import posixpath
18
+ import re
19
+ import shutil
20
+ import sys
21
+ import tempfile
22
+ import zipfile
23
+
24
+ from . import __version__, DistlibException
25
+ from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
26
+ from .database import InstalledDistribution
27
+ from .metadata import Metadata, WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME
28
+ from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
29
+ cached_property, get_cache_base, read_exports, tempdir,
30
+ get_platform)
31
+ from .version import NormalizedVersion, UnsupportedVersionError
32
+
33
+ logger = logging.getLogger(__name__)
34
+
35
+ cache = None # created when needed
36
+
37
# Two-letter implementation prefix; 'cp' unless one of the checks below
# identifies another interpreter.
IMP_PREFIX = 'cp'
if hasattr(sys, 'pypy_version_info'):  # pragma: no cover
    IMP_PREFIX = 'pp'
elif sys.platform.startswith('java'):  # pragma: no cover
    IMP_PREFIX = 'jy'
elif sys.platform == 'cli':  # pragma: no cover
    IMP_PREFIX = 'ip'

# Major/minor version without a dot, falling back to sys.version_info when
# the config variable is unavailable.
VER_SUFFIX = (sysconfig.get_config_var('py_version_nodot')
              or '%s%s' % sys.version_info[:2])  # pragma: no cover
PYVER = 'py' + VER_SUFFIX
IMPVER = IMP_PREFIX + VER_SUFFIX
51
+
52
+ ARCH = get_platform().replace('-', '_').replace('.', '_')
53
+
54
def _derive_abi():
    """Build an ABI tag from config variables when SOABI is unusable."""
    tags = ['cp', VER_SUFFIX]
    if sysconfig.get_config_var('Py_DEBUG'):
        tags.append('d')
    version = sys.version_info[:2]
    if IMP_PREFIX == 'cp' and version < (3, 8):
        pymalloc = sysconfig.get_config_var('WITH_PYMALLOC')
        # An unset WITH_PYMALLOC is treated as enabled.
        if pymalloc is None or pymalloc:
            tags.append('m')
        if version < (3, 3):
            unicode_size = sysconfig.get_config_var('Py_UNICODE_SIZE')
            if unicode_size == 4 or (unicode_size is None and
                                     sys.maxunicode == 0x10FFFF):
                tags.append('u')
    return ''.join(tags)


# Prefer the interpreter's SOABI ('cpython-XY-...' shortened to 'cpXY');
# otherwise derive the tag by hand.
ABI = sysconfig.get_config_var('SOABI')
if ABI and ABI.startswith('cpython-'):
    ABI = ABI.replace('cpython-', 'cp').split('-')[0]
else:
    ABI = _derive_abi()
del _derive_abi
79
+
80
# Full wheel filename: name, version, optional build tag, then the python,
# ABI and architecture tags, ending in '.whl'.
FILENAME_RE = re.compile(
    r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+(\.\w+)*)
\.whl$
''', re.I | re.X)

# Just 'name-version' with an optional build tag and nothing after it.
NAME_VERSION_RE = re.compile(
    r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?$
''', re.I | re.X)

# A '#!' line (leading whitespace allowed), up to the end of the line.
SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
# Splits a '#!' line into the interpreter part (optionally double-quoted)
# and the arguments that follow it.
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$')
SHEBANG_PYTHON = b'#!python'
SHEBANG_PYTHONW = b'#!pythonw'
102
+
103
# to_posix(path) returns the path with os.sep replaced by '/'; where os.sep
# already is '/', the path is returned unchanged.
if os.sep != '/':
    def to_posix(o):
        return o.replace(os.sep, '/')
else:
    def to_posix(o):
        return o
107
+
108
+ if sys.version_info[0] < 3:
109
+ import imp
110
+ else:
111
+ imp = None
112
+ import importlib.machinery
113
+ import importlib.util
114
+
115
+
116
def _get_suffixes():
    """Return the filename suffixes known to the import machinery.

    Uses the ``imp`` module when it was imported (module-level ``imp`` is
    truthy), and ``importlib.machinery.EXTENSION_SUFFIXES`` otherwise.
    """
    if not imp:
        return importlib.machinery.EXTENSION_SUFFIXES
    return [entry[0] for entry in imp.get_suffixes()]
121
+
122
+
123
def _load_dynamic(name, path):
    """Load the module stored at *path* under *name* and return it.

    Delegates to ``imp.load_dynamic`` when ``imp`` is available; otherwise
    the module is built from a file-location spec, registered in
    ``sys.modules`` and then executed.
    """
    if imp:
        return imp.load_dynamic(name, path)
    # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    spec = importlib.util.spec_from_file_location(name, path)
    loaded = importlib.util.module_from_spec(spec)
    # Registered before execution, as in the recipe linked above.
    sys.modules[name] = loaded
    spec.loader.exec_module(loaded)
    return loaded
133
+
134
+
135
class Mounter(object):
    """Finder/loader for extension modules registered per wheel path.

    ``impure_wheels`` maps a wheel pathname to the extensions registered
    for it; ``libs`` maps a module name to the file it is loaded from.
    """

    def __init__(self):
        self.impure_wheels = {}
        self.libs = {}

    def add(self, pathname, extensions):
        """Register *extensions* (name/path pairs) for *pathname*."""
        self.impure_wheels[pathname] = extensions
        self.libs.update(extensions)

    def remove(self, pathname):
        """Forget *pathname* and every extension registered with it."""
        for name, _ in self.impure_wheels.pop(pathname):
            self.libs.pop(name, None)

    def find_module(self, fullname, path=None):
        """Return this object if *fullname* is registered, else None."""
        return self if fullname in self.libs else None

    def load_module(self, fullname):
        """Return the module *fullname*, loading it if necessary.

        Raises ``ImportError`` if the module is neither already imported
        nor registered here.
        """
        if fullname in sys.modules:
            return sys.modules[fullname]
        if fullname not in self.libs:
            raise ImportError('unable to find extension for %s' % fullname)
        module = _load_dynamic(fullname, self.libs[fullname])
        module.__loader__ = self
        package, dot, _ = fullname.rpartition('.')
        if dot:
            module.__package__ = package
        return module


_hook = Mounter()
173
+
174
+
175
+ class Wheel(object):
176
+ """
177
+ Class to build and install from Wheel files (PEP 427).
178
+ """
179
+
180
+ wheel_version = (1, 1)
181
+ hash_kind = 'sha256'
182
+
183
def __init__(self, filename=None, sign=False, verify=False):
    """
    Initialise an instance using a (valid) filename.

    *filename* may be None (a placeholder 'dummy' 0.1 wheel is described),
    a string matched by NAME_VERSION_RE (name, version and optional build
    only), or a path whose last component is matched by FILENAME_RE (a
    complete wheel filename). Anything else raises DistlibException.
    """
    self.sign = sign
    self.should_verify = verify
    self.buildver = ''
    self.pyver = [PYVER]
    self.abi = ['none']
    self.arch = ['any']
    self.dirname = os.getcwd()

    if filename is None:
        self.name = 'dummy'
        self.version = '0.1'
        self._filename = self.filename
        return

    name_ver_match = NAME_VERSION_RE.match(filename)
    if name_ver_match:
        fields = name_ver_match.groupdict('')
        self.name = fields['nm']
        # Reinstate the local version separator
        self.version = fields['vn'].replace('_', '-')
        self.buildver = fields['bn']
        self._filename = self.filename
        return

    parent, basename = os.path.split(filename)
    file_match = FILENAME_RE.match(basename)
    if not file_match:
        raise DistlibException('Invalid name or '
                               'filename: %r' % basename)
    if parent:
        self.dirname = os.path.abspath(parent)
    self._filename = basename
    fields = file_match.groupdict('')
    self.name = fields['nm']
    self.version = fields['vn']
    self.buildver = fields['bn']
    # Compressed tag sets are dot-separated within each filename field
    self.pyver = fields['py'].split('.')
    self.abi = fields['bi'].split('.')
    self.arch = fields['ar'].split('.')
223
+
224
@property
def filename(self):
    """
    Build and return a filename from the various components.
    """
    build_part = '-' + self.buildver if self.buildver else ''
    # replace - with _ as a local version separator
    version = self.version.replace('-', '_')
    tag_parts = ('.'.join(self.pyver), '.'.join(self.abi),
                 '.'.join(self.arch))
    return '%s-%s%s-%s-%s-%s.whl' % ((self.name, version, build_part) +
                                     tag_parts)
240
+
241
@property
def exists(self):
    """Whether a regular file named ``self.filename`` is in ``self.dirname``."""
    return os.path.isfile(os.path.join(self.dirname, self.filename))
245
+
246
@property
def tags(self):
    """
    Yield every (pyver, abi, arch) combination of this wheel's tag lists,
    with the arch varying fastest and the pyver slowest.
    """
    for py_tag in self.pyver:
        for abi_tag in self.abi:
            for arch_tag in self.arch:
                yield py_tag, abi_tag, arch_tag
252
+
253
@cached_property
def metadata(self):
    """
    Return the distribution metadata read from the wheel file.

    The .dist-info directory is searched for WHEEL_METADATA_FILENAME and
    then LEGACY_METADATA_FILENAME; the first one that yields a truthy
    Metadata object wins. Raises ValueError if neither does.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    utf8_reader = codecs.getreader('utf-8')
    with ZipFile(wheel_path, 'r') as zf:
        # Return value is unused here; the call still opens and parses
        # the WHEEL file, so a wheel without one fails at this point.
        self.get_wheel_metadata(zf)
        candidates = [WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME]
        result = None
        for candidate in candidates:
            try:
                member = posixpath.join(info_dir, candidate)
                with zf.open(member) as bf:
                    result = Metadata(fileobj=utf8_reader(bf))
                    if result:
                        break
            except KeyError:
                # Skip to the next candidate name.
                continue
        if not result:
            raise ValueError('Invalid wheel, because metadata is '
                             'missing: looked in %s' % ', '.join(candidates))
    return result
284
+
285
def get_wheel_metadata(self, zf):
    """
    Return the headers of the wheel's WHEEL file as a dict.

    *zf* is the open zip archive of the wheel; the file is looked up as
    ``<name>-<version>.dist-info/WHEEL`` and decoded as UTF-8.
    """
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    utf8_reader = codecs.getreader('utf-8')
    with zf.open(posixpath.join(info_dir, 'WHEEL')) as bf:
        message = message_from_file(utf8_reader(bf))
    return dict(message)
293
+
294
@cached_property
def info(self):
    """Return the WHEEL file headers of the wheel on disk, as a dict."""
    wheel_path = os.path.join(self.dirname, self.filename)
    with ZipFile(wheel_path, 'r') as zf:
        return self.get_wheel_metadata(zf)
300
+
301
def process_shebang(self, data):
    """
    Return script *data* (bytes) with a generic Python shebang line.

    If SHEBANG_RE matches the start of *data* (presumably an existing
    shebang line - the pattern is defined elsewhere in this module), the
    interpreter is replaced by ``#!python`` (``#!pythonw`` when the matched
    text mentions pythonw) and any arguments after it are kept. Otherwise
    a ``#!python`` line is inserted in front, terminated with the line
    ending that occurs first in *data*.
    """
    m = SHEBANG_RE.match(data)
    if m:
        end = m.end()
        shebang, data_after_shebang = data[:end], data[end:]
        # Preserve any arguments after the interpreter
        if b'pythonw' in shebang.lower():
            shebang_python = SHEBANG_PYTHONW
        else:
            shebang_python = SHEBANG_PYTHON
        m = SHEBANG_DETAIL_RE.match(shebang)
        if m:
            args = b' ' + m.groups()[-1]
        else:
            args = b''
        return shebang_python + args + data_after_shebang

    cr = data.find(b'\r')
    lf = data.find(b'\n')
    # Use '\n' when there is no CR at all, or when an LF comes first.
    # find() returns -1 for "absent", so lf must be checked explicitly:
    # a bare "cr > lf" is also true for CR-only data.
    if cr < 0 or 0 <= lf < cr:
        term = b'\n'
    elif data[cr:cr + 2] == b'\r\n':
        term = b'\r\n'
    else:
        term = b'\r'
    return SHEBANG_PYTHON + term + data
330
+
331
def get_hash(self, data, hash_kind=None):
    """
    Return ``(hash_kind, digest)`` for the bytes *data*.

    *hash_kind* names a ``hashlib`` constructor and defaults to
    ``self.hash_kind``. The digest is URL-safe base64 text with the '='
    padding removed. Raises DistlibException if ``hashlib`` has no
    attribute of that name.
    """
    kind = self.hash_kind if hash_kind is None else hash_kind
    try:
        hasher = getattr(hashlib, kind)
    except AttributeError:
        raise DistlibException('Unsupported hash algorithm: %r' %
                               kind)
    encoded = base64.urlsafe_b64encode(hasher(data).digest())
    return kind, encoded.rstrip(b'=').decode('ascii')
342
+
343
def write_record(self, records, record_path, archive_record_path):
    """
    Write *records* as CSV rows to *record_path*, followed by a final row
    for the RECORD file itself (*archive_record_path* with empty hash and
    size fields). The passed-in *records* is not modified.
    """
    rows = list(records)  # make a copy, as mutated
    rows.append((archive_record_path, '', ''))
    with CSVWriter(record_path) as writer:
        for entry in rows:
            writer.writerow(entry)
349
+
350
def write_records(self, info, libdir, archive_paths):
    """
    Write the RECORD file for the files listed in *archive_paths*.

    *info* is a ``(distinfo, info_dir)`` pair: the filesystem directory
    RECORD is written into, and the matching directory name inside the
    archive. *archive_paths* is a list of ``(archive path, filesystem
    path)`` pairs; the entry for RECORD itself is appended to it.
    *libdir* is accepted but not used.
    """
    distinfo, info_dir = info
    records = []
    for archive_name, fs_path in archive_paths:
        with open(fs_path, 'rb') as f:
            content = f.read()
        digest = '%s=%s' % self.get_hash(content)
        records.append((archive_name, digest, os.path.getsize(fs_path)))

    record_path = os.path.join(distinfo, 'RECORD')
    record_archive_name = to_posix(os.path.join(info_dir, 'RECORD'))
    self.write_record(records, record_path, record_archive_name)
    archive_paths.append((record_archive_name, record_path))
365
+
366
def build_zip(self, pathname, archive_paths):
    """
    Create the deflate-compressed zip *pathname* from *archive_paths*, a
    sequence of ``(archive path, filesystem path)`` pairs.
    """
    with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as zf:
        for archive_name, fs_path in archive_paths:
            # Logged before the entry is actually written.
            logger.debug('Wrote %s to %s in wheel', fs_path, archive_name)
            zf.write(fs_path, archive_name)
371
+
372
def build(self, paths, tags=None, wheel_version=None):
    """
    Build a wheel from files in specified paths, and use any specified tags
    when determining the name of the wheel.

    *paths* maps scheme keys to directories. One of 'purelib' / 'platlib'
    must be present (the first one found, in that order, is the library
    directory and decides whether the wheel is pure); 'data', 'headers'
    and 'scripts' are optional. *tags* may supply 'pyver', 'abi' and
    'arch' lists overriding the defaults. *wheel_version* is a 2-tuple
    overriding ``self.wheel_version`` in the generated WHEEL file.

    Side effects: scripts not ending in '.exe' are rewritten in place via
    ``process_shebang``; WHEEL and RECORD are written into the .dist-info
    directory found at the top level of the library directory.

    Returns the pathname of the wheel written. Raises DistlibException if
    no .dist-info directory could be located.
    """
    if tags is None:
        tags = {}

    libkey = [key for key in ('purelib', 'platlib') if key in paths][0]
    if libkey == 'platlib':
        is_pure = 'false'
        default_pyver = [IMPVER]
        default_abi = [ABI]
        default_arch = [ARCH]
    else:
        is_pure = 'true'
        default_pyver = [PYVER]
        default_abi = ['none']
        default_arch = ['any']

    self.pyver = tags.get('pyver', default_pyver)
    self.abi = tags.get('abi', default_abi)
    self.arch = tags.get('arch', default_arch)

    libdir = paths[libkey]

    name_ver = '%s-%s' % (self.name, self.version)
    data_dir = '%s.data' % name_ver
    info_dir = '%s.dist-info' % name_ver

    archive_paths = []

    # First, stuff which is not in site-packages
    for key in ('data', 'headers', 'scripts'):
        if key not in paths:
            continue
        path = paths[key]
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for fn in files:
                    p = fsdecode(os.path.join(root, fn))
                    rp = os.path.relpath(p, path)
                    ap = to_posix(os.path.join(data_dir, key, rp))
                    archive_paths.append((ap, p))
                    if key == 'scripts' and not p.endswith('.exe'):
                        with open(p, 'rb') as f:
                            data = f.read()
                        data = self.process_shebang(data)
                        with open(p, 'wb') as f:
                            f.write(data)

    # Now, stuff which is in site-packages, other than the
    # distinfo stuff.
    path = libdir
    distinfo = None
    for root, dirs, files in os.walk(path):
        if root == path:
            # At the top level only, save distinfo for later
            # and skip it for now
            for i, dn in enumerate(dirs):
                dn = fsdecode(dn)
                if dn.endswith('.dist-info'):
                    distinfo = os.path.join(root, dn)
                    # pruning dirs stops os.walk descending into it
                    del dirs[i]
                    break
            assert distinfo, '.dist-info directory expected, not found'

        for fn in files:
            # comment out next suite to leave .pyc files in
            if fsdecode(fn).endswith(('.pyc', '.pyo')):
                continue
            p = os.path.join(root, fn)
            rp = to_posix(os.path.relpath(p, path))
            archive_paths.append((rp, p))

    if not distinfo:
        # Reached when os.walk() yielded nothing (e.g. libdir is not a
        # directory) or asserts are disabled. Without this guard,
        # os.listdir(None) below would list the current directory.
        raise DistlibException('.dist-info directory expected, not found '
                               'in %r' % libdir)

    # Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
    files = os.listdir(distinfo)
    for fn in files:
        if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'):
            p = fsdecode(os.path.join(distinfo, fn))
            ap = to_posix(os.path.join(info_dir, fn))
            archive_paths.append((ap, p))

    wheel_metadata = [
        'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
        'Generator: distlib %s' % __version__,
        'Root-Is-Purelib: %s' % is_pure,
    ]
    for pyver, abi, arch in self.tags:
        wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
    p = os.path.join(distinfo, 'WHEEL')
    with open(p, 'w') as f:
        f.write('\n'.join(wheel_metadata))
    ap = to_posix(os.path.join(info_dir, 'WHEEL'))
    archive_paths.append((ap, p))

    # sort the entries by archive path. Not needed by any spec, but it
    # keeps the archive listing and RECORD tidier than they would otherwise
    # be. Use the number of path segments to keep directory entries together,
    # and keep the dist-info stuff at the end.
    def sorter(t):
        ap = t[0]
        n = ap.count('/')
        if '.dist-info' in ap:
            n += 10000
        return (n, ap)

    archive_paths = sorted(archive_paths, key=sorter)

    # Now, at last, RECORD.
    # Paths in here are archive paths - nothing else makes sense.
    self.write_records((distinfo, info_dir), libdir, archive_paths)
    # Now, ready to build the zip file
    pathname = os.path.join(self.dirname, self.filename)
    self.build_zip(pathname, archive_paths)
    return pathname
488
+
489
def skip_entry(self, arcname):
    """
    Determine whether an archive entry should be skipped when verifying
    or installing.
    """
    # Two kinds of entry are never listed in RECORD:
    #
    # * the signature file (RECORD.jws), which we currently don't do
    #   anything with;
    # * directory entries, whose names end with '/'. See:
    #
    #   https://github.com/pypa/wheel/issues/294
    #   https://github.com/pypa/wheel/issues/287
    #   https://github.com/pypa/wheel/pull/289
    #
    unrecorded_endings = ('/', '/RECORD.jws')
    return arcname.endswith(unrecorded_endings)
504
+
505
def install(self, paths, maker, **kwargs):
    """
    Install a wheel to the specified paths. If kwarg ``warner`` is
    specified, it should be a callable, which will be called with two
    tuples indicating the wheel version of this software and the wheel
    version in the file, if there is a discrepancy in the versions.
    This can be used to issue any warnings or raise any exceptions.
    If kwarg ``lib_only`` is True, only the purelib/platlib files are
    installed, and the headers, scripts, data and dist-info metadata are
    not written. If kwarg ``bytecode_hashed_invalidation`` is True, written
    bytecode will try to use file-hash based invalidation (PEP-552) on
    supported interpreter versions (CPython 3.7+).

    Every archive entry is checked against RECORD (size and digest) before
    it is written, and entries with a ``..`` path segment are rejected.
    On any failure the files written so far are rolled back and the
    exception is re-raised.

    The return value is a :class:`InstalledDistribution` instance unless
    ``lib_only`` is True, in which case the return value is ``None``.
    """

    dry_run = maker.dry_run
    warner = kwargs.get('warner')
    lib_only = kwargs.get('lib_only', False)
    bc_hashed_invalidation = kwargs.get('bytecode_hashed_invalidation',
                                        False)

    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    data_dir = '%s.data' % name_ver
    info_dir = '%s.dist-info' % name_ver

    metadata_name = posixpath.join(info_dir, LEGACY_METADATA_FILENAME)
    wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
    record_name = posixpath.join(info_dir, 'RECORD')

    wrapper = codecs.getreader('utf-8')

    with ZipFile(pathname, 'r') as zf:
        with zf.open(wheel_metadata_name) as bwf:
            wf = wrapper(bwf)
            message = message_from_file(wf)
        wv = message['Wheel-Version'].split('.', 1)
        file_version = tuple([int(i) for i in wv])
        if (file_version != self.wheel_version) and warner:
            warner(self.wheel_version, file_version)

        if message['Root-Is-Purelib'] == 'true':
            libdir = paths['purelib']
        else:
            libdir = paths['platlib']

        # archive path -> RECORD row (path, 'kind=digest', size)
        records = {}
        with zf.open(record_name) as bf:
            with CSVReader(stream=bf) as reader:
                for row in reader:
                    p = row[0]
                    records[p] = row

        # Trailing '' makes each prefix end with '/'
        data_pfx = posixpath.join(data_dir, '')
        info_pfx = posixpath.join(info_dir, '')
        script_pfx = posixpath.join(data_dir, 'scripts', '')

        # make a new instance rather than a copy of maker's,
        # as we mutate it
        fileop = FileOperator(dry_run=dry_run)
        fileop.record = True  # so we can rollback if needed

        bc = not sys.dont_write_bytecode  # Double negatives. Lovely!

        outfiles = []  # for RECORD writing

        # for script copying/shebang processing
        workdir = tempfile.mkdtemp()
        # set target dir later
        # we default add_launchers to False, as the
        # Python Launcher should be used instead
        maker.source_dir = workdir
        maker.target_dir = None
        try:
            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                # Refuse entries that could escape the target directories.
                # Same directory-segment test as verify() uses, so names
                # such as '__main__..py' remain acceptable.
                if '..' in u_arcname.split('/'):
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)
                if self.skip_entry(u_arcname):
                    continue
                row = records[u_arcname]
                if row[2] and str(zinfo.file_size) != row[2]:
                    raise DistlibException('size mismatch for '
                                           '%s' % u_arcname)
                if row[1]:
                    kind, value = row[1].split('=', 1)
                    with zf.open(arcname) as bf:
                        data = bf.read()
                    _, digest = self.get_hash(data, kind)
                    if digest != value:
                        raise DistlibException('digest mismatch for '
                                               '%s' % arcname)

                if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
                    logger.debug('lib_only: skipping %s', u_arcname)
                    continue
                is_script = (u_arcname.startswith(script_pfx)
                             and not u_arcname.endswith('.exe'))

                if u_arcname.startswith(data_pfx):
                    # '<name>.data/<scheme key>/<relative path>'
                    _, where, rp = u_arcname.split('/', 2)
                    outfile = os.path.join(paths[where], convert_path(rp))
                else:
                    # meant for site-packages.
                    if u_arcname in (wheel_metadata_name, record_name):
                        continue
                    outfile = os.path.join(libdir, convert_path(u_arcname))
                if not is_script:
                    with zf.open(arcname) as bf:
                        fileop.copy_stream(bf, outfile)
                    # Issue #147: permission bits aren't preserved. Using
                    # zf.extract(zinfo, libdir) should have worked, but didn't,
                    # see https://www.thetopsites.net/article/53834422.shtml
                    # So ... manually preserve permission bits as given in zinfo
                    if os.name == 'posix':
                        # just set the normal permission bits
                        os.chmod(outfile,
                                 (zinfo.external_attr >> 16) & 0x1FF)
                    outfiles.append(outfile)
                    # Double check the digest of the written file
                    if not dry_run and row[1]:
                        with open(outfile, 'rb') as bf:
                            data = bf.read()
                            _, newdigest = self.get_hash(data, kind)
                            if newdigest != digest:
                                raise DistlibException('digest mismatch '
                                                       'on write for '
                                                       '%s' % outfile)
                    if bc and outfile.endswith('.py'):
                        try:
                            pyc = fileop.byte_compile(
                                outfile,
                                hashed_invalidation=bc_hashed_invalidation)
                            outfiles.append(pyc)
                        except Exception:
                            # Don't give up if byte-compilation fails,
                            # but log it and perhaps warn the user
                            logger.warning('Byte-compilation failed',
                                           exc_info=True)
                else:
                    # Scripts are copied to the work directory first and
                    # then produced in their target directory by maker.
                    fn = os.path.basename(convert_path(arcname))
                    workname = os.path.join(workdir, fn)
                    with zf.open(arcname) as bf:
                        fileop.copy_stream(bf, workname)

                    dn, fn = os.path.split(outfile)
                    maker.target_dir = dn
                    filenames = maker.make(fn)
                    fileop.set_executable_mode(filenames)
                    outfiles.extend(filenames)

            if lib_only:
                logger.debug('lib_only: returning None')
                dist = None
            else:
                # Generate scripts

                # Try to get pydist.json so we can see if there are
                # any commands to generate. If this fails (e.g. because
                # of a legacy wheel), log a warning but don't give up.
                commands = None
                file_version = self.info['Wheel-Version']
                if file_version == '1.0':
                    # Use legacy info
                    ep = posixpath.join(info_dir, 'entry_points.txt')
                    try:
                        with zf.open(ep) as bwf:
                            epdata = read_exports(bwf)
                        commands = {}
                        for key in ('console', 'gui'):
                            k = '%s_scripts' % key
                            if k in epdata:
                                commands['wrap_%s' % key] = d = {}
                                for v in epdata[k].values():
                                    s = '%s:%s' % (v.prefix, v.suffix)
                                    if v.flags:
                                        s += ' [%s]' % ','.join(v.flags)
                                    d[v.name] = s
                    except Exception:
                        logger.warning('Unable to read legacy script '
                                       'metadata, so cannot generate '
                                       'scripts')
                else:
                    try:
                        with zf.open(metadata_name) as bwf:
                            wf = wrapper(bwf)
                            commands = json.load(wf).get('extensions')
                            if commands:
                                commands = commands.get('python.commands')
                    except Exception:
                        logger.warning('Unable to read JSON metadata, so '
                                       'cannot generate scripts')
                if commands:
                    console_scripts = commands.get('wrap_console', {})
                    gui_scripts = commands.get('wrap_gui', {})
                    if console_scripts or gui_scripts:
                        script_dir = paths.get('scripts', '')
                        if not os.path.isdir(script_dir):
                            raise ValueError('Valid script path not '
                                             'specified')
                        maker.target_dir = script_dir
                        for k, v in console_scripts.items():
                            script = '%s = %s' % (k, v)
                            filenames = maker.make(script)
                            fileop.set_executable_mode(filenames)

                        if gui_scripts:
                            options = {'gui': True}
                            for k, v in gui_scripts.items():
                                script = '%s = %s' % (k, v)
                                filenames = maker.make(script, options)
                                fileop.set_executable_mode(filenames)

                p = os.path.join(libdir, info_dir)
                dist = InstalledDistribution(p)

                # Write SHARED
                paths = dict(paths)  # don't change passed in dict
                del paths['purelib']
                del paths['platlib']
                paths['lib'] = libdir
                p = dist.write_shared_locations(paths, dry_run)
                if p:
                    outfiles.append(p)

                # Write RECORD
                dist.write_installed_files(outfiles, paths['prefix'],
                                           dry_run)
            return dist
        except Exception:  # pragma: no cover
            logger.exception('installation failed.')
            fileop.rollback()
            raise
        finally:
            shutil.rmtree(workdir)
744
+
745
def _get_dylib_cache(self):
    """
    Return the module-level ``cache`` object, creating it on first use.

    The cache lives in a 'dylib-cache/<major>.<minor>' directory under
    ``get_cache_base()``.
    """
    global cache
    if cache is not None:
        return cache
    version_dir = '%s.%s' % sys.version_info[:2]
    # Use native string to avoid issues on 2.x: see Python #20140.
    base = os.path.join(get_cache_base(), str('dylib-cache'), version_dir)
    cache = Cache(base)
    return cache
753
+
754
def _get_extensions(self):
    """
    Extract the wheel's extension modules into the dylib cache.

    The ``EXTENSIONS`` file in the .dist-info directory is read as a JSON
    mapping of module name to archive path. Each listed file is extracted
    under the cache directory for this wheel unless a copy already exists
    there that is at least as new as the archive entry.

    Returns a list of ``(module name, extracted path)`` pairs. A KeyError
    raised anywhere in the process (e.g. no EXTENSIONS file) ends it
    quietly, returning what was collected so far.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    extensions_name = posixpath.join(info_dir, 'EXTENSIONS')
    utf8_reader = codecs.getreader('utf-8')
    result = []
    with ZipFile(wheel_path, 'r') as zf:
        try:
            with zf.open(extensions_name) as bf:
                extensions = json.load(utf8_reader(bf))
                dylib_cache = self._get_dylib_cache()
                cache_base = os.path.join(
                    dylib_cache.base, dylib_cache.prefix_to_dir(wheel_path))
                if not os.path.isdir(cache_base):
                    os.makedirs(cache_base)
                for name, relpath in extensions.items():
                    dest = os.path.join(cache_base, convert_path(relpath))
                    needs_extract = not os.path.exists(dest)
                    if not needs_extract:
                        # Refresh only when the archive entry is newer
                        # than the cached file.
                        file_time = datetime.datetime.fromtimestamp(
                            os.stat(dest).st_mtime)
                        entry = zf.getinfo(relpath)
                        wheel_time = datetime.datetime(*entry.date_time)
                        needs_extract = wheel_time > file_time
                    if needs_extract:
                        zf.extract(relpath, cache_base)
                    result.append((name, dest))
        except KeyError:
            pass
    return result
788
+
789
def is_compatible(self):
    """
    Determine if a wheel is compatible with the running system.
    """
    # Delegates to the module-level function of the same name.
    compatible = is_compatible(self)
    return compatible
794
+
795
def is_mountable(self):
    """
    Determine if a wheel is asserted as mountable by its metadata.
    """
    # for now - metadata details TBD
    mountable = True
    return mountable
800
+
801
def mount(self, append=False):
    """
    Make the wheel importable by putting its absolute path on ``sys.path``
    (at the end if *append* is true, otherwise at the front).

    If the wheel lists extension modules, they are registered with the
    shared import hook, which is added to ``sys.meta_path`` if needed.
    Nothing is changed when the path is already on ``sys.path``. Raises
    DistlibException if the wheel is not compatible or not mountable.
    """
    pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
    if not self.is_compatible():
        raise DistlibException(
            'Wheel %s not compatible with this Python.' % pathname)
    if not self.is_mountable():
        raise DistlibException(
            'Wheel %s is marked as not mountable.' % pathname)
    if pathname in sys.path:
        logger.debug('%s already in path', pathname)
        return

    if append:
        sys.path.append(pathname)
    else:
        sys.path.insert(0, pathname)
    extensions = self._get_extensions()
    if not extensions:
        return
    if _hook not in sys.meta_path:
        sys.meta_path.append(_hook)
    _hook.add(pathname, extensions)
821
+
822
def unmount(self):
    """
    Undo ``mount``: take the wheel's absolute path off ``sys.path``,
    withdraw its extensions from the shared import hook, and remove the
    hook from ``sys.meta_path`` once no wheel with extensions is left.
    Only logs a debug message if the path is not on ``sys.path``.
    """
    pathname = os.path.abspath(os.path.join(self.dirname, self.filename))
    if pathname not in sys.path:
        logger.debug('%s not in path', pathname)
        return

    sys.path.remove(pathname)
    if pathname in _hook.impure_wheels:
        _hook.remove(pathname)
    if not _hook.impure_wheels and _hook in sys.meta_path:
        sys.meta_path.remove(_hook)
833
+
834
def verify(self):
    """
    Check the wheel file against its own RECORD.

    Raises DistlibException for an entry with a '..' path segment, or
    whose size or digest differs from the RECORD row. Entries selected by
    ``skip_entry`` are not checked; an empty size or hash field in a row
    disables that particular check. Returns None.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
    record_name = posixpath.join(info_dir, 'RECORD')
    utf8_reader = codecs.getreader('utf-8')

    with ZipFile(wheel_path, 'r') as zf:
        with zf.open(wheel_metadata_name) as bwf:
            # Parsed but not inspected.
            message_from_file(utf8_reader(bwf))
            # TODO version verification

        # archive path -> RECORD row
        with zf.open(record_name) as bf:
            with CSVReader(stream=bf) as reader:
                records = dict((row[0], row) for row in reader)

        for zinfo in zf.infolist():
            arcname = zinfo.filename
            if isinstance(arcname, text_type):
                u_arcname = arcname
            else:
                u_arcname = arcname.decode('utf-8')
            # See issue #115: some wheels have .. in their entries, but
            # in the filename ... e.g. __main__..py ! So the check is
            # updated to look for .. in the directory portions
            if '..' in u_arcname.split('/'):
                raise DistlibException('invalid entry in '
                                       'wheel: %r' % u_arcname)

            if self.skip_entry(u_arcname):
                continue
            row = records[u_arcname]
            if row[2] and str(zinfo.file_size) != row[2]:
                raise DistlibException('size mismatch for '
                                       '%s' % u_arcname)
            if not row[1]:
                continue
            kind, value = row[1].split('=', 1)
            with zf.open(arcname) as bf:
                data = bf.read()
            _, digest = self.get_hash(data, kind)
            if digest != value:
                raise DistlibException('digest mismatch for '
                                       '%s' % arcname)
889
+
890
def update(self, modifier, dest_dir=None, **kwargs):
    """
    Update the contents of a wheel in a generic way. The modifier should
    be a callable which expects a dictionary argument: its keys are
    archive-entry paths, and its values are absolute filesystem paths
    where the contents the corresponding archive entries can be found. The
    modifier is free to change the contents of the files pointed to, add
    new entries and remove entries, before returning. This method will
    extract the entire contents of the wheel to a temporary location, call
    the modifier, and then use the passed (and possibly updated)
    dictionary to write a new wheel. If ``dest_dir`` is specified, the new
    wheel is written there -- otherwise, the original wheel is overwritten.

    The modifier should return True if it updated the wheel, else False.
    This method returns the same value the modifier returns.

    Raises DistlibException if an archive entry has a ``..`` path segment,
    or if ``dest_dir`` is given but is not a directory.
    """

    def get_version(path_map, info_dir):
        # Return (version, metadata path) from the extracted metadata
        # file, or (None, None) if the wheel has none.
        version = path = None
        key = '%s/%s' % (info_dir, LEGACY_METADATA_FILENAME)
        if key not in path_map:
            key = '%s/PKG-INFO' % info_dir
        if key in path_map:
            path = path_map[key]
            version = Metadata(path=path).version
        return version, path

    def update_version(version, path):
        # Bump the local part of the version stored in the metadata at
        # *path*; versions NormalizedVersion rejects are left alone.
        updated = None
        try:
            NormalizedVersion(version)
            # NOTE(review): this searches for '-', yet the updated version
            # is written with '+'. A version produced by a previous update
            # would seem to take the first branch again - confirm intent.
            i = version.find('-')
            if i < 0:
                updated = '%s+1' % version
            else:
                parts = [int(s) for s in version[i + 1:].split('.')]
                parts[-1] += 1
                updated = '%s+%s' % (version[:i], '.'.join(
                    str(i) for i in parts))
        except UnsupportedVersionError:
            logger.debug(
                'Cannot update non-compliant (PEP-440) '
                'version %r', version)
        if updated:
            md = Metadata(path=path)
            md.version = updated
            legacy = path.endswith(LEGACY_METADATA_FILENAME)
            md.write(path=path, legacy=legacy)
            logger.debug('Version updated from %r to %r', version, updated)

    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    info_dir = '%s.dist-info' % name_ver
    record_name = posixpath.join(info_dir, 'RECORD')
    with tempdir() as workdir:
        with ZipFile(pathname, 'r') as zf:
            path_map = {}
            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                if u_arcname == record_name:
                    # RECORD is regenerated below
                    continue
                # Look for '..' as a path segment only, as verify() does:
                # a plain substring test also rejects legitimate names
                # such as '__main__..py' (issue #115).
                if '..' in u_arcname.split('/'):
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)
                zf.extract(zinfo, workdir)
                path = os.path.join(workdir, convert_path(u_arcname))
                path_map[u_arcname] = path

        # Remember the version.
        original_version, _ = get_version(path_map, info_dir)
        # Files extracted. Call the modifier.
        modified = modifier(path_map, **kwargs)
        if modified:
            # Something changed - need to build a new wheel.
            current_version, path = get_version(path_map, info_dir)
            if current_version and (current_version == original_version):
                # Add or update local version to signify changes.
                update_version(current_version, path)
            # Decide where the new wheel goes.
            if dest_dir is None:
                fd, newpath = tempfile.mkstemp(suffix='.whl',
                                               prefix='wheel-update-',
                                               dir=workdir)
                os.close(fd)
            else:
                if not os.path.isdir(dest_dir):
                    raise DistlibException('Not a directory: %r' %
                                           dest_dir)
                newpath = os.path.join(dest_dir, self.filename)
            archive_paths = list(path_map.items())
            distinfo = os.path.join(workdir, info_dir)
            info = distinfo, info_dir
            self.write_records(info, workdir, archive_paths)
            self.build_zip(newpath, archive_paths)
            if dest_dir is None:
                # Built inside workdir; copy over the original wheel.
                shutil.copyfile(newpath, pathname)
    return modified
991
+
992
+
993
+ def _get_glibc_version():
994
+ import platform
995
+ ver = platform.libc_ver()
996
+ result = []
997
+ if ver[0] == 'glibc':
998
+ for s in ver[1].split('.'):
999
+ result.append(int(s) if s.isdigit() else 0)
1000
+ result = tuple(result)
1001
+ return result
1002
+
1003
+
1004
def compatible_tags():
    """
    Return (pyver, abi, arch) tuples compatible with this Python.
    """
    # Version suffixes: the running one, then every earlier minor version
    # of the same major version, newest first.
    major = VER_SUFFIX[0]
    versions = [VER_SUFFIX]
    versions.extend(''.join([major, str(minor)])
                    for minor in range(sys.version_info[1] - 1, -1, -1))

    # ABIs: our own first (unless 'none'), then those named by '.abi*'
    # module suffixes in sorted order, then 'none'.
    abis = sorted(suffix.split('.', 2)[1] for suffix in _get_suffixes()
                  if suffix.startswith('.abi'))
    if ABI != 'none':
        abis.insert(0, ABI)
    abis.append('none')

    arches = [ARCH]
    if sys.platform == 'darwin':
        m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
        if m:
            name, os_major, os_minor, arch = m.groups()
            # Multi-architecture names and the architectures they apply to
            fat_groups = (
                ('fat', ('i386', 'ppc')),
                ('fat3', ('i386', 'ppc', 'x86_64')),
                ('fat64', ('ppc64', 'x86_64')),
                ('intel', ('i386', 'x86_64')),
                ('universal', ('i386', 'x86_64', 'intel', 'ppc', 'ppc64')),
            )
            matches = [arch]
            matches.extend(group for group, members in fat_groups
                           if arch in members)
            for minor in range(int(os_minor), -1, -1):
                for match in matches:
                    s = '%s_%s_%s_%s' % (name, os_major, minor, match)
                    if s != ARCH:  # already there
                        arches.append(s)

    result = set()

    # Most specific - our Python version, ABI and arch
    impl_ver = ''.join((IMP_PREFIX, versions[0]))
    on_linux = sys.platform.startswith('linux')
    # (minimum glibc version, tag template) for the legacy manylinux tags
    legacy_manylinux = (
        ((2, 5), 'manylinux1_%s'),
        ((2, 12), 'manylinux2010_%s'),
        ((2, 17), 'manylinux2014_%s'),
    )
    for abi in abis:
        for arch in arches:
            result.add((impl_ver, abi, arch))
            # manylinux
            if abi == 'none' or not on_linux:
                continue
            bare_arch = arch.replace('linux_', '')
            parts = _get_glibc_version()
            if len(parts) != 2:
                continue
            for minimum, template in legacy_manylinux:
                if parts >= minimum:
                    result.add((impl_ver, abi, template % bare_arch))
            result.add((impl_ver, abi,
                        'manylinux_%s_%s_%s' % (parts[0], parts[1],
                                                bare_arch)))

    # First with an IMP_PREFIX dependency but no ABI / arch dependency,
    # then with no IMP_PREFIX, ABI or arch dependency at all. The running
    # version also contributes a major-version-only tag.
    for prefix in (IMP_PREFIX, 'py'):
        for i, version in enumerate(versions):
            result.add((''.join((prefix, version)), 'none', 'any'))
            if i == 0:
                result.add((''.join((prefix, version[0])), 'none', 'any'))

    return result


# Computed once at import time; the builder is not part of the module API.
COMPATIBLE_TAGS = compatible_tags()

del compatible_tags
1087
+
1088
+
1089
def is_compatible(wheel, tags=None):
    """
    Return True if *wheel* carries at least one of the given *tags*.

    *wheel* is a Wheel instance or something ``Wheel()`` accepts as a
    filename. *tags* is an iterable of (pyver, abi, arch) tuples and
    defaults to COMPATIBLE_TAGS. A tag matches when each of its three
    parts appears in the wheel's corresponding tag list.
    """
    if not isinstance(wheel, Wheel):
        wheel = Wheel(wheel)  # assume it's a filename
    if tags is None:
        tags = COMPATIBLE_TAGS
    return any(ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch
               for ver, abi, arch in tags)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/__init__.py ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # module pyparsing.py
2
+ #
3
+ # Copyright (c) 2003-2022 Paul T. McGuire
4
+ #
5
+ # Permission is hereby granted, free of charge, to any person obtaining
6
+ # a copy of this software and associated documentation files (the
7
+ # "Software"), to deal in the Software without restriction, including
8
+ # without limitation the rights to use, copy, modify, merge, publish,
9
+ # distribute, sublicense, and/or sell copies of the Software, and to
10
+ # permit persons to whom the Software is furnished to do so, subject to
11
+ # the following conditions:
12
+ #
13
+ # The above copyright notice and this permission notice shall be
14
+ # included in all copies or substantial portions of the Software.
15
+ #
16
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
+ # IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20
+ # CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
+ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
+ # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
+ #
24
+
25
+ __doc__ = """
26
+ pyparsing module - Classes and methods to define and execute parsing grammars
27
+ =============================================================================
28
+
29
+ The pyparsing module is an alternative approach to creating and
30
+ executing simple grammars, vs. the traditional lex/yacc approach, or the
31
+ use of regular expressions. With pyparsing, you don't need to learn
32
+ a new syntax for defining grammars or matching expressions - the parsing
33
+ module provides a library of classes that you use to construct the
34
+ grammar directly in Python.
35
+
36
+ Here is a program to parse "Hello, World!" (or any greeting of the form
37
+ ``"<salutation>, <addressee>!"``), built up using :class:`Word`,
38
+ :class:`Literal`, and :class:`And` elements
39
+ (the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
40
+ and the strings are auto-converted to :class:`Literal` expressions)::
41
+
42
+ from pip._vendor.pyparsing import Word, alphas
43
+
44
+ # define grammar of a greeting
45
+ greet = Word(alphas) + "," + Word(alphas) + "!"
46
+
47
+ hello = "Hello, World!"
48
+ print(hello, "->", greet.parse_string(hello))
49
+
50
+ The program outputs the following::
51
+
52
+ Hello, World! -> ['Hello', ',', 'World', '!']
53
+
54
+ The Python representation of the grammar is quite readable, owing to the
55
+ self-explanatory class names, and the use of :class:`'+'<And>`,
56
+ :class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
57
+
58
+ The :class:`ParseResults` object returned from
59
+ :class:`ParserElement.parse_string` can be
60
+ accessed as a nested list, a dictionary, or an object with named
61
+ attributes.
62
+
63
+ The pyparsing module handles some of the problems that are typically
64
+ vexing when writing text parsers:
65
+
66
+ - extra or missing whitespace (the above program will also handle
67
+ "Hello,World!", "Hello , World !", etc.)
68
+ - quoted strings
69
+ - embedded comments
70
+
71
+
72
+ Getting Started -
73
+ -----------------
74
+ Visit the classes :class:`ParserElement` and :class:`ParseResults` to
75
+ see the base classes that most other pyparsing
76
+ classes inherit from. Use the docstrings for examples of how to:
77
+
78
+ - construct literal match expressions from :class:`Literal` and
79
+ :class:`CaselessLiteral` classes
80
+ - construct character word-group expressions using the :class:`Word`
81
+ class
82
+ - see how to create repetitive expressions using :class:`ZeroOrMore`
83
+ and :class:`OneOrMore` classes
84
+ - use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
85
+ and :class:`'&'<Each>` operators to combine simple expressions into
86
+ more complex ones
87
+ - associate names with your parsed results using
88
+ :class:`ParserElement.set_results_name`
89
+ - access the parsed data, which is returned as a :class:`ParseResults`
90
+ object
91
+ - find some helpful expression short-cuts like :class:`DelimitedList`
92
+ and :class:`one_of`
93
+ - find more useful common expressions in the :class:`pyparsing_common`
94
+ namespace class
95
+ """
96
+ from typing import NamedTuple
97
+
98
+
99
class version_info(NamedTuple):
    """Five-part version record: major, minor, micro, release level, serial."""

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self):
        """Dotted version string.

        A "final" release is just ``major.minor.micro``.  Any other release
        level appends the first letter of the level and the serial number,
        with a candidate ("c...") level spelled ``rc``.
        """
        dotted = f"{self.major}.{self.minor}.{self.micro}"
        # The level letter is looked up unconditionally, exactly as the
        # suffix is always built before being selected or discarded.
        level_letter = self.releaselevel[0]
        lead = "r" if level_letter == "c" else ""
        suffix = f"{lead}{level_letter}{self.serial}"
        return dotted if self.releaselevel == "final" else dotted + suffix

    def __str__(self):
        """Module name, version string and build timestamp on one line."""
        return "{} {} / {}".format(__name__, self.__version__, __version_time__)

    def __repr__(self):
        """``module.version_info(field=value, ...)`` for every tuple field."""
        rendered = ", ".join(
            f"{field}={value!r}" for field, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({rendered})"
121
+
122
+
123
+ __version_info__ = version_info(3, 1, 0, "final", 1)
124
+ __version_time__ = "18 Jun 2023 14:05 UTC"
125
+ __version__ = __version_info__.__version__
126
+ __versionTime__ = __version_time__
127
+ __author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
128
+
129
+ from .util import *
130
+ from .exceptions import *
131
+ from .actions import *
132
+ from .core import __diag__, __compat__
133
+ from .results import *
134
+ from .core import * # type: ignore[misc, assignment]
135
+ from .core import _builtin_exprs as core_builtin_exprs
136
+ from .helpers import * # type: ignore[misc, assignment]
137
+ from .helpers import _builtin_exprs as helper_builtin_exprs
138
+
139
+ from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
140
+ from .testing import pyparsing_test as testing
141
+ from .common import (
142
+ pyparsing_common as common,
143
+ _builtin_exprs as common_builtin_exprs,
144
+ )
145
+
146
+ # define backward compat synonyms
147
+ if "pyparsing_unicode" not in globals():
148
+ pyparsing_unicode = unicode # type: ignore[misc]
149
+ if "pyparsing_common" not in globals():
150
+ pyparsing_common = common # type: ignore[misc]
151
+ if "pyparsing_test" not in globals():
152
+ pyparsing_test = testing # type: ignore[misc]
153
+
154
+ core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
155
+
156
+
157
+ __all__ = [
158
+ "__version__",
159
+ "__version_time__",
160
+ "__author__",
161
+ "__compat__",
162
+ "__diag__",
163
+ "And",
164
+ "AtLineStart",
165
+ "AtStringStart",
166
+ "CaselessKeyword",
167
+ "CaselessLiteral",
168
+ "CharsNotIn",
169
+ "CloseMatch",
170
+ "Combine",
171
+ "DelimitedList",
172
+ "Dict",
173
+ "Each",
174
+ "Empty",
175
+ "FollowedBy",
176
+ "Forward",
177
+ "GoToColumn",
178
+ "Group",
179
+ "IndentedBlock",
180
+ "Keyword",
181
+ "LineEnd",
182
+ "LineStart",
183
+ "Literal",
184
+ "Located",
185
+ "PrecededBy",
186
+ "MatchFirst",
187
+ "NoMatch",
188
+ "NotAny",
189
+ "OneOrMore",
190
+ "OnlyOnce",
191
+ "OpAssoc",
192
+ "Opt",
193
+ "Optional",
194
+ "Or",
195
+ "ParseBaseException",
196
+ "ParseElementEnhance",
197
+ "ParseException",
198
+ "ParseExpression",
199
+ "ParseFatalException",
200
+ "ParseResults",
201
+ "ParseSyntaxException",
202
+ "ParserElement",
203
+ "PositionToken",
204
+ "QuotedString",
205
+ "RecursiveGrammarException",
206
+ "Regex",
207
+ "SkipTo",
208
+ "StringEnd",
209
+ "StringStart",
210
+ "Suppress",
211
+ "Token",
212
+ "TokenConverter",
213
+ "White",
214
+ "Word",
215
+ "WordEnd",
216
+ "WordStart",
217
+ "ZeroOrMore",
218
+ "Char",
219
+ "alphanums",
220
+ "alphas",
221
+ "alphas8bit",
222
+ "any_close_tag",
223
+ "any_open_tag",
224
+ "autoname_elements",
225
+ "c_style_comment",
226
+ "col",
227
+ "common_html_entity",
228
+ "condition_as_parse_action",
229
+ "counted_array",
230
+ "cpp_style_comment",
231
+ "dbl_quoted_string",
232
+ "dbl_slash_comment",
233
+ "delimited_list",
234
+ "dict_of",
235
+ "empty",
236
+ "hexnums",
237
+ "html_comment",
238
+ "identchars",
239
+ "identbodychars",
240
+ "infix_notation",
241
+ "java_style_comment",
242
+ "line",
243
+ "line_end",
244
+ "line_start",
245
+ "lineno",
246
+ "make_html_tags",
247
+ "make_xml_tags",
248
+ "match_only_at_col",
249
+ "match_previous_expr",
250
+ "match_previous_literal",
251
+ "nested_expr",
252
+ "null_debug_action",
253
+ "nums",
254
+ "one_of",
255
+ "original_text_for",
256
+ "printables",
257
+ "punc8bit",
258
+ "pyparsing_common",
259
+ "pyparsing_test",
260
+ "pyparsing_unicode",
261
+ "python_style_comment",
262
+ "quoted_string",
263
+ "remove_quotes",
264
+ "replace_with",
265
+ "replace_html_entity",
266
+ "rest_of_line",
267
+ "sgl_quoted_string",
268
+ "srange",
269
+ "string_end",
270
+ "string_start",
271
+ "token_map",
272
+ "trace_parse_action",
273
+ "ungroup",
274
+ "unicode_set",
275
+ "unicode_string",
276
+ "with_attribute",
277
+ "with_class",
278
+ # pre-PEP8 compatibility names
279
+ "__versionTime__",
280
+ "anyCloseTag",
281
+ "anyOpenTag",
282
+ "cStyleComment",
283
+ "commonHTMLEntity",
284
+ "conditionAsParseAction",
285
+ "countedArray",
286
+ "cppStyleComment",
287
+ "dblQuotedString",
288
+ "dblSlashComment",
289
+ "delimitedList",
290
+ "dictOf",
291
+ "htmlComment",
292
+ "indentedBlock",
293
+ "infixNotation",
294
+ "javaStyleComment",
295
+ "lineEnd",
296
+ "lineStart",
297
+ "locatedExpr",
298
+ "makeHTMLTags",
299
+ "makeXMLTags",
300
+ "matchOnlyAtCol",
301
+ "matchPreviousExpr",
302
+ "matchPreviousLiteral",
303
+ "nestedExpr",
304
+ "nullDebugAction",
305
+ "oneOf",
306
+ "opAssoc",
307
+ "originalTextFor",
308
+ "pythonStyleComment",
309
+ "quotedString",
310
+ "removeQuotes",
311
+ "replaceHTMLEntity",
312
+ "replaceWith",
313
+ "restOfLine",
314
+ "sglQuotedString",
315
+ "stringEnd",
316
+ "stringStart",
317
+ "tokenMap",
318
+ "traceParseAction",
319
+ "unicodeString",
320
+ "withAttribute",
321
+ "withClass",
322
+ ]
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/actions.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # actions.py
2
+
3
+ from .exceptions import ParseException
4
+ from .util import col, replaced_by_pep8
5
+
6
+
7
class OnlyOnce:
    """
    Guard around a parse action so that it runs at most once between resets.

    The first call is forwarded to the wrapped action and its result is
    returned; any further call raises :class:`ParseException` until
    :meth:`reset` is invoked.
    """

    def __init__(self, method_call):
        from .core import _trim_arity

        self.callable = _trim_arity(method_call)
        self.called = False

    def __call__(self, s, l, t):
        if self.called:
            raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
        # The flag is set only after the wrapped action has returned, so an
        # action that raises leaves the wrapper still callable.
        outcome = self.callable(s, l, t)
        self.called = True
        return outcome

    def reset(self):
        """
        Re-arm the wrapper so the parse action may run one more time.
        """

        self.called = False
31
+
32
+
33
def match_only_at_col(n):
    """
    Build a parse action that accepts a match only when it starts at
    column ``n`` of the input text.

    The returned action raises :class:`ParseException` when the column
    computed by ``col`` for the match location differs from ``n``;
    otherwise it returns ``None``.
    """

    def verify_col(strg, locn, toks):
        found_at = col(locn, strg)
        if found_at != n:
            raise ParseException(strg, locn, f"matched token not at column {n}")

    return verify_col
44
+
45
+
46
def replace_with(repl_str):
    """
    Build a parse action that ignores whatever was matched and yields the
    fixed value ``repl_str`` instead.  Handy together with
    :class:`transform_string<ParserElement.transform_string>` ().

    Example::

        num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
        na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
        term = na | num

        term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
    """
    # The three parse-action arguments are accepted but never consulted.
    constant_action = lambda _s, _l, _t: [repl_str]
    return constant_action
61
+
62
+
63
def remove_quotes(s, l, t):
    """
    Parse action that drops the first and last character of the first
    token - i.e. the surrounding quotation marks of a parsed quoted string.

    Example::

        # by default, quotation marks are included in parsed results
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]

        # use remove_quotes to strip quotation marks from parsed results
        quoted_string.set_parse_action(remove_quotes)
        quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
    """
    quoted = t[0]
    return quoted[1:-1]
78
+
79
+
80
def with_attribute(*args, **attr_dict):
    """
    Build a validating parse action for start tags created with
    :class:`make_xml_tags` or :class:`make_html_tags`.  The action only
    lets a tag through when it carries the required attribute values,
    which avoids false matches on common tags such as ``<TD>`` or
    ``<DIV>``.

    The required attribute names and values may be given as:

    - keyword arguments, as in ``(align="right")``, or
    - an explicit dict with the ``**`` operator, when an attribute
      name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
    - a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``

    For attribute names with a namespace prefix, you must use the second
    form.  Attribute names are matched insensitive to upper/lower case.

    If just testing for ``class`` (with or without a namespace), use
    :class:`with_class`.

    To require only that the attribute exists, whatever its value,
    pass ``with_attribute.ANY_VALUE`` as the value.

    Positional name-value tuples take precedence: when any are given,
    the keyword arguments are not consulted.

    Example::

        html = '''
            <div>
            Some text
            <div type="grid">1 4 0 1 0</div>
            <div type="graph">1,3 2,3 1,1</div>
            <div>this has no type</div>
            </div>

        '''
        div,div_end = make_html_tags("div")

        # only match div tag having a type attribute with value "grid"
        div_grid = div().set_parse_action(with_attribute(type="grid"))
        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        # construct a match with any div tag having a type attribute, regardless of the value
        div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    # Unpacking here rejects anything that is not a name/value pair at
    # construction time rather than when the action first runs.
    required = [(name, value) for name, value in (args or attr_dict.items())]

    def pa(s, l, tokens):
        for attr_name, expected in required:
            if attr_name not in tokens:
                raise ParseException(s, l, "no matching attribute " + attr_name)
            if expected != with_attribute.ANY_VALUE:
                actual = tokens[attr_name]
                if actual != expected:
                    raise ParseException(
                        s,
                        l,
                        f"attribute {attr_name!r} has value {actual!r}, must be {expected!r}",
                    )

    return pa


# Sentinel meaning "the attribute must be present, any value is accepted".
with_attribute.ANY_VALUE = object()  # type: ignore [attr-defined]
158
+
159
+
160
def with_class(classname, namespace=""):
    """
    Shortcut for :class:`with_attribute` when the attribute to check is
    ``class`` - awkward to spell directly because ``class`` is a reserved
    word in Python.  With a non-empty ``namespace`` the attribute checked
    is ``<namespace>:class``.

    Example::

        html = '''
            <div>
            Some text
            <div class="grid">1 4 0 1 0</div>
            <div class="graph">1,3 2,3 1,1</div>
            <div>this &lt;div&gt; has no class</div>
            </div>

        '''
        div,div_end = make_html_tags("div")
        div_grid = div().set_parse_action(with_class("grid"))

        grid_expr = div_grid + SkipTo(div | div_end)("body")
        for grid_header in grid_expr.search_string(html):
            print(grid_header.body)

        div_any_type = div().set_parse_action(with_class(with_attribute.ANY_VALUE))
        div_expr = div_any_type + SkipTo(div | div_end)("body")
        for div_header in div_expr.search_string(html):
            print(div_header.body)

    prints::

        1 4 0 1 0

        1 4 0 1 0
        1,3 2,3 1,1
    """
    if namespace:
        attr_name = f"{namespace}:class"
    else:
        attr_name = "class"
    return with_attribute(**{attr_name: classname})
198
+
199
+
200
# pre-PEP8 compatibility symbols
# Each camelCase stub below has an empty ("...") body and is handed to the
# decorator produced by replaced_by_pep8(<snake_case function>).
# NOTE(review): presumably the decorator rebinds the camelCase name to a
# synonym of the snake_case function - confirm in .util.replaced_by_pep8.
# fmt: off
@replaced_by_pep8(replace_with)
def replaceWith(): ...

@replaced_by_pep8(remove_quotes)
def removeQuotes(): ...

@replaced_by_pep8(with_attribute)
def withAttribute(): ...

@replaced_by_pep8(with_class)
def withClass(): ...

@replaced_by_pep8(match_only_at_col)
def matchOnlyAtCol(): ...

# fmt: on
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/common.py ADDED
@@ -0,0 +1,432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # common.py
2
+ from .core import *
3
+ from .helpers import DelimitedList, any_open_tag, any_close_tag
4
+ from datetime import datetime
5
+
6
+
7
+ # some other useful expressions - using lower-case class name since we are really using this as a namespace
8
+ class pyparsing_common:
9
+ """Here are some common low-level expressions that may be useful in
10
+ jump-starting parser development:
11
+
12
+ - numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
13
+ :class:`scientific notation<sci_real>`)
14
+ - common :class:`programming identifiers<identifier>`
15
+ - network addresses (:class:`MAC<mac_address>`,
16
+ :class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
17
+ - ISO8601 :class:`dates<iso8601_date>` and
18
+ :class:`datetime<iso8601_datetime>`
19
+ - :class:`UUID<uuid>`
20
+ - :class:`comma-separated list<comma_separated_list>`
21
+ - :class:`url`
22
+
23
+ Parse actions:
24
+
25
+ - :class:`convert_to_integer`
26
+ - :class:`convert_to_float`
27
+ - :class:`convert_to_date`
28
+ - :class:`convert_to_datetime`
29
+ - :class:`strip_html_tags`
30
+ - :class:`upcase_tokens`
31
+ - :class:`downcase_tokens`
32
+
33
+ Example::
34
+
35
+ pyparsing_common.number.run_tests('''
36
+ # any int or real number, returned as the appropriate type
37
+ 100
38
+ -100
39
+ +100
40
+ 3.14159
41
+ 6.02e23
42
+ 1e-12
43
+ ''')
44
+
45
+ pyparsing_common.fnumber.run_tests('''
46
+ # any int or real number, returned as float
47
+ 100
48
+ -100
49
+ +100
50
+ 3.14159
51
+ 6.02e23
52
+ 1e-12
53
+ ''')
54
+
55
+ pyparsing_common.hex_integer.run_tests('''
56
+ # hex numbers
57
+ 100
58
+ FF
59
+ ''')
60
+
61
+ pyparsing_common.fraction.run_tests('''
62
+ # fractions
63
+ 1/2
64
+ -3/4
65
+ ''')
66
+
67
+ pyparsing_common.mixed_integer.run_tests('''
68
+ # mixed fractions
69
+ 1
70
+ 1/2
71
+ -3/4
72
+ 1-3/4
73
+ ''')
74
+
75
+ import uuid
76
+ pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
77
+ pyparsing_common.uuid.run_tests('''
78
+ # uuid
79
+ 12345678-1234-5678-1234-567812345678
80
+ ''')
81
+
82
+ prints::
83
+
84
+ # any int or real number, returned as the appropriate type
85
+ 100
86
+ [100]
87
+
88
+ -100
89
+ [-100]
90
+
91
+ +100
92
+ [100]
93
+
94
+ 3.14159
95
+ [3.14159]
96
+
97
+ 6.02e23
98
+ [6.02e+23]
99
+
100
+ 1e-12
101
+ [1e-12]
102
+
103
+ # any int or real number, returned as float
104
+ 100
105
+ [100.0]
106
+
107
+ -100
108
+ [-100.0]
109
+
110
+ +100
111
+ [100.0]
112
+
113
+ 3.14159
114
+ [3.14159]
115
+
116
+ 6.02e23
117
+ [6.02e+23]
118
+
119
+ 1e-12
120
+ [1e-12]
121
+
122
+ # hex numbers
123
+ 100
124
+ [256]
125
+
126
+ FF
127
+ [255]
128
+
129
+ # fractions
130
+ 1/2
131
+ [0.5]
132
+
133
+ -3/4
134
+ [-0.75]
135
+
136
+ # mixed fractions
137
+ 1
138
+ [1]
139
+
140
+ 1/2
141
+ [0.5]
142
+
143
+ -3/4
144
+ [-0.75]
145
+
146
+ 1-3/4
147
+ [1.75]
148
+
149
+ # uuid
150
+ 12345678-1234-5678-1234-567812345678
151
+ [UUID('12345678-1234-5678-1234-567812345678')]
152
+ """
153
+
154
+ convert_to_integer = token_map(int)
155
+ """
156
+ Parse action for converting parsed integers to Python int
157
+ """
158
+
159
+ convert_to_float = token_map(float)
160
+ """
161
+ Parse action for converting parsed numbers to Python float
162
+ """
163
+
164
+ integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
165
+ """expression that parses an unsigned integer, returns an int"""
166
+
167
+ hex_integer = (
168
+ Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
169
+ )
170
+ """expression that parses a hexadecimal integer, returns an int"""
171
+
172
+ signed_integer = (
173
+ Regex(r"[+-]?\d+")
174
+ .set_name("signed integer")
175
+ .set_parse_action(convert_to_integer)
176
+ )
177
+ """expression that parses an integer with optional leading sign, returns an int"""
178
+
179
+ fraction = (
180
+ signed_integer().set_parse_action(convert_to_float)
181
+ + "/"
182
+ + signed_integer().set_parse_action(convert_to_float)
183
+ ).set_name("fraction")
184
+ """fractional expression of an integer divided by an integer, returns a float"""
185
+ fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
186
+
187
+ mixed_integer = (
188
+ fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
189
+ ).set_name("fraction or mixed integer-fraction")
190
+ """mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
191
+ mixed_integer.add_parse_action(sum)
192
+
193
+ real = (
194
+ Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
195
+ .set_name("real number")
196
+ .set_parse_action(convert_to_float)
197
+ )
198
+ """expression that parses a floating point number and returns a float"""
199
+
200
+ sci_real = (
201
+ Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
202
+ .set_name("real number with scientific notation")
203
+ .set_parse_action(convert_to_float)
204
+ )
205
+ """expression that parses a floating point number with optional
206
+ scientific notation and returns a float"""
207
+
208
# streamlining this expression makes the docs nicer-looking
# (named with set_name, like every other expression in this class, instead
# of the pre-PEP8 spelling setName)
number = (sci_real | real | signed_integer).set_name("number").streamline()
"""any numeric expression, returns the corresponding Python type"""
211
+
212
+ fnumber = (
213
+ Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
214
+ .set_name("fnumber")
215
+ .set_parse_action(convert_to_float)
216
+ )
217
+ """any int or real number, returned as float"""
218
+
219
+ identifier = Word(identchars, identbodychars).set_name("identifier")
220
+ """typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
221
+
222
+ ipv4_address = Regex(
223
+ r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
224
+ ).set_name("IPv4 address")
225
+ "IPv4 address (``0.0.0.0 - 255.255.255.255``)"
226
+
227
+ _ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
228
+ _full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
229
+ "full IPv6 address"
230
+ )
231
+ _short_ipv6_address = (
232
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
233
+ + "::"
234
+ + Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
235
+ ).set_name("short IPv6 address")
236
+ _short_ipv6_address.add_condition(
237
+ lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
238
+ )
239
+ _mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
240
+ ipv6_address = Combine(
241
+ (_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
242
+ "IPv6 address"
243
+ )
244
+ ).set_name("IPv6 address")
245
+ "IPv6 address (long, short, or mixed form)"
246
+
247
+ mac_address = Regex(
248
+ r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
249
+ ).set_name("MAC address")
250
+ "MAC address xx:xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
251
+
252
@staticmethod
def convert_to_date(fmt: str = "%Y-%m-%d"):
    """
    Build a parse action that turns the first matched token, a date
    string, into a Python ``datetime.date``.

    Params -
    - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)

    A token that ``datetime.strptime`` rejects with ``ValueError`` is
    reported as a ``ParseException`` carrying the same message.

    Example::

        date_expr = pyparsing_common.iso8601_date.copy()
        date_expr.set_parse_action(pyparsing_common.convert_to_date())
        print(date_expr.parse_string("1999-12-31"))

    prints::

        [datetime.date(1999, 12, 31)]
    """

    def cvt_fn(ss, ll, tt):
        try:
            moment = datetime.strptime(tt[0], fmt)
            return moment.date()
        except ValueError as ve:
            raise ParseException(ss, ll, str(ve))

    return cvt_fn
278
+
279
@staticmethod
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
    """Build a parse action that turns the first matched token, a
    datetime string, into a Python ``datetime.datetime``.

    Params -
    - fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)

    A token that ``datetime.strptime`` rejects with ``ValueError`` is
    reported as a ``ParseException`` carrying the same message.

    Example::

        dt_expr = pyparsing_common.iso8601_datetime.copy()
        dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
        print(dt_expr.parse_string("1999-12-31T23:59:59.999"))

    prints::

        [datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
    """

    def cvt_fn(s, l, t):
        try:
            moment = datetime.strptime(t[0], fmt)
        except ValueError as ve:
            raise ParseException(s, l, str(ve))
        else:
            return moment

    return cvt_fn
305
+
306
+ iso8601_date = Regex(
307
+ r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
308
+ ).set_name("ISO8601 date")
309
+ "ISO8601 date (``yyyy-mm-dd``)"
310
+
311
+ iso8601_datetime = Regex(
312
+ r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
313
+ ).set_name("ISO8601 datetime")
314
+ "ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
315
+
316
+ uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
317
+ "UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
318
+
319
+ _html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
320
+
321
@staticmethod
def strip_html_tags(s: str, l: int, tokens: ParseResults):
    """Parse action that removes HTML tags from the first matched token.

    The token text is run through ``pyparsing_common._html_stripper``
    (suppressed opening and closing tag expressions) with
    ``transform_string``, and the transformed text is returned.

    Example::

        # strip HTML links from normal text
        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        td, td_end = make_html_tags("TD")
        table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end
        print(table_text.parse_string(text).body)

    Prints::

        More info at the pyparsing wiki page
    """
    stripper = pyparsing_common._html_stripper
    markup = tokens[0]
    return stripper.transform_string(markup)
338
+
339
+ _commasepitem = (
340
+ Combine(
341
+ OneOrMore(
342
+ ~Literal(",")
343
+ + ~LineEnd()
344
+ + Word(printables, exclude_chars=",")
345
+ + Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
346
+ )
347
+ )
348
+ .streamline()
349
+ .set_name("commaItem")
350
+ )
351
+ comma_separated_list = DelimitedList(
352
+ Opt(quoted_string.copy() | _commasepitem, default="")
353
+ ).set_name("comma separated list")
354
+ """Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
355
+
356
+ upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
357
+ """Parse action to convert tokens to upper case."""
358
+
359
+ downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
360
+ """Parse action to convert tokens to lower case."""
361
+
362
+ # fmt: off
363
+ url = Regex(
364
+ # https://mathiasbynens.be/demo/url-regex
365
+ # https://gist.github.com/dperini/729294
366
+ r"(?P<url>" +
367
+ # protocol identifier (optional)
368
+ # short syntax // still required
369
+ r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
370
+ # user:pass BasicAuth (optional)
371
+ r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
372
+ r"(?P<host>" +
373
+ # IP address exclusion
374
+ # private & local networks
375
+ r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
376
+ r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
377
+ r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
378
+ # IP address dotted notation octets
379
+ # excludes loopback network 0.0.0.0
380
+ # excludes reserved space >= 224.0.0.0
381
+ # excludes network & broadcast addresses
382
+ # (first & last IP address of each class)
383
+ r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
384
+ r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
385
+ r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
386
+ r"|" +
387
+ # host & domain names, may end with dot
388
+ # can be replaced by a shortest alternative
389
+ # (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
390
+ r"(?:" +
391
+ r"(?:" +
392
+ r"[a-z0-9\u00a1-\uffff]" +
393
+ r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
394
+ r")?" +
395
+ r"[a-z0-9\u00a1-\uffff]\." +
396
+ r")+" +
397
+ # TLD identifier name, may end with dot
398
+ r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
399
+ r")" +
400
+ # port number (optional)
401
+ r"(:(?P<port>\d{2,5}))?" +
402
+ # resource path (optional)
403
+ r"(?P<path>\/[^?# ]*)?" +
404
+ # query string (optional)
405
+ r"(\?(?P<query>[^#]*))?" +
406
+ # fragment (optional)
407
+ r"(#(?P<fragment>\S*))?" +
408
+ r")"
409
+ ).set_name("url")
410
+ """URL (http/https/ftp scheme)"""
411
+ # fmt: on
412
+
413
+ # pre-PEP8 compatibility names
414
+ convertToInteger = convert_to_integer
415
+ """Deprecated - use :class:`convert_to_integer`"""
416
+ convertToFloat = convert_to_float
417
+ """Deprecated - use :class:`convert_to_float`"""
418
+ convertToDate = convert_to_date
419
+ """Deprecated - use :class:`convert_to_date`"""
420
+ convertToDatetime = convert_to_datetime
421
+ """Deprecated - use :class:`convert_to_datetime`"""
422
+ stripHTMLTags = strip_html_tags
423
+ """Deprecated - use :class:`strip_html_tags`"""
424
+ upcaseTokens = upcase_tokens
425
+ """Deprecated - use :class:`upcase_tokens`"""
426
+ downcaseTokens = downcase_tokens
427
+ """Deprecated - use :class:`downcase_tokens`"""
428
+
429
+
430
+ _builtin_exprs = [
431
+ v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
432
+ ]
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/core.py ADDED
The diff for this file is too large to render. See raw diff
 
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/diagram/__init__.py ADDED
@@ -0,0 +1,656 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # mypy: ignore-errors
2
+ import railroad
3
+ from pip._vendor import pyparsing
4
+ import typing
5
+ from typing import (
6
+ List,
7
+ NamedTuple,
8
+ Generic,
9
+ TypeVar,
10
+ Dict,
11
+ Callable,
12
+ Set,
13
+ Iterable,
14
+ )
15
+ from jinja2 import Template
16
+ from io import StringIO
17
+ import inspect
18
+
19
+
20
+ jinja2_template_source = """\
21
+ {% if not embed %}
22
+ <!DOCTYPE html>
23
+ <html>
24
+ <head>
25
+ {% endif %}
26
+ {% if not head %}
27
+ <style>
28
+ .railroad-heading {
29
+ font-family: monospace;
30
+ }
31
+ </style>
32
+ {% else %}
33
+ {{ head | safe }}
34
+ {% endif %}
35
+ {% if not embed %}
36
+ </head>
37
+ <body>
38
+ {% endif %}
39
+ {{ body | safe }}
40
+ {% for diagram in diagrams %}
41
+ <div class="railroad-group">
42
+ <h1 class="railroad-heading">{{ diagram.title }}</h1>
43
+ <div class="railroad-description">{{ diagram.text }}</div>
44
+ <div class="railroad-svg">
45
+ {{ diagram.svg }}
46
+ </div>
47
+ </div>
48
+ {% endfor %}
49
+ {% if not embed %}
50
+ </body>
51
+ </html>
52
+ {% endif %}
53
+ """
54
+
55
# Build the jinja2 Template once at import time; railroad_to_html() renders it
template = Template(jinja2_template_source)
56
+
57
# Note: a dataclass would be the natural fit, but a NamedTuple keeps this usable on older Pythons
class NamedDiagram(NamedTuple):
    """
    A simple structure for associating a name with a railroad diagram
    """

    # title shown for the diagram
    name: str
    # the railroad item to draw; may be None
    diagram: typing.Optional[railroad.DiagramItem]
    # ordering key used when the diagrams are sorted
    index: int
65
+
66
+ T = TypeVar("T")
67
+
68
+
69
class EachItem(railroad.Group):
    """
    Railroad item used to draw an ``Each`` expression. It is built as:
    - a Group, labelled with ``all_label``, wrapping
    - a OneOrMore, wrapping
    - a Choice over the supplied items
    The label signals that every alternative has to be matched.
    """

    all_label = "[ALL]"

    # NOTE: the varargs name ``items`` is part of the contract - EditablePartial
    # maps an ``items=[...]`` keyword onto this ``*items`` parameter.
    def __init__(self, *items):
        # the index of the last item is handed to Choice as its first argument
        last_index = len(items) - 1
        alternatives = railroad.Choice(last_index, *items)
        repeated = railroad.OneOrMore(item=alternatives)
        super().__init__(repeated, label=self.all_label)
84
+
85
+
86
class AnnotatedItem(railroad.Group):
    """
    Group subclass whose label is the given text wrapped in square brackets
    """

    def __init__(self, label: str, item):
        # A falsy label (e.g. "") is passed through untouched instead of becoming "[]"
        if label:
            caption = "[{}]".format(label)
        else:
            caption = label
        super().__init__(item=item, label=caption)
93
+
94
+
95
class EditablePartial(Generic[T]):
    """
    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
    constructed.

    Attributes:
        func: the callable that will eventually be invoked
        args: mutable list of positional arguments
        kwargs: mutable dict of keyword arguments
    """

    # We need this here because the railroad constructors actually transform the data, so can't be called until the
    # entire tree is assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
        """
        # Build through ``cls`` so that a subclass gets an instance of itself
        return cls(func=func, args=list(args), kwargs=kwargs)

    @property
    def name(self):
        """The ``name`` keyword argument stored for the call (KeyError if absent)."""
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Evaluate the partial and return the result. The stored ``args`` and ``kwargs`` are copied first, so
        they are left unmodified.
        """
        args = self.args.copy()
        kwargs = self.kwargs.copy()

        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
        # args=['list', 'of', 'things'])
        try:
            varargs_name = inspect.getfullargspec(self.func).varargs
        except TypeError:
            # Callable cannot be introspected (e.g. some builtins): there is no
            # known varargs name to translate, so call it with what we have
            varargs_name = None
        if varargs_name is not None and varargs_name in kwargs:
            args += kwargs.pop(varargs_name)

        return self.func(*args, **kwargs)
135
+
136
+
137
def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str:
    """
    Render a list of NamedDiagram into one HTML string using the module-level template
    :param diagrams: diagrams to render; entries whose ``diagram`` is None are skipped
    :param embed: forwarded to the template as ``embed``
    :param kwargs: extra kwargs forwarded to the template; ``css`` is also handed to ``writeStandalone``
    """
    rendered = []
    for named in diagrams:
        drawing = named.diagram
        if drawing is None:
            continue

        buffer = StringIO()
        try:
            css = kwargs.get('css')
            drawing.writeStandalone(buffer.write, css=css)
        except AttributeError:
            # fall back to writeSvg when the writeStandalone call raises AttributeError
            drawing.writeSvg(buffer.write)

        # the diagram with index 0 is flagged as the root in its heading
        if named.index == 0:
            heading = named.name + " (root)"
        else:
            heading = named.name
        rendered.append({"title": heading, "text": "", "svg": buffer.getvalue()})

    return template.render(diagrams=rendered, embed=embed, **kwargs)
158
+
159
+
160
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Recursively evaluate EditablePartials, descending through lists and dicts; anything else is
    returned unchanged
    """
    if isinstance(partial, EditablePartial):
        # resolve the stored arguments in place first, then invoke the partial itself
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()
    if isinstance(partial, list):
        return [resolve_partial(entry) for entry in partial]
    if isinstance(partial, dict):
        return {key: resolve_partial(value) for key, value in partial.items()}
    return partial
174
+
175
+
176
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: typing.Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
) -> List[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
    creation if you want to access the Railroad tree before it is converted to HTML
    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
        shown vertically instead of horizontally
    :param show_results_names: bool to indicate whether results name annotations should be
        included in the diagram
    :param show_groups: bool to indicate whether groups should be highlighted with an unlabeled
        surrounding box
    :returns: the resolved diagrams, sorted by their ``index``
    """
    # Walk everything below (and including) the root
    state = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=state,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
    )

    # If the root is still pending in the state, force it out into its own diagram
    root_id = id(element)
    if root_id in state:
        root_state = state[root_id]
        if not element.customName:
            root_state.name = ""
        root_state.mark_for_extraction(root_id, state, force=True)

    # The walk is finished: turn the intermediate partials into Railroad elements
    pending = list(state.diagrams.values())
    if len(pending) > 1:
        # keep only the first diagram seen for each name
        names_seen = set()
        unique = []
        for candidate in pending:
            # don't extract SkipTo elements, they are uninformative as subdiagrams
            if candidate.name == "...":
                continue
            if candidate.name is None or candidate.name in names_seen:
                continue
            names_seen.add(candidate.name)
            unique.append(candidate)
        pending = unique
    # (a lone diagram is always kept, even when it has no name)

    built = [resolve_partial(candidate) for candidate in pending]
    return sorted(built, key=lambda diag: diag.index)
232
+
233
+
234
+ def _should_vertical(
235
+ specification: int, exprs: Iterable[pyparsing.ParserElement]
236
+ ) -> bool:
237
+ """
238
+ Returns true if we should return a vertical list of elements
239
+ """
240
+ if specification is None:
241
+ return False
242
+ else:
243
+ return len(_visible_exprs(exprs)) >= specification
244
+
245
+
246
class ElementState:
    """
    Conversion bookkeeping kept for one pyparsing element
    """

    # Note: a dataclass would suit this, but plain attributes keep it usable on older Pythons
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: typing.Optional[int] = None,
    ):
        #: The pyparsing element being tracked
        self.element: pyparsing.ParserElement = element
        #: Display name for the element (may be unset until extraction is requested)
        self.name: typing.Optional[str] = name
        #: The Railroad element for this pyparsing element, still unevaluated
        self.converted: EditablePartial = converted
        #: The parent Railroad element, kept so this one can be swapped out if it is extracted
        self.parent: EditablePartial = parent
        #: Discovery order of the element; becomes the diagram index if this is extracted
        self.number: int = number
        #: Position of this element inside its parent's ``items``
        self.parent_index: typing.Optional[int] = parent_index
        #: Set once this element should be pulled out into a subdiagram
        self.extract: bool = False
        #: Set once every child of this element has been filled in
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Flag this element for extraction into a sub-diagram (used when it has been seen more than once)
        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
            root element when we know we're finished
        """
        self.extract = True

        # Pick a name only if none is set yet: explicit name, then the element's customName, then ""
        if not self.name:
            self.name = name or self.element.customName or ""

        # Being flagged is not enough to extract right away: the children may not all be attached yet,
        # and trivial elements (string literals and the like) are not worth a diagram of their own
        ready = force or (self.complete and _worth_extracting(self.element))
        if ready:
            state.extract_into_diagram(el_id)
306
+
307
+
308
class ConverterState:
    """
    Bookkeeping shared across the recursive walk of the element tree. Item access
    (``state[id]``, ``id in state``, ``del state[id]``) operates on the per-element states.
    """

    def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
        #: ElementState for each element still being tracked, keyed by the element's id
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: Subdiagrams that have been extracted, keyed by the element's id
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: Counter behind generate_unnamed()
        self.unnamed_index: int = 1
        #: Counter behind generate_index(); the values are used for sorting
        self.index: int = 0
        #: Shared kwargs that are used to customize the construction of diagrams
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Advance and return the counter used to name otherwise unnamed diagrams
        """
        bumped = self.unnamed_index + 1
        self.unnamed_index = bumped
        return bumped

    def generate_index(self) -> int:
        """
        Advance and return the counter used to index (and later sort) diagrams
        """
        bumped = self.index + 1
        self.index = bumped
        return bumped

    def extract_into_diagram(self, el_id: int):
        """
        Move a tracked element into its own subdiagram. Used when the same token is
        encountered twice in one tree: the token's slot in its parent is replaced
        by a NonTerminal carrying its name, and a new NamedDiagram partial is recorded.
        """
        position = self[el_id]

        # Swap the element's slot in its parent for a named reference
        parent = position.parent
        if parent:
            reference = EditablePartial.from_call(railroad.NonTerminal, text=position.name)
            if "item" in parent.kwargs:
                parent.kwargs["item"] = reference
            elif "items" in parent.kwargs:
                parent.kwargs["items"][position.parent_index] = reference

        # For a Group, diagram its content directly; the title is carried by the diagram name
        converted = position.converted
        if converted.func == railroad.Group:
            content = converted.kwargs["item"]
        else:
            content = converted

        diagram_partial = EditablePartial.from_call(
            railroad.Diagram, content, **self.diagram_kwargs
        )
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=position.name,
            diagram=diagram_partial,
            index=position.number,
        )

        # the element is no longer tracked as pending once it has its own diagram
        del self[el_id]
384
+
385
+
386
+ def _worth_extracting(element: pyparsing.ParserElement) -> bool:
387
+ """
388
+ Returns true if this element is worth having its own sub-diagram. Simply, if any of its children
389
+ themselves have children, then its complex enough to extract
390
+ """
391
+ children = element.recurse()
392
+ return any(child.recurse() for child in children)
393
+
394
+
395
+ def _apply_diagram_item_enhancements(fn):
396
+ """
397
+ decorator to ensure enhancements to a diagram item (such as results name annotations)
398
+ get applied on return from _to_diagram_element (we do this since there are several
399
+ returns in _to_diagram_element)
400
+ """
401
+
402
+ def _inner(
403
+ element: pyparsing.ParserElement,
404
+ parent: typing.Optional[EditablePartial],
405
+ lookup: ConverterState = None,
406
+ vertical: int = None,
407
+ index: int = 0,
408
+ name_hint: str = None,
409
+ show_results_names: bool = False,
410
+ show_groups: bool = False,
411
+ ) -> typing.Optional[EditablePartial]:
412
+ ret = fn(
413
+ element,
414
+ parent,
415
+ lookup,
416
+ vertical,
417
+ index,
418
+ name_hint,
419
+ show_results_names,
420
+ show_groups,
421
+ )
422
+
423
+ # apply annotation for results name, if present
424
+ if show_results_names and ret is not None:
425
+ element_results_name = element.resultsName
426
+ if element_results_name:
427
+ # add "*" to indicate if this is a "list all results" name
428
+ element_results_name += "" if element.modalResults else "*"
429
+ ret = EditablePartial.from_call(
430
+ railroad.Group, item=ret, label=element_results_name
431
+ )
432
+
433
+ return ret
434
+
435
+ return _inner
436
+
437
+
438
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Return the expressions that have neither a custom name nor a results name and are not
    an instance of one of the excluded element types
    """
    excluded_types = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )
    kept = []
    for expr in exprs:
        if expr.customName or expr.resultsName or isinstance(expr, excluded_types):
            continue
        kept.append(expr)
    return kept
449
+
450
+
451
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: typing.Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
    show_groups: bool = False,
) -> typing.Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element
    :param element: The pyparsing element to convert
    :param parent: The parent of this element in the output tree
    :param lookup: The shared converter state that keeps track of useful things
    :param vertical: Threshold on the number of visible child expressions at which a list of elements is
        drawn vertically (see ``_should_vertical``). ``None`` disables vertical layout.
    :param index: The index of this element within the parent
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :param show_groups: bool flag indicating whether to show groups using bounding box
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed,
        or None when the element contributes nothing to the diagram
    """
    exprs = element.recurse()
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                # pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                    show_groups=show_groups,
                )

    # If the element isn't worth extracting, we always treat it as the first time we see it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len({(e.name, e.resultsName) for e in exprs}) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Group):
        # NOTE: this is the only Group branch; a later duplicate test for
        # pyparsing.Group could never be reached and has been dropped
        if show_groups:
            ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
        else:
            ret = EditablePartial.from_call(railroad.Group, label="", item="")
    elif isinstance(element, pyparsing.TokenConverter):
        label = type(element).__name__.lower()
        if label == "tokenconverter":
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
        else:
            ret = EditablePartial.from_call(AnnotatedItem, label=label, item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif isinstance(element, pyparsing.ParseElementEnhance):
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    elif len(exprs) > 0:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    else:
        # No children: draw the element as a terminal labelled with its default name
        ret = EditablePartial.from_call(railroad.Terminal, element.defaultName)

    if ret is None:
        return None

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
            show_groups=show_groups,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all this items children are none, skip this item
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/exceptions.py ADDED
@@ -0,0 +1,299 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # exceptions.py
2
+
3
+ import re
4
+ import sys
5
+ import typing
6
+
7
+ from .util import (
8
+ col,
9
+ line,
10
+ lineno,
11
+ _collapse_string_to_ranges,
12
+ replaced_by_pep8,
13
+ )
14
+ from .unicode import pyparsing_unicode as ppu
15
+
16
+
17
class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic):
    # Combination of several pyparsing_unicode sets; only its ``alphanums``
    # attribute is used here, to build the word-extraction regex below
    pass


# Character-class body derived from the combined ``alphanums`` string
_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums)
# Matches a run of 1 to 16 of those characters, otherwise a single character
# matched by ``.``; ParseBaseException.__str__ uses it to report what was
# found at the error location
_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.")
23
+
24
+
25
class ParseBaseException(Exception):
    """base exception class for all parsing runtime exceptions"""

    loc: int
    msg: str
    pstr: str
    parser_element: typing.Any  # "ParserElement"
    args: typing.Tuple[str, int, typing.Optional[str]]

    __slots__ = ("loc", "msg", "pstr", "parser_element", "args")

    # Performance tuning: a great many of these get constructed, so the
    # constructor is deliberately kept minimal
    def __init__(
        self,
        pstr: str,
        loc: int = 0,
        msg: typing.Optional[str] = None,
        elem=None,
    ):
        self.loc = loc
        if msg is not None:
            self.msg = msg
            self.pstr = pstr
        else:
            # single-string form: the first argument is the message and there is no input text
            self.msg = pstr
            self.pstr = ""
        self.parser_element = elem
        self.args = (pstr, loc, msg)

    @staticmethod
    def explain_exception(exc, depth=16):
        """
        Translate the Python traceback attached to an exception into a list of the
        pyparsing expressions (and other callers) that led to it being raised.

        Parameters:

        - exc - exception raised during parsing (need not be a ParseException, in support
          of Python exceptions that might be raised in a parse action)
        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.
        """
        import inspect
        from .core import ParserElement

        if depth is None:
            depth = sys.getrecursionlimit()

        lines = []
        if isinstance(exc, ParseBaseException):
            # input line followed by a caret under the failing column
            lines.extend([exc.line, " " * (exc.column - 1) + "^"])
        lines.append(f"{type(exc).__name__}: {exc}")

        if depth <= 0:
            return "\n".join(lines)

        frames = inspect.getinnerframes(exc.__traceback__, context=depth)
        reported = set()
        for frame_info in frames[-depth:]:
            frame = frame_info[0]
            func_name = frame.f_code.co_name
            owner = frame.f_locals.get("self", None)

            if isinstance(owner, ParserElement):
                # list each parser element once, and only from its parse-implementation frames
                if not func_name.startswith(("parseImpl", "_parseNoCache")):
                    continue
                if id(owner) in reported:
                    continue
                reported.add(id(owner))

                owner_type = type(owner)
                lines.append(
                    f"{owner_type.__module__}.{owner_type.__name__} - {owner}"
                )
            elif owner is not None:
                owner_type = type(owner)
                lines.append(f"{owner_type.__module__}.{owner_type.__name__}")
            else:
                if func_name in ("wrapper", "<module>"):
                    continue
                lines.append(func_name)

            # only frames that produced a line count against the depth budget
            depth -= 1
            if not depth:
                break

        return "\n".join(lines)

    @classmethod
    def _from_exception(cls, pe):
        """
        internal factory method to simplify creating one type of ParseException
        from another - avoids having __init__ signature conflicts among subclasses
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element)

    @property
    def line(self) -> str:
        """
        The line of text in which the exception occurred.
        """
        return line(self.loc, self.pstr)

    @property
    def lineno(self) -> int:
        """
        The 1-based line number of the text at which the exception occurred.
        """
        return lineno(self.loc, self.pstr)

    @property
    def col(self) -> int:
        """
        The 1-based column, within its line, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    @property
    def column(self) -> int:
        """
        The 1-based column, within its line, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    # pre-PEP8 compatibility
    @property
    def parserElement(self):
        return self.parser_element

    @parserElement.setter
    def parserElement(self, elem):
        self.parser_element = elem

    def __str__(self) -> str:
        if not self.pstr:
            found_part = ""
        elif self.loc >= len(self.pstr):
            found_part = ", found end of text"
        else:
            # pull out next word at error location
            word_match = _exception_word_extractor.match(self.pstr, self.loc)
            if word_match is None:
                found = self.pstr[self.loc : self.loc + 1]
            else:
                found = word_match.group(0)
            found_part = (", found %r" % found).replace(r"\\", "\\")
        return f"{self.msg}{found_part} (at char {self.loc}), (line:{self.lineno}, col:{self.column})"

    def __repr__(self):
        return str(self)

    def mark_input_line(
        self, marker_string: typing.Optional[str] = None, *, markerString: str = ">!<"
    ) -> str:
        """
        Return the input line on which the exception occurred, stripped of surrounding
        whitespace, with a marker inserted at the exception's column.
        """
        # the positional argument wins over the pre-PEP8 keyword when it is given
        marker = marker_string if marker_string is not None else markerString
        text = self.line
        insert_at = self.column - 1
        if marker:
            text = text[:insert_at] + marker + text[insert_at:]
        return text.strip()

    def explain(self, depth=16) -> str:
        """
        Translate this exception's traceback into a list of the pyparsing expressions
        that caused it to be raised; delegates to :meth:`explain_exception`.

        Parameters:

        - depth (default=16) - number of levels back in the stack trace to list expression
          and function names; if None, the full stack trace names will be listed; if 0, only
          the failing input line, marker, and exception string will be shown

        Returns a multi-line string listing the ParserElements and/or function names in the
        exception's stack trace.

        Example::

            expr = pp.Word(pp.nums) * 3
            try:
                expr.parse_string("123 456 A789")
            except pp.ParseException as pe:
                print(pe.explain(depth=0))

        prints the failing input line, a ``^`` marker under the failing column, and then
        the exception type and message.

        Note: the diagnostic output will include string representations of the expressions
        that failed to parse. These representations will be more helpful if you use `set_name` to
        give identifiable names to your expressions. Otherwise they will use the default string
        forms, which may be cryptic to read.

        Note: pyparsing's default truncation of exception tracebacks may also truncate the
        stack of expressions that are displayed in the ``explain`` output. To get the full listing
        of parser expressions, you may have to set ``ParserElement.verbose_stacktrace = True``
        """
        return self.explain_exception(self, depth)

    # fmt: off
    @replaced_by_pep8(mark_input_line)
    def markInputline(self): ...
    # fmt: on
250
+
251
+
252
class ParseException(ParseBaseException):
    """
    Exception thrown when a parse expression doesn't match the input string

    Example::

        try:
            Word(nums).set_name("integer").parse_string("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.column))

    prints::

       Expected integer (at char 0), (line:1, col:1)
        column: 1

    Note: the message text comes from :meth:`ParseBaseException.__str__`, which also
    inserts a ``, found ...`` fragment after the message whenever the exception carries
    the input string, so the first printed line may include that fragment.
    """
270
+
271
+
272
class ParseFatalException(ParseBaseException):
    """
    User-throwable exception thrown when inconsistent parse content
    is found; stops all parsing immediately.

    Adds no behavior of its own on top of :class:`ParseBaseException`.
    """
277
+
278
+
279
class ParseSyntaxException(ParseFatalException):
    """
    Just like :class:`ParseFatalException`, but thrown internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) indicates
    that parsing is to stop immediately because an unbacktrackable
    syntax error has been found.

    Adds no behavior of its own on top of :class:`ParseFatalException`.
    """
286
+
287
+
288
class RecursiveGrammarException(Exception):
    """
    Exception thrown by :class:`ParserElement.validate` if the
    grammar could be left-recursive; parser may need to enable
    left recursion using :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`

    Attributes:
        parseElementTrace: the list of parse elements passed to the constructor
    """

    def __init__(self, parseElementList):
        # Hand the argument to Exception so that ``args`` is populated; copy and
        # pickle rebuild an exception by calling its class with ``args``
        super().__init__(parseElementList)
        self.parseElementTrace = parseElementList

    def __str__(self) -> str:
        return f"RecursiveGrammarException: {self.parseElementTrace}"
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/helpers.py ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # helpers.py
2
+ import html.entities
3
+ import re
4
+ import sys
5
+ import typing
6
+
7
+ from . import __diag__
8
+ from .core import *
9
+ from .util import (
10
+ _bslash,
11
+ _flatten,
12
+ _escape_regex_range_chars,
13
+ replaced_by_pep8,
14
+ )
15
+
16
+
17
+ #
18
+ # global helpers
19
+ #
20
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Build an expression for a length-prefixed list of ``expr`` matches.

    The generated expression matches input of the form::

        integer expr expr expr...

    in which the leading integer states how many ``expr`` items follow.
    The resulting tokens are the list of ``expr`` tokens; the leading count
    token itself is removed from the results.

    When ``int_expr`` is supplied, it must be a pyparsing expression that
    yields an integer value.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    count_expr = intExpr or int_expr
    items_expr = Forward()

    def bind_array_length(s, l, t):
        nonlocal items_expr
        length = t[0]
        if length:
            items_expr <<= expr * length
        else:
            items_expr <<= Empty()
        # drop the positional tokens, but leave any named results in place
        del t[:]

    if count_expr is None:
        count_expr = Word(nums).set_parse_action(lambda t: int(t[0]))
    else:
        # work on a copy so the caller's expression is not modified
        count_expr = count_expr.copy()
    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(bind_array_length, call_during_try=True)

    counted = count_expr + items_expr
    return counted.set_name("(len) " + str(expr) + "...")
78
+
79
+
80
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Build an expression that matches a literal repeat of whatever
    tokens a previous expression matched. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    matches ``"1:1"`` but not ``"1:2"``. Since the comparison is against
    the previously matched literal text, this also matches the leading
    ``"1:1"`` in ``"1:10"``. Use :class:`match_previous_expr` if that is
    not wanted. Do *not* use with packrat parsing enabled.
    """
    repeater = Forward()

    def copy_token_to_repeater(s, l, t):
        if not t:
            repeater << Empty()
            return
        if len(t) == 1:
            repeater << t[0]
            return
        # several tokens: flatten them and require each one as a Literal
        flattened = _flatten(t.as_list())
        repeater << And(Literal(tok) for tok in flattened)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    repeater.set_name("(prev) " + str(expr))
    return repeater
111
+
112
+
113
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.

    Parameters:

    - ``expr`` - the earlier expression whose matched tokens must be
      repeated; a parse action is added to it in place

    Returns a :class:`Forward` wrapping a copy of ``expr``.

    Raises :class:`ParseException` at parse time if the tokens matched by
    the returned expression differ from those last matched by ``expr``.
    """
    rep = Forward()
    # The repeat uses a copy of expr, so it parses the same grammar without
    # sharing the parse action added to expr below.
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # Tokens matched by the original expr, flattened for comparison.
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        # Each match of expr replaces the checker, so rep always compares
        # against the most recent match.
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
147
+
148
+
149
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Quickly define a set of alternative :class:`Literal` s. Longer
    alternatives are always tested before shorter ones they would
    conflict with, whatever the input order, yet the result is
    a :class:`MatchFirst` for best performance.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - as an optimization, will
      generate a :class:`Regex` object; otherwise, will generate
      a :class:`MatchFirst` object (if ``caseless=True`` or ``as_keyword=True``, or if
      creating a :class:`Regex` raises an exception) - (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12   AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    keyword_mode = asKeyword or as_keyword
    regex_allowed = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:

        def same_symbol(a, b):
            return a.upper() == b.upper()

        def is_prefix_of(a, b):
            return b.upper().startswith(a.upper())

        element_class = CaselessKeyword if keyword_mode else CaselessLiteral
    else:

        def same_symbol(a, b):
            return a == b

        def is_prefix_of(a, b):
            return b.startswith(a)

        element_class = Keyword if keyword_mode else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # Move longer choices ahead of the shorter ones that would mask them and
    # drop duplicates (skipped when every symbol is a single character).
    if any(len(sym) > 1 for sym in symbols):
        idx = 0
        while idx < len(symbols) - 1:
            current = symbols[idx]
            for pos, candidate in enumerate(symbols[idx + 1 :], start=idx + 1):
                if same_symbol(candidate, current):
                    del symbols[pos]
                    break
                if is_prefix_of(current, candidate):
                    del symbols[pos]
                    symbols.insert(idx, candidate)
                    break
            else:
                idx += 1

    if regex_allowed:
        flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # only single characters: a character-class pattern suffices
                char_class = "".join(_escape_regex_range_chars(sym) for sym in symbols)
                pattern = f"[{char_class}]"
            else:
                pattern = "|".join(re.escape(sym) for sym in symbols)

            # keywords get \b word-break markers around the alternatives
            if keyword_mode:
                pattern = rf"\b(?:{pattern})\b"

            regex_expr = Regex(pattern, flags=flags).set_name(" | ".join(symbols))

            if caseless:
                # report the symbol as it was specified, not with whatever
                # casing appeared in the input string
                canonical = {sym.lower(): sym for sym in symbols}
                regex_expr.add_parse_action(lambda s, l, t: canonical[t[0].lower()])

            return regex_expr

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # fallback: plain MatchFirst over the individual symbols
    alternatives = MatchFirst(element_class(sym) for sym in symbols)
    return alternatives.set_name(" | ".join(symbols))
273
+
274
+
275
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Define a dictionary-style expression from a key pattern and a value
    pattern. This assembles the :class:`Dict`, :class:`OneOrMore` and
    :class:`Group` wrappers in the proper order. The key pattern
    may contain delimiting markers or punctuation provided they are
    suppressed, so that only the significant key text remains. The value
    pattern may contain named results, so the :class:`Dict` results
    can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    entry = Group(key + value)
    entries = OneOrMore(entry)
    return Dict(entries)
313
+
314
+
315
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Return the original, untokenized input text matched by an
    expression. This is handy for turning the parsed fields of an HTML
    start tag back into the raw tag text, or for reverting separate tokens
    with intervening whitespace to the original matching input text. By
    default the result is a string holding the original parsed text.

    When the optional ``as_string`` argument is ``False``, the result is
    a :class:`ParseResults` holding any results names that were
    originally matched plus a single token with the original matched
    text from the input string. So if the expression given to
    :class:`original_text_for` contains expressions with defined
    results names, ``as_string`` must be ``False`` for those results
    name values to be preserved.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    want_string = asString and as_string

    def current_location(s, loc, t):
        return loc

    def text_only(s, l, t):
        return s[t._original_start : t._original_end]

    def text_with_names(s, l, t):
        # replace the tokens with the raw text; popping removes the two
        # bookkeeping names while other named results stay
        t[:] = [s[t.pop("_original_start") : t.pop("_original_end")]]

    start_marker = Empty().set_parse_action(current_location)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False

    wrapped = start_marker("_original_start") + expr + end_marker("_original_end")
    wrapped.set_parse_action(text_only if want_string else text_with_names)
    wrapped.ignoreExprs = expr.ignoreExprs
    wrapped.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return wrapped
366
+
367
+
368
def ungroup(expr: ParserElement) -> ParserElement:
    """Undo pyparsing's default grouping of And expressions,
    even if all but one are non-empty.
    """

    def first_token(t):
        return t[0]

    converter = TokenConverter(expr)
    return converter.add_parse_action(first_token)
373
+
374
+
375
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Decorate a returned token with the locations in the input string
    where it starts and ends.

    The following results names are added:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Take care if the input text contains ``<TAB>`` characters; you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """

    def report_location(ss, ll, tt):
        return ll

    start_locator = Empty().set_parse_action(report_location)
    end_locator = start_locator.copy().leaveWhitespace()
    located = start_locator("locn_start") + expr("value") + end_locator("locn_end")
    return Group(located)
408
+
409
+
410
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Define nested lists enclosed in opening and closing delimiters
    (``"("`` and ``")"`` by default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    When no expression is given for the content argument, the nested
    expression captures all whitespace-delimited content between the
    delimiters as a list of separate values.

    The ``ignore_expr`` argument defines expressions that may contain
    opening or closing characters which should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression. Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """

    def strip_token(t):
        return t[0].strip()

    # Reconcile the PEP8 and pre-PEP8 spellings: ignore_expr is taken only
    # when ignoreExpr still compares equal to its default.
    if ignoreExpr != ignore_expr and ignoreExpr == quoted_string():
        ignoreExpr = ignore_expr

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        if not (isinstance(opener, str_type) and isinstance(closer, str_type)):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
        opener = typing.cast(str, opener)
        closer = typing.cast(str, closer)
        single_char_delimiters = len(opener) == 1 and len(closer) == 1

        if single_char_delimiters and ignoreExpr is not None:
            content = Combine(
                OneOrMore(
                    ~ignoreExpr
                    + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                        exact=1,
                    )
                )
            ).set_parse_action(strip_token)
        elif single_char_delimiters:
            content = empty.copy() + CharsNotIn(
                opener + closer + ParserElement.DEFAULT_WHITE_CHARS
            ).set_parse_action(strip_token)
        elif ignoreExpr is not None:
            content = Combine(
                OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                )
            ).set_parse_action(strip_token)
        else:
            content = Combine(
                OneOrMore(
                    ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                )
            ).set_parse_action(strip_token)

    nested = Forward()
    if ignoreExpr is None:
        item = nested | content
    else:
        item = ignoreExpr | nested | content
    nested <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    nested.set_name("nested %s%s expression" % (opener, closer))
    return nested
539
+
540
+
541
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper that builds the opening and closing tag expressions for a tag name"""
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    attr_name = Word(alphas, alphanums + "_-:")
    if xml:
        # XML branch: every attribute must have a double-quoted value
        attr_value = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        attr_pair = Group(attr_name + Suppress("=") + attr_value)
    else:
        # HTML branch: values may be quoted or bare, or omitted; names are lower-cased
        attr_value = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        attr_pair = Group(
            attr_name.set_parse_action(lambda t: t[0].lower())
            + Opt(Suppress("=") + attr_value)
        )

    attributes = Dict(ZeroOrMore(attr_pair))
    # "empty" is True when the tag ends with "/" and False otherwise
    self_closing = Opt("/", default=[False])("empty").set_parse_action(
        lambda s, l, t: t[0] == "/"
    )
    open_tag = suppress_LT + tagStr("tag") + attributes + self_closing + suppress_GT
    close_tag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    # results-name suffix: tag name with ":" removed and each part title-cased
    name_suffix = "".join(resname.replace(":", " ").title().split())

    def record_start_tag(t):
        t["start" + name_suffix] = t.copy()

    open_tag.set_name("<%s>" % resname)
    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    open_tag.add_parse_action(record_start_tag)
    close_tag = close_tag("end" + name_suffix).set_name("</%s>" % resname)
    open_tag.tag = resname
    close_tag.tag = resname
    open_tag.tag_body = SkipTo(close_tag())
    return open_tag, close_tag
597
+
598
+
599
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Build the opening and closing tag expressions for an HTML tag name.
    The expressions match tags in either upper or lower case, and
    attributes with namespaces and with quoted or unquoted values.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    return _makeTags(tag_str, xml=False)
624
+
625
+
626
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Build the opening and closing tag expressions for an XML tag name.
    The expressions match tags only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    return _makeTags(tag_str, xml=True)
635
+
636
+
637
# Generic HTML open/close tag expressions, built with make_html_tags from a
# Word expression named "any tag" instead of one fixed tag name.
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# HTML5 entity table from the standard library, keyed by entity name with any
# trailing ";" stripped.
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# Matches "&name;" for any name in _htmlEntityMap; the name is captured in the
# regex group "entity", which replace_html_entity looks up.
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
647
+
648
+
649
def replace_html_entity(s, l, t):
    """Parse action that swaps a common HTML entity for its special character"""
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
652
+
653
+
654
class OpAssoc(Enum):
    """Operator associativity choices
    - used when building an InfixNotationOperatorSpec for :class:`infix_notation`"""

    LEFT = 1
    RIGHT = 2
660
+
661
+
662
# Type of the operator slot in an infix_notation op_list entry: a single
# expression or string, or a 2-tuple of expressions/strings.
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# Type of one op_list entry: (operator, int, OpAssoc), optionally followed by
# a fourth member that is a parse action or None.
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
678
+
679
+
680
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator entry has a number of operands
    outside 1-3, an associativity that is neither ``OpAssoc.LEFT`` nor
    ``OpAssoc.RIGHT``, or (for 3 operands) an ``op_expr`` that is not a
    tuple or list of two expressions.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # If either parenthesis expression is not suppressed it stays in the
    # results, so the parenthesized sub-expression is wrapped in a Group.
    # Both lpar and rpar must be checked here.
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    # Each precedence level wraps the expression built for the previous
    # level (lastExpr).
    for operDef in op_list:
        # pad with None so a 3-tuple entry unpacks with no parse action
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        # In every branch the _FB lookahead checks that the operator pattern
        # is present, without running parse actions, before the Group parses.
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        # fall back to the previous level when this level's operator is absent
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
855
+
856
+
857
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)
    - ``backup_stacks`` - list receiving a snapshot of ``indentStack`` taken
      when this helper is called; the default list object is shared by every
      call that does not pass its own

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # Snapshot the caller's stack so a failed match can roll it back.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore the indentation stack in place from the newest snapshot.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Nothing to check once the location is at/after the end of input.
        if l >= len(s):
            return
        column = col(l, s)
        expected = indentStack[-1]
        if column == expected:
            return
        if column > expected:
            raise ParseException(s, l, "illegal nesting")
        raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        column = col(l, s)
        if column <= indentStack[-1]:
            raise ParseException(s, l, "not a subentry")
        # Deeper than the current level: this column becomes the new level.
        indentStack.append(column)

    def checkUnindent(s, l, t):
        if l >= len(s):
            return
        column = col(l, s)
        if not indentStack or column not in indentStack:
            raise ParseException(s, l, "not an unindent")
        if column < indentStack[-1]:
            indentStack.pop()

    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")

    # Both variants share the optional leading newlines and the run of
    # peer-level statements; they differ only in the INDENT/UNDENT framing.
    leading_newlines = Opt(NL)
    statements = OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
    if indent:
        block_expr = Group(leading_newlines + INDENT + statements + UNDENT)
    else:
        block_expr = Group(leading_newlines + statements + Opt(UNDENT))

    # add a parse action to remove backup_stack from list of backups
    block_expr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    block_expr.set_fail_action(lambda a, b, c, d: reset_stack())
    blockStatementExpr.ignore(_bslash + LineEnd())
    return block_expr.set_name("indented block")
998
+
999
+
1000
# it's easy to get these comment structures wrong - they're very common, so may as well make them available

# "/*", then any run of characters that does not contain "*/", then the
# closing "*/"; Combine joins the two matched pieces into a single token.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# Non-greedy match, so the comment ends at the first "-->".
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

# leave_whitespace() is applied so leading whitespace is part of the match.
rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# "//" up to the end of the line; a backslash-newline pair is consumed as
# part of the comment, so such a comment continues onto the next line.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# Either a "/* ... */" comment or a "//" comment.
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

# Alias: the very same expression object as cpp_style_comment.
java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement bound in this namespace at this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
1030
+
1031
+
1032
+ # compatibility function, superseded by DelimitedList class
1033
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Thin compatibility shim: every argument is handed unchanged to
    :class:`DelimitedList` and the resulting expression is returned.
    """
    delimited = DelimitedList(
        expr,
        delim,
        combine,
        min,
        max,
        allow_trailing_delim=allow_trailing_delim,
    )
    return delimited
1046
+
1047
+
1048
# pre-PEP8 compatible names
# fmt: off
# Plain aliases: each camelCase name is bound to the same object as its
# snake_case counterpart.
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment

# The stubs below have empty bodies; whatever each name ends up bound to is
# produced by the ``replaced_by_pep8`` decorator from the callable passed to
# it (presumably a forwarding wrapper - the decorator is defined elsewhere,
# confirm there).
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE: this rebinds ``delimited_list``, replacing the function of the same
# name defined earlier in this module.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/results.py ADDED
@@ -0,0 +1,796 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # results.py
2
+ from collections.abc import (
3
+ MutableMapping,
4
+ Mapping,
5
+ MutableSequence,
6
+ Iterator,
7
+ Sequence,
8
+ Container,
9
+ )
10
+ import pprint
11
+ from typing import Tuple, Any, Dict, Set, List
12
+
13
+ str_type: Tuple[type, ...] = (str, bytes)
14
+ _generator_type = type((_ for _ in ()))
15
+
16
+
17
+ class _ParseResultsWithOffset:
18
+ tup: Tuple["ParseResults", int]
19
+ __slots__ = ["tup"]
20
+
21
+ def __init__(self, p1: "ParseResults", p2: int):
22
+ self.tup: Tuple[ParseResults, int] = (p1, p2)
23
+
24
+ def __getitem__(self, i):
25
+ return self.tup[i]
26
+
27
+ def __getstate__(self):
28
+ return self.tup
29
+
30
+ def __setstate__(self, *args):
31
+ self.tup = args[0]
32
+
33
+
34
class ParseResults:
    """Structured parse results, to provide multiple means of access to
    the parsed data:

    - as a list (``len(results)``)
    - by list index (``results[0], results[1]``, etc.)
    - by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)

    Example::

        integer = Word(nums)
        date_str = (integer.set_results_name("year") + '/'
                    + integer.set_results_name("month") + '/'
                    + integer.set_results_name("day"))
        # equivalent form:
        # date_str = (integer("year") + '/'
        #             + integer("month") + '/'
        #             + integer("day"))

        # parse_string returns a ParseResults object
        result = date_str.parse_string("1999/12/31")

        def test(s, fn=repr):
            print(f"{s} -> {fn(eval(s))}")
        test("list(result)")
        test("result[0]")
        test("result['month']")
        test("result.day")
        test("'month' in result")
        test("'minutes' in result")
        test("result.dump()", str)

    prints::

        list(result) -> ['1999', '/', '12', '/', '31']
        result[0] -> '1999'
        result['month'] -> '12'
        result.day -> '31'
        'month' in result -> True
        'minutes' in result -> False
        result.dump() -> ['1999', '/', '12', '/', '31']
        - day: '31'
        - month: '12'
        - year: '1999'
    """

    # Token values that __init__ treats as "nothing to store under a name".
    _null_values: Tuple[Any, ...] = (None, [], ())

    _name: str
    _parent: "ParseResults"
    _all_names: Set[str]
    _modal: bool
    _toklist: List[Any]
    _tokdict: Dict[str, Any]

    __slots__ = (
        "_name",
        "_parent",
        "_all_names",
        "_modal",
        "_toklist",
        "_tokdict",
    )

    class List(list):
        """
        Simple wrapper class to distinguish parsed list results that should be preserved
        as actual Python lists, instead of being converted to :class:`ParseResults`::

            LBRACK, RBRACK = map(pp.Suppress, "[]")
            element = pp.Forward()
            item = ppc.integer
            element_list = LBRACK + pp.DelimitedList(element) + RBRACK

            # add parse actions to convert from ParseResults to actual Python collection types
            def as_python_list(t):
                return pp.ParseResults.List(t.as_list())
            element_list.add_parse_action(as_python_list)

            element <<= item | element_list

            element.run_tests('''
                100
                [2,3,4]
                [[2, 1],3,4]
                [(2, 1),3,4]
                (2,3,4)
                ''', post_parse=lambda s, r: (r[0], type(r[0])))

        prints::

            100
            (100, <class 'int'>)

            [2,3,4]
            ([2, 3, 4], <class 'list'>)

            [[2, 1],3,4]
            ([[2, 1], 3, 4], <class 'list'>)

        (Used internally by :class:`Group` when `aslist=True`.)
        """

        def __new__(cls, contained=None):
            if contained is None:
                contained = []

            if not isinstance(contained, list):
                raise TypeError(
                    f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
                )

            return list.__new__(cls)

    def __new__(cls, toklist=None, name=None, **kwargs):
        # Passing an existing ParseResults returns that same object unchanged.
        if isinstance(toklist, ParseResults):
            return toklist
        self = object.__new__(cls)
        self._name = None
        self._parent = None
        self._all_names = set()

        if toklist is None:
            self._toklist = []
        elif isinstance(toklist, (list, _generator_type)):
            # A ParseResults.List is kept as one nested item (copied);
            # any other list or generator is expanded into the token list.
            self._toklist = (
                [toklist[:]]
                if isinstance(toklist, ParseResults.List)
                else list(toklist)
            )
        else:
            self._toklist = [toklist]
        self._tokdict = dict()
        return self

    # Performance tuning: we construct a *lot* of these, so keep this
    # constructor as small and fast as possible
    def __init__(
        self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
    ):
        self._tokdict: Dict[str, _ParseResultsWithOffset]
        self._modal = modal
        if name is not None and name != "":
            if isinstance(name, int):
                name = str(name)
            if not modal:
                # Non-modal names accumulate every occurrence (see __getitem__).
                self._all_names = {name}
            self._name = name
            if toklist not in self._null_values:
                if isinstance(toklist, (str_type, type)):
                    toklist = [toklist]
                if asList:
                    if isinstance(toklist, ParseResults):
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist._toklist), 0
                        )
                    else:
                        self[name] = _ParseResultsWithOffset(
                            ParseResults(toklist[0]), 0
                        )
                    self[name]._name = name
                else:
                    try:
                        self[name] = toklist[0]
                    except (KeyError, TypeError, IndexError):
                        if toklist is not self:
                            self[name] = toklist
                        else:
                            self._name = name

    def __getitem__(self, i):
        if isinstance(i, (int, slice)):
            return self._toklist[i]
        else:
            if i not in self._all_names:
                # Ordinary name: value of the most recently stored occurrence.
                return self._tokdict[i][-1][0]
            else:
                # Accumulating name: every stored value, as a ParseResults.
                return ParseResults([v[0] for v in self._tokdict[i]])

    def __setitem__(self, k, v, isinstance=isinstance):
        if isinstance(v, _ParseResultsWithOffset):
            self._tokdict[k] = self._tokdict.get(k, list()) + [v]
            sub = v[0]
        elif isinstance(k, (int, slice)):
            self._toklist[k] = v
            sub = v
        else:
            self._tokdict[k] = self._tokdict.get(k, list()) + [
                _ParseResultsWithOffset(v, 0)
            ]
            sub = v
        if isinstance(sub, ParseResults):
            sub._parent = self

    def __delitem__(self, i):
        if isinstance(i, (int, slice)):
            mylen = len(self._toklist)
            del self._toklist[i]

            # convert int to slice
            if isinstance(i, int):
                if i < 0:
                    i += mylen
                i = slice(i, i + 1)
            # get removed indices
            removed = list(range(*i.indices(mylen)))
            removed.reverse()
            # fixup indices in token dictionary
            for name, occurrences in self._tokdict.items():
                for j in removed:
                    for k, (value, position) in enumerate(occurrences):
                        occurrences[k] = _ParseResultsWithOffset(
                            value, position - (position > j)
                        )
        else:
            del self._tokdict[i]

    def __contains__(self, k) -> bool:
        # Membership tests results names, not token values.
        return k in self._tokdict

    def __len__(self) -> int:
        return len(self._toklist)

    def __bool__(self) -> bool:
        return not not (self._toklist or self._tokdict)

    def __iter__(self) -> Iterator:
        return iter(self._toklist)

    def __reversed__(self) -> Iterator:
        return iter(self._toklist[::-1])

    def keys(self):
        """Return an iterator over the defined results names."""
        return iter(self._tokdict)

    def values(self):
        """Return a generator over the value stored for each results name."""
        return (self[k] for k in self.keys())

    def items(self):
        """Return a generator of ``(name, value)`` pairs for each results name."""
        return ((k, self[k]) for k in self.keys())

    def haskeys(self) -> bool:
        """
        Since ``keys()`` returns an iterator, this method is helpful in bypassing
        code that looks for the existence of any defined results names."""
        return not not self._tokdict

    def pop(self, *args, **kwargs):
        """
        Removes and returns item at specified index (default= ``last``).
        Supports both ``list`` and ``dict`` semantics for ``pop()``. If
        passed no argument or an integer argument, it will use ``list``
        semantics and pop tokens from the list of parsed tokens. If passed
        a non-integer argument (most likely a string), it will use ``dict``
        semantics and pop the corresponding value from any defined results
        names. A second default return value argument is supported, just as in
        ``dict.pop()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            def remove_first(tokens):
                tokens.pop(0)
            numlist.add_parse_action(remove_first)
            print(numlist.parse_string("0 123 321")) # -> ['123', '321']

            label = Word(alphas)
            patt = label("LABEL") + Word(nums)[1, ...]
            print(patt.parse_string("AAB 123 321").dump())

            # Use pop() in a parse action to remove named result (note that corresponding value is not
            # removed from list form of results)
            def remove_LABEL(tokens):
                tokens.pop("LABEL")
                return tokens
            patt.add_parse_action(remove_LABEL)
            print(patt.parse_string("AAB 123 321").dump())

        prints::

            ['AAB', '123', '321']
            - LABEL: 'AAB'

            ['AAB', '123', '321']
        """
        if not args:
            args = [-1]
        for k, v in kwargs.items():
            if k == "default":
                args = (args[0], v)
            else:
                raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
        if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
            index = args[0]
            ret = self[index]
            del self[index]
            return ret
        else:
            defaultvalue = args[1]
            return defaultvalue

    def get(self, key, default_value=None):
        """
        Returns named result matching the given key, or if there is no
        such name, then returns the given ``default_value`` or ``None`` if no
        ``default_value`` is specified.

        Similar to ``dict.get()``.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string("1999/12/31")
            print(result.get("year")) # -> '1999'
            print(result.get("hour", "not specified")) # -> 'not specified'
            print(result.get("hour")) # -> None
        """
        if key in self:
            return self[key]
        else:
            return default_value

    def insert(self, index, ins_string):
        """
        Inserts new element at location index in the list of parsed tokens.

        Similar to ``list.insert()``.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to insert the parse location in the front of the parsed results
            def insert_locn(locn, tokens):
                tokens.insert(0, locn)
            numlist.add_parse_action(insert_locn)
            print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
        """
        self._toklist.insert(index, ins_string)
        # fixup indices in token dictionary
        for name, occurrences in self._tokdict.items():
            for k, (value, position) in enumerate(occurrences):
                occurrences[k] = _ParseResultsWithOffset(
                    value, position + (position > index)
                )

    def append(self, item):
        """
        Add single element to end of ``ParseResults`` list of elements.

        Example::

            numlist = Word(nums)[...]
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']

            # use a parse action to compute the sum of the parsed integers, and add it to the end
            def append_sum(tokens):
                tokens.append(sum(map(int, tokens)))
            numlist.add_parse_action(append_sum)
            print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
        """
        self._toklist.append(item)

    def extend(self, itemseq):
        """
        Add sequence of elements to end of ``ParseResults`` list of elements.

        Example::

            patt = Word(alphas)[1, ...]

            # use a parse action to append the reverse of the matched strings, to make a palindrome
            def make_palindrome(tokens):
                tokens.extend(reversed([t[::-1] for t in tokens]))
                return ''.join(tokens)
            patt.add_parse_action(make_palindrome)
            print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
        """
        if isinstance(itemseq, ParseResults):
            self.__iadd__(itemseq)
        else:
            self._toklist.extend(itemseq)

    def clear(self):
        """
        Clear all elements and results names.
        """
        del self._toklist[:]
        self._tokdict.clear()

    def __getattr__(self, name):
        # Attribute access falls back to results names; an unknown name gives
        # "" rather than raising, except for dunder lookups.
        try:
            return self[name]
        except KeyError:
            if name.startswith("__"):
                raise AttributeError(name)
            return ""

    def __add__(self, other: "ParseResults") -> "ParseResults":
        ret = self.copy()
        ret += other
        return ret

    def __iadd__(self, other: "ParseResults") -> "ParseResults":
        if not other:
            return self

        if other._tokdict:
            # Shift the other side's named-result offsets past our own tokens.
            offset = len(self._toklist)
            addoffset = lambda a: offset if a < 0 else a + offset
            otheritems = other._tokdict.items()
            otherdictitems = [
                (k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
                for k, vlist in otheritems
                for v in vlist
            ]
            for k, v in otherdictitems:
                self[k] = v
                if isinstance(v[0], ParseResults):
                    v[0]._parent = self

        self._toklist += other._toklist
        self._all_names |= other._all_names
        return self

    def __radd__(self, other) -> "ParseResults":
        if isinstance(other, int) and other == 0:
            # useful for merging many ParseResults using sum() builtin
            return self.copy()
        # Python only reaches __radd__ after ``other.__add__`` declined the
        # operation, so evaluating ``other + self`` again would just re-enter
        # this method and recurse without bound.  Returning NotImplemented
        # lets the interpreter raise the appropriate TypeError instead.
        return NotImplemented

    def __repr__(self) -> str:
        return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})"

    def __str__(self) -> str:
        return (
            "["
            + ", ".join(
                [
                    str(i) if isinstance(i, ParseResults) else repr(i)
                    for i in self._toklist
                ]
            )
            + "]"
        )

    def _asStringList(self, sep=""):
        # Flatten nested results into a list of str, with ``sep`` inserted
        # between top-level items when it is non-empty.
        out = []
        for item in self._toklist:
            if out and sep:
                out.append(sep)
            if isinstance(item, ParseResults):
                out += item._asStringList()
            else:
                out.append(str(item))
        return out

    def as_list(self) -> list:
        """
        Returns the parse results as a nested list of matching tokens, all converted to strings.

        Example::

            patt = Word(alphas)[1, ...]
            result = patt.parse_string("sldkj lsdkj sldkj")
            # even though the result prints in string-like form, it is actually a pyparsing ParseResults
            print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']

            # Use as_list() to create an actual list
            result_list = result.as_list()
            print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
        """
        return [
            res.as_list() if isinstance(res, ParseResults) else res
            for res in self._toklist
        ]

    def as_dict(self) -> dict:
        """
        Returns the named parse results as a nested dictionary.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('12/31/1999')
            print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})

            result_dict = result.as_dict()
            print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}

            # even though a ParseResults supports dict-like access, sometime you just need to have a dict
            import json
            print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
            print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
        """

        def to_item(obj):
            if isinstance(obj, ParseResults):
                return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
            else:
                return obj

        return {k: to_item(v) for k, v in self.items()}

    def copy(self) -> "ParseResults":
        """
        Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
        items contained within the source are shared with the copy. Use
        :class:`ParseResults.deepcopy()` to create a copy with its own separate
        content values.
        """
        ret = ParseResults(self._toklist)
        ret._tokdict = self._tokdict.copy()
        ret._parent = self._parent
        ret._all_names |= self._all_names
        ret._name = self._name
        return ret

    def deepcopy(self) -> "ParseResults":
        """
        Returns a new deep copy of a :class:`ParseResults` object.

        Nested :class:`ParseResults`, mappings and other containers in the
        token list are replaced *in the returned copy* by fresh copies; the
        object this is called on is left untouched.
        """
        ret = self.copy()
        # replace values with copies if they are of known mutable types
        # (the replacements go into ``ret``; writing them into ``self`` would
        # mutate the source and leave the copy sharing the nested items)
        for i, obj in enumerate(self._toklist):
            if isinstance(obj, ParseResults):
                ret._toklist[i] = obj.deepcopy()
            elif isinstance(obj, (str, bytes)):
                pass
            elif isinstance(obj, MutableMapping):
                ret._toklist[i] = dest = type(obj)()
                for k, v in obj.items():
                    dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
            elif isinstance(obj, Container):
                ret._toklist[i] = type(obj)(
                    v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
                )
        return ret

    def get_name(self):
        r"""
        Returns the results name for this token expression. Useful when several
        different expressions might match at a particular location.

        Example::

            integer = Word(nums)
            ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
            house_number_expr = Suppress('#') + Word(nums, alphanums)
            user_data = (Group(house_number_expr)("house_number")
                        | Group(ssn_expr)("ssn")
                        | Group(integer)("age"))
            user_info = user_data[1, ...]

            result = user_info.parse_string("22 111-22-3333 #221B")
            for item in result:
                print(item.get_name(), ':', item[0])

        prints::

            age : 22
            ssn : 111-22-3333
            house_number : 221B
        """
        if self._name:
            return self._name
        elif self._parent:
            # Look for the name under which the parent stores this very object.
            par: "ParseResults" = self._parent
            parent_tokdict_items = par._tokdict.items()
            return next(
                (
                    k
                    for k, vlist in parent_tokdict_items
                    for v, loc in vlist
                    if v is self
                ),
                None,
            )
        elif (
            len(self) == 1
            and len(self._tokdict) == 1
            and next(iter(self._tokdict.values()))[0][1] in (0, -1)
        ):
            return next(iter(self._tokdict.keys()))
        else:
            return None

    def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
        """
        Diagnostic method for listing out the contents of
        a :class:`ParseResults`. Accepts an optional ``indent`` argument so
        that this string can be embedded in a nested display of other data.

        Example::

            integer = Word(nums)
            date_str = integer("year") + '/' + integer("month") + '/' + integer("day")

            result = date_str.parse_string('1999/12/31')
            print(result.dump())

        prints::

            ['1999', '/', '12', '/', '31']
            - day: '31'
            - month: '12'
            - year: '1999'
        """
        out = []
        NL = "\n"
        out.append(indent + str(self.as_list()) if include_list else "")

        if full:
            if self.haskeys():
                items = sorted((str(k), v) for k, v in self.items())
                for k, v in items:
                    if out:
                        out.append(NL)
                    # two spaces of extra indentation per nesting level
                    out.append(f"{indent}{('  ' * _depth)}- {k}: ")
                    if isinstance(v, ParseResults):
                        if v:
                            out.append(
                                v.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                )
                            )
                        else:
                            out.append(str(v))
                    else:
                        out.append(repr(v))
            if any(isinstance(vv, ParseResults) for vv in self):
                v = self
                for i, vv in enumerate(v):
                    if isinstance(vv, ParseResults):
                        out.append(
                            "\n{}{}[{}]:\n{}{}{}".format(
                                indent,
                                ("  " * (_depth)),
                                i,
                                indent,
                                ("  " * (_depth + 1)),
                                vv.dump(
                                    indent=indent,
                                    full=full,
                                    include_list=include_list,
                                    _depth=_depth + 1,
                                ),
                            )
                        )
                    else:
                        out.append(
                            "\n%s%s[%d]:\n%s%s%s"
                            % (
                                indent,
                                ("  " * (_depth)),
                                i,
                                indent,
                                ("  " * (_depth + 1)),
                                str(vv),
                            )
                        )

        return "".join(out)

    def pprint(self, *args, **kwargs):
        """
        Pretty-printer for parsed results as a list, using the
        `pprint <https://docs.python.org/3/library/pprint.html>`_ module.
        Accepts additional positional or keyword args as defined for
        `pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .

        Example::

            ident = Word(alphas, alphanums)
            num = Word(nums)
            func = Forward()
            term = ident | num | Group('(' + func + ')')
            func <<= ident + Group(Optional(DelimitedList(term)))
            result = func.parse_string("fna a,b,(fnb c,d,200),100")
            result.pprint(width=40)

        prints::

            ['fna',
             ['a',
              'b',
              ['(', 'fnb', ['c', 'd', '200'], ')'],
              '100']]
        """
        pprint.pprint(self.as_list(), *args, **kwargs)

    # add support for pickle protocol
    def __getstate__(self):
        # The parent link is not pickled (None is stored in its place).
        return (
            self._toklist,
            (
                self._tokdict.copy(),
                None,
                self._all_names,
                self._name,
            ),
        )

    def __setstate__(self, state):
        self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
        self._all_names = set(inAccumNames)
        self._parent = None

    def __getnewargs__(self):
        return self._toklist, self._name

    def __dir__(self):
        return dir(type(self)) + list(self.keys())

    @classmethod
    def from_dict(cls, other, name=None) -> "ParseResults":
        """
        Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
        name-value relations as results names. If an optional ``name`` argument is
        given, a nested ``ParseResults`` will be returned.
        """

        def is_iterable(obj):
            try:
                iter(obj)
            except Exception:
                return False
            # str's are iterable, but in pyparsing, we don't want to iterate over them
            else:
                return not isinstance(obj, str_type)

        ret = cls([])
        for k, v in other.items():
            if isinstance(v, Mapping):
                ret += cls.from_dict(v, name=k)
            else:
                ret += cls([v], name=k, asList=is_iterable(v))
        if name is not None:
            ret = cls([ret], name=name)
        return ret

    asList = as_list
    """Deprecated - use :class:`as_list`"""
    asDict = as_dict
    """Deprecated - use :class:`as_dict`"""
    getName = get_name
    """Deprecated - use :class:`get_name`"""
793
+
794
+
795
+ MutableMapping.register(ParseResults)
796
+ MutableSequence.register(ParseResults)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/testing.py ADDED
@@ -0,0 +1,331 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # testing.py
2
+
3
+ from contextlib import contextmanager
4
+ import typing
5
+
6
+ from .core import (
7
+ ParserElement,
8
+ ParseException,
9
+ Keyword,
10
+ __diag__,
11
+ __compat__,
12
+ )
13
+
14
+
15
+ class pyparsing_test:
16
+ """
17
+ namespace class for classes useful in writing unit tests
18
+ """
19
+
20
class reset_pyparsing_context:
    """
    Context manager to be used when writing unit tests that modify pyparsing config values:
    - packrat parsing
    - bounded recursion parsing
    - default whitespace characters.
    - default keyword characters
    - literal string auto-conversion class
    - __diag__ settings
    - __compat__.collect_all_And_tokens

    Example::

        with reset_pyparsing_context():
            # test that literals used to construct a grammar are automatically suppressed
            ParserElement.inlineLiteralsUsing(Suppress)

            term = Word(alphas) | Word(nums)
            group = Group('(' + term[...] + ')')

            # assert that the '()' characters are not included in the parsed tokens
            self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

        # after exiting context manager, literals are converted to Literal expressions again
    """

    def __init__(self):
        # snapshot of the global settings; filled in by save()
        self._save_context = {}

    def save(self):
        """Record the current global pyparsing settings and return ``self``."""
        self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
        self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS

        self._save_context[
            "literal_string_class"
        ] = ParserElement._literalStringClass

        self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace

        self._save_context["packrat_enabled"] = ParserElement._packratEnabled
        if ParserElement._packratEnabled:
            self._save_context[
                "packrat_cache_size"
            ] = ParserElement.packrat_cache.size
        else:
            self._save_context["packrat_cache_size"] = None
        self._save_context["packrat_parse"] = ParserElement._parse
        self._save_context[
            "recursion_enabled"
        ] = ParserElement._left_recursion_enabled

        self._save_context["__diag__"] = {
            name: getattr(__diag__, name) for name in __diag__._all_names
        }

        self._save_context["__compat__"] = {
            "collect_all_And_tokens": __compat__.collect_all_And_tokens
        }

        return self

    def restore(self):
        """Put back every setting recorded by ``save()`` and return ``self``."""
        # reset pyparsing global state
        if (
            ParserElement.DEFAULT_WHITE_CHARS
            != self._save_context["default_whitespace"]
        ):
            ParserElement.set_default_whitespace_chars(
                self._save_context["default_whitespace"]
            )

        ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]

        Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
        ParserElement.inlineLiteralsUsing(
            self._save_context["literal_string_class"]
        )

        for name, value in self._save_context["__diag__"].items():
            (__diag__.enable if value else __diag__.disable)(name)

        ParserElement._packratEnabled = False
        if self._save_context["packrat_enabled"]:
            ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
        else:
            ParserElement._parse = self._save_context["packrat_parse"]
        ParserElement._left_recursion_enabled = self._save_context[
            "recursion_enabled"
        ]

        # save() stores a dict under "__compat__"; restore the flag's saved
        # value, not the dict itself (a non-empty dict is always truthy)
        __compat__.collect_all_And_tokens = self._save_context["__compat__"][
            "collect_all_And_tokens"
        ]

        return self

    def copy(self):
        """Return a new instance holding a shallow copy of the saved settings."""
        ret = type(self)()
        ret._save_context.update(self._save_context)
        return ret

    def __enter__(self):
        return self.save()

    def __exit__(self, *args):
        self.restore()
123
+
124
class TestParseResultsAsserts:
    """
    A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
    """

    def assertParseResultsEquals(
        self, result, expected_list=None, expected_dict=None, msg=None
    ):
        """
        Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``,
        and compare any defined results names with an optional ``expected_dict``.
        """
        if expected_list is not None:
            self.assertEqual(expected_list, result.as_list(), msg=msg)
        if expected_dict is not None:
            self.assertEqual(expected_dict, result.as_dict(), msg=msg)

    def assertParseAndCheckList(
        self, expr, test_string, expected_list, msg=None, verbose=True
    ):
        """
        Convenience wrapper assert to test a parser element and input string, and assert that
        the resulting ``ParseResults.as_list()`` is equal to the ``expected_list``.
        """
        result = expr.parse_string(test_string, parse_all=True)
        if verbose:
            print(result.dump())
        else:
            print(result.as_list())
        self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

    def assertParseAndCheckDict(
        self, expr, test_string, expected_dict, msg=None, verbose=True
    ):
        """
        Convenience wrapper assert to test a parser element and input string, and assert that
        the resulting ``ParseResults.as_dict()`` is equal to the ``expected_dict``.
        """
        # use the same PEP8 keyword as assertParseAndCheckList
        result = expr.parse_string(test_string, parse_all=True)
        if verbose:
            print(result.dump())
        else:
            print(result.as_list())
        self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

    def assertRunTestResults(
        self, run_tests_report, expected_parse_results=None, msg=None
    ):
        """
        Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of
        list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped
        with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``.
        Finally, asserts that the overall ``runTests()`` success value is ``True``.

        :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
        :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
        """
        run_test_success, run_test_results = run_tests_report

        if expected_parse_results is not None:
            # zip() stops at the shorter sequence, so surplus reports or
            # surplus expectations are silently left unchecked
            merged = [
                (*rpt, expected)
                for rpt, expected in zip(run_test_results, expected_parse_results)
            ]
            for test_string, result, expected in merged:
                # expected should be a tuple containing a list and/or a dict or an exception,
                # and optional failure message string
                # an empty tuple will skip any result validation
                fail_msg = next(
                    (exp for exp in expected if isinstance(exp, str)), None
                )
                expected_exception = next(
                    (
                        exp
                        for exp in expected
                        if isinstance(exp, type) and issubclass(exp, Exception)
                    ),
                    None,
                )
                if expected_exception is not None:
                    # re-raise the reported exception inside assertRaises; a
                    # non-exception result raises nothing, so the assert fails
                    with self.assertRaises(
                        expected_exception=expected_exception, msg=fail_msg or msg
                    ):
                        if isinstance(result, Exception):
                            raise result
                else:
                    expected_list = next(
                        (exp for exp in expected if isinstance(exp, list)), None
                    )
                    expected_dict = next(
                        (exp for exp in expected if isinstance(exp, dict)), None
                    )
                    if (expected_list, expected_dict) != (None, None):
                        self.assertParseResultsEquals(
                            result,
                            expected_list=expected_list,
                            expected_dict=expected_dict,
                            msg=fail_msg or msg,
                        )
                    else:
                        # warning here maybe?
                        print(f"no validation for {test_string!r}")

        # do this last, in case some specific test results can be reported instead
        self.assertTrue(
            run_test_success, msg=msg if msg is not None else "failed runTests"
        )

    @contextmanager
    def assertRaisesParseException(self, exc_type=ParseException, msg=None):
        """Context manager asserting that the enclosed code raises ``exc_type``."""
        with self.assertRaises(exc_type, msg=msg):
            yield
236
+
237
+ @staticmethod
238
+ def with_line_numbers(
239
+ s: str,
240
+ start_line: typing.Optional[int] = None,
241
+ end_line: typing.Optional[int] = None,
242
+ expand_tabs: bool = True,
243
+ eol_mark: str = "|",
244
+ mark_spaces: typing.Optional[str] = None,
245
+ mark_control: typing.Optional[str] = None,
246
+ ) -> str:
247
+ """
248
+ Helpful method for debugging a parser - prints a string with line and column numbers.
249
+ (Line and column numbers are 1-based.)
250
+
251
+ :param s: tuple(bool, str - string to be printed with line and column numbers
252
+ :param start_line: int - (optional) starting line number in s to print (default=1)
253
+ :param end_line: int - (optional) ending line number in s to print (default=len(s))
254
+ :param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default
255
+ :param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|")
256
+ :param mark_spaces: str - (optional) special character to display in place of spaces
257
+ :param mark_control: str - (optional) convert non-printing control characters to a placeholding
258
+ character; valid values:
259
+ - "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
260
+ - any single character string - replace control characters with given string
261
+ - None (default) - string is displayed as-is
262
+
263
+ :return: str - input string with leading line numbers and column number headers
264
+ """
265
+ if expand_tabs:
266
+ s = s.expandtabs()
267
+ if mark_control is not None:
268
+ mark_control = typing.cast(str, mark_control)
269
+ if mark_control == "unicode":
270
+ transtable_map = {
271
+ c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))
272
+ }
273
+ transtable_map[127] = 0x2421
274
+ tbl = str.maketrans(transtable_map)
275
+ eol_mark = ""
276
+ else:
277
+ ord_mark_control = ord(mark_control)
278
+ tbl = str.maketrans(
279
+ {c: ord_mark_control for c in list(range(0, 32)) + [127]}
280
+ )
281
+ s = s.translate(tbl)
282
+ if mark_spaces is not None and mark_spaces != " ":
283
+ if mark_spaces == "unicode":
284
+ tbl = str.maketrans({9: 0x2409, 32: 0x2423})
285
+ s = s.translate(tbl)
286
+ else:
287
+ s = s.replace(" ", mark_spaces)
288
+ if start_line is None:
289
+ start_line = 1
290
+ if end_line is None:
291
+ end_line = len(s)
292
+ end_line = min(end_line, len(s))
293
+ start_line = min(max(1, start_line), end_line)
294
+
295
+ if mark_control != "unicode":
296
+ s_lines = s.splitlines()[start_line - 1 : end_line]
297
+ else:
298
+ s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]]
299
+ if not s_lines:
300
+ return ""
301
+
302
+ lineno_width = len(str(end_line))
303
+ max_line_len = max(len(line) for line in s_lines)
304
+ lead = " " * (lineno_width + 1)
305
+ if max_line_len >= 99:
306
+ header0 = (
307
+ lead
308
+ + "".join(
309
+ f"{' ' * 99}{(i + 1) % 100}"
310
+ for i in range(max(max_line_len // 100, 1))
311
+ )
312
+ + "\n"
313
+ )
314
+ else:
315
+ header0 = ""
316
+ header1 = (
317
+ header0
318
+ + lead
319
+ + "".join(f" {(i + 1) % 10}" for i in range(-(-max_line_len // 10)))
320
+ + "\n"
321
+ )
322
+ header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
323
+ return (
324
+ header1
325
+ + header2
326
+ + "\n".join(
327
+ f"{i:{lineno_width}d}:{line}{eol_mark}"
328
+ for i, line in enumerate(s_lines, start=start_line)
329
+ )
330
+ + "\n"
331
+ )
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/unicode.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # unicode.py
2
+
3
+ import sys
4
+ from itertools import filterfalse
5
+ from typing import List, Tuple, Union
6
+
7
+
8
+ class _lazyclassproperty:
9
+ def __init__(self, fn):
10
+ self.fn = fn
11
+ self.__doc__ = fn.__doc__
12
+ self.__name__ = fn.__name__
13
+
14
+ def __get__(self, obj, cls):
15
+ if cls is None:
16
+ cls = type(obj)
17
+ if not hasattr(cls, "_intern") or any(
18
+ cls._intern is getattr(superclass, "_intern", [])
19
+ for superclass in cls.__mro__[1:]
20
+ ):
21
+ cls._intern = {}
22
+ attrname = self.fn.__name__
23
+ if attrname not in cls._intern:
24
+ cls._intern[attrname] = self.fn(cls)
25
+ return cls._intern[attrname]
26
+
27
+
28
+ UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
29
+
30
+
31
class unicode_set:
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``. Ranges can be specified using
    2-tuples or a 1-tuple, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
        ]

    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        # gather the code points of every class in the MRO, stopping once
        # the unicode_set base itself is reached
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for span in getattr(klass, "_ranges", ()):
                codepoints.update(range(span[0], span[-1] + 1))
        return [chr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        """all non-whitespace characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        """all alphabetic characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        """all numeric digit characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        """all alphanumeric characters in this range"""
        return cls.alphas + cls.nums

    @_lazyclassproperty
    def identchars(cls):
        """all characters in this range that are valid identifier characters, plus underscore '_'"""
        chars = {ch for ch in cls._chars_for_ranges if ch.isidentifier()}
        chars.update("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº")
        chars.update("ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ")
        chars.add("_")
        return "".join(sorted(chars))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9, and · (Unicode MIDDLE DOT)
        """
        chars = set(cls.identchars)
        chars.update("0123456789·")
        # a character is a valid body character if it can follow a "_"
        chars.update(ch for ch in cls._chars_for_ranges if ("_" + ch).isidentifier())
        return "".join(sorted(chars))

    @_lazyclassproperty
    def identifier(cls):
        """
        a pyparsing Word expression for an identifier using this range's definitions for
        identchars and identbodychars
        """
        from pip._vendor.pyparsing import Word

        first_chars = cls.identchars
        body_chars = cls.identbodychars
        return Word(first_chars, body_chars)
126
+
127
+
128
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.

    Each nested class lists its code point ranges in ``_ranges``; entries are
    ``(first, last)`` pairs or 1-tuples ``(x,)`` for a single code point.
    """

    # fmt: off

    # define ranges in language character sets
    # the namespace class itself spans everything from space upward
    _ranges: UnicodeRangeList = [
        (0x0020, sys.maxunicode),
    ]

    class BasicMultilingualPlane(unicode_set):
        """Unicode set for the Basic Multilingual Plane"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0xFFFF),
        ]

    class Latin1(unicode_set):
        """Unicode set for Latin-1 Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0x007E),
            (0x00A0, 0x00FF),
        ]

    class LatinA(unicode_set):
        """Unicode set for Latin-A Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0100, 0x017F),
        ]

    class LatinB(unicode_set):
        """Unicode set for Latin-B Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0180, 0x024F),
        ]

    class Greek(unicode_set):
        """Unicode set for Greek Unicode Character Ranges"""
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345),
            (0x0370, 0x0377),
            (0x037A, 0x037F),
            (0x0384, 0x038A),
            (0x038C,),
            (0x038E, 0x03A1),
            (0x03A3, 0x03E1),
            (0x03F0, 0x03FF),
            (0x1D26, 0x1D2A),
            (0x1D5E,),
            (0x1D60,),
            (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15),
            (0x1F18, 0x1F1D),
            (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D),
            (0x1F50, 0x1F57),
            (0x1F59,),
            (0x1F5B,),
            (0x1F5D,),
            (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4),
            (0x1FB6, 0x1FC4),
            (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB),
            (0x1FDD, 0x1FEF),
            (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE),
            (0x2129,),
            (0x2719, 0x271A),
            (0xAB65,),
            (0x10140, 0x1018D),
            (0x101A0,),
            (0x1D200, 0x1D245),
            (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        """Unicode set for Cyrillic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F),
            (0x1C80, 0x1C88),
            (0x1D2B,),
            (0x1D78,),
            (0x2DE0, 0x2DFF),
            (0xA640, 0xA672),
            (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        """Unicode set for Chinese Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99),
            (0x2E9B, 0x2EF3),
            (0x31C0, 0x31E3),
            (0x3400, 0x4DB5),
            (0x4E00, 0x9FEF),
            (0xA700, 0xA707),
            (0xF900, 0xFA6D),
            (0xFA70, 0xFAD9),
            (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212),
            (0x1F214, 0x1F23B),
            (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6),
            (0x2A700, 0x2B734),
            (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1),
            (0x2CEB0, 0x2EBE0),
            (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x4E00, 0x9FBF),
                (0x3000, 0x303F),
            ]

        class Hiragana(unicode_set):
            """Unicode set for Hiragana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096),
                (0x3099, 0x30A0),
                (0x30FC,),
                (0xFF70,),
                (0x1B001,),
                (0x1B150, 0x1B152),
                (0x1F200,),
            ]

        class Katakana(unicode_set):
            """Unicode set for Katakana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C),
                (0x30A0, 0x30FF),
                (0x31F0, 0x31FF),
                (0x32D0, 0x32FE),
                (0xFF65, 0xFF9F),
                (0x1B000,),
                (0x1B164, 0x1B167),
                (0x1F201, 0x1F202),
                (0x1F213,),
            ]

        # native-script aliases for the three nested sets
        漢字 = Kanji
        カタカナ = Katakana
        ひらがな = Hiragana

        # Japanese itself is the concatenation of its three sub-sets' ranges
        _ranges = (
            Kanji._ranges
            + Hiragana._ranges
            + Katakana._ranges
        )

    class Hangul(unicode_set):
        """Unicode set for Hangul (Korean) Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF),
            (0x302E, 0x302F),
            (0x3131, 0x318E),
            (0x3200, 0x321C),
            (0x3260, 0x327B),
            (0x327E,),
            (0xA960, 0xA97C),
            (0xAC00, 0xD7A3),
            (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB),
            (0xFFA0, 0xFFBE),
            (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF),
            (0xFFD2, 0xFFD7),
            (0xFFDA, 0xFFDC),
        ]

    Korean = Hangul

    # no _ranges of its own: the three base classes supply them
    class CJK(Chinese, Japanese, Hangul):
        """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""

    class Thai(unicode_set):
        """Unicode set for Thai Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0E01, 0x0E3A),
            (0x0E3F, 0x0E5B)
        ]

    class Arabic(unicode_set):
        """Unicode set for Arabic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B),
            (0x061E, 0x06FF),
            (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        """Unicode set for Hebrew Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7),
            (0x05D0, 0x05EA),
            (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36),
            (0xFB38, 0xFB3C),
            (0xFB3E,),
            (0xFB40, 0xFB41),
            (0xFB43, 0xFB44),
            (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        """Unicode set for Devanagari Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0900, 0x097F),
            (0xA8E0, 0xA8FF)
        ]

    # short alias for the Basic Multilingual Plane set
    BMP = BasicMultilingualPlane

    # add language identifiers using language Unicode
    العربية = Arabic
    中文 = Chinese
    кириллица = Cyrillic
    Ελληνικά = Greek
    עִברִית = Hebrew
    日本語 = Japanese
    한국어 = Korean
    ไทย = Thai
    देवनागरी = Devanagari

    # fmt: on
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/util.py ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # util.py
2
+ import inspect
3
+ import warnings
4
+ import types
5
+ import collections
6
+ import itertools
7
+ from functools import lru_cache, wraps
8
+ from typing import Callable, List, Union, Iterable, TypeVar, cast
9
+
10
+ _bslash = chr(92)
11
+ C = TypeVar("C", bound=Callable)
12
+
13
+
14
+ class __config_flags:
15
+ """Internal class for defining compatibility and debugging flags"""
16
+
17
+ _all_names: List[str] = []
18
+ _fixed_names: List[str] = []
19
+ _type_desc = "configuration"
20
+
21
+ @classmethod
22
+ def _set(cls, dname, value):
23
+ if dname in cls._fixed_names:
24
+ warnings.warn(
25
+ f"{cls.__name__}.{dname} {cls._type_desc} is {str(getattr(cls, dname)).upper()}"
26
+ f" and cannot be overridden",
27
+ stacklevel=3,
28
+ )
29
+ return
30
+ if dname in cls._all_names:
31
+ setattr(cls, dname, value)
32
+ else:
33
+ raise ValueError(f"no such {cls._type_desc} {dname!r}")
34
+
35
+ enable = classmethod(lambda cls, name: cls._set(name, True))
36
+ disable = classmethod(lambda cls, name: cls._set(name, False))
37
+
38
+
39
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Return the 1-based column of position ``loc`` in ``strg``, with newlines
    acting as line separators.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See
    :class:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # a position right after a newline is the first column of its line
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # otherwise measure from the last newline before loc (rfind gives -1 if none)
    return loc - strg.rfind("\n", 0, loc)
54
+
55
+
56
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """Return the 1-based line number of position ``loc`` in ``strg``, with
    newlines acting as line separators.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process. See :class:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
68
+
69
+
70
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Return the text of the line that contains position ``loc`` in ``strg``
    (newlines are the line separators and are not included).
    """
    # rfind gives -1 when no newline precedes loc, so start lands on 0
    start = strg.rfind("\n", 0, loc) + 1
    stop = strg.find("\n", loc)
    if stop < 0:
        return strg[start:]
    return strg[start:stop]
78
+
79
+
80
+ class _UnboundedCache:
81
+ def __init__(self):
82
+ cache = {}
83
+ cache_get = cache.get
84
+ self.not_in_cache = not_in_cache = object()
85
+
86
+ def get(_, key):
87
+ return cache_get(key, not_in_cache)
88
+
89
+ def set_(_, key, value):
90
+ cache[key] = value
91
+
92
+ def clear(_):
93
+ cache.clear()
94
+
95
+ self.size = None
96
+ self.get = types.MethodType(get, self)
97
+ self.set = types.MethodType(set_, self)
98
+ self.clear = types.MethodType(clear, self)
99
+
100
+
101
+ class _FifoCache:
102
+ def __init__(self, size):
103
+ self.not_in_cache = not_in_cache = object()
104
+ cache = {}
105
+ keyring = [object()] * size
106
+ cache_get = cache.get
107
+ cache_pop = cache.pop
108
+ keyiter = itertools.cycle(range(size))
109
+
110
+ def get(_, key):
111
+ return cache_get(key, not_in_cache)
112
+
113
+ def set_(_, key, value):
114
+ cache[key] = value
115
+ i = next(keyiter)
116
+ cache_pop(keyring[i], None)
117
+ keyring[i] = key
118
+
119
+ def clear(_):
120
+ cache.clear()
121
+ keyring[:] = [object()] * size
122
+
123
+ self.size = size
124
+ self.get = types.MethodType(get, self)
125
+ self.set = types.MethodType(set_, self)
126
+ self.clear = types.MethodType(clear, self)
127
+
128
+
129
class LRUMemo:
    """
    A memoizing mapping that retains `capacity` deleted items

    Deleted items move from the active dict into an ordered memory; once the
    memory holds `capacity` items, the least recently used one is dropped.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = collections.OrderedDict()

    def __getitem__(self, key):
        if key in self._active:
            return self._active[key]
        # a retained item becomes the most recently used; raises KeyError if unknown
        self._memory.move_to_end(key)
        return self._memory[key]

    def __setitem__(self, key, value):
        # a key is either active or retained, never both
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        if key not in self._active:
            return
        value = self._active.pop(key)
        # make room by dropping the oldest retained items first
        while len(self._memory) >= self._capacity:
            self._memory.popitem(last=False)
        self._memory[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()
166
+
167
+
168
class UnboundedMemo(dict):
    """
    A memoizing mapping that retains all deleted items
    """

    def __delitem__(self, key):
        # deleting is deliberately a no-op, so the entry stays available
        return None
175
+
176
+
177
def _escape_regex_range_chars(s: str) -> str:
    """Backslash-escape the characters \\ ^ - [ ] in ``s`` and spell newline and tab as ``\\n`` and ``\\t``."""
    # the backslash comes first so the escapes added afterwards are not doubled
    escaped = s
    for special in r"\^-[]":
        escaped = escaped.replace(special, _bslash + special)
    escaped = escaped.replace("\n", r"\n").replace("\t", r"\t")
    return str(escaped)
184
+
185
+
186
+ def _collapse_string_to_ranges(
187
+ s: Union[str, Iterable[str]], re_escape: bool = True
188
+ ) -> str:
189
+ def is_consecutive(c):
190
+ c_int = ord(c)
191
+ is_consecutive.prev, prev = c_int, is_consecutive.prev
192
+ if c_int - prev > 1:
193
+ is_consecutive.value = next(is_consecutive.counter)
194
+ return is_consecutive.value
195
+
196
+ is_consecutive.prev = 0 # type: ignore [attr-defined]
197
+ is_consecutive.counter = itertools.count() # type: ignore [attr-defined]
198
+ is_consecutive.value = -1 # type: ignore [attr-defined]
199
+
200
+ def escape_re_range_char(c):
201
+ return "\\" + c if c in r"\^-][" else c
202
+
203
+ def no_escape_re_range_char(c):
204
+ return c
205
+
206
+ if not re_escape:
207
+ escape_re_range_char = no_escape_re_range_char
208
+
209
+ ret = []
210
+ s = "".join(sorted(set(s)))
211
+ if len(s) > 3:
212
+ for _, chars in itertools.groupby(s, key=is_consecutive):
213
+ first = last = next(chars)
214
+ last = collections.deque(
215
+ itertools.chain(iter([last]), chars), maxlen=1
216
+ ).pop()
217
+ if first == last:
218
+ ret.append(escape_re_range_char(first))
219
+ else:
220
+ sep = "" if ord(last) == ord(first) + 1 else "-"
221
+ ret.append(
222
+ f"{escape_re_range_char(first)}{sep}{escape_re_range_char(last)}"
223
+ )
224
+ else:
225
+ ret = [escape_re_range_char(c) for c in s]
226
+
227
+ return "".join(ret)
228
+
229
+
230
+ def _flatten(ll: list) -> list:
231
+ ret = []
232
+ for i in ll:
233
+ if isinstance(i, list):
234
+ ret.extend(_flatten(i))
235
+ else:
236
+ ret.append(i)
237
+ return ret
238
+
239
+
240
def _make_synonym_function(compat_name: str, fn: C) -> C:
    """
    Build a thin wrapper around ``fn`` that is published under the legacy
    name ``compat_name``.

    The wrapper forwards all arguments to ``fn`` and copies its annotations,
    keyword defaults and qualified name; its docstring points at the new name.
    """
    # In a future version, uncomment the code in the internal _inner() functions
    # to begin emitting DeprecationWarnings.

    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    # Compare one-element slices so that a function with no parameters takes
    # the plain branch instead of raising IndexError.
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=3
            # )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=3
            # )
            return fn(*args, **kwargs)

    _inner.__doc__ = f"""Deprecated - use :class:`{fn.__name__}`"""
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__
    else:
        _inner.__kwdefaults__ = None
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
278
+
279
+
280
def replaced_by_pep8(fn: C) -> Callable[[Callable], C]:
    """
    Decorator factory for pre-PEP8 compatibility names: the decorated function
    contributes only its name, and is replaced by a synonym that calls ``fn``.
    """
    return lambda legacy: _make_synonym_function(legacy.__name__, fn)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/__init__.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
3
+ """
4
+ from __future__ import absolute_import
5
+
6
+ # Set default logging handler to avoid "No handler found" warnings.
7
+ import logging
8
+ import warnings
9
+ from logging import NullHandler
10
+
11
+ from . import exceptions
12
+ from ._version import __version__
13
+ from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
14
+ from .filepost import encode_multipart_formdata
15
+ from .poolmanager import PoolManager, ProxyManager, proxy_from_url
16
+ from .response import HTTPResponse
17
+ from .util.request import make_headers
18
+ from .util.retry import Retry
19
+ from .util.timeout import Timeout
20
+ from .util.url import get_host
21
+
22
+ # === NOTE TO REPACKAGERS AND VENDORS ===
23
+ # Please delete this block, this logic is only
24
+ # for urllib3 being distributed via PyPI.
25
+ # See: https://github.com/urllib3/urllib3/issues/2680
26
+ try:
27
+ import urllib3_secure_extra # type: ignore # noqa: F401
28
+ except ImportError:
29
+ pass
30
+ else:
31
+ warnings.warn(
32
+ "'urllib3[secure]' extra is deprecated and will be removed "
33
+ "in a future release of urllib3 2.x. Read more in this issue: "
34
+ "https://github.com/urllib3/urllib3/issues/2680",
35
+ category=DeprecationWarning,
36
+ stacklevel=2,
37
+ )
38
+
39
+ __author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
40
+ __license__ = "MIT"
41
+ __version__ = __version__
42
+
43
+ __all__ = (
44
+ "HTTPConnectionPool",
45
+ "HTTPSConnectionPool",
46
+ "PoolManager",
47
+ "ProxyManager",
48
+ "HTTPResponse",
49
+ "Retry",
50
+ "Timeout",
51
+ "add_stderr_logger",
52
+ "connection_from_url",
53
+ "disable_warnings",
54
+ "encode_multipart_formdata",
55
+ "get_host",
56
+ "make_headers",
57
+ "proxy_from_url",
58
+ )
59
+
60
+ logging.getLogger(__name__).addHandler(NullHandler())
61
+
62
+
63
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a new ``logging.StreamHandler`` to this package's logger and set
    the logger to ``level``. Handy for quick debugging.

    Returns the handler after adding it.
    """
    # This method needs to be in this __init__.py to get the __name__ correct
    # even if urllib3 is vendored within another package.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    )

    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stream_handler)
    package_logger.setLevel(level)
    package_logger.debug("Added a stderr logging handler to logger: %s", __name__)
    return stream_handler
79
+
80
+
81
+ # ... Clean up.
82
+ del NullHandler
83
+
84
+
85
+ # All warning filters *must* be appended unless you're really certain that they
86
+ # shouldn't be: otherwise, it's very hard for users to use most Python
87
+ # mechanisms to silence them.
88
+ # SecurityWarning's always go off by default.
89
+ warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
90
+ # SubjectAltNameWarning's should go off once per host
91
+ warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
92
+ # InsecurePlatformWarning's don't vary between requests, so we keep it default.
93
+ warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
94
+ # SNIMissingWarnings should go off only once.
95
+ warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
96
+
97
+
98
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Helper for quickly disabling all urllib3 warnings: installs an
    ``"ignore"`` warnings filter for ``category``.
    """
    warnings.simplefilter(action="ignore", category=category)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_collections.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ try:
4
+ from collections.abc import Mapping, MutableMapping
5
+ except ImportError:
6
+ from collections import Mapping, MutableMapping
7
+ try:
8
+ from threading import RLock
9
+ except ImportError: # Platform-specific: No threads available
10
+
11
+ class RLock:
12
+ def __enter__(self):
13
+ pass
14
+
15
+ def __exit__(self, exc_type, exc_value, traceback):
16
+ pass
17
+
18
+
19
+ from collections import OrderedDict
20
+
21
+ from .exceptions import InvalidHeader
22
+ from .packages import six
23
+ from .packages.six import iterkeys, itervalues
24
+
25
+ __all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
26
+
27
+
28
+ _Null = object()
29
+
30
+
31
class RecentlyUsedContainer(MutableMapping):
    """
    Provides a thread-safe dict-like container which maintains up to
    ``maxsize`` keys while throwing away the least-recently-used keys beyond
    ``maxsize``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Callback invoked as ``dispose_func(value)`` every time an item is
        evicted from the container.
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func

        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        # Pop and re-add so the entry moves to the back of the eviction line.
        with self.lock:
            value = self._container.pop(key)
            self._container[key] = value
            return value

    def __setitem__(self, key, value):
        displaced = _Null
        with self.lock:
            # Remember any value being overwritten so it can be disposed of.
            displaced = self._container.get(key, _Null)
            self._container[key] = value

            # If nothing was overwritten the container may now be over
            # capacity; drop the least recently used entry from the front.
            if len(self._container) > self._maxsize:
                displaced = self._container.popitem(last=False)[1]

        # Dispose outside the lock.
        if self.dispose_func and displaced is not _Null:
            self.dispose_func(displaced)

    def __delitem__(self, key):
        with self.lock:
            removed = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(removed)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError(
            "Iteration over this class is unlikely to be threadsafe."
        )

    def clear(self):
        with self.lock:
            # Snapshot every value first, then wipe the mapping.
            disposable = list(itervalues(self._container))
            self._container.clear()

        if not self.dispose_func:
            return
        for value in disposable:
            self.dispose_func(value)

    def keys(self):
        with self.lock:
            return list(iterkeys(self._container))
105
+
106
+
107
class HTTPHeaderDict(MutableMapping):
    """
    :param headers:
        An iterable of field-value pairs. Must not contain multiple field names
        when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    A ``dict`` like container for storing HTTP Headers.

    Field names are stored and compared case-insensitively in compliance with
    RFC 7230. Iteration provides the first case-sensitive key seen for each
    case-insensitive pair.

    Using ``__setitem__`` syntax overwrites fields that compare equal
    case-insensitively in order to maintain ``dict``'s api. For fields that
    compare equal, instead create a new ``HTTPHeaderDict`` and use ``.add``
    in a loop.

    If multiple fields that are equal case-insensitively are passed to the
    constructor or ``.update``, the behavior is undefined and some will be
    lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'
    """

    # Internal layout: ``_container`` maps the lower-cased field name to a
    # list ``[original_name, value1, value2, ...]``.

    def __init__(self, headers=None, **kwargs):
        super(HTTPHeaderDict, self).__init__()
        self._container = OrderedDict()
        if headers is not None:
            if isinstance(headers, HTTPHeaderDict):
                self._copy_from(headers)
            else:
                self.extend(headers)
        if kwargs:
            self.extend(kwargs)

    def __setitem__(self, key, val):
        lowered = key.lower()
        self._container[lowered] = [key, val]
        return self._container[lowered]

    def __getitem__(self, key):
        entry = self._container[key.lower()]
        # entry[0] is the original field name; the rest are its values.
        return ", ".join(entry[1:])

    def __delitem__(self, key):
        del self._container[key.lower()]

    def __contains__(self, key):
        return key.lower() in self._container

    def __eq__(self, other):
        if not (isinstance(other, Mapping) or hasattr(other, "keys")):
            return False
        if not isinstance(other, type(self)):
            other = type(self)(other)
        mine = {name.lower(): merged for name, merged in self.itermerged()}
        theirs = {name.lower(): merged for name, merged in other.itermerged()}
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    if six.PY2:  # Python 2
        iterkeys = MutableMapping.iterkeys
        itervalues = MutableMapping.itervalues

    # Private sentinel meaning "no default supplied".
    __marker = object()

    def __len__(self):
        return len(self._container)

    def __iter__(self):
        # Only provide the originally cased names
        for entry in self._container.values():
            yield entry[0]

    def pop(self, key, default=__marker):
        """D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.
        """
        # Using the MutableMapping function directly fails due to the private marker.
        # Using ordinary dict.pop would expose the internal structures.
        # So let's reinvent the wheel.
        try:
            value = self[key]
        except KeyError:
            if default is self.__marker:
                raise
            return default
        del self[key]
        return value

    def discard(self, key):
        """Remove ``key`` if present; do nothing when it is absent."""
        try:
            del self[key]
        except KeyError:
            pass

    def add(self, key, val):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        fresh_entry = [key, val]
        # Keep the common case aka no item present as fast as possible
        stored = self._container.setdefault(key.lower(), fresh_entry)
        if stored is not fresh_entry:
            # An entry already existed: append the extra value to it.
            stored.append(val)

    def extend(self, *args, **kwargs):
        """Generic import function for any type of header-like object.
        Adapted version of MutableMapping.update in order to insert items
        with self.add instead of self.__setitem__
        """
        if len(args) > 1:
            raise TypeError(
                "extend() takes at most 1 positional "
                "arguments ({0} given)".format(len(args))
            )
        other = args[0] if args else ()

        if isinstance(other, HTTPHeaderDict):
            for key, val in other.iteritems():
                self.add(key, val)
        elif isinstance(other, Mapping):
            for key in other:
                self.add(key, other[key])
        elif hasattr(other, "keys"):
            for key in other.keys():
                self.add(key, other[key])
        else:
            # Fall back to treating ``other`` as an iterable of pairs.
            for key, value in other:
                self.add(key, value)

        for key, value in kwargs.items():
            self.add(key, value)

    def getlist(self, key, default=__marker):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist."""
        try:
            entry = self._container[key.lower()]
        except KeyError:
            return [] if default is self.__marker else default
        return entry[1:]

    # Backwards compatibility for httplib
    getheaders = getlist
    getallmatchingheaders = getlist
    iget = getlist

    # Backwards compatibility for http.cookiejar
    get_all = getlist

    def __repr__(self):
        return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))

    def _copy_from(self, other):
        for key in other:
            values = other.getlist(key)
            if isinstance(values, list):
                # Don't need to convert tuples
                values = list(values)
            self._container[key.lower()] = [key] + values

    def copy(self):
        """Return a new instance of the same type holding the same headers."""
        duplicate = type(self)()
        duplicate._copy_from(self)
        return duplicate

    def iteritems(self):
        """Iterate over all header lines, including duplicate ones."""
        for key in self:
            entry = self._container[key.lower()]
            name = entry[0]
            for value in entry[1:]:
                yield name, value

    def itermerged(self):
        """Iterate over all headers, merging duplicate ones together."""
        for key in self:
            entry = self._container[key.lower()]
            yield entry[0], ", ".join(entry[1:])

    def items(self):
        return list(self.iteritems())

    @classmethod
    def from_httplib(cls, message):  # Python 2
        """Read headers from a Python 2 httplib message object."""
        # python2.7 does not expose a proper API for exporting multiheaders
        # efficiently. This function re-reads raw lines from the message
        # object and extracts the multiheaders properly.
        obs_fold_continued_leaders = (" ", "\t")
        headers = []

        for line in message.headers:
            if not line.startswith(obs_fold_continued_leaders):
                key, value = line.split(":", 1)
                headers.append((key, value.strip()))
                continue

            if not headers:
                # We received a header line that starts with OWS as described
                # in RFC-7230 S3.2.4. This indicates a multiline header, but
                # there exists no previous header to which we can attach it.
                raise InvalidHeader(
                    "Header continuation with no previous header: %s" % line
                )

            # Fold the continuation line into the previous header's value.
            key, value = headers[-1]
            headers[-1] = (key, value + " " + line.strip())

        return cls(headers)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_version.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # This file is protected via CODEOWNERS
2
+ __version__ = "1.26.17"
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connection.py ADDED
@@ -0,0 +1,572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import datetime
4
+ import logging
5
+ import os
6
+ import re
7
+ import socket
8
+ import warnings
9
+ from socket import error as SocketError
10
+ from socket import timeout as SocketTimeout
11
+
12
+ from .packages import six
13
+ from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
14
+ from .packages.six.moves.http_client import HTTPException # noqa: F401
15
+ from .util.proxy import create_proxy_ssl_context
16
+
17
+ try: # Compiled with SSL?
18
+ import ssl
19
+
20
+ BaseSSLError = ssl.SSLError
21
+ except (ImportError, AttributeError): # Platform-specific: No SSL.
22
+ ssl = None
23
+
24
+ class BaseSSLError(BaseException):
25
+ pass
26
+
27
+
28
+ try:
29
+ # Python 3: not a no-op, we're adding this to the namespace so it can be imported.
30
+ ConnectionError = ConnectionError
31
+ except NameError:
32
+ # Python 2
33
+ class ConnectionError(Exception):
34
+ pass
35
+
36
+
37
+ try: # Python 3:
38
+ # Not a no-op, we're adding this to the namespace so it can be imported.
39
+ BrokenPipeError = BrokenPipeError
40
+ except NameError: # Python 2:
41
+
42
+ class BrokenPipeError(Exception):
43
+ pass
44
+
45
+
46
+ from ._collections import HTTPHeaderDict # noqa (historical, removed in v2)
47
+ from ._version import __version__
48
+ from .exceptions import (
49
+ ConnectTimeoutError,
50
+ NewConnectionError,
51
+ SubjectAltNameWarning,
52
+ SystemTimeWarning,
53
+ )
54
+ from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
55
+ from .util.ssl_ import (
56
+ assert_fingerprint,
57
+ create_urllib3_context,
58
+ is_ipaddress,
59
+ resolve_cert_reqs,
60
+ resolve_ssl_version,
61
+ ssl_wrap_socket,
62
+ )
63
+ from .util.ssl_match_hostname import CertificateError, match_hostname
64
+
65
+ log = logging.getLogger(__name__)
66
+
67
+ port_by_scheme = {"http": 80, "https": 443}
68
+
69
+ # When it comes time to update this value as a part of regular maintenance
70
+ # (ie test_recent_date is failing) update it to ~6 months before the current date.
71
+ RECENT_DATE = datetime.date(2022, 1, 1)
72
+
73
+ _CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")
74
+
75
+
76
class HTTPConnection(_HTTPConnection, object):
    """
    Based on :class:`http.client.HTTPConnection` but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, if you wish to enable TCP Keep Alive in addition to the defaults,
      you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    #: Whether this proxy connection (if used) verifies the proxy host's
    #: certificate.
    proxy_is_verified = None

    def __init__(self, *args, **kw):
        if not six.PY2:
            kw.pop("strict", None)

        # Pre-set source_address.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        # Proxy options provided by the user.
        self.proxy = kw.pop("proxy", None)
        self.proxy_config = kw.pop("proxy_config", None)

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        """
        # Only forward the optional settings that are actually set.
        extra_kw = {}
        for option in ("source_address", "socket_options"):
            setting = getattr(self, option)
            if setting:
                extra_kw[option] = setting

        try:
            # Connect using the original host (``_dns_host``), trailing dot included.
            conn = connection.create_connection(
                (self._dns_host, self.port), self.timeout, **extra_kw
            )

        except SocketTimeout:
            raise ConnectTimeoutError(
                self,
                "Connection to %s timed out. (connect timeout=%s)"
                % (self.host, self.timeout),
            )

        except SocketError as e:
            raise NewConnectionError(
                self, "Failed to establish a new connection: %s" % e
            )

        return conn

    def _is_using_tunnel(self):
        # Google App Engine's httplib does not define _tunnel_host
        return getattr(self, "_tunnel_host", None)

    def _prepare_conn(self, conn):
        self.sock = conn
        if not self._is_using_tunnel():
            return
        # TODO: Fix tunnel so it doesn't depend on self.sock state.
        self._tunnel()
        # Mark this connection as not reusable
        self.auto_open = 0

    def connect(self):
        self._prepare_conn(self._new_conn())

    def putrequest(self, method, url, *args, **kwargs):
        """ """
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        offending = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if offending:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, offending.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """ """
        skip_requested = any(
            isinstance(v, str) and v == SKIP_HEADER for v in values
        )
        if not skip_requested:
            _HTTPConnection.putheader(self, header, *values)
            return
        # SKIP_HEADER was passed: nothing is sent, but reject headers that
        # are not in SKIPPABLE_HEADERS.
        if six.ensure_str(header.lower()) not in SKIPPABLE_HEADERS:
            raise ValueError(
                "urllib3.util.SKIP_HEADER only supports '%s'"
                % ("', '".join(map(str.title, sorted(SKIPPABLE_HEADERS))),)
            )

    def request(self, method, url, body=None, headers=None):
        # Update the inner socket's timeout value to send the request.
        # This only triggers if the connection is re-used.
        if getattr(self, "sock", None) is not None:
            self.sock.settimeout(self.timeout)

        # Avoid modifying the headers passed into .request()
        headers = {} if headers is None else headers.copy()
        if "user-agent" not in (six.ensure_str(k.lower()) for k in headers):
            headers["User-Agent"] = _get_default_user_agent()
        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block
        """
        headers = headers or {}
        header_keys = {six.ensure_str(k.lower()) for k in headers}
        self.putrequest(
            method,
            url,
            skip_accept_encoding="accept-encoding" in header_keys,
            skip_host="host" in header_keys,
        )
        if "user-agent" not in header_keys:
            self.putheader("User-Agent", _get_default_user_agent())
        for header, value in headers.items():
            self.putheader(header, value)
        if "transfer-encoding" not in header_keys:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            stringish_types = six.string_types + (bytes,)
            if isinstance(body, stringish_types):
                # A single string/bytes body is sent as one chunk.
                body = (body,)
            for chunk in body:
                if not chunk:
                    continue
                if not isinstance(chunk, bytes):
                    chunk = chunk.encode("utf8")
                # Frame: hex length, CRLF, payload, CRLF.
                frame = bytearray(hex(len(chunk))[2:].encode())
                frame += b"\r\n"
                frame += chunk
                frame += b"\r\n"
                self.send(frame)

        # After the if clause, to always have a closed body
        self.send(b"0\r\n\r\n")
284
+
285
+
286
+ class HTTPSConnection(HTTPConnection):
287
+ """
288
+ Many of the parameters to this constructor are passed to the underlying SSL
289
+ socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
290
+ """
291
+
292
+ default_port = port_by_scheme["https"]
293
+
294
+ cert_reqs = None
295
+ ca_certs = None
296
+ ca_cert_dir = None
297
+ ca_cert_data = None
298
+ ssl_version = None
299
+ assert_fingerprint = None
300
+ tls_in_tls_required = False
301
+
302
+ def __init__(
303
+ self,
304
+ host,
305
+ port=None,
306
+ key_file=None,
307
+ cert_file=None,
308
+ key_password=None,
309
+ strict=None,
310
+ timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
311
+ ssl_context=None,
312
+ server_hostname=None,
313
+ **kw
314
+ ):
315
+
316
+ HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)
317
+
318
+ self.key_file = key_file
319
+ self.cert_file = cert_file
320
+ self.key_password = key_password
321
+ self.ssl_context = ssl_context
322
+ self.server_hostname = server_hostname
323
+
324
+ # Required property for Google AppEngine 1.9.0 which otherwise causes
325
+ # HTTPS requests to go out as HTTP. (See Issue #356)
326
+ self._protocol = "https"
327
+
328
+ def set_cert(
329
+ self,
330
+ key_file=None,
331
+ cert_file=None,
332
+ cert_reqs=None,
333
+ key_password=None,
334
+ ca_certs=None,
335
+ assert_hostname=None,
336
+ assert_fingerprint=None,
337
+ ca_cert_dir=None,
338
+ ca_cert_data=None,
339
+ ):
340
+ """
341
+ This method should only be called once, before the connection is used.
342
+ """
343
+ # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
344
+ # have an SSLContext object in which case we'll use its verify_mode.
345
+ if cert_reqs is None:
346
+ if self.ssl_context is not None:
347
+ cert_reqs = self.ssl_context.verify_mode
348
+ else:
349
+ cert_reqs = resolve_cert_reqs(None)
350
+
351
+ self.key_file = key_file
352
+ self.cert_file = cert_file
353
+ self.cert_reqs = cert_reqs
354
+ self.key_password = key_password
355
+ self.assert_hostname = assert_hostname
356
+ self.assert_fingerprint = assert_fingerprint
357
+ self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
358
+ self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
359
+ self.ca_cert_data = ca_cert_data
360
+
361
+ def connect(self):
362
+ # Add certificate verification
363
+ self.sock = conn = self._new_conn()
364
+ hostname = self.host
365
+ tls_in_tls = False
366
+
367
+ if self._is_using_tunnel():
368
+ if self.tls_in_tls_required:
369
+ self.sock = conn = self._connect_tls_proxy(hostname, conn)
370
+ tls_in_tls = True
371
+
372
+ # Calls self._set_hostport(), so self.host is
373
+ # self._tunnel_host below.
374
+ self._tunnel()
375
+ # Mark this connection as not reusable
376
+ self.auto_open = 0
377
+
378
+ # Override the host with the one we're requesting data from.
379
+ hostname = self._tunnel_host
380
+
381
+ server_hostname = hostname
382
+ if self.server_hostname is not None:
383
+ server_hostname = self.server_hostname
384
+
385
+ is_time_off = datetime.date.today() < RECENT_DATE
386
+ if is_time_off:
387
+ warnings.warn(
388
+ (
389
+ "System time is way off (before {0}). This will probably "
390
+ "lead to SSL verification errors"
391
+ ).format(RECENT_DATE),
392
+ SystemTimeWarning,
393
+ )
394
+
395
+ # Wrap socket using verification with the root certs in
396
+ # trusted_root_certs
397
+ default_ssl_context = False
398
+ if self.ssl_context is None:
399
+ default_ssl_context = True
400
+ self.ssl_context = create_urllib3_context(
401
+ ssl_version=resolve_ssl_version(self.ssl_version),
402
+ cert_reqs=resolve_cert_reqs(self.cert_reqs),
403
+ )
404
+
405
+ context = self.ssl_context
406
+ context.verify_mode = resolve_cert_reqs(self.cert_reqs)
407
+
408
+ # Try to load OS default certs if none are given.
409
+ # Works well on Windows (requires Python3.4+)
410
+ if (
411
+ not self.ca_certs
412
+ and not self.ca_cert_dir
413
+ and not self.ca_cert_data
414
+ and default_ssl_context
415
+ and hasattr(context, "load_default_certs")
416
+ ):
417
+ context.load_default_certs()
418
+
419
+ self.sock = ssl_wrap_socket(
420
+ sock=conn,
421
+ keyfile=self.key_file,
422
+ certfile=self.cert_file,
423
+ key_password=self.key_password,
424
+ ca_certs=self.ca_certs,
425
+ ca_cert_dir=self.ca_cert_dir,
426
+ ca_cert_data=self.ca_cert_data,
427
+ server_hostname=server_hostname,
428
+ ssl_context=context,
429
+ tls_in_tls=tls_in_tls,
430
+ )
431
+
432
+ # If we're using all defaults and the connection
433
+ # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
434
+ # for the host.
435
+ if (
436
+ default_ssl_context
437
+ and self.ssl_version is None
438
+ and hasattr(self.sock, "version")
439
+ and self.sock.version() in {"TLSv1", "TLSv1.1"}
440
+ ):
441
+ warnings.warn(
442
+ "Negotiating TLSv1/TLSv1.1 by default is deprecated "
443
+ "and will be disabled in urllib3 v2.0.0. Connecting to "
444
+ "'%s' with '%s' can be enabled by explicitly opting-in "
445
+ "with 'ssl_version'" % (self.host, self.sock.version()),
446
+ DeprecationWarning,
447
+ )
448
+
449
+ if self.assert_fingerprint:
450
+ assert_fingerprint(
451
+ self.sock.getpeercert(binary_form=True), self.assert_fingerprint
452
+ )
453
+ elif (
454
+ context.verify_mode != ssl.CERT_NONE
455
+ and not getattr(context, "check_hostname", False)
456
+ and self.assert_hostname is not False
457
+ ):
458
+ # While urllib3 attempts to always turn off hostname matching from
459
+ # the TLS library, this cannot always be done. So we check whether
460
+ # the TLS Library still thinks it's matching hostnames.
461
+ cert = self.sock.getpeercert()
462
+ if not cert.get("subjectAltName", ()):
463
+ warnings.warn(
464
+ (
465
+ "Certificate for {0} has no `subjectAltName`, falling back to check for a "
466
+ "`commonName` for now. This feature is being removed by major browsers and "
467
+ "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
468
+ "for details.)".format(hostname)
469
+ ),
470
+ SubjectAltNameWarning,
471
+ )
472
+ _match_hostname(cert, self.assert_hostname or server_hostname)
473
+
474
+ self.is_verified = (
475
+ context.verify_mode == ssl.CERT_REQUIRED
476
+ or self.assert_fingerprint is not None
477
+ )
478
+
479
def _connect_tls_proxy(self, hostname, conn):
    """
    Establish a TLS connection to the proxy using the provided SSL context.

    :param hostname:
        Passed to ``ssl_wrap_socket`` as ``server_hostname``; also the name
        matched against the peer certificate when this method performs the
        hostname check itself.

    :param conn:
        Object handed to ``ssl_wrap_socket`` as ``sock``.

    :return:
        Whatever ``ssl_wrap_socket`` returns.

    .. note::
        When ``self.proxy_config.ssl_context`` is set, the method returns
        right after wrapping: no hostname check is done here and
        ``self.proxy_is_verified`` is not assigned.
    """
    proxy_config = self.proxy_config
    ssl_context = proxy_config.ssl_context
    if ssl_context:
        # If the user provided a proxy context, we assume CA and client
        # certificates have already been set
        return ssl_wrap_socket(
            sock=conn,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

    ssl_context = create_proxy_ssl_context(
        self.ssl_version,
        self.cert_reqs,
        self.ca_certs,
        self.ca_cert_dir,
        self.ca_cert_data,
    )

    # If no cert was provided, use only the default options for server
    # certificate validation.
    # (Local is deliberately not called ``socket`` so it cannot be confused
    # with the standard-library module of that name.)
    tls_sock = ssl_wrap_socket(
        sock=conn,
        ca_certs=self.ca_certs,
        ca_cert_dir=self.ca_cert_dir,
        ca_cert_data=self.ca_cert_data,
        server_hostname=hostname,
        ssl_context=ssl_context,
    )

    if ssl_context.verify_mode != ssl.CERT_NONE and not getattr(
        ssl_context, "check_hostname", False
    ):
        # While urllib3 attempts to always turn off hostname matching from
        # the TLS library, this cannot always be done. So we check whether
        # the TLS Library still thinks it's matching hostnames.
        cert = tls_sock.getpeercert()
        if not cert.get("subjectAltName", ()):
            warnings.warn(
                (
                    "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                    "`commonName` for now. This feature is being removed by major browsers and "
                    "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                    "for details.)".format(hostname)
                ),
                SubjectAltNameWarning,
            )
        _match_hostname(cert, hostname)

    self.proxy_is_verified = ssl_context.verify_mode == ssl.CERT_REQUIRED
    return tls_sock
534
+
535
+
536
def _match_hostname(cert, asserted_hostname):
    """
    Check ``cert`` against ``asserted_hostname`` via ``match_hostname``.

    :param cert:
        Certificate object forwarded unchanged to ``match_hostname``.

    :param asserted_hostname:
        Host name the certificate is expected to be valid for.

    :raises CertificateError:
        Re-raised from ``match_hostname`` after logging a warning; the
        certificate is attached to the exception as ``_peer_cert``.
    """
    # Our upstream implementation of ssl.match_hostname()
    # only applies this normalization to IP addresses so it doesn't
    # match DNS SANs so we do the same thing!
    # ``strip("u[]")`` removes any leading/trailing 'u', '[' and ']'
    # characters; the stripped form is used only if it is an IP address.
    stripped_hostname = asserted_hostname.strip("u[]")
    if is_ipaddress(stripped_hostname):
        asserted_hostname = stripped_hostname

    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as e:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Add cert to exception and reraise so client code can inspect
        # the cert when catching the exception, if they want to
        e._peer_cert = cert
        raise
556
+
557
+
558
def _get_default_user_agent():
    """Return the string ``"python-urllib3/"`` followed by ``__version__``."""
    return "python-urllib3/%s" % __version__
560
+
561
+
562
class DummyConnection(object):
    """Used to detect a failed ConnectionCls import.

    The class has no behaviour of its own; it only serves as a marker type.
    """
566
+
567
+
568
# When ``ssl`` is falsy, replace HTTPSConnection with the DummyConnection
# marker class.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


# Second name for whichever class HTTPSConnection refers to at this point.
VerifiedHTTPSConnection = HTTPSConnection
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connectionpool.py ADDED
@@ -0,0 +1,1132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import errno
4
+ import logging
5
+ import re
6
+ import socket
7
+ import sys
8
+ import warnings
9
+ from socket import error as SocketError
10
+ from socket import timeout as SocketTimeout
11
+
12
+ from .connection import (
13
+ BaseSSLError,
14
+ BrokenPipeError,
15
+ DummyConnection,
16
+ HTTPConnection,
17
+ HTTPException,
18
+ HTTPSConnection,
19
+ VerifiedHTTPSConnection,
20
+ port_by_scheme,
21
+ )
22
+ from .exceptions import (
23
+ ClosedPoolError,
24
+ EmptyPoolError,
25
+ HeaderParsingError,
26
+ HostChangedError,
27
+ InsecureRequestWarning,
28
+ LocationValueError,
29
+ MaxRetryError,
30
+ NewConnectionError,
31
+ ProtocolError,
32
+ ProxyError,
33
+ ReadTimeoutError,
34
+ SSLError,
35
+ TimeoutError,
36
+ )
37
+ from .packages import six
38
+ from .packages.six.moves import queue
39
+ from .request import RequestMethods
40
+ from .response import HTTPResponse
41
+ from .util.connection import is_connection_dropped
42
+ from .util.proxy import connection_requires_http_tunnel
43
+ from .util.queue import LifoQueue
44
+ from .util.request import set_file_position
45
+ from .util.response import assert_header_parsing
46
+ from .util.retry import Retry
47
+ from .util.ssl_match_hostname import CertificateError
48
+ from .util.timeout import Timeout
49
+ from .util.url import Url, _encode_target
50
+ from .util.url import _normalize_host as normalize_host
51
+ from .util.url import get_host, parse_url
52
+
53
# Prefer ``weakref.finalize`` from the standard library; fall back to the
# bundled backport when the attribute does not exist.
try:  # Platform-specific: Python 3
    import weakref

    weakref_finalize = weakref.finalize
except AttributeError:  # Platform-specific: Python 2
    from .packages.backports.weakref_finalize import weakref_finalize

xrange = six.moves.xrange

log = logging.getLogger(__name__)

# Unique sentinel used as a default argument value, so that ``None`` can
# still be passed explicitly and told apart from "not given".
_Default = object()
65
+
66
+
67
+ # Pool objects
68
class ConnectionPool(object):
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
       ConnectionPool.urlopen() does not normalize or percent-encode target URIs
       which is useful if your target server doesn't support percent-encoded
       target URIs.
    """

    # URL scheme served by the pool; ``None`` on this base class.
    scheme = None
    # Queue class available to subclasses for storing pooled connections.
    QueueCls = LifoQueue

    def __init__(self, host, port=None):
        """
        :param host:
            Host of the pool; must be truthy.

        :param port:
            Stored as-is on ``self.port``.

        :raises LocationValueError:
            If ``host`` is falsy.
        """
        if not host:
            raise LocationValueError("No host specified.")

        self.host = _normalize_host(host, scheme=self.scheme)
        # Lower-cased copy of the host exactly as given (not normalized).
        self._proxy_host = host.lower()
        self.port = port

    def __str__(self):
        return "%s(host=%r, port=%r)" % (type(self).__name__, self.host, self.port)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        # Return False to re-raise any potential exceptions
        return False

    def close(self):
        """
        Close all pooled connections and disable the pool.

        The base implementation does nothing.
        """
        pass
106
+
107
+
108
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
# errno values that are treated as a read timeout when seen on an error.
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
110
+
111
+
112
+ class HTTPConnectionPool(ConnectionPool, RequestMethods):
113
+ """
114
+ Thread-safe connection pool for one host.
115
+
116
+ :param host:
117
+ Host used for this HTTP Connection (e.g. "localhost"), passed into
118
+ :class:`http.client.HTTPConnection`.
119
+
120
+ :param port:
121
+ Port used for this HTTP Connection (None is equivalent to 80), passed
122
+ into :class:`http.client.HTTPConnection`.
123
+
124
+ :param strict:
125
+ Causes BadStatusLine to be raised if the status line can't be parsed
126
+ as a valid HTTP/1.0 or 1.1 status line, passed into
127
+ :class:`http.client.HTTPConnection`.
128
+
129
+ .. note::
130
+ Only works in Python 2. This parameter is ignored in Python 3.
131
+
132
+ :param timeout:
133
+ Socket timeout in seconds for each individual connection. This can
134
+ be a float or integer, which sets the timeout for the HTTP request,
135
+ or an instance of :class:`urllib3.util.Timeout` which gives you more
136
+ fine-grained control over request timeouts. After the constructor has
137
+ been parsed, this is always a `urllib3.util.Timeout` object.
138
+
139
+ :param maxsize:
140
+ Number of connections to save that can be reused. More than 1 is useful
141
+ in multithreaded situations. If ``block`` is set to False, more
142
+ connections will be created but they will not be saved once they've
143
+ been used.
144
+
145
+ :param block:
146
+ If set to True, no more than ``maxsize`` connections will be used at
147
+ a time. When no free connections are available, the call will block
148
+ until a connection has been released. This is a useful side effect for
149
+ particular multithreaded situations where one does not want to use more
150
+ than maxsize connections per host to prevent flooding.
151
+
152
+ :param headers:
153
+ Headers to include with all requests, unless other headers are given
154
+ explicitly.
155
+
156
+ :param retries:
157
+ Retry configuration to use by default with requests in this pool.
158
+
159
+ :param _proxy:
160
+ Parsed proxy URL, should not be used directly, instead, see
161
+ :class:`urllib3.ProxyManager`
162
+
163
+ :param _proxy_headers:
164
+ A dictionary with proxy headers, should not be used directly,
165
+ instead, see :class:`urllib3.ProxyManager`
166
+
167
+ :param \\**conn_kw:
168
+ Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
169
+ :class:`urllib3.connection.HTTPSConnection` instances.
170
+ """
171
+
172
+ scheme = "http"
173
+ ConnectionCls = HTTPConnection
174
+ ResponseCls = HTTPResponse
175
+
176
def __init__(
    self,
    host,
    port=None,
    strict=False,
    timeout=Timeout.DEFAULT_TIMEOUT,
    maxsize=1,
    block=False,
    headers=None,
    retries=None,
    _proxy=None,
    _proxy_headers=None,
    _proxy_config=None,
    **conn_kw
):
    """
    Initialise an ``HTTPConnectionPool``.

    The arguments are described in the class docstring. ``timeout`` is
    converted with ``Timeout.from_float`` unless it already is a
    ``Timeout``; ``retries`` falls back to ``Retry.DEFAULT`` when ``None``.
    Remaining keyword arguments are kept in ``self.conn_kw`` and later
    passed to ``self.ConnectionCls`` by ``_new_conn``.
    """
    ConnectionPool.__init__(self, host, port)
    RequestMethods.__init__(self, headers)

    self.strict = strict

    if not isinstance(timeout, Timeout):
        timeout = Timeout.from_float(timeout)

    if retries is None:
        retries = Retry.DEFAULT

    self.timeout = timeout
    self.retries = retries

    self.pool = self.QueueCls(maxsize)
    self.block = block

    self.proxy = _proxy
    self.proxy_headers = _proxy_headers or {}
    self.proxy_config = _proxy_config

    # Fill the queue up so that doing get() on it will block properly
    for _ in xrange(maxsize):
        self.pool.put(None)

    # These are mostly for testing and debugging purposes.
    self.num_connections = 0
    self.num_requests = 0
    self.conn_kw = conn_kw

    if self.proxy:
        # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
        # We cannot know if the user has added default socket options, so we cannot replace the
        # list.
        self.conn_kw.setdefault("socket_options", [])

        self.conn_kw["proxy"] = self.proxy
        self.conn_kw["proxy_config"] = self.proxy_config

    # Do not pass 'self' as callback to 'finalize'.
    # Then the 'finalize' would keep an endless living (leak) to self.
    # By just passing a reference to the pool allows the garbage collector
    # to free self if nobody else has a reference to it.
    pool = self.pool

    # Close all the HTTPConnections in the pool before the
    # HTTPConnectionPool object is garbage collected.
    weakref_finalize(self, _close_pool_connections, pool)
239
+
240
def _new_conn(self):
    """
    Return a fresh :class:`HTTPConnection`.

    Increments ``self.num_connections``, logs the new connection at debug
    level and instantiates ``self.ConnectionCls`` with the pool's host,
    port, connect timeout, ``strict`` flag and ``self.conn_kw``.
    """
    self.num_connections += 1
    log.debug(
        "Starting new HTTP connection (%d): %s:%s",
        self.num_connections,
        self.host,
        self.port or "80",
    )

    conn = self.ConnectionCls(
        host=self.host,
        port=self.port,
        timeout=self.timeout.connect_timeout,
        strict=self.strict,
        **self.conn_kw
    )
    return conn
260
+
261
def _get_conn(self, timeout=None):
    """
    Get a connection. Will return a pooled connection if one is available.

    If no connections are available and :prop:`.block` is ``False``, then a
    fresh connection is returned.

    :param timeout:
        Seconds to wait before giving up and raising
        :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
        :prop:`.block` is ``True``.

    :raises ClosedPoolError:
        If ``self.pool`` is ``None`` (the pool has been closed).
    """
    conn = None
    try:
        conn = self.pool.get(block=self.block, timeout=timeout)

    except AttributeError:  # self.pool is None
        raise ClosedPoolError(self, "Pool is closed.")

    except queue.Empty:
        if self.block:
            raise EmptyPoolError(
                self,
                "Pool reached maximum size and no more connections are allowed.",
            )
        # Oh well, we'll create a new connection then

    # If this is a persistent connection, check if it got disconnected
    if conn and is_connection_dropped(conn):
        log.debug("Resetting dropped connection: %s", self.host)
        conn.close()
        if getattr(conn, "auto_open", 1) == 0:
            # This is a proxied connection that has been mutated by
            # http.client._tunnel() and cannot be reused (since it would
            # attempt to bypass the proxy)
            conn = None

    # ``conn`` is None (or otherwise falsy) when the queue slot was empty or
    # the connection was discarded above.
    return conn or self._new_conn()
299
+
300
def _put_conn(self, conn):
    """
    Put a connection back into the pool.

    :param conn:
        Connection object for the current host and port as returned by
        :meth:`._new_conn` or :meth:`._get_conn`.

    If the pool is already full, the connection is closed and discarded
    because we exceeded maxsize. If connections are discarded frequently,
    then maxsize should be increased.

    If the pool is closed, then the connection will be closed and discarded.
    """
    try:
        self.pool.put(conn, block=False)
        return  # Everything is dandy, done.
    except AttributeError:
        # self.pool is None.
        pass
    except queue.Full:
        # This should never happen if self.block == True
        log.warning(
            "Connection pool is full, discarding connection: %s. Connection pool size: %s",
            self.host,
            self.pool.qsize(),
        )
    # Connection never got put back into the pool, close it.
    if conn:
        conn.close()
330
+
331
def _validate_conn(self, conn):
    """
    Called right before a request is made, after the socket is created.

    This implementation does nothing and returns ``None``.
    """
    pass
336
+
337
def _prepare_proxy(self, conn):
    """Hook invoked by ``urlopen`` for new proxied connections; a no-op here."""
    # Nothing to do for HTTP connections.
    pass
340
+
341
def _get_timeout(self, timeout):
    """Helper that always returns a :class:`urllib3.util.Timeout`

    :param timeout:
        ``_Default`` to use a clone of the pool's own timeout, a
        ``Timeout`` instance (which is cloned), or any other value, which
        is passed to ``Timeout.from_float``.
    """
    if timeout is _Default:
        return self.timeout.clone()

    if isinstance(timeout, Timeout):
        return timeout.clone()
    else:
        # User passed us an int/float. This is for backwards compatibility,
        # can be removed later
        return Timeout.from_float(timeout)
352
+
353
def _raise_timeout(self, err, url, timeout_value):
    """Is the error actually a timeout? Will raise a ReadTimeout or pass

    :param err:
        The exception to classify.

    :param url:
        URL reported in the raised ``ReadTimeoutError``.

    :param timeout_value:
        Value interpolated into the error message.

    :raises ReadTimeoutError:
        If ``err`` looks like a read timeout. Otherwise the method returns
        ``None`` and the caller is expected to re-raise ``err`` itself.
    """
    looks_like_timeout = (
        isinstance(err, SocketTimeout)
        # See the above comment about EAGAIN in Python 3. In Python 2 we have
        # to specifically catch it and throw the timeout error
        or (hasattr(err, "errno") and err.errno in _blocking_errnos)
        # Catch possible read timeouts thrown as SSL errors. If not the
        # case, rethrow the original. We need to do this because of:
        # http://bugs.python.org/issue10272
        or "timed out" in str(err)
        or "did not complete (read)" in str(err)  # Python < 2.7.4
    )

    if looks_like_timeout:
        raise ReadTimeoutError(
            self, url, "Read timed out. (read timeout=%s)" % timeout_value
        )
377
+
378
def _make_request(
    self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw
):
    """
    Perform a request on a given urllib connection object taken from our
    pool.

    :param conn:
        a connection from one of our connection pools

    :param method:
        HTTP method, forwarded to ``conn.request`` / ``conn.request_chunked``.

    :param url:
        Request target, forwarded to the connection and used in error
        messages and logging.

    :param timeout:
        Socket timeout in seconds for the request. This can be a
        float or integer, which will set the same timeout value for
        the socket connect and the socket read, or an instance of
        :class:`urllib3.util.Timeout`, which gives you more fine-grained
        control over your timeouts.

    :param chunked:
        If true, ``conn.request_chunked`` is used instead of
        ``conn.request``.

    :param \\**httplib_request_kw:
        Extra keyword arguments forwarded to the request call.

    :return:
        The object returned by ``conn.getresponse``.

    :raises ReadTimeoutError:
        If validation or reading the response times out, or if the read
        timeout is ``0``.
    """
    self.num_requests += 1

    timeout_obj = self._get_timeout(timeout)
    timeout_obj.start_connect()
    conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

    # Trigger any extra validation we need to do.
    try:
        self._validate_conn(conn)
    except (SocketTimeout, BaseSSLError) as e:
        # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
        self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
        raise

    # conn.request() calls http.client.*.request, not the method in
    # urllib3.request. It also calls makefile (recv) on the socket.
    try:
        if chunked:
            conn.request_chunked(method, url, **httplib_request_kw)
        else:
            conn.request(method, url, **httplib_request_kw)

    # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
    # legitimately able to close the connection after sending a valid response.
    # With this behaviour, the received response is still readable.
    except BrokenPipeError:
        # Python 3
        pass
    except IOError as e:
        # Python 2 and macOS/Linux
        # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS
        # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
        if e.errno not in {
            errno.EPIPE,
            errno.ESHUTDOWN,
            errno.EPROTOTYPE,
        }:
            raise

    # Reset the timeout for the recv() on the socket
    read_timeout = timeout_obj.read_timeout

    # App Engine doesn't have a sock attr
    if getattr(conn, "sock", None):
        # In Python 3 socket.py will catch EAGAIN and return None when you
        # try and read into the file pointer created by http.client, which
        # instead raises a BadStatusLine exception. Instead of catching
        # the exception and assuming all BadStatusLine exceptions are read
        # timeouts, check for a zero timeout before making the request.
        if read_timeout == 0:
            raise ReadTimeoutError(
                self, url, "Read timed out. (read timeout=%s)" % read_timeout
            )
        if read_timeout is Timeout.DEFAULT_TIMEOUT:
            conn.sock.settimeout(socket.getdefaulttimeout())
        else:  # None or a value
            conn.sock.settimeout(read_timeout)

    # Receive the response from the server
    try:
        try:
            # Python 2.7, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:
            # Python 3
            try:
                httplib_response = conn.getresponse()
            except BaseException as e:
                # Remove the TypeError from the exception chain in
                # Python 3 (including for exceptions like SystemExit).
                # Otherwise it looks like a bug in the code.
                six.raise_from(e, None)
    except (SocketTimeout, BaseSSLError, SocketError) as e:
        self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
        raise

    # AppEngine doesn't have a version attr.
    http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
    log.debug(
        '%s://%s:%s "%s %s %s" %s %s',
        self.scheme,
        self.host,
        self.port,
        method,
        url,
        http_version,
        httplib_response.status,
        httplib_response.length,
    )

    # Header parsing problems are only logged; the response is still returned.
    try:
        assert_header_parsing(httplib_response.msg)
    except (HeaderParsingError, TypeError) as hpe:  # Platform-specific: Python 3
        log.warning(
            "Failed to parse headers (url=%s): %s",
            self._absolute_url(url),
            hpe,
            exc_info=True,
        )

    return httplib_response
496
+
497
def _absolute_url(self, path):
    """Return the ``.url`` of a ``Url`` built from the pool's scheme, host and port plus ``path``."""
    return Url(scheme=self.scheme, host=self.host, port=self.port, path=path).url
499
+
500
def close(self):
    """
    Close all pooled connections and disable the pool.

    Calling this on a pool whose ``self.pool`` is already ``None`` is a
    no-op.
    """
    if self.pool is None:
        return
    # Disable access to the pool
    old_pool, self.pool = self.pool, None

    # Close all the HTTPConnections in the pool.
    _close_pool_connections(old_pool)
511
+
512
def is_same_host(self, url):
    """
    Check if the given ``url`` is a member of the same host as this
    connection pool.

    :param url:
        URL string to compare. A value starting with ``"/"`` is always
        considered to be on the same host.

    :return:
        ``True`` if scheme, normalized host and port match the pool's.
    """
    if url.startswith("/"):
        return True

    # TODO: Add optional support for socket.gethostbyname checking.
    scheme, host, port = get_host(url)
    if host is not None:
        host = _normalize_host(host, scheme=scheme)

    # Use explicit default port for comparison when none is given
    if self.port and not port:
        port = port_by_scheme.get(scheme)
    elif not self.port and port == port_by_scheme.get(scheme):
        port = None

    return (scheme, host, port) == (self.scheme, self.host, self.port)
532
+
533
+ def urlopen(
534
+ self,
535
+ method,
536
+ url,
537
+ body=None,
538
+ headers=None,
539
+ retries=None,
540
+ redirect=True,
541
+ assert_same_host=True,
542
+ timeout=_Default,
543
+ pool_timeout=None,
544
+ release_conn=None,
545
+ chunked=False,
546
+ body_pos=None,
547
+ **response_kw
548
+ ):
549
+ """
550
+ Get a connection from the pool and perform an HTTP request. This is the
551
+ lowest level call for making a request, so you'll need to specify all
552
+ the raw details.
553
+
554
+ .. note::
555
+
556
+ More commonly, it's appropriate to use a convenience method provided
557
+ by :class:`.RequestMethods`, such as :meth:`request`.
558
+
559
+ .. note::
560
+
561
+ `release_conn` will only behave as expected if
562
+ `preload_content=False` because we want to make
563
+ `preload_content=False` the default behaviour someday soon without
564
+ breaking backwards compatibility.
565
+
566
+ :param method:
567
+ HTTP request method (such as GET, POST, PUT, etc.)
568
+
569
+ :param url:
570
+ The URL to perform the request on.
571
+
572
+ :param body:
573
+ Data to send in the request body, either :class:`str`, :class:`bytes`,
574
+ an iterable of :class:`str`/:class:`bytes`, or a file-like object.
575
+
576
+ :param headers:
577
+ Dictionary of custom headers to send, such as User-Agent,
578
+ If-None-Match, etc. If None, pool headers are used. If provided,
579
+ these headers completely replace any pool-specific headers.
580
+
581
+ :param retries:
582
+ Configure the number of retries to allow before raising a
583
+ :class:`~urllib3.exceptions.MaxRetryError` exception.
584
+
585
+ Pass ``None`` to retry until you receive a response. Pass a
586
+ :class:`~urllib3.util.retry.Retry` object for fine-grained control
587
+ over different types of retries.
588
+ Pass an integer number to retry connection errors that many times,
589
+ but no other types of errors. Pass zero to never retry.
590
+
591
+ If ``False``, then retries are disabled and any exception is raised
592
+ immediately. Also, instead of raising a MaxRetryError on redirects,
593
+ the redirect response will be returned.
594
+
595
+ :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.
596
+
597
+ :param redirect:
598
+ If True, automatically handle redirects (status codes 301, 302,
599
+ 303, 307, 308). Each redirect counts as a retry. Disabling retries
600
+ will disable redirect, too.
601
+
602
+ :param assert_same_host:
603
+ If ``True``, will make sure that the host of the pool requests is
604
+ consistent else will raise HostChangedError. When ``False``, you can
605
+ use the pool on an HTTP proxy and request foreign hosts.
606
+
607
+ :param timeout:
608
+ If specified, overrides the default timeout for this one
609
+ request. It may be a float (in seconds) or an instance of
610
+ :class:`urllib3.util.Timeout`.
611
+
612
+ :param pool_timeout:
613
+ If set and the pool is set to block=True, then this method will
614
+ block for ``pool_timeout`` seconds and raise EmptyPoolError if no
615
+ connection is available within the time period.
616
+
617
+ :param release_conn:
618
+ If False, then the urlopen call will not release the connection
619
+ back into the pool once a response is received (but will release if
620
+ you read the entire contents of the response such as when
621
+ `preload_content=True`). This is useful if you're not preloading
622
+ the response's content immediately. You will need to call
623
+ ``r.release_conn()`` on the response ``r`` to return the connection
624
+ back into the pool. If None, it takes the value of
625
+ ``response_kw.get('preload_content', True)``.
626
+
627
+ :param chunked:
628
+ If True, urllib3 will send the body using chunked transfer
629
+ encoding. Otherwise, urllib3 will send the body using the standard
630
+ content-length form. Defaults to False.
631
+
632
+ :param int body_pos:
633
+ Position to seek to in file-like body in the event of a retry or
634
+ redirect. Typically this won't need to be set because urllib3 will
635
+ auto-populate the value when needed.
636
+
637
+ :param \\**response_kw:
638
+ Additional parameters are passed to
639
+ :meth:`urllib3.response.HTTPResponse.from_httplib`
640
+ """
641
+
642
+ parsed_url = parse_url(url)
643
+ destination_scheme = parsed_url.scheme
644
+
645
+ if headers is None:
646
+ headers = self.headers
647
+
648
+ if not isinstance(retries, Retry):
649
+ retries = Retry.from_int(retries, redirect=redirect, default=self.retries)
650
+
651
+ if release_conn is None:
652
+ release_conn = response_kw.get("preload_content", True)
653
+
654
+ # Check host
655
+ if assert_same_host and not self.is_same_host(url):
656
+ raise HostChangedError(self, url, retries)
657
+
658
+ # Ensure that the URL we're connecting to is properly encoded
659
+ if url.startswith("/"):
660
+ url = six.ensure_str(_encode_target(url))
661
+ else:
662
+ url = six.ensure_str(parsed_url.url)
663
+
664
+ conn = None
665
+
666
+ # Track whether `conn` needs to be released before
667
+ # returning/raising/recursing. Update this variable if necessary, and
668
+ # leave `release_conn` constant throughout the function. That way, if
669
+ # the function recurses, the original value of `release_conn` will be
670
+ # passed down into the recursive call, and its value will be respected.
671
+ #
672
+ # See issue #651 [1] for details.
673
+ #
674
+ # [1] <https://github.com/urllib3/urllib3/issues/651>
675
+ release_this_conn = release_conn
676
+
677
+ http_tunnel_required = connection_requires_http_tunnel(
678
+ self.proxy, self.proxy_config, destination_scheme
679
+ )
680
+
681
+ # Merge the proxy headers. Only done when not using HTTP CONNECT. We
682
+ # have to copy the headers dict so we can safely change it without those
683
+ # changes being reflected in anyone else's copy.
684
+ if not http_tunnel_required:
685
+ headers = headers.copy()
686
+ headers.update(self.proxy_headers)
687
+
688
+ # Must keep the exception bound to a separate variable or else Python 3
689
+ # complains about UnboundLocalError.
690
+ err = None
691
+
692
+ # Keep track of whether we cleanly exited the except block. This
693
+ # ensures we do proper cleanup in finally.
694
+ clean_exit = False
695
+
696
+ # Rewind body position, if needed. Record current position
697
+ # for future rewinds in the event of a redirect/retry.
698
+ body_pos = set_file_position(body, body_pos)
699
+
700
+ try:
701
+ # Request a connection from the queue.
702
+ timeout_obj = self._get_timeout(timeout)
703
+ conn = self._get_conn(timeout=pool_timeout)
704
+
705
+ conn.timeout = timeout_obj.connect_timeout
706
+
707
+ is_new_proxy_conn = self.proxy is not None and not getattr(
708
+ conn, "sock", None
709
+ )
710
+ if is_new_proxy_conn and http_tunnel_required:
711
+ self._prepare_proxy(conn)
712
+
713
+ # Make the request on the httplib connection object.
714
+ httplib_response = self._make_request(
715
+ conn,
716
+ method,
717
+ url,
718
+ timeout=timeout_obj,
719
+ body=body,
720
+ headers=headers,
721
+ chunked=chunked,
722
+ )
723
+
724
+ # If we're going to release the connection in ``finally:``, then
725
+ # the response doesn't need to know about the connection. Otherwise
726
+ # it will also try to release it and we'll have a double-release
727
+ # mess.
728
+ response_conn = conn if not release_conn else None
729
+
730
+ # Pass method to Response for length checking
731
+ response_kw["request_method"] = method
732
+
733
+ # Import httplib's response into our own wrapper object
734
+ response = self.ResponseCls.from_httplib(
735
+ httplib_response,
736
+ pool=self,
737
+ connection=response_conn,
738
+ retries=retries,
739
+ **response_kw
740
+ )
741
+
742
+ # Everything went great!
743
+ clean_exit = True
744
+
745
+ except EmptyPoolError:
746
+ # Didn't get a connection from the pool, no need to clean up
747
+ clean_exit = True
748
+ release_this_conn = False
749
+ raise
750
+
751
+ except (
752
+ TimeoutError,
753
+ HTTPException,
754
+ SocketError,
755
+ ProtocolError,
756
+ BaseSSLError,
757
+ SSLError,
758
+ CertificateError,
759
+ ) as e:
760
+ # Discard the connection for these exceptions. It will be
761
+ # replaced during the next _get_conn() call.
762
+ clean_exit = False
763
+
764
+ def _is_ssl_error_message_from_http_proxy(ssl_error):
765
+ # We're trying to detect the message 'WRONG_VERSION_NUMBER' but
766
+ # SSLErrors are kinda all over the place when it comes to the message,
767
+ # so we try to cover our bases here!
768
+ message = " ".join(re.split("[^a-z]", str(ssl_error).lower()))
769
+ return (
770
+ "wrong version number" in message or "unknown protocol" in message
771
+ )
772
+
773
+ # Try to detect a common user error with proxies which is to
774
+ # set an HTTP proxy to be HTTPS when it should be 'http://'
775
+ # (ie {'http': 'http://proxy', 'https': 'https://proxy'})
776
+ # Instead we add a nice error message and point to a URL.
777
+ if (
778
+ isinstance(e, BaseSSLError)
779
+ and self.proxy
780
+ and _is_ssl_error_message_from_http_proxy(e)
781
+ and conn.proxy
782
+ and conn.proxy.scheme == "https"
783
+ ):
784
+ e = ProxyError(
785
+ "Your proxy appears to only use HTTP and not HTTPS, "
786
+ "try changing your proxy URL to be HTTP. See: "
787
+ "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
788
+ "#https-proxy-error-http-proxy",
789
+ SSLError(e),
790
+ )
791
+ elif isinstance(e, (BaseSSLError, CertificateError)):
792
+ e = SSLError(e)
793
+ elif isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
794
+ e = ProxyError("Cannot connect to proxy.", e)
795
+ elif isinstance(e, (SocketError, HTTPException)):
796
+ e = ProtocolError("Connection aborted.", e)
797
+
798
+ retries = retries.increment(
799
+ method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
800
+ )
801
+ retries.sleep()
802
+
803
+ # Keep track of the error for the retry warning.
804
+ err = e
805
+
806
+ finally:
807
+ if not clean_exit:
808
+ # We hit some kind of exception, handled or otherwise. We need
809
+ # to throw the connection away unless explicitly told not to.
810
+ # Close the connection, set the variable to None, and make sure
811
+ # we put the None back in the pool to avoid leaking it.
812
+ conn = conn and conn.close()
813
+ release_this_conn = True
814
+
815
+ if release_this_conn:
816
+ # Put the connection back to be reused. If the connection is
817
+ # expired then it will be None, which will get replaced with a
818
+ # fresh connection during _get_conn.
819
+ self._put_conn(conn)
820
+
821
+ if not conn:
822
+ # Try again
823
+ log.warning(
824
+ "Retrying (%r) after connection broken by '%r': %s", retries, err, url
825
+ )
826
+ return self.urlopen(
827
+ method,
828
+ url,
829
+ body,
830
+ headers,
831
+ retries,
832
+ redirect,
833
+ assert_same_host,
834
+ timeout=timeout,
835
+ pool_timeout=pool_timeout,
836
+ release_conn=release_conn,
837
+ chunked=chunked,
838
+ body_pos=body_pos,
839
+ **response_kw
840
+ )
841
+
842
+ # Handle redirect?
843
+ redirect_location = redirect and response.get_redirect_location()
844
+ if redirect_location:
845
+ if response.status == 303:
846
+ method = "GET"
847
+
848
+ try:
849
+ retries = retries.increment(method, url, response=response, _pool=self)
850
+ except MaxRetryError:
851
+ if retries.raise_on_redirect:
852
+ response.drain_conn()
853
+ raise
854
+ return response
855
+
856
+ response.drain_conn()
857
+ retries.sleep_for_retry(response)
858
+ log.debug("Redirecting %s -> %s", url, redirect_location)
859
+ return self.urlopen(
860
+ method,
861
+ redirect_location,
862
+ body,
863
+ headers,
864
+ retries=retries,
865
+ redirect=redirect,
866
+ assert_same_host=assert_same_host,
867
+ timeout=timeout,
868
+ pool_timeout=pool_timeout,
869
+ release_conn=release_conn,
870
+ chunked=chunked,
871
+ body_pos=body_pos,
872
+ **response_kw
873
+ )
874
+
875
+ # Check if we should retry the HTTP response.
876
+ has_retry_after = bool(response.headers.get("Retry-After"))
877
+ if retries.is_retry(method, response.status, has_retry_after):
878
+ try:
879
+ retries = retries.increment(method, url, response=response, _pool=self)
880
+ except MaxRetryError:
881
+ if retries.raise_on_status:
882
+ response.drain_conn()
883
+ raise
884
+ return response
885
+
886
+ response.drain_conn()
887
+ retries.sleep(response)
888
+ log.debug("Retry: %s", url)
889
+ return self.urlopen(
890
+ method,
891
+ url,
892
+ body,
893
+ headers,
894
+ retries=retries,
895
+ redirect=redirect,
896
+ assert_same_host=assert_same_host,
897
+ timeout=timeout,
898
+ pool_timeout=pool_timeout,
899
+ release_conn=release_conn,
900
+ chunked=chunked,
901
+ body_pos=body_pos,
902
+ **response_kw
903
+ )
904
+
905
+ return response
906
+
907
+
908
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    Same as :class:`.HTTPConnectionPool`, but HTTPS.

    :class:`.HTTPSConnection` uses one of ``assert_fingerprint``,
    ``assert_hostname`` and ``host`` in this order to verify connections.
    If ``assert_hostname`` is False, no verification is done.

    The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
    ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl`
    is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
    the connection socket into an SSL socket.
    """

    scheme = "https"
    ConnectionCls = HTTPSConnection

    def __init__(
        self,
        host,
        port=None,
        strict=False,
        timeout=Timeout.DEFAULT_TIMEOUT,
        maxsize=1,
        block=False,
        headers=None,
        retries=None,
        _proxy=None,
        _proxy_headers=None,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        ssl_version=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        **conn_kw
    ):
        """
        Forward the generic pool arguments to
        :class:`.HTTPConnectionPool` and remember the TLS-specific
        settings on the pool so they can be applied to each connection.
        """

        # The first ten arguments are passed positionally, in the same order
        # as they appear in this signature.
        HTTPConnectionPool.__init__(
            self,
            host,
            port,
            strict,
            timeout,
            maxsize,
            block,
            headers,
            retries,
            _proxy,
            _proxy_headers,
            **conn_kw
        )

        # TLS settings; read back by _prepare_conn() and _new_conn().
        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.ca_certs = ca_certs
        self.ca_cert_dir = ca_cert_dir
        self.ssl_version = ssl_version
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint

    def _prepare_conn(self, conn):
        """
        Copy this pool's certificate settings and ``ssl_version`` onto
        ``conn`` when it is a :class:`VerifiedHTTPSConnection`, then return
        ``conn``. Any other connection type is returned untouched.
        """

        if isinstance(conn, VerifiedHTTPSConnection):
            conn.set_cert(
                key_file=self.key_file,
                key_password=self.key_password,
                cert_file=self.cert_file,
                cert_reqs=self.cert_reqs,
                ca_certs=self.ca_certs,
                ca_cert_dir=self.ca_cert_dir,
                assert_hostname=self.assert_hostname,
                assert_fingerprint=self.assert_fingerprint,
            )
            conn.ssl_version = self.ssl_version
        return conn

    def _prepare_proxy(self, conn):
        """
        Establishes a tunnel connection through HTTP CONNECT.

        Tunnel connection is established early because otherwise httplib would
        improperly set Host: header to proxy's IP:port.
        """

        conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)

        # An HTTPS proxy means the tunnelled TLS session is itself carried
        # over TLS; flag that on the connection before connecting.
        if self.proxy.scheme == "https":
            conn.tls_in_tls_required = True

        conn.connect()

    def _new_conn(self):
        """
        Create a new ``self.ConnectionCls`` connection, run it through
        :meth:`_prepare_conn` and return it.

        :raises SSLError:
            If ``ConnectionCls`` is unset or is :class:`DummyConnection`.
        """
        self.num_connections += 1
        log.debug(
            "Starting new HTTPS connection (%d): %s:%s",
            self.num_connections,
            self.host,
            self.port or "443",
        )

        if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
            raise SSLError(
                "Can't connect to HTTPS URL because the SSL module is not available."
            )

        # When a proxy is configured the socket goes to the proxy, not to the
        # target host.
        actual_host = self.host
        actual_port = self.port
        if self.proxy is not None:
            actual_host = self.proxy.host
            actual_port = self.proxy.port

        conn = self.ConnectionCls(
            host=actual_host,
            port=actual_port,
            timeout=self.timeout.connect_timeout,
            strict=self.strict,
            cert_file=self.cert_file,
            key_file=self.key_file,
            key_password=self.key_password,
            **self.conn_kw
        )

        return self._prepare_conn(conn)

    def _validate_conn(self, conn):
        """
        Called right before a request is made, after the socket is created.

        Emits :class:`InsecureRequestWarning` when the connection, or the
        connection to an HTTPS proxy, is not verified.
        """
        super(HTTPSConnectionPool, self)._validate_conn(conn)

        # Force connect early to allow us to validate the connection.
        if not getattr(conn, "sock", None):  # AppEngine might not have  `.sock`
            conn.connect()

        if not conn.is_verified:
            warnings.warn(
                (
                    "Unverified HTTPS request is being made to host '%s'. "
                    "Adding certificate verification is strongly advised. See: "
                    "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                    "#ssl-warnings" % conn.host
                ),
                InsecureRequestWarning,
            )

        # ``is False`` on purpose: a missing attribute (None) is not treated
        # as "unverified".
        if getattr(conn, "proxy_is_verified", None) is False:
            warnings.warn(
                (
                    "Unverified HTTPS connection done to an HTTPS proxy. "
                    "Adding certificate verification is strongly advised. See: "
                    "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                    "#ssl-warnings"
                ),
                InsecureRequestWarning,
            )
1076
+
1077
+
1078
def connection_from_url(url, **kw):
    """
    Build the connection pool that matches the scheme and host of ``url``.

    Saves the caller from splitting the url into scheme, host and port
    before instantiating a :class:`.ConnectionPool` subclass.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \\**kw:
        Forwarded unchanged to the constructor of the selected pool class.
        Useful for specifying things like timeout, maxsize, headers, etc.

    :return:
        An :class:`.HTTPSConnectionPool` when the scheme is ``"https"``,
        otherwise an :class:`.HTTPConnectionPool`.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    scheme, host, port = get_host(url)

    # No explicit port: use the scheme's registered default, or 80 when the
    # scheme is not in the table.
    if not port:
        port = port_by_scheme.get(scheme, 80)

    pool_cls = HTTPSConnectionPool if scheme == "https" else HTTPConnectionPool
    return pool_cls(host, port=port, **kw)
1104
+
1105
+
1106
def _normalize_host(host, scheme):
    """
    Normalize hosts for comparisons and use with sockets.

    Runs ``host`` through :func:`normalize_host` and then removes one pair of
    enclosing square brackets, if present.
    """
    normalized = normalize_host(host, scheme)

    # httplib doesn't like it when we include brackets in IPv6 addresses
    # Specifically, if we include brackets but also pass the port then
    # httplib crazily doubles up the square brackets on the Host header.
    # Instead, we need to make sure we never pass ``None`` as the port.
    # However, for backward compatibility reasons we can't actually
    # *assert* that. See http://bugs.python.org/issue28539
    is_bracketed = normalized.startswith("[") and normalized.endswith("]")
    return normalized[1:-1] if is_bracketed else normalized
1122
+
1123
+
1124
+ def _close_pool_connections(pool):
1125
+ """Drains a queue of connections and closes each one."""
1126
+ try:
1127
+ while True:
1128
+ conn = pool.get(block=False)
1129
+ if conn:
1130
+ conn.close()
1131
+ except queue.Empty:
1132
+ pass # Done.
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/exceptions.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead
4
+
5
+ # Base Exceptions
6
+
7
+
8
class HTTPError(Exception):
    """Common base class for the exceptions defined in this module."""
12
+
13
+
14
class HTTPWarning(Warning):
    """Common base class for the warning categories defined in this module."""
18
+
19
+
20
class PoolError(HTTPError):
    """Base exception for errors caused within a pool."""

    def __init__(self, pool, message):
        # The exception text is "<pool>: <message>".
        text = "%s: %s" % (pool, message)
        HTTPError.__init__(self, text)
        self.pool = pool

    def __reduce__(self):
        # For pickling purposes: rebuild with ``None`` for both constructor
        # arguments instead of serialising the pool.
        ctor_args = (None, None)
        return self.__class__, ctor_args
30
+
31
+
32
class RequestError(PoolError):
    """Base exception for PoolErrors that have associated URLs."""

    def __init__(self, pool, url, message):
        PoolError.__init__(self, pool, message)
        self.url = url

    def __reduce__(self):
        # For pickling purposes: only the url is carried over; pool and
        # message are rebuilt as ``None``.
        ctor_args = (None, self.url, None)
        return self.__class__, ctor_args
42
+
43
+
44
class SSLError(HTTPError):
    """Raised when the SSL certificate check of an HTTPS connection fails."""
48
+
49
+
50
class ProxyError(HTTPError):
    """Raised when the connection to a proxy fails."""

    def __init__(self, message, error, *args):
        # Keep the wrapped error reachable by name as well as via ``args``.
        self.original_error = error
        super(ProxyError, self).__init__(message, error, *args)
56
+
57
+
58
class DecodeError(HTTPError):
    """Raised when automatic decoding of a response body fails."""
62
+
63
+
64
class ProtocolError(HTTPError):
    """Raised when something unexpected happens in the middle of a request or response."""
68
+
69
+
70
+ #: Renamed to ProtocolError but aliased for backwards compatibility.
71
+ ConnectionError = ProtocolError
72
+
73
+
74
+ # Leaf Exceptions
75
+
76
+
77
class MaxRetryError(RequestError):
    """Raised when the maximum number of retries is exceeded.

    :param pool: The connection pool
    :type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
    :param string url: The requested Url
    :param exceptions.Exception reason: The underlying error

    """

    def __init__(self, pool, url, reason=None):
        self.reason = reason

        message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason)

        RequestError.__init__(self, pool, url, message)

    def __reduce__(self):
        # For pickling purposes. The inherited RequestError.__reduce__ passes
        # ``None`` as the third constructor argument, which for this class is
        # ``reason``, so the underlying error used to be dropped by a pickle
        # round-trip. The pool is still replaced by ``None``.
        return self.__class__, (None, self.url, self.reason)
93
+
94
+
95
class HostChangedError(RequestError):
    """Raised when an existing pool gets a request for a foreign host."""

    def __init__(self, pool, url, retries=3):
        self.retries = retries
        RequestError.__init__(
            self, pool, url, "Tried to open a foreign host with url: %s" % url
        )
102
+
103
+
104
class TimeoutStateError(HTTPError):
    """Raised when an invalid state is passed to a timeout."""
108
+
109
+
110
class TimeoutError(HTTPError):
    """Raised when a socket timeout error occurs.

    This is the shared base of :exc:`ReadTimeoutError` and
    :exc:`ConnectTimeoutError`, so catching it catches both.
    """
118
+
119
+
120
class ReadTimeoutError(TimeoutError, RequestError):
    """Raised when a socket times out while data is being received from a server."""
124
+
125
+
126
+ # This timeout error does not have a URL attached and needs to inherit from the
127
+ # base HTTPError
128
class ConnectTimeoutError(TimeoutError):
    """Raised when a socket times out while connecting to a server."""
132
+
133
+
134
class NewConnectionError(ConnectTimeoutError, PoolError):
    """Raised when a new connection cannot be established. Usually ECONNREFUSED."""
138
+
139
+
140
class EmptyPoolError(PoolError):
    """Raised when a pool has no connections left and may not create more."""
144
+
145
+
146
class ClosedPoolError(PoolError):
    """Raised when a request reaches a pool that has already been closed."""
150
+
151
+
152
class LocationValueError(ValueError, HTTPError):
    """Raised when a given URL input is invalid in some way."""
156
+
157
+
158
class LocationParseError(LocationValueError):
    """Raised when get_host or similar fails to parse the URL input."""

    def __init__(self, location):
        # Keep the offending input available to handlers.
        self.location = location
        HTTPError.__init__(self, "Failed to parse: %s" % location)
166
+
167
+
168
class URLSchemeUnknown(LocationValueError):
    """Raised when a URL input has an unsupported scheme."""

    def __init__(self, scheme):
        self.scheme = scheme
        super(URLSchemeUnknown, self).__init__(
            "Not supported URL scheme %s" % scheme
        )
176
+
177
+
178
class ResponseError(HTTPError):
    """Used as a container for an error reason supplied in a MaxRetryError."""

    # Message text that carries no status code.
    GENERIC_ERROR = "too many error responses"
    # Template with a ``{status_code}`` placeholder; presumably filled in by
    # the retry logic via ``str.format`` -- the caller is not visible here.
    SPECIFIC_ERROR = "too many {status_code} error responses"
183
+
184
+
185
class SecurityWarning(HTTPWarning):
    """Warning category for actions that reduce security."""
189
+
190
+
191
class SubjectAltNameWarning(SecurityWarning):
    """Warning category for hosts whose certificate is missing a SAN."""
195
+
196
+
197
class InsecureRequestWarning(SecurityWarning):
    """Warning category for HTTPS requests made without verification."""
201
+
202
+
203
class SystemTimeWarning(SecurityWarning):
    """Warning category for a system clock that is suspected to be wrong."""
207
+
208
+
209
class InsecurePlatformWarning(SecurityWarning):
    """Warning category for TLS/SSL configuration that a platform cannot provide."""
213
+
214
+
215
class SNIMissingWarning(HTTPWarning):
    """Warning category for HTTPS requests made while SNI is unavailable."""
219
+
220
+
221
class DependencyWarning(HTTPWarning):
    """
    Warning category for attempts to import a module whose optional
    dependencies are missing.
    """
228
+
229
+
230
class ResponseNotChunked(ProtocolError, ValueError):
    """Raised when a response must be chunked to be read as chunks, but is not."""
234
+
235
+
236
class BodyNotHttplibCompatible(HTTPError):
    """
    Raised when a body is not :class:`http.client.HTTPResponse`-like: it
    needs an ``fp`` attribute which returns raw chunks for read_chunked().
    """
243
+
244
+
245
class IncompleteRead(HTTPError, httplib_IncompleteRead):
    """
    Response length doesn't match expected Content-Length

    Subclass of :class:`http.client.IncompleteRead` to allow int value
    for ``partial`` to avoid creating large objects on streamed reads.
    """

    def __init__(self, partial, expected):
        # Both arguments are required here and are handed on unchanged.
        super(IncompleteRead, self).__init__(partial, expected)

    def __repr__(self):
        counts = (self.partial, self.expected)
        return "IncompleteRead(%i bytes read, %i more expected)" % counts
261
+
262
+
263
class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
    """Invalid chunk length in a chunked response."""

    def __init__(self, response, length):
        # ``partial``/``expected`` of the base class are taken from the
        # response's current position and its remaining length.
        bytes_read = response.tell()
        remaining = response.length_remaining
        super(InvalidChunkLength, self).__init__(bytes_read, remaining)
        self.response = response
        self.length = length

    def __repr__(self):
        details = (self.length, self.partial)
        return "InvalidChunkLength(got length %r, %i bytes read)" % details
278
+
279
+
280
class InvalidHeader(HTTPError):
    """Raised when a provided header is invalid in some way."""
284
+
285
+
286
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
    """ProxyManager does not support the supplied scheme"""

    # TODO(t-8ch): Stop inheriting from AssertionError in v2.0.

    def __init__(self, scheme):
        # 'localhost' is here because our URL parser parses
        # localhost:8080 -> scheme=localhost, remove if we fix this.
        if scheme is None or scheme == "localhost":
            message = "Proxy URL had no scheme, should start with http:// or https://"
        else:
            message = (
                "Proxy URL had unsupported scheme %s, should use http:// or https://"
                % scheme
            )
        super(ProxySchemeUnknown, self).__init__(message)
304
+
305
+
306
class ProxySchemeUnsupported(ValueError):
    """Raised because fetching HTTPS resources through HTTPS proxies is unsupported."""
310
+
311
+
312
class HeaderParsingError(HTTPError):
    """Raised by assert_header_parsing, but we convert it to a log.warning statement."""

    def __init__(self, defects, unparsed_data):
        # A falsy ``defects`` value is reported as "Unknown".
        summary = defects or "Unknown"
        super(HeaderParsingError, self).__init__(
            "%s, unparsed data: %r" % (summary, unparsed_data)
        )
318
+
319
+
320
class UnrewindableBodyError(HTTPError):
    """Raised when urllib3 hits an error while trying to rewind a body."""
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/fields.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import email.utils
4
+ import mimetypes
5
+ import re
6
+
7
+ from .packages import six
8
+
9
+
10
def guess_content_type(filename, default="application/octet-stream"):
    """
    Guess the "Content-Type" of a file.

    :param filename:
        The filename to guess the "Content-Type" of using :mod:`mimetypes`.
        A falsy value skips the lookup entirely.
    :param default:
        Returned when no "Content-Type" can be guessed.
    """
    if not filename:
        return default

    guessed_type = mimetypes.guess_type(filename)[0]
    return guessed_type or default
22
+
23
+
24
def format_header_param_rfc2231(name, value):
    """
    Helper function to format and quote a single header parameter using the
    strategy defined in RFC 2231.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows
    `RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_.

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :ret:
        An RFC-2231-formatted unicode string.
    """
    if isinstance(value, six.binary_type):
        value = value.decode("utf-8")

    # Plain ``name="value"`` quoting is used when the value has no quote,
    # backslash, CR or LF and the result is pure ASCII.
    has_special_chars = any(ch in value for ch in '"\\\r\n')
    if not has_special_chars:
        quoted = u'%s="%s"' % (name, value)
        try:
            quoted.encode("ascii")
            return quoted
        except (UnicodeEncodeError, UnicodeDecodeError):
            pass

    if six.PY2:  # Python 2:
        value = value.encode("utf-8")

    # encode_rfc2231 accepts an encoded string and returns an ascii-encoded
    # string in Python 2 but accepts and returns unicode strings in Python 3
    encoded = email.utils.encode_rfc2231(value, "utf-8")
    formatted = "%s*=%s" % (name, encoded)

    if six.PY2:  # Python 2:
        formatted = formatted.decode("utf-8")

    return formatted
64
+
65
+
66
# Maps each character that format_header_param_html5() escapes to its
# replacement text.
_HTML5_REPLACEMENTS = {
    # Replace the double quote with its percent-encoded form.
    u"\u0022": u"%22",
    # Replace "\" with "\\".
    u"\u005C": u"\u005C\u005C",
}

# All control characters from 0x00 to 0x1F *except* 0x1B.
# Each one maps to "%XX", its two-digit upper-case hex code.
_HTML5_REPLACEMENTS.update(
    {
        six.unichr(cc): u"%{:02X}".format(cc)
        for cc in range(0x00, 0x1F + 1)
        if cc not in (0x1B,)
    }
)
80
+
81
+
82
+ def _replace_multiple(value, needles_and_replacements):
83
+ def replacer(match):
84
+ return needles_and_replacements[match.group(0)]
85
+
86
+ pattern = re.compile(
87
+ r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
88
+ )
89
+
90
+ result = pattern.sub(replacer, value)
91
+
92
+ return result
93
+
94
+
95
def format_header_param_html5(name, value):
    """
    Helper function to format and quote a single header parameter using the
    HTML5 strategy.

    Particularly useful for header parameters which might contain
    non-ASCII values, like file names. This follows the `HTML5 Working Draft
    Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.

    .. _HTML5 Working Draft Section 4.10.22.7:
        https://w3c.github.io/html/sec-forms.html#multipart-form-data

    :param name:
        The name of the parameter, a string expected to be ASCII only.
    :param value:
        The value of the parameter, provided as ``bytes`` or ``str``.
    :ret:
        A unicode string, stripped of troublesome characters.
    """
    text = value.decode("utf-8") if isinstance(value, six.binary_type) else value
    escaped = _replace_multiple(text, _HTML5_REPLACEMENTS)
    return u'%s="%s"' % (name, escaped)
120
+
121
+
122
+ # For backwards-compatibility.
123
+ format_header_param = format_header_param_html5
124
+
125
+
126
class RequestField(object):
    """
    A data container for request body parameters.

    :param name:
        The name of this request field. Must be unicode.
    :param data:
        The data/value body.
    :param filename:
        An optional filename of the request field. Must be unicode.
    :param headers:
        An optional dict-like object of headers to initially use for the field.
        It is copied into a new ``dict``.
    :param header_formatter:
        An optional callable that is used to encode and format the headers. By
        default, this is :func:`format_header_param_html5`.
    """

    def __init__(
        self,
        name,
        data,
        filename=None,
        headers=None,
        header_formatter=format_header_param_html5,
    ):
        self._name = name
        self._filename = filename
        self.data = data
        self.headers = dict(headers) if headers else {}
        self.header_formatter = header_formatter

    @classmethod
    def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
        """
        A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.

        Supports constructing :class:`~urllib3.fields.RequestField` from
        parameter of key/value strings AND key/filetuple. A filetuple is a
        (filename, data, MIME type) tuple where the MIME type is optional.
        For example::

            'foo': 'bar',
            'fakefile': ('foofile.txt', 'contents of foofile'),
            'realfile': ('barfile.txt', open('realfile').read()),
            'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
            'nonamefile': 'contents of nonamefile field',

        Field names and filenames must be unicode.
        """
        # Plain (non-tuple) values carry neither a filename nor a type.
        filename = None
        content_type = None
        data = value

        if isinstance(value, tuple):
            if len(value) == 3:
                filename, data, content_type = value
            else:
                # Two-item filetuple: the type is guessed from the filename.
                filename, data = value
                content_type = guess_content_type(filename)

        field = cls(
            fieldname, data, filename=filename, header_formatter=header_formatter
        )
        field.make_multipart(content_type=content_type)
        return field

    def _render_part(self, name, value):
        """
        Overridable helper function to format a single header parameter. By
        default, this calls ``self.header_formatter``.

        :param name:
            The name of the parameter, a string expected to be ASCII only.
        :param value:
            The value of the parameter, provided as a unicode string.
        """
        formatter = self.header_formatter
        return formatter(name, value)

    def _render_parts(self, header_parts):
        """
        Helper function to format and quote a single header.

        Useful for single headers that are composed of multiple items. E.g.,
        'Content-Disposition' fields.

        :param header_parts:
            A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
            as `k1="v1"; k2="v2"; ...`. Pairs whose value is ``None`` are
            left out.
        """
        if isinstance(header_parts, dict):
            pairs = header_parts.items()
        else:
            pairs = header_parts

        rendered = [
            self._render_part(name, value)
            for name, value in pairs
            if value is not None
        ]
        return u"; ".join(rendered)

    def render_headers(self):
        """
        Renders the headers for this request field.

        The three well-known content headers come first, in a fixed order,
        followed by the remaining headers; headers with a falsy value are
        omitted. The result ends with a blank line.
        """
        sort_keys = ["Content-Disposition", "Content-Type", "Content-Location"]

        lines = [
            u"%s: %s" % (sort_key, self.headers[sort_key])
            for sort_key in sort_keys
            if self.headers.get(sort_key, False)
        ]
        lines.extend(
            u"%s: %s" % (header_name, header_value)
            for header_name, header_value in self.headers.items()
            if header_name not in sort_keys and header_value
        )

        lines.append(u"\r\n")
        return u"\r\n".join(lines)

    def make_multipart(
        self, content_disposition=None, content_type=None, content_location=None
    ):
        """
        Makes this request field into a multipart request field.

        This method overrides "Content-Disposition", "Content-Type" and
        "Content-Location" headers to the request parameter.

        :param content_disposition:
            The 'Content-Disposition' value; ``form-data`` is used when this
            is falsy. The field's name and filename are appended to it.
        :param content_type:
            The 'Content-Type' of the request body.
        :param content_location:
            The 'Content-Location' of the request body.

        """
        self.headers["Content-Disposition"] = content_disposition or u"form-data"
        rendered_params = self._render_parts(
            ((u"name", self._name), (u"filename", self._filename))
        )
        # Joining with a leading empty string yields "; <params>".
        self.headers["Content-Disposition"] += u"; ".join([u"", rendered_params])
        self.headers["Content-Type"] = content_type
        self.headers["Content-Location"] = content_location
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/filepost.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import binascii
4
+ import codecs
5
+ import os
6
+ from io import BytesIO
7
+
8
+ from .fields import RequestField
9
+ from .packages import six
10
+ from .packages.six import b
11
+
12
+ writer = codecs.lookup("utf-8")[3]
13
+
14
+
15
def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.

    Returns 16 random bytes from :func:`os.urandom` as a 32-character hex
    string of the native ``str`` type.
    """
    token = binascii.hexlify(os.urandom(16))
    # hexlify() gives ``bytes``: already the native ``str`` on Python 2,
    # but in need of decoding on Python 3.
    if not isinstance(token, str):
        token = token.decode("ascii")
    return token
23
+
24
+
25
def iter_field_objects(fields):
    """
    Iterate over fields.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`.

    Items that are not already :class:`~urllib3.fields.RequestField`
    instances are unpacked into :meth:`RequestField.from_tuples`.
    """
    entries = six.iteritems(fields) if isinstance(fields, dict) else iter(fields)

    for entry in entries:
        yield entry if isinstance(entry, RequestField) else RequestField.from_tuples(
            *entry
        )
43
+
44
+
45
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((k, v) for k, v in pairs)
61
+
62
+
63
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`urllib3.filepost.choose_boundary`.

    :return:
        A ``(body, content_type)`` tuple: the encoded body bytes and the
        matching ``multipart/form-data`` header value naming the boundary.
    """
    body = BytesIO()
    if boundary is None:
        boundary = choose_boundary()

    for field in iter_field_objects(fields):
        body.write(b("--%s\r\n" % (boundary)))
        writer(body).write(field.render_headers())

        payload = field.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        # Text goes through the UTF-8 stream writer; anything else is
        # written to the buffer as-is.
        if not isinstance(payload, six.text_type):
            body.write(payload)
        else:
            writer(body).write(payload)

        body.write(b"\r\n")

    body.write(b("--%s--\r\n" % (boundary)))

    content_type = str("multipart/form-data; boundary=%s" % boundary)
    return body.getvalue(), content_type
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/poolmanager.py ADDED
@@ -0,0 +1,537 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import collections
4
+ import functools
5
+ import logging
6
+
7
+ from ._collections import RecentlyUsedContainer
8
+ from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
9
+ from .exceptions import (
10
+ LocationValueError,
11
+ MaxRetryError,
12
+ ProxySchemeUnknown,
13
+ ProxySchemeUnsupported,
14
+ URLSchemeUnknown,
15
+ )
16
+ from .packages import six
17
+ from .packages.six.moves.urllib.parse import urljoin
18
+ from .request import RequestMethods
19
+ from .util.proxy import connection_requires_http_tunnel
20
+ from .util.retry import Retry
21
+ from .util.url import parse_url
22
+
23
+ __all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
24
+
25
+
26
+ log = logging.getLogger(__name__)
27
+
28
+ SSL_KEYWORDS = (
29
+ "key_file",
30
+ "cert_file",
31
+ "cert_reqs",
32
+ "ca_certs",
33
+ "ssl_version",
34
+ "ca_cert_dir",
35
+ "ssl_context",
36
+ "key_password",
37
+ "server_hostname",
38
+ )
39
+
40
+ # All known keyword arguments that could be provided to the pool manager, its
41
+ # pools, or the underlying connections. This is used to construct a pool key.
42
+ _key_fields = (
43
+ "key_scheme", # str
44
+ "key_host", # str
45
+ "key_port", # int
46
+ "key_timeout", # int or float or Timeout
47
+ "key_retries", # int or Retry
48
+ "key_strict", # bool
49
+ "key_block", # bool
50
+ "key_source_address", # str
51
+ "key_key_file", # str
52
+ "key_key_password", # str
53
+ "key_cert_file", # str
54
+ "key_cert_reqs", # str
55
+ "key_ca_certs", # str
56
+ "key_ssl_version", # str
57
+ "key_ca_cert_dir", # str
58
+ "key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
59
+ "key_maxsize", # int
60
+ "key_headers", # dict
61
+ "key__proxy", # parsed proxy url
62
+ "key__proxy_headers", # dict
63
+ "key__proxy_config", # class
64
+ "key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples
65
+ "key__socks_options", # dict
66
+ "key_assert_hostname", # bool or string
67
+ "key_assert_fingerprint", # str
68
+ "key_server_hostname", # str
69
+ )
70
+
71
+ #: The namedtuple class used to construct keys for the connection pool.
72
+ #: All custom key schemes should include the fields in this key at a minimum.
73
+ PoolKey = collections.namedtuple("PoolKey", _key_fields)
74
+
75
+ _proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
76
+ ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)
77
+
78
+
79
+ def _default_key_normalizer(key_class, request_context):
80
+ """
81
+ Create a pool key out of a request context dictionary.
82
+
83
+ According to RFC 3986, both the scheme and host are case-insensitive.
84
+ Therefore, this function normalizes both before constructing the pool
85
+ key for an HTTPS request. If you wish to change this behaviour, provide
86
+ alternate callables to ``key_fn_by_scheme``.
87
+
88
+ :param key_class:
89
+ The class to use when constructing the key. This should be a namedtuple
90
+ with the ``scheme`` and ``host`` keys at a minimum.
91
+ :type key_class: namedtuple
92
+ :param request_context:
93
+ A dictionary-like object that contain the context for a request.
94
+ :type request_context: dict
95
+
96
+ :return: A namedtuple that can be used as a connection pool key.
97
+ :rtype: PoolKey
98
+ """
99
+ # Since we mutate the dictionary, make a copy first
100
+ context = request_context.copy()
101
+ context["scheme"] = context["scheme"].lower()
102
+ context["host"] = context["host"].lower()
103
+
104
+ # These are both dictionaries and need to be transformed into frozensets
105
+ for key in ("headers", "_proxy_headers", "_socks_options"):
106
+ if key in context and context[key] is not None:
107
+ context[key] = frozenset(context[key].items())
108
+
109
+ # The socket_options key may be a list and needs to be transformed into a
110
+ # tuple.
111
+ socket_opts = context.get("socket_options")
112
+ if socket_opts is not None:
113
+ context["socket_options"] = tuple(socket_opts)
114
+
115
+ # Map the kwargs to the names in the namedtuple - this is necessary since
116
+ # namedtuples can't have fields starting with '_'.
117
+ for key in list(context.keys()):
118
+ context["key_" + key] = context.pop(key)
119
+
120
+ # Default to ``None`` for keys missing from the context
121
+ for field in key_class._fields:
122
+ if field not in context:
123
+ context[field] = None
124
+
125
+ return key_class(**context)
126
+
127
+
128
+ #: A dictionary that maps a scheme to a callable that creates a pool key.
129
+ #: This can be used to alter the way pool keys are constructed, if desired.
130
+ #: Each PoolManager makes a copy of this dictionary so they can be configured
131
+ #: globally here, or individually on the instance.
132
+ key_fn_by_scheme = {
133
+ "http": functools.partial(_default_key_normalizer, PoolKey),
134
+ "https": functools.partial(_default_key_normalizer, PoolKey),
135
+ }
136
+
137
+ pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
138
+
139
+
140
+ class PoolManager(RequestMethods):
141
+ """
142
+ Allows for arbitrary requests while transparently keeping track of
143
+ necessary connection pools for you.
144
+
145
+ :param num_pools:
146
+ Number of connection pools to cache before discarding the least
147
+ recently used pool.
148
+
149
+ :param headers:
150
+ Headers to include with all requests, unless other headers are given
151
+ explicitly.
152
+
153
+ :param \\**connection_pool_kw:
154
+ Additional parameters are used to create fresh
155
+ :class:`urllib3.connectionpool.ConnectionPool` instances.
156
+
157
+ Example::
158
+
159
+ >>> manager = PoolManager(num_pools=2)
160
+ >>> r = manager.request('GET', 'http://google.com/')
161
+ >>> r = manager.request('GET', 'http://google.com/mail')
162
+ >>> r = manager.request('GET', 'http://yahoo.com/')
163
+ >>> len(manager.pools)
164
+ 2
165
+
166
+ """
167
+
168
+ proxy = None
169
+ proxy_config = None
170
+
171
+ def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
172
+ RequestMethods.__init__(self, headers)
173
+ self.connection_pool_kw = connection_pool_kw
174
+ self.pools = RecentlyUsedContainer(num_pools)
175
+
176
+ # Locally set the pool classes and keys so other PoolManagers can
177
+ # override them.
178
+ self.pool_classes_by_scheme = pool_classes_by_scheme
179
+ self.key_fn_by_scheme = key_fn_by_scheme.copy()
180
+
181
+ def __enter__(self):
182
+ return self
183
+
184
+ def __exit__(self, exc_type, exc_val, exc_tb):
185
+ self.clear()
186
+ # Return False to re-raise any potential exceptions
187
+ return False
188
+
189
+ def _new_pool(self, scheme, host, port, request_context=None):
190
+ """
191
+ Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
192
+ any additional pool keyword arguments.
193
+
194
+ If ``request_context`` is provided, it is provided as keyword arguments
195
+ to the pool class used. This method is used to actually create the
196
+ connection pools handed out by :meth:`connection_from_url` and
197
+ companion methods. It is intended to be overridden for customization.
198
+ """
199
+ pool_cls = self.pool_classes_by_scheme[scheme]
200
+ if request_context is None:
201
+ request_context = self.connection_pool_kw.copy()
202
+
203
+ # Although the context has everything necessary to create the pool,
204
+ # this function has historically only used the scheme, host, and port
205
+ # in the positional args. When an API change is acceptable these can
206
+ # be removed.
207
+ for key in ("scheme", "host", "port"):
208
+ request_context.pop(key, None)
209
+
210
+ if scheme == "http":
211
+ for kw in SSL_KEYWORDS:
212
+ request_context.pop(kw, None)
213
+
214
+ return pool_cls(host, port, **request_context)
215
+
216
+ def clear(self):
217
+ """
218
+ Empty our store of pools and direct them all to close.
219
+
220
+ This will not affect in-flight connections, but they will not be
221
+ re-used after completion.
222
+ """
223
+ self.pools.clear()
224
+
225
+ def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
226
+ """
227
+ Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.
228
+
229
+ If ``port`` isn't given, it will be derived from the ``scheme`` using
230
+ ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
231
+ provided, it is merged with the instance's ``connection_pool_kw``
232
+ variable and used to create the new connection pool, if one is
233
+ needed.
234
+ """
235
+
236
+ if not host:
237
+ raise LocationValueError("No host specified.")
238
+
239
+ request_context = self._merge_pool_kwargs(pool_kwargs)
240
+ request_context["scheme"] = scheme or "http"
241
+ if not port:
242
+ port = port_by_scheme.get(request_context["scheme"].lower(), 80)
243
+ request_context["port"] = port
244
+ request_context["host"] = host
245
+
246
+ return self.connection_from_context(request_context)
247
+
248
+ def connection_from_context(self, request_context):
249
+ """
250
+ Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.
251
+
252
+ ``request_context`` must at least contain the ``scheme`` key and its
253
+ value must be a key in ``key_fn_by_scheme`` instance variable.
254
+ """
255
+ scheme = request_context["scheme"].lower()
256
+ pool_key_constructor = self.key_fn_by_scheme.get(scheme)
257
+ if not pool_key_constructor:
258
+ raise URLSchemeUnknown(scheme)
259
+ pool_key = pool_key_constructor(request_context)
260
+
261
+ return self.connection_from_pool_key(pool_key, request_context=request_context)
262
+
263
+ def connection_from_pool_key(self, pool_key, request_context=None):
264
+ """
265
+ Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.
266
+
267
+ ``pool_key`` should be a namedtuple that only contains immutable
268
+ objects. At a minimum it must have the ``scheme``, ``host``, and
269
+ ``port`` fields.
270
+ """
271
+ with self.pools.lock:
272
+ # If the scheme, host, or port doesn't match existing open
273
+ # connections, open a new ConnectionPool.
274
+ pool = self.pools.get(pool_key)
275
+ if pool:
276
+ return pool
277
+
278
+ # Make a fresh ConnectionPool of the desired type
279
+ scheme = request_context["scheme"]
280
+ host = request_context["host"]
281
+ port = request_context["port"]
282
+ pool = self._new_pool(scheme, host, port, request_context=request_context)
283
+ self.pools[pool_key] = pool
284
+
285
+ return pool
286
+
287
+ def connection_from_url(self, url, pool_kwargs=None):
288
+ """
289
+ Similar to :func:`urllib3.connectionpool.connection_from_url`.
290
+
291
+ If ``pool_kwargs`` is not provided and a new pool needs to be
292
+ constructed, ``self.connection_pool_kw`` is used to initialize
293
+ the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
294
+ is provided, it is used instead. Note that if a new pool does not
295
+ need to be created for the request, the provided ``pool_kwargs`` are
296
+ not used.
297
+ """
298
+ u = parse_url(url)
299
+ return self.connection_from_host(
300
+ u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
301
+ )
302
+
303
+ def _merge_pool_kwargs(self, override):
304
+ """
305
+ Merge a dictionary of override values for self.connection_pool_kw.
306
+
307
+ This does not modify self.connection_pool_kw and returns a new dict.
308
+ Any keys in the override dictionary with a value of ``None`` are
309
+ removed from the merged dictionary.
310
+ """
311
+ base_pool_kwargs = self.connection_pool_kw.copy()
312
+ if override:
313
+ for key, value in override.items():
314
+ if value is None:
315
+ try:
316
+ del base_pool_kwargs[key]
317
+ except KeyError:
318
+ pass
319
+ else:
320
+ base_pool_kwargs[key] = value
321
+ return base_pool_kwargs
322
+
323
+ def _proxy_requires_url_absolute_form(self, parsed_url):
324
+ """
325
+ Indicates if the proxy requires the complete destination URL in the
326
+ request. Normally this is only needed when not using an HTTP CONNECT
327
+ tunnel.
328
+ """
329
+ if self.proxy is None:
330
+ return False
331
+
332
+ return not connection_requires_http_tunnel(
333
+ self.proxy, self.proxy_config, parsed_url.scheme
334
+ )
335
+
336
+ def _validate_proxy_scheme_url_selection(self, url_scheme):
337
+ """
338
+ Validates that were not attempting to do TLS in TLS connections on
339
+ Python2 or with unsupported SSL implementations.
340
+ """
341
+ if self.proxy is None or url_scheme != "https":
342
+ return
343
+
344
+ if self.proxy.scheme != "https":
345
+ return
346
+
347
+ if six.PY2 and not self.proxy_config.use_forwarding_for_https:
348
+ raise ProxySchemeUnsupported(
349
+ "Contacting HTTPS destinations through HTTPS proxies "
350
+ "'via CONNECT tunnels' is not supported in Python 2"
351
+ )
352
+
353
+ def urlopen(self, method, url, redirect=True, **kw):
354
+ """
355
+ Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
356
+ with custom cross-host redirect logic and only sends the request-uri
357
+ portion of the ``url``.
358
+
359
+ The given ``url`` parameter must be absolute, such that an appropriate
360
+ :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.
361
+ """
362
+ u = parse_url(url)
363
+ self._validate_proxy_scheme_url_selection(u.scheme)
364
+
365
+ conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)
366
+
367
+ kw["assert_same_host"] = False
368
+ kw["redirect"] = False
369
+
370
+ if "headers" not in kw:
371
+ kw["headers"] = self.headers.copy()
372
+
373
+ if self._proxy_requires_url_absolute_form(u):
374
+ response = conn.urlopen(method, url, **kw)
375
+ else:
376
+ response = conn.urlopen(method, u.request_uri, **kw)
377
+
378
+ redirect_location = redirect and response.get_redirect_location()
379
+ if not redirect_location:
380
+ return response
381
+
382
+ # Support relative URLs for redirecting.
383
+ redirect_location = urljoin(url, redirect_location)
384
+
385
+ # RFC 7231, Section 6.4.4
386
+ if response.status == 303:
387
+ method = "GET"
388
+
389
+ retries = kw.get("retries")
390
+ if not isinstance(retries, Retry):
391
+ retries = Retry.from_int(retries, redirect=redirect)
392
+
393
+ # Strip headers marked as unsafe to forward to the redirected location.
394
+ # Check remove_headers_on_redirect to avoid a potential network call within
395
+ # conn.is_same_host() which may use socket.gethostbyname() in the future.
396
+ if retries.remove_headers_on_redirect and not conn.is_same_host(
397
+ redirect_location
398
+ ):
399
+ headers = list(six.iterkeys(kw["headers"]))
400
+ for header in headers:
401
+ if header.lower() in retries.remove_headers_on_redirect:
402
+ kw["headers"].pop(header, None)
403
+
404
+ try:
405
+ retries = retries.increment(method, url, response=response, _pool=conn)
406
+ except MaxRetryError:
407
+ if retries.raise_on_redirect:
408
+ response.drain_conn()
409
+ raise
410
+ return response
411
+
412
+ kw["retries"] = retries
413
+ kw["redirect"] = redirect
414
+
415
+ log.info("Redirecting %s -> %s", url, redirect_location)
416
+
417
+ response.drain_conn()
418
+ return self.urlopen(method, redirect_location, **kw)
419
+
420
+
421
+ class ProxyManager(PoolManager):
422
+ """
423
+ Behaves just like :class:`PoolManager`, but sends all requests through
424
+ the defined proxy, using the CONNECT method for HTTPS URLs.
425
+
426
+ :param proxy_url:
427
+ The URL of the proxy to be used.
428
+
429
+ :param proxy_headers:
430
+ A dictionary containing headers that will be sent to the proxy. In case
431
+ of HTTP they are being sent with each request, while in the
432
+ HTTPS/CONNECT case they are sent only once. Could be used for proxy
433
+ authentication.
434
+
435
+ :param proxy_ssl_context:
436
+ The proxy SSL context is used to establish the TLS connection to the
437
+ proxy when using HTTPS proxies.
438
+
439
+ :param use_forwarding_for_https:
440
+ (Defaults to False) If set to True will forward requests to the HTTPS
441
+ proxy to be made on behalf of the client instead of creating a TLS
442
+ tunnel via the CONNECT method. **Enabling this flag means that request
443
+ and response headers and content will be visible from the HTTPS proxy**
444
+ whereas tunneling keeps request and response headers and content
445
+ private. IP address, target hostname, SNI, and port are always visible
446
+ to an HTTPS proxy even when this flag is disabled.
447
+
448
+ Example:
449
+ >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
450
+ >>> r1 = proxy.request('GET', 'http://google.com/')
451
+ >>> r2 = proxy.request('GET', 'http://httpbin.org/')
452
+ >>> len(proxy.pools)
453
+ 1
454
+ >>> r3 = proxy.request('GET', 'https://httpbin.org/')
455
+ >>> r4 = proxy.request('GET', 'https://twitter.com/')
456
+ >>> len(proxy.pools)
457
+ 3
458
+
459
+ """
460
+
461
+ def __init__(
462
+ self,
463
+ proxy_url,
464
+ num_pools=10,
465
+ headers=None,
466
+ proxy_headers=None,
467
+ proxy_ssl_context=None,
468
+ use_forwarding_for_https=False,
469
+ **connection_pool_kw
470
+ ):
471
+
472
+ if isinstance(proxy_url, HTTPConnectionPool):
473
+ proxy_url = "%s://%s:%i" % (
474
+ proxy_url.scheme,
475
+ proxy_url.host,
476
+ proxy_url.port,
477
+ )
478
+ proxy = parse_url(proxy_url)
479
+
480
+ if proxy.scheme not in ("http", "https"):
481
+ raise ProxySchemeUnknown(proxy.scheme)
482
+
483
+ if not proxy.port:
484
+ port = port_by_scheme.get(proxy.scheme, 80)
485
+ proxy = proxy._replace(port=port)
486
+
487
+ self.proxy = proxy
488
+ self.proxy_headers = proxy_headers or {}
489
+ self.proxy_ssl_context = proxy_ssl_context
490
+ self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)
491
+
492
+ connection_pool_kw["_proxy"] = self.proxy
493
+ connection_pool_kw["_proxy_headers"] = self.proxy_headers
494
+ connection_pool_kw["_proxy_config"] = self.proxy_config
495
+
496
+ super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)
497
+
498
+ def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
499
+ if scheme == "https":
500
+ return super(ProxyManager, self).connection_from_host(
501
+ host, port, scheme, pool_kwargs=pool_kwargs
502
+ )
503
+
504
+ return super(ProxyManager, self).connection_from_host(
505
+ self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs
506
+ )
507
+
508
+ def _set_proxy_headers(self, url, headers=None):
509
+ """
510
+ Sets headers needed by proxies: specifically, the Accept and Host
511
+ headers. Only sets headers not provided by the user.
512
+ """
513
+ headers_ = {"Accept": "*/*"}
514
+
515
+ netloc = parse_url(url).netloc
516
+ if netloc:
517
+ headers_["Host"] = netloc
518
+
519
+ if headers:
520
+ headers_.update(headers)
521
+ return headers_
522
+
523
+ def urlopen(self, method, url, redirect=True, **kw):
524
+ "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
525
+ u = parse_url(url)
526
+ if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
527
+ # For connections using HTTP CONNECT, httplib sets the necessary
528
+ # headers on the CONNECT to the proxy. If we're not using CONNECT,
529
+ # we'll definitely need to set 'Host' at the very least.
530
+ headers = kw.get("headers", self.headers)
531
+ kw["headers"] = self._set_proxy_headers(url, headers)
532
+
533
+ return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
534
+
535
+
536
+ def proxy_from_url(url, **kw):
537
+ return ProxyManager(proxy_url=url, **kw)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/request.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import sys
4
+
5
+ from .filepost import encode_multipart_formdata
6
+ from .packages import six
7
+ from .packages.six.moves.urllib.parse import urlencode
8
+
9
+ __all__ = ["RequestMethods"]
10
+
11
+
12
+ class RequestMethods(object):
13
+ """
14
+ Convenience mixin for classes who implement a :meth:`urlopen` method, such
15
+ as :class:`urllib3.HTTPConnectionPool` and
16
+ :class:`urllib3.PoolManager`.
17
+
18
+ Provides behavior for making common types of HTTP request methods and
19
+ decides which type of request field encoding to use.
20
+
21
+ Specifically,
22
+
23
+ :meth:`.request_encode_url` is for sending requests whose fields are
24
+ encoded in the URL (such as GET, HEAD, DELETE).
25
+
26
+ :meth:`.request_encode_body` is for sending requests whose fields are
27
+ encoded in the *body* of the request using multipart or www-form-urlencoded
28
+ (such as for POST, PUT, PATCH).
29
+
30
+ :meth:`.request` is for making any kind of request, it will look up the
31
+ appropriate encoding format and use one of the above two methods to make
32
+ the request.
33
+
34
+ Initializer parameters:
35
+
36
+ :param headers:
37
+ Headers to include with all requests, unless other headers are given
38
+ explicitly.
39
+ """
40
+
41
+ _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}
42
+
43
+ def __init__(self, headers=None):
44
+ self.headers = headers or {}
45
+
46
+ def urlopen(
47
+ self,
48
+ method,
49
+ url,
50
+ body=None,
51
+ headers=None,
52
+ encode_multipart=True,
53
+ multipart_boundary=None,
54
+ **kw
55
+ ): # Abstract
56
+ raise NotImplementedError(
57
+ "Classes extending RequestMethods must implement "
58
+ "their own ``urlopen`` method."
59
+ )
60
+
61
+ def request(self, method, url, fields=None, headers=None, **urlopen_kw):
62
+ """
63
+ Make a request using :meth:`urlopen` with the appropriate encoding of
64
+ ``fields`` based on the ``method`` used.
65
+
66
+ This is a convenience method that requires the least amount of manual
67
+ effort. It can be used in most situations, while still having the
68
+ option to drop down to more specific methods when necessary, such as
69
+ :meth:`request_encode_url`, :meth:`request_encode_body`,
70
+ or even the lowest level :meth:`urlopen`.
71
+ """
72
+ method = method.upper()
73
+
74
+ urlopen_kw["request_url"] = url
75
+
76
+ if method in self._encode_url_methods:
77
+ return self.request_encode_url(
78
+ method, url, fields=fields, headers=headers, **urlopen_kw
79
+ )
80
+ else:
81
+ return self.request_encode_body(
82
+ method, url, fields=fields, headers=headers, **urlopen_kw
83
+ )
84
+
85
+ def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
86
+ """
87
+ Make a request using :meth:`urlopen` with the ``fields`` encoded in
88
+ the url. This is useful for request methods like GET, HEAD, DELETE, etc.
89
+ """
90
+ if headers is None:
91
+ headers = self.headers
92
+
93
+ extra_kw = {"headers": headers}
94
+ extra_kw.update(urlopen_kw)
95
+
96
+ if fields:
97
+ url += "?" + urlencode(fields)
98
+
99
+ return self.urlopen(method, url, **extra_kw)
100
+
101
+ def request_encode_body(
102
+ self,
103
+ method,
104
+ url,
105
+ fields=None,
106
+ headers=None,
107
+ encode_multipart=True,
108
+ multipart_boundary=None,
109
+ **urlopen_kw
110
+ ):
111
+ """
112
+ Make a request using :meth:`urlopen` with the ``fields`` encoded in
113
+ the body. This is useful for request methods like POST, PUT, PATCH, etc.
114
+
115
+ When ``encode_multipart=True`` (default), then
116
+ :func:`urllib3.encode_multipart_formdata` is used to encode
117
+ the payload with the appropriate content type. Otherwise
118
+ :func:`urllib.parse.urlencode` is used with the
119
+ 'application/x-www-form-urlencoded' content type.
120
+
121
+ Multipart encoding must be used when posting files, and it's reasonably
122
+ safe to use it in other times too. However, it may break request
123
+ signing, such as with OAuth.
124
+
125
+ Supports an optional ``fields`` parameter of key/value strings AND
126
+ key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
127
+ the MIME type is optional. For example::
128
+
129
+ fields = {
130
+ 'foo': 'bar',
131
+ 'fakefile': ('foofile.txt', 'contents of foofile'),
132
+ 'realfile': ('barfile.txt', open('realfile').read()),
133
+ 'typedfile': ('bazfile.bin', open('bazfile').read(),
134
+ 'image/jpeg'),
135
+ 'nonamefile': 'contents of nonamefile field',
136
+ }
137
+
138
+ When uploading a file, providing a filename (the first parameter of the
139
+ tuple) is optional but recommended to best mimic behavior of browsers.
140
+
141
+ Note that if ``headers`` are supplied, the 'Content-Type' header will
142
+ be overwritten because it depends on the dynamic random boundary string
143
+ which is used to compose the body of the request. The random boundary
144
+ string can be explicitly set with the ``multipart_boundary`` parameter.
145
+ """
146
+ if headers is None:
147
+ headers = self.headers
148
+
149
+ extra_kw = {"headers": {}}
150
+
151
+ if fields:
152
+ if "body" in urlopen_kw:
153
+ raise TypeError(
154
+ "request got values for both 'fields' and 'body', can only specify one."
155
+ )
156
+
157
+ if encode_multipart:
158
+ body, content_type = encode_multipart_formdata(
159
+ fields, boundary=multipart_boundary
160
+ )
161
+ else:
162
+ body, content_type = (
163
+ urlencode(fields),
164
+ "application/x-www-form-urlencoded",
165
+ )
166
+
167
+ extra_kw["body"] = body
168
+ extra_kw["headers"] = {"Content-Type": content_type}
169
+
170
+ extra_kw["headers"].update(headers)
171
+ extra_kw.update(urlopen_kw)
172
+
173
+ return self.urlopen(method, url, **extra_kw)
174
+
175
+
176
+ if not six.PY2:
177
+
178
+ class RequestModule(sys.modules[__name__].__class__):
179
+ def __call__(self, *args, **kwargs):
180
+ """
181
+ If user tries to call this module directly urllib3 v2.x style raise an error to the user
182
+ suggesting they may need urllib3 v2
183
+ """
184
+ raise TypeError(
185
+ "'module' object is not callable\n"
186
+ "urllib3.request() method is not supported in this release, "
187
+ "upgrade to urllib3 v2 to use it\n"
188
+ "see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html"
189
+ )
190
+
191
+ sys.modules[__name__].__class__ = RequestModule
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py ADDED
@@ -0,0 +1,879 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import io
4
+ import logging
5
+ import sys
6
+ import warnings
7
+ import zlib
8
+ from contextlib import contextmanager
9
+ from socket import error as SocketError
10
+ from socket import timeout as SocketTimeout
11
+
12
+ brotli = None
13
+
14
+ from . import util
15
+ from ._collections import HTTPHeaderDict
16
+ from .connection import BaseSSLError, HTTPException
17
+ from .exceptions import (
18
+ BodyNotHttplibCompatible,
19
+ DecodeError,
20
+ HTTPError,
21
+ IncompleteRead,
22
+ InvalidChunkLength,
23
+ InvalidHeader,
24
+ ProtocolError,
25
+ ReadTimeoutError,
26
+ ResponseNotChunked,
27
+ SSLError,
28
+ )
29
+ from .packages import six
30
+ from .util.response import is_fp_closed, is_response_to_head
31
+
32
+ log = logging.getLogger(__name__)
33
+
34
+
35
+ class DeflateDecoder(object):
36
+ def __init__(self):
37
+ self._first_try = True
38
+ self._data = b""
39
+ self._obj = zlib.decompressobj()
40
+
41
+ def __getattr__(self, name):
42
+ return getattr(self._obj, name)
43
+
44
+ def decompress(self, data):
45
+ if not data:
46
+ return data
47
+
48
+ if not self._first_try:
49
+ return self._obj.decompress(data)
50
+
51
+ self._data += data
52
+ try:
53
+ decompressed = self._obj.decompress(data)
54
+ if decompressed:
55
+ self._first_try = False
56
+ self._data = None
57
+ return decompressed
58
+ except zlib.error:
59
+ self._first_try = False
60
+ self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
61
+ try:
62
+ return self.decompress(self._data)
63
+ finally:
64
+ self._data = None
65
+
66
+
67
class GzipDecoderState(object):
    """Named constants for the states a :class:`GzipDecoder` moves through."""

    # Still decoding the first gzip member; a zlib error here is re-raised.
    FIRST_MEMBER = 0
    # At least one member finished; a zlib error here ends decoding quietly.
    OTHER_MEMBERS = 1
    # An error has occurred; all further input is discarded.
    SWALLOW_DATA = 2


class GzipDecoder(object):
    """Incremental decoder for gzip bodies, including multi-member streams."""

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Anything not defined here (e.g. ``flush``) is served by the
        # underlying zlib decompression object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Decompress ``data`` and return the decoded bytes.

        Returns ``b""`` for empty input or once an earlier error has put the
        decoder into the swallow state. Raises ``zlib.error`` only when the
        failure happens while still in the first member.
        """
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            # Bytes left over after a member ends are fed to a fresh
            # decompressor as the start of the next member.
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
102
+
103
+
104
if brotli is not None:

    class BrotliDecoder(object):
        """Thin adapter giving a brotli decompressor a ``decompress``/``flush`` API."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self):
            self._obj = brotli.Decompressor()
            # Bind whichever decoding method the installed package exposes so
            # callers can always use ``decompress``.
            if hasattr(self._obj, "decompress"):
                self.decompress = self._obj.decompress
            else:
                self.decompress = self._obj.process

        def flush(self):
            """Return the underlying object's flushed output, or ``b""`` if it has no ``flush``."""
            if hasattr(self._obj, "flush"):
                return self._obj.flush()
            return b""
121
+
122
+
123
class MultiDecoder(object):
    """
    Decoder for a comma-separated list of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes):
        # One decoder per listed coding, kept in header (application) order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self):
        """Return the flush output of the first listed decoder only."""
        return self._decoders[0].flush()

    def decompress(self, data):
        """Run ``data`` through every decoder, last-listed coding first."""
        # Codings are listed in the order applied, so they are undone in
        # reverse.
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data
142
+
143
+
144
def _get_decoder(mode):
    """Return a new decoder object for the content-coding string ``mode``.

    A value containing a comma yields a ``MultiDecoder``; ``"gzip"`` yields a
    ``GzipDecoder``; ``"br"`` yields a ``BrotliDecoder`` when brotli is
    available. Every other value falls through to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    return DeflateDecoder()
155
+
156
+
157
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):

        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None
        self._body = None
        self._fp = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg
        self._request_url = request_url

        # A non-empty string/bytes body is stored as-is; a file-like body
        # (anything with ``read``) becomes the underlying file object below.
        if body and isinstance(body, (six.string_types, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def get_redirect_location(self):
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")

        return False

    def release_conn(self):
        """Hand the connection back to the pool, if both are still held."""
        if not self._pool or not self._connection:
            return

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self):
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, SocketError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self):
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body

        if self._fp:
            return self.read(cache_content=True)

    @property
    def connection(self):
        return self._connection

    def isclosed(self):
        return is_fp_closed(self._fp)

    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method):
        """
        Set initial length value for Response content if available.
        """
        length = self.headers.get("content-length")

        if length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    def _init_decoder(self):
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                # A decoder is built for the full header value as soon as at
                # least one listed coding is recognised.
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    DECODER_ERROR_CLASSES = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    def _decode(self, data, decode_content, flush_decoder):
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            )
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self):
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            buf = self._decoder.decompress(b"")
            return buf + self._decoder.flush()

        return b""

    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e)

                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt):
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2 ** 31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2 ** 28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is None:
                flush_decoder = True
            else:
                cache_content = False
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do.  However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data

    def stream(self, amt=2 ** 16, decode_content=None):
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            for line in self.read_chunked(amt, decode_content=decode_content):
                yield line
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
        Given an :class:`http.client.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
        with ``original_response=r``.
        """
        headers = r.msg

        if not isinstance(headers, HTTPHeaderDict):
            if six.PY2:
                # Python 2.7
                headers = HTTPHeaderDict.from_httplib(headers)
            else:
                headers = HTTPHeaderDict(headers.items())

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, "strict", 0)
        resp = ResponseCls(
            body=r,
            headers=headers,
            status=r.status,
            version=r.version,
            reason=r.reason,
            strict=strict,
            original_response=r,
            **response_kw
        )
        return resp

    # Backwards-compatibility methods for http.client.HTTPResponse
    def getheaders(self):
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name, default=None):
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Backwards compatibility for http.cookiejar
    def info(self):
        return self.headers

    # Overrides from io.IOBase
    def close(self):
        # With ``auto_close=False`` the ``closed`` property comes from
        # io.IOBase and can be False while there is no file object at all
        # (e.g. a response built from a plain bytes body), so guard the
        # ``_fp`` access explicitly.
        if not self.closed and self._fp is not None:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self):
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self):
        if self._fp is None:
            raise IOError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise IOError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self):
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def readable(self):
        # This method is required for `io` module compatibility.
        return True

    def readinto(self, b):
        # This method is required for `io` module compatibility.
        temp = self.read(len(b))
        if not temp:
            # read() returns None when there is no underlying file object and
            # an empty value at end of stream; in both cases nothing is copied.
            return 0
        b[: len(temp)] = temp
        return len(temp)

    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self):
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return
        line = self._fp.fp.readline()
        # Drop anything after the first ";" before parsing the hex size.
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line)

    def _handle_chunk(self, amt):
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)
            returned_chunk = chunk
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif amt < self.chunk_left:
            value = self._fp._safe_read(amt)
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk

    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    def geturl(self):
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        if self.retries is not None and len(self.retries.history):
            return self.retries.history[-1].redirect_location
        else:
            return self._request_url

    def __iter__(self):
        # Re-split the streamed chunks on b"\n"; ``buffer`` carries the
        # unterminated tail of one chunk over to the next.
        buffer = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")
                yield b"".join(buffer) + chunk[0] + b"\n"
                for x in chunk[1:-1]:
                    yield x + b"\n"
                if chunk[-1]:
                    buffer = [chunk[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/__init__.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ # For backwards compatibility, provide imports that used to be here.
4
+ from .connection import is_connection_dropped
5
+ from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers
6
+ from .response import is_fp_closed
7
+ from .retry import Retry
8
+ from .ssl_ import (
9
+ ALPN_PROTOCOLS,
10
+ HAS_SNI,
11
+ IS_PYOPENSSL,
12
+ IS_SECURETRANSPORT,
13
+ PROTOCOL_TLS,
14
+ SSLContext,
15
+ assert_fingerprint,
16
+ resolve_cert_reqs,
17
+ resolve_ssl_version,
18
+ ssl_wrap_socket,
19
+ )
20
+ from .timeout import Timeout, current_time
21
+ from .url import Url, get_host, parse_url, split_first
22
+ from .wait import wait_for_read, wait_for_write
23
+
24
+ __all__ = (
25
+ "HAS_SNI",
26
+ "IS_PYOPENSSL",
27
+ "IS_SECURETRANSPORT",
28
+ "SSLContext",
29
+ "PROTOCOL_TLS",
30
+ "ALPN_PROTOCOLS",
31
+ "Retry",
32
+ "Timeout",
33
+ "Url",
34
+ "assert_fingerprint",
35
+ "current_time",
36
+ "is_connection_dropped",
37
+ "is_fp_closed",
38
+ "get_host",
39
+ "parse_url",
40
+ "make_headers",
41
+ "resolve_cert_reqs",
42
+ "resolve_ssl_version",
43
+ "split_first",
44
+ "ssl_wrap_socket",
45
+ "wait_for_read",
46
+ "wait_for_write",
47
+ "SKIP_HEADER",
48
+ "SKIPPABLE_HEADERS",
49
+ )
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/connection.py ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import
2
+
3
+ import socket
4
+
5
+ from ..contrib import _appengine_environ
6
+ from ..exceptions import LocationParseError
7
+ from ..packages import six
8
+ from .wait import NoWayToWaitForSocketError, wait_for_read
9
+
10
+
11
def is_connection_dropped(conn):  # Platform-specific
    """
    Returns True if the connection is dropped and should be closed.

    :param conn:
        :class:`http.client.HTTPConnection` object.

    Note: For platforms like AppEngine, this will always return ``False`` to
    let the platform handle connection recycling transparently for us.
    """
    # ``False`` is the "no such attribute" sentinel, distinct from a ``sock``
    # attribute that exists but is ``None``.
    sock = getattr(conn, "sock", False)
    if sock is False:  # Platform-specific: AppEngine
        return False
    if sock is None:  # Connection already closed (such as by httplib).
        return True
    try:
        # Returns True if readable, which here means it's been dropped
        return wait_for_read(sock, timeout=0.0)
    except NoWayToWaitForSocketError:  # Platform-specific: AppEngine
        return False
31
+
32
+
33
+ # This function is copied from socket.py in the Python 2.7 standard
34
+ # library test suite. Added to its signature is only `socket_options`.
35
+ # One additional modification is that we avoid binding to IPv6 servers
36
+ # discovered in DNS if the system doesn't have IPv6 functionality.
37
def create_connection(
    address,
    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    source_address=None,
    socket_options=None,
):
    """Connect to *address* and return the socket object.

    Convenience function.  Connect to *address* (a 2-tuple ``(host,
    port)``) and return the socket object.  Passing the optional
    *timeout* parameter will set the timeout on the socket instance
    before attempting to connect.  If no *timeout* is supplied, the
    global default timeout setting returned by :func:`socket.getdefaulttimeout`
    is used.  If *source_address* is set it must be a tuple of (host, port)
    for the socket to bind as a source address before making the connection.
    An host of '' or port 0 tells the OS to use the default.

    *socket_options*, when given, is an iterable of argument tuples applied
    with ``setsockopt`` before connecting.
    """

    host, port = address
    if host.startswith("["):
        host = host.strip("[]")
    err = None

    # Using the value from allowed_gai_family() in the context of getaddrinfo lets
    # us select whether to work with IPv4 DNS records, IPv6 records, or both.
    # The original create_connection function always returns all records.
    family = allowed_gai_family()

    try:
        host.encode("idna")
    except UnicodeError:
        return six.raise_from(
            LocationParseError(u"'%s', label empty or too long" % host), None
        )

    # Try each resolved address in turn; the first successful connect wins and
    # the error from the last failed attempt is kept for re-raising.
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = res
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            _set_socket_options(sock, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                sock.settimeout(timeout)
            if source_address:
                sock.bind(source_address)
            sock.connect(sa)
            return sock

        except socket.error as e:
            err = e
            if sock is not None:
                sock.close()
                sock = None

    if err is not None:
        raise err

    raise socket.error("getaddrinfo returns an empty list")
98
+
99
+
100
def _set_socket_options(sock, options):
    """Apply each entry of ``options`` to ``sock`` via ``setsockopt``.

    ``options`` is an iterable of argument tuples, or ``None`` for a no-op.
    """
    if options is None:
        return

    for opt in options:
        sock.setsockopt(*opt)
106
+
107
+
108
def allowed_gai_family():
    """This function is designed to work in the context of
    getaddrinfo, where family=socket.AF_UNSPEC is the default and
    will perform a DNS search for both IPv6 and IPv4 records.

    Returns ``socket.AF_UNSPEC`` when ``HAS_IPV6`` is truthy, otherwise
    ``socket.AF_INET``."""

    family = socket.AF_INET
    if HAS_IPV6:
        family = socket.AF_UNSPEC
    return family
117
+
118
+
119
def _has_ipv6(host):
    """Returns True if the system can bind an IPv6 address."""
    sock = None
    has_ipv6 = False

    # App Engine doesn't support IPV6 sockets and actually has a quota on the
    # number of sockets that can be used, so just early out here instead of
    # creating a socket needlessly.
    # See https://github.com/urllib3/urllib3/issues/1446
    if _appengine_environ.is_appengine_sandbox():
        return False

    if socket.has_ipv6:
        # has_ipv6 returns true if cPython was compiled with IPv6 support.
        # It does not tell us if the system has IPv6 support enabled. To
        # determine that we must bind to an IPv6 address.
        # https://github.com/urllib3/urllib3/pull/611
        # https://bugs.python.org/issue658327
        try:
            sock = socket.socket(socket.AF_INET6)
            sock.bind((host, 0))
            has_ipv6 = True
        except Exception:
            # Deliberately best-effort: any failure to create or bind the
            # probe socket just means "no usable IPv6".
            pass

    if sock:
        sock.close()
    return has_ipv6


# Probed once at import time by binding to the IPv6 loopback address.
HAS_IPV6 = _has_ipv6("::1")
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/proxy.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .ssl_ import create_urllib3_context, resolve_cert_reqs, resolve_ssl_version
2
+
3
+
4
def connection_requires_http_tunnel(
    proxy_url=None, proxy_config=None, destination_scheme=None
):
    """
    Returns True if the connection requires an HTTP CONNECT through the proxy.

    :param URL proxy_url:
        URL of the proxy.
    :param ProxyConfig proxy_config:
        Proxy configuration from poolmanager.py
    :param str destination_scheme:
        The scheme of the destination. (i.e https, http, etc)
    """
    # If we're not using a proxy, no way to use a tunnel.
    if proxy_url is None:
        return False

    # HTTP destinations never require tunneling, we always forward.
    if destination_scheme == "http":
        return False

    # Support for forwarding with HTTPS proxies and HTTPS destinations.
    if (
        proxy_url.scheme == "https"
        and proxy_config
        and proxy_config.use_forwarding_for_https
    ):
        return False

    # Otherwise always use a tunnel.
    return True
35
+
36
+
37
def create_proxy_ssl_context(
    ssl_version, cert_reqs, ca_certs=None, ca_cert_dir=None, ca_cert_data=None
):
    """
    Generates a default proxy ssl context if one hasn't been provided by the
    user.

    The system default certificates are loaded only when none of
    ``ca_certs``, ``ca_cert_dir`` or ``ca_cert_data`` is given and the
    context object has a ``load_default_certs`` method.
    """
    ssl_context = create_urllib3_context(
        ssl_version=resolve_ssl_version(ssl_version),
        cert_reqs=resolve_cert_reqs(cert_reqs),
    )

    if (
        not ca_certs
        and not ca_cert_dir
        and not ca_cert_data
        and hasattr(ssl_context, "load_default_certs")
    ):
        ssl_context.load_default_certs()

    return ssl_context