Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- LTA_openwebtext_dualt/logs/elfopt_8gpu/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024.log +617 -0
- LTA_openwebtext_dualt/logs/fullycoupled_loss1mt_floor0p25_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_loss1mt_floor0p25_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260514_230726.log +0 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0010000_state_fromstate_t1p45.log +8 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0020000_state_fromstate_t1p45.log +8 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0030000_state_fromstate_t1p45.log +8 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0040000_state_fromstate_t1p45.log +8 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0050000_state_fromstate_t1p45.log +8 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt +5 -0
- LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/watch_every10k_state_t1p45.nohup.log +46 -0
- LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2.dirichlet_resample.eval.log +8 -0
- LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce.flowmap.eval.log +8 -0
- LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor.dirichlet_resample.eval.log +8 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/__init__.py +33 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/compat.py +1138 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/database.py +1359 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/index.py +508 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/locators.py +1303 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/manifest.py +384 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/markers.py +167 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/metadata.py +1068 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/resources.py +358 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/scripts.py +452 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/util.py +2025 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/version.py +751 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/wheel.py +1099 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/__init__.py +322 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/actions.py +217 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/common.py +432 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/core.py +0 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/diagram/__init__.py +656 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/exceptions.py +299 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/helpers.py +1100 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/results.py +796 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/testing.py +331 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/unicode.py +361 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/util.py +284 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/__init__.py +102 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_collections.py +337 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_version.py +2 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connection.py +572 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connectionpool.py +1132 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/exceptions.py +323 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/fields.py +274 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/filepost.py +98 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/poolmanager.py +537 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/request.py +191 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py +879 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/__init__.py +49 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/connection.py +149 -0
- LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/proxy.py +57 -0
LTA_openwebtext_dualt/logs/elfopt_8gpu/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024.log
ADDED
|
@@ -0,0 +1,617 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[rank6]:[W513 02:30:29.571793088 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 6] using GPU 6 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 2 |
+
[rank2]:[W513 02:30:29.573074597 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 2] using GPU 2 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 3 |
+
[rank0]:[W513 02:30:29.621398720 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 0] using GPU 0 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 4 |
+
t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 5 |
+
t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 6 |
+
t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO cudaDriverVersion 12080
|
| 7 |
+
t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 8 |
+
t-20260513102957-v877z-worker-0:10222:10222 [0] NCCL INFO Comm config Blocking set to 1
|
| 9 |
+
t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO cudaDriverVersion 12080
|
| 10 |
+
t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 11 |
+
t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO cudaDriverVersion 12080
|
| 12 |
+
t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 13 |
+
t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 14 |
+
t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 15 |
+
t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 16 |
+
t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 17 |
+
t-20260513102957-v877z-worker-0:10228:10228 [6] NCCL INFO Comm config Blocking set to 1
|
| 18 |
+
t-20260513102957-v877z-worker-0:10224:10224 [2] NCCL INFO Comm config Blocking set to 1
|
| 19 |
+
[rank1]:[W513 02:30:29.693479602 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 1] using GPU 1 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 20 |
+
[rank3]:[W513 02:30:29.694131805 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 3] using GPU 3 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 21 |
+
[rank7]:[W513 02:30:29.696815174 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 7] using GPU 7 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 22 |
+
t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO cudaDriverVersion 12080
|
| 23 |
+
t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 24 |
+
t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 25 |
+
t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 26 |
+
t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO cudaDriverVersion 12080
|
| 27 |
+
t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 28 |
+
t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 29 |
+
t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 30 |
+
[rank5]:[W513 02:30:29.704393470 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 5] using GPU 5 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 31 |
+
t-20260513102957-v877z-worker-0:10223:10223 [1] NCCL INFO Comm config Blocking set to 1
|
| 32 |
+
t-20260513102957-v877z-worker-0:10225:10225 [3] NCCL INFO Comm config Blocking set to 1
|
| 33 |
+
t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO cudaDriverVersion 12080
|
| 34 |
+
t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 35 |
+
t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 36 |
+
t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 37 |
+
t-20260513102957-v877z-worker-0:10229:10229 [7] NCCL INFO Comm config Blocking set to 1
|
| 38 |
+
[rank4]:[W513 02:30:29.710692738 ProcessGroupNCCL.cpp:4571] [PG ID 0 PG GUID 0 Rank 4] using GPU 4 to perform barrier as devices used by this process are currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. Specify device_ids in barrier() to force use of a particular device, or call init_process_group() with a device_id.
|
| 39 |
+
t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO cudaDriverVersion 12080
|
| 40 |
+
t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 41 |
+
t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 42 |
+
t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 43 |
+
t-20260513102957-v877z-worker-0:10227:10227 [5] NCCL INFO Comm config Blocking set to 1
|
| 44 |
+
t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO cudaDriverVersion 12080
|
| 45 |
+
t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 46 |
+
t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO Bootstrap: Using eth1:10.82.112.1<0>
|
| 47 |
+
t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO NCCL version 2.25.1+cuda12.8
|
| 48 |
+
t-20260513102957-v877z-worker-0:10226:10226 [4] NCCL INFO Comm config Blocking set to 1
|
| 49 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 50 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 51 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 52 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO P2P plugin v9 IBext_v9
|
| 53 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 54 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 55 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 56 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 57 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO P2P plugin v9 IBext_v9
|
| 58 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 59 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 60 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 61 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 62 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO P2P plugin v9 IBext_v9
|
| 63 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 64 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 65 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 66 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 67 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO P2P plugin v9 IBext_v9
|
| 68 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 69 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 70 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 71 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 72 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO P2P plugin v9 IBext_v9
|
| 73 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 74 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 75 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 76 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 77 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO P2P plugin v9 IBext_v9
|
| 78 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 79 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 80 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 81 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 82 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO P2P plugin v9 IBext_v9
|
| 83 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 84 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/Plugin: Loaded net plugin NCCL RDMA Plugin v9 (v9)
|
| 85 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/Plugin: Loaded collnet plugin SHARP (v9)
|
| 86 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Plugin Path : /opt/hpcx/nccl_rdma_sharp_plugin/lib/libnccl-net.so
|
| 87 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO P2P plugin v9 IBext_v9
|
| 88 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_SOCKET_IFNAME set by environment to eth1
|
| 89 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 90 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 91 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 92 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Using network IBext_v9
|
| 93 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 94 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 95 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 96 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Using network IBext_v9
|
| 97 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 98 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 99 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 100 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Using network IBext_v9
|
| 101 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 102 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 103 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 104 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 105 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 106 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Using network IBext_v9
|
| 107 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 108 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Using network IBext_v9
|
| 109 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 110 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 111 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 112 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Using network IBext_v9
|
| 113 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 114 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 115 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 116 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Using network IBext_v9
|
| 117 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_IB_PCI_RELAXED_ORDERING set by environment to 1.
|
| 118 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NET/IB : Using [0]mlx5_1:1/RoCE [1]mlx5_4:1/RoCE [2]mlx5_5:1/RoCE [3]mlx5_6:1/RoCE [4]mlx5_7:1/RoCE [5]mlx5_8:1/RoCE [6]mlx5_9:1/RoCE [7]mlx5_10:1/RoCE [RO]; OOB eth1:10.82.112.1<0>
|
| 119 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO PROFILER/Plugin: Could not find: libnccl-profiler.so.
|
| 120 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Using network IBext_v9
|
| 121 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO ncclCommInitRankConfig comm 0x98ac490 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 69020 commId 0xc3d8f44253f33569 - Init START
|
| 122 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO ncclCommInitRankConfig comm 0xbd43ac0 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 65040 commId 0xc3d8f44253f33569 - Init START
|
| 123 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO ncclCommInitRankConfig comm 0xa99de10 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId 73020 commId 0xc3d8f44253f33569 - Init START
|
| 124 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO ncclCommInitRankConfig comm 0x98a1870 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 67020 commId 0xc3d8f44253f33569 - Init START
|
| 125 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO RAS client listening socket at ::1<28028>
|
| 126 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO ncclCommInitRankConfig comm 0x9b15700 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 6b020 commId 0xc3d8f44253f33569 - Init START
|
| 127 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO RAS client listening socket at ::1<28028>
|
| 128 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO ncclCommInitRankConfig comm 0xaa777e0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId 75020 commId 0xc3d8f44253f33569 - Init START
|
| 129 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO RAS client listening socket at ::1<28028>
|
| 130 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO RAS client listening socket at ::1<28028>
|
| 131 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO ncclCommInitRankConfig comm 0xa87b0d0 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 71020 commId 0xc3d8f44253f33569 - Init START
|
| 132 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO RAS client listening socket at ::1<28028>
|
| 133 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO ncclCommInitRankConfig comm 0xa1cc500 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 6f020 commId 0xc3d8f44253f33569 - Init START
|
| 134 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO RAS client listening socket at ::1<28028>
|
| 135 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO RAS client listening socket at ::1<28028>
|
| 136 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO RAS client listening socket at ::1<28028>
|
| 137 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Bootstrap timings total 0.008778 (create 0.000020, send 0.000074, recv 0.008247, ring 0.000142, delay 0.000000)
|
| 138 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Bootstrap timings total 0.011209 (create 0.000023, send 0.000069, recv 0.000102, ring 0.010580, delay 0.000001)
|
| 139 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Bootstrap timings total 0.000624 (create 0.000023, send 0.000070, recv 0.000111, ring 0.000105, delay 0.000000)
|
| 140 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Bootstrap timings total 0.002691 (create 0.000020, send 0.000076, recv 0.000032, ring 0.000105, delay 0.000000)
|
| 141 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Bootstrap timings total 0.022499 (create 0.000026, send 0.000074, recv 0.017801, ring 0.002214, delay 0.000001)
|
| 142 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Bootstrap timings total 0.078898 (create 0.000026, send 0.000068, recv 0.070159, ring 0.008328, delay 0.000000)
|
| 143 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Bootstrap timings total 0.004769 (create 0.000021, send 0.000079, recv 0.000076, ring 0.004295, delay 0.000000)
|
| 144 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Bootstrap timings total 0.072414 (create 0.000030, send 0.000069, recv 0.061280, ring 0.004294, delay 0.000001)
|
| 145 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO MNNVL busId 0x67020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 146 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO MNNVL busId 0x6b020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 147 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO MNNVL busId 0x75020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 148 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO MNNVL busId 0x6f020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 149 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO MNNVL busId 0x65040 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 150 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO MNNVL busId 0x69020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 151 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO MNNVL busId 0x71020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 152 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO MNNVL busId 0x73020 fabric UUID 0.0 cliqueId 0x0 state 3 healthMask 0x0
|
| 153 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 154 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 155 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 156 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 157 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 158 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 159 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 160 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NCCL_TOPO_FILE set by environment to /var/run/nvidia-topologyd/virtualTopology.xml
|
| 161 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Setting affinity for GPU 3 to 03ffffff,ffffffff,ffffffff
|
| 162 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Setting affinity for GPU 2 to 03ffffff,ffffffff,ffffffff
|
| 163 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Setting affinity for GPU 1 to 03ffffff,ffffffff,ffffffff
|
| 164 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Setting affinity for GPU 4 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
|
| 165 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Setting affinity for GPU 0 to 03ffffff,ffffffff,ffffffff
|
| 166 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Setting affinity for GPU 7 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
|
| 167 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Setting affinity for GPU 5 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
|
| 168 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Setting affinity for GPU 6 to 0fffff,ffffffff,ffffffff,fc000000,00000000,00000000
|
| 169 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO NVLS multicast support is available on dev 5
|
| 170 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO NVLS multicast support is available on dev 7
|
| 171 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO NVLS multicast support is available on dev 2
|
| 172 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO NVLS multicast support is available on dev 4
|
| 173 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO NVLS multicast support is available on dev 3
|
| 174 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO NVLS multicast support is available on dev 6
|
| 175 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO NVLS multicast support is available on dev 1
|
| 176 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO NVLS multicast support is available on dev 0
|
| 177 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO comm 0xaa777e0 rank 7 nRanks 8 nNodes 1 localRanks 8 localRank 7 MNNVL 0
|
| 178 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO comm 0xa99de10 rank 6 nRanks 8 nNodes 1 localRanks 8 localRank 6 MNNVL 0
|
| 179 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO comm 0x98ac490 rank 2 nRanks 8 nNodes 1 localRanks 8 localRank 2 MNNVL 0
|
| 180 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO comm 0x9b15700 rank 3 nRanks 8 nNodes 1 localRanks 8 localRank 3 MNNVL 0
|
| 181 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO comm 0x98a1870 rank 1 nRanks 8 nNodes 1 localRanks 8 localRank 1 MNNVL 0
|
| 182 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO comm 0xa87b0d0 rank 5 nRanks 8 nNodes 1 localRanks 8 localRank 5 MNNVL 0
|
| 183 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO comm 0xbd43ac0 rank 0 nRanks 8 nNodes 1 localRanks 8 localRank 0 MNNVL 0
|
| 184 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO comm 0xa1cc500 rank 4 nRanks 8 nNodes 1 localRanks 8 localRank 4 MNNVL 0
|
| 185 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Trees [0] -1/-1/-1->7->6 [1] -1/-1/-1->7->6 [2] -1/-1/-1->7->6 [3] -1/-1/-1->7->6 [4] -1/-1/-1->7->6 [5] -1/-1/-1->7->6 [6] -1/-1/-1->7->6 [7] -1/-1/-1->7->6 [8] -1/-1/-1->7->6 [9] -1/-1/-1->7->6 [10] -1/-1/-1->7->6 [11] -1/-1/-1->7->6 [12] -1/-1/-1->7->6 [13] -1/-1/-1->7->6 [14] -1/-1/-1->7->6 [15] -1/-1/-1->7->6 [16] -1/-1/-1->7->6 [17] -1/-1/-1->7->6 [18] -1/-1/-1->7->6 [19] -1/-1/-1->7->6 [20] -1/-1/-1->7->6 [21] -1/-1/-1->7->6 [22] -1/-1/-1->7->6 [23] -1/-1/-1->7->6
|
| 186 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Trees [0] 7/-1/-1->6->5 [1] 7/-1/-1->6->5 [2] 7/-1/-1->6->5 [3] 7/-1/-1->6->5 [4] 7/-1/-1->6->5 [5] 7/-1/-1->6->5 [6] 7/-1/-1->6->5 [7] 7/-1/-1->6->5 [8] 7/-1/-1->6->5 [9] 7/-1/-1->6->5 [10] 7/-1/-1->6->5 [11] 7/-1/-1->6->5 [12] 7/-1/-1->6->5 [13] 7/-1/-1->6->5 [14] 7/-1/-1->6->5 [15] 7/-1/-1->6->5 [16] 7/-1/-1->6->5 [17] 7/-1/-1->6->5 [18] 7/-1/-1->6->5 [19] 7/-1/-1->6->5 [20] 7/-1/-1->6->5 [21] 7/-1/-1->6->5 [22] 7/-1/-1->6->5 [23] 7/-1/-1->6->5
|
| 187 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO P2P Chunksize set to 524288
|
| 188 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO P2P Chunksize set to 524288
|
| 189 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Trees [0] 3/-1/-1->2->1 [1] 3/-1/-1->2->1 [2] 3/-1/-1->2->1 [3] 3/-1/-1->2->1 [4] 3/-1/-1->2->1 [5] 3/-1/-1->2->1 [6] 3/-1/-1->2->1 [7] 3/-1/-1->2->1 [8] 3/-1/-1->2->1 [9] 3/-1/-1->2->1 [10] 3/-1/-1->2->1 [11] 3/-1/-1->2->1 [12] 3/-1/-1->2->1 [13] 3/-1/-1->2->1 [14] 3/-1/-1->2->1 [15] 3/-1/-1->2->1 [16] 3/-1/-1->2->1 [17] 3/-1/-1->2->1 [18] 3/-1/-1->2->1 [19] 3/-1/-1->2->1 [20] 3/-1/-1->2->1 [21] 3/-1/-1->2->1 [22] 3/-1/-1->2->1 [23] 3/-1/-1->2->1
|
| 190 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Trees [0] 4/-1/-1->3->2 [1] 4/-1/-1->3->2 [2] 4/-1/-1->3->2 [3] 4/-1/-1->3->2 [4] 4/-1/-1->3->2 [5] 4/-1/-1->3->2 [6] 4/-1/-1->3->2 [7] 4/-1/-1->3->2 [8] 4/-1/-1->3->2 [9] 4/-1/-1->3->2 [10] 4/-1/-1->3->2 [11] 4/-1/-1->3->2 [12] 4/-1/-1->3->2 [13] 4/-1/-1->3->2 [14] 4/-1/-1->3->2 [15] 4/-1/-1->3->2 [16] 4/-1/-1->3->2 [17] 4/-1/-1->3->2 [18] 4/-1/-1->3->2 [19] 4/-1/-1->3->2 [20] 4/-1/-1->3->2 [21] 4/-1/-1->3->2 [22] 4/-1/-1->3->2 [23] 4/-1/-1->3->2
|
| 191 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 00/24 : 0 1 2 3 4 5 6 7
|
| 192 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO P2P Chunksize set to 524288
|
| 193 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Trees [0] 6/-1/-1->5->4 [1] 6/-1/-1->5->4 [2] 6/-1/-1->5->4 [3] 6/-1/-1->5->4 [4] 6/-1/-1->5->4 [5] 6/-1/-1->5->4 [6] 6/-1/-1->5->4 [7] 6/-1/-1->5->4 [8] 6/-1/-1->5->4 [9] 6/-1/-1->5->4 [10] 6/-1/-1->5->4 [11] 6/-1/-1->5->4 [12] 6/-1/-1->5->4 [13] 6/-1/-1->5->4 [14] 6/-1/-1->5->4 [15] 6/-1/-1->5->4 [16] 6/-1/-1->5->4 [17] 6/-1/-1->5->4 [18] 6/-1/-1->5->4 [19] 6/-1/-1->5->4 [20] 6/-1/-1->5->4 [21] 6/-1/-1->5->4 [22] 6/-1/-1->5->4 [23] 6/-1/-1->5->4
|
| 194 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO P2P Chunksize set to 524288
|
| 195 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 01/24 : 0 1 2 3 4 5 6 7
|
| 196 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Trees [0] 2/-1/-1->1->0 [1] 2/-1/-1->1->0 [2] 2/-1/-1->1->0 [3] 2/-1/-1->1->0 [4] 2/-1/-1->1->0 [5] 2/-1/-1->1->0 [6] 2/-1/-1->1->0 [7] 2/-1/-1->1->0 [8] 2/-1/-1->1->0 [9] 2/-1/-1->1->0 [10] 2/-1/-1->1->0 [11] 2/-1/-1->1->0 [12] 2/-1/-1->1->0 [13] 2/-1/-1->1->0 [14] 2/-1/-1->1->0 [15] 2/-1/-1->1->0 [16] 2/-1/-1->1->0 [17] 2/-1/-1->1->0 [18] 2/-1/-1->1->0 [19] 2/-1/-1->1->0 [20] 2/-1/-1->1->0 [21] 2/-1/-1->1->0 [22] 2/-1/-1->1->0 [23] 2/-1/-1->1->0
|
| 197 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 02/24 : 0 1 2 3 4 5 6 7
|
| 198 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO P2P Chunksize set to 524288
|
| 199 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Trees [0] 5/-1/-1->4->3 [1] 5/-1/-1->4->3 [2] 5/-1/-1->4->3 [3] 5/-1/-1->4->3 [4] 5/-1/-1->4->3 [5] 5/-1/-1->4->3 [6] 5/-1/-1->4->3 [7] 5/-1/-1->4->3 [8] 5/-1/-1->4->3 [9] 5/-1/-1->4->3 [10] 5/-1/-1->4->3 [11] 5/-1/-1->4->3 [12] 5/-1/-1->4->3 [13] 5/-1/-1->4->3 [14] 5/-1/-1->4->3 [15] 5/-1/-1->4->3 [16] 5/-1/-1->4->3 [17] 5/-1/-1->4->3 [18] 5/-1/-1->4->3 [19] 5/-1/-1->4->3 [20] 5/-1/-1->4->3 [21] 5/-1/-1->4->3 [22] 5/-1/-1->4->3 [23] 5/-1/-1->4->3
|
| 200 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO P2P Chunksize set to 524288
|
| 201 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 03/24 : 0 1 2 3 4 5 6 7
|
| 202 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO P2P Chunksize set to 524288
|
| 203 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 04/24 : 0 1 2 3 4 5 6 7
|
| 204 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 05/24 : 0 1 2 3 4 5 6 7
|
| 205 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 06/24 : 0 1 2 3 4 5 6 7
|
| 206 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 07/24 : 0 1 2 3 4 5 6 7
|
| 207 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 08/24 : 0 1 2 3 4 5 6 7
|
| 208 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 09/24 : 0 1 2 3 4 5 6 7
|
| 209 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 10/24 : 0 1 2 3 4 5 6 7
|
| 210 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 11/24 : 0 1 2 3 4 5 6 7
|
| 211 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 12/24 : 0 1 2 3 4 5 6 7
|
| 212 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 13/24 : 0 1 2 3 4 5 6 7
|
| 213 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 14/24 : 0 1 2 3 4 5 6 7
|
| 214 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 15/24 : 0 1 2 3 4 5 6 7
|
| 215 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 16/24 : 0 1 2 3 4 5 6 7
|
| 216 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 17/24 : 0 1 2 3 4 5 6 7
|
| 217 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 18/24 : 0 1 2 3 4 5 6 7
|
| 218 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 19/24 : 0 1 2 3 4 5 6 7
|
| 219 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 20/24 : 0 1 2 3 4 5 6 7
|
| 220 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 21/24 : 0 1 2 3 4 5 6 7
|
| 221 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 22/24 : 0 1 2 3 4 5 6 7
|
| 222 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Channel 23/24 : 0 1 2 3 4 5 6 7
|
| 223 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Trees [0] 1/-1/-1->0->-1 [1] 1/-1/-1->0->-1 [2] 1/-1/-1->0->-1 [3] 1/-1/-1->0->-1 [4] 1/-1/-1->0->-1 [5] 1/-1/-1->0->-1 [6] 1/-1/-1->0->-1 [7] 1/-1/-1->0->-1 [8] 1/-1/-1->0->-1 [9] 1/-1/-1->0->-1 [10] 1/-1/-1->0->-1 [11] 1/-1/-1->0->-1 [12] 1/-1/-1->0->-1 [13] 1/-1/-1->0->-1 [14] 1/-1/-1->0->-1 [15] 1/-1/-1->0->-1 [16] 1/-1/-1->0->-1 [17] 1/-1/-1->0->-1 [18] 1/-1/-1->0->-1 [19] 1/-1/-1->0->-1 [20] 1/-1/-1->0->-1 [21] 1/-1/-1->0->-1 [22] 1/-1/-1->0->-1 [23] 1/-1/-1->0->-1
|
| 224 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO P2P Chunksize set to 524288
|
| 225 |
+
t-20260513102957-v877z-worker-0:10225:10374 [3] NCCL INFO [Proxy Service] Device 3 CPU core 48
|
| 226 |
+
t-20260513102957-v877z-worker-0:10225:10375 [3] NCCL INFO [Proxy Service UDS] Device 3 CPU core 50
|
| 227 |
+
t-20260513102957-v877z-worker-0:10223:10376 [1] NCCL INFO [Proxy Service] Device 1 CPU core 2
|
| 228 |
+
t-20260513102957-v877z-worker-0:10223:10377 [1] NCCL INFO [Proxy Service UDS] Device 1 CPU core 4
|
| 229 |
+
t-20260513102957-v877z-worker-0:10229:10378 [7] NCCL INFO [Proxy Service] Device 7 CPU core 146
|
| 230 |
+
t-20260513102957-v877z-worker-0:10229:10379 [7] NCCL INFO [Proxy Service UDS] Device 7 CPU core 150
|
| 231 |
+
t-20260513102957-v877z-worker-0:10224:10380 [2] NCCL INFO [Proxy Service] Device 2 CPU core 48
|
| 232 |
+
t-20260513102957-v877z-worker-0:10224:10381 [2] NCCL INFO [Proxy Service UDS] Device 2 CPU core 50
|
| 233 |
+
t-20260513102957-v877z-worker-0:10227:10382 [5] NCCL INFO [Proxy Service] Device 5 CPU core 106
|
| 234 |
+
t-20260513102957-v877z-worker-0:10227:10383 [5] NCCL INFO [Proxy Service UDS] Device 5 CPU core 108
|
| 235 |
+
t-20260513102957-v877z-worker-0:10228:10384 [6] NCCL INFO [Proxy Service] Device 6 CPU core 94
|
| 236 |
+
t-20260513102957-v877z-worker-0:10228:10385 [6] NCCL INFO [Proxy Service UDS] Device 6 CPU core 98
|
| 237 |
+
t-20260513102957-v877z-worker-0:10226:10386 [4] NCCL INFO [Proxy Service] Device 4 CPU core 94
|
| 238 |
+
t-20260513102957-v877z-worker-0:10226:10387 [4] NCCL INFO [Proxy Service UDS] Device 4 CPU core 96
|
| 239 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Check P2P Type intraNodeP2pSupport 1 directMode 0
|
| 240 |
+
t-20260513102957-v877z-worker-0:10222:10388 [0] NCCL INFO [Proxy Service] Device 0 CPU core 52
|
| 241 |
+
t-20260513102957-v877z-worker-0:10222:10389 [0] NCCL INFO [Proxy Service UDS] Device 0 CPU core 48
|
| 242 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 243 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 244 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 245 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 246 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 247 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 248 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 249 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 250 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 251 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 252 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 253 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 254 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 255 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 256 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO threadThresholds 8/8/64 | 64/8/64 | 512 | 512
|
| 257 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO 24 coll channels, 24 collnet channels, 16 nvls channels, 32 p2p channels, 32 p2p channels per peer
|
| 258 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO CC Off, workFifoBytes 1048576
|
| 259 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 260 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 261 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 262 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 263 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 264 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 265 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 266 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 267 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 268 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 269 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 270 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 271 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 272 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 273 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 274 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 275 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v4 symbol.
|
| 276 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO ncclCommInitRankConfig comm 0xa1cc500 rank 4 nranks 8 cudaDev 4 nvmlDev 4 busId 6f020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 277 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 278 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 279 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 280 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO ncclCommInitRankConfig comm 0xa99de10 rank 6 nranks 8 cudaDev 6 nvmlDev 6 busId 73020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 281 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO ncclCommInitRankConfig comm 0xaa777e0 rank 7 nranks 8 cudaDev 7 nvmlDev 7 busId 75020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 282 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO ncclCommInitRankConfig comm 0xa87b0d0 rank 5 nranks 8 cudaDev 5 nvmlDev 5 busId 71020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 283 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v3 symbol.
|
| 284 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 285 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO ncclCommInitRankConfig comm 0xbd43ac0 rank 0 nranks 8 cudaDev 0 nvmlDev 0 busId 65040 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 286 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 287 |
+
t-20260513102957-v877z-worker-0:10226:10301 [4] NCCL INFO Init timings - ncclCommInitRankConfig: rank 4 nranks 8 total 2.15 (kernels 0.20, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
|
| 288 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO TUNER/Plugin: Failed to find ncclTunerPlugin_v2 symbol, using internal tuner instead.
|
| 289 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO ncclCommInitRankConfig comm 0x98ac490 rank 2 nranks 8 cudaDev 2 nvmlDev 2 busId 69020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 290 |
+
t-20260513102957-v877z-worker-0:10228:10295 [6] NCCL INFO Init timings - ncclCommInitRankConfig: rank 6 nranks 8 total 2.21 (kernels 0.22, alloc 1.03, bootstrap 0.02, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
|
| 291 |
+
t-20260513102957-v877z-worker-0:10227:10300 [5] NCCL INFO Init timings - ncclCommInitRankConfig: rank 5 nranks 8 total 2.16 (kernels 0.20, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
|
| 292 |
+
t-20260513102957-v877z-worker-0:10229:10299 [7] NCCL INFO Init timings - ncclCommInitRankConfig: rank 7 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.00, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
|
| 293 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO ncclCommInitRankConfig comm 0x9b15700 rank 3 nranks 8 cudaDev 3 nvmlDev 3 busId 6b020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 294 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO ncclCommInitRankConfig comm 0x98a1870 rank 1 nranks 8 cudaDev 1 nvmlDev 1 busId 67020 commId 0xc3d8f44253f33569 - Init COMPLETE
|
| 295 |
+
t-20260513102957-v877z-worker-0:10222:10294 [0] NCCL INFO Init timings - ncclCommInitRankConfig: rank 0 nranks 8 total 2.21 (kernels 0.21, alloc 0.99, bootstrap 0.07, allgathers 0.00, topo 0.54, graphs 0.01, connections 0.35, rest 0.03)
|
| 296 |
+
t-20260513102957-v877z-worker-0:10224:10296 [2] NCCL INFO Init timings - ncclCommInitRankConfig: rank 2 nranks 8 total 2.21 (kernels 0.21, alloc 0.98, bootstrap 0.08, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
|
| 297 |
+
t-20260513102957-v877z-worker-0:10225:10298 [3] NCCL INFO Init timings - ncclCommInitRankConfig: rank 3 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.01, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.02)
|
| 298 |
+
t-20260513102957-v877z-worker-0:10223:10297 [1] NCCL INFO Init timings - ncclCommInitRankConfig: rank 1 nranks 8 total 2.17 (kernels 0.21, alloc 1.02, bootstrap 0.01, allgathers 0.01, topo 0.54, graphs 0.01, connections 0.36, rest 0.03)
|
| 299 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 00/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 300 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 00/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 301 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 01/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 302 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 01/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 303 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 02/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 304 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 02/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 305 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 03/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 306 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 03/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 307 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 04/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 308 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 00/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 309 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 04/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 310 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 05/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 311 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 01/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 312 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 05/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 313 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 06/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 314 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 02/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 315 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 06/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 316 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 07/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 317 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 03/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 318 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 07/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 319 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 08/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 320 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 00/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 321 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 04/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 322 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 08/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 323 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 09/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 324 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 01/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 325 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 05/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 326 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 00/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 327 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 09/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 328 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 00/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 329 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 10/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 330 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 02/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 331 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 06/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 332 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 01/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 333 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 10/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 334 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 01/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 335 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 11/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 336 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 03/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 337 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 07/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 338 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 02/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 339 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 11/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 340 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 02/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 341 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 12/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 342 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 04/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 343 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 08/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 344 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 03/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 345 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 12/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 346 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 03/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 347 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 13/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 348 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 05/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 349 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 09/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 350 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 04/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 351 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 13/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 352 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 04/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 353 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 14/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 354 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 06/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 355 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 10/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 356 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 05/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 357 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 14/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 358 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 05/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 359 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 15/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 360 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 07/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 361 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 11/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 362 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 06/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 363 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 15/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 364 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 06/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 365 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 16/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 366 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 08/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 367 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 12/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 368 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 07/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 369 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 16/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 370 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 07/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 371 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 17/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 372 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 09/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 373 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 00/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 374 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 13/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 375 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 08/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 376 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 17/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 377 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 08/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 378 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 18/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 379 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 10/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 380 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 01/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 381 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 14/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 382 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 09/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 383 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 18/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 384 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 09/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 385 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 19/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 386 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 11/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 387 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 00/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 388 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 02/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 389 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 15/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 390 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 10/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 391 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 19/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 392 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 10/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 393 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 20/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 394 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 12/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 395 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 01/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 396 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 03/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 397 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 16/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 398 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 11/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 399 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 20/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 400 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 11/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 401 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 21/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 402 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 13/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 403 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 02/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 404 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 04/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 405 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 17/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 406 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 12/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 407 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 21/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 408 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 12/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 409 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 22/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 410 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 14/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 411 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 03/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 412 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 05/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 413 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 18/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 414 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 13/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 415 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 22/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 416 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 13/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 417 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Channel 23/0 : 4[4] -> 5[5] via P2P/CUMEM
|
| 418 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 15/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 419 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 04/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 420 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 06/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 421 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 19/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 422 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 14/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 423 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Channel 23/0 : 1[1] -> 2[2] via P2P/CUMEM
|
| 424 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 14/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 425 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 16/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 426 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 05/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 427 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 07/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 428 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 20/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 429 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 15/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 430 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 15/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 431 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 17/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 432 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 06/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 433 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 08/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 434 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 21/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 435 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 16/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 436 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 16/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 437 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 18/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 438 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 07/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 439 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 09/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 440 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 22/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 441 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 17/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 442 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 17/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 443 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 19/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 444 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 08/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 445 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 10/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 446 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Channel 23/0 : 7[7] -> 0[0] via P2P/CUMEM
|
| 447 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 18/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 448 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 18/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 449 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 20/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 450 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 09/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 451 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 11/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 452 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 19/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 453 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 19/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 454 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 21/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 455 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 10/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 456 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 12/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 457 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 20/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 458 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 20/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 459 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 22/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 460 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 11/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 461 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 13/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 462 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 21/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 463 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 21/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 464 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Channel 23/0 : 6[6] -> 7[7] via P2P/CUMEM
|
| 465 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 12/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 466 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 14/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 467 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 22/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 468 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 22/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 469 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 13/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 470 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 15/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 471 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Channel 23/0 : 3[3] -> 4[4] via P2P/CUMEM
|
| 472 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Channel 23/0 : 0[0] -> 1[1] via P2P/CUMEM
|
| 473 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 14/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 474 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 16/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 475 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 15/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 476 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 17/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 477 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 16/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 478 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 18/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 479 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 17/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 480 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 19/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 481 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 18/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 482 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 20/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 483 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 19/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 484 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 21/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 485 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 20/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 486 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 22/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 487 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 21/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 488 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Channel 23/0 : 5[5] -> 6[6] via P2P/CUMEM
|
| 489 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 22/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 490 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Channel 23/0 : 2[2] -> 3[3] via P2P/CUMEM
|
| 491 |
+
t-20260513102957-v877z-worker-0:10228:10394 [6] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 492 |
+
t-20260513102957-v877z-worker-0:10222:10393 [0] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 493 |
+
t-20260513102957-v877z-worker-0:10229:10391 [7] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 494 |
+
t-20260513102957-v877z-worker-0:10227:10396 [5] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 495 |
+
t-20260513102957-v877z-worker-0:10225:10395 [3] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 496 |
+
t-20260513102957-v877z-worker-0:10223:10390 [1] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 497 |
+
t-20260513102957-v877z-worker-0:10224:10397 [2] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 498 |
+
t-20260513102957-v877z-worker-0:10226:10392 [4] NCCL INFO Connected all rings, use ring PXN 0 GDR 1
|
| 499 |
+
{
|
| 500 |
+
"device": "cuda:0",
|
| 501 |
+
"rank": 0,
|
| 502 |
+
"world_size": 8,
|
| 503 |
+
"samples": "owt_cached_chunks:8734897",
|
| 504 |
+
"vocab_size": 50257,
|
| 505 |
+
"tokenizer_vocab_size": 50257,
|
| 506 |
+
"save_dir": "runs/lta_owt_len1024_elfopt_muon_ema_ddit768x12_8gpu_5epoch_20260513_023024",
|
| 507 |
+
"batch_size": 8,
|
| 508 |
+
"grad_accum": 8,
|
| 509 |
+
"effective_batch_size": 512,
|
| 510 |
+
"global_batch_size": 512,
|
| 511 |
+
"lr_schedule": "constant_warmup",
|
| 512 |
+
"optimizer": "muon",
|
| 513 |
+
"warmup_steps": 8531,
|
| 514 |
+
"min_lr": 0.0,
|
| 515 |
+
"weight_decay": 0.0,
|
| 516 |
+
"adamw_param_groups": "nanogpt",
|
| 517 |
+
"adam_beta1": 0.9,
|
| 518 |
+
"adam_beta2": 0.95,
|
| 519 |
+
"adam_eps": 1e-08,
|
| 520 |
+
"muon_momentum": 0.95,
|
| 521 |
+
"muon_ns_steps": 5,
|
| 522 |
+
"muon_update_scale": 1.0,
|
| 523 |
+
"ema_decay": 0.9999,
|
| 524 |
+
"ema_start_step": 0,
|
| 525 |
+
"model_type": "ddit",
|
| 526 |
+
"dual_t": true,
|
| 527 |
+
"corrupt_t_mode": "independent",
|
| 528 |
+
"corrupt_min_t": null,
|
| 529 |
+
"corrupt_max_t": null,
|
| 530 |
+
"prefix_block_prob": 0.0,
|
| 531 |
+
"prefix_block_len": 128,
|
| 532 |
+
"dirichlet_endpoint_mode": "categorical_dual_t",
|
| 533 |
+
"dirichlet_semantic_t_mode": "same",
|
| 534 |
+
"dirichlet_semantic_t_value": 0.0,
|
| 535 |
+
"categorical_wrong_from_full_vocab": true,
|
| 536 |
+
"categorical_wrong_from_batch_valid_tokens": false,
|
| 537 |
+
"mask_mixture_original_prob": 0.0,
|
| 538 |
+
"mask_mixture_lowk_prob": 0.0,
|
| 539 |
+
"mask_mixture_lowcorrupt_prob": 0.0,
|
| 540 |
+
"mask_mixture_block_prob": 0.0,
|
| 541 |
+
"mask_mixture_all_prob": 0.0,
|
| 542 |
+
"mask_mixture_lowk_clean_tokens": "1,2,4,8,16,32,64",
|
| 543 |
+
"mask_mixture_lowcorrupt_tokens": "1,2,4,8,16,32,64",
|
| 544 |
+
"mask_mixture_block_tokens": "64,128",
|
| 545 |
+
"simplex_bridge_sampler": "dirichlet",
|
| 546 |
+
"logistic_normal_sigma_min": 0.18,
|
| 547 |
+
"logistic_normal_sigma_max": 2.2,
|
| 548 |
+
"logistic_normal_tau_min": 0.65,
|
| 549 |
+
"logistic_normal_tau_max": 1.15,
|
| 550 |
+
"torch_compile": false,
|
| 551 |
+
"compile_mode": "max-autotune",
|
| 552 |
+
"state_format": "prob",
|
| 553 |
+
"target_loss": "hard_ce",
|
| 554 |
+
"meanflow_weight": 0.0,
|
| 555 |
+
"bridge_noise_init": "logistic_normal",
|
| 556 |
+
"noise_sigma": -1.0,
|
| 557 |
+
"wrap": true,
|
| 558 |
+
"wrap_mode": "stream",
|
| 559 |
+
"wrap_record_buffer_size": 200,
|
| 560 |
+
"owt_cached_chunks": true,
|
| 561 |
+
"owt_chunk_cache_dir": "/e2e-data/evad-tech-vla/wanghan58/data/small_benchmarks/langflow_2604_11748/openwebtext_lta_cached_chunks/gpt2_len1024_train_minus_100k",
|
| 562 |
+
"owt_chunk_cache_rebuild": false,
|
| 563 |
+
"owt_chunk_cache_write_batch": 4096,
|
| 564 |
+
"owt_exact_repeat_per_chunk": 0,
|
| 565 |
+
"online_chunk_shuffle": false,
|
| 566 |
+
"online_chunk_shuffle_buffer": 10000,
|
| 567 |
+
"openwebtext_split": "all",
|
| 568 |
+
"detokenizer": "auto",
|
| 569 |
+
"resolved_detokenizer": null,
|
| 570 |
+
"num_workers": 0,
|
| 571 |
+
"latest_every": 1000,
|
| 572 |
+
"resume_path": ""
|
| 573 |
+
}
|
| 574 |
+
t-20260513102957-v877z-worker-0:10222:10486 [0] NCCL INFO NVLS comm 0xbd43ac0 headRank 0 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 575 |
+
t-20260513102957-v877z-worker-0:10226:10487 [4] NCCL INFO NVLS comm 0xa1cc500 headRank 4 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 576 |
+
t-20260513102957-v877z-worker-0:10223:10488 [1] NCCL INFO NVLS comm 0x98a1870 headRank 1 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 577 |
+
t-20260513102957-v877z-worker-0:10224:10489 [2] NCCL INFO NVLS comm 0x98ac490 headRank 2 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 578 |
+
t-20260513102957-v877z-worker-0:10229:10490 [7] NCCL INFO NVLS comm 0xaa777e0 headRank 7 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 579 |
+
t-20260513102957-v877z-worker-0:10228:10491 [6] NCCL INFO NVLS comm 0xa99de10 headRank 6 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 580 |
+
t-20260513102957-v877z-worker-0:10227:10492 [5] NCCL INFO NVLS comm 0xa87b0d0 headRank 5 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 581 |
+
t-20260513102957-v877z-worker-0:10225:10493 [3] NCCL INFO NVLS comm 0x9b15700 headRank 3 nHeads 8 buffSize 1048576 nvlsPerRankSize 33554432 nvlsTotalSize 268435456
|
| 582 |
+
step=50 micro_steps=400 elapsed=48.6s lr=1.195639e-05 loss_all=10.8125 acc_all=0.5523 loss_corrupt=10.8125 acc_corrupt=0.3757 corrupt_frac=0.5552 loss=10.8125 loss_recon=10.8125 loss_meanflow=0.0000 mean_model_t=0.4944 mean_corrupt_t=0.5036 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4934 init_acc_corrupt=0.4725 init_gold_top10=0.5009 init_gold_top100=0.5309
|
| 583 |
+
step=100 micro_steps=800 elapsed=47.4s lr=2.367835e-05 loss_all=10.8125 acc_all=0.5656 loss_corrupt=10.8125 acc_corrupt=0.3881 corrupt_frac=0.5519 loss=10.8125 loss_recon=10.8125 loss_meanflow=0.0000 mean_model_t=0.4959 mean_corrupt_t=0.5064 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4916 init_acc_corrupt=0.4749 init_gold_top10=0.5030 init_gold_top100=0.5319
|
| 584 |
+
step=150 micro_steps=1200 elapsed=47.3s lr=3.540030e-05 loss_all=10.7976 acc_all=0.5622 loss_corrupt=10.8031 acc_corrupt=0.3899 corrupt_frac=0.5621 loss=10.8031 loss_recon=10.8031 loss_meanflow=0.0000 mean_model_t=0.5121 mean_corrupt_t=0.5055 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4925 init_acc_corrupt=0.4729 init_gold_top10=0.5021 init_gold_top100=0.5300
|
| 585 |
+
step=200 micro_steps=1600 elapsed=47.3s lr=4.712226e-05 loss_all=10.7770 acc_all=0.5614 loss_corrupt=10.7891 acc_corrupt=0.3900 corrupt_frac=0.5531 loss=10.7891 loss_recon=10.7891 loss_meanflow=0.0000 mean_model_t=0.5059 mean_corrupt_t=0.5118 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4887 init_acc_corrupt=0.4772 init_gold_top10=0.5058 init_gold_top100=0.5343
|
| 586 |
+
step=250 micro_steps=2000 elapsed=47.7s lr=5.884422e-05 loss_all=10.7465 acc_all=0.5551 loss_corrupt=10.7697 acc_corrupt=0.3777 corrupt_frac=0.5506 loss=10.7697 loss_recon=10.7697 loss_meanflow=0.0000 mean_model_t=0.4963 mean_corrupt_t=0.5016 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5008 init_acc_corrupt=0.4645 init_gold_top10=0.4934 init_gold_top100=0.5233
|
| 587 |
+
step=300 micro_steps=2400 elapsed=47.4s lr=7.056617e-05 loss_all=10.7068 acc_all=0.5429 loss_corrupt=10.7407 acc_corrupt=0.3755 corrupt_frac=0.5615 loss=10.7407 loss_recon=10.7407 loss_meanflow=0.0000 mean_model_t=0.4975 mean_corrupt_t=0.5060 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4925 init_acc_corrupt=0.4735 init_gold_top10=0.5019 init_gold_top100=0.5309
|
| 588 |
+
step=350 micro_steps=2800 elapsed=47.2s lr=8.228813e-05 loss_all=10.6568 acc_all=0.5378 loss_corrupt=10.7063 acc_corrupt=0.3645 corrupt_frac=0.5504 loss=10.7063 loss_recon=10.7063 loss_meanflow=0.0000 mean_model_t=0.4953 mean_corrupt_t=0.5088 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4963 init_acc_corrupt=0.4697 init_gold_top10=0.4980 init_gold_top100=0.5278
|
| 589 |
+
step=400 micro_steps=3200 elapsed=47.6s lr=9.401008e-05 loss_all=10.5988 acc_all=0.5316 loss_corrupt=10.6653 acc_corrupt=0.3571 corrupt_frac=0.5454 loss=10.6653 loss_recon=10.6653 loss_meanflow=0.0000 mean_model_t=0.4950 mean_corrupt_t=0.5016 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5001 init_acc_corrupt=0.4643 init_gold_top10=0.4942 init_gold_top100=0.5236
|
| 590 |
+
step=450 micro_steps=3600 elapsed=47.2s lr=1.057320e-04 loss_all=10.5302 acc_all=0.5254 loss_corrupt=10.6143 acc_corrupt=0.3554 corrupt_frac=0.5518 loss=10.6143 loss_recon=10.6143 loss_meanflow=0.0000 mean_model_t=0.5052 mean_corrupt_t=0.4974 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4970 init_acc_corrupt=0.4686 init_gold_top10=0.4972 init_gold_top100=0.5272
|
| 591 |
+
step=500 micro_steps=4000 elapsed=47.2s lr=1.174540e-04 loss_all=10.4504 acc_all=0.5179 loss_corrupt=10.5520 acc_corrupt=0.3546 corrupt_frac=0.5567 loss=10.5520 loss_recon=10.5520 loss_meanflow=0.0000 mean_model_t=0.5000 mean_corrupt_t=0.5107 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4885 init_acc_corrupt=0.4787 init_gold_top10=0.5065 init_gold_top100=0.5332
|
| 592 |
+
step=550 micro_steps=4400 elapsed=47.2s lr=1.291759e-04 loss_all=10.3596 acc_all=0.5119 loss_corrupt=10.4883 acc_corrupt=0.3445 corrupt_frac=0.5446 loss=10.4883 loss_recon=10.4883 loss_meanflow=0.0000 mean_model_t=0.5052 mean_corrupt_t=0.5019 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4987 init_acc_corrupt=0.4672 init_gold_top10=0.4957 init_gold_top100=0.5252
|
| 593 |
+
step=600 micro_steps=4800 elapsed=47.4s lr=1.408979e-04 loss_all=10.2615 acc_all=0.5024 loss_corrupt=10.4195 acc_corrupt=0.3340 corrupt_frac=0.5407 loss=10.4195 loss_recon=10.4195 loss_meanflow=0.0000 mean_model_t=0.5048 mean_corrupt_t=0.4963 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5042 init_acc_corrupt=0.4609 init_gold_top10=0.4897 init_gold_top100=0.5213
|
| 594 |
+
step=650 micro_steps=5200 elapsed=47.2s lr=1.526199e-04 loss_all=10.1555 acc_all=0.4913 loss_corrupt=10.3425 acc_corrupt=0.3260 corrupt_frac=0.5479 loss=10.3425 loss_recon=10.3425 loss_meanflow=0.0000 mean_model_t=0.4990 mean_corrupt_t=0.4946 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5096 init_acc_corrupt=0.4549 init_gold_top10=0.4844 init_gold_top100=0.5153
|
| 595 |
+
step=700 micro_steps=5600 elapsed=47.2s lr=1.643418e-04 loss_all=10.0314 acc_all=0.4856 loss_corrupt=10.2464 acc_corrupt=0.3260 corrupt_frac=0.5511 loss=10.2464 loss_recon=10.2464 loss_meanflow=0.0000 mean_model_t=0.4999 mean_corrupt_t=0.4957 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5051 init_acc_corrupt=0.4599 init_gold_top10=0.4890 init_gold_top100=0.5194
|
| 596 |
+
step=750 micro_steps=6000 elapsed=47.3s lr=1.760638e-04 loss_all=9.8863 acc_all=0.4836 loss_corrupt=10.1351 acc_corrupt=0.3265 corrupt_frac=0.5437 loss=10.1351 loss_recon=10.1351 loss_meanflow=0.0000 mean_model_t=0.5055 mean_corrupt_t=0.5025 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4950 init_acc_corrupt=0.4706 init_gold_top10=0.4993 init_gold_top100=0.5293
|
| 597 |
+
step=800 micro_steps=6400 elapsed=47.4s lr=1.877857e-04 loss_all=9.7404 acc_all=0.4758 loss_corrupt=10.0222 acc_corrupt=0.3227 corrupt_frac=0.5505 loss=10.0222 loss_recon=10.0222 loss_meanflow=0.0000 mean_model_t=0.5058 mean_corrupt_t=0.5005 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4982 init_acc_corrupt=0.4659 init_gold_top10=0.4959 init_gold_top100=0.5264
|
| 598 |
+
step=850 micro_steps=6800 elapsed=47.2s lr=1.995077e-04 loss_all=9.5817 acc_all=0.4695 loss_corrupt=9.8946 acc_corrupt=0.3218 corrupt_frac=0.5568 loss=9.8946 loss_recon=9.8946 loss_meanflow=0.0000 mean_model_t=0.5045 mean_corrupt_t=0.5047 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4955 init_acc_corrupt=0.4712 init_gold_top10=0.4988 init_gold_top100=0.5291
|
| 599 |
+
step=900 micro_steps=7200 elapsed=47.2s lr=2.112296e-04 loss_all=9.4086 acc_all=0.4659 loss_corrupt=9.7618 acc_corrupt=0.3203 corrupt_frac=0.5578 loss=9.7618 loss_recon=9.7618 loss_meanflow=0.0000 mean_model_t=0.4962 mean_corrupt_t=0.5050 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4990 init_acc_corrupt=0.4659 init_gold_top10=0.4954 init_gold_top100=0.5254
|
| 600 |
+
step=950 micro_steps=7600 elapsed=47.2s lr=2.229516e-04 loss_all=9.2066 acc_all=0.4715 loss_corrupt=9.6143 acc_corrupt=0.3233 corrupt_frac=0.5466 loss=9.6143 loss_recon=9.6143 loss_meanflow=0.0000 mean_model_t=0.5056 mean_corrupt_t=0.5013 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4970 init_acc_corrupt=0.4677 init_gold_top10=0.4974 init_gold_top100=0.5269
|
| 601 |
+
step=1000 micro_steps=8000 elapsed=48.0s lr=2.346735e-04 loss_all=9.0073 acc_all=0.4722 loss_corrupt=9.4652 acc_corrupt=0.3230 corrupt_frac=0.5506 loss=9.4652 loss_recon=9.4652 loss_meanflow=0.0000 mean_model_t=0.4978 mean_corrupt_t=0.4901 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5091 init_acc_corrupt=0.4545 init_gold_top10=0.4849 init_gold_top100=0.5158
|
| 602 |
+
step=1050 micro_steps=8400 elapsed=49.4s lr=2.463955e-04 loss_all=8.7539 acc_all=0.4867 loss_corrupt=9.2592 acc_corrupt=0.3387 corrupt_frac=0.5455 loss=9.2592 loss_recon=9.2592 loss_meanflow=0.0000 mean_model_t=0.5068 mean_corrupt_t=0.4987 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4964 init_acc_corrupt=0.4689 init_gold_top10=0.4977 init_gold_top100=0.5280
|
| 603 |
+
step=1100 micro_steps=8800 elapsed=47.3s lr=2.581175e-04 loss_all=8.5291 acc_all=0.4858 loss_corrupt=9.1069 acc_corrupt=0.3332 corrupt_frac=0.5487 loss=9.1069 loss_recon=9.1069 loss_meanflow=0.0000 mean_model_t=0.5076 mean_corrupt_t=0.4923 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5131 init_acc_corrupt=0.4507 init_gold_top10=0.4807 init_gold_top100=0.5131
|
| 604 |
+
step=1150 micro_steps=9200 elapsed=47.3s lr=2.698394e-04 loss_all=8.2256 acc_all=0.5009 loss_corrupt=8.8483 acc_corrupt=0.3518 corrupt_frac=0.5492 loss=8.8483 loss_recon=8.8483 loss_meanflow=0.0000 mean_model_t=0.4880 mean_corrupt_t=0.5105 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4939 init_acc_corrupt=0.4728 init_gold_top10=0.5006 init_gold_top100=0.5300
|
| 605 |
+
step=1200 micro_steps=9600 elapsed=47.2s lr=2.815614e-04 loss_all=7.9819 acc_all=0.4942 loss_corrupt=8.6724 acc_corrupt=0.3445 corrupt_frac=0.5594 loss=8.6724 loss_recon=8.6724 loss_meanflow=0.0000 mean_model_t=0.5078 mean_corrupt_t=0.4905 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5065 init_acc_corrupt=0.4569 init_gold_top10=0.4872 init_gold_top100=0.5201
|
| 606 |
+
step=1250 micro_steps=10000 elapsed=47.1s lr=2.932833e-04 loss_all=7.6269 acc_all=0.5052 loss_corrupt=8.4087 acc_corrupt=0.3514 corrupt_frac=0.5458 loss=8.4087 loss_recon=8.4087 loss_meanflow=0.0000 mean_model_t=0.4963 mean_corrupt_t=0.4999 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5008 init_acc_corrupt=0.4647 init_gold_top10=0.4933 init_gold_top100=0.5238
|
| 607 |
+
step=1300 micro_steps=10400 elapsed=47.2s lr=3.050053e-04 loss_all=7.3064 acc_all=0.5058 loss_corrupt=8.1541 acc_corrupt=0.3535 corrupt_frac=0.5543 loss=8.1541 loss_recon=8.1541 loss_meanflow=0.0000 mean_model_t=0.4980 mean_corrupt_t=0.4979 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5009 init_acc_corrupt=0.4644 init_gold_top10=0.4933 init_gold_top100=0.5236
|
| 608 |
+
step=1350 micro_steps=10800 elapsed=47.3s lr=3.167272e-04 loss_all=6.9403 acc_all=0.5105 loss_corrupt=7.8690 acc_corrupt=0.3577 corrupt_frac=0.5550 loss=7.8690 loss_recon=7.8690 loss_meanflow=0.0000 mean_model_t=0.4935 mean_corrupt_t=0.5024 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4980 init_acc_corrupt=0.4682 init_gold_top10=0.4961 init_gold_top100=0.5263
|
| 609 |
+
step=1400 micro_steps=11200 elapsed=47.3s lr=3.284492e-04 loss_all=6.6098 acc_all=0.5081 loss_corrupt=7.6173 acc_corrupt=0.3549 corrupt_frac=0.5601 loss=7.6173 loss_recon=7.6173 loss_meanflow=0.0000 mean_model_t=0.4999 mean_corrupt_t=0.4967 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5063 init_acc_corrupt=0.4582 init_gold_top10=0.4878 init_gold_top100=0.5183
|
| 610 |
+
step=1450 micro_steps=11600 elapsed=47.5s lr=3.401711e-04 loss_all=6.2064 acc_all=0.5188 loss_corrupt=7.2967 acc_corrupt=0.3645 corrupt_frac=0.5511 loss=7.2967 loss_recon=7.2967 loss_meanflow=0.0000 mean_model_t=0.4940 mean_corrupt_t=0.5044 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4966 init_acc_corrupt=0.4682 init_gold_top10=0.4978 init_gold_top100=0.5272
|
| 611 |
+
step=1500 micro_steps=12000 elapsed=47.1s lr=3.518931e-04 loss_all=5.8854 acc_all=0.5207 loss_corrupt=7.0334 acc_corrupt=0.3677 corrupt_frac=0.5555 loss=7.0334 loss_recon=7.0334 loss_meanflow=0.0000 mean_model_t=0.4951 mean_corrupt_t=0.5001 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4963 init_acc_corrupt=0.4692 init_gold_top10=0.4981 init_gold_top100=0.5273
|
| 612 |
+
step=1550 micro_steps=12400 elapsed=47.5s lr=3.636151e-04 loss_all=5.6304 acc_all=0.5225 loss_corrupt=6.8543 acc_corrupt=0.3653 corrupt_frac=0.5503 loss=6.8543 loss_recon=6.8543 loss_meanflow=0.0000 mean_model_t=0.4989 mean_corrupt_t=0.5031 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4987 init_acc_corrupt=0.4659 init_gold_top10=0.4958 init_gold_top100=0.5247
|
| 613 |
+
step=1600 micro_steps=12800 elapsed=47.3s lr=3.753370e-04 loss_all=5.4321 acc_all=0.5248 loss_corrupt=6.7023 acc_corrupt=0.3652 corrupt_frac=0.5462 loss=6.7023 loss_recon=6.7023 loss_meanflow=0.0000 mean_model_t=0.5041 mean_corrupt_t=0.4979 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.5059 init_acc_corrupt=0.4579 init_gold_top10=0.4881 init_gold_top100=0.5198
|
| 614 |
+
step=1650 micro_steps=13200 elapsed=47.2s lr=3.870590e-04 loss_all=5.2358 acc_all=0.5320 loss_corrupt=6.4720 acc_corrupt=0.3799 corrupt_frac=0.5497 loss=6.4720 loss_recon=6.4720 loss_meanflow=0.0000 mean_model_t=0.5084 mean_corrupt_t=0.5027 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4927 init_acc_corrupt=0.4734 init_gold_top10=0.5017 init_gold_top100=0.5309
|
| 615 |
+
step=1700 micro_steps=13600 elapsed=47.2s lr=3.987809e-04 loss_all=5.0956 acc_all=0.5353 loss_corrupt=6.3531 acc_corrupt=0.3809 corrupt_frac=0.5492 loss=6.3531 loss_recon=6.3531 loss_meanflow=0.0000 mean_model_t=0.4959 mean_corrupt_t=0.5046 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4967 init_acc_corrupt=0.4688 init_gold_top10=0.4975 init_gold_top100=0.5289
|
| 616 |
+
step=1750 micro_steps=14000 elapsed=47.1s lr=4.105029e-04 loss_all=4.9791 acc_all=0.5365 loss_corrupt=6.2240 acc_corrupt=0.3836 corrupt_frac=0.5514 loss=6.2240 loss_recon=6.2240 loss_meanflow=0.0000 mean_model_t=0.4896 mean_corrupt_t=0.5021 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4968 init_acc_corrupt=0.4682 init_gold_top10=0.4975 init_gold_top100=0.5266
|
| 617 |
+
step=1800 micro_steps=14400 elapsed=47.2s lr=4.222248e-04 loss_all=4.8119 acc_all=0.5441 loss_corrupt=6.0439 acc_corrupt=0.3934 corrupt_frac=0.5495 loss=6.0439 loss_recon=6.0439 loss_meanflow=0.0000 mean_model_t=0.5011 mean_corrupt_t=0.5048 mean_loss_t_weight=1.0000 prior_center_loss_beta=0.0000 wrong_frac=0.4894 init_acc_corrupt=0.4760 init_gold_top10=0.5053 init_gold_top100=0.5336
|
LTA_openwebtext_dualt/logs/fullycoupled_loss1mt_floor0p25_8gpu/lta_owt_gpt2cached_len1024_fullycoupled_rmsnorm_nobias_adamw_wd0p1_loss1mt_floor0p25_nanogpt_tf32_ddit768x12_gbs512_8gpu_1m_20260514_230726.log
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0010000_state_fromstate_t1p45.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] 2026-05-20_23:01:09 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 2 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt step=10000
|
| 3 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 4 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 5 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 6 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt", "step": 10000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 8.05236863767061, "nll_per_token": 2.085966288854589, "tokens": 7891, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 7.63913892725258, "nll_per_token": 2.0332848909696546, "tokens": 7911, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.8837622158001541, "unique_tokens": 274, "token_count": 8192, "distinct_1": 0.033447265625, "distinct_2": 0.09547244094488189, "top_token_mass": 0.3001708984375}}
|
| 7 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[watch-infer] 2026-05-20_23:01:31 done step_0010000
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0020000_state_fromstate_t1p45.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] 2026-05-21_01:03:31 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 2 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt step=20000
|
| 3 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 4 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 5 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 6 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt", "step": 20000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 43.29514508349064, "nll_per_token": 3.768040505939089, "tokens": 7277, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 44.71103365440866, "nll_per_token": 3.8002203090662934, "tokens": 7167, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.680576719579072, "unique_tokens": 989, "token_count": 8192, "distinct_1": 0.1207275390625, "distinct_2": 0.436884842519685, "top_token_mass": 0.0694580078125}}
|
| 7 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[watch-infer] 2026-05-21_01:04:00 done step_0020000
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0030000_state_fromstate_t1p45.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] 2026-05-21_03:42:01 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 2 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt step=30000
|
| 3 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 4 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 5 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 6 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt", "step": 30000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 32.519971378109084, "nll_per_token": 3.4818544045472755, "tokens": 7020, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 34.09647656081684, "nll_per_token": 3.529194052288641, "tokens": 6748, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.1030114352732854, "unique_tokens": 881, "token_count": 8192, "distinct_1": 0.1075439453125, "distinct_2": 0.3661417322834646, "top_token_mass": 0.100341796875}}
|
| 7 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[watch-infer] 2026-05-21_03:42:30 done step_0030000
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0040000_state_fromstate_t1p45.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] 2026-05-21_06:20:32 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 2 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt step=40000
|
| 3 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 4 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 5 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 6 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt", "step": 40000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 35.01775070788609, "nll_per_token": 3.555855096008144, "tokens": 7383, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 35.25691068369559, "nll_per_token": 3.5626615578078398, "tokens": 7376, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.4672587399947643, "unique_tokens": 1033, "token_count": 8192, "distinct_1": 0.1260986328125, "distinct_2": 0.42913385826771655, "top_token_mass": 0.06982421875}}
|
| 7 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[watch-infer] 2026-05-21_06:21:00 done step_0040000
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/infer_step_0050000_state_fromstate_t1p45.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] 2026-05-21_08:58:02 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 2 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt step=50000
|
| 3 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 4 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 5 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 6 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt", "step": 50000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 39.13239939115062, "nll_per_token": 3.666950752797587, "tokens": 5574, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 38.076086050208254, "nll_per_token": 3.6395864223457512, "tokens": 5543, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.8907815168852227, "unique_tokens": 899, "token_count": 8192, "distinct_1": 0.1097412109375, "distinct_2": 0.3246801181102362, "top_token_mass": 0.1265869140625}}
|
| 7 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[watch-infer] 2026-05-21_08:58:34 done step_0050000
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt
|
| 2 |
+
runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt
|
| 3 |
+
runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt
|
| 4 |
+
runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt
|
| 5 |
+
runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt
|
LTA_openwebtext_dualt/logs/lm1b_v8192_len128_infer_watch/watch_every10k_state_t1p45.nohup.log
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[watch-infer] run_dir=runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739
|
| 2 |
+
[watch-infer] out_base=docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45
|
| 3 |
+
[watch-infer] processed_file=logs/lm1b_v8192_len128_infer_watch/processed_every10k_state_fromstate_t1p45.txt
|
| 4 |
+
[watch-infer] decode=dual_line_resample anchor=state final_from=state final_sample=argmax temp=1.45 steps=128 cmax=1024 n=64
|
| 5 |
+
[watch-infer] 2026-05-20_22:59:09 no step_*.pt yet
|
| 6 |
+
[watch-infer] 2026-05-20_23:00:09 no step_*.pt yet
|
| 7 |
+
[watch-infer] 2026-05-20_23:01:09 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 8 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt step=10000
|
| 9 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 10 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 11 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 12 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0010000.pt", "step": 10000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 8.05236863767061, "nll_per_token": 2.085966288854589, "tokens": 7891, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 7.63913892725258, "nll_per_token": 2.0332848909696546, "tokens": 7911, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 1.8837622158001541, "unique_tokens": 274, "token_count": 8192, "distinct_1": 0.033447265625, "distinct_2": 0.09547244094488189, "top_token_mass": 0.3001708984375}}
|
| 13 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0010000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 14 |
+
[watch-infer] 2026-05-20_23:01:31 done step_0010000
|
| 15 |
+
[watch-infer] 2026-05-21_01:03:31 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 16 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt step=20000
|
| 17 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 18 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 19 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 20 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0020000.pt", "step": 20000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 43.29514508349064, "nll_per_token": 3.768040505939089, "tokens": 7277, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 44.71103365440866, "nll_per_token": 3.8002203090662934, "tokens": 7167, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.680576719579072, "unique_tokens": 989, "token_count": 8192, "distinct_1": 0.1207275390625, "distinct_2": 0.436884842519685, "top_token_mass": 0.0694580078125}}
|
| 21 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0020000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 22 |
+
[watch-infer] 2026-05-21_01:04:00 done step_0020000
|
| 23 |
+
[watch-infer] 2026-05-21_03:42:01 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 24 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt step=30000
|
| 25 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 26 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 27 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 28 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0030000.pt", "step": 30000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 32.519971378109084, "nll_per_token": 3.4818544045472755, "tokens": 7020, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 34.09647656081684, "nll_per_token": 3.529194052288641, "tokens": 6748, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.1030114352732854, "unique_tokens": 881, "token_count": 8192, "distinct_1": 0.1075439453125, "distinct_2": 0.3661417322834646, "top_token_mass": 0.100341796875}}
|
| 29 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0030000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 30 |
+
[watch-infer] 2026-05-21_03:42:30 done step_0030000
|
| 31 |
+
[watch-infer] 2026-05-21_06:20:32 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 32 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt step=40000
|
| 33 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 34 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 35 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 36 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0040000.pt", "step": 40000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 35.01775070788609, "nll_per_token": 3.555855096008144, "tokens": 7383, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 35.25691068369559, "nll_per_token": 3.5626615578078398, "tokens": 7376, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 3.4672587399947643, "unique_tokens": 1033, "token_count": 8192, "distinct_1": 0.1260986328125, "distinct_2": 0.42913385826771655, "top_token_mass": 0.06982421875}}
|
| 37 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0040000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 38 |
+
[watch-infer] 2026-05-21_06:21:00 done step_0040000
|
| 39 |
+
[watch-infer] 2026-05-21_08:58:02 infer runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt -> docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 40 |
+
[ckpt] runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt step=50000
|
| 41 |
+
[decode-base] n=64 max_len=128 steps=128 model_t=flow
|
| 42 |
+
[decode-time] schedule=linear s=[0.0,0.25] gumbel=(2.2,0.8) force_final=True t0=0.000000 t_mid=0.500000 t_end=1.000000 dt_mean=0.007812 dt_max=0.007812
|
| 43 |
+
[decode] temp=1.45 final=state rule=dual_line_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform decode_freq_penalty=0/0/0-1^1 final_sample=argmax/1/k64/p0.95 freq_penalty=0/0/0 start_t=0 start_init=noise time_path=0.0000->1.0000 generated 64/64
|
| 44 |
+
[summary] {"type": "summary", "checkpoint": "runs/lta_lm1b_compact_gpt2bpe_v8192_len128_repro_mask1_uniformt_fp32_ddit768x12_gbs512_4gpu_1m_20260520_221739/step_0050000.pt", "step": 50000, "decode": {"steps": 128, "model_t_mode": "flow", "decode_time_schedule": "linear", "decode_s_min_frac": 0.0, "decode_s_max_frac": 0.25, "decode_force_final_t": true, "decode_time_grid": [0.0, 0.0078125, 0.015625, 0.0234375, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.1328125, 0.140625, 0.1484375, 0.15625, 0.1640625, 0.171875, 0.1796875, 0.1875, 0.1953125, 0.203125, 0.2109375, 0.21875, 0.2265625, 0.234375, 0.2421875, 0.25, 0.2578125, 0.265625, 0.2734375, 0.28125, 0.2890625, 0.296875, 0.3046875, 0.3125, 0.3203125, 0.328125, 0.3359375, 0.34375, 0.3515625, 0.359375, 0.3671875, 0.375, 0.3828125, 0.390625, 0.3984375, 0.40625, 0.4140625, 0.421875, 0.4296875, 0.4375, 0.4453125, 0.453125, 0.4609375, 0.46875, 0.4765625, 0.484375, 0.4921875, 0.5, 0.5078125, 0.515625, 0.5234375, 0.53125, 0.5390625, 0.546875, 0.5546875, 0.5625, 0.5703125, 0.578125, 0.5859375, 0.59375, 0.6015625, 0.609375, 0.6171875, 0.625, 0.6328125, 0.640625, 0.6484375, 0.65625, 0.6640625, 0.671875, 0.6796875, 0.6875, 0.6953125, 0.703125, 0.7109375, 0.71875, 0.7265625, 0.734375, 0.7421875, 0.75, 0.7578125, 0.765625, 0.7734375, 0.78125, 0.7890625, 0.796875, 0.8046875, 0.8125, 0.8203125, 0.828125, 0.8359375, 0.84375, 0.8515625, 0.859375, 0.8671875, 0.875, 0.8828125, 0.890625, 0.8984375, 0.90625, 0.9140625, 0.921875, 0.9296875, 0.9375, 0.9453125, 0.953125, 0.9609375, 0.96875, 0.9765625, 0.984375, 0.9921875, 1.0], "decode_rule": "dual_line_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "decode_freq_penalty_alpha": 0.0, "decode_freq_penalty_beta": 0.0, "decode_freq_penalty_floor": 0.0, "decode_freq_penalty_start": 0.0, 
"decode_freq_penalty_end": 1.0, "decode_freq_penalty_power": 1.0, "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 1024.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "final_sample_mode": "argmax", "final_sample_temp": 1.0, "final_top_k": 64, "final_top_p": 0.95, "final_freq_penalty_alpha": 0.0, "final_freq_penalty_beta": 0.0, "final_freq_penalty_floor": 0.0, "lock_bos": false, "n_samples": 64, "seed": 20260520}, "raw_genppl": {"ppl": 39.13239939115062, "nll_per_token": 3.666950752797587, "tokens": 5574, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 38.076086050208254, "nll_per_token": 3.6395864223457512, "tokens": 5543, "kept_samples": 64, "total_samples": 64, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.8907815168852227, "unique_tokens": 899, "token_count": 8192, "distinct_1": 0.1097412109375, "distinct_2": 0.3246801181102362, "top_token_mass": 0.1265869140625}}
|
| 45 |
+
[done] docs/lta_samples/metrics_20260520/lm1b_v8192_len128_every10k_dualline_state_fromstate_t1p45/step_0050000/decode_dual_line_resample_state_fromstate_t1p45_c1024_steps128.jsonl
|
| 46 |
+
[watch-infer] 2026-05-21_08:58:34 done step_0050000
|
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2.dirichlet_resample.eval.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_maskfloor_gamma2/latest.pt step=1000
|
| 2 |
+
[decode-base] n=16 max_len=1024 steps=64 model_t=flow
|
| 3 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
|
| 4 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
|
| 5 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
|
| 6 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
|
| 7 |
+
[summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_maskfloor_gamma2/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 205.89546320407374, "nll_per_token": 5.327368579813691, "tokens": 11862, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 186.53793153722665, "nll_per_token": 5.228634604662277, "tokens": 11770, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.3015467149505735, "unique_tokens": 2801, "token_count": 16384, "distinct_1": 0.17095947265625, "distinct_2": 0.34836265884652984, "top_token_mass": 0.55841064453125}}
|
| 8 |
+
[done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/maskfloor_gamma2_dirichlet_resample.jsonl
|
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce.flowmap.eval.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_drop_lowt_ce/latest.pt step=1000
|
| 2 |
+
[decode-base] n=16 max_len=1024 steps=64 model_t=flow
|
| 3 |
+
[decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
|
| 4 |
+
[decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
|
| 5 |
+
[decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
|
| 6 |
+
[decode] temp=1.45 final=state rule=flowmap support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
|
| 7 |
+
[summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_drop_lowt_ce/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "flowmap", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 18.545462901559578, "nll_per_token": 2.9202251716200354, "tokens": 406, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 12.73431918253559, "nll_per_token": 2.5443006466596554, "tokens": 390, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 0.04249307349759278, "unique_tokens": 18, "token_count": 16384, "distinct_1": 0.0010986328125, "distinct_2": 0.002993646138807429, "top_token_mass": 0.9935302734375}}
|
| 8 |
+
[done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_drop_lowt_ce_flowmap.jsonl
|
LTA_openwebtext_dualt/logs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor.dirichlet_resample.eval.log
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[ckpt] runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_linear_nomaskfloor/latest.pt step=1000
|
| 2 |
+
[decode-base] n=16 max_len=1024 steps=64 model_t=flow
|
| 3 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 4/16
|
| 4 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 8/16
|
| 5 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 12/16
|
| 6 |
+
[decode] temp=1.45 final=state rule=dirichlet_resample support=1 semantic=1 anchor=state cfg=0/1@0:uniform start_t=0 start_init=noise generated 16/16
|
| 7 |
+
[summary] {"type": "summary", "checkpoint": "runs/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234_old_linear_nomaskfloor/latest.pt", "step": 1000, "decode": {"steps": 64, "model_t_mode": "flow", "decode_rule": "dirichlet_resample", "support_power": 1.0, "semantic_power": 1.0, "anchor_mode": "state", "cfg_scale": 0.0, "cfg_power": 1.0, "cfg_start": 0.0, "cfg_prior": "uniform", "start_t": 0.0, "start_init": "noise", "noise_init": "dirichlet", "noise_sigma": -1.0, "dirichlet_concentration": 1.0, "concentration_min": 1.0, "concentration_max": 64.0, "target_prob": 1.0, "endpoint_temp": 1.45, "final_from": "state", "lock_bos": false, "n_samples": 16, "seed": 20260503}, "raw_genppl": {"ppl": 29.662822677002694, "nll_per_token": 3.389894499983462, "tokens": 15117, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "stripped_genppl": {"ppl": 26.313876829298174, "nll_per_token": 3.2700964361277274, "tokens": 14711, "kept_samples": 16, "total_samples": 16, "empty_rate": 0.0, "skipped_samples": 0}, "diversity": {"sample_entropy": 2.398402485948885, "unique_tokens": 704, "token_count": 16384, "distinct_1": 0.04296875, "distinct_2": 0.12243401759530792, "top_token_mass": 0.28253173828125}}
|
| 8 |
+
[done] docs/lta_samples/metrics_20260513/lowt_hard_vs_easy_c1024_fullvocab_ctx1024_384x6_20260513_202234/old_linear_nomaskfloor_dirichlet_resample.jsonl
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/__init__.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
import logging
|
| 8 |
+
|
| 9 |
+
__version__ = '0.3.8'
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class DistlibException(Exception):
|
| 13 |
+
pass
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
try:
|
| 17 |
+
from logging import NullHandler
|
| 18 |
+
except ImportError: # pragma: no cover
|
| 19 |
+
|
| 20 |
+
class NullHandler(logging.Handler):
|
| 21 |
+
|
| 22 |
+
def handle(self, record):
|
| 23 |
+
pass
|
| 24 |
+
|
| 25 |
+
def emit(self, record):
|
| 26 |
+
pass
|
| 27 |
+
|
| 28 |
+
def createLock(self):
|
| 29 |
+
self.lock = None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
logger = logging.getLogger(__name__)
|
| 33 |
+
logger.addHandler(NullHandler())
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/compat.py
ADDED
|
@@ -0,0 +1,1138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2013-2017 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
from __future__ import absolute_import
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
+
import shutil
|
| 12 |
+
import sys
|
| 13 |
+
|
| 14 |
+
try:
|
| 15 |
+
import ssl
|
| 16 |
+
except ImportError: # pragma: no cover
|
| 17 |
+
ssl = None
|
| 18 |
+
|
| 19 |
+
if sys.version_info[0] < 3: # pragma: no cover
|
| 20 |
+
from StringIO import StringIO
|
| 21 |
+
string_types = basestring,
|
| 22 |
+
text_type = unicode
|
| 23 |
+
from types import FileType as file_type
|
| 24 |
+
import __builtin__ as builtins
|
| 25 |
+
import ConfigParser as configparser
|
| 26 |
+
from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit
|
| 27 |
+
from urllib import (urlretrieve, quote as _quote, unquote, url2pathname,
|
| 28 |
+
pathname2url, ContentTooShortError, splittype)
|
| 29 |
+
|
| 30 |
+
def quote(s):
|
| 31 |
+
if isinstance(s, unicode):
|
| 32 |
+
s = s.encode('utf-8')
|
| 33 |
+
return _quote(s)
|
| 34 |
+
|
| 35 |
+
import urllib2
|
| 36 |
+
from urllib2 import (Request, urlopen, URLError, HTTPError,
|
| 37 |
+
HTTPBasicAuthHandler, HTTPPasswordMgr, HTTPHandler,
|
| 38 |
+
HTTPRedirectHandler, build_opener)
|
| 39 |
+
if ssl:
|
| 40 |
+
from urllib2 import HTTPSHandler
|
| 41 |
+
import httplib
|
| 42 |
+
import xmlrpclib
|
| 43 |
+
import Queue as queue
|
| 44 |
+
from HTMLParser import HTMLParser
|
| 45 |
+
import htmlentitydefs
|
| 46 |
+
raw_input = raw_input
|
| 47 |
+
from itertools import ifilter as filter
|
| 48 |
+
from itertools import ifilterfalse as filterfalse
|
| 49 |
+
|
| 50 |
+
# Leaving this around for now, in case it needs resurrecting in some way
|
| 51 |
+
# _userprog = None
|
| 52 |
+
# def splituser(host):
|
| 53 |
+
# """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
|
| 54 |
+
# global _userprog
|
| 55 |
+
# if _userprog is None:
|
| 56 |
+
# import re
|
| 57 |
+
# _userprog = re.compile('^(.*)@(.*)$')
|
| 58 |
+
|
| 59 |
+
# match = _userprog.match(host)
|
| 60 |
+
# if match: return match.group(1, 2)
|
| 61 |
+
# return None, host
|
| 62 |
+
|
| 63 |
+
else: # pragma: no cover
|
| 64 |
+
from io import StringIO
|
| 65 |
+
string_types = str,
|
| 66 |
+
text_type = str
|
| 67 |
+
from io import TextIOWrapper as file_type
|
| 68 |
+
import builtins
|
| 69 |
+
import configparser
|
| 70 |
+
from urllib.parse import (urlparse, urlunparse, urljoin, quote, unquote,
|
| 71 |
+
urlsplit, urlunsplit, splittype)
|
| 72 |
+
from urllib.request import (urlopen, urlretrieve, Request, url2pathname,
|
| 73 |
+
pathname2url, HTTPBasicAuthHandler,
|
| 74 |
+
HTTPPasswordMgr, HTTPHandler,
|
| 75 |
+
HTTPRedirectHandler, build_opener)
|
| 76 |
+
if ssl:
|
| 77 |
+
from urllib.request import HTTPSHandler
|
| 78 |
+
from urllib.error import HTTPError, URLError, ContentTooShortError
|
| 79 |
+
import http.client as httplib
|
| 80 |
+
import urllib.request as urllib2
|
| 81 |
+
import xmlrpc.client as xmlrpclib
|
| 82 |
+
import queue
|
| 83 |
+
from html.parser import HTMLParser
|
| 84 |
+
import html.entities as htmlentitydefs
|
| 85 |
+
raw_input = input
|
| 86 |
+
from itertools import filterfalse
|
| 87 |
+
filter = filter
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
from ssl import match_hostname, CertificateError
|
| 91 |
+
except ImportError: # pragma: no cover
|
| 92 |
+
|
| 93 |
+
class CertificateError(ValueError):
    """Raised when a certificate cannot be matched against a hostname."""
|
| 95 |
+
|
| 96 |
+
def _dnsname_match(dn, hostname, max_wildcards=1):
|
| 97 |
+
"""Matching according to RFC 6125, section 6.4.3
|
| 98 |
+
|
| 99 |
+
http://tools.ietf.org/html/rfc6125#section-6.4.3
|
| 100 |
+
"""
|
| 101 |
+
pats = []
|
| 102 |
+
if not dn:
|
| 103 |
+
return False
|
| 104 |
+
|
| 105 |
+
parts = dn.split('.')
|
| 106 |
+
leftmost, remainder = parts[0], parts[1:]
|
| 107 |
+
|
| 108 |
+
wildcards = leftmost.count('*')
|
| 109 |
+
if wildcards > max_wildcards:
|
| 110 |
+
# Issue #17980: avoid denials of service by refusing more
|
| 111 |
+
# than one wildcard per fragment. A survey of established
|
| 112 |
+
# policy among SSL implementations showed it to be a
|
| 113 |
+
# reasonable choice.
|
| 114 |
+
raise CertificateError(
|
| 115 |
+
"too many wildcards in certificate DNS name: " + repr(dn))
|
| 116 |
+
|
| 117 |
+
# speed up common case w/o wildcards
|
| 118 |
+
if not wildcards:
|
| 119 |
+
return dn.lower() == hostname.lower()
|
| 120 |
+
|
| 121 |
+
# RFC 6125, section 6.4.3, subitem 1.
|
| 122 |
+
# The client SHOULD NOT attempt to match a presented identifier in which
|
| 123 |
+
# the wildcard character comprises a label other than the left-most label.
|
| 124 |
+
if leftmost == '*':
|
| 125 |
+
# When '*' is a fragment by itself, it matches a non-empty dotless
|
| 126 |
+
# fragment.
|
| 127 |
+
pats.append('[^.]+')
|
| 128 |
+
elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
|
| 129 |
+
# RFC 6125, section 6.4.3, subitem 3.
|
| 130 |
+
# The client SHOULD NOT attempt to match a presented identifier
|
| 131 |
+
# where the wildcard character is embedded within an A-label or
|
| 132 |
+
# U-label of an internationalized domain name.
|
| 133 |
+
pats.append(re.escape(leftmost))
|
| 134 |
+
else:
|
| 135 |
+
# Otherwise, '*' matches any dotless string, e.g. www*
|
| 136 |
+
pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
|
| 137 |
+
|
| 138 |
+
# add the remaining fragments, ignore any wildcards
|
| 139 |
+
for frag in remainder:
|
| 140 |
+
pats.append(re.escape(frag))
|
| 141 |
+
|
| 142 |
+
pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
|
| 143 |
+
return pat.match(hostname)
|
| 144 |
+
|
| 145 |
+
def match_hostname(cert, hostname):
    """Check that *cert* is valid for *hostname*.

    *cert* is a mapping that is read through its ``subjectAltName`` and
    ``subject`` entries (the decoded format returned by
    SSLSocket.getpeercert()).  RFC 2818 and RFC 6125 rules are followed,
    but IP addresses are not accepted for *hostname*.

    The function returns nothing on success.  CertificateError is raised
    when no name matches; ValueError is raised for an empty certificate.
    """
    if not cert:
        raise ValueError("empty or no certificate, match_hostname needs a "
                         "SSL socket or SSL context with either "
                         "CERT_OPTIONAL or CERT_REQUIRED")

    # Names that were tried and rejected, kept for the error message.
    rejected = []

    for kind, name in cert.get('subjectAltName', ()):
        if kind != 'DNS':
            continue
        if _dnsname_match(name, hostname):
            return
        rejected.append(name)

    if not rejected:
        # The subject is only checked when there is no dNSName entry
        # in subjectAltName
        for rdn in cert.get('subject', ()):
            for kind, name in rdn:
                # XXX according to RFC 2818, the most specific Common Name
                # must be used.
                if kind != 'commonName':
                    continue
                if _dnsname_match(name, hostname):
                    return
                rejected.append(name)

    if not rejected:
        raise CertificateError("no appropriate commonName or "
                               "subjectAltName fields were found")
    if len(rejected) == 1:
        raise CertificateError("hostname %r "
                               "doesn't match %r" %
                               (hostname, rejected[0]))
    raise CertificateError("hostname %r "
                           "doesn't match either of %s" %
                           (hostname, ', '.join(map(repr, rejected))))
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
try:
|
| 189 |
+
from types import SimpleNamespace as Container
|
| 190 |
+
except ImportError: # pragma: no cover
|
| 191 |
+
|
| 192 |
+
class Container(object):
    """
    A plain attribute holder, used when several values have to be returned
    together.
    """

    def __init__(self, **kwargs):
        # Every keyword argument becomes an instance attribute.
        vars(self).update(kwargs)
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
try:
|
| 202 |
+
from shutil import which
|
| 203 |
+
except ImportError: # pragma: no cover
|
| 204 |
+
# Implementation from Python 3.3
|
| 205 |
+
def which(cmd, mode=os.F_OK | os.X_OK, path=None):
    """Locate *cmd* and return the path that satisfies *mode*, or None.

    `mode` defaults to os.F_OK | os.X_OK. `path` defaults to the result
    of os.environ.get("PATH"), or can be overridden with a custom search
    path.

    A *cmd* that already has a directory part is checked directly and the
    search path is not consulted.
    """

    def _usable(candidate, wanted):
        # A candidate must exist, be accessible with the wanted mode and
        # not be a directory (on Windows directories pass os.access).
        if not os.path.exists(candidate):
            return False
        if not os.access(candidate, wanted):
            return False
        return not os.path.isdir(candidate)

    # Explicit location (this includes ./script): no PATH lookup.
    if os.path.dirname(cmd):
        return cmd if _usable(cmd, mode) else None

    search = os.environ.get("PATH", os.defpath) if path is None else path
    if not search:
        return None
    directories = search.split(os.pathsep)

    if sys.platform == "win32":
        # The current directory takes precedence on Windows.
        if os.curdir not in directories:
            directories.insert(0, os.curdir)

        # PATHEXT lists the suffixes Windows treats as executable.  If cmd
        # already ends with one of them only that name is tried; otherwise
        # each suffix is appended in turn.
        extensions = os.environ.get("PATHEXT", "").split(os.pathsep)
        if any(cmd.lower().endswith(ext.lower()) for ext in extensions):
            candidates = [cmd]
        else:
            candidates = [cmd + ext for ext in extensions]
    else:
        # Other platforms have no PATHEXT equivalent: use cmd as-is.
        candidates = [cmd]

    visited = set()
    for directory in directories:
        key = os.path.normcase(directory)
        if key in visited:
            continue
        visited.add(key)
        for candidate in candidates:
            full_name = os.path.join(directory, candidate)
            if _usable(full_name, mode):
                return full_name
    return None
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
# ZipFile is a context manager in 2.7, but not in 2.6
|
| 270 |
+
|
| 271 |
+
from zipfile import ZipFile as BaseZipFile
|
| 272 |
+
|
| 273 |
+
if hasattr(BaseZipFile, '__enter__'): # pragma: no cover
|
| 274 |
+
ZipFile = BaseZipFile
|
| 275 |
+
else: # pragma: no cover
|
| 276 |
+
from zipfile import ZipExtFile as BaseZipExtFile
|
| 277 |
+
|
| 278 |
+
class ZipExtFile(BaseZipExtFile):
    """ZipExtFile variant that can be used in a ``with`` statement."""

    def __init__(self, base):
        # Take over the state of the already opened member file.
        vars(self).update(vars(base))

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        # Nothing is returned, so an exception raised in the ``with`` body
        # keeps propagating.
        self.close()
|
| 289 |
+
|
| 290 |
+
class ZipFile(BaseZipFile):
|
| 291 |
+
|
| 292 |
+
def __enter__(self):
|
| 293 |
+
return self
|
| 294 |
+
|
| 295 |
+
def __exit__(self, *exc_info):
|
| 296 |
+
self.close()
|
| 297 |
+
# return None, so if an exception occurred, it will propagate
|
| 298 |
+
|
| 299 |
+
def open(self, *args, **kwargs):
|
| 300 |
+
base = BaseZipFile.open(self, *args, **kwargs)
|
| 301 |
+
return ZipExtFile(base)
|
| 302 |
+
|
| 303 |
+
|
| 304 |
+
try:
|
| 305 |
+
from platform import python_implementation
|
| 306 |
+
except ImportError: # pragma: no cover
|
| 307 |
+
|
| 308 |
+
def python_implementation():
    """Return a string identifying the Python implementation."""
    version = sys.version
    if 'PyPy' in version:
        name = 'PyPy'
    elif os.name == 'java':
        name = 'Jython'
    elif version.startswith('IronPython'):
        name = 'IronPython'
    else:
        name = 'CPython'
    return name
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
import sysconfig
|
| 320 |
+
|
| 321 |
+
try:
|
| 322 |
+
callable = callable
|
| 323 |
+
except NameError: # pragma: no cover
|
| 324 |
+
from collections.abc import Callable
|
| 325 |
+
|
| 326 |
+
def callable(obj):
    """Return True if *obj* is an instance of the Callable ABC."""
    is_callable = isinstance(obj, Callable)
    return is_callable
|
| 328 |
+
|
| 329 |
+
|
| 330 |
+
try:
|
| 331 |
+
fsencode = os.fsencode
|
| 332 |
+
fsdecode = os.fsdecode
|
| 333 |
+
except AttributeError: # pragma: no cover
|
| 334 |
+
# Issue #99: on some systems (e.g. containerised),
|
| 335 |
+
# sys.getfilesystemencoding() returns None, and we need a real value,
|
| 336 |
+
# so fall back to utf-8. From the CPython 2.7 docs relating to Unix and
|
| 337 |
+
# sys.getfilesystemencoding(): the return value is "the user’s preference
|
| 338 |
+
# according to the result of nl_langinfo(CODESET), or None if the
|
| 339 |
+
# nl_langinfo(CODESET) failed."
|
| 340 |
+
_fsencoding = sys.getfilesystemencoding() or 'utf-8'
|
| 341 |
+
if _fsencoding == 'mbcs':
|
| 342 |
+
_fserrors = 'strict'
|
| 343 |
+
else:
|
| 344 |
+
_fserrors = 'surrogateescape'
|
| 345 |
+
|
| 346 |
+
def fsencode(filename):
    """Return *filename* as bytes.

    Bytes are returned unchanged; text is encoded with the module-level
    ``_fsencoding`` / ``_fserrors`` settings.  Any other type raises
    TypeError.
    """
    if isinstance(filename, bytes):
        return filename
    if isinstance(filename, text_type):
        return filename.encode(_fsencoding, _fserrors)
    raise TypeError("expect bytes or str, not %s" %
                    type(filename).__name__)
|
| 354 |
+
|
| 355 |
+
def fsdecode(filename):
    """Return *filename* as text.

    Text is returned unchanged; bytes are decoded with the module-level
    ``_fsencoding`` / ``_fserrors`` settings.  Any other type raises
    TypeError.
    """
    if isinstance(filename, text_type):
        return filename
    if isinstance(filename, bytes):
        return filename.decode(_fsencoding, _fserrors)
    raise TypeError("expect bytes or str, not %s" %
                    type(filename).__name__)
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
try:
|
| 366 |
+
from tokenize import detect_encoding
|
| 367 |
+
except ImportError: # pragma: no cover
|
| 368 |
+
from codecs import BOM_UTF8, lookup
|
| 369 |
+
|
| 370 |
+
cookie_re = re.compile(r"coding[:=]\s*([-\w.]+)")
|
| 371 |
+
|
| 372 |
+
def _get_normal_name(orig_enc):
|
| 373 |
+
"""Imitates get_normal_name in tokenizer.c."""
|
| 374 |
+
# Only care about the first 12 characters.
|
| 375 |
+
enc = orig_enc[:12].lower().replace("_", "-")
|
| 376 |
+
if enc == "utf-8" or enc.startswith("utf-8-"):
|
| 377 |
+
return "utf-8"
|
| 378 |
+
if enc in ("latin-1", "iso-8859-1", "iso-latin-1") or \
|
| 379 |
+
enc.startswith(("latin-1-", "iso-8859-1-", "iso-latin-1-")):
|
| 380 |
+
return "iso-8859-1"
|
| 381 |
+
return orig_enc
|
| 382 |
+
|
| 383 |
+
def detect_encoding(readline):
    """
    Work out which encoding should be used to decode a Python source file.

    *readline* is a callable returning the next line as bytes, in the same
    way as for the tokenize() generator.  It is called at most twice.

    The result is a tuple ``(encoding, lines)`` where *encoding* is a
    string and *lines* is the list of lines (left as bytes) that were read.

    The encoding comes from a utf-8 bom or from an encoding cookie as
    specified in pep-0263.  If both a bom and a cookie are present but
    disagree, or if the cookie names an unknown charset, a SyntaxError is
    raised.  When a utf-8 bom is found, 'utf-8-sig' is returned.

    If no encoding is specified, then the default of 'utf-8' will be returned.
    """
    # The file name, when it can be found, only decorates error messages.
    try:
        source_name = readline.__self__.name
    except AttributeError:
        source_name = None
    saw_bom = False
    fallback = 'utf-8'

    def next_line():
        # An exhausted reader is treated like an empty line.
        try:
            return readline()
        except StopIteration:
            return b''

    def cookie_encoding(raw_line):
        # Decode as UTF-8. Either the line is an encoding declaration,
        # in which case it should be pure ASCII, or it must be UTF-8
        # per default encoding.
        try:
            text = raw_line.decode('utf-8')
        except UnicodeDecodeError:
            msg = "invalid or missing encoding declaration"
            if source_name is not None:
                msg = '{} for {!r}'.format(msg, source_name)
            raise SyntaxError(msg)

        found = cookie_re.findall(text)
        if not found:
            return None
        name = _get_normal_name(found[0])
        try:
            codec = lookup(name)
        except LookupError:
            # This behaviour mimics the Python interpreter
            if source_name is None:
                msg = "unknown encoding: " + name
            else:
                msg = "unknown encoding for {!r}: {}".format(
                    source_name, name)
            raise SyntaxError(msg)

        if saw_bom:
            if codec.name != 'utf-8':
                # This behaviour mimics the Python interpreter
                if source_name is None:
                    msg = 'encoding problem: utf-8'
                else:
                    msg = 'encoding problem for {!r}: utf-8'.format(
                        source_name)
                raise SyntaxError(msg)
            name += '-sig'
        return name

    first = next_line()
    if first.startswith(BOM_UTF8):
        saw_bom = True
        first = first[3:]
        fallback = 'utf-8-sig'
    if not first:
        return fallback, []

    consumed = [first]
    declared = cookie_encoding(first)
    if declared:
        return declared, consumed

    second = next_line()
    if not second:
        return fallback, consumed

    consumed.append(second)
    declared = cookie_encoding(second)
    if declared:
        return declared, consumed
    return fallback, consumed
|
| 474 |
+
|
| 475 |
+
|
| 476 |
+
# For converting & <-> &amp; etc.
|
| 477 |
+
try:
|
| 478 |
+
from html import escape
|
| 479 |
+
except ImportError:
|
| 480 |
+
from cgi import escape
|
| 481 |
+
if sys.version_info[:2] < (3, 4):
|
| 482 |
+
unescape = HTMLParser().unescape
|
| 483 |
+
else:
|
| 484 |
+
from html import unescape
|
| 485 |
+
|
| 486 |
+
try:
|
| 487 |
+
from collections import ChainMap
|
| 488 |
+
except ImportError: # pragma: no cover
|
| 489 |
+
from collections import MutableMapping
|
| 490 |
+
|
| 491 |
+
try:
|
| 492 |
+
from reprlib import recursive_repr as _recursive_repr
|
| 493 |
+
except ImportError:
|
| 494 |
+
|
| 495 |
+
def _recursive_repr(fillvalue='...'):
|
| 496 |
+
'''
|
| 497 |
+
Decorator to make a repr function return fillvalue for a recursive
|
| 498 |
+
call
|
| 499 |
+
'''
|
| 500 |
+
|
| 501 |
+
def decorating_function(user_function):
|
| 502 |
+
repr_running = set()
|
| 503 |
+
|
| 504 |
+
def wrapper(self):
|
| 505 |
+
key = id(self), get_ident()
|
| 506 |
+
if key in repr_running:
|
| 507 |
+
return fillvalue
|
| 508 |
+
repr_running.add(key)
|
| 509 |
+
try:
|
| 510 |
+
result = user_function(self)
|
| 511 |
+
finally:
|
| 512 |
+
repr_running.discard(key)
|
| 513 |
+
return result
|
| 514 |
+
|
| 515 |
+
# Can't use functools.wraps() here because of bootstrap issues
|
| 516 |
+
wrapper.__module__ = getattr(user_function, '__module__')
|
| 517 |
+
wrapper.__doc__ = getattr(user_function, '__doc__')
|
| 518 |
+
wrapper.__name__ = getattr(user_function, '__name__')
|
| 519 |
+
wrapper.__annotations__ = getattr(user_function,
|
| 520 |
+
'__annotations__', {})
|
| 521 |
+
return wrapper
|
| 522 |
+
|
| 523 |
+
return decorating_function
|
| 524 |
+
|
| 525 |
+
class ChainMap(MutableMapping):
    '''
    A ChainMap groups multiple dicts (or other mappings) together
    to create a single, updateable view.

    The underlying mappings are stored in a list.  That list is public and
    can be accessed or updated using the *maps* attribute.  There is no
    other state.

    Lookups search the underlying mappings successively until a key is found.
    In contrast, writes, updates, and deletions only operate on the first
    mapping.
    '''

    def __init__(self, *maps):
        '''Initialize a ChainMap by setting *maps* to the given mappings.
        If no mappings are provided, a single empty dictionary is used.

        '''
        self.maps = list(maps) or [{}]  # always at least one map

    def __missing__(self, key):
        raise KeyError(key)

    def __getitem__(self, key):
        for mapping in self.maps:
            try:
                # can't use 'key in mapping' with defaultdict
                return mapping[key]
            except KeyError:
                pass
        # support subclasses that define __missing__
        return self.__missing__(key)

    def get(self, key, default=None):
        return self[key] if key in self else default

    def __len__(self):
        # reuses stored hash values if possible
        return len(set().union(*self.maps))

    def __iter__(self):
        return iter(set().union(*self.maps))

    def __contains__(self, key):
        return any(key in m for m in self.maps)

    def __bool__(self):
        return any(self.maps)

    @_recursive_repr()
    def __repr__(self):
        return '{0.__class__.__name__}({1})'.format(
            self, ', '.join(map(repr, self.maps)))

    @classmethod
    def fromkeys(cls, iterable, *args):
        'Create a ChainMap with a single dict created from the iterable.'
        return cls(dict.fromkeys(iterable, *args))

    def copy(self):
        'New ChainMap or subclass with a new copy of maps[0] and refs to maps[1:]'
        return self.__class__(self.maps[0].copy(), *self.maps[1:])

    __copy__ = copy

    def new_child(self, m=None):  # like Django's Context.push()
        '''New ChainMap with a new map followed by all previous maps.
        If no map is provided, an empty dict is used.

        '''
        if m is None:
            m = {}
        return self.__class__(m, *self.maps)

    @property
    def parents(self):  # like Django's Context.pop()
        'New ChainMap from maps[1:].'
        return self.__class__(*self.maps[1:])

    def __setitem__(self, key, value):
        self.maps[0][key] = value

    def __delitem__(self, key):
        try:
            del self.maps[0][key]
        except KeyError:
            raise KeyError(
                'Key not found in the first mapping: {!r}'.format(key))

    def popitem(self):
        'Remove and return an item pair from maps[0]. Raise KeyError if maps[0] is empty.'
        try:
            return self.maps[0].popitem()
        except KeyError:
            raise KeyError('No keys found in the first mapping.')

    def pop(self, key, *args):
        'Remove *key* from maps[0] and return its value. Raise KeyError if *key* not in maps[0].'
        try:
            return self.maps[0].pop(key, *args)
        except KeyError:
            raise KeyError(
                'Key not found in the first mapping: {!r}'.format(key))

    def clear(self):
        'Clear maps[0], leaving maps[1:] intact.'
        self.maps[0].clear()
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
try:
|
| 630 |
+
from importlib.util import cache_from_source # Python >= 3.4
|
| 631 |
+
except ImportError: # pragma: no cover
|
| 632 |
+
|
| 633 |
+
def cache_from_source(path, debug_override=None):
    """Return the name of the byte-compiled file for source file *path*.

    *path* must end in '.py'.  A 'c' is appended when *debug_override* is
    true and an 'o' when it is false; when it is None the value of
    ``__debug__`` decides.
    """
    assert path.endswith('.py')
    use_debug = __debug__ if debug_override is None else debug_override
    return path + ('c' if use_debug else 'o')
|
| 642 |
+
|
| 643 |
+
|
| 644 |
+
try:
|
| 645 |
+
from collections import OrderedDict
|
| 646 |
+
except ImportError: # pragma: no cover
|
| 647 |
+
# {{{ http://code.activestate.com/recipes/576693/ (r9)
|
| 648 |
+
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
|
| 649 |
+
# Passes Python2.7's test suite and incorporates all the latest updates.
|
| 650 |
+
try:
|
| 651 |
+
from thread import get_ident as _get_ident
|
| 652 |
+
except ImportError:
|
| 653 |
+
from dummy_thread import get_ident as _get_ident
|
| 654 |
+
|
| 655 |
+
try:
|
| 656 |
+
from _abcoll import KeysView, ValuesView, ItemsView
|
| 657 |
+
except ImportError:
|
| 658 |
+
pass
|
| 659 |
+
|
| 660 |
+
class OrderedDict(dict):
|
| 661 |
+
'Dictionary that remembers insertion order'
|
| 662 |
+
|
| 663 |
+
# An inherited dict maps keys to values.
|
| 664 |
+
# The inherited dict provides __getitem__, __len__, __contains__, and get.
|
| 665 |
+
# The remaining methods are order-aware.
|
| 666 |
+
# Big-O running times for all methods are the same as for regular dictionaries.
|
| 667 |
+
|
| 668 |
+
# The internal self.__map dictionary maps keys to links in a doubly linked list.
|
| 669 |
+
# The circular doubly linked list starts and ends with a sentinel element.
|
| 670 |
+
# The sentinel element never gets deleted (this simplifies the algorithm).
|
| 671 |
+
# Each link is stored as a list of length three: [PREV, NEXT, KEY].
|
| 672 |
+
|
| 673 |
+
def __init__(self, *args, **kwds):
|
| 674 |
+
'''Initialize an ordered dictionary. Signature is the same as for
|
| 675 |
+
regular dictionaries, but keyword arguments are not recommended
|
| 676 |
+
because their insertion order is arbitrary.
|
| 677 |
+
|
| 678 |
+
'''
|
| 679 |
+
if len(args) > 1:
|
| 680 |
+
raise TypeError('expected at most 1 arguments, got %d' %
|
| 681 |
+
len(args))
|
| 682 |
+
try:
|
| 683 |
+
self.__root
|
| 684 |
+
except AttributeError:
|
| 685 |
+
self.__root = root = [] # sentinel node
|
| 686 |
+
root[:] = [root, root, None]
|
| 687 |
+
self.__map = {}
|
| 688 |
+
self.__update(*args, **kwds)
|
| 689 |
+
|
| 690 |
+
def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
|
| 691 |
+
'od.__setitem__(i, y) <==> od[i]=y'
|
| 692 |
+
# Setting a new item creates a new link which goes at the end of the linked
|
| 693 |
+
# list, and the inherited dictionary is updated with the new key/value pair.
|
| 694 |
+
if key not in self:
|
| 695 |
+
root = self.__root
|
| 696 |
+
last = root[0]
|
| 697 |
+
last[1] = root[0] = self.__map[key] = [last, root, key]
|
| 698 |
+
dict_setitem(self, key, value)
|
| 699 |
+
|
| 700 |
+
def __delitem__(self, key, dict_delitem=dict.__delitem__):
|
| 701 |
+
'od.__delitem__(y) <==> del od[y]'
|
| 702 |
+
# Deleting an existing item uses self.__map to find the link which is
|
| 703 |
+
# then removed by updating the links in the predecessor and successor nodes.
|
| 704 |
+
dict_delitem(self, key)
|
| 705 |
+
link_prev, link_next, key = self.__map.pop(key)
|
| 706 |
+
link_prev[1] = link_next
|
| 707 |
+
link_next[0] = link_prev
|
| 708 |
+
|
| 709 |
+
def __iter__(self):
|
| 710 |
+
'od.__iter__() <==> iter(od)'
|
| 711 |
+
root = self.__root
|
| 712 |
+
curr = root[1]
|
| 713 |
+
while curr is not root:
|
| 714 |
+
yield curr[2]
|
| 715 |
+
curr = curr[1]
|
| 716 |
+
|
| 717 |
+
def __reversed__(self):
|
| 718 |
+
'od.__reversed__() <==> reversed(od)'
|
| 719 |
+
root = self.__root
|
| 720 |
+
curr = root[0]
|
| 721 |
+
while curr is not root:
|
| 722 |
+
yield curr[2]
|
| 723 |
+
curr = curr[0]
|
| 724 |
+
|
| 725 |
+
def clear(self):
|
| 726 |
+
'od.clear() -> None. Remove all items from od.'
|
| 727 |
+
try:
|
| 728 |
+
for node in self.__map.itervalues():
|
| 729 |
+
del node[:]
|
| 730 |
+
root = self.__root
|
| 731 |
+
root[:] = [root, root, None]
|
| 732 |
+
self.__map.clear()
|
| 733 |
+
except AttributeError:
|
| 734 |
+
pass
|
| 735 |
+
dict.clear(self)
|
| 736 |
+
|
| 737 |
+
def popitem(self, last=True):
|
| 738 |
+
'''od.popitem() -> (k, v), return and remove a (key, value) pair.
|
| 739 |
+
Pairs are returned in LIFO order if last is true or FIFO order if false.
|
| 740 |
+
|
| 741 |
+
'''
|
| 742 |
+
if not self:
|
| 743 |
+
raise KeyError('dictionary is empty')
|
| 744 |
+
root = self.__root
|
| 745 |
+
if last:
|
| 746 |
+
link = root[0]
|
| 747 |
+
link_prev = link[0]
|
| 748 |
+
link_prev[1] = root
|
| 749 |
+
root[0] = link_prev
|
| 750 |
+
else:
|
| 751 |
+
link = root[1]
|
| 752 |
+
link_next = link[1]
|
| 753 |
+
root[1] = link_next
|
| 754 |
+
link_next[0] = root
|
| 755 |
+
key = link[2]
|
| 756 |
+
del self.__map[key]
|
| 757 |
+
value = dict.pop(self, key)
|
| 758 |
+
return key, value
|
| 759 |
+
|
| 760 |
+
# -- the following methods do not depend on the internal structure --
|
| 761 |
+
|
| 762 |
+
def keys(self):
|
| 763 |
+
'od.keys() -> list of keys in od'
|
| 764 |
+
return list(self)
|
| 765 |
+
|
| 766 |
+
def values(self):
|
| 767 |
+
'od.values() -> list of values in od'
|
| 768 |
+
return [self[key] for key in self]
|
| 769 |
+
|
| 770 |
+
def items(self):
|
| 771 |
+
'od.items() -> list of (key, value) pairs in od'
|
| 772 |
+
return [(key, self[key]) for key in self]
|
| 773 |
+
|
| 774 |
+
def iterkeys(self):
|
| 775 |
+
'od.iterkeys() -> an iterator over the keys in od'
|
| 776 |
+
return iter(self)
|
| 777 |
+
|
| 778 |
+
def itervalues(self):
|
| 779 |
+
'od.itervalues -> an iterator over the values in od'
|
| 780 |
+
for k in self:
|
| 781 |
+
yield self[k]
|
| 782 |
+
|
| 783 |
+
def iteritems(self):
|
| 784 |
+
'od.iteritems -> an iterator over the (key, value) items in od'
|
| 785 |
+
for k in self:
|
| 786 |
+
yield (k, self[k])
|
| 787 |
+
|
| 788 |
+
def update(*args, **kwds):
|
| 789 |
+
'''od.update(E, **F) -> None. Update od from dict/iterable E and F.
|
| 790 |
+
|
| 791 |
+
If E is a dict instance, does: for k in E: od[k] = E[k]
|
| 792 |
+
If E has a .keys() method, does: for k in E.keys(): od[k] = E[k]
|
| 793 |
+
Or if E is an iterable of items, does: for k, v in E: od[k] = v
|
| 794 |
+
In either case, this is followed by: for k, v in F.items(): od[k] = v
|
| 795 |
+
|
| 796 |
+
'''
|
| 797 |
+
if len(args) > 2:
|
| 798 |
+
raise TypeError('update() takes at most 2 positional '
|
| 799 |
+
'arguments (%d given)' % (len(args), ))
|
| 800 |
+
elif not args:
|
| 801 |
+
raise TypeError('update() takes at least 1 argument (0 given)')
|
| 802 |
+
self = args[0]
|
| 803 |
+
# Make progressively weaker assumptions about "other"
|
| 804 |
+
other = ()
|
| 805 |
+
if len(args) == 2:
|
| 806 |
+
other = args[1]
|
| 807 |
+
if isinstance(other, dict):
|
| 808 |
+
for key in other:
|
| 809 |
+
self[key] = other[key]
|
| 810 |
+
elif hasattr(other, 'keys'):
|
| 811 |
+
for key in other.keys():
|
| 812 |
+
self[key] = other[key]
|
| 813 |
+
else:
|
| 814 |
+
for key, value in other:
|
| 815 |
+
self[key] = value
|
| 816 |
+
for key, value in kwds.items():
|
| 817 |
+
self[key] = value
|
| 818 |
+
|
| 819 |
+
__update = update # let subclasses override update without breaking __init__
|
| 820 |
+
|
| 821 |
+
__marker = object()
|
| 822 |
+
|
| 823 |
+
def pop(self, key, default=__marker):
|
| 824 |
+
'''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.
|
| 825 |
+
If key is not found, d is returned if given, otherwise KeyError is raised.
|
| 826 |
+
|
| 827 |
+
'''
|
| 828 |
+
if key in self:
|
| 829 |
+
result = self[key]
|
| 830 |
+
del self[key]
|
| 831 |
+
return result
|
| 832 |
+
if default is self.__marker:
|
| 833 |
+
raise KeyError(key)
|
| 834 |
+
return default
|
| 835 |
+
|
| 836 |
+
def setdefault(self, key, default=None):
|
| 837 |
+
'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
|
| 838 |
+
if key in self:
|
| 839 |
+
return self[key]
|
| 840 |
+
self[key] = default
|
| 841 |
+
return default
|
| 842 |
+
|
| 843 |
+
def __repr__(self, _repr_running=None):
|
| 844 |
+
'od.__repr__() <==> repr(od)'
|
| 845 |
+
if not _repr_running:
|
| 846 |
+
_repr_running = {}
|
| 847 |
+
call_key = id(self), _get_ident()
|
| 848 |
+
if call_key in _repr_running:
|
| 849 |
+
return '...'
|
| 850 |
+
_repr_running[call_key] = 1
|
| 851 |
+
try:
|
| 852 |
+
if not self:
|
| 853 |
+
return '%s()' % (self.__class__.__name__, )
|
| 854 |
+
return '%s(%r)' % (self.__class__.__name__, self.items())
|
| 855 |
+
finally:
|
| 856 |
+
del _repr_running[call_key]
|
| 857 |
+
|
| 858 |
+
def __reduce__(self):
|
| 859 |
+
'Return state information for pickling'
|
| 860 |
+
items = [[k, self[k]] for k in self]
|
| 861 |
+
inst_dict = vars(self).copy()
|
| 862 |
+
for k in vars(OrderedDict()):
|
| 863 |
+
inst_dict.pop(k, None)
|
| 864 |
+
if inst_dict:
|
| 865 |
+
return (self.__class__, (items, ), inst_dict)
|
| 866 |
+
return self.__class__, (items, )
|
| 867 |
+
|
| 868 |
+
def copy(self):
|
| 869 |
+
'od.copy() -> a shallow copy of od'
|
| 870 |
+
return self.__class__(self)
|
| 871 |
+
|
| 872 |
+
@classmethod
|
| 873 |
+
def fromkeys(cls, iterable, value=None):
|
| 874 |
+
'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
|
| 875 |
+
and values equal to v (which defaults to None).
|
| 876 |
+
|
| 877 |
+
'''
|
| 878 |
+
d = cls()
|
| 879 |
+
for key in iterable:
|
| 880 |
+
d[key] = value
|
| 881 |
+
return d
|
| 882 |
+
|
| 883 |
+
def __eq__(self, other):
|
| 884 |
+
'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
|
| 885 |
+
while comparison to a regular mapping is order-insensitive.
|
| 886 |
+
|
| 887 |
+
'''
|
| 888 |
+
if isinstance(other, OrderedDict):
|
| 889 |
+
return len(self) == len(
|
| 890 |
+
other) and self.items() == other.items()
|
| 891 |
+
return dict.__eq__(self, other)
|
| 892 |
+
|
| 893 |
+
def __ne__(self, other):
|
| 894 |
+
return not self == other
|
| 895 |
+
|
| 896 |
+
# -- the following methods are only used in Python 2.7 --
|
| 897 |
+
|
| 898 |
+
def viewkeys(self):
|
| 899 |
+
"od.viewkeys() -> a set-like object providing a view on od's keys"
|
| 900 |
+
return KeysView(self)
|
| 901 |
+
|
| 902 |
+
def viewvalues(self):
|
| 903 |
+
"od.viewvalues() -> an object providing a view on od's values"
|
| 904 |
+
return ValuesView(self)
|
| 905 |
+
|
| 906 |
+
def viewitems(self):
    """od.viewitems() -> an ItemsView wrapping od."""
    items_view = ItemsView(self)
    return items_view
|
| 909 |
+
|
| 910 |
+
|
| 911 |
+
try:
|
| 912 |
+
from logging.config import BaseConfigurator, valid_ident
|
| 913 |
+
except ImportError: # pragma: no cover
|
| 914 |
+
# Case-insensitive pattern: a letter or underscore followed by letters,
# digits or underscores.
IDENTIFIER = re.compile('^[a-z_][a-z0-9_]*$', re.I)


def valid_ident(s):
    """Return True when *s* matches IDENTIFIER.

    :raises ValueError: if *s* does not match.
    """
    if IDENTIFIER.match(s) is None:
        raise ValueError('Not a valid Python identifier: %r' % s)
    return True
|
| 921 |
+
|
| 922 |
+
# The ConvertingXXX classes are wrappers around standard Python containers,
|
| 923 |
+
# and they serve to convert any suitable values in the container. The
|
| 924 |
+
# conversion converts base dicts, lists and tuples to their wrapped
|
| 925 |
+
# equivalents, whereas strings which match a conversion format are converted
|
| 926 |
+
# appropriately.
|
| 927 |
+
#
|
| 928 |
+
# Each wrapper should have a configurator attribute holding the actual
|
| 929 |
+
# configurator to use for conversion.
|
| 930 |
+
|
| 931 |
+
class ConvertingDict(dict):
    """A dict wrapper that runs values through ``self.configurator.convert``.

    The class never sets ``configurator`` itself; whoever creates an instance
    must assign it before values are read.
    """

    def __getitem__(self, key):
        """Return the converted value for *key*, caching it if it changed."""
        raw = dict.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The converted value is different: save it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted

    def get(self, key, default=None):
        """Like ``dict.get``, but the looked-up value (or *default*) is converted.

        A conversion that yields a different object is stored under *key*.
        """
        raw = dict.get(self, key, default)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The converted value is different: save it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted

    def pop(self, key, default=None):
        """Remove *key* and return its converted value (or converted *default*)."""
        raw = dict.pop(self, key, default)
        converted = self.configurator.convert(raw)
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted
|
| 967 |
+
|
| 968 |
+
class ConvertingList(list):
    """A list wrapper that runs items through ``self.configurator.convert``.

    The ``configurator`` attribute must be assigned by the creator.
    """

    def __getitem__(self, key):
        """Return the converted item at *key*, caching it if it changed."""
        raw = list.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        if converted is raw:
            return converted
        # The converted value is different: save it for next time.
        self[key] = converted
        if type(converted) in (ConvertingDict, ConvertingList,
                               ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted

    def pop(self, idx=-1):
        """Remove the item at *idx* (last by default) and return it converted."""
        raw = list.pop(self, idx)
        converted = self.configurator.convert(raw)
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            # Only the parent is recorded here; no key is set for popped items.
            converted.parent = self
        return converted
|
| 991 |
+
|
| 992 |
+
class ConvertingTuple(tuple):
    """A tuple wrapper that runs items through ``self.configurator.convert``.

    The ``configurator`` attribute must be assigned by the creator.
    """

    def __getitem__(self, key):
        """Return the converted item at *key*; nothing is written back."""
        raw = tuple.__getitem__(self, key)
        converted = self.configurator.convert(raw)
        if converted is not raw and type(converted) in (
                ConvertingDict, ConvertingList, ConvertingTuple):
            converted.parent = self
            converted.key = key
        return converted
|
| 1004 |
+
|
| 1005 |
+
class BaseConfigurator(object):
    """
    The configurator base class which defines some useful defaults.

    It wraps a configuration mapping in a ConvertingDict and knows how to
    convert ``ext://`` and ``cfg://`` prefixed strings.
    """

    # Splits "<prefix>://<suffix>" strings; the prefix selects a converter.
    CONVERT_PATTERN = re.compile(r'^(?P<prefix>[a-z]+)://(?P<suffix>.*)$')

    # Patterns used by cfg_convert to walk a path such as "a.b[0][key]".
    WORD_PATTERN = re.compile(r'^\s*(\w+)\s*')
    DOT_PATTERN = re.compile(r'^\.\s*(\w+)\s*')
    INDEX_PATTERN = re.compile(r'^\[\s*(\w+)\s*\]\s*')
    DIGIT_PATTERN = re.compile(r'^\d+$')

    # Maps a conversion prefix to the name of the method handling it.
    value_converters = {
        'ext': 'ext_convert',
        'cfg': 'cfg_convert',
    }

    # We might want to use a different one, e.g. importlib
    importer = staticmethod(__import__)

    def __init__(self, config):
        """Wrap *config* in a ConvertingDict whose configurator is this instance."""
        wrapped = ConvertingDict(config)
        wrapped.configurator = self
        self.config = wrapped

    def resolve(self, s):
        """
        Resolve the dotted name *s* to an object using import and attribute
        access.

        :raises ValueError: if an ImportError occurs while resolving; the
                            original error is attached as ``__cause__``.
        """
        parts = s.split('.')
        dotted = parts.pop(0)
        try:
            target = self.importer(dotted)
            for part in parts:
                dotted += '.' + part
                try:
                    target = getattr(target, part)
                except AttributeError:
                    # Not an attribute yet: import the dotted path so far,
                    # then retry the attribute lookup.
                    self.importer(dotted)
                    target = getattr(target, part)
            return target
        except ImportError:
            err, trace = sys.exc_info()[1:]
            wrapped = ValueError('Cannot resolve %r: %s' % (s, err))
            wrapped.__cause__, wrapped.__traceback__ = err, trace
            raise wrapped

    def ext_convert(self, value):
        """Default converter for the ext:// protocol: resolve *value* as a dotted name."""
        return self.resolve(value)

    def cfg_convert(self, value):
        """Default converter for the cfg:// protocol.

        *value* is a path into ``self.config``: a leading word, followed by
        any number of ``.name`` or ``[index]`` steps.

        :raises ValueError: if *value* has no leading word or contains text
                            that is neither a ``.name`` nor an ``[index]`` step.
        """
        head = self.WORD_PATTERN.match(value)
        if head is None:
            raise ValueError("Unable to convert %r" % value)
        remainder = value[head.end():]
        node = self.config[head.groups()[0]]
        while remainder:
            step = self.DOT_PATTERN.match(remainder)
            if step:
                node = node[step.groups()[0]]
            else:
                step = self.INDEX_PATTERN.match(remainder)
                if step:
                    token = step.groups()[0]
                    if self.DIGIT_PATTERN.match(token):
                        try:
                            # try as number first (most likely)
                            node = node[int(token)]
                        except TypeError:
                            node = node[token]
                    else:
                        node = node[token]
            if not step:
                raise ValueError('Unable to convert '
                                 '%r at %r' % (value, remainder))
            remainder = remainder[step.end():]
        # remainder is empty here
        return node

    def convert(self, value):
        """
        Convert *value* to an appropriate type.

        Plain dicts, lists and tuples are replaced by their converting
        wrappers (with this instance as configurator).  Strings of the form
        ``prefix://suffix`` whose prefix is in ``value_converters`` are passed
        to the matching converter method.  Anything else is returned as is.
        """
        if isinstance(value, dict) and not isinstance(value, ConvertingDict):
            value = ConvertingDict(value)
            value.configurator = self
        elif isinstance(value, list) and not isinstance(
                value, ConvertingList):
            value = ConvertingList(value)
            value.configurator = self
        elif isinstance(value, tuple) and not isinstance(
                value, ConvertingTuple):
            value = ConvertingTuple(value)
            value.configurator = self
        elif isinstance(value, string_types):
            matched = self.CONVERT_PATTERN.match(value)
            if matched:
                parts = matched.groupdict()
                converter_name = self.value_converters.get(
                    parts['prefix'], None)
                if converter_name:
                    value = getattr(self, converter_name)(parts['suffix'])
        return value

    def configure_custom(self, config):
        """Configure an object with a user-supplied factory.

        The factory is popped from the ``'()'`` key (and resolved if it is
        not callable); the ``'.'`` key, if present, holds attributes to set
        on the result.  The remaining keys are passed as keyword arguments.
        """
        factory = config.pop('()')
        if not callable(factory):
            factory = self.resolve(factory)
        props = config.pop('.', None)
        # valid_ident raises for any key that is not a valid identifier
        kwargs = dict((k, config[k]) for k in config if valid_ident(k))
        result = factory(**kwargs)
        if props:
            for attr_name, attr_value in props.items():
                setattr(result, attr_name, attr_value)
        return result

    def as_tuple(self, value):
        """Utility function which converts lists to tuples; other values pass through."""
        return tuple(value) if isinstance(value, list) else value
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/database.py
ADDED
|
@@ -0,0 +1,1359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 The Python Software Foundation.
|
| 4 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 5 |
+
#
|
| 6 |
+
"""PEP 376 implementation."""
|
| 7 |
+
|
| 8 |
+
from __future__ import unicode_literals
|
| 9 |
+
|
| 10 |
+
import base64
|
| 11 |
+
import codecs
|
| 12 |
+
import contextlib
|
| 13 |
+
import hashlib
|
| 14 |
+
import logging
|
| 15 |
+
import os
|
| 16 |
+
import posixpath
|
| 17 |
+
import sys
|
| 18 |
+
import zipimport
|
| 19 |
+
|
| 20 |
+
from . import DistlibException, resources
|
| 21 |
+
from .compat import StringIO
|
| 22 |
+
from .version import get_scheme, UnsupportedVersionError
|
| 23 |
+
from .metadata import (Metadata, METADATA_FILENAME, WHEEL_METADATA_FILENAME,
|
| 24 |
+
LEGACY_METADATA_FILENAME)
|
| 25 |
+
from .util import (parse_requirement, cached_property, parse_name_and_version,
|
| 26 |
+
read_exports, write_exports, CSVReader, CSVWriter)
|
| 27 |
+
|
| 28 |
+
__all__ = [
|
| 29 |
+
'Distribution', 'BaseInstalledDistribution', 'InstalledDistribution',
|
| 30 |
+
'EggInfoDistribution', 'DistributionPath'
|
| 31 |
+
]
|
| 32 |
+
|
| 33 |
+
logger = logging.getLogger(__name__)
|
| 34 |
+
|
| 35 |
+
EXPORTS_FILENAME = 'pydist-exports.json'
|
| 36 |
+
COMMANDS_FILENAME = 'pydist-commands.json'
|
| 37 |
+
|
| 38 |
+
DIST_FILES = ('INSTALLER', METADATA_FILENAME, 'RECORD', 'REQUESTED',
|
| 39 |
+
'RESOURCES', EXPORTS_FILENAME, 'SHARED')
|
| 40 |
+
|
| 41 |
+
DISTINFO_EXT = '.dist-info'
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class _Cache(object):
    """
    A simple cache mapping names and distribution paths to distributions.

    ``path`` maps a distribution's path to the distribution, ``name`` maps a
    distribution's key to a list of distributions, and ``generated`` is a
    flag the owner sets once the cache has been populated.
    """

    def __init__(self):
        """
        Initialise an empty, not-yet-generated cache.
        """
        self.name = {}
        self.path = {}
        self.generated = False

    def clear(self):
        """
        Empty both mappings in place and reset the generated flag.
        """
        for mapping in (self.name, self.path):
            mapping.clear()
        self.generated = False

    def add(self, dist):
        """
        Add a distribution to the cache unless its path is already present.

        :param dist: The distribution to add; its ``path`` and ``key``
                     attributes are read.
        """
        location = dist.path
        if location in self.path:
            return
        self.path[location] = dist
        self.name.setdefault(dist.key, []).append(dist)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
class DistributionPath(object):
    """
    Represents a set of distributions installed on a path (typically sys.path).
    """

    def __init__(self, path=None, include_egg=False):
        """
        Create an instance from a path, optionally including legacy
        (.egg / .egg-info) distributions.

        :param path: The path to use, as a list of directories. If not
                     specified, sys.path is used.
        :param include_egg: If True, this instance will look for and return
                            legacy distributions as well as .dist-info ones.
        """
        self.path = sys.path if path is None else path
        self._include_dist = True
        self._include_egg = include_egg

        # Separate caches for .dist-info and legacy distributions.
        self._cache = _Cache()
        self._cache_egg = _Cache()
        self._cache_enabled = True
        self._scheme = get_scheme('default')

    def _get_cache_enabled(self):
        return self._cache_enabled

    def _set_cache_enabled(self, value):
        self._cache_enabled = value

    cache_enabled = property(_get_cache_enabled, _set_cache_enabled)

    def clear_cache(self):
        """
        Clear both internal caches.
        """
        for cache in (self._cache, self._cache_egg):
            cache.clear()

    def _yield_distributions(self):
        """
        Yield .dist-info and/or .egg(-info) distributions found on the path.

        Entries that cannot be read are reported through the logger and the
        warnings module, then skipped.
        """
        # Track resource paths already yielded: on some Linux systems
        # (e.g. some Debian/Ubuntu variants) symlinks alias other files in
        # the environment, so the same distribution can be reached twice.
        visited = set()
        for location in self.path:
            finder = resources.finder_for_path(location)
            if finder is None:
                continue
            root = finder.find('')
            if not root or not root.is_container:
                continue
            for entry in sorted(root.resources):
                r = finder.find(entry)
                if not r or r.path in visited:
                    continue
                try:
                    if self._include_dist and entry.endswith(DISTINFO_EXT):
                        # Use the first metadata file that exists.
                        pydist = None
                        for metadata_filename in (METADATA_FILENAME,
                                                  WHEEL_METADATA_FILENAME,
                                                  LEGACY_METADATA_FILENAME):
                            pydist = finder.find(
                                posixpath.join(entry, metadata_filename))
                            if pydist:
                                break
                        if not pydist:
                            continue

                        with contextlib.closing(pydist.as_stream()) as stream:
                            metadata = Metadata(fileobj=stream,
                                                scheme='legacy')
                        logger.debug('Found %s', r.path)
                        visited.add(r.path)
                        yield new_dist_class(r.path,
                                             metadata=metadata,
                                             env=self)
                    elif self._include_egg and entry.endswith(
                            ('.egg-info', '.egg')):
                        logger.debug('Found %s', r.path)
                        visited.add(r.path)
                        yield old_dist_class(r.path, self)
                except Exception as e:
                    msg = 'Unable to read distribution at %s, perhaps due to bad metadata: %s'
                    logger.warning(msg, r.path, e)
                    import warnings
                    warnings.warn(msg % (r.path, e), stacklevel=2)

    def _generate_cache(self):
        """
        Scan the path for distributions and populate whichever cache has not
        been generated yet.
        """
        need_dist = not self._cache.generated
        need_egg = self._include_egg and not self._cache_egg.generated
        if not (need_dist or need_egg):
            return
        for dist in self._yield_distributions():
            if isinstance(dist, InstalledDistribution):
                target = self._cache
            else:
                target = self._cache_egg
            target.add(dist)

        if need_dist:
            self._cache.generated = True
        if need_egg:
            self._cache_egg.generated = True

    @classmethod
    def distinfo_dirname(cls, name, version):
        """
        Build the directory name ``<name>-<version>.dist-info``.

        Any ``'-'`` in *name* is replaced with ``'_'``; *version* is used
        exactly as given.

        :type name: string
        :type version: string
        :returns: directory name
        :rtype: string"""
        escaped = name.replace('-', '_')
        return '-'.join([escaped, version]) + DISTINFO_EXT

    def get_distributions(self):
        """
        Provides an iterator over the distributions found on the path.

        With the cache disabled the path is scanned afresh; otherwise the
        cached .dist-info distributions are yielded, followed by the cached
        legacy ones when legacy distributions are included.

        :rtype: iterator of distribution instances
        """
        if self._cache_enabled:
            self._generate_cache()

            for dist in self._cache.path.values():
                yield dist

            if self._include_egg:
                for dist in self._cache_egg.path.values():
                    yield dist
        else:
            for dist in self._yield_distributions():
                yield dist

    def get_distribution(self, name):
        """
        Looks for a named distribution on the path.

        The lookup is done on the lower-cased *name*.  Only the first match
        is returned; if nothing is found, ``None`` is returned.

        :rtype: a distribution instance or ``None``
        """
        wanted = name.lower()
        if not self._cache_enabled:
            for dist in self._yield_distributions():
                if dist.key == wanted:
                    return dist
            return None

        self._generate_cache()

        if wanted in self._cache.name:
            return self._cache.name[wanted][0]
        if self._include_egg and wanted in self._cache_egg.name:
            return self._cache_egg.name[wanted][0]
        return None

    def provides_distribution(self, name, version=None):
        """
        Iterate over all distributions, yielding those which provide *name*.
        If a *version* is given, it is used to filter the results.

        Each distribution is yielded at most once.

        :parameter version: a version specifier that indicates the version
                            required
        :type name: string
        :type version: string
        :raises DistlibException: if a matcher cannot be built from *name*
                                  and *version*.
        """
        matcher = None
        if version is not None:
            try:
                matcher = self._scheme.matcher('%s (%s)' % (name, version))
            except ValueError:
                raise DistlibException('invalid name or version: %r, %r' %
                                       (name, version))

        for dist in self.get_distributions():
            # Some distributions have been seen without a "provides"
            # attribute (e.g. enum34 on a CI system), so check for it first.
            if not hasattr(dist, 'provides'):
                logger.debug('No "provides": %s', dist)
                continue

            for spec in dist.provides:
                p_name, p_ver = parse_name_and_version(spec)
                if p_name != name:
                    continue
                if matcher is None or matcher.match(p_ver):
                    yield dist
                    break

    def get_file_path(self, name, relative_path):
        """
        Return the path to a resource file of the distribution called *name*.

        :raises LookupError: if no such distribution is found.
        """
        dist = self.get_distribution(name)
        if dist is None:
            raise LookupError('no distribution named %r found' % name)
        return dist.get_resource_path(relative_path)

    def get_exported_entries(self, category, name=None):
        """
        Yield the exported entries in a particular category.

        :param category: The category to search for entries.
        :param name: If specified, only entries with that name are yielded.
        """
        for dist in self.get_distributions():
            exports = dist.exports
            if category not in exports:
                continue
            entries = exports[category]
            if name is None:
                for entry in entries.values():
                    yield entry
            elif name in entries:
                yield entries[name]
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
class Distribution(object):
|
| 328 |
+
"""
|
| 329 |
+
A base class for distributions, whether installed or from indexes.
|
| 330 |
+
Either way, it must have some metadata, so that's all that's needed
|
| 331 |
+
for construction.
|
| 332 |
+
"""
|
| 333 |
+
|
| 334 |
+
build_time_dependency = False
|
| 335 |
+
"""
|
| 336 |
+
Set to True if it's known to be only a build-time dependency (i.e.
|
| 337 |
+
not needed after installation).
|
| 338 |
+
"""
|
| 339 |
+
|
| 340 |
+
requested = False
|
| 341 |
+
"""A boolean that indicates whether the ``REQUESTED`` metadata file is
|
| 342 |
+
present (in other words, whether the package was installed by user
|
| 343 |
+
request or it was installed as a dependency)."""
|
| 344 |
+
|
| 345 |
+
def __init__(self, metadata):
|
| 346 |
+
"""
|
| 347 |
+
Initialise an instance.
|
| 348 |
+
:param metadata: The instance of :class:`Metadata` describing this
|
| 349 |
+
distribution.
|
| 350 |
+
"""
|
| 351 |
+
self.metadata = metadata
|
| 352 |
+
self.name = metadata.name
|
| 353 |
+
self.key = self.name.lower() # for case-insensitive comparisons
|
| 354 |
+
self.version = metadata.version
|
| 355 |
+
self.locator = None
|
| 356 |
+
self.digest = None
|
| 357 |
+
self.extras = None # additional features requested
|
| 358 |
+
self.context = None # environment marker overrides
|
| 359 |
+
self.download_urls = set()
|
| 360 |
+
self.digests = {}
|
| 361 |
+
|
| 362 |
+
@property
|
| 363 |
+
def source_url(self):
|
| 364 |
+
"""
|
| 365 |
+
The source archive download URL for this distribution.
|
| 366 |
+
"""
|
| 367 |
+
return self.metadata.source_url
|
| 368 |
+
|
| 369 |
+
download_url = source_url # Backward compatibility
|
| 370 |
+
|
| 371 |
+
@property
|
| 372 |
+
def name_and_version(self):
|
| 373 |
+
"""
|
| 374 |
+
A utility property which displays the name and version in parentheses.
|
| 375 |
+
"""
|
| 376 |
+
return '%s (%s)' % (self.name, self.version)
|
| 377 |
+
|
| 378 |
+
@property
|
| 379 |
+
def provides(self):
|
| 380 |
+
"""
|
| 381 |
+
A set of distribution names and versions provided by this distribution.
|
| 382 |
+
:return: A set of "name (version)" strings.
|
| 383 |
+
"""
|
| 384 |
+
plist = self.metadata.provides
|
| 385 |
+
s = '%s (%s)' % (self.name, self.version)
|
| 386 |
+
if s not in plist:
|
| 387 |
+
plist.append(s)
|
| 388 |
+
return plist
|
| 389 |
+
|
| 390 |
+
def _get_requirements(self, req_attr):
    """
    Return, as a set, the requirements stored in the metadata attribute
    named *req_attr*, after passing them through the metadata's
    ``get_requirements`` with this distribution's extras and context.
    """
    metadata = self.metadata
    raw_reqts = getattr(metadata, req_attr)
    logger.debug('%s: got requirements %r from metadata: %r', self.name,
                 req_attr, raw_reqts)
    filtered = metadata.get_requirements(raw_reqts,
                                         extras=self.extras,
                                         env=self.context)
    return set(filtered)
|
| 397 |
+
|
| 398 |
+
@property
def run_requires(self):
    """The resolved ``run_requires`` entries from the metadata, as a set."""
    requirements = self._get_requirements('run_requires')
    return requirements
|
| 401 |
+
|
| 402 |
+
@property
def meta_requires(self):
    """The resolved ``meta_requires`` entries from the metadata, as a set."""
    requirements = self._get_requirements('meta_requires')
    return requirements
|
| 405 |
+
|
| 406 |
+
@property
def build_requires(self):
    """The resolved ``build_requires`` entries from the metadata, as a set."""
    requirements = self._get_requirements('build_requires')
    return requirements
|
| 409 |
+
|
| 410 |
+
@property
def test_requires(self):
    """The resolved ``test_requires`` entries from the metadata, as a set."""
    requirements = self._get_requirements('test_requires')
    return requirements
|
| 413 |
+
|
| 414 |
+
@property
def dev_requires(self):
    """The resolved ``dev_requires`` entries from the metadata, as a set."""
    requirements = self._get_requirements('dev_requires')
    return requirements
|
| 417 |
+
|
| 418 |
+
def matches_requirement(self, req):
    """
    Say if this instance matches (fulfills) a requirement.

    :param req: The requirement to match.
    :type req: str
    :return: True if it matches, else False.
    """
    # The requirement may contain extras - parse it so that they are not
    # passed on to the matcher.
    parsed = parse_requirement(req)
    scheme = get_scheme(self.metadata.scheme)
    try:
        matcher = scheme.matcher(parsed.requirement)
    except UnsupportedVersionError:
        # XXX compat-mode if cannot read the version
        logger.warning('could not read version %r - using name only', req)
        bare_name = req.split()[0]
        matcher = scheme.matcher(bare_name)

    wanted = matcher.key  # case-insensitive

    for provided in self.provides:
        p_name, p_ver = parse_name_and_version(provided)
        if p_name == wanted:
            try:
                # The first provided entry whose version can be compared
                # decides the outcome.
                return matcher.match(p_ver)
            except UnsupportedVersionError:
                continue
    return False
|
| 450 |
+
|
| 451 |
+
def __repr__(self):
    """
    Return a textual representation of this instance; the source URL is
    included in brackets when there is one.
    """
    url = self.source_url
    suffix = ' [%s]' % url if url else ''
    return '<Distribution %s (%s)%s>' % (self.name, self.version, suffix)
|
| 460 |
+
|
| 461 |
+
def __eq__(self, other):
    """
    See if this distribution is the same as another.

    :param other: The distribution to compare with. To be equal to one
                  another, distributions must have the same type, name,
                  version and source_url.
    :return: True if it is the same, else False.
    """
    if type(other) is not type(self):
        return False
    return (self.name == other.name and self.version == other.version
            and self.source_url == other.source_url)
|
| 475 |
+
|
| 476 |
+
def __hash__(self):
    """
    Compute the hash from the same fields the equality test compares:
    name, version and source_url.
    """
    identity = (self.name, self.version, self.source_url)
    return sum(hash(part) for part in identity)
|
| 481 |
+
|
| 482 |
+
|
| 483 |
+
class BaseInstalledDistribution(Distribution):
|
| 484 |
+
"""
|
| 485 |
+
This is the base class for installed distributions (whether PEP 376 or
|
| 486 |
+
legacy).
|
| 487 |
+
"""
|
| 488 |
+
|
| 489 |
+
hasher = None
|
| 490 |
+
|
| 491 |
+
def __init__(self, metadata, path, env=None):
    """
    Set up an installed distribution.

    :param metadata: The :class:`Metadata` instance describing the
                     distribution, normally initialised from a metadata
                     file found in ``path``.
    :param path:     The path of the ``.dist-info`` or ``.egg-info``
                     directory for the distribution.
    :param env:      Normally the :class:`DistributionPath` instance
                     where this distribution was found; stored as
                     ``dist_path``.
    """
    super(BaseInstalledDistribution, self).__init__(metadata)
    self.path, self.dist_path = path, env
|
| 505 |
+
|
| 506 |
+
def get_hash(self, data, hasher=None):
    """
    Get the hash of some data, using a particular hash algorithm, if
    specified.

    :param data: The data to be hashed.
    :type data: bytes
    :param hasher: The name of a hash implementation, supported by hashlib,
                   or ``None``. Examples of valid values are ``'sha1'``,
                   ``'sha224'``, ``'sha384'``, ``'sha256'``, ``'md5'`` and
                   ``'sha512'``. If no hasher is specified, the ``hasher``
                   attribute of the instance is used. If the hasher is
                   determined to be ``None``, MD5 is used as the hashing
                   algorithm.
    :returns: The URL-safe base64 digest of the data with trailing ``=``
              padding removed. Whenever a named hasher is used (passed in
              or taken from the ``hasher`` attribute), the result is
              prefixed with that hasher's name followed by '='.
    :rtype: str
    """
    algorithm = self.hasher if hasher is None else hasher
    if algorithm is None:
        digest = hashlib.md5(data).digest()
        prefix = ''
    else:
        digest = getattr(hashlib, algorithm)(data).digest()
        # The prefix must name the algorithm actually used. It used to be
        # built from self.hasher, which mislabelled the digest whenever a
        # different hasher was passed in explicitly.
        prefix = '%s=' % algorithm
    encoded = base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')
    return '%s%s' % (prefix, encoded)
|
| 536 |
+
|
| 537 |
+
|
| 538 |
+
class InstalledDistribution(BaseInstalledDistribution):
|
| 539 |
+
"""
|
| 540 |
+
Created with the *path* of the ``.dist-info`` directory provided to the
|
| 541 |
+
constructor. It reads the metadata contained in ``pydist.json`` when it is
|
| 542 |
+
instantiated, or uses a passed-in Metadata instance (useful for when
|
| 543 |
+
dry-run mode is being used).
|
| 544 |
+
"""
|
| 545 |
+
|
| 546 |
+
hasher = 'sha256'
|
| 547 |
+
|
| 548 |
+
def __init__(self, path, metadata=None, env=None):
    """
    Set up a distribution from its ``.dist-info`` directory.

    :param path: The path of the ``.dist-info`` directory.
    :param metadata: An optional ready-made Metadata instance. When it is
                     ``None`` (and no cached entry exists for ``path``)
                     the metadata is read from the directory.
    :param env: Optional object providing ``_cache_enabled`` and
                ``_cache``; when caching is enabled, cached metadata is
                reused and this instance is added to the cache.
    :raises ValueError: if no finder is available for ``path`` or no
                        metadata file is found in it.
    """
    self.modules = []
    self.finder = finder = resources.finder_for_path(path)
    if finder is None:
        raise ValueError('finder unavailable for %s' % path)
    if env and env._cache_enabled and path in env._cache.path:
        metadata = env._cache.path[path].metadata
    elif metadata is None:
        # Try the current file name first, then the Wheel 0.23 name and
        # finally the legacy name (the latter two are temporary support).
        candidates = (METADATA_FILENAME, WHEEL_METADATA_FILENAME,
                      LEGACY_METADATA_FILENAME)
        for candidate in candidates:
            r = finder.find(candidate)
            if r is not None:
                break
        else:
            raise ValueError('no %s found in %s' %
                             (METADATA_FILENAME, path))
        with contextlib.closing(r.as_stream()) as stream:
            metadata = Metadata(fileobj=stream, scheme='legacy')

    super(InstalledDistribution, self).__init__(metadata, path, env)

    if env and env._cache_enabled:
        env._cache.add(self)

    self.requested = finder.find('REQUESTED') is not None
    top_level = os.path.join(path, 'top_level.txt')
    if os.path.exists(top_level):
        with open(top_level, 'rb') as f:
            text = f.read().decode('utf-8')
        self.modules = text.splitlines()
|
| 581 |
+
|
| 582 |
+
def __repr__(self):
    """Return a debugging representation with name, version and path."""
    fields = (self.name, self.version, self.path)
    return '<InstalledDistribution %r %s at %r>' % fields
|
| 585 |
+
|
| 586 |
+
def __str__(self):
    """Return the name and version separated by a space."""
    fields = (self.name, self.version)
    return "%s %s" % fields
|
| 588 |
+
|
| 589 |
+
def _get_records(self):
    """
    Get the list of installed files for the distribution.

    :return: A list of ``(path, hash, size)`` tuples read from ``RECORD``.
             Hash and size are ``None`` for rows that omit them. The path
             is exactly as stored in the file (which is as in PEP 376);
             it is not made absolute here.
    """
    records = []
    resource = self.get_distinfo_resource('RECORD')
    with contextlib.closing(resource.as_stream()) as stream:
        with CSVReader(stream=stream) as reader:
            for row in reader:
                # Pad rows shorter than three fields with None.
                padding = [None] * (3 - len(row))
                path, checksum, size = row + padding
                records.append((path, checksum, size))
    return records
|
| 611 |
+
|
| 612 |
+
@cached_property
def exports(self):
    """
    Return the information exported by this distribution.

    :return: The result of :meth:`read_exports` when the exports file is
             present in the ``.dist-info`` directory, otherwise an empty
             dictionary.
    """
    if self.get_distinfo_resource(EXPORTS_FILENAME):
        return self.read_exports()
    return {}
|
| 625 |
+
|
| 626 |
+
def read_exports(self):
    """
    Read exports data from a file in .ini format.

    :return: Whatever the module-level ``read_exports`` function parses
             from the exports file, or an empty dictionary when that file
             is not present.
    """
    resource = self.get_distinfo_resource(EXPORTS_FILENAME)
    if not resource:
        return {}
    with contextlib.closing(resource.as_stream()) as stream:
        parsed = read_exports(stream)
    return parsed
|
| 640 |
+
|
| 641 |
+
def write_exports(self, exports):
    """
    Write a dictionary of exports to a file in .ini format.

    :param exports: A dictionary of exports, mapping an export category to
                    a list of :class:`ExportEntry` instances describing the
                    individual export entries.
    """
    target = self.get_distinfo_file(EXPORTS_FILENAME)
    with open(target, 'w') as f:
        write_exports(exports, f)
|
| 651 |
+
|
| 652 |
+
def get_resource_path(self, relative_path):
    """
    NOTE: This API may change in the future.

    Look up a resource in the ``RESOURCES`` file of the ``.dist-info``
    directory.

    :param relative_path: The path, relative to .dist-info, of the resource
                          of interest.
    :return: The destination recorded for the first row whose relative
             path equals ``relative_path``.
    :raises KeyError: if no row matches.
    """
    resource = self.get_distinfo_resource('RESOURCES')
    with contextlib.closing(resource.as_stream()) as stream:
        with CSVReader(stream=stream) as reader:
            for entry in reader:
                relative, destination = entry
                if relative == relative_path:
                    return destination
    raise KeyError('no resource file with relative path %r '
                   'is installed' % relative_path)
|
| 671 |
+
|
| 672 |
+
def list_installed_files(self):
    """
    Iterate over the ``RECORD`` entries, yielding a tuple
    ``(path, hash, size)`` for each line.

    :returns: iterator of (path, hash, size)
    """
    records = self._get_records()
    for record in records:
        yield record
|
| 681 |
+
|
| 682 |
+
def write_installed_files(self, paths, prefix, dry_run=False):
    """
    Write the ``RECORD`` file, using the ``paths`` iterable passed in. Any
    existing ``RECORD`` file is silently overwritten.

    :param paths: Iterable of paths of the installed files.
    :param prefix: Used to determine when to write absolute paths.
    :param dry_run: If True, the action is logged but nothing is written.
    :return: The ``RECORD`` path (relative to the parent of the
             ``.dist-info`` directory when it lies under it), or ``None``
             for a dry run.
    """
    prefix = os.path.join(prefix, '')
    parent = os.path.dirname(self.path)
    parent_under_prefix = parent.startswith(prefix)
    parent = os.path.join(parent, '')
    record_path = self.get_distinfo_file('RECORD')
    logger.info('creating %s', record_path)
    if dry_run:
        return None
    with CSVWriter(record_path) as writer:
        for path in paths:
            skip_details = (os.path.isdir(path)
                            or path.endswith(('.pyc', '.pyo')))
            if skip_details:
                # do not put size and hash, as in PEP-376
                hash_value = size = ''
            else:
                size = '%d' % os.path.getsize(path)
                with open(path, 'rb') as fp:
                    hash_value = self.get_hash(fp.read())
            relativise = path.startswith(parent) or (
                parent_under_prefix and path.startswith(prefix))
            if relativise:
                path = os.path.relpath(path, parent)
            writer.writerow((path, hash_value, size))

        # add the RECORD file itself
        if record_path.startswith(parent):
            record_path = os.path.relpath(record_path, parent)
        writer.writerow((record_path, '', ''))
    return record_path
|
| 716 |
+
|
| 717 |
+
def check_installed_files(self):
    """
    Check that the hashes and sizes of the files in ``RECORD`` are
    matched by the files themselves.

    :return: A (possibly empty) list of mismatches. Each entry is a tuple
             of the path, one of 'exists', 'size' or 'hash' according to
             what didn't match (existence is checked first, then size,
             then hash), the expected value and the actual value.
    """
    problems = []
    parent = os.path.dirname(self.path)
    record_path = self.get_distinfo_file('RECORD')
    for path, hash_value, size in self.list_installed_files():
        if not os.path.isabs(path):
            path = os.path.join(parent, path)
        if path == record_path:
            continue
        if not os.path.exists(path):
            problems.append((path, 'exists', True, False))
            continue
        if not os.path.isfile(path):
            continue
        actual_size = str(os.path.getsize(path))
        if size and actual_size != size:
            problems.append((path, 'size', size, actual_size))
            continue
        if not hash_value:
            continue
        # A recorded hash of the form "algo=digest" names its algorithm.
        hasher = hash_value.split('=', 1)[0] if '=' in hash_value else None
        with open(path, 'rb') as f:
            actual_hash = self.get_hash(f.read(), hasher)
            if actual_hash != hash_value:
                problems.append((path, 'hash', hash_value, actual_hash))
    return problems
|
| 752 |
+
|
| 753 |
+
@cached_property
def shared_locations(self):
    """
    A dictionary of shared locations whose keys are in the set 'prefix',
    'purelib', 'platlib', 'scripts', 'headers', 'data' and 'namespace'.
    The corresponding value is the absolute path of that category for
    this distribution, and takes into account any paths selected by the
    user at installation time (e.g. via command-line arguments). In the
    case of the 'namespace' key, this would be a list of absolute paths
    for the roots of namespace packages in this distribution.

    The information is read from the ``key=value`` lines of the SHARED
    file in the .dist-info directory; the result is empty when that file
    does not exist.
    """
    locations = {}
    shared_path = os.path.join(self.path, 'SHARED')
    if not os.path.isfile(shared_path):
        return locations
    with codecs.open(shared_path, 'r', encoding='utf-8') as f:
        entries = f.read().splitlines()
    for entry in entries:
        key, value = entry.split('=', 1)
        if key != 'namespace':
            locations[key] = value
        else:
            # 'namespace' may occur several times; collect every value.
            locations.setdefault(key, []).append(value)
    return locations
|
| 779 |
+
|
| 780 |
+
def write_shared_locations(self, paths, dry_run=False):
    """
    Write shared location information to the SHARED file in .dist-info.

    :param paths: A dictionary as described in the documentation for
                  :meth:`shared_locations`. The 'prefix', 'lib',
                  'headers', 'scripts' and 'data' keys must be present;
                  each is written only if its value is an existing
                  directory. 'namespace' is optional.
    :param dry_run: If True, the action is logged but no file is actually
                    written.
    :return: The path of the file written to, or ``None`` for a dry run.
    """
    shared_path = os.path.join(self.path, 'SHARED')
    logger.info('creating %s', shared_path)
    if dry_run:
        return None
    entries = []
    for key in ('prefix', 'lib', 'headers', 'scripts', 'data'):
        location = paths[key]
        if os.path.isdir(location):
            entries.append('%s=%s' % (key, location))
    entries.extend('namespace=%s' % ns for ns in paths.get('namespace', ()))

    with codecs.open(shared_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(entries))
    return shared_path
|
| 804 |
+
|
| 805 |
+
def get_distinfo_resource(self, path):
    """
    Look up one of the known dist-info files through a resource finder.

    :param path: A file name that must be listed in ``DIST_FILES``.
    :return: The result of ``finder.find(path)`` for this distribution's
             ``.dist-info`` directory.
    :raises DistlibException: if ``path`` is not in ``DIST_FILES`` or no
                              finder is available for the directory.
    """
    if path not in DIST_FILES:
        raise DistlibException('invalid path for a dist-info file: '
                               '%r at %r' % (path, self.path))
    location = self.path
    finder = resources.finder_for_path(location)
    if finder is None:
        raise DistlibException('Unable to get a finder for %s' % location)
    return finder.find(path)
|
| 813 |
+
|
| 814 |
+
def get_distinfo_file(self, path):
    """
    Return a path located under the ``.dist-info`` directory, as a
    string.

    :parameter path: a ``'/'``-separated path relative to the
                     ``.dist-info`` directory or an absolute path;
                     If *path* is an absolute path and doesn't start
                     with the ``.dist-info`` directory path,
                     a :class:`DistlibException` is raised
    :type path: str
    :rtype: str
    """
    # Check if it is an absolute path  # XXX use relpath, add tests
    if os.sep in path:
        # it's an absolute path? Only its last two components are used.
        components = path.split(os.sep)
        distinfo_dirname, path = components[-2:]
        own_dirname = self.path.split(os.sep)[-1]
        if distinfo_dirname != own_dirname:
            raise DistlibException(
                'dist-info file %r does not belong to the %r %s '
                'distribution' % (path, self.name, self.version))

    # The file must be relative
    if path not in DIST_FILES:
        raise DistlibException('invalid path for a dist-info file: '
                               '%r at %r' % (path, self.path))

    return os.path.join(self.path, path)
|
| 842 |
+
|
| 843 |
+
def list_distinfo_files(self):
    """
    Iterate over the ``RECORD`` entries and yield the path for each line
    that points inside the ``.dist-info`` directory or one of its
    subdirectories.

    Relative record paths are joined to the parent of the ``.dist-info``
    directory before the test.

    :returns: iterator of paths
    """
    parent = os.path.dirname(self.path)
    # Compare against the directory path *with* a trailing separator so
    # that a sibling such as "foo.dist-info2/x" is not mistaken for a
    # file inside "foo.dist-info".
    inside_prefix = os.path.join(self.path, '')
    for path, checksum, size in self._get_records():
        if not os.path.isabs(path):
            path = os.path.join(parent, path)
        if path == self.path or path.startswith(inside_prefix):
            yield path
|
| 858 |
+
|
| 859 |
+
def __eq__(self, other):
    """Two installed distributions are equal when their paths are equal."""
    if not isinstance(other, InstalledDistribution):
        return False
    return self.path == other.path


# Defining __eq__ would otherwise leave the class unhashable, so the
# default identity hash is restored explicitly.
# See http://docs.python.org/reference/datamodel#object.__hash__
__hash__ = object.__hash__
|
| 865 |
+
|
| 866 |
+
|
| 867 |
+
class EggInfoDistribution(BaseInstalledDistribution):
|
| 868 |
+
"""Created with the *path* of the ``.egg-info`` directory or file provided
|
| 869 |
+
to the constructor. It reads the metadata contained in the file itself, or
|
| 870 |
+
if the given path happens to be a directory, the metadata is read from the
|
| 871 |
+
file ``PKG-INFO`` under that directory."""
|
| 872 |
+
|
| 873 |
+
requested = True # as we have no way of knowing, assume it was
|
| 874 |
+
shared_locations = {}
|
| 875 |
+
|
| 876 |
+
def __init__(self, path, env=None):
    """
    Set up a distribution from an ``.egg-info`` / ``.egg`` path.

    :param path: The path of the ``.egg-info`` directory or file, or of
                 an ``.egg``.
    :param env: Optional object providing ``_cache_enabled`` and
                ``_cache_egg``; when caching is enabled, cached metadata
                is reused, otherwise freshly read metadata is cached.
    """

    def _assign_identity(target, dist_name, dist_version):
        target.name = dist_name
        target.key = dist_name.lower()  # for case-insensitive comparisons
        target.version = dist_version

    self.path = path
    self.dist_path = env
    if env and env._cache_enabled and path in env._cache_egg.path:
        metadata = env._cache_egg.path[path].metadata
        _assign_identity(self, metadata.name, metadata.version)
    else:
        metadata = self._get_metadata(path)

        # Need to be set before caching
        _assign_identity(self, metadata.name, metadata.version)

        if env and env._cache_enabled:
            env._cache_egg.add(self)
    super(EggInfoDistribution, self).__init__(metadata, path, env)
|
| 897 |
+
|
| 898 |
+
def _get_metadata(self, path):
    """
    Read the metadata for an ``.egg`` or ``.egg-info`` path.

    Besides building the metadata, this adds any requirements found in
    ``requires.txt`` to it and stores the lines of ``top_level.txt`` (or
    an empty list) in ``self.modules``.

    :param path: A path ending in ``.egg`` (directory or zip file) or
                 ``.egg-info`` (directory or metadata file).
    :return: The :class:`Metadata` instance that was read.
    :raises DistlibException: if ``path`` has neither suffix.
    """
    requires = None

    def parse_requires_data(data):
        """Create a list of dependencies from a requires.txt file.

        *data*: the contents of a setuptools-produced requires.txt file.
        """
        reqs = []
        for line in data.splitlines():
            line = line.strip()
            # sectioned files have bare newlines (separating sections)
            if not line:  # pragma: no cover
                continue
            if line.startswith('['):  # pragma: no cover
                logger.warning(
                    'Unexpected line: quitting requirement scan: %r', line)
                break
            r = parse_requirement(line)
            if not r:  # pragma: no cover
                logger.warning('Not recognised as a requirement: %r', line)
                continue
            if r.extras:  # pragma: no cover
                logger.warning('extra requirements in requires.txt are '
                               'not supported')
            if not r.constraints:
                reqs.append(r.name)
            else:
                cons = ', '.join('%s%s' % c for c in r.constraints)
                reqs.append('%s (%s)' % (r.name, cons))
        return reqs

    def parse_requires_path(req_path):
        """Create a list of dependencies from a requires.txt file.

        *req_path*: the path to a setuptools-produced requires.txt file.
        An unreadable file yields an empty list.
        """
        reqs = []
        try:
            with codecs.open(req_path, 'r', 'utf-8') as fp:
                reqs = parse_requires_data(fp.read())
        except IOError:
            pass
        return reqs

    tl_path = tl_data = None
    if path.endswith('.egg'):
        if os.path.isdir(path):
            p = os.path.join(path, 'EGG-INFO')
            meta_path = os.path.join(p, 'PKG-INFO')
            metadata = Metadata(path=meta_path, scheme='legacy')
            req_path = os.path.join(p, 'requires.txt')
            tl_path = os.path.join(p, 'top_level.txt')
            requires = parse_requires_path(req_path)
        else:
            # FIXME handle the case where zipfile is not available
            zipf = zipimport.zipimporter(path)
            fileobj = StringIO(
                zipf.get_data('EGG-INFO/PKG-INFO').decode('utf8'))
            metadata = Metadata(fileobj=fileobj, scheme='legacy')
            # Read the two optional files independently, so that the
            # absence of one does not discard the contents of the other.
            try:
                data = zipf.get_data('EGG-INFO/requires.txt')
                requires = parse_requires_data(data.decode('utf-8'))
            except IOError:
                requires = None
            try:
                tl_data = zipf.get_data('EGG-INFO/top_level.txt').decode(
                    'utf-8')
            except IOError:
                tl_data = None
    elif path.endswith('.egg-info'):
        if os.path.isdir(path):
            req_path = os.path.join(path, 'requires.txt')
            requires = parse_requires_path(req_path)
            # Compute tl_path while ``path`` still names the directory;
            # it is rebound to the PKG-INFO file on the next line.
            tl_path = os.path.join(path, 'top_level.txt')
            path = os.path.join(path, 'PKG-INFO')
        metadata = Metadata(path=path, scheme='legacy')
    else:
        raise DistlibException('path must end with .egg-info or .egg, '
                               'got %r' % path)

    if requires:
        metadata.add_requirements(requires)
    # look for top-level modules in top_level.txt, if present
    if tl_data is None:
        if tl_path is not None and os.path.exists(tl_path):
            with open(tl_path, 'rb') as f:
                tl_data = f.read().decode('utf-8')
    if not tl_data:
        tl_data = []
    else:
        tl_data = tl_data.splitlines()
    self.modules = tl_data
    return metadata
|
| 991 |
+
|
| 992 |
+
def __repr__(self):
    """Return a debugging representation with name, version and path."""
    fields = (self.name, self.version, self.path)
    return '<EggInfoDistribution %r %s at %r>' % fields
|
| 995 |
+
|
| 996 |
+
def __str__(self):
    """Return the name and version separated by a space."""
    fields = (self.name, self.version)
    return "%s %s" % fields
|
| 998 |
+
|
| 999 |
+
def check_installed_files(self):
    """
    Check that the files listed in ``installed-files.txt`` exist.

    Only existence is checked here; nothing is reported when
    ``installed-files.txt`` itself is absent.

    :return: A (possibly empty) list of mismatches, each a tuple
             ``(path, 'exists', True, False)`` for a listed file that is
             missing.
    """
    record_path = os.path.join(self.path, 'installed-files.txt')
    if not os.path.exists(record_path):
        return []
    return [(path, 'exists', True, False)
            for path, _, _ in self.list_installed_files()
            if path != record_path and not os.path.exists(path)]
|
| 1017 |
+
|
| 1018 |
+
def list_installed_files(self):
    """
    Read ``installed-files.txt`` and return a tuple ``(path, hash, size)``
    for each listed entry that is not a directory, followed by an entry
    for ``installed-files.txt`` itself with ``None`` hash and size.

    Paths are resolved relative to the ``.egg-info`` path and normalised;
    the hash is the MD5 hex digest of the file's contents. Missing
    ``.pyc``/``.pyo`` files are skipped with a warning.

    :returns: a list of (path, hash, size); empty when
              ``installed-files.txt`` does not exist
    """

    def _md5(path):
        with open(path, 'rb') as stream:
            content = stream.read()
        return hashlib.md5(content).hexdigest()

    def _size(path):
        return os.stat(path).st_size

    record_path = os.path.join(self.path, 'installed-files.txt')
    entries = []
    if not os.path.exists(record_path):
        return entries
    with codecs.open(record_path, 'r', encoding='utf-8') as f:
        for raw in f:
            p = os.path.normpath(os.path.join(self.path, raw.strip()))
            # "./" is present as a marker between installed files
            # and installation metadata files
            if not os.path.exists(p):
                logger.warning('Non-existent file: %s', p)
                if p.endswith(('.pyc', '.pyo')):
                    continue
                # otherwise fall through and fail
            if os.path.isdir(p):
                continue
            entries.append((p, _md5(p), _size(p)))
    entries.append((record_path, None, None))
    return entries
|
| 1055 |
+
|
| 1056 |
+
def list_distinfo_files(self, absolute=False):
    """
    Iterate over the ``installed-files.txt`` entries and yield the path
    for each line that points inside the ``.egg-info`` directory or one
    of its subdirectories.

    Only lines after the ``./`` marker line are considered.

    :parameter absolute: If *absolute* is ``True``, each returned path is
                      transformed into a local absolute path. Otherwise the
                      raw value from ``installed-files.txt`` is returned.
    :type absolute: boolean
    :returns: iterator of paths
    """
    record_path = os.path.join(self.path, 'installed-files.txt')
    if not os.path.exists(record_path):
        return
    # Compare against the directory path *with* a trailing separator so
    # that a sibling such as "pkg.egg-info2/x" is not mistaken for a file
    # inside "pkg.egg-info".
    inside_prefix = os.path.join(self.path, '')
    skip = True
    with codecs.open(record_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if line == './':
                skip = False
                continue
            if skip:
                continue
            p = os.path.normpath(os.path.join(self.path, line))
            if p == self.path or p.startswith(inside_prefix):
                yield p if absolute else line
| 1085 |
+
def __eq__(self, other):
    """Two egg-info distributions are equal when their paths are equal."""
    if not isinstance(other, EggInfoDistribution):
        return False
    return self.path == other.path


# Defining __eq__ would otherwise leave the class unhashable, so the
# default identity hash is restored explicitly.
# See http://docs.python.org/reference/datamodel#object.__hash__
__hash__ = object.__hash__
|
| 1091 |
+
|
| 1092 |
+
|
| 1093 |
+
new_dist_class = InstalledDistribution
|
| 1094 |
+
old_dist_class = EggInfoDistribution
|
| 1095 |
+
|
| 1096 |
+
|
| 1097 |
+
class DependencyGraph(object):
|
| 1098 |
+
"""
|
| 1099 |
+
Represents a dependency graph between distributions.
|
| 1100 |
+
|
| 1101 |
+
The dependency relationships are stored in an ``adjacency_list`` that maps
|
| 1102 |
+
distributions to a list of ``(other, label)`` tuples where ``other``
|
| 1103 |
+
is a distribution and the edge is labeled with ``label`` (i.e. the version
|
| 1104 |
+
specifier, if such was provided). Also, for more efficient traversal, for
|
| 1105 |
+
every distribution ``x``, a list of predecessors is kept in
|
| 1106 |
+
``reverse_list[x]``. An edge from distribution ``a`` to
|
| 1107 |
+
distribution ``b`` means that ``a`` depends on ``b``. If any missing
|
| 1108 |
+
dependencies are found, they are stored in ``missing``, which is a
|
| 1109 |
+
dictionary that maps distributions to a list of requirements that were not
|
| 1110 |
+
provided by any other distributions.
|
| 1111 |
+
"""
|
| 1112 |
+
|
| 1113 |
+
def __init__(self):
    """Create an empty graph: no nodes, no edges, nothing missing."""
    self.adjacency_list = dict()
    self.reverse_list = dict()
    self.missing = dict()
|
| 1117 |
+
|
| 1118 |
+
def add_distribution(self, distribution):
    """Add the *distribution* to the graph.

    Its successor and predecessor lists are (re)set to empty lists; the
    ``missing`` mapping is not touched.

    :type distribution: :class:`distutils2.database.InstalledDistribution`
                        or :class:`distutils2.database.EggInfoDistribution`
    """
    for table in (self.adjacency_list, self.reverse_list):
        table[distribution] = []
|
| 1127 |
+
|
| 1128 |
+
def add_edge(self, x, y, label=None):
    """Add an edge from distribution *x* to distribution *y* with the given
    *label*.

    Both distributions must already have been added to the graph.

    :type x: :class:`distutils2.database.InstalledDistribution` or
             :class:`distutils2.database.EggInfoDistribution`
    :type y: :class:`distutils2.database.InstalledDistribution` or
             :class:`distutils2.database.EggInfoDistribution`
    :type label: ``str`` or ``None``
    """
    self.adjacency_list[x].append((y, label))
    # multiple edges are allowed, so record each predecessor only once
    predecessors = self.reverse_list[y]
    if x not in predecessors:
        predecessors.append(x)
|
| 1142 |
+
|
| 1143 |
+
def add_missing(self, distribution, requirement):
    """
    Add a missing *requirement* for the given *distribution*.

    :type distribution: :class:`distutils2.database.InstalledDistribution`
                        or :class:`distutils2.database.EggInfoDistribution`
    :type requirement: ``str``
    """
    logger.debug('%s missing %r', distribution, requirement)
    unmet = self.missing.setdefault(distribution, [])
    unmet.append(requirement)
|
| 1153 |
+
|
| 1154 |
+
def _repr_dist(self, dist):
    """Return the ``"<name> <version>"`` display string for *dist*."""
    pieces = (str(dist.name), str(dist.version))
    return ' '.join(pieces)
|
| 1156 |
+
|
| 1157 |
+
def repr_node(self, dist, level=1, _ancestors=None):
    """Return a textual rendering of the subgraph reachable from *dist*.

    The first line describes *dist* itself; every dependency follows on its
    own line, indented according to its depth, with the edge label (when
    there is one) appended in square brackets.

    Dependency cycles are rendered once: a distribution that is already on
    the path from the root to the current node is listed but not expanded
    again, so the recursion always terminates.

    :param dist: the distribution to start from (must be in the graph).
    :param level: indentation depth used for the direct dependencies.
    :param _ancestors: internal -- tuple of distributions on the current
                       path; callers should not pass it.
    :return: the rendering as a single newline-joined string.
    """
    ancestors = (_ancestors or ()) + (dist,)
    output = [self._repr_dist(dist)]
    for other, label in self.adjacency_list[dist]:
        text = self._repr_dist(other)
        if label is not None:
            text = '%s [%s]' % (text, label)
        # NOTE(review): the indent unit is reproduced as it appears in this
        # extract, where runs of whitespace look collapsed -- confirm the
        # intended width against the upstream file.
        output.append(' ' * level + str(text))
        if other in ancestors:
            # Circular dependency: expanding again would never end.
            continue
        suboutput = self.repr_node(other, level + 1, ancestors)
        # The first line of the sub-rendering repeats *other*, which has
        # already been emitted above with its label.
        output.extend(suboutput.split('\n')[1:])
    return '\n'.join(output)
|
| 1169 |
+
|
| 1170 |
+
def to_dot(self, f, skip_disconnected=True):
    """Write the graph to *f* in Graphviz DOT syntax.

    Every edge becomes one ``"source" -> "target"`` line, carrying a
    ``label`` attribute when the edge has a label. Distributions without
    any outgoing edge are either left out (*skip_disconnected* true) or
    listed afterwards inside a ``disconnected`` subgraph.

    :type f: has to support ``file``-like operations
    :type skip_disconnected: ``bool``
    """
    isolated = []

    f.write("digraph dependencies {\n")
    for node, edges in self.adjacency_list.items():
        if not edges:
            if not skip_disconnected:
                isolated.append(node)
            continue
        for target, label in edges:
            if label is None:
                line = '"%s" -> "%s"\n' % (node.name, target.name)
            else:
                line = '"%s" -> "%s" [label="%s"]\n' % (
                    node.name, target.name, label)
            f.write(line)
    # ``isolated`` is only ever filled when skip_disconnected is false.
    if isolated:
        for header in ('subgraph disconnected {\n',
                       'label = "Disconnected"\n',
                       'bgcolor = red\n'):
            f.write(header)

        for node in isolated:
            f.write('"%s"' % node.name)
            f.write('\n')
        f.write('}\n')
    f.write('}\n')
|
| 1201 |
+
|
| 1202 |
+
def topological_sort(self):
    """
    Perform a topological sort of the graph.

    Distributions without outstanding dependencies are peeled off in
    rounds; each round's distributions are appended to the result and
    removed from the dependency lists of those that remain. The graph
    itself is not modified.

    :return: A tuple, the first element of which is a topologically sorted
             list of distributions, and the second element of which is a
             list of distributions that cannot be sorted because they have
             circular dependencies and so form a cycle.
    """
    result = []
    # Work on a copy (one level deep) so the graph's own lists survive.
    alist = dict((k, list(v)) for k, v in self.adjacency_list.items())
    while True:
        # See what we can remove in this run
        to_remove = [k for k, v in alist.items() if not v]
        if not to_remove:
            # What's left in alist (if anything) is a cycle.
            break
        # A set makes the membership tests below constant-time; the list
        # is kept because it preserves the order of the result.
        removed = set(to_remove)
        for k in to_remove:
            del alist[k]
        # Remove from the adjacency list of others
        for k, v in alist.items():
            alist[k] = [(d, r) for d, r in v if d not in removed]
        logger.debug('Moving to result: %s',
                     ['%s (%s)' % (d.name, d.version) for d in to_remove])
        result.extend(to_remove)
    return result, list(alist.keys())
|
| 1232 |
+
|
| 1233 |
+
def __repr__(self):
    """Render every distribution's subgraph, one after another."""
    return '\n'.join([self.repr_node(dist) for dist in self.adjacency_list])
|
| 1239 |
+
|
| 1240 |
+
|
| 1241 |
+
def make_graph(dists, scheme='default'):
    """Build a dependency graph for the given distributions.

    Each requirement of each distribution is linked to the first provider
    (in *dists* order) whose provided version satisfies it; requirements
    with no such provider are recorded as missing on the graph.

    :parameter dists: a list of distributions
    :type dists: list of :class:`distutils2.database.InstalledDistribution` and
                 :class:`distutils2.database.EggInfoDistribution` instances
    :parameter scheme: name of the version scheme, resolved via ``get_scheme``
    :rtype: a :class:`DependencyGraph` instance
    """
    scheme = get_scheme(scheme)
    graph = DependencyGraph()
    providers_by_name = {}  # name -> list of (version, dist) tuples

    # Pass 1: register every node and index what each one provides.
    for dist in dists:
        graph.add_distribution(dist)

        for provision in dist.provides:
            name, version = parse_name_and_version(provision)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            if name not in providers_by_name:
                providers_by_name[name] = []
            providers_by_name[name].append((version, dist))

    # Pass 2: connect each requirement to a provider, or flag it missing.
    for dist in dists:
        requires = (dist.run_requires | dist.meta_requires
                    | dist.build_requires | dist.dev_requires)
        for req in requires:
            try:
                matcher = scheme.matcher(req)
            except UnsupportedVersionError:
                # XXX compat-mode if cannot read the version
                logger.warning('could not read version %r - using name only',
                               req)
                matcher = scheme.matcher(req.split()[0])

            key = matcher.key  # case-insensitive

            for version, provider in providers_by_name.get(key, ()):
                try:
                    satisfied = matcher.match(version)
                except UnsupportedVersionError:
                    satisfied = False

                if satisfied:
                    graph.add_edge(dist, provider, req)
                    break
            else:
                # No candidate at all, or none with an acceptable version.
                graph.add_missing(dist, req)
    return graph
|
| 1293 |
+
|
| 1294 |
+
|
| 1295 |
+
def get_dependent_dists(dists, dist):
    """Recursively generate a list of distributions from *dists* that are
    dependent on *dist*.

    Each dependent distribution appears exactly once in the result, and
    *dist* itself is never included, even when it is part of a cycle.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
    :return: list of the distributions that directly or transitively
             depend on *dist*
    :raises DistlibException: if *dist* is not a member of *dists*
    """
    if dist not in dists:
        raise DistlibException('given distribution %r is not a member '
                               'of the list' % dist.name)
    graph = make_graph(dists)

    dep = []  # dependent distributions, in discovery order
    # Copy the list so that popping does not empty the graph's own data.
    todo = list(graph.reverse_list[dist])  # nodes we should inspect
    # Everything that has ever been queued, plus *dist* itself. Checking
    # this instead of ``dep`` stops a node from being queued (and therefore
    # reported) more than once, and stops cycles from looping forever.
    seen = set(todo)
    seen.add(dist)

    while todo:
        d = todo.pop()
        dep.append(d)
        for succ in graph.reverse_list[d]:
            if succ not in seen:
                seen.add(succ)
                todo.append(succ)

    return dep
|
| 1319 |
+
|
| 1320 |
+
|
| 1321 |
+
def get_required_dists(dists, dist):
    """Collect the distributions from *dists* that *dist* requires, directly
    or through other distributions.

    :param dists: a list of distributions
    :param dist: a distribution, member of *dists* for which we are interested
                 in finding the dependencies.
    :return: a set of the required distributions
    :raises DistlibException: if *dist* is not a member of *dists*
    """
    if dist not in dists:
        raise DistlibException(
            'given distribution %r is not a member of the list' % dist.name)
    edges = make_graph(dists).adjacency_list

    pending = edges[dist]  # (distribution, label) edges still to follow
    queued = set(edge[0] for edge in pending)  # targets ever put on pending
    required = set()

    while pending:
        current = pending.pop()[0]
        required.add(current)
        for edge in edges[current]:
            candidate = edge[0]
            if candidate in required or candidate in queued:
                continue
            queued.add(candidate)
            pending.append(edge)
    return required
|
| 1348 |
+
|
| 1349 |
+
|
| 1350 |
+
def make_dist(name, version, **kwargs):
    """
    Build a distribution from just a name and a version.

    A ``summary`` keyword, if given and non-empty, is used as the summary;
    otherwise a placeholder text is stored. All remaining keyword arguments
    are passed on to ``Metadata``.
    """
    placeholder = 'Placeholder for summary'
    summary = kwargs.pop('summary', placeholder) or placeholder
    md = Metadata(**kwargs)
    md.name = name
    md.version = version
    md.summary = summary
    return Distribution(md)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/index.py
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2013-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
import hashlib
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import shutil
|
| 11 |
+
import subprocess
|
| 12 |
+
import tempfile
|
| 13 |
+
try:
|
| 14 |
+
from threading import Thread
|
| 15 |
+
except ImportError: # pragma: no cover
|
| 16 |
+
from dummy_threading import Thread
|
| 17 |
+
|
| 18 |
+
from . import DistlibException
|
| 19 |
+
from .compat import (HTTPBasicAuthHandler, Request, HTTPPasswordMgr,
|
| 20 |
+
urlparse, build_opener, string_types)
|
| 21 |
+
from .util import zip_dir, ServerProxy
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
DEFAULT_INDEX = 'https://pypi.org/pypi'
|
| 26 |
+
DEFAULT_REALM = 'pypi'
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
class PackageIndex(object):
|
| 30 |
+
"""
|
| 31 |
+
This class represents a package index compatible with PyPI, the Python
|
| 32 |
+
Package Index.
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
boundary = b'----------ThIs_Is_tHe_distlib_index_bouNdaRY_$'
|
| 36 |
+
|
| 37 |
+
def __init__(self, url=None):
|
| 38 |
+
"""
|
| 39 |
+
Initialise an instance.
|
| 40 |
+
|
| 41 |
+
:param url: The URL of the index. If not specified, the URL for PyPI is
|
| 42 |
+
used.
|
| 43 |
+
"""
|
| 44 |
+
self.url = url or DEFAULT_INDEX
|
| 45 |
+
self.read_configuration()
|
| 46 |
+
scheme, netloc, path, params, query, frag = urlparse(self.url)
|
| 47 |
+
if params or query or frag or scheme not in ('http', 'https'):
|
| 48 |
+
raise DistlibException('invalid repository: %s' % self.url)
|
| 49 |
+
self.password_handler = None
|
| 50 |
+
self.ssl_verifier = None
|
| 51 |
+
self.gpg = None
|
| 52 |
+
self.gpg_home = None
|
| 53 |
+
with open(os.devnull, 'w') as sink:
|
| 54 |
+
# Use gpg by default rather than gpg2, as gpg2 insists on
|
| 55 |
+
# prompting for passwords
|
| 56 |
+
for s in ('gpg', 'gpg2'):
|
| 57 |
+
try:
|
| 58 |
+
rc = subprocess.check_call([s, '--version'], stdout=sink,
|
| 59 |
+
stderr=sink)
|
| 60 |
+
if rc == 0:
|
| 61 |
+
self.gpg = s
|
| 62 |
+
break
|
| 63 |
+
except OSError:
|
| 64 |
+
pass
|
| 65 |
+
|
| 66 |
+
def _get_pypirc_command(self):
|
| 67 |
+
"""
|
| 68 |
+
Get the distutils command for interacting with PyPI configurations.
|
| 69 |
+
:return: the command.
|
| 70 |
+
"""
|
| 71 |
+
from .util import _get_pypirc_command as cmd
|
| 72 |
+
return cmd()
|
| 73 |
+
|
| 74 |
+
def read_configuration(self):
|
| 75 |
+
"""
|
| 76 |
+
Read the PyPI access configuration as supported by distutils. This populates
|
| 77 |
+
``username``, ``password``, ``realm`` and ``url`` attributes from the
|
| 78 |
+
configuration.
|
| 79 |
+
"""
|
| 80 |
+
from .util import _load_pypirc
|
| 81 |
+
cfg = _load_pypirc(self)
|
| 82 |
+
self.username = cfg.get('username')
|
| 83 |
+
self.password = cfg.get('password')
|
| 84 |
+
self.realm = cfg.get('realm', 'pypi')
|
| 85 |
+
self.url = cfg.get('repository', self.url)
|
| 86 |
+
|
| 87 |
+
def save_configuration(self):
|
| 88 |
+
"""
|
| 89 |
+
Save the PyPI access configuration. You must have set ``username`` and
|
| 90 |
+
``password`` attributes before calling this method.
|
| 91 |
+
"""
|
| 92 |
+
self.check_credentials()
|
| 93 |
+
from .util import _store_pypirc
|
| 94 |
+
_store_pypirc(self)
|
| 95 |
+
|
| 96 |
+
def check_credentials(self):
|
| 97 |
+
"""
|
| 98 |
+
Check that ``username`` and ``password`` have been set, and raise an
|
| 99 |
+
exception if not.
|
| 100 |
+
"""
|
| 101 |
+
if self.username is None or self.password is None:
|
| 102 |
+
raise DistlibException('username and password must be set')
|
| 103 |
+
pm = HTTPPasswordMgr()
|
| 104 |
+
_, netloc, _, _, _, _ = urlparse(self.url)
|
| 105 |
+
pm.add_password(self.realm, netloc, self.username, self.password)
|
| 106 |
+
self.password_handler = HTTPBasicAuthHandler(pm)
|
| 107 |
+
|
| 108 |
+
def register(self, metadata): # pragma: no cover
|
| 109 |
+
"""
|
| 110 |
+
Register a distribution on PyPI, using the provided metadata.
|
| 111 |
+
|
| 112 |
+
:param metadata: A :class:`Metadata` instance defining at least a name
|
| 113 |
+
and version number for the distribution to be
|
| 114 |
+
registered.
|
| 115 |
+
:return: The HTTP response received from PyPI upon submission of the
|
| 116 |
+
request.
|
| 117 |
+
"""
|
| 118 |
+
self.check_credentials()
|
| 119 |
+
metadata.validate()
|
| 120 |
+
d = metadata.todict()
|
| 121 |
+
d[':action'] = 'verify'
|
| 122 |
+
request = self.encode_request(d.items(), [])
|
| 123 |
+
self.send_request(request)
|
| 124 |
+
d[':action'] = 'submit'
|
| 125 |
+
request = self.encode_request(d.items(), [])
|
| 126 |
+
return self.send_request(request)
|
| 127 |
+
|
| 128 |
+
def _reader(self, name, stream, outbuf):
|
| 129 |
+
"""
|
| 130 |
+
Thread runner for reading lines of from a subprocess into a buffer.
|
| 131 |
+
|
| 132 |
+
:param name: The logical name of the stream (used for logging only).
|
| 133 |
+
:param stream: The stream to read from. This will typically a pipe
|
| 134 |
+
connected to the output stream of a subprocess.
|
| 135 |
+
:param outbuf: The list to append the read lines to.
|
| 136 |
+
"""
|
| 137 |
+
while True:
|
| 138 |
+
s = stream.readline()
|
| 139 |
+
if not s:
|
| 140 |
+
break
|
| 141 |
+
s = s.decode('utf-8').rstrip()
|
| 142 |
+
outbuf.append(s)
|
| 143 |
+
logger.debug('%s: %s' % (name, s))
|
| 144 |
+
stream.close()
|
| 145 |
+
|
| 146 |
+
def get_sign_command(self, filename, signer, sign_password, keystore=None): # pragma: no cover
|
| 147 |
+
"""
|
| 148 |
+
Return a suitable command for signing a file.
|
| 149 |
+
|
| 150 |
+
:param filename: The pathname to the file to be signed.
|
| 151 |
+
:param signer: The identifier of the signer of the file.
|
| 152 |
+
:param sign_password: The passphrase for the signer's
|
| 153 |
+
private key used for signing.
|
| 154 |
+
:param keystore: The path to a directory which contains the keys
|
| 155 |
+
used in verification. If not specified, the
|
| 156 |
+
instance's ``gpg_home`` attribute is used instead.
|
| 157 |
+
:return: The signing command as a list suitable to be
|
| 158 |
+
passed to :class:`subprocess.Popen`.
|
| 159 |
+
"""
|
| 160 |
+
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
|
| 161 |
+
if keystore is None:
|
| 162 |
+
keystore = self.gpg_home
|
| 163 |
+
if keystore:
|
| 164 |
+
cmd.extend(['--homedir', keystore])
|
| 165 |
+
if sign_password is not None:
|
| 166 |
+
cmd.extend(['--batch', '--passphrase-fd', '0'])
|
| 167 |
+
td = tempfile.mkdtemp()
|
| 168 |
+
sf = os.path.join(td, os.path.basename(filename) + '.asc')
|
| 169 |
+
cmd.extend(['--detach-sign', '--armor', '--local-user',
|
| 170 |
+
signer, '--output', sf, filename])
|
| 171 |
+
logger.debug('invoking: %s', ' '.join(cmd))
|
| 172 |
+
return cmd, sf
|
| 173 |
+
|
| 174 |
+
def run_command(self, cmd, input_data=None):
|
| 175 |
+
"""
|
| 176 |
+
Run a command in a child process , passing it any input data specified.
|
| 177 |
+
|
| 178 |
+
:param cmd: The command to run.
|
| 179 |
+
:param input_data: If specified, this must be a byte string containing
|
| 180 |
+
data to be sent to the child process.
|
| 181 |
+
:return: A tuple consisting of the subprocess' exit code, a list of
|
| 182 |
+
lines read from the subprocess' ``stdout``, and a list of
|
| 183 |
+
lines read from the subprocess' ``stderr``.
|
| 184 |
+
"""
|
| 185 |
+
kwargs = {
|
| 186 |
+
'stdout': subprocess.PIPE,
|
| 187 |
+
'stderr': subprocess.PIPE,
|
| 188 |
+
}
|
| 189 |
+
if input_data is not None:
|
| 190 |
+
kwargs['stdin'] = subprocess.PIPE
|
| 191 |
+
stdout = []
|
| 192 |
+
stderr = []
|
| 193 |
+
p = subprocess.Popen(cmd, **kwargs)
|
| 194 |
+
# We don't use communicate() here because we may need to
|
| 195 |
+
# get clever with interacting with the command
|
| 196 |
+
t1 = Thread(target=self._reader, args=('stdout', p.stdout, stdout))
|
| 197 |
+
t1.start()
|
| 198 |
+
t2 = Thread(target=self._reader, args=('stderr', p.stderr, stderr))
|
| 199 |
+
t2.start()
|
| 200 |
+
if input_data is not None:
|
| 201 |
+
p.stdin.write(input_data)
|
| 202 |
+
p.stdin.close()
|
| 203 |
+
|
| 204 |
+
p.wait()
|
| 205 |
+
t1.join()
|
| 206 |
+
t2.join()
|
| 207 |
+
return p.returncode, stdout, stderr
|
| 208 |
+
|
| 209 |
+
def sign_file(self, filename, signer, sign_password, keystore=None): # pragma: no cover
|
| 210 |
+
"""
|
| 211 |
+
Sign a file.
|
| 212 |
+
|
| 213 |
+
:param filename: The pathname to the file to be signed.
|
| 214 |
+
:param signer: The identifier of the signer of the file.
|
| 215 |
+
:param sign_password: The passphrase for the signer's
|
| 216 |
+
private key used for signing.
|
| 217 |
+
:param keystore: The path to a directory which contains the keys
|
| 218 |
+
used in signing. If not specified, the instance's
|
| 219 |
+
``gpg_home`` attribute is used instead.
|
| 220 |
+
:return: The absolute pathname of the file where the signature is
|
| 221 |
+
stored.
|
| 222 |
+
"""
|
| 223 |
+
cmd, sig_file = self.get_sign_command(filename, signer, sign_password,
|
| 224 |
+
keystore)
|
| 225 |
+
rc, stdout, stderr = self.run_command(cmd,
|
| 226 |
+
sign_password.encode('utf-8'))
|
| 227 |
+
if rc != 0:
|
| 228 |
+
raise DistlibException('sign command failed with error '
|
| 229 |
+
'code %s' % rc)
|
| 230 |
+
return sig_file
|
| 231 |
+
|
| 232 |
+
def upload_file(self, metadata, filename, signer=None, sign_password=None,
|
| 233 |
+
filetype='sdist', pyversion='source', keystore=None):
|
| 234 |
+
"""
|
| 235 |
+
Upload a release file to the index.
|
| 236 |
+
|
| 237 |
+
:param metadata: A :class:`Metadata` instance defining at least a name
|
| 238 |
+
and version number for the file to be uploaded.
|
| 239 |
+
:param filename: The pathname of the file to be uploaded.
|
| 240 |
+
:param signer: The identifier of the signer of the file.
|
| 241 |
+
:param sign_password: The passphrase for the signer's
|
| 242 |
+
private key used for signing.
|
| 243 |
+
:param filetype: The type of the file being uploaded. This is the
|
| 244 |
+
distutils command which produced that file, e.g.
|
| 245 |
+
``sdist`` or ``bdist_wheel``.
|
| 246 |
+
:param pyversion: The version of Python which the release relates
|
| 247 |
+
to. For code compatible with any Python, this would
|
| 248 |
+
be ``source``, otherwise it would be e.g. ``3.2``.
|
| 249 |
+
:param keystore: The path to a directory which contains the keys
|
| 250 |
+
used in signing. If not specified, the instance's
|
| 251 |
+
``gpg_home`` attribute is used instead.
|
| 252 |
+
:return: The HTTP response received from PyPI upon submission of the
|
| 253 |
+
request.
|
| 254 |
+
"""
|
| 255 |
+
self.check_credentials()
|
| 256 |
+
if not os.path.exists(filename):
|
| 257 |
+
raise DistlibException('not found: %s' % filename)
|
| 258 |
+
metadata.validate()
|
| 259 |
+
d = metadata.todict()
|
| 260 |
+
sig_file = None
|
| 261 |
+
if signer:
|
| 262 |
+
if not self.gpg:
|
| 263 |
+
logger.warning('no signing program available - not signed')
|
| 264 |
+
else:
|
| 265 |
+
sig_file = self.sign_file(filename, signer, sign_password,
|
| 266 |
+
keystore)
|
| 267 |
+
with open(filename, 'rb') as f:
|
| 268 |
+
file_data = f.read()
|
| 269 |
+
md5_digest = hashlib.md5(file_data).hexdigest()
|
| 270 |
+
sha256_digest = hashlib.sha256(file_data).hexdigest()
|
| 271 |
+
d.update({
|
| 272 |
+
':action': 'file_upload',
|
| 273 |
+
'protocol_version': '1',
|
| 274 |
+
'filetype': filetype,
|
| 275 |
+
'pyversion': pyversion,
|
| 276 |
+
'md5_digest': md5_digest,
|
| 277 |
+
'sha256_digest': sha256_digest,
|
| 278 |
+
})
|
| 279 |
+
files = [('content', os.path.basename(filename), file_data)]
|
| 280 |
+
if sig_file:
|
| 281 |
+
with open(sig_file, 'rb') as f:
|
| 282 |
+
sig_data = f.read()
|
| 283 |
+
files.append(('gpg_signature', os.path.basename(sig_file),
|
| 284 |
+
sig_data))
|
| 285 |
+
shutil.rmtree(os.path.dirname(sig_file))
|
| 286 |
+
request = self.encode_request(d.items(), files)
|
| 287 |
+
return self.send_request(request)
|
| 288 |
+
|
| 289 |
+
def upload_documentation(self, metadata, doc_dir): # pragma: no cover
|
| 290 |
+
"""
|
| 291 |
+
Upload documentation to the index.
|
| 292 |
+
|
| 293 |
+
:param metadata: A :class:`Metadata` instance defining at least a name
|
| 294 |
+
and version number for the documentation to be
|
| 295 |
+
uploaded.
|
| 296 |
+
:param doc_dir: The pathname of the directory which contains the
|
| 297 |
+
documentation. This should be the directory that
|
| 298 |
+
contains the ``index.html`` for the documentation.
|
| 299 |
+
:return: The HTTP response received from PyPI upon submission of the
|
| 300 |
+
request.
|
| 301 |
+
"""
|
| 302 |
+
self.check_credentials()
|
| 303 |
+
if not os.path.isdir(doc_dir):
|
| 304 |
+
raise DistlibException('not a directory: %r' % doc_dir)
|
| 305 |
+
fn = os.path.join(doc_dir, 'index.html')
|
| 306 |
+
if not os.path.exists(fn):
|
| 307 |
+
raise DistlibException('not found: %r' % fn)
|
| 308 |
+
metadata.validate()
|
| 309 |
+
name, version = metadata.name, metadata.version
|
| 310 |
+
zip_data = zip_dir(doc_dir).getvalue()
|
| 311 |
+
fields = [(':action', 'doc_upload'),
|
| 312 |
+
('name', name), ('version', version)]
|
| 313 |
+
files = [('content', name, zip_data)]
|
| 314 |
+
request = self.encode_request(fields, files)
|
| 315 |
+
return self.send_request(request)
|
| 316 |
+
|
| 317 |
+
def get_verify_command(self, signature_filename, data_filename,
|
| 318 |
+
keystore=None):
|
| 319 |
+
"""
|
| 320 |
+
Return a suitable command for verifying a file.
|
| 321 |
+
|
| 322 |
+
:param signature_filename: The pathname to the file containing the
|
| 323 |
+
signature.
|
| 324 |
+
:param data_filename: The pathname to the file containing the
|
| 325 |
+
signed data.
|
| 326 |
+
:param keystore: The path to a directory which contains the keys
|
| 327 |
+
used in verification. If not specified, the
|
| 328 |
+
instance's ``gpg_home`` attribute is used instead.
|
| 329 |
+
:return: The verifying command as a list suitable to be
|
| 330 |
+
passed to :class:`subprocess.Popen`.
|
| 331 |
+
"""
|
| 332 |
+
cmd = [self.gpg, '--status-fd', '2', '--no-tty']
|
| 333 |
+
if keystore is None:
|
| 334 |
+
keystore = self.gpg_home
|
| 335 |
+
if keystore:
|
| 336 |
+
cmd.extend(['--homedir', keystore])
|
| 337 |
+
cmd.extend(['--verify', signature_filename, data_filename])
|
| 338 |
+
logger.debug('invoking: %s', ' '.join(cmd))
|
| 339 |
+
return cmd
|
| 340 |
+
|
| 341 |
+
def verify_signature(self, signature_filename, data_filename,
|
| 342 |
+
keystore=None):
|
| 343 |
+
"""
|
| 344 |
+
Verify a signature for a file.
|
| 345 |
+
|
| 346 |
+
:param signature_filename: The pathname to the file containing the
|
| 347 |
+
signature.
|
| 348 |
+
:param data_filename: The pathname to the file containing the
|
| 349 |
+
signed data.
|
| 350 |
+
:param keystore: The path to a directory which contains the keys
|
| 351 |
+
used in verification. If not specified, the
|
| 352 |
+
instance's ``gpg_home`` attribute is used instead.
|
| 353 |
+
:return: True if the signature was verified, else False.
|
| 354 |
+
"""
|
| 355 |
+
if not self.gpg:
|
| 356 |
+
raise DistlibException('verification unavailable because gpg '
|
| 357 |
+
'unavailable')
|
| 358 |
+
cmd = self.get_verify_command(signature_filename, data_filename,
|
| 359 |
+
keystore)
|
| 360 |
+
rc, stdout, stderr = self.run_command(cmd)
|
| 361 |
+
if rc not in (0, 1):
|
| 362 |
+
raise DistlibException('verify command failed with error code %s' % rc)
|
| 363 |
+
return rc == 0
|
| 364 |
+
|
| 365 |
+
def download_file(self, url, destfile, digest=None, reporthook=None):
|
| 366 |
+
"""
|
| 367 |
+
This is a convenience method for downloading a file from an URL.
|
| 368 |
+
Normally, this will be a file from the index, though currently
|
| 369 |
+
no check is made for this (i.e. a file can be downloaded from
|
| 370 |
+
anywhere).
|
| 371 |
+
|
| 372 |
+
The method is just like the :func:`urlretrieve` function in the
|
| 373 |
+
standard library, except that it allows digest computation to be
|
| 374 |
+
done during download and checking that the downloaded data
|
| 375 |
+
matched any expected value.
|
| 376 |
+
|
| 377 |
+
:param url: The URL of the file to be downloaded (assumed to be
|
| 378 |
+
available via an HTTP GET request).
|
| 379 |
+
:param destfile: The pathname where the downloaded file is to be
|
| 380 |
+
saved.
|
| 381 |
+
:param digest: If specified, this must be a (hasher, value)
|
| 382 |
+
tuple, where hasher is the algorithm used (e.g.
|
| 383 |
+
``'md5'``) and ``value`` is the expected value.
|
| 384 |
+
:param reporthook: The same as for :func:`urlretrieve` in the
|
| 385 |
+
standard library.
|
| 386 |
+
"""
|
| 387 |
+
if digest is None:
|
| 388 |
+
digester = None
|
| 389 |
+
logger.debug('No digest specified')
|
| 390 |
+
else:
|
| 391 |
+
if isinstance(digest, (list, tuple)):
|
| 392 |
+
hasher, digest = digest
|
| 393 |
+
else:
|
| 394 |
+
hasher = 'md5'
|
| 395 |
+
digester = getattr(hashlib, hasher)()
|
| 396 |
+
logger.debug('Digest specified: %s' % digest)
|
| 397 |
+
# The following code is equivalent to urlretrieve.
|
| 398 |
+
# We need to do it this way so that we can compute the
|
| 399 |
+
# digest of the file as we go.
|
| 400 |
+
with open(destfile, 'wb') as dfp:
|
| 401 |
+
# addinfourl is not a context manager on 2.x
|
| 402 |
+
# so we have to use try/finally
|
| 403 |
+
sfp = self.send_request(Request(url))
|
| 404 |
+
try:
|
| 405 |
+
headers = sfp.info()
|
| 406 |
+
blocksize = 8192
|
| 407 |
+
size = -1
|
| 408 |
+
read = 0
|
| 409 |
+
blocknum = 0
|
| 410 |
+
if "content-length" in headers:
|
| 411 |
+
size = int(headers["Content-Length"])
|
| 412 |
+
if reporthook:
|
| 413 |
+
reporthook(blocknum, blocksize, size)
|
| 414 |
+
while True:
|
| 415 |
+
block = sfp.read(blocksize)
|
| 416 |
+
if not block:
|
| 417 |
+
break
|
| 418 |
+
read += len(block)
|
| 419 |
+
dfp.write(block)
|
| 420 |
+
if digester:
|
| 421 |
+
digester.update(block)
|
| 422 |
+
blocknum += 1
|
| 423 |
+
if reporthook:
|
| 424 |
+
reporthook(blocknum, blocksize, size)
|
| 425 |
+
finally:
|
| 426 |
+
sfp.close()
|
| 427 |
+
|
| 428 |
+
# check that we got the whole file, if we can
|
| 429 |
+
if size >= 0 and read < size:
|
| 430 |
+
raise DistlibException(
|
| 431 |
+
'retrieval incomplete: got only %d out of %d bytes'
|
| 432 |
+
% (read, size))
|
| 433 |
+
# if we have a digest, it must match.
|
| 434 |
+
if digester:
|
| 435 |
+
actual = digester.hexdigest()
|
| 436 |
+
if digest != actual:
|
| 437 |
+
raise DistlibException('%s digest mismatch for %s: expected '
|
| 438 |
+
'%s, got %s' % (hasher, destfile,
|
| 439 |
+
digest, actual))
|
| 440 |
+
logger.debug('Digest verified: %s', digest)
|
| 441 |
+
|
| 442 |
+
def send_request(self, req):
|
| 443 |
+
"""
|
| 444 |
+
Send a standard library :class:`Request` to PyPI and return its
|
| 445 |
+
response.
|
| 446 |
+
|
| 447 |
+
:param req: The request to send.
|
| 448 |
+
:return: The HTTP response from PyPI (a standard library HTTPResponse).
|
| 449 |
+
"""
|
| 450 |
+
handlers = []
|
| 451 |
+
if self.password_handler:
|
| 452 |
+
handlers.append(self.password_handler)
|
| 453 |
+
if self.ssl_verifier:
|
| 454 |
+
handlers.append(self.ssl_verifier)
|
| 455 |
+
opener = build_opener(*handlers)
|
| 456 |
+
return opener.open(req)
|
| 457 |
+
|
| 458 |
+
def encode_request(self, fields, files):
|
| 459 |
+
"""
|
| 460 |
+
Encode fields and files for posting to an HTTP server.
|
| 461 |
+
|
| 462 |
+
:param fields: The fields to send as a list of (fieldname, value)
|
| 463 |
+
tuples.
|
| 464 |
+
:param files: The files to send as a list of (fieldname, filename,
|
| 465 |
+
file_bytes) tuple.
|
| 466 |
+
"""
|
| 467 |
+
# Adapted from packaging, which in turn was adapted from
|
| 468 |
+
# http://code.activestate.com/recipes/146306
|
| 469 |
+
|
| 470 |
+
parts = []
|
| 471 |
+
boundary = self.boundary
|
| 472 |
+
for k, values in fields:
|
| 473 |
+
if not isinstance(values, (list, tuple)):
|
| 474 |
+
values = [values]
|
| 475 |
+
|
| 476 |
+
for v in values:
|
| 477 |
+
parts.extend((
|
| 478 |
+
b'--' + boundary,
|
| 479 |
+
('Content-Disposition: form-data; name="%s"' %
|
| 480 |
+
k).encode('utf-8'),
|
| 481 |
+
b'',
|
| 482 |
+
v.encode('utf-8')))
|
| 483 |
+
for key, filename, value in files:
|
| 484 |
+
parts.extend((
|
| 485 |
+
b'--' + boundary,
|
| 486 |
+
('Content-Disposition: form-data; name="%s"; filename="%s"' %
|
| 487 |
+
(key, filename)).encode('utf-8'),
|
| 488 |
+
b'',
|
| 489 |
+
value))
|
| 490 |
+
|
| 491 |
+
parts.extend((b'--' + boundary + b'--', b''))
|
| 492 |
+
|
| 493 |
+
body = b'\r\n'.join(parts)
|
| 494 |
+
ct = b'multipart/form-data; boundary=' + boundary
|
| 495 |
+
headers = {
|
| 496 |
+
'Content-type': ct,
|
| 497 |
+
'Content-length': str(len(body))
|
| 498 |
+
}
|
| 499 |
+
return Request(self.url, body, headers)
|
| 500 |
+
|
| 501 |
+
def search(self, terms, operator=None):  # pragma: no cover
    """
    Call the ``search`` method of a ``ServerProxy`` created for
    ``self.url``.

    :param terms: The search terms. A plain string is wrapped as
                  ``{'name': terms}``; any other value is passed through
                  unchanged.
    :param operator: Passed as the second argument to the remote
                     ``search``; a falsy value is replaced by ``'and'``.
    :return: Whatever the proxy's ``search`` call returns.
    """
    if isinstance(terms, string_types):
        terms = {'name': terms}
    rpc_proxy = ServerProxy(self.url, timeout=3.0)
    try:
        return rpc_proxy.search(terms, operator or 'and')
    finally:
        # Always release the proxy, even if the call raised.
        rpc_proxy('close')()
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/locators.py
ADDED
|
@@ -0,0 +1,1303 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
|
| 8 |
+
import gzip
|
| 9 |
+
from io import BytesIO
|
| 10 |
+
import json
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import posixpath
|
| 14 |
+
import re
|
| 15 |
+
try:
|
| 16 |
+
import threading
|
| 17 |
+
except ImportError: # pragma: no cover
|
| 18 |
+
import dummy_threading as threading
|
| 19 |
+
import zlib
|
| 20 |
+
|
| 21 |
+
from . import DistlibException
|
| 22 |
+
from .compat import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
|
| 23 |
+
queue, quote, unescape, build_opener,
|
| 24 |
+
HTTPRedirectHandler as BaseRedirectHandler, text_type,
|
| 25 |
+
Request, HTTPError, URLError)
|
| 26 |
+
from .database import Distribution, DistributionPath, make_dist
|
| 27 |
+
from .metadata import Metadata, MetadataInvalidError
|
| 28 |
+
from .util import (cached_property, ensure_slash, split_filename, get_project_data,
|
| 29 |
+
parse_requirement, parse_name_and_version, ServerProxy,
|
| 30 |
+
normalize_name)
|
| 31 |
+
from .version import get_scheme, UnsupportedVersionError
|
| 32 |
+
from .wheel import Wheel, is_compatible
|
| 33 |
+
|
| 34 |
+
logger = logging.getLogger(__name__)
|
| 35 |
+
|
| 36 |
+
HASHER_HASH = re.compile(r'^(\w+)=([a-f0-9]+)')
|
| 37 |
+
CHARSET = re.compile(r';\s*charset\s*=\s*(.*)\s*$', re.I)
|
| 38 |
+
HTML_CONTENT_TYPE = re.compile('text/html|application/x(ht)?ml')
|
| 39 |
+
DEFAULT_INDEX = 'https://pypi.org/pypi'
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def get_all_distribution_names(url=None):
    """
    Return all distribution names known by an index.

    :param url: The URL of the index. ``None`` selects ``DEFAULT_INDEX``.
    :return: A list of all known distribution names (the result of the
             proxy's ``list_packages`` call).
    """
    if url is None:
        url = DEFAULT_INDEX
    client = ServerProxy(url, timeout=3.0)
    try:
        return client.list_packages()
    finally:
        # Always release the proxy, even if the call raised.
        client('close')()
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
class RedirectHandler(BaseRedirectHandler):
    """
    A class to work around a bug in some Python 3.2.x releases.
    """
    # There's a bug in the base version for some 3.2.x
    # (e.g. 3.2.2 on Ubuntu Oneiric). If a Location header
    # returns e.g. /abc, it bails because it says the scheme ''
    # is bogus, when actually it should use the request's
    # URL for the scheme. See Python issue #13696.
    def http_error_302(self, req, fp, code, msg, headers):
        """
        Make a scheme-less redirect target absolute, then delegate to the
        base handler.

        The first of the ``location`` / ``uri`` headers present is used.
        If neither is present, ``None`` is returned without delegating.
        """
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        newurl = None
        for key in ('location', 'uri'):
            if key in headers:
                newurl = headers[key]
                break
        if newurl is None:  # pragma: no cover
            return
        urlparts = urlparse(newurl)
        if urlparts.scheme == '':
            # Resolve against the request URL and write the absolute URL
            # back so the base class sees a usable scheme.
            newurl = urljoin(req.get_full_url(), newurl)
            if hasattr(headers, 'replace_header'):
                headers.replace_header(key, newurl)
            else:
                headers[key] = newurl
        return BaseRedirectHandler.http_error_302(self, req, fp, code, msg,
                                                  headers)

    http_error_301 = http_error_303 = http_error_307 = http_error_302
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
class Locator(object):
    """
    A base class for locators - things that locate distributions.
    """
    source_extensions = ('.tar.gz', '.tar.bz2', '.tar', '.zip', '.tgz', '.tbz')
    binary_extensions = ('.egg', '.exe', '.whl')
    excluded_extensions = ('.pdf',)

    # A list of tags indicating which wheels you want to match. The default
    # value of None matches against the tags compatible with the running
    # Python. If you want to match other values, set wheel_tags on a locator
    # instance to a list of tuples (pyver, abi, arch) which you want to match.
    wheel_tags = None

    downloadable_extensions = source_extensions + ('.whl',)

    def __init__(self, scheme='default'):
        """
        Initialise an instance.
        :param scheme: Because locators look for most recent versions, they
                       need to know the version scheme to use. This specifies
                       the current PEP-recommended scheme - use ``'legacy'``
                       if you need to support existing distributions on PyPI.
        """
        self._cache = {}
        self.scheme = scheme
        # Because of bugs in some of the handlers on some of the platforms,
        # we use our own opener rather than just using urlopen.
        self.opener = build_opener(RedirectHandler())
        # If get_project() is called from locate(), the matcher instance
        # is set from the requirement passed to locate(). See issue #18 for
        # why this can be useful to know.
        self.matcher = None
        self.errors = queue.Queue()

    def get_errors(self):
        """
        Return any errors which have occurred.

        The error queue is drained as a side effect.
        """
        result = []
        while not self.errors.empty():  # pragma: no cover
            try:
                e = self.errors.get(False)
                result.append(e)
            except queue.Empty:
                # The exception class lives on the queue module, not on
                # Queue instances; another consumer emptied the queue
                # between the empty() check and get().
                continue
            self.errors.task_done()
        return result

    def clear_errors(self):
        """
        Clear any errors which may have been logged.
        """
        # Just get the errors and throw them away
        self.get_errors()

    def clear_cache(self):
        """
        Forget all cached get_project() results.
        """
        self._cache.clear()

    def _get_scheme(self):
        return self._scheme

    def _set_scheme(self, value):
        self._scheme = value

    scheme = property(_get_scheme, _set_scheme)

    def _get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This should be implemented in subclasses.

        If called from a locate() request, self.matcher will be set to a
        matcher for the requirement to satisfy, otherwise it will be None.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Please implement in the subclass')

    def get_project(self, name):
        """
        For a given project, get a dictionary mapping available versions to Distribution
        instances.

        This calls _get_project to do all the work, and just implements a caching layer on top.
        """
        if self._cache is None:  # pragma: no cover
            # Caching disabled: always ask the subclass.
            result = self._get_project(name)
        elif name in self._cache:
            result = self._cache[name]
        else:
            # Start from a clean error queue so get_errors() afterwards
            # reports only problems from this lookup.
            self.clear_errors()
            result = self._get_project(name)
            self._cache[name] = result
        return result

    def score_url(self, url):
        """
        Give an url a score which can be used to choose preferred URLs
        for a given project release.

        :return: A tuple ``(is_https, is_pypi, is_downloadable, is_wheel,
                 compatible, basename)`` suitable for ordering comparison.
        """
        t = urlparse(url)
        basename = posixpath.basename(t.path)
        compatible = True
        is_wheel = basename.endswith('.whl')
        is_downloadable = basename.endswith(self.downloadable_extensions)
        if is_wheel:
            compatible = is_compatible(Wheel(basename), self.wheel_tags)
        return (t.scheme == 'https', 'pypi.org' in t.netloc,
                is_downloadable, is_wheel, compatible, basename)

    def prefer_url(self, url1, url2):
        """
        Choose one of two URLs where both are candidates for distribution
        archives for the same version of a distribution (for example,
        .tar.gz vs. zip).

        The current implementation favours https:// URLs over http://, archives
        from PyPI over those from other locations, wheel compatibility (if a
        wheel) and then the archive name.

        If ``url1`` is falsy, ``url2`` is returned without scoring.
        """
        result = url2
        if url1:
            s1 = self.score_url(url1)
            s2 = self.score_url(url2)
            if s1 > s2:
                result = url1
            if result != url2:
                logger.debug('Not replacing %r with %r', url1, url2)
            else:
                logger.debug('Replacing %r with %r', url1, url2)
        return result

    def split_filename(self, filename, project_name):
        """
        Attempt to split a filename in project name, version and Python version.
        """
        return split_filename(filename, project_name)

    def convert_url_to_download_info(self, url, project_name):
        """
        See if a URL is a candidate for a download URL for a project (the URL
        has typically been scraped from an HTML page).

        If it is, a dictionary is returned with keys "name", "version",
        "filename" and "url"; otherwise, None is returned.
        """
        def same_project(name1, name2):
            return normalize_name(name1) == normalize_name(name2)

        result = None
        scheme, netloc, path, params, query, frag = urlparse(url)
        if frag.lower().startswith('egg='):  # pragma: no cover
            logger.debug('%s: version hint in fragment: %r',
                         project_name, frag)
        # A fragment of the form "<algo>=<hex>" carries the archive digest.
        m = HASHER_HASH.match(frag)
        if m:
            algo, digest = m.groups()
        else:
            algo, digest = None, None
        origpath = path
        if path and path[-1] == '/':  # pragma: no cover
            path = path[:-1]
        if path.endswith('.whl'):
            try:
                wheel = Wheel(path)
                if not is_compatible(wheel, self.wheel_tags):
                    logger.debug('Wheel not compatible: %s', path)
                else:
                    if project_name is None:
                        include = True
                    else:
                        include = same_project(wheel.name, project_name)
                    if include:
                        result = {
                            'name': wheel.name,
                            'version': wheel.version,
                            'filename': wheel.filename,
                            'url': urlunparse((scheme, netloc, origpath,
                                               params, query, '')),
                            'python-version': ', '.join(
                                ['.'.join(list(v[2:])) for v in wheel.pyver]),
                        }
            except Exception:  # pragma: no cover
                logger.warning('invalid path for wheel: %s', path)
        elif not path.endswith(self.downloadable_extensions):  # pragma: no cover
            logger.debug('Not downloadable: %s', path)
        else:  # downloadable extension
            path = filename = posixpath.basename(path)
            for ext in self.downloadable_extensions:
                if path.endswith(ext):
                    path = path[:-len(ext)]
                    t = self.split_filename(path, project_name)
                    if not t:  # pragma: no cover
                        logger.debug('No match for project/version: %s', path)
                    else:
                        name, version, pyver = t
                        if not project_name or same_project(project_name, name):
                            result = {
                                'name': name,
                                'version': version,
                                'filename': filename,
                                'url': urlunparse((scheme, netloc, origpath,
                                                   params, query, '')),
                            }
                            if pyver:  # pragma: no cover
                                result['python-version'] = pyver
                    # Only the first matching extension is considered.
                    break
        if result and algo:
            result['%s_digest' % algo] = digest
        return result

    def _get_digest(self, info):
        """
        Get a digest from a dictionary by looking at a "digests" dictionary
        or keys of the form 'algo_digest'.

        Returns a 2-tuple (algo, digest) if found, else None. Currently
        looks only for SHA256, then MD5.
        """
        result = None
        if 'digests' in info:
            digests = info['digests']
            for algo in ('sha256', 'md5'):
                if algo in digests:
                    result = (algo, digests[algo])
                    break
        if not result:
            for algo in ('sha256', 'md5'):
                key = '%s_digest' % algo
                if key in info:
                    result = (algo, info[key])
                    break
        return result

    def _update_version_data(self, result, info):
        """
        Update a result dictionary (the final result from _get_project) with a
        dictionary for a specific version, which typically holds information
        gleaned from a filename or URL for an archive for the distribution.

        Note that ``info`` is modified: its "name" and "version" keys are
        popped.
        """
        name = info.pop('name')
        version = info.pop('version')
        if version in result:
            dist = result[version]
            md = dist.metadata
        else:
            dist = make_dist(name, version, scheme=self.scheme)
            md = dist.metadata
        dist.digest = digest = self._get_digest(info)
        url = info['url']
        result['digests'][url] = digest
        if md.source_url != info['url']:
            md.source_url = self.prefer_url(md.source_url, url)
            result['urls'].setdefault(version, set()).add(url)
        dist.locator = self
        result[version] = dist

    def locate(self, requirement, prereleases=False):
        """
        Find the most recent distribution which matches the given
        requirement.

        :param requirement: A requirement of the form 'foo (1.0)' or perhaps
                            'foo (>= 1.0, < 2.0, != 1.3)'
        :param prereleases: If ``True``, allow pre-release versions
                            to be located. Otherwise, pre-release versions
                            are not returned.
        :return: A :class:`Distribution` instance, or ``None`` if no such
                 distribution could be located.
        :raises DistlibException: If ``requirement`` cannot be parsed.
        """
        result = None
        r = parse_requirement(requirement)
        if r is None:  # pragma: no cover
            raise DistlibException('Not a valid requirement: %r' % requirement)
        scheme = get_scheme(self.scheme)
        self.matcher = matcher = scheme.matcher(r.requirement)
        try:
            logger.debug('matcher: %s (%s)', matcher, type(matcher).__name__)
            versions = self.get_project(r.name)
            if len(versions) > 2:   # urls and digests keys are present
                # sometimes, versions are invalid
                slist = []
                vcls = matcher.version_class
                for k in versions:
                    if k in ('urls', 'digests'):
                        continue
                    try:
                        if not matcher.match(k):
                            pass  # logger.debug('%s did not match %r', matcher, k)
                        else:
                            if prereleases or not vcls(k).is_prerelease:
                                slist.append(k)
                    except Exception:  # pragma: no cover
                        logger.warning('error matching %s with %r', matcher, k)
                        pass  # slist.append(k)
                if len(slist) > 1:
                    slist = sorted(slist, key=scheme.key)
                if slist:
                    logger.debug('sorted list: %s', slist)
                    # After sorting by the scheme key the last entry is
                    # the most recent matching version.
                    version = slist[-1]
                    result = versions[version]
            if result:
                if r.extras:
                    result.extras = r.extras
                result.download_urls = versions.get('urls', {}).get(version, set())
                d = {}
                sd = versions.get('digests', {})
                for url in result.download_urls:
                    if url in sd:  # pragma: no cover
                        d[url] = sd[url]
                result.digests = d
        finally:
            # Reset even when the lookup raises, so a stale matcher is
            # never visible to a later direct get_project() call.
            self.matcher = None
        return result
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
class PyPIRPCLocator(Locator):
    """
    This locator uses XML-RPC to locate distributions. It therefore
    cannot be used with simple mirrors (that only mirror file content).
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The URL to use for XML-RPC.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIRPCLocator, self).__init__(**kwargs)
        self.base_url = url
        self.client = ServerProxy(url, timeout=3.0)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        return set(self.client.list_packages())

    def _get_project(self, name):
        """
        Build the version -> Distribution mapping for ``name`` from the
        client's ``package_releases``, ``release_urls`` and
        ``release_data`` calls.

        Versions for which no URLs are reported are left out of the
        result. The "urls" and "digests" keys are always present.
        """
        result = {'urls': {}, 'digests': {}}
        versions = self.client.package_releases(name, True)
        for v in versions:
            urls = self.client.release_urls(name, v)
            data = self.client.release_data(name, v)
            metadata = Metadata(scheme=self.scheme)
            metadata.name = data['name']
            metadata.version = data['version']
            metadata.license = data.get('license')
            metadata.keywords = data.get('keywords', [])
            metadata.summary = data.get('summary')
            dist = Distribution(metadata)
            if urls:
                # The first reported URL becomes the source URL and
                # supplies the distribution-level digest.
                info = urls[0]
                metadata.source_url = info['url']
                dist.digest = self._get_digest(info)
                dist.locator = self
                result[v] = dist
                for info in urls:
                    url = info['url']
                    digest = self._get_digest(info)
                    result['urls'].setdefault(v, set()).add(url)
                    result['digests'][url] = digest
        return result
|
| 457 |
+
|
| 458 |
+
|
| 459 |
+
class PyPIJSONLocator(Locator):
    """
    This locator uses PyPI's JSON interface. It's very limited in functionality
    and probably not worth using.
    """
    def __init__(self, url, **kwargs):
        """
        Initialise an instance.

        :param url: The base URL; it is passed through ``ensure_slash``
                    before being stored as ``base_url``.
        :param kwargs: Passed to the superclass constructor.
        """
        super(PyPIJSONLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Fetch ``<base_url><name>/json`` and build the version ->
        Distribution mapping from its "info", "urls" and "releases"
        entries.

        Any exception is recorded on ``self.errors`` and logged; whatever
        was collected up to that point is returned.
        """
        result = {'urls': {}, 'digests': {}}
        url = urljoin(self.base_url, '%s/json' % quote(name))
        try:
            resp = self.opener.open(url)
            try:
                data = resp.read().decode()  # for now
            finally:
                # Release the connection as soon as the body is read.
                resp.close()
            d = json.loads(data)
            md = Metadata(scheme=self.scheme)
            data = d['info']
            md.name = data['name']
            md.version = data['version']
            md.license = data.get('license')
            md.keywords = data.get('keywords', [])
            md.summary = data.get('summary')
            dist = Distribution(md)
            dist.locator = self
            result[md.version] = dist
            for info in d['urls']:
                url = info['url']
                digest = self._get_digest(info)
                dist.download_urls.add(url)
                dist.digests[url] = digest
                result['urls'].setdefault(md.version, set()).add(url)
                result['digests'][url] = digest
            # Now get other releases
            for version, infos in d['releases'].items():
                if version == md.version:
                    continue    # already done
                omd = Metadata(scheme=self.scheme)
                omd.name = md.name
                omd.version = version
                odist = Distribution(omd)
                odist.locator = self
                result[version] = odist
                for info in infos:
                    url = info['url']
                    digest = self._get_digest(info)
                    odist.download_urls.add(url)
                    odist.digests[url] = digest
                    result['urls'].setdefault(version, set()).add(url)
                    result['digests'][url] = digest
        except Exception as e:
            # Best-effort lookup: report the failure instead of raising.
            self.errors.put(text_type(e))
            logger.exception('JSON fetch failed: %s', e)
        return result
|
| 526 |
+
|
| 527 |
+
|
| 528 |
+
class Page(object):
    """
    This class represents a scraped HTML page.
    """
    # The following slightly hairy-looking regex just looks for the contents of
    # an anchor link, which has an attribute "href" either immediately preceded
    # or immediately followed by a "rel" attribute. The attribute values can be
    # declared with double quotes, single quotes or no quotes - which leads to
    # the length of the expression.
    _href = re.compile("""
(rel\\s*=\\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\\s\n]*))\\s+)?
href\\s*=\\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\\s\n]*))
(\\s+rel\\s*=\\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\\s\n]*)))?
""", re.I | re.S | re.X)
    _base = re.compile(r"""<base\s+href\s*=\s*['"]?([^'">]+)""", re.I | re.S)

    def __init__(self, data, url):
        """
        Initialise an instance with the Unicode page contents and the URL they
        came from.

        If the page contains a ``<base href=...>`` element, its value
        replaces ``url`` as the base for resolving links.
        """
        self.data = data
        self.base_url = self.url = url
        m = self._base.search(self.data)
        if m:
            self.base_url = m.group(1)

    # Any character NOT in this set is percent-encoded in link URLs.
    _clean_re = re.compile(r'[^a-z0-9$&+,/:;=?@.#%_\\|-]', re.I)

    @cached_property
    def links(self):
        """
        Return the URLs of all the links on a page together with information
        about their "rel" attribute, for determining which ones to treat as
        downloads and which ones to queue for further scraping.

        :return: A list of ``(url, rel)`` tuples, de-duplicated and sorted
                 by URL in descending order.
        """
        def percent_encode(m):
            # Encode the UTF-8 bytes of the character, zero-padded to two
            # hex digits, so control characters and non-Latin-1 characters
            # also produce well-formed %XX escapes.
            encoded = bytearray(m.group(0).encode('utf-8'))
            return ''.join(['%%%02x' % b for b in encoded])

        result = set()
        for match in self._href.finditer(self.data):
            d = match.groupdict('')
            rel = (d['rel1'] or d['rel2'] or d['rel3'] or
                   d['rel4'] or d['rel5'] or d['rel6'])
            url = d['url1'] or d['url2'] or d['url3']
            url = urljoin(self.base_url, url)
            url = unescape(url)
            url = self._clean_re.sub(percent_encode, url)
            result.add((url, rel))
        # We sort the result, hoping to bring the most recent versions
        # to the front
        result = sorted(result, key=lambda t: t[0], reverse=True)
        return result
|
| 584 |
+
|
| 585 |
+
|
| 586 |
+
class SimpleScrapingLocator(Locator):
    """
    A locator which scrapes HTML pages to locate downloads for a distribution.
    This runs multiple threads to do the I/O; performance is at least as good
    as pip's PackageFinder, which works in an analogous fashion.
    """

    # These are used to deal with various Content-Encoding schemes. Keys are
    # lower-case; get_page() normalises the header value before lookup.
    decoders = {
        'deflate': zlib.decompress,
        'gzip': lambda b: gzip.GzipFile(fileobj=BytesIO(b)).read(),
        'none': lambda b: b,
        # get_page() asks for 'identity', so a server may echo it back.
        'identity': lambda b: b,
    }

    def __init__(self, url, timeout=None, num_workers=10, **kwargs):
        """
        Initialise an instance.
        :param url: The root URL to use for scraping.
        :param timeout: The timeout, in seconds, to be applied to requests.
                        This defaults to ``None`` (no timeout specified).
        :param num_workers: The number of worker threads you want to do I/O,
                            This defaults to 10.
        :param kwargs: Passed to the superclass.
        """
        super(SimpleScrapingLocator, self).__init__(**kwargs)
        self.base_url = ensure_slash(url)
        self.timeout = timeout
        self._page_cache = {}
        self._seen = set()
        self._to_fetch = queue.Queue()
        self._bad_hosts = set()
        self.skip_externals = False
        self.num_workers = num_workers
        self._lock = threading.RLock()
        # See issue #45: we need to be resilient when the locator is used
        # in a thread, e.g. with concurrent.futures. We can't use self._lock
        # as it is for coordinating our internal threads - the ones created
        # in _prepare_threads.
        self._gplock = threading.RLock()
        self.platform_check = False  # See issue #112

    def _prepare_threads(self):
        """
        Threads are created only when get_project is called, and terminate
        before it returns. They are there primarily to parallelise I/O (i.e.
        fetching web pages).
        """
        self._threads = []
        for i in range(self.num_workers):
            t = threading.Thread(target=self._fetch)
            t.daemon = True
            t.start()
            self._threads.append(t)

    def _wait_threads(self):
        """
        Tell all the threads to terminate (by sending a sentinel value) and
        wait for them to do so.
        """
        # Note that you need two loops, since you can't say which
        # thread will get each sentinel
        for t in self._threads:
            self._to_fetch.put(None)  # sentinel
        for t in self._threads:
            t.join()
        self._threads = []

    def _get_project(self, name):
        """
        Scrape the index page for ``name`` (and any pages queued from it)
        and return the collected version data.

        The whole operation runs under ``_gplock``; the seen-link set and
        the page cache are cleared at the start of every call.

        :param name: The project name; it is URL-quoted and appended to
                     ``base_url``.
        :return: A dict with ``'urls'`` and ``'digests'`` keys, filled in by
                 the worker threads via ``_process_download``.
        """
        result = {'urls': {}, 'digests': {}}
        with self._gplock:
            self.result = result
            self.project_name = name
            url = urljoin(self.base_url, '%s/' % quote(name))
            self._seen.clear()
            self._page_cache.clear()
            self._prepare_threads()
            try:
                logger.debug('Queueing %s', url)
                self._to_fetch.put(url)
                self._to_fetch.join()
            finally:
                self._wait_threads()
            del self.result
        return result

    platform_dependent = re.compile(r'\b(linux_(i\d86|x86_64|arm\w+)|'
                                    r'win(32|_amd64)|macosx_?\d+)\b', re.I)

    def _is_platform_dependent(self, url):
        """
        Does an URL refer to a platform-specific download?

        :return: The match object from ``platform_dependent`` (truthy), or
                 ``None`` when the URL contains no platform tag.
        """
        return self.platform_dependent.search(url)

    def _process_download(self, url):
        """
        See if an URL is a suitable download for a project.

        If it is, register information in the result dictionary (for
        _get_project) about the specific version it's for.

        Note that the return value isn't actually used other than as a boolean
        value.
        """
        if self.platform_check and self._is_platform_dependent(url):
            info = None
        else:
            info = self.convert_url_to_download_info(url, self.project_name)
        logger.debug('process_download: %s -> %s', url, info)
        if info:
            with self._lock:  # needed because self.result is shared
                self._update_version_data(self.result, info)
        return info

    def _should_queue(self, link, referrer, rel):
        """
        Determine whether a link URL from a referring page and with a
        particular "rel" attribute should be queued for scraping.

        :param link: The candidate URL.
        :param referrer: The URL of the page the link was found on.
        :param rel: The link's "rel" attribute value ('' if absent).
        :return: ``True`` if the link should be fetched, else ``False``.
        """
        scheme, netloc, path, _, _, _ = urlparse(link)
        if path.endswith(self.source_extensions + self.binary_extensions +
                         self.excluded_extensions):
            result = False
        elif self.skip_externals and not link.startswith(self.base_url):
            result = False
        elif not referrer.startswith(self.base_url):
            result = False
        elif rel not in ('homepage', 'download'):
            result = False
        elif scheme not in ('http', 'https', 'ftp'):
            result = False
        elif self._is_platform_dependent(link):
            result = False
        else:
            host = netloc.split(':', 1)[0]
            if host.lower() == 'localhost':
                result = False
            else:
                result = True
        logger.debug('should_queue: %s (%s) from %s -> %s', link, rel,
                     referrer, result)
        return result

    def _fetch(self):
        """
        Get a URL to fetch from the work queue, get the HTML page, examine its
        links for download candidates and candidates for further scraping.

        This is a handy method to run in a thread. It loops until it takes
        a falsy sentinel off the queue.
        """
        while True:
            url = self._to_fetch.get()
            try:
                if url:
                    page = self.get_page(url)
                    if page is None:  # e.g. after an error
                        # The finally clause below still runs task_done().
                        continue
                    for link, rel in page.links:
                        if link not in self._seen:
                            try:
                                self._seen.add(link)
                                if (not self._process_download(link) and
                                        self._should_queue(link, url, rel)):
                                    logger.debug('Queueing %s from %s', link, url)
                                    self._to_fetch.put(link)
                            except MetadataInvalidError:  # e.g. invalid versions
                                pass
            except Exception as e:  # pragma: no cover
                self.errors.put(text_type(e))
            finally:
                # always do this, to avoid hangs :-)
                self._to_fetch.task_done()
            if not url:
                # logger.debug('Sentinel seen, quitting.')
                break

    def get_page(self, url):
        """
        Get the HTML for an URL, possibly from an in-memory cache.

        The cache is only emptied by ``_get_project``; other callers (for
        example ``get_distribution_names``) may therefore see data cached
        earlier in the lifetime of the locator instance.

        :param url: The URL to fetch. A ``file:`` URL naming a directory is
                    redirected to its ``index.html``.
        :return: A ``Page`` instance, or ``None`` if the content is not HTML
                 or the fetch failed. Failures are cached as ``None`` too.
        """
        # http://peak.telecommunity.com/DevCenter/EasyInstall#package-index-api
        scheme, netloc, path, _, _, _ = urlparse(url)
        if scheme == 'file' and os.path.isdir(url2pathname(path)):
            url = urljoin(ensure_slash(url), 'index.html')

        if url in self._page_cache:
            result = self._page_cache[url]
            logger.debug('Returning %s from cache: %s', url, result)
        else:
            host = netloc.split(':', 1)[0]
            result = None
            if host in self._bad_hosts:
                logger.debug('Skipping %s due to bad host %s', url, host)
            else:
                req = Request(url, headers={'Accept-encoding': 'identity'})
                try:
                    logger.debug('Fetching %s', url)
                    resp = self.opener.open(req, timeout=self.timeout)
                    try:
                        logger.debug('Fetched %s', url)
                        headers = resp.info()
                        content_type = headers.get('Content-Type', '')
                        if HTML_CONTENT_TYPE.match(content_type):
                            final_url = resp.geturl()
                            data = resp.read()
                            encoding = headers.get('Content-Encoding')
                            if encoding:
                                # Content-coding tokens are case-insensitive.
                                # An unknown scheme still fails (KeyError).
                                decoder = self.decoders[encoding.strip().lower()]
                                data = decoder(data)
                            encoding = 'utf-8'
                            m = CHARSET.search(content_type)
                            if m:
                                encoding = m.group(1)
                            try:
                                data = data.decode(encoding)
                            except (UnicodeError, LookupError):  # pragma: no cover
                                # LookupError: the server named a charset
                                # Python has no codec for.
                                data = data.decode('latin-1')  # fallback
                            result = Page(data, final_url)
                            self._page_cache[final_url] = result
                    finally:
                        # Release the connection whether or not we read it.
                        resp.close()
                except HTTPError as e:
                    if e.code != 404:
                        logger.exception('Fetch failed: %s: %s', url, e)
                except URLError as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                    with self._lock:
                        self._bad_hosts.add(host)
                except Exception as e:  # pragma: no cover
                    logger.exception('Fetch failed: %s: %s', url, e)
                finally:
                    self._page_cache[url] = result  # even if None (failure)
        return result

    _distname_re = re.compile('<a href=[^>]*>([^<]+)<')

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.

        :raises DistlibException: if the page at ``base_url`` cannot be
                                  retrieved.
        """
        result = set()
        page = self.get_page(self.base_url)
        if not page:
            raise DistlibException('Unable to get %s' % self.base_url)
        for match in self._distname_re.finditer(page.data):
            result.add(match.group(1))
        return result
|
| 834 |
+
|
| 835 |
+
|
| 836 |
+
class DirectoryLocator(Locator):
    """
    This class locates distributions in a directory tree.
    """

    def __init__(self, path, **kwargs):
        """
        Initialise an instance.
        :param path: The root of the directory tree to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * recursive - if True (the default), subdirectories are
                         recursed into. If False, only the top-level directory
                         is searched,
        :raises DistlibException: if ``path`` is not a directory.
        """
        self.recursive = kwargs.pop('recursive', True)
        super(DirectoryLocator, self).__init__(**kwargs)
        path = os.path.abspath(path)
        if not os.path.isdir(path):  # pragma: no cover
            raise DistlibException('Not a directory: %r' % path)
        self.base_dir = path

    def should_include(self, filename, parent):
        """
        Should a filename be considered as a candidate for a distribution
        archive? As well as the filename, the directory which contains it
        is provided, though not used by the current implementation.
        """
        return filename.endswith(self.downloadable_extensions)

    def _iter_candidate_urls(self):
        """
        Yield a ``file:`` URL for every file under ``base_dir`` accepted by
        ``should_include``, honouring the ``recursive`` setting.
        """
        for root, dirs, files in os.walk(self.base_dir):
            for fn in files:
                if self.should_include(fn, root):
                    fn = os.path.join(root, fn)
                    yield urlunparse(('file', '',
                                      pathname2url(os.path.abspath(fn)),
                                      '', '', ''))
            if not self.recursive:
                # os.walk yields the top-level directory first; stop there.
                break

    def _get_project(self, name):
        """
        Collect version data for ``name`` from the archives in the tree.

        :param name: The project name to match archives against.
        :return: A dict with ``'urls'`` and ``'digests'`` keys, updated via
                 ``_update_version_data`` for each matching archive.
        """
        result = {'urls': {}, 'digests': {}}
        for url in self._iter_candidate_urls():
            info = self.convert_url_to_download_info(url, name)
            if info:
                self._update_version_data(result, info)
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.
        """
        result = set()
        for url in self._iter_candidate_urls():
            info = self.convert_url_to_download_info(url, None)
            if info:
                result.add(info['name'])
        return result
|
| 900 |
+
|
| 901 |
+
|
| 902 |
+
class JSONLocator(Locator):
    """
    A locator backed by special extended metadata (not available on PyPI),
    which is the basis of performant dependency resolution in distlib. Other
    locators require archive downloads before dependencies can be determined,
    which can be slow.
    """

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.

        This locator cannot enumerate names, so it always raises
        ``NotImplementedError``.
        """
        raise NotImplementedError('Not available from this locator')

    def _get_project(self, name):
        """
        Build distributions for ``name`` from the data returned by
        ``get_project_data``.

        Only file entries whose ``ptype`` is ``'sdist'`` and whose
        ``pyversion`` is ``'source'`` are used.

        :return: A dict with ``'urls'`` and ``'digests'`` keys plus one
                 entry per version found, mapping to its distribution.
        """
        result = {'urls': {}, 'digests': {}}
        data = get_project_data(name)
        if not data:
            return result
        for info in data.get('files', []):
            is_source_sdist = (info['ptype'] == 'sdist' and
                               info['pyversion'] == 'source')
            if not is_source_sdist:
                continue
            # We don't store summary in project metadata as it makes
            # the data bigger for no benefit during dependency
            # resolution
            dist = make_dist(data['name'], info['version'],
                             summary=data.get('summary',
                                              'Placeholder for summary'),
                             scheme=self.scheme)
            metadata = dist.metadata
            source_url = info['url']
            metadata.source_url = source_url
            # TODO SHA256 digest
            digest = info.get('digest')
            if digest:
                dist.digest = ('md5', digest)
            metadata.dependencies = info.get('requirements', {})
            dist.exports = info.get('exports', {})
            result[dist.version] = dist
            urls_for_version = result['urls'].setdefault(dist.version, set())
            urls_for_version.add(source_url)
        return result
|
| 939 |
+
|
| 940 |
+
|
| 941 |
+
class DistPathLocator(Locator):
    """
    A locator which finds installed distributions in a path. It can be
    useful for adding to an :class:`AggregatingLocator`.
    """

    def __init__(self, distpath, **kwargs):
        """
        Initialise an instance.

        :param distpath: A :class:`DistributionPath` instance to search.
        :param kwargs: Passed to the superclass constructor.
        """
        super(DistPathLocator, self).__init__(**kwargs)
        assert isinstance(distpath, DistributionPath)
        self.distpath = distpath

    def _get_project(self, name):
        """
        Look ``name`` up in the distribution path.

        :return: A dict with empty ``'urls'`` and ``'digests'`` entries if
                 nothing is found; otherwise a dict which also maps the
                 found version to its distribution, with the source URL and
                 a ``None`` digest recorded against that version.
        """
        dist = self.distpath.get_distribution(name)
        if dist is None:
            return {'urls': {}, 'digests': {}}
        version = dist.version
        return {
            version: dist,
            'urls': {version: set([dist.source_url])},
            'digests': {version: set([None])},
        }
|
| 967 |
+
|
| 968 |
+
|
| 969 |
+
class AggregatingLocator(Locator):
    """
    This class allows you to chain and/or merge a list of locators.
    """

    def __init__(self, *locators, **kwargs):
        """
        Initialise an instance.

        :param locators: The list of locators to search.
        :param kwargs: Passed to the superclass constructor,
                       except for:
                       * merge - if False (the default), the first successful
                         search from any of the locators is returned. If True,
                         the results from all locators are merged (this can be
                         slow).
        """
        self.merge = kwargs.pop('merge', False)
        self.locators = locators
        super(AggregatingLocator, self).__init__(**kwargs)

    def clear_cache(self):
        """Clear this locator's cache and that of every wrapped locator."""
        super(AggregatingLocator, self).clear_cache()
        for locator in self.locators:
            locator.clear_cache()

    def _set_scheme(self, value):
        """Set the scheme here and propagate it to every wrapped locator."""
        self._scheme = value
        for locator in self.locators:
            locator.scheme = value

    scheme = property(Locator.scheme.fget, _set_scheme)

    def _get_project(self, name):
        """
        Query the wrapped locators for ``name``.

        With ``merge`` set, the results of all locators are combined: later
        locators win for per-version entries, URL sets are unioned per
        version, and earlier digests take precedence over later ones.
        Otherwise the first acceptable result is returned as-is.

        :return: The merged or first-found result dict; ``{}`` if no
                 locator returned anything.
        """
        result = {}
        for locator in self.locators:
            d = locator.get_project(name)
            if d:
                if self.merge:
                    files = result.get('urls', {})
                    digests = result.get('digests', {})
                    # next line could overwrite result['urls'], result['digests']
                    result.update(d)
                    # Rebuild both mappings from copies. This keeps what
                    # was merged so far even when this locator's 'urls' or
                    # 'digests' dict is empty, and avoids mutating the
                    # dicts/sets owned (and possibly cached) by the
                    # individual locators.
                    merged_urls = dict((k, set(v))
                                       for k, v in d.get('urls', {}).items())
                    for k, v in files.items():
                        merged_urls.setdefault(k, set()).update(v)
                    merged_digests = dict(d.get('digests', {}))
                    merged_digests.update(digests)
                    result['urls'] = merged_urls
                    result['digests'] = merged_digests
                else:
                    # See issue #18. If any dists are found and we're looking
                    # for specific constraints, we only return something if
                    # a match is found. For example, if a DirectoryLocator
                    # returns just foo (1.0) while we're looking for
                    # foo (>= 2.0), we'll pretend there was nothing there so
                    # that subsequent locators can be queried. Otherwise we
                    # would just return foo (1.0) which would then lead to a
                    # failure to find foo (>= 2.0), because other locators
                    # weren't searched. Note that this only matters when
                    # merge=False.
                    if self.matcher is None:
                        found = True
                    else:
                        found = False
                        for k in d:
                            if self.matcher.match(k):
                                found = True
                                break
                    if found:
                        result = d
                        break
        return result

    def get_distribution_names(self):
        """
        Return all the distribution names known to this locator.

        Wrapped locators which raise ``NotImplementedError`` are skipped.
        """
        result = set()
        for locator in self.locators:
            try:
                result |= locator.get_distribution_names()
            except NotImplementedError:
                pass
        return result
|
| 1056 |
+
|
| 1057 |
+
|
| 1058 |
+
# The locator used when callers do not supply their own (see
# DependencyFinder.__init__).
# We use a legacy scheme simply because most of the dists on PyPI use legacy
# versions which don't conform to PEP 440.
default_locator = AggregatingLocator(
    # JSONLocator(),  # don't use as PEP 426 is withdrawn
    SimpleScrapingLocator('https://pypi.org/simple/',
                          timeout=3.0),
    scheme='legacy')

# Module-level shortcut bound to the default locator's locate() method.
locate = default_locator.locate
|
| 1067 |
+
|
| 1068 |
+
|
| 1069 |
+
class DependencyFinder(object):
    """
    Locate dependencies for distributions.

    The working state (``provided``, ``dists``, ``dists_by_name`` and
    ``reqts``) is created by :meth:`find`; the other methods read and update
    it and so are only usable once ``find`` has initialised it.
    """

    def __init__(self, locator=None):
        """
        Initialise an instance, using the specified locator
        to locate distributions.

        :param locator: The locator to use; ``default_locator`` is used if
                        this is ``None`` (or otherwise falsy).
        """
        self.locator = locator or default_locator
        self.scheme = get_scheme(self.locator.scheme)

    def add_distribution(self, dist):
        """
        Add a distribution to the finder. This will update internal information
        about who provides what.
        :param dist: The distribution to add.
        """
        logger.debug('adding distribution %s', dist)
        name = dist.key
        self.dists_by_name[name] = dist
        self.dists[(name, dist.version)] = dist
        # Record every (version, dist) pair under each name in dist.provides.
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Add to provided: %s, %s, %s', name, version, dist)
            self.provided.setdefault(name, set()).add((version, dist))

    def remove_distribution(self, dist):
        """
        Remove a distribution from the finder. This will update internal
        information about who provides what.
        :param dist: The distribution to remove.
        :raises KeyError: if the distribution (or one of the names it
                          provides) was not previously added.
        """
        logger.debug('removing distribution %s', dist)
        name = dist.key
        del self.dists_by_name[name]
        del self.dists[(name, dist.version)]
        for p in dist.provides:
            name, version = parse_name_and_version(p)
            logger.debug('Remove from provided: %s, %s, %s', name, version, dist)
            s = self.provided[name]
            s.remove((version, dist))
            # Drop the name entirely once nothing provides it any more.
            if not s:
                del self.provided[name]

    def get_matcher(self, reqt):
        """
        Get a version matcher for a requirement.
        :param reqt: The requirement
        :type reqt: str
        :return: A version matcher (an instance of
                 :class:`distlib.version.Matcher`).
        """
        try:
            matcher = self.scheme.matcher(reqt)
        except UnsupportedVersionError:  # pragma: no cover
            # XXX compat-mode if cannot read the version
            # Fall back to a matcher built from the first whitespace-
            # separated token of the requirement only.
            name = reqt.split()[0]
            matcher = self.scheme.matcher(name)
        return matcher

    def find_providers(self, reqt):
        """
        Find the distributions which can fulfill a requirement.

        :param reqt: The requirement.
        :type reqt: str
        :return: A set of distribution which can fulfill the requirement.
                 The search stops at the first matching provider, so the set
                 holds at most one distribution.
        """
        matcher = self.get_matcher(reqt)
        name = matcher.key  # case-insensitive
        result = set()
        provided = self.provided
        if name in provided:
            for version, provider in provided[name]:
                try:
                    match = matcher.match(version)
                except UnsupportedVersionError:
                    # A version the scheme cannot parse never matches.
                    match = False

                if match:
                    result.add(provider)
                    break
        return result

    def try_to_replace(self, provider, other, problems):
        """
        Attempt to replace one provider with another. This is typically used
        when resolving dependencies from multiple sources, e.g. A requires
        (B >= 1.0) while C requires (B >= 1.1).

        For successful replacement, ``provider`` must meet all the requirements
        which ``other`` fulfills.

        :param provider: The provider we are trying to replace with.
        :param other: The provider we're trying to replace.
        :param problems: If False is returned, this will contain what
                         problems prevented replacement. This is currently
                         a tuple of the literal string 'cantreplace',
                         ``provider``, ``other``  and the set of requirements
                         that ``provider`` couldn't fulfill.
        :return: True if we can replace ``other`` with ``provider``, else
                 False.
        """
        # NOTE(review): this lookup raises KeyError when no requirement has
        # been recorded against ``other`` in self.reqts - confirm that
        # callers can never reach this with such a distribution.
        rlist = self.reqts[other]
        unmatched = set()
        for s in rlist:
            matcher = self.get_matcher(s)
            if not matcher.match(provider.version):
                unmatched.add(s)
        if unmatched:
            # can't replace other with provider
            problems.add(('cantreplace', provider, other,
                          frozenset(unmatched)))
            result = False
        else:
            # can replace other with provider
            self.remove_distribution(other)
            del self.reqts[other]
            # Carry the requirements recorded against ``other`` over.
            for s in rlist:
                self.reqts.setdefault(provider, set()).add(s)
            self.add_distribution(provider)
            result = True
        return result

    def find(self, requirement, meta_extras=None, prereleases=False):
        """
        Find a distribution and all distributions it depends on.

        :param requirement: The requirement specifying the distribution to
                            find, or a Distribution instance.
        :param meta_extras: A list of meta extras such as :test:, :build: and
                            so on. The special value ``:*:`` stands for
                            ``:test:``, ``:build:`` and ``:dev:`` together.
        :param prereleases: If ``True``, allow pre-release versions to be
                            returned - otherwise, don't return prereleases
                            unless they're all that's available.

        Return a set of :class:`Distribution` instances and a set of
        problems.

        The distribution located for (or passed as) ``requirement`` has its
        :attr:`requested` attribute set to ``True``, and every returned
        distribution has the :attr:`build_time_dependency` attribute set to
        ``True`` unless it is the requested distribution or one of its
        (transitive) run-time / meta dependencies.

        Each problem is a tuple: either the string ``'unsatisfied'`` and the
        requirement which couldn't be satisfied by any distribution known to
        the locator, or a ``'cantreplace'`` tuple as recorded by
        :meth:`try_to_replace`.

        :raises DistlibException: if ``requirement`` itself cannot be
                                  located.
        """

        # Reset all working state, so each call starts from scratch.
        self.provided = {}
        self.dists = {}
        self.dists_by_name = {}
        self.reqts = {}

        meta_extras = set(meta_extras or [])
        if ':*:' in meta_extras:
            meta_extras.remove(':*:')
            # :meta: and :run: are implicitly included
            meta_extras |= set([':test:', ':build:', ':dev:'])

        if isinstance(requirement, Distribution):
            dist = odist = requirement
            logger.debug('passed %s as requirement', odist)
        else:
            dist = odist = self.locator.locate(requirement,
                                               prereleases=prereleases)
            if dist is None:
                raise DistlibException('Unable to locate %r' % requirement)
            logger.debug('located %s', odist)
        dist.requested = True
        problems = set()
        # Work list of distributions whose requirements are still to be
        # examined.
        todo = set([dist])
        # Distributions needed at install time, as opposed to build time.
        install_dists = set([odist])
        while todo:
            dist = todo.pop()
            name = dist.key  # case-insensitive
            if name not in self.dists_by_name:
                self.add_distribution(dist)
            else:
                other = self.dists_by_name[name]
                if other != dist:
                    self.try_to_replace(dist, other, problems)

            # ireqts: run-time and meta requirements; sreqts: build
            # requirements; ereqts: requirements pulled in by meta extras.
            ireqts = dist.run_requires | dist.meta_requires
            sreqts = dist.build_requires
            ereqts = set()
            if meta_extras and dist in install_dists:
                for key in ('test', 'build', 'dev'):
                    e = ':%s:' % key
                    if e in meta_extras:
                        ereqts |= getattr(dist, '%s_requires' % key)
            all_reqts = ireqts | sreqts | ereqts
            for r in all_reqts:
                providers = self.find_providers(r)
                if not providers:
                    logger.debug('No providers found for %r', r)
                    provider = self.locator.locate(r, prereleases=prereleases)
                    # If no provider is found and we didn't consider
                    # prereleases, consider them now.
                    if provider is None and not prereleases:
                        provider = self.locator.locate(r, prereleases=True)
                    if provider is None:
                        logger.debug('Cannot satisfy %r', r)
                        problems.add(('unsatisfied', r))
                    else:
                        n, v = provider.key, provider.version
                        if (n, v) not in self.dists:
                            todo.add(provider)
                        providers.add(provider)
                        if r in ireqts and dist in install_dists:
                            install_dists.add(provider)
                            logger.debug('Adding %s to install_dists',
                                         provider.name_and_version)
                for p in providers:
                    name = p.key
                    if name not in self.dists_by_name:
                        self.reqts.setdefault(p, set()).add(r)
                    else:
                        other = self.dists_by_name[name]
                        if other != p:
                            # see if other can be replaced by p
                            self.try_to_replace(p, other, problems)

        dists = set(self.dists.values())
        for dist in dists:
            dist.build_time_dependency = dist not in install_dists
            if dist.build_time_dependency:
                logger.debug('%s is a build-time dependency only.',
                             dist.name_and_version)
        logger.debug('find done for %s', odist)
        return dists, problems
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/manifest.py
ADDED
|
@@ -0,0 +1,384 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 Python Software Foundation.
|
| 4 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 5 |
+
#
|
| 6 |
+
"""
|
| 7 |
+
Class representing the list of files in a distribution.
|
| 8 |
+
|
| 9 |
+
Equivalent to distutils.filelist, but fixes some problems.
|
| 10 |
+
"""
|
| 11 |
+
import fnmatch
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import re
|
| 15 |
+
import sys
|
| 16 |
+
|
| 17 |
+
from . import DistlibException
|
| 18 |
+
from .compat import fsdecode
|
| 19 |
+
from .util import convert_path
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
__all__ = ['Manifest']
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
# a \ followed by some spaces + EOL
|
| 27 |
+
_COLLAPSE_PATTERN = re.compile('\\\\w*\n', re.M)
|
| 28 |
+
_COMMENTED_LINE = re.compile('#.*?(?=\n)|\n(?=$)', re.M | re.S)
|
| 29 |
+
|
| 30 |
+
#
|
| 31 |
+
# Due to the different results returned by fnmatch.translate, we need
|
| 32 |
+
# to do slightly different processing for Python 2.7 and 3.2 ... this needed
|
| 33 |
+
# to be brought in for Python 3.6 onwards.
|
| 34 |
+
#
|
| 35 |
+
_PYTHON_VERSION = sys.version_info[:2]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class Manifest(object):
    """
    A list of files built by exploring the filesystem and filtered by applying various
    patterns to what we find there.

    ``allfiles`` holds every regular file found under ``base`` (``None`` until
    :meth:`findall` has run); ``files`` is the set of selected pathnames.
    """

    def __init__(self, base=None):
        """
        Initialise an instance.

        :param base: The base directory to explore under. Defaults to the
                     current working directory when not given (or falsy).
        """
        self.base = os.path.abspath(os.path.normpath(base or os.getcwd()))
        self.prefix = self.base + os.sep
        # None means "not scanned yet"; _include_pattern() scans lazily.
        self.allfiles = None
        self.files = set()

    #
    # Public API
    #

    def findall(self):
        """Find all files under the base and set ``allfiles`` to the absolute
        pathnames of files found.

        Entries which cannot be stat'ed (for example dangling symbolic links)
        are skipped instead of aborting the whole scan.
        """
        from stat import S_ISREG, S_ISDIR, S_ISLNK

        self.allfiles = allfiles = []
        root = self.base
        stack = [root]
        pop = stack.pop
        push = stack.append

        while stack:
            root = pop()
            names = os.listdir(root)

            for name in names:
                fullname = os.path.join(root, name)

                # Avoid excess stat calls -- just one will do, thank you!
                try:
                    stat = os.stat(fullname)
                except OSError:
                    # os.stat() follows symbolic links, so a link whose
                    # target is missing raises here; there is no file to
                    # record, so move on to the next entry.
                    continue
                mode = stat.st_mode
                if S_ISREG(mode):
                    allfiles.append(fsdecode(fullname))
                elif S_ISDIR(mode) and not S_ISLNK(mode):
                    push(fullname)

    def add(self, item):
        """
        Add a file to the manifest.

        :param item: The pathname to add. This can be relative to the base.
        """
        if not item.startswith(self.prefix):
            item = os.path.join(self.base, item)
        self.files.add(os.path.normpath(item))

    def add_many(self, items):
        """
        Add a list of files to the manifest.

        :param items: The pathnames to add. These can be relative to the base.
        """
        for item in items:
            self.add(item)

    def sorted(self, wantdirs=False):
        """
        Return sorted files in directory order

        :param wantdirs: If true, the directories containing the files (and
                         their parents, up to the base) are included too.
        :return: A new list of pathnames, ordered by (directory, name).
        """

        def add_dir(dirs, d):
            # Record d and, recursively, every ancestor up to self.base.
            dirs.add(d)
            logger.debug('add_dir added %s', d)
            if d != self.base:
                parent, _ = os.path.split(d)
                assert parent not in ('', '/')
                add_dir(dirs, parent)

        result = set(self.files)    # make a copy!
        if wantdirs:
            dirs = set()
            for f in result:
                add_dir(dirs, os.path.dirname(f))
            result |= dirs
        return [os.path.join(*path_tuple) for path_tuple in
                sorted(os.path.split(path) for path in result)]

    def clear(self):
        """Clear all collected files.

        Note that ``allfiles`` becomes an empty list rather than ``None``, so
        the lazy scan in ``_include_pattern`` is not triggered again; call
        :meth:`findall` explicitly to rescan.
        """
        self.files = set()
        self.allfiles = []

    def process_directive(self, directive):
        """
        Process a directive which either adds some files from ``allfiles`` to
        ``files``, or removes some files from ``files``.

        :param directive: The directive to process. This should be in a format
                     compatible with distutils ``MANIFEST.in`` files:

                     http://docs.python.org/distutils/sourcedist.html#commands
        :raises DistlibException: If the directive is empty, names an unknown
                     action or has the wrong number of words for its action.
        """
        # Parse the line: split it up, make sure the right number of words
        # is there, and return the relevant words.  'action' is always
        # defined: it's the first word of the line.  Which of the other
        # three are defined depends on the action; it'll be either
        # patterns, (dir and patterns), or (dirpattern).
        action, patterns, thedir, dirpattern = self._parse_directive(directive)

        # OK, now we know that the action is valid and we have the
        # right number of words on the line for that action -- so we
        # can proceed with minimal error-checking.
        if action == 'include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=True):
                    logger.warning('no files found matching %r', pattern)

        elif action == 'exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=True)

        elif action == 'global-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, anchor=False):
                    logger.warning('no files found matching %r '
                                   'anywhere in distribution', pattern)

        elif action == 'global-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, anchor=False)

        elif action == 'recursive-include':
            for pattern in patterns:
                if not self._include_pattern(pattern, prefix=thedir):
                    logger.warning('no files found matching %r '
                                   'under directory %r', pattern, thedir)

        elif action == 'recursive-exclude':
            for pattern in patterns:
                self._exclude_pattern(pattern, prefix=thedir)

        elif action == 'graft':
            if not self._include_pattern(None, prefix=dirpattern):
                logger.warning('no directories found matching %r',
                               dirpattern)

        elif action == 'prune':
            if not self._exclude_pattern(None, prefix=dirpattern):
                logger.warning('no previously-included directories found '
                               'matching %r', dirpattern)
        else:   # pragma: no cover
            # This should never happen, as it should be caught in
            # _parse_directive
            raise DistlibException(
                'invalid action %r' % action)

    #
    # Private API
    #

    def _parse_directive(self, directive):
        """
        Validate a directive.

        :param directive: The directive to validate.
        :return: A tuple of action, patterns, thedir, dir_pattern. Entries
                 which do not apply to the action are ``None``.
        :raises DistlibException: If the directive is empty, the action is
                 unknown or the number of words does not suit the action.
        """
        words = directive.split()
        if not words:
            # Previously this fell through to words[0] and surfaced as a bare
            # IndexError; report it like every other malformed directive.
            raise DistlibException('empty directive')

        if len(words) == 1 and words[0] not in ('include', 'exclude',
                                                'global-include',
                                                'global-exclude',
                                                'recursive-include',
                                                'recursive-exclude',
                                                'graft', 'prune'):
            # no action given, let's use the default 'include'
            words.insert(0, 'include')

        action = words[0]
        patterns = thedir = dir_pattern = None

        if action in ('include', 'exclude',
                      'global-include', 'global-exclude'):
            if len(words) < 2:
                raise DistlibException(
                    '%r expects <pattern1> <pattern2> ...' % action)

            patterns = [convert_path(word) for word in words[1:]]

        elif action in ('recursive-include', 'recursive-exclude'):
            if len(words) < 3:
                raise DistlibException(
                    '%r expects <dir> <pattern1> <pattern2> ...' % action)

            thedir = convert_path(words[1])
            patterns = [convert_path(word) for word in words[2:]]

        elif action in ('graft', 'prune'):
            if len(words) != 2:
                raise DistlibException(
                    '%r expects a single <dir_pattern>' % action)

            dir_pattern = convert_path(words[1])

        else:
            raise DistlibException('unknown action %r' % action)

        return action, patterns, thedir, dir_pattern

    def _include_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Select strings (presumably filenames) from 'self.allfiles' that
        match 'pattern', a Unix-style wildcard (glob) pattern.

        Patterns are not quite the same as implemented by the 'fnmatch'
        module: '*' and '?'  match non-special characters, where "special"
        is platform-dependent: slash on Unix; colon, slash, and backslash on
        DOS/Windows; and colon on Mac OS.

        If 'anchor' is true (the default), then the pattern match is more
        stringent: "*.py" will match "foo.py" but not "foo/bar.py".  If
        'anchor' is false, both of these will match.

        If 'prefix' is supplied, then only filenames starting with 'prefix'
        (itself a pattern) and ending with 'pattern', with anything in between
        them, will match.  'anchor' is ignored in this case.

        If 'is_regex' is true, 'anchor' and 'prefix' are ignored, and
        'pattern' is assumed to be either a string containing a regex or a
        regex object -- no translation is done, the regex is just compiled
        and used as-is.

        Selected strings will be added to self.files. 'self.allfiles' is
        populated via findall() first if it is still None.

        Return True if files are found.
        """
        # XXX docstring lying about what the special chars are?
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)

        # delayed loading of allfiles list
        if self.allfiles is None:
            self.findall()

        for name in self.allfiles:
            if pattern_re.search(name):
                self.files.add(name)
                found = True
        return found

    def _exclude_pattern(self, pattern, anchor=True, prefix=None,
                         is_regex=False):
        """Remove strings (presumably filenames) from 'files' that match
        'pattern'.

        Other parameters are the same as for '_include_pattern()', above.
        The set 'self.files' is modified in place. Return True if files are
        found.

        This API is public to allow e.g. exclusion of SCM subdirs, e.g. when
        packaging source distributions
        """
        found = False
        pattern_re = self._translate_pattern(pattern, anchor, prefix, is_regex)
        # Iterate over a snapshot because the set is mutated in the loop.
        for f in list(self.files):
            if pattern_re.search(f):
                self.files.remove(f)
                found = True
        return found

    def _translate_pattern(self, pattern, anchor=True, prefix=None,
                           is_regex=False):
        """Translate a shell-like wildcard pattern to a compiled regular
        expression.

        Return the compiled regex.  If 'is_regex' true,
        then 'pattern' is directly compiled to a regex (if it's a string)
        or just returned as-is (assumes it's a regex object).
        """
        if is_regex:
            if isinstance(pattern, str):
                return re.compile(pattern)
            else:
                return pattern

        if _PYTHON_VERSION > (3, 2):
            # ditch start and end characters
            # (translate a dummy '_' to learn what fnmatch wraps around it)
            start, _, end = self._glob_to_re('_').partition('_')

        if pattern:
            pattern_re = self._glob_to_re(pattern)
            if _PYTHON_VERSION > (3, 2):
                assert pattern_re.startswith(start) and pattern_re.endswith(end)
        else:
            pattern_re = ''

        # Escaped base directory (with trailing separator) to anchor against.
        base = re.escape(os.path.join(self.base, ''))
        if prefix is not None:
            # ditch end of pattern character
            if _PYTHON_VERSION <= (3, 2):
                empty_pattern = self._glob_to_re('')
                prefix_re = self._glob_to_re(prefix)[:-len(empty_pattern)]
            else:
                prefix_re = self._glob_to_re(prefix)
                assert prefix_re.startswith(start) and prefix_re.endswith(end)
                prefix_re = prefix_re[len(start): len(prefix_re) - len(end)]
            sep = os.sep
            if os.sep == '\\':
                sep = r'\\'
            if _PYTHON_VERSION <= (3, 2):
                pattern_re = '^' + base + sep.join((prefix_re,
                                                    '.*' + pattern_re))
            else:
                pattern_re = pattern_re[len(start): len(pattern_re) - len(end)]
                pattern_re = r'%s%s%s%s.*%s%s' % (start, base, prefix_re, sep,
                                                  pattern_re, end)
        else:  # no prefix -- respect anchor flag
            if anchor:
                if _PYTHON_VERSION <= (3, 2):
                    pattern_re = '^' + base + pattern_re
                else:
                    pattern_re = r'%s%s%s' % (start, base, pattern_re[len(start):])

        return re.compile(pattern_re)

    def _glob_to_re(self, pattern):
        """Translate a shell-like glob pattern to a regular expression.

        Return a string containing the regex.  Differs from
        'fnmatch.translate()' in that '*' does not match "special characters"
        (which are platform-specific).
        """
        pattern_re = fnmatch.translate(pattern)

        # '?' and '*' in the glob pattern become '.' and '.*' in the RE, which
        # IMHO is wrong -- '?' and '*' aren't supposed to match slash in Unix,
        # and by extension they shouldn't match such "special characters" under
        # any OS.  So change all non-escaped dots in the RE to match any
        # character except the special characters (currently: just os.sep).
        sep = os.sep
        if os.sep == '\\':
            # we're using a regex to manipulate a regex, so we need
            # to escape the backslash twice
            sep = r'\\\\'
        escaped = r'\1[^%s]' % sep
        pattern_re = re.sub(r'((?<!\\)(\\\\)*)\.', escaped, pattern_re)
        return pattern_re
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/markers.py
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
"""
|
| 8 |
+
Parser for the environment markers micro-language defined in PEP 508.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
# Note: In PEP 345, the micro-language was Python compatible, so the ast
|
| 12 |
+
# module could be used to parse it. However, PEP 508 introduced operators such
|
| 13 |
+
# as ~= and === which aren't in Python, necessitating a different approach.
|
| 14 |
+
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
import sys
|
| 18 |
+
import platform
|
| 19 |
+
|
| 20 |
+
from .compat import string_types
|
| 21 |
+
from .util import in_venv, parse_marker
|
| 22 |
+
from .version import LegacyVersion as LV
|
| 23 |
+
|
| 24 |
+
__all__ = ['interpret']
|
| 25 |
+
|
| 26 |
+
_VERSION_PATTERN = re.compile(
|
| 27 |
+
r'((\d+(\.\d+)*\w*)|\'(\d+(\.\d+)*\w*)\'|\"(\d+(\.\d+)*\w*)\")')
|
| 28 |
+
_VERSION_MARKERS = {'python_version', 'python_full_version'}
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _is_version_marker(s):
    """Return True if *s* is a string naming one of ``_VERSION_MARKERS``."""
    if not isinstance(s, string_types):
        return False
    return s in _VERSION_MARKERS
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def _is_literal(o):
    """Return True if *o* is a non-empty string starting with a quote character."""
    if isinstance(o, string_types) and o:
        return o[0] in '\'"'
    return False
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _get_versions(s):
    """Return the set of ``LV`` objects built from every version-like match in *s*."""
    versions = set()
    for match in _VERSION_PATTERN.finditer(s):
        # Group 1 is the outermost group, i.e. the whole matched token.
        versions.add(LV(match.group(1)))
    return versions
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class Evaluator(object):
    """
    Evaluates parsed marker expressions against a context mapping.
    """

    # Maps each supported operator token to a two-argument callable.
    operations = {
        '==': lambda a, b: a == b,
        '===': lambda a, b: a == b,
        '~=': lambda a, b: a == b or a > b,
        '!=': lambda a, b: a != b,
        '<': lambda a, b: a < b,
        '<=': lambda a, b: a == b or a < b,
        '>': lambda a, b: a > b,
        '>=': lambda a, b: a == b or a > b,
        'and': lambda a, b: a and b,
        'or': lambda a, b: a or b,
        'in': lambda a, b: a in b,
        'not in': lambda a, b: a not in b,
    }

    def evaluate(self, expr, context):
        """
        Evaluate a parsed marker expression in the specified context.

        A string expression is either a quoted literal (its quotes are
        stripped) or a variable name looked up in *context*. Any other
        expression must be a dict with 'op', 'lhs' and 'rhs' keys; both sides
        are evaluated recursively and combined with the operator.

        :raises SyntaxError: for an unknown variable, or when both operands
                             are literals.
        :raises NotImplementedError: for an operator not in ``operations``.
        """
        if isinstance(expr, string_types):
            # Leaf node: quoted literal or context variable.
            if expr[0] in '\'"':
                return expr[1:-1]
            if expr not in context:
                raise SyntaxError('unknown variable: %s' % expr)
            return context[expr]

        assert isinstance(expr, dict)
        op = expr['op']
        if op not in self.operations:
            raise NotImplementedError('op not implemented: %s' % op)
        left_node = expr['lhs']
        right_node = expr['rhs']
        if _is_literal(left_node) and _is_literal(right_node):
            raise SyntaxError('invalid comparison: %s %s %s' %
                              (left_node, op, right_node))

        lhs = self.evaluate(left_node, context)
        rhs = self.evaluate(right_node, context)

        left_is_version = _is_version_marker(left_node)
        comparison_ops = ('<', '<=', '>', '>=', '===', '==', '!=', '~=')
        if ((left_is_version or _is_version_marker(right_node))
                and op in comparison_ops):
            # Compare as versions rather than as plain strings.
            lhs = LV(lhs)
            rhs = LV(rhs)
        elif left_is_version and op in ('in', 'not in'):
            # Membership is tested against the versions found in the rhs text.
            lhs = LV(lhs)
            rhs = _get_versions(rhs)
        return self.operations[op](lhs, rhs)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
_DIGITS = re.compile(r'\d+\.\d+')
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def default_context():
    """Build the mapping of marker variable names to values for this interpreter."""

    def format_full_version(info):
        # major.minor.micro, plus e.g. 'b1' for a non-final release level.
        text = '%s.%s.%s' % (info.major, info.minor, info.micro)
        level = info.releaselevel
        if level == 'final':
            return text
        return text + level[0] + str(info.serial)

    if not hasattr(sys, 'implementation'):
        implementation_name = ''
        implementation_version = '0'
    else:
        implementation_name = sys.implementation.name
        implementation_version = format_full_version(
            sys.implementation.version)

    full_version = platform.python_version()
    # Leading 'X.Y' portion of the full version string.
    short_version = _DIGITS.match(full_version).group(0)

    return {
        'implementation_name': implementation_name,
        'implementation_version': implementation_version,
        'os_name': os.name,
        'platform_machine': platform.machine(),
        'platform_python_implementation': platform.python_implementation(),
        'platform_release': platform.release(),
        'platform_system': platform.system(),
        'platform_version': platform.version(),
        'platform_in_venv': str(in_venv()),
        'python_full_version': full_version,
        'python_version': short_version,
        'sys_platform': sys.platform,
    }
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
DEFAULT_CONTEXT = default_context()
|
| 142 |
+
del default_context
|
| 143 |
+
|
| 144 |
+
evaluator = Evaluator()
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def interpret(marker, execution_context=None):
    """
    Interpret a marker and return a result depending on environment.

    :param marker: The marker to interpret.
    :type marker: str
    :param execution_context: The context used for name lookup; its entries
                              override those of ``DEFAULT_CONTEXT``.
    :type execution_context: mapping
    :raises SyntaxError: if the marker cannot be parsed, or is followed by
                         trailing data that does not start with '#'.
    """
    try:
        expr, rest = parse_marker(marker)
    except Exception as e:
        message = 'Unable to interpret marker syntax: %s: %s' % (marker, e)
        raise SyntaxError(message)

    # Anything left over after the marker must be a '#' comment.
    trailing_ok = not rest or rest[0] == '#'
    if not trailing_ok:
        raise SyntaxError('unexpected trailing data in marker: %s: %s' %
                          (marker, rest))

    context = dict(DEFAULT_CONTEXT)
    if execution_context:
        context.update(execution_context)
    return evaluator.evaluate(expr, context)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/metadata.py
ADDED
|
@@ -0,0 +1,1068 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012 The Python Software Foundation.
|
| 4 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 5 |
+
#
|
| 6 |
+
"""Implementation of the Metadata for Python packages PEPs.
|
| 7 |
+
|
| 8 |
+
Supports all metadata formats (1.0, 1.1, 1.2, 1.3/2.1 and 2.2).
|
| 9 |
+
"""
|
| 10 |
+
from __future__ import unicode_literals
|
| 11 |
+
|
| 12 |
+
import codecs
|
| 13 |
+
from email import message_from_file
|
| 14 |
+
import json
|
| 15 |
+
import logging
|
| 16 |
+
import re
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
from . import DistlibException, __version__
|
| 20 |
+
from .compat import StringIO, string_types, text_type
|
| 21 |
+
from .markers import interpret
|
| 22 |
+
from .util import extract_by_key, get_extras
|
| 23 |
+
from .version import get_scheme, PEP440_VERSION_RE
|
| 24 |
+
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class MetadataMissingError(DistlibException):
    """Raised when a required piece of metadata is missing."""
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class MetadataConflictError(DistlibException):
    """Raised on an attempt to read or write conflicting metadata fields."""
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class MetadataUnrecognizedVersionError(DistlibException):
    """Raised when the metadata version number is not a known one."""
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class MetadataInvalidError(DistlibException):
    """Raised when a metadata value is invalid."""
|
| 42 |
+
|
| 43 |
+
# public API of this module
|
| 44 |
+
__all__ = ['Metadata', 'PKG_INFO_ENCODING', 'PKG_INFO_PREFERRED_VERSION']
|
| 45 |
+
|
| 46 |
+
# Encoding used for the PKG-INFO files
|
| 47 |
+
PKG_INFO_ENCODING = 'utf-8'
|
| 48 |
+
|
| 49 |
+
# preferred version. Hopefully will be changed
|
| 50 |
+
# to 1.2 once PEP 345 is supported everywhere
|
| 51 |
+
PKG_INFO_PREFERRED_VERSION = '1.1'
|
| 52 |
+
|
| 53 |
+
_LINE_PREFIX_1_2 = re.compile('\n \\|')
|
| 54 |
+
_LINE_PREFIX_PRE_1_2 = re.compile('\n ')
|
| 55 |
+
_241_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
|
| 56 |
+
'Summary', 'Description',
|
| 57 |
+
'Keywords', 'Home-page', 'Author', 'Author-email',
|
| 58 |
+
'License')
|
| 59 |
+
|
| 60 |
+
_314_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
|
| 61 |
+
'Supported-Platform', 'Summary', 'Description',
|
| 62 |
+
'Keywords', 'Home-page', 'Author', 'Author-email',
|
| 63 |
+
'License', 'Classifier', 'Download-URL', 'Obsoletes',
|
| 64 |
+
'Provides', 'Requires')
|
| 65 |
+
|
| 66 |
+
_314_MARKERS = ('Obsoletes', 'Provides', 'Requires', 'Classifier',
|
| 67 |
+
'Download-URL')
|
| 68 |
+
|
| 69 |
+
_345_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
|
| 70 |
+
'Supported-Platform', 'Summary', 'Description',
|
| 71 |
+
'Keywords', 'Home-page', 'Author', 'Author-email',
|
| 72 |
+
'Maintainer', 'Maintainer-email', 'License',
|
| 73 |
+
'Classifier', 'Download-URL', 'Obsoletes-Dist',
|
| 74 |
+
'Project-URL', 'Provides-Dist', 'Requires-Dist',
|
| 75 |
+
'Requires-Python', 'Requires-External')
|
| 76 |
+
|
| 77 |
+
_345_MARKERS = ('Provides-Dist', 'Requires-Dist', 'Requires-Python',
|
| 78 |
+
'Obsoletes-Dist', 'Requires-External', 'Maintainer',
|
| 79 |
+
'Maintainer-email', 'Project-URL')
|
| 80 |
+
|
| 81 |
+
_426_FIELDS = ('Metadata-Version', 'Name', 'Version', 'Platform',
|
| 82 |
+
'Supported-Platform', 'Summary', 'Description',
|
| 83 |
+
'Keywords', 'Home-page', 'Author', 'Author-email',
|
| 84 |
+
'Maintainer', 'Maintainer-email', 'License',
|
| 85 |
+
'Classifier', 'Download-URL', 'Obsoletes-Dist',
|
| 86 |
+
'Project-URL', 'Provides-Dist', 'Requires-Dist',
|
| 87 |
+
'Requires-Python', 'Requires-External', 'Private-Version',
|
| 88 |
+
'Obsoleted-By', 'Setup-Requires-Dist', 'Extension',
|
| 89 |
+
'Provides-Extra')
|
| 90 |
+
|
| 91 |
+
_426_MARKERS = ('Private-Version', 'Provides-Extra', 'Obsoleted-By',
|
| 92 |
+
'Setup-Requires-Dist', 'Extension')
|
| 93 |
+
|
| 94 |
+
# See issue #106: Sometimes 'Requires' and 'Provides' occur wrongly in
|
| 95 |
+
# the metadata. Include them in the tuple literal below to allow them
|
| 96 |
+
# (for now).
|
| 97 |
+
# Ditto for Obsoletes - see issue #140.
|
| 98 |
+
_566_FIELDS = _426_FIELDS + ('Description-Content-Type',
|
| 99 |
+
'Requires', 'Provides', 'Obsoletes')
|
| 100 |
+
|
| 101 |
+
_566_MARKERS = ('Description-Content-Type',)
|
| 102 |
+
|
| 103 |
+
_643_MARKERS = ('Dynamic', 'License-File')
|
| 104 |
+
|
| 105 |
+
_643_FIELDS = _566_FIELDS + _643_MARKERS
|
| 106 |
+
|
| 107 |
+
_ALL_FIELDS = set()
|
| 108 |
+
_ALL_FIELDS.update(_241_FIELDS)
|
| 109 |
+
_ALL_FIELDS.update(_314_FIELDS)
|
| 110 |
+
_ALL_FIELDS.update(_345_FIELDS)
|
| 111 |
+
_ALL_FIELDS.update(_426_FIELDS)
|
| 112 |
+
_ALL_FIELDS.update(_566_FIELDS)
|
| 113 |
+
_ALL_FIELDS.update(_643_FIELDS)
|
| 114 |
+
|
| 115 |
+
EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''')
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def _version2fieldlist(version):
    """Return the tuple of field names that belong to metadata *version*.

    Raises ValueError for the withdrawn version '2.0' and
    MetadataUnrecognizedVersionError for any other unknown version.
    """
    if version == '1.0':
        return _241_FIELDS
    if version == '1.1':
        return _314_FIELDS
    if version == '1.2':
        return _345_FIELDS
    if version == '1.3' or version == '2.1':
        # avoid adding field names if already there
        additions = tuple(name for name in _566_FIELDS
                          if name not in _345_FIELDS)
        return _345_FIELDS + additions
    if version == '2.0':
        raise ValueError('Metadata 2.0 is withdrawn and not supported')
    if version == '2.2':
        return _643_FIELDS
    raise MetadataUnrecognizedVersionError(version)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _best_version(fields):
    """Detect the best version depending on the fields used.

    *fields* is a mapping of field name to value. Fields whose value is
    ``[]``, ``'UNKNOWN'`` or ``None`` are ignored. Raises
    MetadataConflictError when no version can hold all the used fields, or
    when marker fields of more than one version are present.
    """
    def _has_marker(keys, markers):
        for marker in markers:
            if marker in keys:
                return True
        return False

    unset_values = ([], 'UNKNOWN', None)
    keys = []
    for key, value in fields.items():
        if value not in unset_values:
            keys.append(key)

    possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.1', '2.2']  # 2.0 removed

    # (version, fields allowed in that version, debug message on removal)
    elimination_rules = (
        ('1.0', _241_FIELDS, 'Removed 1.0 due to %s'),
        ('1.1', _314_FIELDS, 'Removed 1.1 due to %s'),
        ('1.2', _345_FIELDS, 'Removed 1.2 due to %s'),
        ('1.3', _566_FIELDS, 'Removed 1.3 due to %s'),
        ('2.1', _566_FIELDS, 'Removed 2.1 due to %s'),
        ('2.2', _643_FIELDS, 'Removed 2.2 due to %s'),
    )

    # first let's try to see if a field is not part of one of the version
    for key in keys:
        for version, allowed, message in elimination_rules:
            if key in allowed or version not in possible_versions:
                continue
            if version == '2.1' and key == 'Description':
                # In 2.1, description allowed after headers
                continue
            possible_versions.remove(version)
            logger.debug(message, key)

    # possible_version contains qualified versions
    if not possible_versions:
        logger.debug('Out of options - unknown metadata set: %s', fields)
        raise MetadataConflictError('Unknown metadata set')
    if len(possible_versions) == 1:
        return possible_versions[0]  # found !

    # let's see if one unique marker is found
    marker_rules = (
        ('1.1', _314_MARKERS),
        ('1.2', _345_MARKERS),
        ('2.1', _566_MARKERS),
        ('2.2', _643_MARKERS),
    )
    detected = [version for version, markers in marker_rules
                if version in possible_versions and _has_marker(keys, markers)]
    if len(detected) > 1:
        raise MetadataConflictError('You used incompatible 1.1/1.2/2.1/2.2 fields')

    # we have the choice, 1.0, or 1.2, 2.1 or 2.2
    # - 1.0 has a broken Summary field but works with all tools
    # - 1.1 is to avoid
    # - 1.2 fixes Summary but has little adoption
    # - 2.1 adds more features
    # - 2.2 is the latest
    if not detected:
        # we couldn't find any specific marker
        if PKG_INFO_PREFERRED_VERSION in possible_versions:
            return PKG_INFO_PREFERRED_VERSION
        return '2.2'
    return detected[0]
|
| 205 |
+
|
| 206 |
+
# This follows the rules about transforming keys as described in
# https://www.python.org/dev/peps/pep-0566/#id17
# Maps the attribute-style spelling (lower case, underscores) to the field
# name, and back again.
_ATTR2FIELD = {
    field.replace("-", "_").lower(): field for field in _ALL_FIELDS
}
_FIELD2ATTR = {field: attr for attr, field in _ATTR2FIELD.items()}

# Fields whose values get checked in LegacyMetadata.set()/check().
_PREDICATE_FIELDS = ('Requires-Dist', 'Obsoletes-Dist', 'Provides-Dist')
_VERSIONS_FIELDS = ('Requires-Python',)
_VERSION_FIELDS = ('Version',)

# Fields that hold a list of values.
_LISTFIELDS = (
    'Platform',
    'Classifier',
    'Obsoletes',
    'Requires',
    'Provides',
    'Obsoletes-Dist',
    'Provides-Dist',
    'Requires-Dist',
    'Requires-External',
    'Project-URL',
    'Supported-Platform',
    'Setup-Requires-Dist',
    'Provides-Extra',
    'Extension',
    'License-File',
)
# List fields whose items are tuples rather than strings.
_LISTTUPLEFIELDS = ('Project-URL',)

# Fields stored as a list but written out as one comma-joined value.
_ELEMENTSFIELD = ('Keywords',)

_UNICODEFIELDS = ('Author', 'Maintainer', 'Summary', 'Description')

# Sentinel meaning "no default was passed" in LegacyMetadata.get().
_MISSING = object()
|
| 228 |
+
|
| 229 |
+
_FILESAFE = re.compile('[^A-Za-z0-9.]+')
|
| 230 |
+
|
| 231 |
+
|
| 232 |
+
def _get_name_and_version(name, version, for_filename=False):
|
| 233 |
+
"""Return the distribution name with version.
|
| 234 |
+
|
| 235 |
+
If for_filename is true, return a filename-escaped form."""
|
| 236 |
+
if for_filename:
|
| 237 |
+
# For both name and version any runs of non-alphanumeric or '.'
|
| 238 |
+
# characters are replaced with a single '-'. Additionally any
|
| 239 |
+
# spaces in the version string become '.'
|
| 240 |
+
name = _FILESAFE.sub('-', name)
|
| 241 |
+
version = _FILESAFE.sub('-', version.replace(' ', '.'))
|
| 242 |
+
return '%s-%s' % (name, version)
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
class LegacyMetadata(object):
    """The legacy (key-value) metadata of a release.

    Handles metadata versions 1.0, 1.1, 1.2, 1.3/2.1 and 2.2; the version is
    auto-detected from the fields in use. The withdrawn 2.0 format is not
    supported. You can instantiate the class with one of these arguments
    (or none):
    - *path*, the path to a metadata file
    - *fileobj* give a file-like object with metadata as content
    - *mapping* is a dict-like object
    - *scheme* is a version scheme name
    """
    # TODO document the mapping API and UNKNOWN default key

    def __init__(self, path=None, fileobj=None, mapping=None,
                 scheme='default'):
        """Create the metadata, optionally loading it from one source.

        Raises TypeError when more than one of *path*, *fileobj* and
        *mapping* is given.
        """
        if [path, fileobj, mapping].count(None) < 2:
            raise TypeError('path, fileobj and mapping are exclusive')
        self._fields = {}
        self.requires_files = []
        self._dependencies = None
        self.scheme = scheme
        if path is not None:
            self.read(path)
        elif fileobj is not None:
            self.read_file(fileobj)
        elif mapping is not None:
            self.update(mapping)
            # A mapping carries no version header to trust, so detect it.
            self.set_metadata_version()

    def set_metadata_version(self):
        """Store the version detected from the fields currently set."""
        self._fields['Metadata-Version'] = _best_version(self._fields)

    def _write_field(self, fileobj, name, value):
        """Write one ``name: value`` line to *fileobj*."""
        fileobj.write('%s: %s\n' % (name, value))

    def __getitem__(self, name):
        """Return the field value, or its default when unset (see get())."""
        return self.get(name)

    def __setitem__(self, name, value):
        """Set a field through set(), with its coercion and checks."""
        return self.set(name, value)

    def __delitem__(self, name):
        """Remove a field; raise KeyError(name) when it is not set."""
        field_name = self._convert_name(name)
        try:
            del self._fields[field_name]
        except KeyError:
            # Report the name the caller used, not the converted one.
            raise KeyError(name)

    def __contains__(self, name):
        """Return True if the field is set, under either spelling."""
        return (name in self._fields or
                self._convert_name(name) in self._fields)

    def _convert_name(self, name):
        """Map an attribute-style name to its field name.

        Known field names are returned as-is; names that match nothing come
        back lower-cased with '-' replaced by '_'.
        """
        if name in _ALL_FIELDS:
            return name
        name = name.replace('-', '_').lower()
        return _ATTR2FIELD.get(name, name)

    def _default_value(self, name):
        """Return ``[]`` for list-like fields and 'UNKNOWN' otherwise."""
        if name in _LISTFIELDS or name in _ELEMENTSFIELD:
            return []
        return 'UNKNOWN'

    def _remove_line_prefix(self, value):
        """Replace continuation-line prefixes in *value* by a bare newline.

        The pattern used depends on the current metadata version.
        """
        if self.metadata_version in ('1.0', '1.1'):
            return _LINE_PREFIX_PRE_1_2.sub('\n', value)
        else:
            return _LINE_PREFIX_1_2.sub('\n', value)

    def __getattr__(self, name):
        """Expose fields as attributes, e.g. ``md.home_page``.

        Only called for names not found by normal lookup; anything that is
        not an attribute-style field name raises AttributeError.
        """
        if name in _ATTR2FIELD:
            return self[name]
        raise AttributeError(name)

    #
    # Public API
    #

    # dependencies = property(_get_dependencies, _set_dependencies)

    def get_fullname(self, filesafe=False):
        """Return the distribution name with version.

        If filesafe is true, return a filename-escaped form."""
        return _get_name_and_version(self['Name'], self['Version'], filesafe)

    def is_field(self, name):
        """return True if name is a valid metadata key"""
        name = self._convert_name(name)
        return name in _ALL_FIELDS

    def is_multi_field(self, name):
        """Return True if *name* is a field that holds a list of values."""
        name = self._convert_name(name)
        return name in _LISTFIELDS

    def read(self, filepath):
        """Read the metadata values from a file path."""
        with codecs.open(filepath, 'r', encoding='utf-8') as fp:
            self.read_file(fp)

    def read_file(self, fileob):
        """Read the metadata values from a file object."""
        msg = message_from_file(fileob)
        # Taken verbatim from the header; may be None if the header is absent.
        self._fields['Metadata-Version'] = msg['metadata-version']

        # When reading, get all the fields we can
        for field in _ALL_FIELDS:
            if field not in msg:
                continue
            if field in _LISTFIELDS:
                # we can have multiple lines
                values = msg.get_all(field)
                if field in _LISTTUPLEFIELDS and values is not None:
                    values = [tuple(value.split(',')) for value in values]
                self.set(field, values)
            else:
                # single line
                value = msg[field]
                if value is not None and value != 'UNKNOWN':
                    self.set(field, value)

        # PEP 566 specifies that the body be used for the description, if
        # available
        body = msg.get_payload()
        self["Description"] = body if body else self["Description"]
        # logger.debug('Attempting to set metadata for %s', self)
        # self.set_metadata_version()

    def write(self, filepath, skip_unknown=False):
        """Write the metadata fields to filepath."""
        with codecs.open(filepath, 'w', encoding='utf-8') as fp:
            self.write_file(fp, skip_unknown)

    def write_file(self, fileobject, skip_unknown=False):
        """Write the PKG-INFO format data to a file object.

        The metadata version is re-detected first. With *skip_unknown*,
        fields whose value is 'UNKNOWN', ``[]`` or ``['UNKNOWN']`` are
        left out.
        """
        self.set_metadata_version()

        for field in _version2fieldlist(self['Metadata-Version']):
            values = self.get(field)
            if skip_unknown and values in ('UNKNOWN', [], ['UNKNOWN']):
                continue
            if field in _ELEMENTSFIELD:
                self._write_field(fileobject, field, ','.join(values))
                continue
            if field not in _LISTFIELDS:
                if field == 'Description':
                    # NOTE(review): the continuation prefix written here
                    # presumably has to be the one _remove_line_prefix()
                    # strips on read - confirm against the _LINE_PREFIX_*
                    # patterns.
                    if self.metadata_version in ('1.0', '1.1'):
                        values = values.replace('\n', '\n ')
                    else:
                        values = values.replace('\n', '\n |')
                # Single-valued field: wrap it so the loop below is shared.
                values = [values]

            if field in _LISTTUPLEFIELDS:
                values = [','.join(value) for value in values]

            for value in values:
                self._write_field(fileobject, field, value)

    def update(self, other=None, **kwargs):
        """Set metadata values from the given iterable `other` and kwargs.

        Behavior is like `dict.update`: If `other` has a ``keys`` method,
        they are looped over and ``self[key]`` is assigned ``other[key]``.
        Else, ``other`` is an iterable of ``(key, value)`` iterables.

        Keys that don't match a metadata field or that have an empty value are
        dropped. Keys are matched against the attribute-style names
        (lower case, underscores).
        """
        def _set(key, value):
            if key in _ATTR2FIELD and value:
                self.set(self._convert_name(key), value)

        if not other:
            # other is None or empty container
            pass
        elif hasattr(other, 'keys'):
            for k in other.keys():
                _set(k, other[k])
        else:
            for k, v in other:
                _set(k, v)

        if kwargs:
            for k, v in kwargs.items():
                _set(k, v)

    def set(self, name, value):
        """Control then set a metadata field.

        Non-list values are coerced first: for 'Keywords' and 'Platform' a
        string is split on commas, for other list fields a string becomes a
        one-item list, and any other non-list value becomes ``[]``. When
        WARNING logging is enabled, requirement, version-constraint and
        version values are checked against the scheme and a warning is
        logged for invalid ones; the value is stored regardless.
        """
        name = self._convert_name(name)

        if ((name in _ELEMENTSFIELD or name == 'Platform') and
                not isinstance(value, (list, tuple))):
            if isinstance(value, string_types):
                value = [v.strip() for v in value.split(',')]
            else:
                value = []
        elif (name in _LISTFIELDS and
              not isinstance(value, (list, tuple))):
            if isinstance(value, string_types):
                value = [value]
            else:
                value = []

        # Skip the validation work entirely when nobody would see the result.
        if logger.isEnabledFor(logging.WARNING):
            project_name = self['Name']

            scheme = get_scheme(self.scheme)
            if name in _PREDICATE_FIELDS and value is not None:
                for v in value:
                    # check that the values are valid
                    if not scheme.is_valid_matcher(v.split(';')[0]):
                        logger.warning(
                            "'%s': '%s' is not valid (field '%s')",
                            project_name, v, name)
            # FIXME this rejects UNKNOWN, is that right?
            elif name in _VERSIONS_FIELDS and value is not None:
                if not scheme.is_valid_constraint_list(value):
                    logger.warning("'%s': '%s' is not a valid version (field '%s')",
                                   project_name, value, name)
            elif name in _VERSION_FIELDS and value is not None:
                if not scheme.is_valid_version(value):
                    logger.warning("'%s': '%s' is not a valid version (field '%s')",
                                   project_name, value, name)

        if name in _UNICODEFIELDS:
            if name == 'Description':
                value = self._remove_line_prefix(value)

        self._fields[name] = value

    def get(self, name, default=_MISSING):
        """Get a metadata field.

        For an unset field, return *default* if given, otherwise ``[]`` for
        list-like fields and 'UNKNOWN' for the rest. List fields come back
        as a new list; 'Project-URL' items are returned as 2-tuples.
        """
        name = self._convert_name(name)
        if name not in self._fields:
            if default is _MISSING:
                default = self._default_value(name)
            return default
        if name in _UNICODEFIELDS:
            value = self._fields[name]
            return value
        elif name in _LISTFIELDS:
            value = self._fields[name]
            if value is None:
                return []
            res = []
            for val in value:
                if name not in _LISTTUPLEFIELDS:
                    res.append(val)
                else:
                    # That's for Project-URL
                    res.append((val[0], val[1]))
            return res

        elif name in _ELEMENTSFIELD:
            value = self._fields[name]
            if isinstance(value, string_types):
                return value.split(',')
        return self._fields[name]

    def check(self, strict=False):
        """Check if the metadata is compliant. If strict is True then raise if
        no Name or Version are provided

        Returns a ``(missing, warnings)`` pair of lists. Field values are
        only validated when the detected metadata version is 1.2.
        """
        self.set_metadata_version()

        # XXX should check the versions (if the file was loaded)
        missing, warnings = [], []

        for attr in ('Name', 'Version'):  # required by PEP 345
            if attr not in self:
                missing.append(attr)

        if strict and missing:
            msg = 'missing required metadata: %s' % ', '.join(missing)
            raise MetadataMissingError(msg)

        for attr in ('Home-page', 'Author'):
            if attr not in self:
                missing.append(attr)

        # checking metadata 1.2 (XXX needs to check 1.1, 1.0)
        if self['Metadata-Version'] != '1.2':
            return missing, warnings

        scheme = get_scheme(self.scheme)

        def are_valid_constraints(value):
            for v in value:
                if not scheme.is_valid_matcher(v.split(';')[0]):
                    return False
            return True

        for fields, controller in ((_PREDICATE_FIELDS, are_valid_constraints),
                                   (_VERSIONS_FIELDS,
                                    scheme.is_valid_constraint_list),
                                   (_VERSION_FIELDS,
                                    scheme.is_valid_version)):
            for field in fields:
                value = self.get(field, None)
                if value is not None and not controller(value):
                    warnings.append("Wrong value for '%s': %s" % (field, value))

        return missing, warnings

    def todict(self, skip_missing=False):
        """Return fields as a dict.

        Field names will be converted to use the underscore-lowercase style
        instead of hyphen-mixed case (i.e. home_page instead of Home-page).
        This is as per https://www.python.org/dev/peps/pep-0566/#id17.

        With *skip_missing*, only fields that are actually set are included.
        """
        self.set_metadata_version()

        fields = _version2fieldlist(self['Metadata-Version'])

        data = {}

        for field_name in fields:
            if not skip_missing or field_name in self._fields:
                key = _FIELD2ATTR[field_name]
                if key != 'project_url':
                    data[key] = self[field_name]
                else:
                    # Project-URL items are tuples; flatten each to a string.
                    data[key] = [','.join(u) for u in self[field_name]]

        return data

    def add_requirements(self, requirements):
        """Append *requirements* to the 'Requires-Dist' field.

        For version 1.1 metadata the 'Obsoletes', 'Requires' and 'Provides'
        fields are removed first.
        """
        if self['Metadata-Version'] == '1.1':
            # we can't have 1.1 metadata *and* Setuptools requires
            for field in ('Obsoletes', 'Requires', 'Provides'):
                if field in self:
                    del self[field]
        self['Requires-Dist'] += requirements

    # Mapping API
    # TODO could add iter* variants

    def keys(self):
        """Return the field names of the current metadata version."""
        return list(_version2fieldlist(self['Metadata-Version']))

    def __iter__(self):
        """Iterate over the field names returned by keys()."""
        for key in self.keys():
            yield key

    def values(self):
        """Return the value (or default) of every key, in keys() order."""
        return [self[key] for key in self.keys()]

    def items(self):
        """Return ``(key, value)`` pairs, in keys() order."""
        return [(key, self[key]) for key in self.keys()]

    def __repr__(self):
        return '<%s %s %s>' % (self.__class__.__name__, self.name,
                               self.version)
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
# File names for the different metadata flavours (their users are outside
# this part of the file).
METADATA_FILENAME = 'pydist.json'
WHEEL_METADATA_FILENAME = 'metadata.json'
LEGACY_METADATA_FILENAME = 'METADATA'
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
class Metadata(object):
|
| 611 |
+
"""
|
| 612 |
+
The metadata of a release. This implementation uses 2.1
|
| 613 |
+
metadata where possible. If not possible, it wraps a LegacyMetadata
|
| 614 |
+
instance which handles the key-value metadata format.
|
| 615 |
+
"""
|
| 616 |
+
|
| 617 |
+
METADATA_VERSION_MATCHER = re.compile(r'^\d+(\.\d+)*$')
|
| 618 |
+
|
| 619 |
+
NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I)
|
| 620 |
+
|
| 621 |
+
FIELDNAME_MATCHER = re.compile('^[A-Z]([0-9A-Z-]*[0-9A-Z])?$', re.I)
|
| 622 |
+
|
| 623 |
+
VERSION_MATCHER = PEP440_VERSION_RE
|
| 624 |
+
|
| 625 |
+
SUMMARY_MATCHER = re.compile('.{1,2047}')
|
| 626 |
+
|
| 627 |
+
METADATA_VERSION = '2.0'
|
| 628 |
+
|
| 629 |
+
GENERATOR = 'distlib (%s)' % __version__
|
| 630 |
+
|
| 631 |
+
MANDATORY_KEYS = {
|
| 632 |
+
'name': (),
|
| 633 |
+
'version': (),
|
| 634 |
+
'summary': ('legacy',),
|
| 635 |
+
}
|
| 636 |
+
|
| 637 |
+
INDEX_KEYS = ('name version license summary description author '
|
| 638 |
+
'author_email keywords platform home_page classifiers '
|
| 639 |
+
'download_url')
|
| 640 |
+
|
| 641 |
+
DEPENDENCY_KEYS = ('extras run_requires test_requires build_requires '
|
| 642 |
+
'dev_requires provides meta_requires obsoleted_by '
|
| 643 |
+
'supports_environments')
|
| 644 |
+
|
| 645 |
+
SYNTAX_VALIDATORS = {
|
| 646 |
+
'metadata_version': (METADATA_VERSION_MATCHER, ()),
|
| 647 |
+
'name': (NAME_MATCHER, ('legacy',)),
|
| 648 |
+
'version': (VERSION_MATCHER, ('legacy',)),
|
| 649 |
+
'summary': (SUMMARY_MATCHER, ('legacy',)),
|
| 650 |
+
'dynamic': (FIELDNAME_MATCHER, ('legacy',)),
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
__slots__ = ('_legacy', '_data', 'scheme')
|
| 654 |
+
|
| 655 |
+
def __init__(self, path=None, fileobj=None, mapping=None,
|
| 656 |
+
scheme='default'):
|
| 657 |
+
if [path, fileobj, mapping].count(None) < 2:
|
| 658 |
+
raise TypeError('path, fileobj and mapping are exclusive')
|
| 659 |
+
self._legacy = None
|
| 660 |
+
self._data = None
|
| 661 |
+
self.scheme = scheme
|
| 662 |
+
#import pdb; pdb.set_trace()
|
| 663 |
+
if mapping is not None:
|
| 664 |
+
try:
|
| 665 |
+
self._validate_mapping(mapping, scheme)
|
| 666 |
+
self._data = mapping
|
| 667 |
+
except MetadataUnrecognizedVersionError:
|
| 668 |
+
self._legacy = LegacyMetadata(mapping=mapping, scheme=scheme)
|
| 669 |
+
self.validate()
|
| 670 |
+
else:
|
| 671 |
+
data = None
|
| 672 |
+
if path:
|
| 673 |
+
with open(path, 'rb') as f:
|
| 674 |
+
data = f.read()
|
| 675 |
+
elif fileobj:
|
| 676 |
+
data = fileobj.read()
|
| 677 |
+
if data is None:
|
| 678 |
+
# Initialised with no args - to be added
|
| 679 |
+
self._data = {
|
| 680 |
+
'metadata_version': self.METADATA_VERSION,
|
| 681 |
+
'generator': self.GENERATOR,
|
| 682 |
+
}
|
| 683 |
+
else:
|
| 684 |
+
if not isinstance(data, text_type):
|
| 685 |
+
data = data.decode('utf-8')
|
| 686 |
+
try:
|
| 687 |
+
self._data = json.loads(data)
|
| 688 |
+
self._validate_mapping(self._data, scheme)
|
| 689 |
+
except ValueError:
|
| 690 |
+
# Note: MetadataUnrecognizedVersionError does not
|
| 691 |
+
# inherit from ValueError (it's a DistlibException,
|
| 692 |
+
# which should not inherit from ValueError).
|
| 693 |
+
# The ValueError comes from the json.load - if that
|
| 694 |
+
# succeeds and we get a validation error, we want
|
| 695 |
+
# that to propagate
|
| 696 |
+
self._legacy = LegacyMetadata(fileobj=StringIO(data),
|
| 697 |
+
scheme=scheme)
|
| 698 |
+
self.validate()
|
| 699 |
+
|
| 700 |
+
common_keys = set(('name', 'version', 'license', 'keywords', 'summary'))
|
| 701 |
+
|
| 702 |
+
none_list = (None, list)
|
| 703 |
+
none_dict = (None, dict)
|
| 704 |
+
|
| 705 |
+
mapped_keys = {
|
| 706 |
+
'run_requires': ('Requires-Dist', list),
|
| 707 |
+
'build_requires': ('Setup-Requires-Dist', list),
|
| 708 |
+
'dev_requires': none_list,
|
| 709 |
+
'test_requires': none_list,
|
| 710 |
+
'meta_requires': none_list,
|
| 711 |
+
'extras': ('Provides-Extra', list),
|
| 712 |
+
'modules': none_list,
|
| 713 |
+
'namespaces': none_list,
|
| 714 |
+
'exports': none_dict,
|
| 715 |
+
'commands': none_dict,
|
| 716 |
+
'classifiers': ('Classifier', list),
|
| 717 |
+
'source_url': ('Download-URL', None),
|
| 718 |
+
'metadata_version': ('Metadata-Version', None),
|
| 719 |
+
}
|
| 720 |
+
|
| 721 |
+
del none_list, none_dict
|
| 722 |
+
|
| 723 |
+
def __getattribute__(self, key):
|
| 724 |
+
common = object.__getattribute__(self, 'common_keys')
|
| 725 |
+
mapped = object.__getattribute__(self, 'mapped_keys')
|
| 726 |
+
if key in mapped:
|
| 727 |
+
lk, maker = mapped[key]
|
| 728 |
+
if self._legacy:
|
| 729 |
+
if lk is None:
|
| 730 |
+
result = None if maker is None else maker()
|
| 731 |
+
else:
|
| 732 |
+
result = self._legacy.get(lk)
|
| 733 |
+
else:
|
| 734 |
+
value = None if maker is None else maker()
|
| 735 |
+
if key not in ('commands', 'exports', 'modules', 'namespaces',
|
| 736 |
+
'classifiers'):
|
| 737 |
+
result = self._data.get(key, value)
|
| 738 |
+
else:
|
| 739 |
+
# special cases for PEP 459
|
| 740 |
+
sentinel = object()
|
| 741 |
+
result = sentinel
|
| 742 |
+
d = self._data.get('extensions')
|
| 743 |
+
if d:
|
| 744 |
+
if key == 'commands':
|
| 745 |
+
result = d.get('python.commands', value)
|
| 746 |
+
elif key == 'classifiers':
|
| 747 |
+
d = d.get('python.details')
|
| 748 |
+
if d:
|
| 749 |
+
result = d.get(key, value)
|
| 750 |
+
else:
|
| 751 |
+
d = d.get('python.exports')
|
| 752 |
+
if not d:
|
| 753 |
+
d = self._data.get('python.exports')
|
| 754 |
+
if d:
|
| 755 |
+
result = d.get(key, value)
|
| 756 |
+
if result is sentinel:
|
| 757 |
+
result = value
|
| 758 |
+
elif key not in common:
|
| 759 |
+
result = object.__getattribute__(self, key)
|
| 760 |
+
elif self._legacy:
|
| 761 |
+
result = self._legacy.get(key)
|
| 762 |
+
else:
|
| 763 |
+
result = self._data.get(key)
|
| 764 |
+
return result
|
| 765 |
+
|
| 766 |
+
def _validate_value(self, key, value, scheme=None):
|
| 767 |
+
if key in self.SYNTAX_VALIDATORS:
|
| 768 |
+
pattern, exclusions = self.SYNTAX_VALIDATORS[key]
|
| 769 |
+
if (scheme or self.scheme) not in exclusions:
|
| 770 |
+
m = pattern.match(value)
|
| 771 |
+
if not m:
|
| 772 |
+
raise MetadataInvalidError("'%s' is an invalid value for "
|
| 773 |
+
"the '%s' property" % (value,
|
| 774 |
+
key))
|
| 775 |
+
|
| 776 |
+
def __setattr__(self, key, value):
|
| 777 |
+
self._validate_value(key, value)
|
| 778 |
+
common = object.__getattribute__(self, 'common_keys')
|
| 779 |
+
mapped = object.__getattribute__(self, 'mapped_keys')
|
| 780 |
+
if key in mapped:
|
| 781 |
+
lk, _ = mapped[key]
|
| 782 |
+
if self._legacy:
|
| 783 |
+
if lk is None:
|
| 784 |
+
raise NotImplementedError
|
| 785 |
+
self._legacy[lk] = value
|
| 786 |
+
elif key not in ('commands', 'exports', 'modules', 'namespaces',
|
| 787 |
+
'classifiers'):
|
| 788 |
+
self._data[key] = value
|
| 789 |
+
else:
|
| 790 |
+
# special cases for PEP 459
|
| 791 |
+
d = self._data.setdefault('extensions', {})
|
| 792 |
+
if key == 'commands':
|
| 793 |
+
d['python.commands'] = value
|
| 794 |
+
elif key == 'classifiers':
|
| 795 |
+
d = d.setdefault('python.details', {})
|
| 796 |
+
d[key] = value
|
| 797 |
+
else:
|
| 798 |
+
d = d.setdefault('python.exports', {})
|
| 799 |
+
d[key] = value
|
| 800 |
+
elif key not in common:
|
| 801 |
+
object.__setattr__(self, key, value)
|
| 802 |
+
else:
|
| 803 |
+
if key == 'keywords':
|
| 804 |
+
if isinstance(value, string_types):
|
| 805 |
+
value = value.strip()
|
| 806 |
+
if value:
|
| 807 |
+
value = value.split()
|
| 808 |
+
else:
|
| 809 |
+
value = []
|
| 810 |
+
if self._legacy:
|
| 811 |
+
self._legacy[key] = value
|
| 812 |
+
else:
|
| 813 |
+
self._data[key] = value
|
| 814 |
+
|
| 815 |
+
@property
|
| 816 |
+
def name_and_version(self):
|
| 817 |
+
return _get_name_and_version(self.name, self.version, True)
|
| 818 |
+
|
| 819 |
+
@property
|
| 820 |
+
def provides(self):
|
| 821 |
+
if self._legacy:
|
| 822 |
+
result = self._legacy['Provides-Dist']
|
| 823 |
+
else:
|
| 824 |
+
result = self._data.setdefault('provides', [])
|
| 825 |
+
s = '%s (%s)' % (self.name, self.version)
|
| 826 |
+
if s not in result:
|
| 827 |
+
result.append(s)
|
| 828 |
+
return result
|
| 829 |
+
|
| 830 |
+
@provides.setter
|
| 831 |
+
def provides(self, value):
|
| 832 |
+
if self._legacy:
|
| 833 |
+
self._legacy['Provides-Dist'] = value
|
| 834 |
+
else:
|
| 835 |
+
self._data['provides'] = value
|
| 836 |
+
|
| 837 |
+
def get_requirements(self, reqts, extras=None, env=None):
|
| 838 |
+
"""
|
| 839 |
+
Base method to get dependencies, given a set of extras
|
| 840 |
+
to satisfy and an optional environment context.
|
| 841 |
+
:param reqts: A list of sometimes-wanted dependencies,
|
| 842 |
+
perhaps dependent on extras and environment.
|
| 843 |
+
:param extras: A list of optional components being requested.
|
| 844 |
+
:param env: An optional environment for marker evaluation.
|
| 845 |
+
"""
|
| 846 |
+
if self._legacy:
|
| 847 |
+
result = reqts
|
| 848 |
+
else:
|
| 849 |
+
result = []
|
| 850 |
+
extras = get_extras(extras or [], self.extras)
|
| 851 |
+
for d in reqts:
|
| 852 |
+
if 'extra' not in d and 'environment' not in d:
|
| 853 |
+
# unconditional
|
| 854 |
+
include = True
|
| 855 |
+
else:
|
| 856 |
+
if 'extra' not in d:
|
| 857 |
+
# Not extra-dependent - only environment-dependent
|
| 858 |
+
include = True
|
| 859 |
+
else:
|
| 860 |
+
include = d.get('extra') in extras
|
| 861 |
+
if include:
|
| 862 |
+
# Not excluded because of extras, check environment
|
| 863 |
+
marker = d.get('environment')
|
| 864 |
+
if marker:
|
| 865 |
+
include = interpret(marker, env)
|
| 866 |
+
if include:
|
| 867 |
+
result.extend(d['requires'])
|
| 868 |
+
for key in ('build', 'dev', 'test'):
|
| 869 |
+
e = ':%s:' % key
|
| 870 |
+
if e in extras:
|
| 871 |
+
extras.remove(e)
|
| 872 |
+
# A recursive call, but it should terminate since 'test'
|
| 873 |
+
# has been removed from the extras
|
| 874 |
+
reqts = self._data.get('%s_requires' % key, [])
|
| 875 |
+
result.extend(self.get_requirements(reqts, extras=extras,
|
| 876 |
+
env=env))
|
| 877 |
+
return result
|
| 878 |
+
|
| 879 |
+
@property
|
| 880 |
+
def dictionary(self):
|
| 881 |
+
if self._legacy:
|
| 882 |
+
return self._from_legacy()
|
| 883 |
+
return self._data
|
| 884 |
+
|
| 885 |
+
@property
|
| 886 |
+
def dependencies(self):
|
| 887 |
+
if self._legacy:
|
| 888 |
+
raise NotImplementedError
|
| 889 |
+
else:
|
| 890 |
+
return extract_by_key(self._data, self.DEPENDENCY_KEYS)
|
| 891 |
+
|
| 892 |
+
@dependencies.setter
|
| 893 |
+
def dependencies(self, value):
|
| 894 |
+
if self._legacy:
|
| 895 |
+
raise NotImplementedError
|
| 896 |
+
else:
|
| 897 |
+
self._data.update(value)
|
| 898 |
+
|
| 899 |
+
def _validate_mapping(self, mapping, scheme):
|
| 900 |
+
if mapping.get('metadata_version') != self.METADATA_VERSION:
|
| 901 |
+
raise MetadataUnrecognizedVersionError()
|
| 902 |
+
missing = []
|
| 903 |
+
for key, exclusions in self.MANDATORY_KEYS.items():
|
| 904 |
+
if key not in mapping:
|
| 905 |
+
if scheme not in exclusions:
|
| 906 |
+
missing.append(key)
|
| 907 |
+
if missing:
|
| 908 |
+
msg = 'Missing metadata items: %s' % ', '.join(missing)
|
| 909 |
+
raise MetadataMissingError(msg)
|
| 910 |
+
for k, v in mapping.items():
|
| 911 |
+
self._validate_value(k, v, scheme)
|
| 912 |
+
|
| 913 |
+
def validate(self):
|
| 914 |
+
if self._legacy:
|
| 915 |
+
missing, warnings = self._legacy.check(True)
|
| 916 |
+
if missing or warnings:
|
| 917 |
+
logger.warning('Metadata: missing: %s, warnings: %s',
|
| 918 |
+
missing, warnings)
|
| 919 |
+
else:
|
| 920 |
+
self._validate_mapping(self._data, self.scheme)
|
| 921 |
+
|
| 922 |
+
def todict(self):
|
| 923 |
+
if self._legacy:
|
| 924 |
+
return self._legacy.todict(True)
|
| 925 |
+
else:
|
| 926 |
+
result = extract_by_key(self._data, self.INDEX_KEYS)
|
| 927 |
+
return result
|
| 928 |
+
|
| 929 |
+
def _from_legacy(self):
    """Build a new-style metadata dictionary from the wrapped legacy data.

    Must only be called when the instance holds legacy metadata and no
    new-style ``_data`` (enforced by an assertion).

    :return: A new dict with ``metadata_version`` and ``generator`` set from
             the class constants, the simple fields copied across
             (``classifier`` is renamed to ``classifiers``), ``keywords``,
             the requirement lists wrapped as ``[{'requires': [...]}]`` and
             ``provides`` taken from ``self.provides``.
    """
    assert self._legacy and not self._data
    result = {
        'metadata_version': self.METADATA_VERSION,
        'generator': self.GENERATOR,
    }
    lmd = self._legacy.todict(True)     # skip missing ones
    for k in ('name', 'version', 'license', 'summary', 'description',
              'classifier'):
        if k in lmd:
            nk = 'classifiers' if k == 'classifier' else k
            result[nk] = lmd[k]
    # NOTE(review): every other key read from ``lmd`` in this method is
    # lower-case; confirm that 'Keywords' matches what todict() emits.
    kw = lmd.get('Keywords', [])
    if kw == ['']:
        kw = []
    result['keywords'] = kw
    keys = (('requires_dist', 'run_requires'),
            ('setup_requires_dist', 'build_requires'))
    for ok, nk in keys:
        # Empty requirement lists are omitted from the result entirely.
        if ok in lmd and lmd[ok]:
            result[nk] = [{'requires': lmd[ok]}]
    result['provides'] = self.provides
    return result
|
| 957 |
+
|
| 958 |
+
# Maps a location in the new-style metadata to the legacy field name it is
# written to by _to_legacy(). A plain string key is looked up directly in
# the data; a tuple key is a path that is followed one element at a time
# (string elements index mappings, integer elements index sequences).
LEGACY_MAPPING = {
    'name': 'Name',
    'version': 'Version',
    ('extensions', 'python.details', 'license'): 'License',
    'summary': 'Summary',
    'description': 'Description',
    ('extensions', 'python.project', 'project_urls', 'Home'): 'Home-page',
    ('extensions', 'python.project', 'contacts', 0, 'name'): 'Author',
    ('extensions', 'python.project', 'contacts', 0, 'email'): 'Author-email',
    'source_url': 'Download-URL',
    ('extensions', 'python.details', 'classifiers'): 'Classifier',
}
|
| 970 |
+
|
| 971 |
+
def _to_legacy(self):
    """Convert the new-style ``_data`` into a ``LegacyMetadata`` object.

    Must only be called when the instance holds new-style data and no
    legacy metadata (enforced by an assertion).

    :return: A ``LegacyMetadata`` populated from ``LEGACY_MAPPING`` plus the
             ``Provides-Extra``, ``Requires-Dist`` and
             ``Setup-Requires-Dist`` fields.
    """
    def process_entries(entries):
        # Flatten requirement entries into a set of requirement strings,
        # appending ';<marker>' when an entry has an extra and/or an
        # environment condition.
        collected = set()
        for entry in entries:
            extra = entry.get('extra')
            env = entry.get('environment')
            requirements = entry['requires']
            for req in requirements:
                if not (env or extra):
                    collected.add(req)
                    continue
                marker = 'extra == "%s"' % extra if extra else ''
                if env:
                    marker = '(%s) and %s' % (env, marker) if marker else env
                collected.add(';'.join((req, marker)))
        return collected

    assert self._data and not self._legacy
    result = LegacyMetadata()
    source = self._data
    for new_key, legacy_name in self.LEGACY_MAPPING.items():
        if isinstance(new_key, tuple):
            # Walk the path; a missing step means the field is skipped.
            node = source
            for part in new_key:
                try:
                    node = node[part]
                except (KeyError, IndexError):
                    break
            else:
                result[legacy_name] = node
        elif new_key in source:
            result[legacy_name] = source[new_key]
    run_time = process_entries(self.run_requires + self.meta_requires)
    build_time = process_entries(self.build_requires + self.dev_requires)
    if self.extras:
        result['Provides-Extra'] = sorted(self.extras)
    result['Requires-Dist'] = sorted(run_time)
    result['Setup-Requires-Dist'] = sorted(build_time)
    # TODO: any other fields wanted
    return result
|
| 1020 |
+
|
| 1021 |
+
def write(self, path=None, fileobj=None, legacy=False, skip_unknown=True):
    """Validate the metadata and write it to a path or a file object.

    :param path: Destination file name (give this or *fileobj*, not both).
    :param fileobj: Destination stream (give this or *path*, not both).
    :param legacy: If true, write in the legacy format (converting with
                   ``_to_legacy()`` when needed); otherwise write JSON.
    :param skip_unknown: Passed through to the legacy writer.
    :raises ValueError: unless exactly one of *path* and *fileobj* is not
                        ``None``.
    """
    if [path, fileobj].count(None) != 1:
        raise ValueError('Exactly one of path and fileobj is needed')
    self.validate()
    if legacy:
        legacy_md = self._legacy if self._legacy else self._to_legacy()
        if path:
            legacy_md.write(path, skip_unknown=skip_unknown)
        else:
            legacy_md.write_file(fileobj, skip_unknown=skip_unknown)
        return
    payload = self._from_legacy() if self._legacy else self._data
    json_options = dict(ensure_ascii=True, indent=2, sort_keys=True)
    if fileobj:
        json.dump(payload, fileobj, **json_options)
        return
    with codecs.open(path, 'w', 'utf-8') as stream:
        json.dump(payload, stream, **json_options)
|
| 1046 |
+
|
| 1047 |
+
def add_requirements(self, requirements):
    """Add unconditional run-time requirements.

    Legacy metadata delegates to its own ``add_requirements``. Otherwise
    the first ``run_requires`` entry with neither an ``environment`` nor
    an ``extra`` key has *requirements* merged into its ``requires`` list
    (de-duplicated and sorted). If there is no such entry, a new one
    holding *requirements* itself is inserted at the front.

    :param requirements: A list of requirement strings.
    """
    if self._legacy:
        self._legacy.add_requirements(requirements)
        return
    entries = self._data.setdefault('run_requires', [])
    unconditional = next(
        (item for item in entries
         if not ('environment' in item or 'extra' in item)),
        None)
    if unconditional is None:
        entries.insert(0, {'requires': requirements})
    else:
        merged = set(unconditional['requires']).union(requirements)
        unconditional['requires'] = sorted(merged)
|
| 1063 |
+
|
| 1064 |
+
def __repr__(self):
    """Return ``<ClassName metadata_version name (version)>``.

    Placeholder text is substituted when the name or version is falsy.
    """
    shown_name = self.name or '(no name)'
    shown_version = self.version or 'no version'
    fields = (self.__class__.__name__, self.metadata_version,
              shown_name, shown_version)
    return '<%s %s %s (%s)>' % fields
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/resources.py
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2013-2017 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
from __future__ import unicode_literals
|
| 8 |
+
|
| 9 |
+
import bisect
|
| 10 |
+
import io
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import pkgutil
|
| 14 |
+
import sys
|
| 15 |
+
import types
|
| 16 |
+
import zipimport
|
| 17 |
+
|
| 18 |
+
from . import DistlibException
|
| 19 |
+
from .util import cached_property, get_cache_base, Cache
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
cache = None # created when needed
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class ResourceCache(Cache):
    """A ``Cache`` that materialises resources as files on disk."""

    def __init__(self, base=None):
        """
        Initialise the cache.

        :param base: The cache directory. When ``None``, a
                     ``resource-cache`` directory under ``get_cache_base()``
                     is used.
        """
        if base is None:
            # Use native string to avoid issues on 2.x: see Python #20140.
            base = os.path.join(get_cache_base(), str('resource-cache'))
        super(ResourceCache, self).__init__(base)

    def is_stale(self, resource, path):
        """
        Report whether the cached copy of a resource is out of date.

        This implementation always answers True, so cached files are
        rewritten on every :meth:`get`.

        :param resource: The :class:`Resource` being cached.
        :param path: The path of the resource in the cache.
        :return: True if the cache is stale.
        """
        # Cache invalidation is a hard problem :-)
        return True

    def get(self, resource):
        """
        Make sure a resource is available as a file and return its path.

        If the resource's finder reports no cache prefix, the path it gives
        is returned untouched. Otherwise the resource's bytes are written
        under the cache directory (creating parent directories as needed)
        whenever the file is missing or stale.

        :param resource: A :class:`Resource` instance.
        :return: The pathname of the resource in the cache.
        """
        prefix, path = resource.finder.get_cache_info(resource)
        if prefix is None:
            return path
        target = os.path.join(self.base, self.prefix_to_dir(prefix), path)
        parent = os.path.dirname(target)
        if not os.path.isdir(parent):
            os.makedirs(parent)
        if not os.path.exists(target) or self.is_stale(resource, path):
            # write the bytes of the resource to the cache location
            with open(target, 'wb') as out:
                out.write(resource.bytes)
        return target
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
class ResourceBase(object):
    """Common state for resources: the owning finder and the resource name."""

    def __init__(self, finder, name):
        """
        :param finder: The finder object that manages this resource.
        :param name: The name of the resource.
        """
        self.finder, self.name = finder, name
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class Resource(ResourceBase):
    """
    An in-package resource such as a data file.

    Instances are normally created by a :class:`ResourceFinder` rather than
    by user code; every operation is delegated to ``self.finder``.
    """
    is_container = False    # Backwards compatibility

    def as_stream(self):
        """
        Return the resource as a stream obtained from the finder.

        Deliberately a method rather than a property, to make it obvious
        that each call asks the finder for a new stream.
        """
        finder = self.finder
        return finder.get_stream(self)

    @cached_property
    def file_path(self):
        """Path of this resource as a file, via the shared resource cache."""
        global cache
        # The module-level cache is only created on first use.
        if cache is None:
            cache = ResourceCache()
        return cache.get(self)

    @cached_property
    def bytes(self):
        """The resource's content, as returned by the finder's ``get_bytes``."""
        finder = self.finder
        return finder.get_bytes(self)

    @cached_property
    def size(self):
        """The resource's size, as returned by the finder's ``get_size``."""
        finder = self.finder
        return finder.get_size(self)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class ResourceContainer(ResourceBase):
    """A resource that contains other resources (for example a directory)."""
    is_container = True     # Backwards compatibility

    @cached_property
    def resources(self):
        """The contained resource names, as reported by the finder."""
        finder = self.finder
        return finder.get_resources(self)
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
class ResourceFinder(object):
    """
    Resource finder for file system resources.
    """

    # File name suffixes that get_resources() never reports.
    skipped_extensions = (
        ('.pyc', '.pyo', '.class') if sys.platform.startswith('java')
        else ('.pyc', '.pyo'))

    def __init__(self, module):
        """
        :param module: The module (package) whose resources are located.
                       Its ``__loader__`` and the directory of its
                       ``__file__`` are recorded.
        """
        self.module = module
        self.loader = getattr(module, '__loader__', None)
        self.base = os.path.dirname(getattr(module, '__file__', ''))

    def _adjust_path(self, path):
        """Return the real (symlink-resolved) form of *path*."""
        return os.path.realpath(path)

    def _make_path(self, resource_name):
        """Turn a '/'-separated resource name into a path under ``self.base``."""
        # Issue #50: need to preserve type of path on Python 2.x
        # like os.path._get_sep
        # (bytes should only happen on 2.x)
        sep = b'/' if isinstance(resource_name, bytes) else '/'
        segments = [self.base] + resource_name.split(sep)
        return self._adjust_path(os.path.join(*segments))

    def _find(self, path):
        """Return True if *path* exists."""
        return os.path.exists(path)

    def get_cache_info(self, resource):
        """Return ``(None, resource.path)``: no cache prefix is reported."""
        return None, resource.path

    def find(self, resource_name):
        """
        Look up a resource by name.

        :return: A :class:`ResourceContainer` for a directory, a
                 :class:`Resource` otherwise (each with ``path`` set), or
                 ``None`` if nothing exists at the computed path.
        """
        path = self._make_path(resource_name)
        if not self._find(path):
            return None
        kind = ResourceContainer if self._is_directory(path) else Resource
        found = kind(self, resource_name)
        found.path = path
        return found

    def get_stream(self, resource):
        """Open the resource's file for binary reading; the caller closes it."""
        return open(resource.path, 'rb')

    def get_bytes(self, resource):
        """Return the full binary content of the resource's file."""
        with open(resource.path, 'rb') as stream:
            content = stream.read()
        return content

    def get_size(self, resource):
        """Return the size of the resource's file."""
        return os.path.getsize(resource.path)

    def get_resources(self, resource):
        """
        Return the set of entry names directly inside a container, leaving
        out ``__pycache__`` and names ending in a skipped extension.
        """
        return set(entry for entry in os.listdir(resource.path)
                   if entry != '__pycache__'
                   and not entry.endswith(self.skipped_extensions))

    def is_container(self, resource):
        """Return True if the resource's path is a directory."""
        return self._is_directory(resource.path)

    _is_directory = staticmethod(os.path.isdir)

    def iterator(self, resource_name):
        """
        Yield the named resource and everything beneath it.

        Containers are processed first-in, first-out: a container is
        yielded, then its non-container children are yielded immediately
        while its child containers are queued. Yields nothing if the
        resource is not found.
        """
        root = self.find(resource_name)
        if root is None:
            return
        pending = [root]
        while pending:
            current = pending.pop(0)
            yield current
            if not current.is_container:
                continue
            parent_name = current.name
            for child_name in current.resources:
                if parent_name:
                    full_name = '/'.join([parent_name, child_name])
                else:
                    full_name = child_name
                child = self.find(full_name)
                if child.is_container:
                    pending.append(child)
                else:
                    yield child
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
class ZipResourceFinder(ResourceFinder):
    """
    Resource finder for resources in .zip files.
    """
    def __init__(self, module):
        """
        :param module: A module whose ``__loader__`` exposes an ``archive``
                       attribute. The archive's file table is taken from the
                       loader, or from zipimport's directory cache, and a
                       sorted index of its names is built for prefix searches.
        """
        super(ZipResourceFinder, self).__init__(module)
        loader = self.loader
        archive = loader.archive
        # Paths handed to this finder begin with "<archive><separator>".
        self.prefix_len = 1 + len(archive)
        # PyPy doesn't have a _files attr on zipimporter, and you can't set one
        if hasattr(loader, '_files'):
            self._files = loader._files
        else:
            self._files = zipimport._zip_directory_cache[archive]
        self.index = sorted(self._files)

    def _adjust_path(self, path):
        """Return *path* unchanged."""
        return path

    def _find(self, path):
        """
        Return True if *path* names an archive member, or is a prefix of one
        (i.e. behaves like a directory). The outcome is logged at debug level.
        """
        relative = path[self.prefix_len:]
        if relative in self._files:
            found = True
        else:
            if relative and relative[-1] != os.sep:
                relative += os.sep
            position = bisect.bisect(self.index, relative)
            found = (position < len(self.index)
                     and self.index[position].startswith(relative))
        if found:
            outcome = '_find worked: %r %r'
        else:
            outcome = '_find failed: %r %r'
        logger.debug(outcome, relative, self.loader.prefix)
        return found

    def get_cache_info(self, resource):
        """Return ``(archive, path of the resource relative to the archive)``."""
        archive = self.loader.archive
        return archive, resource.path[1 + len(archive):]

    def get_bytes(self, resource):
        """Return the resource's content via the loader's ``get_data``."""
        return self.loader.get_data(resource.path)

    def get_stream(self, resource):
        """Return a new in-memory binary stream over the resource's bytes."""
        return io.BytesIO(self.get_bytes(resource))

    def get_size(self, resource):
        """
        Return element 3 of the archive's directory entry for the resource.
        """
        # NOTE(review): presumably the uncompressed size -- confirm against
        # the layout of zipimport's directory entries.
        entry = self._files[resource.path[self.prefix_len:]]
        return entry[3]

    def get_resources(self, resource):
        """Return the set of names of the container's immediate children."""
        base = resource.path[self.prefix_len:]
        if base and base[-1] != os.sep:
            base += os.sep
        skip = len(base)
        index = self.index
        children = set()
        # The index is sorted, so all names under ``base`` are contiguous
        # from the bisect position onwards.
        for position in range(bisect.bisect(index, base), len(index)):
            name = index[position]
            if not name.startswith(base):
                break
            children.add(name[skip:].split(os.sep, 1)[0])  # only immediate children
        return children

    def _is_directory(self, path):
        """Return True if some archive member lies beneath *path*."""
        relative = path[self.prefix_len:]
        if relative and relative[-1] != os.sep:
            relative += os.sep
        position = bisect.bisect(self.index, relative)
        if position >= len(self.index):
            return False
        return self.index[position].startswith(relative)
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
# Maps a loader type to the finder class used for modules (or paths) handled
# by that kind of loader. type(None) is the entry used when a module has no
# __loader__ (finder() looks the registry up by type(loader)).
_finder_registry = {
    type(None): ResourceFinder,
    zipimport.zipimporter: ZipResourceFinder
}

try:
    # In Python 3.6, _frozen_importlib -> _frozen_importlib_external
    try:
        import _frozen_importlib_external as _fi
    except ImportError:
        import _frozen_importlib as _fi
    _finder_registry[_fi.SourceFileLoader] = ResourceFinder
    _finder_registry[_fi.FileFinder] = ResourceFinder
    # See issue #146
    _finder_registry[_fi.SourcelessFileLoader] = ResourceFinder
    del _fi
except (ImportError, AttributeError):
    # Neither module could be imported, or it lacks one of the loader
    # classes above; any entries registered before the failure are kept.
    pass
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def register_finder(loader, finder_maker):
    """
    Register a finder factory for a kind of loader.

    :param loader: A loader instance; the registration is keyed on its type.
    :param finder_maker: A callable which is given a module and returns a
                         finder for it.
    """
    loader_type = type(loader)
    _finder_registry[loader_type] = finder_maker
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# Finders already created by finder(), keyed by package name.
_finder_cache = {}


def finder(package):
    """
    Return a resource finder for a package, creating and caching it on
    first request (the package is imported if it is not already loaded).

    :param package: The name of the package.
    :return: A :class:`ResourceFinder` instance for the package.
    :raises DistlibException: if *package* names a plain module (it has no
        ``__path__``), or no finder is registered for its loader's type.
    """
    try:
        return _finder_cache[package]
    except KeyError:
        pass
    if package not in sys.modules:
        __import__(package)
    module = sys.modules[package]
    if getattr(module, '__path__', None) is None:
        raise DistlibException('You cannot get a finder for a module, '
                               'only for a package')
    loader = getattr(module, '__loader__', None)
    finder_maker = _finder_registry.get(type(loader))
    if finder_maker is None:
        raise DistlibException('Unable to locate finder for %r' % package)
    made = finder_maker(module)
    _finder_cache[package] = made
    return made
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
# A single placeholder module, re-pointed at each requested path by
# finder_for_path() before being handed to a finder class.
_dummy_module = types.ModuleType(str('__dummy__'))


def finder_for_path(path):
    """
    Return a resource finder for a path, which should represent a container.

    :param path: The path.
    :return: A :class:`ResourceFinder` instance for the path, or ``None`` if
             no finder is registered for the type of the path's importer.
    """
    # calls any path hooks, gets importer into cache
    pkgutil.get_importer(path)
    importer = sys.path_importer_cache.get(path)
    make_finder = _finder_registry.get(type(importer))
    if not make_finder:
        return None
    _dummy_module.__file__ = os.path.join(path, '')
    _dummy_module.__loader__ = importer
    return make_finder(_dummy_module)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/scripts.py
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2013-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
from io import BytesIO
|
| 8 |
+
import logging
|
| 9 |
+
import os
|
| 10 |
+
import re
|
| 11 |
+
import struct
|
| 12 |
+
import sys
|
| 13 |
+
import time
|
| 14 |
+
from zipfile import ZipInfo
|
| 15 |
+
|
| 16 |
+
from .compat import sysconfig, detect_encoding, ZipFile
|
| 17 |
+
from .resources import finder
|
| 18 |
+
from .util import (FileOperator, get_export_entry, convert_path,
|
| 19 |
+
get_executable, get_platform, in_venv)
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger(__name__)
|
| 22 |
+
|
| 23 |
+
_DEFAULT_MANIFEST = '''
|
| 24 |
+
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
| 25 |
+
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
|
| 26 |
+
<assemblyIdentity version="1.0.0.0"
|
| 27 |
+
processorArchitecture="X86"
|
| 28 |
+
name="%s"
|
| 29 |
+
type="win32"/>
|
| 30 |
+
|
| 31 |
+
<!-- Identify the application security requirements. -->
|
| 32 |
+
<trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
|
| 33 |
+
<security>
|
| 34 |
+
<requestedPrivileges>
|
| 35 |
+
<requestedExecutionLevel level="asInvoker" uiAccess="false"/>
|
| 36 |
+
</requestedPrivileges>
|
| 37 |
+
</security>
|
| 38 |
+
</trustInfo>
|
| 39 |
+
</assembly>'''.strip()
|
| 40 |
+
|
| 41 |
+
# check if Python is called on the first line with this expression
|
| 42 |
+
FIRST_LINE_RE = re.compile(b'^#!.*pythonw?[0-9.]*([ \t].*)?$')
|
| 43 |
+
SCRIPT_TEMPLATE = r'''# -*- coding: utf-8 -*-
|
| 44 |
+
import re
|
| 45 |
+
import sys
|
| 46 |
+
from %(module)s import %(import_name)s
|
| 47 |
+
if __name__ == '__main__':
|
| 48 |
+
sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
|
| 49 |
+
sys.exit(%(func)s())
|
| 50 |
+
'''
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def enquote_executable(executable):
    """
    Wrap an executable path in double quotes if it contains a space.

    For a ``/usr/bin/env <program>`` command only the program part is
    quoted, e.g. ``/usr/bin/env "/dir with spaces/bin/jython"`` rather than
    ``"/usr/bin/env /dir with spaces/bin/jython"``. Anything that already
    starts with a double quote, or has no space, is returned unchanged.

    :param executable: The executable path or ``/usr/bin/env`` command.
    :return: The (possibly quoted) string.
    """
    if ' ' not in executable:
        return executable
    if not executable.startswith('/usr/bin/env '):
        if executable.startswith('"'):
            return executable
        return '"%s"' % executable
    launcher, target = executable.split(' ', 1)
    if ' ' in target and not target.startswith('"'):
        return '%s "%s"' % (launcher, target)
    return executable


# Keep the old name around (for now), as there is at least one project using it!
_enquote_executable = enquote_executable
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class ScriptMaker(object):
|
| 74 |
+
"""
|
| 75 |
+
A class to copy or create scripts from source scripts or callable
|
| 76 |
+
specifications.
|
| 77 |
+
"""
|
| 78 |
+
script_template = SCRIPT_TEMPLATE
|
| 79 |
+
|
| 80 |
+
executable = None # for shebangs
|
| 81 |
+
|
| 82 |
+
def __init__(self,
             source_dir,
             target_dir,
             add_launchers=True,
             dry_run=False,
             fileop=None):
    """
    :param source_dir: Stored as ``self.source_dir``.
    :param target_dir: Stored as ``self.target_dir``; written script names
                       are joined onto it.
    :param add_launchers: Stored as ``self.add_launchers``.
    :param dry_run: Passed to the ``FileOperator`` that is created when no
                    *fileop* is supplied.
    :param fileop: An existing file operator to use instead of a new one.
    """
    def runs_on(family):
        # True when os.name is ``family``, or when os.name is 'java' and
        # os._name is ``family``.
        return os.name == family or (os.name == 'java'
                                     and os._name == family)

    self.source_dir = source_dir
    self.target_dir = target_dir
    self.add_launchers = add_launchers
    self.force = False
    self.clobber = False
    # It only makes sense to set mode bits on POSIX.
    self.set_mode = runs_on('posix')
    self.variants = set(('', 'X.Y'))
    self._fileop = fileop or FileOperator(dry_run)

    self._is_nt = runs_on('nt')
    self.version_info = sys.version_info
|
| 102 |
+
|
| 103 |
+
def _get_alternate_executable(self, executable, options):
    """
    Return the executable to use for a script with the given options.

    For a GUI script on Windows (``options['gui']`` true and ``self._is_nt``
    set), every ``python`` in the file name part is replaced by ``pythonw``;
    otherwise *executable* is returned unchanged.
    """
    wants_gui = options.get('gui', False)
    if not (wants_gui and self._is_nt):
        return executable
    # pragma: no cover
    directory, filename = os.path.split(executable)
    return os.path.join(directory, filename.replace('python', 'pythonw'))
|
| 109 |
+
|
| 110 |
+
# The two helpers below are only defined when running under Jython
# (sys.platform starts with 'java').
if sys.platform.startswith('java'):  # pragma: no cover

    def _is_shell(self, executable):
        """
        Determine if the specified executable is a script
        (contains a #! line)

        Returns False, after logging a warning, if the file cannot be opened.
        """
        try:
            with open(executable) as fp:
                return fp.read(2) == '#!'
        except (OSError, IOError):
            logger.warning('Failed to open %s', executable)
            return False

    def _fix_jython_executable(self, executable):
        """
        Return the command to put in a shebang for a Jython executable.

        The executable is returned unchanged when it is a '#!' script and
        the Java 'os.name' property is 'Linux', or when it is not a script
        and its name ends with 'jython.exe'. In every other case it is
        prefixed with '/usr/bin/env '.
        """
        if self._is_shell(executable):
            # Workaround for Jython is not needed on Linux systems.
            import java

            if java.lang.System.getProperty('os.name') == 'Linux':
                return executable
        elif executable.lower().endswith('jython.exe'):
            # Use wrapper exe for Jython on Windows
            return executable
        return '/usr/bin/env %s' % executable
|
| 135 |
+
|
| 136 |
+
def _build_shebang(self, executable, post_interp):
    """
    Build a shebang line (as bytes) for the given interpreter.

    Off POSIX, or when the interpreter path has no space and the whole line
    fits the platform limit, the plain ``#!<executable><post_interp>`` form
    is used. Otherwise the line is ``#!/bin/sh`` followed by a contrived
    quoted ``exec`` which lets the script run under either sh or Python.
    Thanks to Harald Nordgren for his input.

    See also: http://www.in-ulm.de/~mascheck/various/shebang/#length
              https://hg.mozilla.org/mozilla-central/file/tip/mach

    :param executable: The interpreter path, as bytes.
    :param post_interp: Interpreter arguments (bytes) appended to the path.
    :return: The shebang, as bytes.
    """
    if os.name == 'posix':
        # Add 3 for '#!' prefix and newline suffix.
        line_length = len(executable) + len(post_interp) + 3
        limit = 512 if sys.platform == 'darwin' else 127
        needs_sh_wrapper = (b' ' in executable) or (line_length > limit)
    else:
        needs_sh_wrapper = False

    if not needs_sh_wrapper:
        return b'#!' + executable + post_interp + b'\n'
    wrapped = b'#!/bin/sh\n'
    wrapped += b"'''exec' " + executable + post_interp + b' "$0" "$@"\n'
    wrapped += b"' '''"
    return wrapped
|
| 166 |
+
|
| 167 |
+
def _get_shebang(self, encoding, post_interp=b'', options=None):
    """
    Work out the shebang line for a generated script.

    The interpreter is ``self.executable`` when set (used as given, without
    quoting). Otherwise it is derived from the running interpreter or the
    build configuration, and quoted if it contains spaces.

    :param encoding: The encoding of the script the shebang will precede.
    :param post_interp: Interpreter arguments (bytes) to append.
    :param options: Optional mapping passed to
                    ``_get_alternate_executable`` when non-empty.
    :return: The shebang, as bytes.
    :raises ValueError: if the shebang cannot be decoded as UTF-8 or, for a
                        script in another encoding, as that encoding.
    """
    enquote = True
    if self.executable:
        executable = self.executable
        enquote = False  # assume this will be taken care of
    elif not sysconfig.is_python_build():
        executable = get_executable()
    elif in_venv():  # pragma: no cover
        executable = os.path.join(
            sysconfig.get_path('scripts'),
            'python%s' % sysconfig.get_config_var('EXE'))
    else:  # pragma: no cover
        if os.name == 'nt':
            # for Python builds from source on Windows, no Python executables with
            # a version suffix are created, so we use python.exe
            executable = os.path.join(
                sysconfig.get_config_var('BINDIR'),
                'python%s' % (sysconfig.get_config_var('EXE')))
        else:
            executable = os.path.join(
                sysconfig.get_config_var('BINDIR'),
                'python%s%s' % (sysconfig.get_config_var('VERSION'),
                                sysconfig.get_config_var('EXE')))
    if options:
        executable = self._get_alternate_executable(executable, options)

    if sys.platform.startswith('java'):  # pragma: no cover
        executable = self._fix_jython_executable(executable)

    # N.B. The executable is deliberately NOT passed through
    # os.path.normcase (see issue #124). Although paths in Windows are
    # generally case-insensitive, they aren't always. For example, a path
    # containing a LATIN CAPITAL LETTER SHARP S (U+1E9E) is normcased to a
    # LATIN SMALL LETTER SHARP S (U+00DF), and the two are not considered
    # by Windows as equivalent in path names.

    # If the user didn't specify an executable, it may be necessary to
    # cater for executable paths with spaces (not uncommon on Windows)
    if enquote:
        executable = enquote_executable(executable)
    # Issue #51: don't use fsencode, since we later try to
    # check that the shebang is decodable using utf-8.
    executable = executable.encode('utf-8')
    # in case of IronPython, play safe and enable frames support.
    # post_interp is bytes, so the membership tests must use bytes literals:
    # testing a str against bytes raises TypeError on Python 3.
    if (sys.platform == 'cli' and b'-X:Frames' not in post_interp
            and b'-X:FullFrames' not in post_interp):  # pragma: no cover
        post_interp += b' -X:Frames'
    shebang = self._build_shebang(executable, post_interp)
    # Python parser starts to read a script using UTF-8 until
    # it gets a #coding:xxx cookie. The shebang has to be the
    # first line of a file, the #coding:xxx cookie cannot be
    # written before. So the shebang has to be decodable from
    # UTF-8.
    try:
        shebang.decode('utf-8')
    except UnicodeDecodeError:  # pragma: no cover
        raise ValueError('The shebang (%r) is not decodable from utf-8' %
                         shebang)
    # If the script is encoded to a custom encoding (use a
    # #coding:xxx cookie), the shebang has to be decodable from
    # the script encoding too.
    if encoding != 'utf-8':
        try:
            shebang.decode(encoding)
        except UnicodeDecodeError:  # pragma: no cover
            raise ValueError('The shebang (%r) is not decodable '
                             'from the script encoding (%r)' %
                             (shebang, encoding))
    return shebang
|
| 238 |
+
|
| 239 |
+
def _get_script_text(self, entry):
|
| 240 |
+
return self.script_template % dict(
|
| 241 |
+
module=entry.prefix,
|
| 242 |
+
import_name=entry.suffix.split('.')[0],
|
| 243 |
+
func=entry.suffix)
|
| 244 |
+
|
| 245 |
+
manifest = _DEFAULT_MANIFEST
|
| 246 |
+
|
| 247 |
+
def get_manifest(self, exename):
    """
    Return the manifest text for the executable *exename*.

    Only the final path component of *exename* is substituted into
    ``self.manifest`` (via %-formatting).
    """
    return self.manifest % os.path.basename(exename)
|
| 250 |
+
|
| 251 |
+
def _write_script(self, names, shebang, script_bytes, filenames, ext):
|
| 252 |
+
use_launcher = self.add_launchers and self._is_nt
|
| 253 |
+
linesep = os.linesep.encode('utf-8')
|
| 254 |
+
if not shebang.endswith(linesep):
|
| 255 |
+
shebang += linesep
|
| 256 |
+
if not use_launcher:
|
| 257 |
+
script_bytes = shebang + script_bytes
|
| 258 |
+
else: # pragma: no cover
|
| 259 |
+
if ext == 'py':
|
| 260 |
+
launcher = self._get_launcher('t')
|
| 261 |
+
else:
|
| 262 |
+
launcher = self._get_launcher('w')
|
| 263 |
+
stream = BytesIO()
|
| 264 |
+
with ZipFile(stream, 'w') as zf:
|
| 265 |
+
source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH')
|
| 266 |
+
if source_date_epoch:
|
| 267 |
+
date_time = time.gmtime(int(source_date_epoch))[:6]
|
| 268 |
+
zinfo = ZipInfo(filename='__main__.py',
|
| 269 |
+
date_time=date_time)
|
| 270 |
+
zf.writestr(zinfo, script_bytes)
|
| 271 |
+
else:
|
| 272 |
+
zf.writestr('__main__.py', script_bytes)
|
| 273 |
+
zip_data = stream.getvalue()
|
| 274 |
+
script_bytes = launcher + shebang + zip_data
|
| 275 |
+
for name in names:
|
| 276 |
+
outname = os.path.join(self.target_dir, name)
|
| 277 |
+
if use_launcher: # pragma: no cover
|
| 278 |
+
n, e = os.path.splitext(outname)
|
| 279 |
+
if e.startswith('.py'):
|
| 280 |
+
outname = n
|
| 281 |
+
outname = '%s.exe' % outname
|
| 282 |
+
try:
|
| 283 |
+
self._fileop.write_binary_file(outname, script_bytes)
|
| 284 |
+
except Exception:
|
| 285 |
+
# Failed writing an executable - it might be in use.
|
| 286 |
+
logger.warning('Failed to write executable - trying to '
|
| 287 |
+
'use .deleteme logic')
|
| 288 |
+
dfname = '%s.deleteme' % outname
|
| 289 |
+
if os.path.exists(dfname):
|
| 290 |
+
os.remove(dfname) # Not allowed to fail here
|
| 291 |
+
os.rename(outname, dfname) # nor here
|
| 292 |
+
self._fileop.write_binary_file(outname, script_bytes)
|
| 293 |
+
logger.debug('Able to replace executable using '
|
| 294 |
+
'.deleteme logic')
|
| 295 |
+
try:
|
| 296 |
+
os.remove(dfname)
|
| 297 |
+
except Exception:
|
| 298 |
+
pass # still in use - ignore error
|
| 299 |
+
else:
|
| 300 |
+
if self._is_nt and not outname.endswith(
|
| 301 |
+
'.' + ext): # pragma: no cover
|
| 302 |
+
outname = '%s.%s' % (outname, ext)
|
| 303 |
+
if os.path.exists(outname) and not self.clobber:
|
| 304 |
+
logger.warning('Skipping existing file %s', outname)
|
| 305 |
+
continue
|
| 306 |
+
self._fileop.write_binary_file(outname, script_bytes)
|
| 307 |
+
if self.set_mode:
|
| 308 |
+
self._fileop.set_executable_mode([outname])
|
| 309 |
+
filenames.append(outname)
|
| 310 |
+
|
| 311 |
+
variant_separator = '-'
|
| 312 |
+
|
| 313 |
+
def get_script_filenames(self, name):
    """
    Return the set of file names to generate for the script *name*.

    Which names are produced depends on ``self.variants``:

    * ``''``    -> ``name``
    * ``'X'``   -> ``name`` followed by ``self.version_info[0]``
    * ``'X.Y'`` -> ``name``, ``self.variant_separator``, then
      ``self.version_info[0]`` and ``self.version_info[1]`` joined by a dot
    """
    variants = self.variants
    names = set()
    if '' in variants:
        names.add(name)
    if 'X' in variants:
        names.add('%s%s' % (name, self.version_info[0]))
    if 'X.Y' in variants:
        pieces = (name, self.variant_separator, self.version_info[0],
                  self.version_info[1])
        names.add('%s%s%s.%s' % pieces)
    return names
|
| 324 |
+
|
| 325 |
+
def _make_script(self, entry, filenames, options=None):
|
| 326 |
+
post_interp = b''
|
| 327 |
+
if options:
|
| 328 |
+
args = options.get('interpreter_args', [])
|
| 329 |
+
if args:
|
| 330 |
+
args = ' %s' % ' '.join(args)
|
| 331 |
+
post_interp = args.encode('utf-8')
|
| 332 |
+
shebang = self._get_shebang('utf-8', post_interp, options=options)
|
| 333 |
+
script = self._get_script_text(entry).encode('utf-8')
|
| 334 |
+
scriptnames = self.get_script_filenames(entry.name)
|
| 335 |
+
if options and options.get('gui', False):
|
| 336 |
+
ext = 'pyw'
|
| 337 |
+
else:
|
| 338 |
+
ext = 'py'
|
| 339 |
+
self._write_script(scriptnames, shebang, script, filenames, ext)
|
| 340 |
+
|
| 341 |
+
def _copy_script(self, script, filenames):
    """
    Copy an existing script into ``self.target_dir``.

    If the first line of the script matches ``FIRST_LINE_RE`` the script is
    rewritten through ``_write_script`` with a freshly generated shebang;
    otherwise it is copied unchanged.

    :param script: Path of the script, relative to ``self.source_dir``.
    :param filenames: A list to which every path written is appended.
    :raises IOError: If the script cannot be opened and this is not a
                     dry run.
    """
    script = os.path.join(self.source_dir, convert_path(script))
    outname = os.path.join(self.target_dir, os.path.basename(script))
    if not self.force and not self._fileop.newer(script, outname):
        logger.debug('not copying %s (up-to-date)', script)
        return

    # Always open the file, but ignore failures in dry-run mode --
    # that way, we'll get accurate feedback if we can read the
    # script.
    try:
        f = open(script, 'rb')
    except IOError:  # pragma: no cover
        if not self.dry_run:
            raise
        f = None

    # Everything below runs under try/finally so that the handle is
    # released on every exit path, including the empty-file early return
    # and any exception raised while building or writing the script.
    try:
        adjust = False
        if f is not None:
            first_line = f.readline()
            if not first_line:  # pragma: no cover
                logger.warning('%s is an empty file (skipping)', script)
                return

            match = FIRST_LINE_RE.match(first_line.replace(b'\r\n', b'\n'))
            if match:
                adjust = True
                post_interp = match.group(1) or b''

        if not adjust:
            # Close before copying, as the original code did.
            if f is not None:
                f.close()
            self._fileop.copy_file(script, outname)
            if self.set_mode:
                self._fileop.set_executable_mode([outname])
            filenames.append(outname)
        else:
            logger.info('copying and adjusting %s -> %s', script,
                        self.target_dir)
            if not self._fileop.dry_run:
                encoding, lines = detect_encoding(f.readline)
                # Rewind so that f.read() below yields the whole file.
                f.seek(0)
                shebang = self._get_shebang(encoding, post_interp)
                if b'pythonw' in first_line:  # pragma: no cover
                    ext = 'pyw'
                else:
                    ext = 'py'
                n = os.path.basename(outname)
                self._write_script([n], shebang, f.read(), filenames, ext)
    finally:
        # close() on an already-closed file object is a no-op.
        if f is not None:
            f.close()
|
| 391 |
+
|
| 392 |
+
@property
def dry_run(self):
    """Return the ``dry_run`` flag of the object held in ``self._fileop``."""
    fileop = self._fileop
    return fileop.dry_run


@dry_run.setter
def dry_run(self, value):
    """Store *value* as the ``dry_run`` flag of ``self._fileop``."""
    fileop = self._fileop
    fileop.dry_run = value
|
| 399 |
+
|
| 400 |
+
if os.name == 'nt' or (os.name == 'java'
                       and os._name == 'nt'):  # pragma: no cover
    # Executable launcher support.
    # Launchers are from https://bitbucket.org/vinay.sajip/simple_launcher/

    def _get_launcher(self, kind):
        """
        Return the bytes of the launcher executable for *kind*.

        The resource name is ``<kind><bits>[-arm].exe``, where *bits* is
        ``'64'`` when a pointer is 8 bytes wide and ``'32'`` otherwise,
        and ``-arm`` is added when ``get_platform()`` reports
        ``'win-arm64'``.

        :raises ValueError: If the resource cannot be found in the package.
        """
        bits = '64' if struct.calcsize('P') == 8 else '32'
        if get_platform() == 'win-arm64':
            platform_suffix = '-arm'
        else:
            platform_suffix = ''
        name = '%s%s%s.exe' % (kind, bits, platform_suffix)
        # Issue 31: don't hardcode an absolute package name, but
        # determine it relative to the current package
        distlib_package = __name__.rsplit('.', 1)[0]
        resource = finder(distlib_package).find(name)
        if resource:
            return resource.bytes
        msg = ('Unable to find resource %s in package %s' %
               (name, distlib_package))
        raise ValueError(msg)
|
| 421 |
+
|
| 422 |
+
# Public API follows
|
| 423 |
+
|
| 424 |
+
def make(self, specification, options=None):
    """
    Make a script.

    :param specification: The specification, which is either a valid export
                          entry specification (to make a script from a
                          callable) or a filename (to make a script by
                          copying from a source location).
    :param options: A dictionary of options controlling script generation.
                    It is only used when *specification* is an export
                    entry.
    :return: A list of all absolute pathnames written to.
    """
    written = []
    entry = get_export_entry(specification)
    if entry is not None:
        self._make_script(entry, written, options=options)
    else:
        # Not an export entry: treat the specification as a file name.
        self._copy_script(specification, written)
    return written
|
| 442 |
+
|
| 443 |
+
def make_multiple(self, specifications, options=None):
    """
    Take a list of specifications and make scripts from them.

    :param specifications: A list of specifications.
    :param options: Passed unchanged to ``make`` for every specification.
    :return: A list of all absolute pathnames written to.
    """
    return [
        path
        for specification in specifications
        for path in self.make(specification, options)
    ]
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/util.py
ADDED
|
@@ -0,0 +1,2025 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#
|
| 2 |
+
# Copyright (C) 2012-2023 The Python Software Foundation.
|
| 3 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 4 |
+
#
|
| 5 |
+
import codecs
|
| 6 |
+
from collections import deque
|
| 7 |
+
import contextlib
|
| 8 |
+
import csv
|
| 9 |
+
from glob import iglob as std_iglob
|
| 10 |
+
import io
|
| 11 |
+
import json
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import py_compile
|
| 15 |
+
import re
|
| 16 |
+
import socket
|
| 17 |
+
try:
|
| 18 |
+
import ssl
|
| 19 |
+
except ImportError: # pragma: no cover
|
| 20 |
+
ssl = None
|
| 21 |
+
import subprocess
|
| 22 |
+
import sys
|
| 23 |
+
import tarfile
|
| 24 |
+
import tempfile
|
| 25 |
+
import textwrap
|
| 26 |
+
|
| 27 |
+
try:
|
| 28 |
+
import threading
|
| 29 |
+
except ImportError: # pragma: no cover
|
| 30 |
+
import dummy_threading as threading
|
| 31 |
+
import time
|
| 32 |
+
|
| 33 |
+
from . import DistlibException
|
| 34 |
+
from .compat import (string_types, text_type, shutil, raw_input, StringIO,
|
| 35 |
+
cache_from_source, urlopen, urljoin, httplib, xmlrpclib,
|
| 36 |
+
HTTPHandler, BaseConfigurator, valid_ident,
|
| 37 |
+
Container, configparser, URLError, ZipFile, fsdecode,
|
| 38 |
+
unquote, urlparse)
|
| 39 |
+
|
| 40 |
+
logger = logging.getLogger(__name__)
|
| 41 |
+
|
| 42 |
+
#
|
| 43 |
+
# Requirement parsing code as per PEP 508
|
| 44 |
+
#
|
| 45 |
+
|
| 46 |
+
# Each pattern consumes one token plus any trailing whitespace, so the
# parsers below can advance with ``remaining[m.end():]``.
IDENTIFIER = re.compile(r'^([\w\.-]+)\s*')
VERSION_IDENTIFIER = re.compile(r'^([\w\.*+-]+)\s*')
COMPARE_OP = re.compile(r'^(<=?|>=?|={2,3}|[~!]=)\s*')
MARKER_OP = re.compile(r'^((<=?)|(>=?)|={2,3}|[~!]=|in|not\s+in)\s*')
OR = re.compile(r'^or\b\s*')
AND = re.compile(r'^and\b\s*')
NON_SPACE = re.compile(r'(\S+)\s*')
STRING_CHUNK = re.compile(r'([\s\w\.{}()*+#:;,/?!~`@$%^&=|<>\[\]-]+)')


def parse_marker(marker_string):
    """
    Parse a marker string and return an ``(expression, remaining)`` tuple.

    ``expression`` is a dictionary with keys "op", "lhs" and "rhs" for
    non-terminals in the expression grammar, or a string for terminals. A
    string contained in quotes is to be interpreted as a literal string, and
    a string not contained in quotes is a variable (such as os_name).
    ``remaining`` is whatever part of the input was not consumed.

    :raises SyntaxError: If the marker is malformed.
    """

    def marker_var(remaining):
        # either identifier, or literal string
        m = IDENTIFIER.match(remaining)
        if m:
            result = m.groups()[0]
            remaining = remaining[m.end():]
        elif not remaining:
            raise SyntaxError('unexpected end of input')
        else:
            q = remaining[0]
            if q not in '\'"':
                raise SyntaxError('invalid expression: %s' % remaining)
            # oq is the "other" quote character, which may appear
            # unescaped inside a string delimited by q.
            oq = '\'"'.replace(q, '')
            remaining = remaining[1:]
            parts = [q]
            while remaining:
                # either a string chunk, or oq, or q to terminate
                if remaining[0] == q:
                    break
                elif remaining[0] == oq:
                    parts.append(oq)
                    remaining = remaining[1:]
                else:
                    m = STRING_CHUNK.match(remaining)
                    if not m:
                        raise SyntaxError('error in string literal: %s' %
                                          remaining)
                    parts.append(m.groups()[0])
                    remaining = remaining[m.end():]
            else:
                # Input ran out before the closing quote was seen.
                s = ''.join(parts)
                raise SyntaxError('unterminated string: %s' % s)
            parts.append(q)
            result = ''.join(parts)
            remaining = remaining[1:].lstrip()  # skip past closing quote
        return result, remaining

    def marker_expr(remaining):
        if remaining and remaining[0] == '(':
            result, remaining = marker(remaining[1:].lstrip())
            # Check for end of input before indexing, so that a missing
            # ')' is reported as a SyntaxError rather than an IndexError.
            if not remaining or remaining[0] != ')':
                raise SyntaxError('unterminated parenthesis: %s' % remaining)
            remaining = remaining[1:].lstrip()
        else:
            lhs, remaining = marker_var(remaining)
            while remaining:
                m = MARKER_OP.match(remaining)
                if not m:
                    break
                op = m.groups()[0]
                remaining = remaining[m.end():]
                rhs, remaining = marker_var(remaining)
                lhs = {'op': op, 'lhs': lhs, 'rhs': rhs}
            result = lhs
        return result, remaining

    def marker_and(remaining):
        lhs, remaining = marker_expr(remaining)
        while remaining:
            m = AND.match(remaining)
            if not m:
                break
            remaining = remaining[m.end():]
            rhs, remaining = marker_expr(remaining)
            lhs = {'op': 'and', 'lhs': lhs, 'rhs': rhs}
        return lhs, remaining

    def marker(remaining):
        # "or" is parsed outermost, so "and" binds more tightly.
        lhs, remaining = marker_and(remaining)
        while remaining:
            m = OR.match(remaining)
            if not m:
                break
            remaining = remaining[m.end():]
            rhs, remaining = marker_and(remaining)
            lhs = {'op': 'or', 'lhs': lhs, 'rhs': rhs}
        return lhs, remaining

    return marker(marker_string)
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def parse_requirement(req):
    """
    Parse a requirement passed in as a string. Return a Container
    whose attributes contain the various parts of the requirement
    (name, extras, constraints, marker, url and a normalised requirement
    string), or None if the string is empty or is a comment.

    :raises SyntaxError: If the requirement is malformed.
    """

    def get_versions(ver_remaining):
        """
        Return a list of operator, version tuples if any are
        specified, else None, together with the unparsed remainder.
        """
        m = COMPARE_OP.match(ver_remaining)
        if not m:
            return None, ver_remaining
        constraints = []
        while True:
            op = m.groups()[0]
            ver_remaining = ver_remaining[m.end():]
            m = VERSION_IDENTIFIER.match(ver_remaining)
            if not m:
                raise SyntaxError('invalid version: %s' % ver_remaining)
            constraints.append((op, m.groups()[0]))
            ver_remaining = ver_remaining[m.end():]
            if not ver_remaining or ver_remaining[0] != ',':
                break
            ver_remaining = ver_remaining[1:].lstrip()
            # Some packages have a trailing comma which would break things
            # See issue #148
            if not ver_remaining:
                break
            m = COMPARE_OP.match(ver_remaining)
            if not m:
                raise SyntaxError('invalid constraint: %s' % ver_remaining)
        return constraints or None, ver_remaining

    remaining = req.strip()
    if not remaining or remaining.startswith('#'):
        return None
    name_match = IDENTIFIER.match(remaining)
    if not name_match:
        raise SyntaxError('name expected: %s' % remaining)
    distname = name_match.groups()[0]
    remaining = remaining[name_match.end():]
    extras = mark_expr = versions = uri = None

    # Optional "[extra1, extra2]" section.
    if remaining and remaining[0] == '[':
        close = remaining.find(']', 1)
        if close < 0:
            raise SyntaxError('unterminated extra: %s' % remaining)
        extras_text = remaining[1:close]
        remaining = remaining[close + 1:].lstrip()
        extras = []
        while extras_text:
            extra_match = IDENTIFIER.match(extras_text)
            if not extra_match:
                raise SyntaxError('malformed extra: %s' % extras_text)
            extras.append(extra_match.groups()[0])
            extras_text = extras_text[extra_match.end():]
            if not extras_text:
                break
            if extras_text[0] != ',':
                raise SyntaxError('comma expected in extras: %s' %
                                  extras_text)
            extras_text = extras_text[1:].lstrip()
        extras = extras or None

    if remaining:
        if remaining[0] == '@':
            # it's a URI
            remaining = remaining[1:].lstrip()
            uri_match = NON_SPACE.match(remaining)
            if not uri_match:
                raise SyntaxError('invalid URI: %s' % remaining)
            uri = uri_match.groups()[0]
            parsed = urlparse(uri)
            # there are issues with Python and URL parsing, so this test
            # is a bit crude. See bpo-20271, bpo-23505. Python doesn't
            # always parse invalid URLs correctly - it should raise
            # exceptions for malformed URLs
            if not (parsed.scheme and parsed.netloc):
                raise SyntaxError('Invalid URL: %s' % uri)
            remaining = remaining[uri_match.end():].lstrip()
        elif remaining[0] != '(':
            versions, remaining = get_versions(remaining)
        else:
            close = remaining.find(')', 1)
            if close < 0:
                raise SyntaxError('unterminated parenthesis: %s' %
                                  remaining)
            inner = remaining[1:close]
            remaining = remaining[close + 1:].lstrip()
            # As a special diversion from PEP 508, allow a version number
            # a.b.c in parentheses as a synonym for ~= a.b.c (because this
            # is allowed in earlier PEPs)
            if COMPARE_OP.match(inner):
                versions, _ = get_versions(inner)
            else:
                ver_match = VERSION_IDENTIFIER.match(inner)
                if not ver_match:
                    raise SyntaxError('invalid constraint: %s' % inner)
                bare_version = ver_match.groups()[0]
                inner = inner[ver_match.end():].lstrip()
                if inner:
                    raise SyntaxError('invalid constraint: %s' % inner)
                versions = [('~=', bare_version)]

    # Optional "; marker" section.
    if remaining:
        if remaining[0] != ';':
            raise SyntaxError('invalid requirement: %s' % remaining)
        remaining = remaining[1:].lstrip()

        mark_expr, remaining = parse_marker(remaining)

    # Only a trailing comment may follow.
    if remaining and remaining[0] != '#':
        raise SyntaxError('unexpected trailing data: %s' % remaining)

    if versions:
        rs = '%s %s' % (distname, ', '.join(
            ['%s %s' % con for con in versions]))
    else:
        rs = distname
    return Container(name=distname,
                     extras=extras,
                     constraints=versions,
                     marker=mark_expr,
                     url=uri,
                     requirement=rs)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def get_resources_dests(resources_root, rules):
    """
    Find destinations for resources files.

    *rules* is an iterable of ``(base, suffix, dest)`` triples. Every path
    matched by globbing ``resources_root/base/suffix`` is mapped (keyed by
    its '/'-separated path relative to *resources_root*) to ``dest`` joined
    with its path relative to the matched base. A rule whose ``dest`` is
    None removes any mapping already recorded for the paths it matches.
    """

    def get_rel_path(root, path):
        # normalizes and returns a lstripped-/-separated path
        norm_root = root.replace(os.path.sep, '/')
        norm_path = path.replace(os.path.sep, '/')
        assert norm_path.startswith(norm_root)
        return norm_path[len(norm_root):].lstrip('/')

    mapping = {}
    for base, suffix, dest in rules:
        for abs_base in iglob(os.path.join(resources_root, base)):
            for abs_path in iglob(os.path.join(abs_base, suffix)):
                key = get_rel_path(resources_root, abs_path)
                if dest is None:  # remove the entry if it was here
                    mapping.pop(key, None)
                    continue
                tail = get_rel_path(abs_base, abs_path)
                head = dest.replace(os.path.sep, '/').rstrip('/')
                mapping[key] = head + '/' + tail
    return mapping
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
def in_venv():
    """Return True when the interpreter is running in a virtual environment."""
    # virtualenv-style environments expose sys.real_prefix.
    if hasattr(sys, 'real_prefix'):
        return True
    # PEP 405 venvs: sys.prefix differs from sys.base_prefix (when present).
    base = getattr(sys, 'base_prefix', sys.prefix)
    return sys.prefix != base
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
def get_executable():
    """Return ``sys.executable`` as text.

    The value is decoded with ``fsdecode`` when it is not already an
    instance of ``text_type``.
    """
    # The __PYVENV_LAUNCHER__ environment lookup formerly used on OS X is
    # no longer needed: sys.executable already points at the stub launcher.
    # The path is deliberately not normcased (see issue #143).
    exe = sys.executable
    return exe if isinstance(exe, text_type) else fsdecode(exe)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
def proceed(prompt, allowed_chars, error_prompt=None, default=None):
    """Ask a question until an acceptable one-character answer is given.

    :param prompt: Text shown when asking for input.
    :param allowed_chars: Container of acceptable (lower-case) characters.
    :param error_prompt: Optional text shown, ahead of *prompt*, after an
                         unacceptable answer.
    :param default: Answer assumed when the reply is empty.
    :return: The lower-cased first character of the accepted answer.
    """
    shown = prompt
    while True:
        reply = raw_input(shown) or default
        # Any error text is shown for a single round only.
        shown = prompt
        if not reply:
            continue
        c = reply[0].lower()
        if c in allowed_chars:
            return c
        if error_prompt:
            shown = '%c: %s\n%s' % (c, error_prompt, prompt)
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def extract_by_key(d, keys):
    """Return a new dict holding only the entries of *d* named in *keys*.

    *keys* may be an iterable of keys or a single whitespace-separated
    string. Keys missing from *d* are ignored.
    """
    wanted = keys.split() if isinstance(keys, string_types) else keys
    return {key: d[key] for key in wanted if key in d}
|
| 358 |
+
|
| 359 |
+
|
| 360 |
+
def read_exports(stream):
    """Read export entries from *stream*.

    The content is first parsed as JSON, with the entries expected under
    ``extensions -> python.exports -> exports``. If anything in that path
    fails, the same text is parsed as the legacy INI-style format instead.

    :param stream: A readable stream (wrapped in a UTF-8 reader on
                   Python 3).
    :return: A dict mapping group/section name to a dict of entry name ->
             the object returned by ``get_export_entry``.
    """
    if sys.version_info[0] >= 3:
        # needs to be a text stream
        stream = codecs.getreader('utf-8')(stream)
    text = stream.read()
    buffered = StringIO(text)

    # First attempt: JSON metadata.
    try:
        exports = json.load(buffered)['extensions']['python.exports']['exports']
        for entries in exports.values():
            for key, target in entries.items():
                parsed = get_export_entry('%s = %s' % (key, target))
                assert parsed is not None
                entries[key] = parsed
        return exports
    except Exception:
        # Not usable as JSON: rewind and fall back on the legacy format.
        buffered.seek(0, 0)

    def _load(parser, source):
        # Use read_file when the parser offers it, otherwise readfp.
        reader = parser.read_file if hasattr(parser, 'read_file') else parser.readfp
        reader(source)

    parser = configparser.ConfigParser()
    try:
        _load(parser, buffered)
    except configparser.MissingSectionHeaderError:
        # Retry once with common leading whitespace removed.
        buffered.close()
        buffered = StringIO(textwrap.dedent(text))
        _load(parser, buffered)

    exports = {}
    for section in parser.sections():
        entries = exports[section] = {}
        for name, value in parser.items(section):
            parsed = get_export_entry('%s = %s' % (name, value))
            assert parsed is not None
            entries[name] = parsed
    return exports
|
| 405 |
+
|
| 406 |
+
|
| 407 |
+
def write_exports(exports, stream):
    """Write *exports* to *stream* in INI format.

    :param exports: Dict mapping section name to a dict of entries; each
                    entry provides ``name``, ``prefix``, ``suffix`` and
                    ``flags`` attributes.
    :param stream: Writable stream (wrapped in a UTF-8 writer on Python 3).
    """
    if sys.version_info[0] >= 3:
        # needs to be a text stream
        stream = codecs.getwriter('utf-8')(stream)
    parser = configparser.ConfigParser()
    for section, entries in exports.items():
        # TODO check section, entries for valid values
        parser.add_section(section)
        for entry in entries.values():
            target = entry.prefix
            if entry.suffix is not None:
                target = '%s:%s' % (entry.prefix, entry.suffix)
            if entry.flags:
                target = '%s [%s]' % (target, ', '.join(entry.flags))
            parser.set(section, entry.name, target)
    parser.write(stream)
|
| 424 |
+
|
| 425 |
+
|
| 426 |
+
@contextlib.contextmanager
def tempdir():
    """Context manager yielding a fresh temporary directory.

    The directory and its contents are removed when the block exits,
    whether normally or through an exception.
    """
    scratch = tempfile.mkdtemp()
    try:
        yield scratch
    finally:
        shutil.rmtree(scratch)
|
| 433 |
+
|
| 434 |
+
|
| 435 |
+
@contextlib.contextmanager
def chdir(d):
    """Context manager that runs its block with *d* as working directory.

    The previous working directory is restored on exit, even when the
    block (or the initial change of directory) raises.
    """
    previous = os.getcwd()
    try:
        os.chdir(d)
        yield
    finally:
        os.chdir(previous)
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
@contextlib.contextmanager
def socket_timeout(seconds=15):
    """Context manager that temporarily sets the default socket timeout.

    :param seconds: Timeout installed with ``socket.setdefaulttimeout``
                    for the duration of the block; the earlier default is
                    put back on exit.
    """
    saved = socket.getdefaulttimeout()
    try:
        socket.setdefaulttimeout(seconds)
        yield
    finally:
        socket.setdefaulttimeout(saved)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
class cached_property(object):
    """Descriptor that computes a value once and stores it on the instance.

    On first access through an instance the wrapped function is called and
    its result is set on the instance under the function's own name.
    """

    def __init__(self, func):
        self.func = func

    def __get__(self, obj, cls=None):
        if obj is not None:
            computed = self.func(obj)
            # Stored via object.__setattr__, i.e. bypassing any __setattr__
            # the owning class may define.
            object.__setattr__(obj, self.func.__name__, computed)
            return computed
        # Accessed on the class itself: hand back the descriptor.
        return self
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
def convert_path(pathname):
    """Return *pathname* in a form usable on the native filesystem.

    The '/'-separated input is split and rejoined with the local directory
    separator, dropping ``os.curdir`` components. When the local separator
    is already '/', or *pathname* is empty, the input is returned
    unchanged.

    :raises ValueError: on systems whose separator is not '/', if
                        *pathname* starts or ends with a slash.
    """
    if os.sep == '/' or not pathname:
        return pathname
    if pathname[0] == '/':
        raise ValueError("path '%s' cannot be absolute" % pathname)
    if pathname[-1] == '/':
        raise ValueError("path '%s' cannot end with '/'" % pathname)

    parts = [part for part in pathname.split('/') if part != os.curdir]
    if not parts:
        return os.curdir
    return os.path.join(*parts)
|
| 496 |
+
|
| 497 |
+
|
| 498 |
+
class FileOperator(object):
    """File-system operations with optional dry-run and change recording.

    When ``dry_run`` is true, operations are logged but nothing is written.
    When ``record`` is true, written files and created directories are
    tracked so they can later be returned by :meth:`commit` or removed by
    :meth:`rollback`.
    """

    def __init__(self, dry_run=False):
        self.dry_run = dry_run
        # Absolute paths already handled by ensure_dir.
        self.ensured = set()
        self._init_record()

    def _init_record(self):
        """Switch recording off and forget anything recorded so far."""
        self.record = False
        self.files_written = set()
        self.dirs_created = set()

    def record_as_written(self, path):
        """Remember *path* as written, if recording is enabled."""
        if self.record:
            self.files_written.add(path)

    def newer(self, source, target):
        """Tell if the source is newer than the target.

        Returns true if 'source' exists and is more recently modified than
        'target', or if 'source' exists and 'target' doesn't.

        Returns false if both exist and 'target' is the same age or younger
        than 'source'. Raises DistlibException if 'source' does not exist.

        Note that this test is not very accurate: files created in the same
        second will have the same "age".
        """
        if not os.path.exists(source):
            raise DistlibException("file '%r' does not exist" %
                                   os.path.abspath(source))
        if not os.path.exists(target):
            return True

        return os.stat(source).st_mtime > os.stat(target).st_mtime

    def copy_file(self, infile, outfile, check=True):
        """Copy *infile* to *outfile*, respecting the dry-run flag.

        :param check: When true, refuse to overwrite a symlink or a
                      non-regular file.
        :raises ValueError: if *check* is true and *outfile* is a symlink
                            or an existing non-regular file.
        """
        self.ensure_dir(os.path.dirname(outfile))
        logger.info('Copying %s to %s', infile, outfile)
        if not self.dry_run:
            msg = None
            if check:
                if os.path.islink(outfile):
                    msg = '%s is a symlink' % outfile
                elif os.path.exists(outfile) and not os.path.isfile(outfile):
                    msg = '%s is a non-regular file' % outfile
            if msg:
                raise ValueError(msg + ' which would be overwritten')
            shutil.copyfile(infile, outfile)
        self.record_as_written(outfile)

    def copy_stream(self, instream, outfile, encoding=None):
        """Copy the contents of *instream* to the file *outfile*.

        The output is opened in binary mode unless *encoding* is given, in
        which case it is opened as text with that encoding.
        """
        assert not os.path.isdir(outfile)
        self.ensure_dir(os.path.dirname(outfile))
        logger.info('Copying stream %s to %s', instream, outfile)
        if not self.dry_run:
            if encoding is None:
                outstream = open(outfile, 'wb')
            else:
                outstream = codecs.open(outfile, 'w', encoding=encoding)
            try:
                shutil.copyfileobj(instream, outstream)
            finally:
                outstream.close()
        self.record_as_written(outfile)

    def write_binary_file(self, path, data):
        """Write the bytes *data* to *path*, replacing any existing file."""
        self.ensure_dir(os.path.dirname(path))
        if not self.dry_run:
            if os.path.exists(path):
                os.remove(path)
            with open(path, 'wb') as f:
                f.write(data)
        self.record_as_written(path)

    def write_text_file(self, path, data, encoding):
        """Encode *data* with *encoding* and write it to *path*."""
        self.write_binary_file(path, data.encode(encoding))

    def set_mode(self, bits, mask, files):
        """Set permission *bits* (limited by *mask*) on each of *files*.

        Only acts when ``os.name`` is 'posix' (or 'java' on a posix host).
        """
        if os.name == 'posix' or (os.name == 'java' and os._name == 'posix'):
            # Set the executable bits (owner, group, and world) on
            # all the files specified.
            for f in files:
                if self.dry_run:
                    logger.info("changing mode of %s", f)
                else:
                    mode = (os.stat(f).st_mode | bits) & mask
                    logger.info("changing mode of %s to %o", f, mode)
                    os.chmod(f, mode)

    def set_executable_mode(self, f):
        """Add read and execute permission for everyone to the files *f*."""
        return self.set_mode(0o555, 0o7777, f)

    def ensure_dir(self, path):
        """Create directory *path*, and any missing parents, if needed."""
        path = os.path.abspath(path)
        if path not in self.ensured and not os.path.exists(path):
            self.ensured.add(path)
            # Parents first, so that mkdir below can succeed.
            self.ensure_dir(os.path.dirname(path))
            logger.info('Creating %s', path)
            if not self.dry_run:
                os.mkdir(path)
            if self.record:
                self.dirs_created.add(path)

    def byte_compile(self,
                     path,
                     optimize=False,
                     force=False,
                     prefix=None,
                     hashed_invalidation=False):
        """Byte-compile *path* and return the path of the compiled file.

        Compilation happens only when not in dry-run mode and either
        *force* is true or the source is newer than the compiled file.

        :param prefix: If given, *path* must start with it; the remainder
                       is passed to ``py_compile.compile`` as its third
                       argument.
        :param hashed_invalidation: Request CHECKED_HASH invalidation when
                                    ``py_compile`` supports it.
        """
        dpath = cache_from_source(path, not optimize)
        logger.info('Byte-compiling %s to %s', path, dpath)
        if not self.dry_run:
            # NOTE(review): the whole compile step is kept inside this
            # staleness guard so that diagpath is always bound when used.
            if force or self.newer(path, dpath):
                if not prefix:
                    diagpath = None
                else:
                    assert path.startswith(prefix)
                    diagpath = path[len(prefix):]
                compile_kwargs = {}
                if hashed_invalidation and hasattr(py_compile,
                                                   'PycInvalidationMode'):
                    compile_kwargs[
                        'invalidation_mode'] = py_compile.PycInvalidationMode.CHECKED_HASH
                py_compile.compile(path, dpath, diagpath, True,
                                   **compile_kwargs)  # raise error
        self.record_as_written(dpath)
        return dpath

    def ensure_removed(self, path):
        """Remove *path* (file, link or directory tree) if it exists."""
        if os.path.exists(path):
            if os.path.isdir(path) and not os.path.islink(path):
                logger.debug('Removing directory tree at %s', path)
                if not self.dry_run:
                    shutil.rmtree(path)
                if self.record:
                    self.dirs_created.discard(path)
            else:
                if os.path.islink(path):
                    s = 'link'
                else:
                    s = 'file'
                logger.debug('Removing %s %s', s, path)
                if not self.dry_run:
                    os.remove(path)
                if self.record:
                    self.files_written.discard(path)

    def is_writable(self, path):
        """Tell whether *path*, or its nearest existing ancestor, is writable."""
        result = False
        while not result:
            if os.path.exists(path):
                result = os.access(path, os.W_OK)
                break
            parent = os.path.dirname(path)
            if parent == path:
                # Reached the top without finding an existing path.
                break
            path = parent
        return result

    def commit(self):
        """
        Commit recorded changes, turn off recording, return
        changes.

        :return: A ``(files_written, dirs_created)`` tuple of sets.
        """
        assert self.record
        result = self.files_written, self.dirs_created
        self._init_record()
        return result

    def rollback(self):
        """Remove recorded files and directories, then reset recording."""
        if not self.dry_run:
            for f in list(self.files_written):
                if os.path.exists(f):
                    os.remove(f)
            # dirs should all be empty now, except perhaps for
            # __pycache__ subdirs
            # reverse so that subdirs appear before their parents
            dirs = sorted(self.dirs_created, reverse=True)
            for d in dirs:
                flist = os.listdir(d)
                if flist:
                    assert flist == ['__pycache__']
                    sd = os.path.join(d, flist[0])
                    os.rmdir(sd)
                os.rmdir(d)  # should fail if non-empty
        self._init_record()
|
| 689 |
+
|
| 690 |
+
|
| 691 |
+
def resolve(module_name, dotted_path):
    """Import *module_name* and return the object at *dotted_path* in it.

    :param module_name: Name of the module, possibly dotted
                        (e.g. ``'pkg.sub.mod'``).
    :param dotted_path: Attribute path inside the module
                        (e.g. ``'Class.method'``), or ``None`` to get the
                        module itself.
    :return: The module or the attribute found.
    :raises ImportError: if the module cannot be imported.
    :raises AttributeError: if a component of *dotted_path* is missing.
    """
    if module_name not in sys.modules:
        # __import__ returns the top-level package for a dotted name, so it
        # is called only for its side effect; the module actually requested
        # is then looked up in sys.modules.
        __import__(module_name)
    mod = sys.modules[module_name]
    if dotted_path is None:
        return mod
    result = mod
    for part in dotted_path.split('.'):
        result = getattr(result, part)
    return result
|
| 704 |
+
|
| 705 |
+
|
| 706 |
+
class ExportEntry(object):
    """A named export: ``name = prefix[:suffix] [flags]``.

    ``prefix`` and ``suffix`` are handed to ``resolve`` to obtain
    :attr:`value`; ``flags`` is the list of flags attached to the entry.
    """

    def __init__(self, name, prefix, suffix, flags):
        self.name = name
        self.prefix = prefix
        self.suffix = suffix
        self.flags = flags

    @cached_property
    def value(self):
        # Resolved on first access.
        return resolve(self.prefix, self.suffix)

    def __repr__(self):  # pragma: no cover
        details = (self.name, self.prefix, self.suffix, self.flags)
        return '<ExportEntry %s = %s:%s %s>' % details

    def __eq__(self, other):
        if not isinstance(other, ExportEntry):
            return False
        return (self.name == other.name and self.prefix == other.prefix
                and self.suffix == other.suffix
                and self.flags == other.flags)

    # Keep identity-based hashing even though __eq__ is overridden.
    __hash__ = object.__hash__
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
# Pattern for an export specification of the form
#   name = callable [flags]
# where callable is made of word characters joined by ':' or '.', and the
# bracketed, comma-separated flags part is optional. Compiled in VERBOSE
# mode, so whitespace in the pattern itself is not significant.
ENTRY_RE = re.compile(
    r'''(?P<name>([^\[]\S*))
                      \s*=\s*(?P<callable>(\w+)([:\.]\w+)*)
                      \s*(\[\s*(?P<flags>[\w-]+(=\w+)?(,\s*\w+(=\w+)?)*)\s*\])?
                      ''', re.VERBOSE)
|
| 739 |
+
|
| 740 |
+
|
| 741 |
+
def get_export_entry(specification):
    """Parse an export *specification* into an ``ExportEntry``.

    :param specification: Text of the form ``name = prefix[:suffix] [flags]``.
    :return: An ``ExportEntry``, or ``None`` when the text does not match
             ``ENTRY_RE`` and contains no square brackets.
    :raises DistlibException: if the callable part has more than one ':',
                              or if square brackets are present but no
                              flags could be parsed.
    """
    match = ENTRY_RE.search(specification)
    if not match:
        if '[' in specification or ']' in specification:
            raise DistlibException("Invalid specification "
                                   "'%s'" % specification)
        return None

    fields = match.groupdict()
    target = fields['callable']
    colon_count = target.count(':')
    if colon_count == 0:
        prefix, suffix = target, None
    elif colon_count == 1:
        prefix, suffix = target.split(':')
    else:
        raise DistlibException("Invalid specification "
                               "'%s'" % specification)

    raw_flags = fields['flags']
    if raw_flags is not None:
        flags = [flag.strip() for flag in raw_flags.split(',')]
    else:
        # Brackets that did not parse as flags make the whole entry invalid.
        if '[' in specification or ']' in specification:
            raise DistlibException("Invalid specification "
                                   "'%s'" % specification)
        flags = []
    return ExportEntry(fields['name'], prefix, suffix, flags)
|
| 770 |
+
|
| 771 |
+
|
| 772 |
+
def get_cache_base(suffix=None):
    """
    Return the default base location for distlib caches.

    The result is the directory named *suffix* (default ``'.distlib'``)
    inside a parent directory chosen as follows:

    * On Windows, if LOCALAPPDATA is defined in the environment, its
      expanded value is the parent.
    * Otherwise the user's home directory, ``os.path.expanduser('~')``, is
      the parent.

    If the parent does not exist it is created; if it cannot be created,
    or exists but is not writable, a new temporary directory is used as
    the parent instead. The returned path itself is not created here.
    """
    if suffix is None:
        suffix = '.distlib'

    use_localappdata = os.name == 'nt' and 'LOCALAPPDATA' in os.environ
    if use_localappdata:
        parent = os.path.expandvars('$localappdata')
    else:
        # Assume posix, or old Windows
        parent = os.path.expanduser('~')

    # 'isdir' rather than 'exists': a plain file with that name must not
    # be accepted as usable.
    if not os.path.isdir(parent):
        try:
            os.makedirs(parent)
            usable = True
        except OSError:
            logger.warning('Unable to create %s', parent, exc_info=True)
            usable = False
    else:
        usable = os.access(parent, os.W_OK)
        if not usable:
            logger.warning('Directory exists but is not writable: %s', parent)

    if not usable:
        parent = tempfile.mkdtemp()
        logger.warning('Default location unusable, using %s', parent)
    return os.path.join(parent, suffix)
|
| 811 |
+
|
| 812 |
+
|
| 813 |
+
def path_to_cache_dir(path):
    """
    Convert a path to a directory name for use in a cache.

    The path is made absolute, then:

    #. Any ``':'`` in the drive part is replaced with ``'---'``.
    #. Any occurrence of ``os.sep`` in the rest is replaced with ``'--'``.
    #. ``'.cache'`` is appended.
    """
    drive, tail = os.path.splitdrive(os.path.abspath(path))
    if drive:
        drive = drive.replace(':', '---')
    return ''.join([drive, tail.replace(os.sep, '--'), '.cache'])
|
| 828 |
+
|
| 829 |
+
|
| 830 |
+
def ensure_slash(s):
    """Return *s* with a trailing '/' appended unless it already has one."""
    return s if s.endswith('/') else s + '/'
|
| 834 |
+
|
| 835 |
+
|
| 836 |
+
def parse_credentials(netloc):
    """Split ``user:password@host`` style credentials off *netloc*.

    :param netloc: Network location, optionally prefixed with credentials.
    :return: ``(username, password, netloc)``. Credentials that are absent
             are ``None``; non-empty ones are percent-decoded. The
             returned netloc has the credentials removed.
    """
    username = password = None
    if '@' in netloc:
        # Split on the last '@' so the credentials may themselves hold '@'.
        userinfo, netloc = netloc.rsplit('@', 1)
        username, colon, secret = userinfo.partition(':')
        if colon:
            password = secret
    if username:
        username = unquote(username)
    if password:
        password = unquote(password)
    return username, password, netloc
|
| 849 |
+
|
| 850 |
+
|
| 851 |
+
def get_process_umask():
    """Return the current umask of the process.

    ``os.umask`` can only report the mask by setting a new one, so a
    placeholder is installed briefly and the reported mask put back at once.
    """
    current = os.umask(0o22)
    os.umask(current)
    return current
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
def is_string_sequence(seq):
    """Tell whether every item of *seq* is an instance of ``string_types``.

    Checking stops at the first item that is not. An empty *seq* trips an
    assertion rather than returning a result.
    """
    seen_any = False
    all_strings = True
    for item in seq:
        seen_any = True
        if not isinstance(item, string_types):
            all_strings = False
            break
    assert seen_any
    return all_strings
|
| 866 |
+
|
| 867 |
+
|
| 868 |
+
PROJECT_NAME_AND_VERSION = re.compile(
    '([a-z0-9_]+([.-][a-z_][a-z0-9_]*)*)-'
    '([a-z0-9_.+-]+)', re.I)
# Matches a '-pyX', '-pyXY' or '-pyX.Y' tag. Any number of digits may follow
# the major digit, so that two-digit minor versions such as '3.10' are
# captured whole instead of being cut down to '3.1'.
PYTHON_VERSION = re.compile(r'-py(\d\.?\d*)')


def split_filename(filename, project_name=None):
    """
    Extract name, version, python version from a filename (no extension)

    The filename is percent-decoded and spaces are replaced by '-'. A
    trailing ``-pyX.Y`` tag, if present, is taken as the Python version and
    everything from it onwards is dropped before the name and version are
    split.

    :param filename: The filename, without extension.
    :param project_name: If given and *filename* starts with it, the split
                         point is taken straight after this name.
    :return: A ``(name, version, pyver)`` tuple, where ``pyver`` is ``None``
             when no Python tag is present, or ``None`` if the filename
             cannot be split.
    """
    result = None
    pyver = None
    filename = unquote(filename).replace(' ', '-')
    m = PYTHON_VERSION.search(filename)
    if m:
        pyver = m.group(1)
        filename = filename[:m.start()]
    if project_name and len(filename) > len(project_name) + 1:
        m = re.match(re.escape(project_name) + r'\b', filename)
        if m:
            n = m.end()
            # Skip the separator that follows the project name.
            result = filename[:n], filename[n + 1:], pyver
    if result is None:
        m = PROJECT_NAME_AND_VERSION.match(filename)
        if m:
            result = m.group(1), m.group(3), pyver
    return result
|
| 897 |
+
|
| 898 |
+
|
| 899 |
+
# Allow spaces in name because of legacy dists like "Twisted Core"
|
| 900 |
+
NAME_VERSION_RE = re.compile(r'(?P<name>[\w .-]+)\s*'
                             r'\(\s*(?P<ver>[^\s)]+)\)$')


def parse_name_and_version(p):
    """
    Split a ``'name (version)'`` string into its two parts.

    Used for values such as those of a Provides-Dist field.

    :param p: A value in a form 'foo (1.0)'
    :return: A ``(name, version)`` tuple; the name is stripped of
             surrounding whitespace and lower-cased.
    :raises DistlibException: if *p* is not of the expected form.
    """
    match = NAME_VERSION_RE.match(p)
    if not match:
        raise DistlibException('Ill-formed name/version string: \'%s\'' % p)
    name = match.group('name').strip().lower()
    return name, match.group('ver')
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def get_extras(requested, available):
    """Work out which extras to use from those *requested*.

    :param requested: Iterable of requested extras (or ``None``). ``'*'``
                      selects every available extra, ``'-name'`` removes
                      ``name`` from the selection, and a bare ``'-'`` is
                      passed through unchanged.
    :param available: Iterable of declared extras (or ``None``).
    :return: The set of selected extras. Extras that are not declared are
             still honoured, but a warning is logged for each.
    """
    wanted = set(requested or [])
    declared = set(available or [])
    chosen = set()
    if '*' in wanted:
        wanted.discard('*')
        chosen.update(declared)
    for extra in wanted:
        if extra == '-':
            chosen.add(extra)
            continue
        if extra.startswith('-'):
            name = extra[1:]
            if name not in declared:
                logger.warning('undeclared extra: %s' % name)
            chosen.discard(name)
        else:
            if extra not in declared:
                logger.warning('undeclared extra: %s' % extra)
            chosen.add(extra)
    return chosen
|
| 941 |
+
|
| 942 |
+
|
| 943 |
+
#
|
| 944 |
+
# Extended metadata functionality
|
| 945 |
+
#
|
| 946 |
+
|
| 947 |
+
|
| 948 |
+
def _get_external_data(url):
|
| 949 |
+
result = {}
|
| 950 |
+
try:
|
| 951 |
+
# urlopen might fail if it runs into redirections,
|
| 952 |
+
# because of Python issue #13696. Fixed in locators
|
| 953 |
+
# using a custom redirect handler.
|
| 954 |
+
resp = urlopen(url)
|
| 955 |
+
headers = resp.info()
|
| 956 |
+
ct = headers.get('Content-Type')
|
| 957 |
+
if not ct.startswith('application/json'):
|
| 958 |
+
logger.debug('Unexpected response for JSON request: %s', ct)
|
| 959 |
+
else:
|
| 960 |
+
reader = codecs.getreader('utf-8')(resp)
|
| 961 |
+
# data = reader.read().decode('utf-8')
|
| 962 |
+
# result = json.loads(data)
|
| 963 |
+
result = json.load(reader)
|
| 964 |
+
except Exception as e:
|
| 965 |
+
logger.exception('Failed to get external data for %s: %s', url, e)
|
| 966 |
+
return result
|
| 967 |
+
|
| 968 |
+
|
| 969 |
+
_external_data_base_url = 'https://www.red-dove.com/pypi/projects/'
|
| 970 |
+
|
| 971 |
+
|
| 972 |
+
def get_project_data(name):
    """Fetch the external ``project.json`` data for project *name*.

    The URL is built relative to ``_external_data_base_url`` as
    ``<first letter, upper-cased>/<name>/project.json``.
    """
    relative = '%s/%s/project.json' % (name[0].upper(), name)
    return _get_external_data(urljoin(_external_data_base_url, relative))
|
| 977 |
+
|
| 978 |
+
|
| 979 |
+
def get_package_data(name, version):
    """Fetch the external data for *version* of project *name*.

    The URL is built relative to ``_external_data_base_url`` as
    ``<first letter, upper-cased>/<name>/package-<version>.json``.
    """
    initial = name[0].upper()
    relative = '%s/%s/package-%s.json' % (initial, name, version)
    target = urljoin(_external_data_base_url, relative)
    return _get_external_data(target)
|
| 983 |
+
|
| 984 |
+
|
| 985 |
+
class Cache(object):
    """
    A file-system cache for resources that have to exist as real files,
    e.g. shared libraries. It lives in this module rather than with the
    resources code so that other modules, e.g. the wheel module, can use
    it as well.
    """

    def __init__(self, base):
        """
        Initialise an instance.

        :param base: The base directory where the cache should be located.
                     It is created if missing; a warning is logged if any
                     group/other permission bit is set on it.
        """
        # 'isdir' rather than 'exists': creation should fail if a plain
        # file with that name is in the way.
        if not os.path.isdir(base):  # pragma: no cover
            os.makedirs(base)
        mode = os.stat(base).st_mode
        if (mode & 0o77) != 0:
            logger.warning('Directory \'%s\' is not private', base)
        self.base = os.path.abspath(os.path.normpath(base))

    def prefix_to_dir(self, prefix):
        """
        Converts a resource prefix to a directory name in the cache.
        """
        return path_to_cache_dir(prefix)

    def clear(self):
        """
        Clear the cache.

        :return: A list of the paths that could not be removed.
        """
        leftovers = []
        for entry in os.listdir(self.base):
            full = os.path.join(self.base, entry)
            try:
                if os.path.islink(full) or os.path.isfile(full):
                    os.remove(full)
                elif os.path.isdir(full):
                    shutil.rmtree(full)
            except Exception:
                leftovers.append(full)
        return leftovers
|
| 1027 |
+
|
| 1028 |
+
|
| 1029 |
+
class EventMixin(object):
    """
    A minimal publish/subscribe mechanism: subscribers are callables kept
    per event name and invoked in order when the event is published.
    """

    def __init__(self):
        # Maps event name -> deque of subscribers.
        self._subscribers = {}

    def add(self, event, subscriber, append=True):
        """
        Add a subscriber for an event.

        :param event: The name of an event.
        :param subscriber: The subscriber to be added (and called when the
                           event is published).
        :param append: Whether to append or prepend the subscriber to an
                       existing subscriber list for the event.
        """
        registry = self._subscribers
        if event in registry:
            queue = registry[event]
            attach = queue.append if append else queue.appendleft
            attach(subscriber)
        else:
            registry[event] = deque([subscriber])

    def remove(self, event, subscriber):
        """
        Remove a subscriber for an event.

        :param event: The name of an event.
        :param subscriber: The subscriber to be removed.
        :raises ValueError: if the event has no subscribers at all, or the
                            subscriber is not registered for it.
        """
        registry = self._subscribers
        if event not in registry:
            raise ValueError('No subscribers: %r' % event)
        registry[event].remove(subscriber)

    def get_subscribers(self, event):
        """
        Return an iterator for the subscribers for an event.

        :param event: The event to return subscribers for.
        """
        return iter(self._subscribers.get(event, ()))

    def publish(self, event, *args, **kwargs):
        """
        Publish an event and return a list of values returned by its
        subscribers.

        A subscriber that raises has the exception logged and contributes
        ``None`` to the result.

        :param event: The event to publish.
        :param args: The positional arguments to pass to the event's
                     subscribers.
        :param kwargs: The keyword arguments to pass to the event's
                       subscribers.
        """
        outcomes = []
        for subscriber in self.get_subscribers(event):
            try:
                outcome = subscriber(event, *args, **kwargs)
            except Exception:
                logger.exception('Exception during event publication')
                outcome = None
            outcomes.append(outcome)
        logger.debug('publish %s: args = %s, kwargs = %s, result = %s', event,
                     args, kwargs, outcomes)
        return outcomes
|
| 1098 |
+
|
| 1099 |
+
|
| 1100 |
+
#
|
| 1101 |
+
# Simple sequencing
|
| 1102 |
+
#
|
| 1103 |
+
class Sequencer(object):
    """Directed graph of steps linked by predecessor/successor edges.

    ``_preds`` maps a step to the set of steps that must come before it,
    ``_succs`` maps a step to the set of steps that come after it, and
    ``_nodes`` holds steps registered on their own through ``add_node``.
    """

    def __init__(self):
        self._preds = {}
        self._succs = {}
        self._nodes = set()  # nodes with no preds/succs

    def add_node(self, node):
        """Register ``node`` as a stand-alone step."""
        self._nodes.add(node)

    def remove_node(self, node, edges=False):
        """Forget ``node``; if ``edges`` is true also drop its edges."""
        self._nodes.discard(node)
        if not edges:
            return
        # Iterate over copies: remove() mutates the underlying sets.
        for before in set(self._preds.get(node, ())):
            self.remove(before, node)
        for after in set(self._succs.get(node, ())):
            self.remove(node, after)
        # Remove empties
        for mapping in (self._preds, self._succs):
            for key in [k for k, v in mapping.items() if not v]:
                del mapping[key]

    def add(self, pred, succ):
        """Record that ``pred`` comes before ``succ``."""
        assert pred != succ
        self._preds.setdefault(succ, set()).add(pred)
        self._succs.setdefault(pred, set()).add(succ)

    def remove(self, pred, succ):
        """Delete the edge ``pred -> succ``.

        Raises ValueError if no such edge is recorded.
        """
        assert pred != succ
        try:
            before = self._preds[succ]
            after = self._succs[pred]
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of anything' % succ)
        try:
            before.remove(pred)
            after.remove(succ)
        except KeyError:  # pragma: no cover
            raise ValueError('%r not a successor of %r' % (succ, pred))

    def is_step(self, step):
        """Return True if ``step`` is known as a node or an edge endpoint."""
        return any(step in pool
                   for pool in (self._preds, self._succs, self._nodes))

    def get_steps(self, final):
        """Return an iterator over the steps leading up to ``final``.

        The walk goes backwards through predecessors starting at
        ``final``; the collected list is reversed on return so that
        ``final`` comes last. Raises ValueError for an unknown step.
        """
        if not self.is_step(final):
            raise ValueError('Unknown: %r' % final)
        ordered = []
        visited = set()
        pending = [final]
        while pending:
            current = pending.pop(0)
            if current in visited:
                # if a step was already seen,
                # move it to the end (so it will appear earlier
                # when reversed on return) ... but not for the
                # final step, as that would be confusing for
                # users
                if current != final:
                    ordered.remove(current)
                    ordered.append(current)
                continue
            visited.add(current)
            ordered.append(current)
            pending.extend(self._preds.get(current, ()))
        return reversed(ordered)

    @property
    def strong_connections(self):
        """List of strongly connected components, each a tuple of steps."""
        # http://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm
        counter = [0]
        stack = []
        lowlinks = {}
        index = {}
        components = []

        graph = self._succs

        def strongconnect(node):
            # set the depth index for this node to the smallest unused index
            index[node] = lowlinks[node] = counter[0]
            counter[0] += 1
            stack.append(node)

            # Consider successors
            try:
                successors = graph[node]
            except Exception:
                successors = []
            for successor in successors:
                if successor not in lowlinks:
                    # Successor has not yet been visited
                    strongconnect(successor)
                    lowlinks[node] = min(lowlinks[node], lowlinks[successor])
                elif successor in stack:
                    # the successor is in the stack and hence in the current
                    # strongly connected component (SCC)
                    lowlinks[node] = min(lowlinks[node], index[successor])

            # If `node` is a root node, pop the stack and generate an SCC
            if lowlinks[node] != index[node]:
                return
            members = []
            while True:
                member = stack.pop()
                members.append(member)
                if member == node:
                    break
            # storing the result
            components.append(tuple(members))

        for node in graph:
            if node not in lowlinks:
                strongconnect(node)

        return components

    @property
    def dot(self):
        """Graphviz ``digraph`` source describing the edges and lone nodes."""
        lines = ['digraph G {']
        for succ, preds in self._preds.items():
            lines.extend(' %s -> %s;' % (pred, succ) for pred in preds)
        lines.extend(' %s;' % node for node in self._nodes)
        lines.append('}')
        return '\n'.join(lines)
|
| 1239 |
+
|
| 1240 |
+
|
| 1241 |
+
#
|
| 1242 |
+
# Unarchiving functionality for zip, tar, tgz, tbz, whl
|
| 1243 |
+
#
|
| 1244 |
+
|
| 1245 |
+
# Archive filename suffixes: tar variants, zip and wheel.
ARCHIVE_EXTENSIONS = (
    '.tar.gz',
    '.tar.bz2',
    '.tar',
    '.zip',
    '.tgz',
    '.tbz',
    '.whl',
)
|
| 1247 |
+
|
| 1248 |
+
|
| 1249 |
+
def unarchive(archive_filename, dest_dir, format=None, check=True):
    """Extract a zip/wheel or tar archive into ``dest_dir``.

    :param archive_filename: path of the archive to extract.
    :param dest_dir: directory that receives the extracted members.
    :param format: one of ``'zip'``, ``'tgz'``, ``'tbz'`` or ``'tar'``;
                   when ``None`` it is derived from the filename suffix.
    :param check: if true, every member name is verified to resolve
                  inside ``dest_dir`` before anything is extracted.
    :raises ValueError: for an unknown format, or for a member that
                        would be written outside ``dest_dir``.
    """
    # tarfile.open() mode for each tar flavour; also used to validate an
    # explicitly supplied format.
    tar_modes = {'tgz': 'r:gz', 'tbz': 'r:bz2', 'tar': 'r'}

    def check_path(path):
        if isinstance(path, bytes):
            path = path.decode('utf-8')
        p = os.path.abspath(os.path.join(dest_dir, path))
        # A member may resolve to the destination itself (e.g. '.');
        # anything else has to live strictly below it.
        if p != dest_dir and not p.startswith(dest_prefix):
            raise ValueError('path outside destination: %r' % p)

    dest_dir = os.path.abspath(dest_dir)
    # Compare against "dest_dir + separator" so that a sibling such as
    # "<dest_dir>-other" is not mistaken for a child; a root directory
    # already ends with the separator.
    if dest_dir.endswith(os.sep):
        dest_prefix = dest_dir
    else:
        dest_prefix = dest_dir + os.sep
    archive = None
    if format is None:
        if archive_filename.endswith(('.zip', '.whl')):
            format = 'zip'
        elif archive_filename.endswith(('.tar.gz', '.tgz')):
            format = 'tgz'
        elif archive_filename.endswith(('.tar.bz2', '.tbz')):
            format = 'tbz'
        elif archive_filename.endswith('.tar'):
            format = 'tar'
        else:  # pragma: no cover
            raise ValueError('Unknown format for %r' % archive_filename)
    elif format != 'zip' and format not in tar_modes:
        raise ValueError('Unknown format: %r' % format)
    try:
        if format == 'zip':
            archive = ZipFile(archive_filename, 'r')
            names = archive.namelist() if check else ()
        else:
            archive = tarfile.open(archive_filename, tar_modes[format])
            names = archive.getnames() if check else ()
        for name in names:
            check_path(name)
        if format != 'zip' and sys.version_info[0] < 3:
            # See Python issue 17153. If the dest path contains Unicode,
            # tarfile extraction fails on Python 2.x if a member path name
            # contains non-ASCII characters - it leads to an implicit
            # bytes -> unicode conversion using ASCII to decode.
            for tarinfo in archive.getmembers():
                if isinstance(tarinfo.name, bytes):
                    tarinfo.name = tarinfo.name.decode('utf-8')

        # Limit extraction of dangerous items, if this Python
        # allows it easily. If not, just trust the input.
        # See: https://docs.python.org/3/library/tarfile.html#extraction-filters
        def extraction_filter(member, path):
            """Run tarfile.tar_filter, but raise the expected ValueError"""
            # This is only called if the current Python has tarfile filters
            try:
                return tarfile.tar_filter(member, path)
            except tarfile.FilterError as exc:
                raise ValueError(str(exc))

        archive.extraction_filter = extraction_filter

        archive.extractall(dest_dir)

    finally:
        if archive is not None:
            archive.close()
|
| 1315 |
+
|
| 1316 |
+
|
| 1317 |
+
def zip_dir(directory):
    """zip a directory tree into a BytesIO object"""
    buffer = io.BytesIO()
    prefix_len = len(directory)
    with ZipFile(buffer, "w") as archive:
        for root, _dirs, files in os.walk(directory):
            # Part of ``root`` that follows the ``directory`` prefix.
            relative_root = root[prefix_len:]
            for filename in files:
                archive.write(os.path.join(root, filename),
                              os.path.join(relative_root, filename))
    return buffer
|
| 1329 |
+
|
| 1330 |
+
|
| 1331 |
+
#
|
| 1332 |
+
# Simple progress bar
|
| 1333 |
+
#
|
| 1334 |
+
|
| 1335 |
+
# Prefixes used by Progress.speed, one per factor of 1000.
UNITS = ('', 'K', 'M', 'G', 'T', 'P')


class Progress(object):
    """Track progress between ``minval`` and ``maxval``.

    ``maxval`` may be ``None`` when the total is not known. Elapsed time
    is measured from the first call to ``update``.
    """
    unknown = 'UNKNOWN'

    def __init__(self, minval=0, maxval=100):
        assert maxval is None or maxval >= minval
        self.min = self.cur = minval
        self.max = maxval
        self.started = None
        self.elapsed = 0
        self.done = False

    def update(self, curval):
        """Set the current value and refresh the elapsed time."""
        assert self.min <= curval
        assert self.max is None or curval <= self.max
        self.cur = curval
        now = time.time()
        if self.started is None:
            self.started = now
        else:
            self.elapsed = now - self.started

    def increment(self, incr):
        """Advance the current value by the non-negative ``incr``."""
        assert incr >= 0
        self.update(self.cur + incr)

    def start(self):
        """Reset the current value to the minimum and return ``self``."""
        self.update(self.min)
        return self

    def stop(self):
        """Mark the progress as done, jumping to the maximum if known."""
        if self.max is not None:
            self.update(self.max)
        self.done = True

    @property
    def maximum(self):
        """The maximum value, or ``Progress.unknown`` when it is None."""
        return self.unknown if self.max is None else self.max

    @property
    def percentage(self):
        """Completion as a five-character string such as ``' 42 %'``."""
        if self.done:
            return '100 %'
        if self.max is None:
            return ' ?? %'
        span = self.max - self.min
        if span == 0:
            # Empty range: min == cur == max, so there is nothing left to
            # do. Report completion instead of dividing by zero.
            return '100 %'
        return '%3d %%' % (100.0 * (self.cur - self.min) / span)

    def format_duration(self, duration):
        """Format ``duration`` (seconds) as HH:MM:SS, or as question marks.

        The placeholder is used when nothing has progressed yet, or when
        the duration is not positive and the maximum is unknown.
        """
        if (duration <= 0) and self.max is None or self.cur == self.min:
            result = '??:??:??'
        else:
            result = time.strftime('%H:%M:%S', time.gmtime(duration))
        return result

    @property
    def ETA(self):
        """Elapsed time when done, otherwise the estimated time remaining."""
        if self.done:
            prefix = 'Done'
            t = self.elapsed
        else:
            prefix = 'ETA '
            if self.max is None:
                t = -1
            elif self.elapsed == 0 or (self.cur == self.min):
                t = 0
            else:
                # remaining = elapsed * (total / completed - 1)
                t = float(self.max - self.min)
                t /= self.cur - self.min
                t = (t - 1) * self.elapsed
        return '%s: %s' % (prefix, self.format_duration(t))

    @property
    def speed(self):
        """Average rate since the start, e.g. ``'5 MB/s'``."""
        if self.elapsed == 0:
            result = 0.0
        else:
            result = (self.cur - self.min) / self.elapsed
        for unit in UNITS:
            if result < 1000:
                break
            result /= 1000.0
        return '%d %sB/s' % (result, unit)
|
| 1426 |
+
|
| 1427 |
+
|
| 1428 |
+
#
|
| 1429 |
+
# Glob functionality
|
| 1430 |
+
#
|
| 1431 |
+
|
| 1432 |
+
# ``{a,b,c}`` alternative group; the group content is captured.
RICH_GLOB = re.compile(r'\{([^}]*)\}')
# ``**`` glued to a character other than a path separator, ',' or a brace.
_CHECK_RECURSIVE_GLOB = re.compile(r'[^/\\,{]\*\*|\*\*[^/\\,}]')
# A '}' with no '{' before it, or a '{' that is never closed.
_CHECK_MISMATCH_SET = re.compile(r'^[^{]*\}|\{[^}]*$')


def iglob(path_glob):
    """Extended globbing function that supports ** and {opt1,opt2,opt3}."""
    validators = (
        (_CHECK_RECURSIVE_GLOB,
         """invalid glob %r: recursive glob "**" must be used alone"""),
        (_CHECK_MISMATCH_SET,
         """invalid glob %r: mismatching set marker '{' or '}'"""),
    )
    # Reject malformed patterns up front, in the same order as the checks
    # are listed above.
    for pattern, msg in validators:
        if pattern.search(path_glob):
            raise ValueError(msg % path_glob)
    return _iglob(path_glob)
|
| 1446 |
+
|
| 1447 |
+
|
| 1448 |
+
def _iglob(path_glob):
    """Generator behind ``iglob``: expand ``{...}`` groups, then ``**``."""
    parts = RICH_GLOB.split(path_glob, 1)
    if len(parts) > 1:
        # One brace group found: recurse once per alternative.
        assert len(parts) == 3, parts
        head, alternatives, tail = parts
        for choice in alternatives.split(','):
            for match in _iglob(''.join((head, choice, tail))):
                yield match
        return

    if '**' not in path_glob:
        for match in std_iglob(path_glob):
            yield match
        return

    head, remainder = path_glob.split('**', 1)
    if head == '':
        head = '.'
    if remainder == '':
        remainder = '*'
    else:
        # we support both
        remainder = remainder.lstrip('/')
        remainder = remainder.lstrip('\\')
    # Apply the remaining pattern in every directory below ``head``.
    for dirpath, _dirnames, _filenames in os.walk(head):
        dirpath = os.path.normpath(dirpath)
        for match in _iglob(os.path.join(dirpath, remainder)):
            yield match
|
| 1474 |
+
|
| 1475 |
+
|
| 1476 |
+
if ssl:
    from .compat import (HTTPSHandler as BaseHTTPSHandler, match_hostname,
                         CertificateError)

    #
    # HTTPSConnection which verifies certificates/matches domains
    #

    class HTTPSConnection(httplib.HTTPSConnection):
        """HTTPS connection that verifies the peer when ``ca_certs`` is set."""
        ca_certs = None  # set this to the path to the certs file (.pem)
        check_domain = True  # only used if ca_certs is not None

        # noinspection PyPropertyAccess
        def connect(self):
            """Open the TCP socket, wrap it in TLS and check the hostname."""
            raw_sock = socket.create_connection((self.host, self.port),
                                                self.timeout)
            if getattr(self, '_tunnel_host', False):
                self.sock = raw_sock
                self._tunnel()

            ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
            if hasattr(ssl, 'OP_NO_SSLv2'):
                ctx.options |= ssl.OP_NO_SSLv2
            if getattr(self, 'cert_file', None):
                ctx.load_cert_chain(self.cert_file, self.key_file)
            wrap_kwargs = {}
            if self.ca_certs:
                # Certificate verification is only switched on when a CA
                # bundle has been supplied.
                ctx.verify_mode = ssl.CERT_REQUIRED
                ctx.load_verify_locations(cafile=self.ca_certs)
                if getattr(ssl, 'HAS_SNI', False):
                    wrap_kwargs['server_hostname'] = self.host

            self.sock = ctx.wrap_socket(raw_sock, **wrap_kwargs)
            if not (self.ca_certs and self.check_domain):
                return
            try:
                match_hostname(self.sock.getpeercert(), self.host)
                logger.debug('Host verified: %s', self.host)
            except CertificateError:  # pragma: no cover
                # Tear the connection down before propagating the failure.
                self.sock.shutdown(socket.SHUT_RDWR)
                self.sock.close()
                raise

    class HTTPSHandler(BaseHTTPSHandler):
        """urllib handler that opens connections via ``HTTPSConnection``."""

        def __init__(self, ca_certs, check_domain=True):
            BaseHTTPSHandler.__init__(self)
            self.ca_certs = ca_certs
            self.check_domain = check_domain

        def _conn_maker(self, *args, **kwargs):
            """
            This is called to create a connection instance. Normally you'd
            pass a connection class to do_open, but it doesn't actually check for
            a class, and just expects a callable. As long as we behave just as a
            constructor would have, we should be OK. If it ever changes so that
            we *must* pass a class, we'll create an UnsafeHTTPSConnection class
            which just sets check_domain to False in the class definition, and
            choose which one to pass to do_open.
            """
            conn = HTTPSConnection(*args, **kwargs)
            if self.ca_certs:
                conn.ca_certs = self.ca_certs
                conn.check_domain = self.check_domain
            return conn

        def https_open(self, req):
            """Open ``req``; verification failures become CertificateError."""
            try:
                return self.do_open(self._conn_maker, req)
            except URLError as e:
                if 'certificate verify failed' not in str(e.reason):
                    raise
                raise CertificateError(
                    'Unable to verify server certificate '
                    'for %s' % req.host)

    #
    # To prevent against mixing HTTP traffic with HTTPS (examples: A Man-In-The-
    # Middle proxy using HTTP listens on port 443, or an index mistakenly serves
    # HTML containing a http://xyz link when it should be https://xyz),
    # you can use the following handler class, which does not allow HTTP traffic.
    #
    # It works by inheriting from HTTPHandler - so build_opener won't add a
    # handler for HTTP itself.
    #
    class HTTPSOnlyHandler(HTTPSHandler, HTTPHandler):
        """Handler that refuses every plain-HTTP request."""

        def http_open(self, req):
            raise URLError(
                'Unexpected HTTP request on what should be a secure '
                'connection: %s' % req)
|
| 1567 |
+
|
| 1568 |
+
|
| 1569 |
+
#
|
| 1570 |
+
# XML-RPC with timeouts
|
| 1571 |
+
#
|
| 1572 |
+
class Transport(xmlrpclib.Transport):
    """XML-RPC transport over plain HTTP that honours a socket timeout."""

    def __init__(self, timeout, use_datetime=0):
        """
        :param timeout: socket timeout in seconds for new connections;
                        ``None`` leaves the ``httplib`` default in place.
        :param use_datetime: forwarded to ``xmlrpclib.Transport``.
        """
        self.timeout = timeout
        xmlrpclib.Transport.__init__(self, use_datetime)

    def make_connection(self, host):
        """Return a connection to ``host``, reusing the cached one if any."""
        h, eh, x509 = self.get_host_info(host)
        if not self._connection or host != self._connection[0]:
            self._extra_headers = eh
            conn_kwargs = {}
            if self.timeout is not None:
                # Without this the timeout given to __init__ was stored
                # but never applied to the connection.
                conn_kwargs['timeout'] = self.timeout
            self._connection = host, httplib.HTTPConnection(h, **conn_kwargs)
        return self._connection[1]
|
| 1584 |
+
|
| 1585 |
+
|
| 1586 |
+
if ssl:

    class SafeTransport(xmlrpclib.SafeTransport):
        """XML-RPC transport over HTTPS that applies a socket timeout."""

        def __init__(self, timeout, use_datetime=0):
            self.timeout = timeout
            xmlrpclib.SafeTransport.__init__(self, use_datetime)

        def make_connection(self, host):
            """Return a connection to ``host``, reusing the cached one if any."""
            h, eh, kwargs = self.get_host_info(host)
            kwargs = kwargs or {}
            kwargs['timeout'] = self.timeout
            cached = self._connection
            if not cached or host != cached[0]:
                self._extra_headers = eh
                self._connection = host, httplib.HTTPSConnection(
                    h, None, **kwargs)
            return self._connection[1]
|
| 1604 |
+
|
| 1605 |
+
|
| 1606 |
+
class ServerProxy(xmlrpclib.ServerProxy):
    """``xmlrpclib.ServerProxy`` accepting an optional ``timeout`` keyword."""

    def __init__(self, uri, **kwargs):
        self.timeout = timeout = kwargs.pop('timeout', None)
        # The above classes only come into play if a timeout
        # is specified
        if timeout is not None:
            # scheme = splittype(uri)  # deprecated as of Python 3.8
            scheme = urlparse(uri)[0]
            use_datetime = kwargs.get('use_datetime', 0)
            transport_cls = SafeTransport if scheme == 'https' else Transport
            transport = transport_cls(timeout, use_datetime=use_datetime)
            kwargs['transport'] = transport
            self.transport = transport
        xmlrpclib.ServerProxy.__init__(self, uri, **kwargs)
|
| 1623 |
+
|
| 1624 |
+
|
| 1625 |
+
#
|
| 1626 |
+
# CSV functionality. This is provided because on 2.x, the csv module can't
|
| 1627 |
+
# handle Unicode. However, we need to deal with Unicode in e.g. RECORD files.
|
| 1628 |
+
#
|
| 1629 |
+
|
| 1630 |
+
|
| 1631 |
+
def _csv_open(fn, mode, **kwargs):
|
| 1632 |
+
if sys.version_info[0] < 3:
|
| 1633 |
+
mode += 'b'
|
| 1634 |
+
else:
|
| 1635 |
+
kwargs['newline'] = ''
|
| 1636 |
+
# Python 3 determines encoding from locale. Force 'utf-8'
|
| 1637 |
+
# file encoding to match other forced utf-8 encoding
|
| 1638 |
+
kwargs['encoding'] = 'utf-8'
|
| 1639 |
+
return open(fn, mode, **kwargs)
|
| 1640 |
+
|
| 1641 |
+
|
| 1642 |
+
class CSVBase(object):
    """Shared csv dialect settings and context-manager support.

    ``__exit__`` closes ``self.stream``, which subclasses must set.
    """
    # The strs are used because we need native str in the csv API
    # (2.x won't take Unicode)
    defaults = dict(
        delimiter=str(','),
        quotechar=str('"'),
        lineterminator=str('\n'),
    )

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.stream.close()
|
| 1654 |
+
|
| 1655 |
+
|
| 1656 |
+
class CSVReader(CSVBase):
    """Iterator over the rows of a CSV stream or file.

    Construct with either ``stream=<binary stream>`` or ``path=<filename>``.
    """

    def __init__(self, **kwargs):
        if 'stream' not in kwargs:
            self.stream = _csv_open(kwargs['path'], 'r')
        else:
            source = kwargs['stream']
            if sys.version_info[0] >= 3:
                # needs to be a text stream
                source = codecs.getreader('utf-8')(source)
            self.stream = source
        self.reader = csv.reader(self.stream, **self.defaults)

    def __iter__(self):
        return self

    def next(self):
        """Return the next row; on Python 2 byte cells are decoded as UTF-8."""
        row = next(self.reader)
        if sys.version_info[0] < 3:
            for pos, cell in enumerate(row):
                if not isinstance(cell, text_type):
                    row[pos] = cell.decode('utf-8')
        return row

    __next__ = next
|
| 1681 |
+
|
| 1682 |
+
|
| 1683 |
+
class CSVWriter(CSVBase):
    """Write rows to the CSV file ``fn`` (extra keyword arguments are unused)."""

    def __init__(self, fn, **kwargs):
        self.stream = _csv_open(fn, 'w')
        self.writer = csv.writer(self.stream, **self.defaults)

    def writerow(self, row):
        """Write one row; on Python 2 text cells are encoded as UTF-8 first."""
        if sys.version_info[0] < 3:
            row = [
                cell.encode('utf-8') if isinstance(cell, text_type) else cell
                for cell in row
            ]
        self.writer.writerow(row)
|
| 1698 |
+
|
| 1699 |
+
|
| 1700 |
+
#
|
| 1701 |
+
# Configurator functionality
|
| 1702 |
+
#
|
| 1703 |
+
|
| 1704 |
+
|
| 1705 |
+
class Configurator(BaseConfigurator):
    """Configurator that adds an ``inc`` value converter (JSON includes)."""

    value_converters = dict(BaseConfigurator.value_converters)
    value_converters['inc'] = 'inc_convert'

    def __init__(self, config, base=None):
        super(Configurator, self).__init__(config)
        # Directory used by inc_convert to resolve relative paths.
        self.base = base or os.getcwd()

    def configure_custom(self, config):
        """Build an object from a dict carrying a ``'()'`` factory entry.

        ``'()'`` is the factory (resolved if not callable), ``'[]'`` the
        positional arguments and ``'.'`` attributes set on the result;
        remaining keys that are valid identifiers become keyword arguments.
        The three special keys are popped from ``config``.
        """

        def convert(o):
            # Recursively convert containers; nested factory dicts are
            # instantiated through configure_custom.
            if isinstance(o, (list, tuple)):
                return type(o)([convert(i) for i in o])
            if isinstance(o, dict):
                if '()' in o:
                    return self.configure_custom(o)
                converted = {}
                for k in o:
                    converted[k] = convert(o[k])
                return converted
            return self.convert(o)

        factory = config.pop('()')
        if not callable(factory):
            factory = self.resolve(factory)
        props = config.pop('.', None)
        # Check for valid identifiers
        args = config.pop('[]', ())
        if args:
            args = tuple([convert(o) for o in args])
        kwargs = dict(
            [(k, convert(config[k])) for k in config if valid_ident(k)])
        instance = factory(*args, **kwargs)
        if props:
            for attr, value in props.items():
                setattr(instance, attr, convert(value))
        return instance

    def __getitem__(self, key):
        """Return ``config[key]``, instantiating and caching factory dicts."""
        value = self.config[key]
        if isinstance(value, dict) and '()' in value:
            value = self.configure_custom(value)
            self.config[key] = value
        return value

    def inc_convert(self, value):
        """Default converter for the inc:// protocol."""
        if not os.path.isabs(value):
            value = os.path.join(self.base, value)
        with codecs.open(value, 'r', encoding='utf-8') as f:
            return json.load(f)
|
| 1759 |
+
|
| 1760 |
+
|
| 1761 |
+
class SubprocessMixin(object):
    """
    Mixin for running subprocesses and capturing their output
    """

    def __init__(self, verbose=False, progress=None):
        self.verbose = verbose
        self.progress = progress

    def reader(self, stream, context):
        """
        Read lines from a subprocess' output stream and either pass to a progress
        callable (if specified) or write progress information to sys.stderr.
        """
        progress = self.progress
        verbose = self.verbose
        while True:
            line = stream.readline()
            if not line:
                break
            if progress is not None:
                progress(line, context)
                continue
            # No callback: echo the line when verbose, otherwise a dot.
            sys.stderr.write(line.decode('utf-8') if verbose else '.')
            sys.stderr.flush()
        stream.close()

    def run_command(self, cmd, **kwargs):
        """Run ``cmd``, drain stdout and stderr, and return the Popen object.

        Extra keyword arguments are passed to ``subprocess.Popen``.
        """
        proc = subprocess.Popen(cmd,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                **kwargs)
        # One reader thread per pipe, so both are drained while waiting.
        out_thread = threading.Thread(target=self.reader,
                                      args=(proc.stdout, 'stdout'))
        out_thread.start()
        err_thread = threading.Thread(target=self.reader,
                                      args=(proc.stderr, 'stderr'))
        err_thread.start()
        proc.wait()
        for worker in (out_thread, err_thread):
            worker.join()
        if self.progress is not None:
            self.progress('done.', 'main')
        elif self.verbose:
            sys.stderr.write('done.\n')
        return proc
|
| 1808 |
+
|
| 1809 |
+
|
| 1810 |
+
def normalize_name(name):
    """Normalize a python package name a la PEP 503"""
    # https://www.python.org/dev/peps/pep-0503/#normalized-names
    # Runs of '-', '_' and '.' collapse to a single '-', then lowercase.
    collapsed = re.sub('[-_.]+', '-', name)
    return collapsed.lower()
|
| 1814 |
+
|
| 1815 |
+
|
| 1816 |
+
# def _get_pypirc_command():
|
| 1817 |
+
# """
|
| 1818 |
+
# Get the distutils command for interacting with PyPI configurations.
|
| 1819 |
+
# :return: the command.
|
| 1820 |
+
# """
|
| 1821 |
+
# from distutils.core import Distribution
|
| 1822 |
+
# from distutils.config import PyPIRCCommand
|
| 1823 |
+
# d = Distribution()
|
| 1824 |
+
# return PyPIRCCommand(d)
|
| 1825 |
+
|
| 1826 |
+
|
| 1827 |
+
class PyPIRCFile(object):
    """Read and update a distutils-style ``.pypirc`` file."""

    DEFAULT_REPOSITORY = 'https://upload.pypi.org/legacy/'
    DEFAULT_REALM = 'pypi'

    def __init__(self, fn=None, url=None):
        """
        :param fn: path of the file; defaults to ``~/.pypirc``.
        :param url: repository URL (or server name) to look up in ``read``.
        """
        if fn is None:
            fn = os.path.join(os.path.expanduser('~'), '.pypirc')
        self.filename = fn
        self.url = url

    def read(self):
        """Return the settings for the repository selected by ``self.url``.

        The result is a dict with the keys ``server``, ``username``,
        ``password``, ``repository`` and ``realm``, or an empty dict when
        the file does not exist or no server matches.
        """
        result = {}
        if not os.path.exists(self.filename):
            return result

        repository = self.url or self.DEFAULT_REPOSITORY

        config = configparser.RawConfigParser()
        config.read(self.filename)
        sections = config.sections()
        if 'distutils' in sections:
            # let's get the list of servers
            index_servers = config.get('distutils', 'index-servers')
            _servers = [
                server.strip() for server in index_servers.split('\n')
                if server.strip() != ''
            ]
            if not _servers and 'pypi' in sections:
                # nothing set, let's try to get the default pypi
                _servers = ['pypi']
            for server in _servers:
                current = {'server': server}
                current['username'] = config.get(server, 'username')

                # optional params
                for key, default in (('repository',
                                      self.DEFAULT_REPOSITORY),
                                     ('realm', self.DEFAULT_REALM),
                                     ('password', None)):
                    if config.has_option(server, key):
                        current[key] = config.get(server, key)
                    else:
                        current[key] = default

                # work around people having "repository" for the "pypi"
                # section of their config set to the HTTP (rather than
                # HTTPS) URL
                if (server == 'pypi' and repository
                        in (self.DEFAULT_REPOSITORY, 'pypi')):
                    current['repository'] = self.DEFAULT_REPOSITORY
                    matched = True
                else:
                    matched = (current['server'] == repository
                               or current['repository'] == repository)
                if matched:
                    # Stop at the first match so that a later,
                    # non-matching server cannot discard it.
                    result = current
                    break
        elif 'server-login' in sections:
            # old format
            server = 'server-login'
            if config.has_option(server, 'repository'):
                repository = config.get(server, 'repository')
            else:
                repository = self.DEFAULT_REPOSITORY
            result = {
                'username': config.get(server, 'username'),
                'password': config.get(server, 'password'),
                'repository': repository,
                'server': server,
                'realm': self.DEFAULT_REALM
            }
        return result

    def update(self, username, password):
        """Store ``username`` and ``password`` in the ``[pypi]`` section.

        The section is created if missing and the file is rewritten.
        """
        config = configparser.RawConfigParser()
        fn = self.filename
        config.read(fn)
        if not config.has_section('pypi'):
            config.add_section('pypi')
        config.set('pypi', 'username', username)
        config.set('pypi', 'password', password)
        with open(fn, 'w') as f:
            config.write(f)
|
| 1909 |
+
|
| 1910 |
+
|
| 1911 |
+
def _load_pypirc(index):
    """
    Read the PyPI access configuration as supported by distutils.
    """
    # The lookup is keyed on ``index.url``; the default file location is used.
    rc_file = PyPIRCFile(url=index.url)
    return rc_file.read()
|
| 1916 |
+
|
| 1917 |
+
|
| 1918 |
+
def _store_pypirc(index):
    """Save ``index.username`` and ``index.password`` to the default file."""
    rc_file = PyPIRCFile()
    rc_file.update(index.username, index.password)
|
| 1920 |
+
|
| 1921 |
+
|
| 1922 |
+
#
|
| 1923 |
+
# get_platform()/get_host_platform() copied from Python 3.10.a0 source, with some minor
|
| 1924 |
+
# tweaks
|
| 1925 |
+
#
|
| 1926 |
+
|
| 1927 |
+
|
| 1928 |
+
def get_host_platform():
    """Return a string that identifies the current platform.  This is used mainly to
    distinguish platform-specific build directories and platform-specific built
    distributions.  Typically includes the OS name and version and the
    architecture (as supplied by 'os.uname()'), although the exact information
    included depends on the OS; eg. on Linux, the kernel version isn't
    particularly important.

    Examples of returned values:
       linux-i586
       linux-alpha (?)
       solaris-2.6-sun4u

    Windows will return one of:
       win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc)
       win32 (all others - specifically, sys.platform is returned)

    For other non-POSIX platforms, currently just returns 'sys.platform'.

    """
    if os.name == 'nt':
        build_info = sys.version.lower()
        # Checked in this order; the first marker found wins.
        for marker, tag in (('amd64', 'win-amd64'),
                            ('(arm)', 'win-arm32'),
                            ('(arm64)', 'win-arm64')):
            if marker in build_info:
                return tag
        return sys.platform

    # Set for cross builds explicitly
    if "_PYTHON_HOST_PLATFORM" in os.environ:
        return os.environ["_PYTHON_HOST_PLATFORM"]

    if os.name != 'posix' or not hasattr(os, 'uname'):
        # XXX what about the architecture? NT is Intel or Alpha,
        # Mac OS is M68k or PPC, etc.
        return sys.platform

    # Try to distinguish various flavours of Unix

    (osname, host, release, version, machine) = os.uname()

    # Convert the OS name to lowercase, remove '/' characters, and translate
    # spaces (for "Power Macintosh")
    osname = osname.lower().replace('/', '')
    machine = machine.replace(' ', '_').replace('/', '-')

    if osname.startswith('linux'):
        # At least on Linux/Intel, 'machine' is the processor --
        # i386, etc.
        # XXX what about Alpha, SPARC, etc?
        return "%s-%s" % (osname, machine)

    if osname.startswith('sunos'):
        if release[0] >= '5':  # SunOS 5 == Solaris 2
            osname = 'solaris'
            release = '%d.%s' % (int(release[0]) - 3, release[2:])
            # We can't use 'platform.architecture()[0]' because a
            # bootstrap problem. We use a dict to get an error
            # if some suspicious happens.
            bitness = {2147483647: '32bit', 9223372036854775807: '64bit'}
            machine += '.%s' % bitness[sys.maxsize]
        # fall through to standard osname-release-machine representation
    elif osname.startswith('aix'):
        from _aix_support import aix_platform
        return aix_platform()
    elif osname.startswith('cygwin'):
        osname = 'cygwin'
        rel_re = re.compile(r'[\d.]+', re.ASCII)
        found = rel_re.match(release)
        if found:
            release = found.group()
    elif osname.startswith('darwin'):
        import _osx_support
        try:
            from distutils import sysconfig
        except ImportError:
            import sysconfig
        osname, release, machine = _osx_support.get_platform_osx(
            sysconfig.get_config_vars(), osname, release, machine)

    return '%s-%s-%s' % (osname, release, machine)
|
| 2010 |
+
|
| 2011 |
+
|
| 2012 |
+
# Maps the value of the VSCMD_ARG_TGT_ARCH environment variable (read by
# get_platform()) to the platform tag to report when cross-compiling on
# Windows.  'arm64' is included so that the table covers the same set of
# Windows tags that get_host_platform() can return ('win-arm64').
_TARGET_TO_PLAT = {
    'x86': 'win32',
    'x64': 'win-amd64',
    'arm': 'win-arm32',
    'arm64': 'win-arm64',
}
|
| 2017 |
+
|
| 2018 |
+
|
| 2019 |
+
def get_platform():
    """Return the platform tag to build for.

    On anything other than Windows (``os.name != 'nt'``) this is simply the
    host platform.  On Windows, the ``VSCMD_ARG_TGT_ARCH`` environment
    variable is consulted: if it names an architecture listed in
    ``_TARGET_TO_PLAT`` the mapped tag is returned, otherwise the host
    platform is used.
    """
    if os.name != 'nt':
        return get_host_platform()
    cross_compilation_target = os.environ.get('VSCMD_ARG_TGT_ARCH')
    # An unset variable yields None, which is never a key of the table.
    plat = _TARGET_TO_PLAT.get(cross_compilation_target)
    if plat is None:
        return get_host_platform()
    return plat
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/version.py
ADDED
|
@@ -0,0 +1,751 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2012-2023 The Python Software Foundation.
|
| 4 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 5 |
+
#
|
| 6 |
+
"""
|
| 7 |
+
Implementation of a flexible versioning scheme providing support for PEP-440,
|
| 8 |
+
setuptools-compatible and semantic versioning.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
|
| 12 |
+
import re
|
| 13 |
+
|
| 14 |
+
from .compat import string_types
|
| 15 |
+
from .util import parse_requirement
|
| 16 |
+
|
| 17 |
+
__all__ = ['NormalizedVersion', 'NormalizedMatcher',
|
| 18 |
+
'LegacyVersion', 'LegacyMatcher',
|
| 19 |
+
'SemanticVersion', 'SemanticMatcher',
|
| 20 |
+
'UnsupportedVersionError', 'get_scheme']
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class UnsupportedVersionError(ValueError):
    """This is an unsupported version."""
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class Version(object):
    """Base class for comparable, hashable version objects.

    Subclasses implement :meth:`parse`, which must turn the (stripped)
    version string into a non-empty tuple.  That tuple is stored in
    ``_parts`` and drives equality, ordering and hashing.  Comparing two
    instances of different classes raises ``TypeError``.
    """

    def __init__(self, s):
        self._string = s = s.strip()
        self._parts = parts = self.parse(s)
        # Sanity checks on what the subclass's parse() returned.
        assert isinstance(parts, tuple)
        assert len(parts) > 0

    def parse(self, s):
        """Return the comparison key (a non-empty tuple) for ``s``."""
        raise NotImplementedError('please implement in a subclass')

    def _check_compatible(self, other):
        """Raise ``TypeError`` unless ``other`` has exactly our type."""
        if type(self) != type(other):
            raise TypeError('cannot compare %r and %r' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        self._check_compatible(other)
        return self._parts < other._parts

    def __gt__(self, other):
        return not (self.__lt__(other) or self.__eq__(other))

    def __le__(self, other):
        return self.__lt__(other) or self.__eq__(other)

    def __ge__(self, other):
        return self.__gt__(other) or self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self._parts)

    def __repr__(self):
        return "%s('%s')" % (self.__class__.__name__, self._string)

    def __str__(self):
        return self._string

    @property
    def is_prerelease(self):
        """Whether this version is a pre-release (subclass-defined)."""
        raise NotImplementedError('Please implement in subclasses.')
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
class Matcher(object):
    """Base class matching versions against a requirement string.

    Subclasses set ``version_class``.  The requirement string is parsed with
    :meth:`parse_requirement`; each constraint becomes an
    ``(operator, version, is_prefix)`` triple stored in ``_parts``.
    """
    version_class = None

    # value is either a callable or the name of a method
    _operators = {
        '<': lambda v, c, p: v < c,
        '>': lambda v, c, p: v > c,
        '<=': lambda v, c, p: v == c or v < c,
        '>=': lambda v, c, p: v == c or v > c,
        '==': lambda v, c, p: v == c,
        '===': lambda v, c, p: v == c,
        # by default, compatible => >=.
        '~=': lambda v, c, p: v == c or v > c,
        '!=': lambda v, c, p: v != c,
    }

    # this is a method only to support alternative implementations
    # via overriding
    def parse_requirement(self, s):
        return parse_requirement(s)

    def __init__(self, s):
        """
        Parse the requirement string ``s``.

        :raises ValueError: if no ``version_class`` is set, if the
                            requirement does not parse, or if a ``.*``
                            suffix is used with an operator other than
                            ``==`` / ``!=``.
        """
        if self.version_class is None:
            raise ValueError('Please specify a version class')
        self._string = s = s.strip()
        r = self.parse_requirement(s)
        if not r:
            raise ValueError('Not valid: %r' % s)
        self.name = r.name
        self.key = self.name.lower()  # for case-insensitive comparisons
        clist = []
        if r.constraints:
            for op, vs in r.constraints:
                if vs.endswith('.*'):
                    if op not in ('==', '!='):
                        raise ValueError('\'.*\' not allowed for '
                                         '%r constraints' % op)
                    # Could be a partial version (e.g. for '2.*') which
                    # won't parse as a version, so keep it as a string
                    vn, prefix = vs[:-2], True
                    # Just to check that vn is a valid version
                    self.version_class(vn)
                else:
                    # Should parse as a version, so we can create an
                    # instance for the comparison
                    vn, prefix = self.version_class(vs), False
                clist.append((op, vn, prefix))
        self._parts = tuple(clist)

    def match(self, version):
        """
        Check if the provided version matches the constraints.

        :param version: The version to match against this instance.
        :type version: String or :class:`Version` instance.
        :return: ``True`` only if every constraint is satisfied.
        :raises NotImplementedError: if an operator has no implementation.
        """
        if isinstance(version, string_types):
            version = self.version_class(version)
        for operator, constraint, prefix in self._parts:
            f = self._operators.get(operator)
            if isinstance(f, string_types):
                # The table holds a method name: resolve it on this instance.
                f = getattr(self, f)
            if not f:
                msg = ('%r not implemented '
                       'for %s' % (operator, self.__class__.__name__))
                raise NotImplementedError(msg)
            if not f(version, constraint, prefix):
                return False
        return True

    @property
    def exact_version(self):
        """The pinned version if the sole constraint is ``==``/``===``,
        otherwise ``None``."""
        result = None
        if len(self._parts) == 1 and self._parts[0][0] in ('==', '==='):
            result = self._parts[0][1]
        return result

    def _check_compatible(self, other):
        """Raise ``TypeError`` unless ``other`` has our type and name."""
        if type(self) != type(other) or self.name != other.name:
            raise TypeError('cannot compare %s and %s' % (self, other))

    def __eq__(self, other):
        self._check_compatible(other)
        return self.key == other.key and self._parts == other._parts

    def __ne__(self, other):
        return not self.__eq__(other)

    # See http://docs.python.org/reference/datamodel#object.__hash__
    def __hash__(self):
        return hash(self.key) + hash(self._parts)

    def __repr__(self):
        return "%s(%r)" % (self.__class__.__name__, self._string)

    def __str__(self):
        return self._string
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# Case-insensitive pattern for the version strings accepted by
# _pep_440_key().  Indices into m.groups() used by callers:
#   [0]      epoch including the trailing '!'
#   [1]      dotted release numbers
#   [4:6]    pre-release tag and number
#   [7:9]    post-release tag and number
#   [10:12]  'dev' tag and number
#   [13]     local version label (without the leading '+')
PEP440_VERSION_RE = re.compile(r'^v?(\d+!)?(\d+(\.\d+)*)((a|alpha|b|beta|c|rc|pre|preview)(\d+)?)?'
                               r'(\.(post|r|rev)(\d+)?)?([._-]?(dev)(\d+)?)?'
                               r'(\+([a-zA-Z\d]+(\.[a-zA-Z\d]+)?))?$', re.I)
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def _pep_440_key(s):
    """Return a sortable key for the version string ``s``.

    The key is the tuple ``(epoch, nums, pre, post, dev, local)``; absent
    clauses are replaced by sentinel tuples chosen so that plain tuple
    comparison orders versions correctly.

    :raises UnsupportedVersionError: if ``s`` does not match
                                     ``PEP440_VERSION_RE``.
    """
    s = s.strip()
    m = PEP440_VERSION_RE.match(s)
    if not m:
        raise UnsupportedVersionError('Not a valid version: %s' % s)
    groups = m.groups()

    def tagged(pair):
        # (None, None) means the clause is absent; a tag without a number
        # is given the number 0.
        if pair == (None, None):
            return ()
        tag, number = pair
        return tag, (0 if number is None else int(number))

    # Trailing zeroes are dropped so that X.Y == X.Y.0 == X.Y.0.0.
    nums = tuple(int(v) for v in groups[1].split('.'))
    while len(nums) > 1 and nums[-1] == 0:
        nums = nums[:-1]

    # groups[0] carries the trailing '!', hence the [:-1].
    epoch = int(groups[0][:-1]) if groups[0] else 0
    pre = tagged(groups[4:6])
    post = tagged(groups[7:9])
    dev = tagged(groups[10:12])
    local = groups[13]
    if local is None:
        local = ()
    else:
        parts = []
        for part in local.split('.'):
            # to ensure that numeric compares as > lexicographic, avoid
            # comparing them directly, but encode a tuple which ensures
            # correct sorting
            if part.isdigit():
                part = (1, int(part))
            else:
                part = (0, part)
            parts.append(part)
        local = tuple(parts)
    if not pre:
        # either before pre-release, or final release and after
        if not post and dev:
            # before pre-release
            pre = ('a', -1)  # to sort before a0
        else:
            pre = ('z',)  # to sort after all pre-releases
    # now look at the state of post and dev.
    if not post:
        post = ('_',)  # sort before 'a'
    if not dev:
        dev = ('final',)

    return epoch, nums, pre, post, dev, local
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
_normalized_key = _pep_440_key
|
| 254 |
+
|
| 255 |
+
|
| 256 |
+
class NormalizedVersion(Version):
    """A version parsed with ``_normalized_key`` (the PEP 440 key function).

    Examples of accepted strings:
        1           # equivalent to "1.0"
        1.2         # equivalent to "1.2.0"
        1.2.0
        1.2a        # a missing pre-release number is taken as 0
        1.2a1
        1.2.3a2
        1.2.3b1
        1.2.3c1
        1.2.3.4

    Anything that ``PEP440_VERSION_RE`` does not match raises
    ``UnsupportedVersionError``.
    """
    def parse(self, s):
        result = _normalized_key(s)
        # _normalized_key loses trailing zeroes in the release
        # clause, since that's needed to ensure that X.Y == X.Y.0 == X.Y.0.0
        # However, PEP 440 prefix matching needs it: for example,
        # (~= 1.4.5.0) matches differently to (~= 1.4.5.0.0).
        m = PEP440_VERSION_RE.match(s)  # must succeed
        groups = m.groups()
        self._release_clause = tuple(int(v) for v in groups[1].split('.'))
        return result

    PREREL_TAGS = {'a', 'b', 'c', 'rc', 'dev'}

    @property
    def is_prerelease(self):
        # _parts is (epoch, nums, pre, post, dev, local).  The epoch is a
        # plain int, so it must be skipped rather than indexed: a non-zero
        # epoch used to raise TypeError here.
        return any(t[0] in self.PREREL_TAGS
                   for t in self._parts if isinstance(t, tuple) and t)
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _match_prefix(x, y):
    """Return True if ``str(x)`` equals ``str(y)`` or starts with
    ``str(y)`` followed by a dot, so '1.2' is a prefix of '1.2.3' but not
    of '1.20'.
    """
    x = str(x)
    y = str(y)
    return x == y or x.startswith(y + '.')
|
| 301 |
+
|
| 302 |
+
|
| 303 |
+
class NormalizedMatcher(Matcher):
    """Matcher for :class:`NormalizedVersion` with dedicated handling of
    local version labels and release-prefix rules for each operator."""
    version_class = NormalizedVersion

    # value is either a callable or the name of a method
    _operators = {
        '~=': '_match_compatible',
        '<': '_match_lt',
        '>': '_match_gt',
        '<=': '_match_le',
        '>=': '_match_ge',
        '==': '_match_eq',
        '===': '_match_arbitrary',
        '!=': '_match_ne',
    }

    @staticmethod
    def _release_prefix(release_clause):
        """Join release numbers into a dotted string such as '1.4.5'."""
        return '.'.join([str(i) for i in release_clause])

    def _adjust_local(self, version, constraint, prefix):
        """Drop the local label from ``version`` when the constraint has
        none, and return the ``(version, constraint)`` pair to compare."""
        if prefix:
            # For prefix matches the constraint is still a plain string.
            strip_local = '+' not in constraint and version._parts[-1]
        else:
            # both constraint and version are
            # NormalizedVersion instances.
            # If constraint does not have a local component,
            # ensure the version doesn't, either.
            strip_local = not constraint._parts[-1] and version._parts[-1]
        if strip_local:
            s = version._string.split('+', 1)[0]
            version = self.version_class(s)
        return version, constraint

    def _match_lt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version >= constraint:
            return False
        # A version sharing the constraint's release prefix is excluded.
        pfx = self._release_prefix(constraint._release_clause)
        return not _match_prefix(version, pfx)

    def _match_gt(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version <= constraint:
            return False
        # A version sharing the constraint's release prefix is excluded.
        pfx = self._release_prefix(constraint._release_clause)
        return not _match_prefix(version, pfx)

    def _match_le(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version <= constraint

    def _match_ge(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        return version >= constraint

    def _match_eq(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if not prefix:
            result = (version == constraint)
        else:
            result = _match_prefix(version, constraint)
        return result

    def _match_arbitrary(self, version, constraint, prefix):
        # '===' compares the string forms only.
        return str(version) == str(constraint)

    def _match_ne(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if not prefix:
            result = (version != constraint)
        else:
            result = not _match_prefix(version, constraint)
        return result

    def _match_compatible(self, version, constraint, prefix):
        version, constraint = self._adjust_local(version, constraint, prefix)
        if version == constraint:
            return True
        if version < constraint:
            return False
        # The version must share the constraint's release clause minus its
        # last component (a one-component clause is used as is).
        release_clause = constraint._release_clause
        if len(release_clause) > 1:
            release_clause = release_clause[:-1]
        pfx = self._release_prefix(release_clause)
        return _match_prefix(version, pfx)
|
| 388 |
+
|
| 389 |
+
|
| 390 |
+
# (pattern, replacement) pairs applied in order to the whole lower-cased
# version string by _suggest_semantic_version().
_REPLACEMENTS = (
    (re.compile('[.+-]$'), ''),                     # remove trailing puncts
    (re.compile(r'^[.](\d)'), r'0.\1'),             # .N -> 0.N at start
    (re.compile('^[.-]'), ''),                      # remove leading puncts
    (re.compile(r'^\((.*)\)$'), r'\1'),             # remove parentheses
    (re.compile(r'^v(ersion)?\s*(\d+)'), r'\2'),    # remove leading v(ersion)
    (re.compile(r'^r(ev)?\s*(\d+)'), r'\2'),        # remove leading r(ev)
    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
    (re.compile(r'\b(alfa|apha)\b'), 'alpha'),      # misspelt alpha
    (re.compile(r'\b(pre-alpha|prealpha)\b'),
     'pre.alpha'),                                  # standardise
    (re.compile(r'\(beta\)$'), 'beta'),             # remove parentheses
)

# (pattern, replacement) pairs applied in order to the non-numeric suffix.
_SUFFIX_REPLACEMENTS = (
    (re.compile('^[:~._+-]+'), ''),                   # remove leading puncts
    (re.compile('[,*")([\\]]'), ''),                  # remove unwanted chars
    (re.compile('[~:+_ -]'), '.'),                    # replace illegal chars
    (re.compile('[.]{2,}'), '.'),                   # multiple runs of '.'
    (re.compile(r'\.$'), ''),                       # trailing '.'
)

# Leading run of dotted numbers, e.g. '1.2.3' in '1.2.3rc1'.
_NUMERIC_PREFIX = re.compile(r'(\d+(\.\d+)*)')
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
def _suggest_semantic_version(s):
    """
    Try to suggest a semantic form for a version for which
    _suggest_normalized_version couldn't come up with anything.

    Returns the suggested string, or None if the result does not pass
    is_semver().
    """
    result = s.strip().lower()
    for pat, repl in _REPLACEMENTS:
        result = pat.sub(repl, result)
    if not result:
        result = '0.0.0'

    # Now look for numeric prefix, and separate it out from
    # the rest.
    m = _NUMERIC_PREFIX.match(result)
    if not m:
        prefix = '0.0.0'
        suffix = result
    else:
        numbers = [int(i) for i in m.groups()[0].split('.')]
        # Pad to major.minor.patch ...
        while len(numbers) < 3:
            numbers.append(0)
        # ... and push any further components into the suffix.
        rest = result[m.end():]
        suffix = '.'.join([str(i) for i in numbers[3:]]) + rest
        prefix = '.'.join([str(i) for i in numbers[:3]])
        suffix = suffix.strip()
    if suffix:
        # massage the suffix.
        for pat, repl in _SUFFIX_REPLACEMENTS:
            suffix = pat.sub(repl, suffix)

    if not suffix:
        result = prefix
    else:
        # 'dev' suffixes become a pre-release clause, anything else
        # becomes build metadata.
        sep = '-' if 'dev' in suffix else '+'
        result = prefix + sep + suffix
    if not is_semver(result):
        result = None
    return result
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
def _suggest_normalized_version(s):
    """Suggest a normalized version close to the given version string.

    If you have a version string that isn't rational (i.e. NormalizedVersion
    doesn't like it) then you might be able to get an equivalent (or close)
    rational version from this function.

    This does a number of simple normalizations to the given string, based
    on observation of versions currently in use on PyPI. Given a dump of
    those version during PyCon 2009, 4287 of them:
    - 2312 (53.93%) match NormalizedVersion without change
      with the automatic suggestion
    - 3474 (81.04%) match when using this suggestion method

    The substitutions below are order-dependent; each one operates on the
    output of the previous one.

    @param s {str} An irrational version string.
    @returns A rational version string, or None, if couldn't determine one.
    """
    try:
        _normalized_key(s)
        return s   # already rational
    except UnsupportedVersionError:
        pass

    rs = s.lower()

    # part of this could use maketrans
    for orig, repl in (('-alpha', 'a'), ('-beta', 'b'), ('alpha', 'a'),
                       ('beta', 'b'), ('rc', 'c'), ('-final', ''),
                       ('-pre', 'c'),
                       ('-release', ''), ('.release', ''), ('-stable', ''),
                       ('+', '.'), ('_', '.'), (' ', ''), ('.final', ''),
                       ('final', '')):
        rs = rs.replace(orig, repl)

    # if something ends with dev or pre, we add a 0
    rs = re.sub(r"pre$", r"pre0", rs)
    rs = re.sub(r"dev$", r"dev0", rs)

    # if we have something like "b-2" or "a.2" at the end of the
    # version, that is probably beta, alpha, etc
    # let's remove the dash or dot
    rs = re.sub(r"([abc]|rc)[\-\.](\d+)$", r"\1\2", rs)

    # 1.0-dev-r371 -> 1.0.dev371
    # 0.1-dev-r79 -> 0.1.dev79
    rs = re.sub(r"[\-\.](dev)[\-\.]?r?(\d+)$", r".\1\2", rs)

    # Clean: 2.0.a.3, 2.0.b1, 0.9.0~c1
    rs = re.sub(r"[.~]?([abc])\.?", r"\1", rs)

    # Clean: v0.3, v1.0
    if rs.startswith('v'):
        rs = rs[1:]

    # Clean leading '0's on numbers.
    # TODO: unintended side-effect on, e.g., "2003.05.09"
    # PyPI stats: 77 (~2%) better
    rs = re.sub(r"\b0+(\d+)(?!\d)", r"\1", rs)

    # Clean a/b/c with no version. E.g. "1.0a" -> "1.0a0". Setuptools infers
    # zero.
    # PyPI stats: 245 (7.56%) better
    rs = re.sub(r"(\d+[abc])$", r"\g<1>0", rs)

    # the 'dev-rNNN' tag is a dev tag
    rs = re.sub(r"\.?(dev-r|dev\.r)\.?(\d+)$", r".dev\2", rs)

    # clean the - when used as a pre delimiter
    rs = re.sub(r"-(a|b|c)(\d+)$", r"\1\2", rs)

    # a terminal "dev" or "devel" can be changed into ".dev0"
    rs = re.sub(r"[\.\-](dev|devel)$", r".dev0", rs)

    # a terminal "dev" can be changed into ".dev0"
    rs = re.sub(r"(?![\.\-])dev$", r".dev0", rs)

    # a terminal "final" or "stable" can be removed
    rs = re.sub(r"(final|stable)$", "", rs)

    # The 'r' and the '-' tags are post release tags
    #   0.4a1.r10       ->  0.4a1.post10
    #   0.9.33-17222    ->  0.9.33.post17222
    #   0.9.33-r17222   ->  0.9.33.post17222
    rs = re.sub(r"\.?(r|-|-r)\.?(\d+)$", r".post\2", rs)

    # Clean 'r' instead of 'dev' usage:
    #   0.9.33+r17222   ->  0.9.33.dev17222
    #   1.0dev123       ->  1.0.dev123
    #   1.0.git123      ->  1.0.dev123
    #   1.0.bzr123      ->  1.0.dev123
    #   0.1a0dev.123    ->  0.1a0.dev123
    # PyPI stats:  ~150 (~4%) better
    rs = re.sub(r"\.?(dev|git|bzr)\.?(\d+)$", r".dev\2", rs)

    # Clean '.pre' (normalized from '-pre' above) instead of 'c' usage:
    #   0.2.pre1        ->  0.2c1
    #   0.2-c1         ->  0.2c1
    #   1.0preview123   ->  1.0c123
    # PyPI stats: ~21 (0.62%) better
    rs = re.sub(r"\.?(pre|preview|-c)(\d+)$", r"c\g<2>", rs)

    # Tcl/Tk uses "px" for their post release markers
    rs = re.sub(r"p(\d+)$", r".post\1", rs)

    try:
        _normalized_key(rs)
    except UnsupportedVersionError:
        rs = None
    return rs
|
| 570 |
+
|
| 571 |
+
#
|
| 572 |
+
# Legacy version processing (distribute-compatible)
|
| 573 |
+
#
|
| 574 |
+
|
| 575 |
+
|
| 576 |
+
# Splits a legacy version string into runs of letters, runs of digits and
# single '.' / '-' separators (the separators are captured too).
_VERSION_PART = re.compile(r'([a-z]+|\d+|[\.-])', re.I)

# Substitutions applied to each split part by _legacy_key(); parts mapped
# to None are dropped.
_VERSION_REPLACE = {
    'pre': 'c',
    'preview': 'c',
    '-': 'final-',
    'rc': 'c',
    'dev': '@',
    '': None,
    '.': None,
}
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def _legacy_key(s):
    """Return a tuple of strings usable as a sort key for the legacy
    version string ``s``.

    Numeric parts are zero-filled to eight digits; all other parts get a
    '*' prefix, and a final '*final' marker is always appended.
    """
    def get_parts(s):
        result = []
        for p in _VERSION_PART.split(s.lower()):
            p = _VERSION_REPLACE.get(p, p)
            if p:
                if '0' <= p[:1] <= '9':
                    p = p.zfill(8)
                else:
                    p = '*' + p
                result.append(p)
        result.append('*final')
        return result

    result = []
    for p in get_parts(s):
        if p.startswith('*'):
            if p < '*final':
                # A tag sorting before '*final' discards the '*final-'
                # markers that precede it.
                while result and result[-1] == '*final-':
                    result.pop()
            # Zero-valued numeric parts directly before any tag are dropped.
            while result and result[-1] == '00000000':
                result.pop()
        result.append(p)
    return tuple(result)
|
| 612 |
+
|
| 613 |
+
|
| 614 |
+
class LegacyVersion(Version):
    """Version whose comparison key is produced by ``_legacy_key``."""

    def parse(self, s):
        return _legacy_key(s)

    @property
    def is_prerelease(self):
        # Any '*'-prefixed tag that sorts before '*final' marks a
        # pre-release.
        return any(isinstance(x, string_types) and x.startswith('*') and
                   x < '*final'
                   for x in self._parts)
|
| 627 |
+
|
| 628 |
+
|
| 629 |
+
class LegacyMatcher(Matcher):
    """Matcher for :class:`LegacyVersion`; only '~=' differs from the
    base-class operators."""
    version_class = LegacyVersion

    # Copy, so that the base class table is not modified.
    _operators = dict(Matcher._operators)
    _operators['~='] = '_match_compatible'

    numeric_re = re.compile(r'^(\d+(\.\d+)*)')

    def _match_compatible(self, version, constraint, prefix):
        """Return whether ``version`` is compatible with ``constraint``:
        not lower than it, and sharing its leading dotted numbers minus the
        last one.  If the constraint has no numeric prefix, a warning is
        logged and True is returned."""
        if version < constraint:
            return False
        m = self.numeric_re.match(str(constraint))
        if not m:
            logger.warning('Cannot compute compatible match for version %s '
                           ' and constraint %s', version, constraint)
            return True
        s = m.groups()[0]
        if '.' in s:
            s = s.rsplit('.', 1)[0]
        return _match_prefix(version, s)
|
| 649 |
+
|
| 650 |
+
#
|
| 651 |
+
# Semantic versioning
|
| 652 |
+
#
|
| 653 |
+
|
| 654 |
+
|
| 655 |
+
# major.minor.patch, then an optional '-' pre-release clause and an
# optional '+' build clause; m.groups()[3] and [5] hold those clauses
# including their leading '-' / '+'.
_SEMVER_RE = re.compile(r'^(\d+)\.(\d+)\.(\d+)'
                        r'(-[a-z0-9]+(\.[a-z0-9-]+)*)?'
                        r'(\+[a-z0-9]+(\.[a-z0-9-]+)*)?$', re.I)
|
| 658 |
+
|
| 659 |
+
|
| 660 |
+
def is_semver(s):
    """Match ``s`` against ``_SEMVER_RE``.

    Returns the match object (truthy) on success and None otherwise;
    _semantic_key() relies on getting the match object back.
    """
    return _SEMVER_RE.match(s)
|
| 662 |
+
|
| 663 |
+
|
| 664 |
+
def _semantic_key(s):
    """Return the sort key ``((major, minor, patch), pre, build)`` for the
    semantic version string ``s``.

    :raises UnsupportedVersionError: if ``s`` is not accepted by
                                     is_semver().
    """
    def make_tuple(s, absent):
        if s is None:
            return (absent,)
        # Drop the leading '-' or '+' before splitting.
        parts = s[1:].split('.')
        # We can't compare ints and strings on Python 3, so fudge it
        # by zero-filling numeric values so simulate a numeric comparison
        return tuple([p.zfill(8) if p.isdigit() else p for p in parts])

    m = is_semver(s)
    if not m:
        raise UnsupportedVersionError(s)
    groups = m.groups()
    major, minor, patch = [int(i) for i in groups[:3]]
    # choose the '|' and '*' so that versions sort correctly
    pre, build = make_tuple(groups[3], '|'), make_tuple(groups[5], '*')
    return (major, minor, patch), pre, build
|
| 683 |
+
|
| 684 |
+
|
| 685 |
+
class SemanticVersion(Version):
    """Version whose comparison key is produced by ``_semantic_key``."""

    def parse(self, s):
        return _semantic_key(s)

    @property
    def is_prerelease(self):
        # _parts[1] is the pre-release tuple; '|' is the placeholder that
        # _semantic_key() stores when there is no pre-release clause.
        return self._parts[1][0] != '|'
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
class SemanticMatcher(Matcher):
    """Matcher for :class:`SemanticVersion`, using the base operators."""
    version_class = SemanticVersion
|
| 696 |
+
|
| 697 |
+
|
| 698 |
+
class VersionScheme(object):
    """Bundle of a key function, a matcher class and an optional
    suggester callable for one versioning scheme."""

    def __init__(self, key, matcher, suggester=None):
        self.key = key
        self.matcher = matcher
        self.suggester = suggester

    def is_valid_version(self, s):
        """Return True if the matcher's version class accepts ``s``, False
        if it raises UnsupportedVersionError."""
        try:
            self.matcher.version_class(s)
        except UnsupportedVersionError:
            return False
        else:
            return True

    def is_valid_matcher(self, s):
        """Return True if the matcher class accepts ``s``, False if it
        raises UnsupportedVersionError.  Other exceptions propagate."""
        try:
            self.matcher(s)
        except UnsupportedVersionError:
            return False
        else:
            return True

    def is_valid_constraint_list(self, s):
        """
        Used for processing some metadata fields
        """
        # See issue #140. Be tolerant of a single trailing comma.
        if s.endswith(','):
            s = s[:-1]
        return self.is_valid_matcher('dummy_name (%s)' % s)

    def suggest(self, s):
        """Return the suggester's result for ``s``, or None when no
        suggester was given."""
        if self.suggester is None:
            return None
        return self.suggester(s)
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
# Registry of the built-in version schemes, keyed by scheme name.
_SCHEMES = {
    'normalized': VersionScheme(_normalized_key, NormalizedMatcher,
                                _suggest_normalized_version),
    # The suggester is stored as an instance attribute and invoked as
    # ``self.suggester(s)``, so it is called with exactly one argument.
    # The legacy scheme suggests the input string unchanged.
    'legacy': VersionScheme(_legacy_key, LegacyMatcher, lambda s: s),
    'semantic': VersionScheme(_semantic_key, SemanticMatcher,
                              _suggest_semantic_version),
}

# 'default' is an alias for the 'normalized' scheme (same object).
_SCHEMES['default'] = _SCHEMES['normalized']
|
| 746 |
+
|
| 747 |
+
|
| 748 |
+
def get_scheme(name):
    """
    Return the version scheme registered in ``_SCHEMES`` under *name*.

    Raises ValueError if *name* is not a registered scheme name.
    """
    if name in _SCHEMES:
        return _SCHEMES[name]
    raise ValueError('unknown scheme name: %r' % name)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/distlib/wheel.py
ADDED
|
@@ -0,0 +1,1099 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
#
|
| 3 |
+
# Copyright (C) 2013-2023 Vinay Sajip.
|
| 4 |
+
# Licensed to the Python Software Foundation under a contributor agreement.
|
| 5 |
+
# See LICENSE.txt and CONTRIBUTORS.txt.
|
| 6 |
+
#
|
| 7 |
+
from __future__ import unicode_literals
|
| 8 |
+
|
| 9 |
+
import base64
|
| 10 |
+
import codecs
|
| 11 |
+
import datetime
|
| 12 |
+
from email import message_from_file
|
| 13 |
+
import hashlib
|
| 14 |
+
import json
|
| 15 |
+
import logging
|
| 16 |
+
import os
|
| 17 |
+
import posixpath
|
| 18 |
+
import re
|
| 19 |
+
import shutil
|
| 20 |
+
import sys
|
| 21 |
+
import tempfile
|
| 22 |
+
import zipfile
|
| 23 |
+
|
| 24 |
+
from . import __version__, DistlibException
|
| 25 |
+
from .compat import sysconfig, ZipFile, fsdecode, text_type, filter
|
| 26 |
+
from .database import InstalledDistribution
|
| 27 |
+
from .metadata import Metadata, WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME
|
| 28 |
+
from .util import (FileOperator, convert_path, CSVReader, CSVWriter, Cache,
|
| 29 |
+
cached_property, get_cache_base, read_exports, tempdir,
|
| 30 |
+
get_platform)
|
| 31 |
+
from .version import NormalizedVersion, UnsupportedVersionError
|
| 32 |
+
|
| 33 |
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

cache = None  # created when needed

# Two-letter implementation prefix, chosen from what the running interpreter
# exposes: 'pp' if sys has pypy_version_info, 'jy' if sys.platform starts
# with 'java', 'ip' if sys.platform is 'cli', otherwise 'cp'.
if hasattr(sys, 'pypy_version_info'):  # pragma: no cover
    IMP_PREFIX = 'pp'
elif sys.platform.startswith('java'):  # pragma: no cover
    IMP_PREFIX = 'jy'
elif sys.platform == 'cli':  # pragma: no cover
    IMP_PREFIX = 'ip'
else:
    IMP_PREFIX = 'cp'

# Version suffix: the 'py_version_nodot' config variable, falling back to the
# major and minor numbers of sys.version_info joined without a separator.
VER_SUFFIX = sysconfig.get_config_var('py_version_nodot')
if not VER_SUFFIX:  # pragma: no cover
    VER_SUFFIX = '%s%s' % sys.version_info[:2]
# Generic ('py' + suffix) and implementation-specific (prefix + suffix) tags.
PYVER = 'py' + VER_SUFFIX
IMPVER = IMP_PREFIX + VER_SUFFIX

# Platform string with '-' and '.' replaced by '_'.
ARCH = get_platform().replace('-', '_').replace('.', '_')

# ABI tag.  A SOABI value starting with 'cpython-' has that prefix replaced
# by 'cp' and is truncated at the next '-'; in every other case (SOABI unset
# or not starting with 'cpython-') the tag is assembled by _derive_abi().
ABI = sysconfig.get_config_var('SOABI')
if ABI and ABI.startswith('cpython-'):
    ABI = ABI.replace('cpython-', 'cp').split('-')[0]
else:

    def _derive_abi():
        # Start from 'cp' + version suffix, then append flag letters.
        parts = ['cp', VER_SUFFIX]
        # 'd' when the Py_DEBUG config variable is truthy
        if sysconfig.get_config_var('Py_DEBUG'):
            parts.append('d')
        if IMP_PREFIX == 'cp':
            vi = sys.version_info[:2]
            if vi < (3, 8):
                # 'm' when WITH_PYMALLOC is truthy; an unset variable is
                # treated as True
                wpm = sysconfig.get_config_var('WITH_PYMALLOC')
                if wpm is None:
                    wpm = True
                if wpm:
                    parts.append('m')
                if vi < (3, 3):
                    # 'u' when Py_UNICODE_SIZE is 4, or is unset and
                    # sys.maxunicode is 0x10FFFF
                    us = sysconfig.get_config_var('Py_UNICODE_SIZE')
                    if us == 4 or (us is None and sys.maxunicode == 0x10FFFF):
                        parts.append('u')
        return ''.join(parts)

    ABI = _derive_abi()
    # The helper is only needed once, so it is removed from the namespace.
    del _derive_abi
|
| 79 |
+
|
| 80 |
+
# Full wheel filename.  Named groups: nm (name), vn (version), optional bn
# (build), py (dot-separated Python tags), bi (ABI tag) and ar (dot-separated
# platform tags), followed by the '.whl' extension.
FILENAME_RE = re.compile(
    r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?
-(?P<py>\w+\d+(\.\w+\d+)*)
-(?P<bi>\w+)
-(?P<ar>\w+(\.\w+)*)
\.whl$
''', re.IGNORECASE | re.VERBOSE)

# Just "name-version" with an optional "-build" part and nothing after it.
NAME_VERSION_RE = re.compile(
    r'''
(?P<nm>[^-]+)
-(?P<vn>\d+[^-]*)
(-(?P<bn>\d+[^-]*))?$
''', re.IGNORECASE | re.VERBOSE)

# A shebang at the start of the data (optional leading whitespace, then '#!'
# up to, but not including, the end of the line).
SHEBANG_RE = re.compile(br'\s*#![^\r\n]*')
# Splits a shebang line into the interpreter part (group 1, optionally a
# double-quoted path) and whatever follows it after whitespace (group 3).
SHEBANG_DETAIL_RE = re.compile(br'^(\s*#!("[^"]+"|\S+))\s+(.*)$')
# Generic shebangs written in place of the original interpreter path.
SHEBANG_PYTHON = b'#!python'
SHEBANG_PYTHONW = b'#!pythonw'

# Convert a native path to '/'-separated form; the identity function when the
# OS separator is already '/'.
if os.sep == '/':
    to_posix = lambda o: o
else:
    to_posix = lambda o: o.replace(os.sep, '/')
|
| 107 |
+
|
| 108 |
+
if sys.version_info[0] < 3:
|
| 109 |
+
import imp
|
| 110 |
+
else:
|
| 111 |
+
imp = None
|
| 112 |
+
import importlib.machinery
|
| 113 |
+
import importlib.util
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def _get_suffixes():
    """
    Return the module file suffixes known to the interpreter.

    Uses ``importlib.machinery.EXTENSION_SUFFIXES`` when the ``imp`` module
    is not in use, otherwise the first item of every ``imp.get_suffixes()``
    entry.
    """
    if not imp:
        return importlib.machinery.EXTENSION_SUFFIXES
    return [entry[0] for entry in imp.get_suffixes()]
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _load_dynamic(name, path):
    """
    Load the module file at *path* under the module name *name* and return
    the module object.

    Without ``imp`` the module is registered in ``sys.modules`` before it is
    executed.
    """
    if imp:
        return imp.load_dynamic(name, path)
    # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly
    file_spec = importlib.util.spec_from_file_location(name, path)
    loaded = importlib.util.module_from_spec(file_spec)
    sys.modules[name] = loaded
    file_spec.loader.exec_module(loaded)
    return loaded
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
class Mounter(object):
|
| 136 |
+
|
| 137 |
+
def __init__(self):
|
| 138 |
+
self.impure_wheels = {}
|
| 139 |
+
self.libs = {}
|
| 140 |
+
|
| 141 |
+
def add(self, pathname, extensions):
|
| 142 |
+
self.impure_wheels[pathname] = extensions
|
| 143 |
+
self.libs.update(extensions)
|
| 144 |
+
|
| 145 |
+
def remove(self, pathname):
|
| 146 |
+
extensions = self.impure_wheels.pop(pathname)
|
| 147 |
+
for k, v in extensions:
|
| 148 |
+
if k in self.libs:
|
| 149 |
+
del self.libs[k]
|
| 150 |
+
|
| 151 |
+
def find_module(self, fullname, path=None):
|
| 152 |
+
if fullname in self.libs:
|
| 153 |
+
result = self
|
| 154 |
+
else:
|
| 155 |
+
result = None
|
| 156 |
+
return result
|
| 157 |
+
|
| 158 |
+
def load_module(self, fullname):
|
| 159 |
+
if fullname in sys.modules:
|
| 160 |
+
result = sys.modules[fullname]
|
| 161 |
+
else:
|
| 162 |
+
if fullname not in self.libs:
|
| 163 |
+
raise ImportError('unable to find extension for %s' % fullname)
|
| 164 |
+
result = _load_dynamic(fullname, self.libs[fullname])
|
| 165 |
+
result.__loader__ = self
|
| 166 |
+
parts = fullname.rsplit('.', 1)
|
| 167 |
+
if len(parts) > 1:
|
| 168 |
+
result.__package__ = parts[0]
|
| 169 |
+
return result
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
_hook = Mounter()
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
class Wheel(object):
|
| 176 |
+
"""
|
| 177 |
+
Class to build and install from Wheel files (PEP 427).
|
| 178 |
+
"""
|
| 179 |
+
|
| 180 |
+
wheel_version = (1, 1)
|
| 181 |
+
hash_kind = 'sha256'
|
| 182 |
+
|
| 183 |
+
def __init__(self, filename=None, sign=False, verify=False):
    """
    Initialise an instance using a (valid) filename.

    *filename* may be None (a dummy name and version are used), a bare
    ``name-version[-build]`` string, or a wheel filename optionally
    preceded by a directory.  Raises DistlibException if it is none of
    these.
    """
    self.sign = sign
    self.should_verify = verify
    self.buildver = ''
    self.pyver = [PYVER]
    self.abi = ['none']
    self.arch = ['any']
    self.dirname = os.getcwd()

    if filename is None:
        self.name = 'dummy'
        self.version = '0.1'
        self._filename = self.filename
        return

    name_version = NAME_VERSION_RE.match(filename)
    if name_version:
        fields = name_version.groupdict('')
        self.name = fields['nm']
        # Reinstate the local version separator
        self.version = fields['vn'].replace('_', '-')
        self.buildver = fields['bn']
        self._filename = self.filename
        return

    # Not a bare name-version: treat it as a path to a wheel file.
    head, tail = os.path.split(filename)
    parsed = FILENAME_RE.match(tail)
    if not parsed:
        raise DistlibException('Invalid name or '
                               'filename: %r' % tail)
    if head:
        self.dirname = os.path.abspath(head)
    self._filename = tail
    fields = parsed.groupdict('')
    self.name = fields['nm']
    self.version = fields['vn']
    self.buildver = fields['bn']
    self.pyver = fields['py'].split('.')
    self.abi = fields['bi'].split('.')
    self.arch = fields['ar'].split('.')
|
| 223 |
+
|
| 224 |
+
@property
|
| 225 |
+
def filename(self):
|
| 226 |
+
"""
|
| 227 |
+
Build and return a filename from the various components.
|
| 228 |
+
"""
|
| 229 |
+
if self.buildver:
|
| 230 |
+
buildver = '-' + self.buildver
|
| 231 |
+
else:
|
| 232 |
+
buildver = ''
|
| 233 |
+
pyver = '.'.join(self.pyver)
|
| 234 |
+
abi = '.'.join(self.abi)
|
| 235 |
+
arch = '.'.join(self.arch)
|
| 236 |
+
# replace - with _ as a local version separator
|
| 237 |
+
version = self.version.replace('-', '_')
|
| 238 |
+
return '%s-%s%s-%s-%s-%s.whl' % (self.name, version, buildver, pyver,
|
| 239 |
+
abi, arch)
|
| 240 |
+
|
| 241 |
+
@property
|
| 242 |
+
def exists(self):
|
| 243 |
+
path = os.path.join(self.dirname, self.filename)
|
| 244 |
+
return os.path.isfile(path)
|
| 245 |
+
|
| 246 |
+
@property
|
| 247 |
+
def tags(self):
|
| 248 |
+
for pyver in self.pyver:
|
| 249 |
+
for abi in self.abi:
|
| 250 |
+
for arch in self.arch:
|
| 251 |
+
yield pyver, abi, arch
|
| 252 |
+
|
| 253 |
+
@cached_property
def metadata(self):
    """
    Read and return the distribution Metadata stored in the wheel file.

    The dist-info directory is searched for WHEEL_METADATA_FILENAME and
    then LEGACY_METADATA_FILENAME.  Raises ValueError when neither yields
    metadata.
    """
    archive = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    info_dir = '%s.dist-info' % name_ver
    utf8_reader = codecs.getreader('utf-8')
    candidates = [WHEEL_METADATA_FILENAME, LEGACY_METADATA_FILENAME]
    with ZipFile(archive, 'r') as zf:
        # The returned dict is not used here; the call is made only for
        # its effect of reading the WHEEL file from the archive.
        self.get_wheel_metadata(zf)
        found = None
        for candidate in candidates:
            member = posixpath.join(info_dir, candidate)
            try:
                with zf.open(member) as raw:
                    found = Metadata(fileobj=utf8_reader(raw))
                    if found:
                        break
            except KeyError:
                # Try the next candidate name.
                continue
        if not found:
            raise ValueError('Invalid wheel, because metadata is '
                             'missing: looked in %s' % ', '.join(candidates))
    return found
|
| 284 |
+
|
| 285 |
+
def get_wheel_metadata(self, zf):
|
| 286 |
+
name_ver = '%s-%s' % (self.name, self.version)
|
| 287 |
+
info_dir = '%s.dist-info' % name_ver
|
| 288 |
+
metadata_filename = posixpath.join(info_dir, 'WHEEL')
|
| 289 |
+
with zf.open(metadata_filename) as bf:
|
| 290 |
+
wf = codecs.getreader('utf-8')(bf)
|
| 291 |
+
message = message_from_file(wf)
|
| 292 |
+
return dict(message)
|
| 293 |
+
|
| 294 |
+
@cached_property
def info(self):
    """
    Open the wheel file and return the dict produced by
    ``get_wheel_metadata`` for it.
    """
    archive = os.path.join(self.dirname, self.filename)
    with ZipFile(archive, 'r') as zf:
        wheel_info = self.get_wheel_metadata(zf)
    return wheel_info
|
| 300 |
+
|
| 301 |
+
def process_shebang(self, data):
    """
    Return the bytes *data* with a generic shebang line.

    An existing shebang is replaced by ``#!python`` (``#!pythonw`` if the
    old line contains ``pythonw``, case-insensitively), keeping anything
    that followed the interpreter.  Without a shebang, ``#!python`` plus a
    line terminator is prepended.
    """
    found = SHEBANG_RE.match(data)
    if not found:
        # Pick the terminator: '\n' unless a CR occurs at or before the
        # position reported for the first LF.
        cr_pos = data.find(b'\r')
        lf_pos = data.find(b'\n')
        if 0 <= cr_pos <= lf_pos:
            if data[cr_pos:cr_pos + 2] == b'\r\n':
                newline = b'\r\n'
            else:
                newline = b'\r'
        else:
            newline = b'\n'
        return SHEBANG_PYTHON + newline + data

    cut = found.end()
    old_line, remainder = data[:cut], data[cut:]
    if b'pythonw' in old_line.lower():
        interpreter = SHEBANG_PYTHONW
    else:
        interpreter = SHEBANG_PYTHON
    # Preserve any arguments after the interpreter
    detail = SHEBANG_DETAIL_RE.match(old_line)
    arguments = b' ' + detail.groups()[-1] if detail else b''
    return interpreter + arguments + remainder
|
| 330 |
+
|
| 331 |
+
def get_hash(self, data, hash_kind=None):
|
| 332 |
+
if hash_kind is None:
|
| 333 |
+
hash_kind = self.hash_kind
|
| 334 |
+
try:
|
| 335 |
+
hasher = getattr(hashlib, hash_kind)
|
| 336 |
+
except AttributeError:
|
| 337 |
+
raise DistlibException('Unsupported hash algorithm: %r' %
|
| 338 |
+
hash_kind)
|
| 339 |
+
result = hasher(data).digest()
|
| 340 |
+
result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii')
|
| 341 |
+
return hash_kind, result
|
| 342 |
+
|
| 343 |
+
def write_record(self, records, record_path, archive_record_path):
    """
    Write the rows in *records* to the CSV file *record_path*, followed by
    a final row for *archive_record_path* with empty hash and size fields.
    """
    # Build a new list so the caller's sequence is left untouched.
    rows = list(records) + [(archive_record_path, '', '')]
    with CSVWriter(record_path) as writer:
        for row in rows:
            writer.writerow(row)
|
| 349 |
+
|
| 350 |
+
def write_records(self, info, libdir, archive_paths):
    """
    Write the RECORD file for the files listed in *archive_paths*.

    *info* is a ``(distinfo, info_dir)`` pair: the dist-info directory on
    disk and its path inside the archive.  *archive_paths* is a list of
    ``(archive_path, file_path)`` pairs; the RECORD entry itself is
    appended to it.  *libdir* is not used here.
    """
    distinfo, info_dir = info
    rows = []
    for archive_path, file_path in archive_paths:
        with open(file_path, 'rb') as stream:
            content = stream.read()
        digest = '%s=%s' % self.get_hash(content)
        rows.append((archive_path, digest, os.path.getsize(file_path)))

    record_file = os.path.join(distinfo, 'RECORD')
    record_archive_path = to_posix(os.path.join(info_dir, 'RECORD'))
    self.write_record(rows, record_file, record_archive_path)
    archive_paths.append((record_archive_path, record_file))
|
| 365 |
+
|
| 366 |
+
def build_zip(self, pathname, archive_paths):
    """
    Create the deflate-compressed zip file *pathname*, adding each file
    of the ``(archive_path, file_path)`` pairs in *archive_paths* under
    its archive path.
    """
    with ZipFile(pathname, 'w', zipfile.ZIP_DEFLATED) as archive:
        for archive_path, file_path in archive_paths:
            logger.debug('Wrote %s to %s in wheel', file_path, archive_path)
            archive.write(file_path, archive_path)
|
| 371 |
+
|
| 372 |
+
def build(self, paths, tags=None, wheel_version=None):
    """
    Build a wheel from files in specified paths, and use any specified tags
    when determining the name of the wheel.

    *paths* maps the keys 'purelib' or 'platlib' (one is required) and,
    optionally, 'data', 'headers' and 'scripts' to directories.  *tags*
    may supply 'pyver', 'abi' and 'arch' lists that override the defaults.
    *wheel_version* overrides ``self.wheel_version`` in the WHEEL file.

    Returns the pathname of the wheel file that was written.

    NOTE(review): if neither 'purelib' nor 'platlib' is in *paths*, the
    ``[0]`` lookup below raises IndexError.
    """
    if tags is None:
        tags = {}

    # 'purelib' takes precedence when both keys are present
    libkey = list(filter(lambda o: o in paths, ('purelib', 'platlib')))[0]
    if libkey == 'platlib':
        is_pure = 'false'
        default_pyver = [IMPVER]
        default_abi = [ABI]
        default_arch = [ARCH]
    else:
        is_pure = 'true'
        default_pyver = [PYVER]
        default_abi = ['none']
        default_arch = ['any']

    # The tags are stored on the instance; self.filename and self.tags
    # (used further down) are computed from them.
    self.pyver = tags.get('pyver', default_pyver)
    self.abi = tags.get('abi', default_abi)
    self.arch = tags.get('arch', default_arch)

    libdir = paths[libkey]

    name_ver = '%s-%s' % (self.name, self.version)
    data_dir = '%s.data' % name_ver
    info_dir = '%s.dist-info' % name_ver

    # (archive path, filesystem path) pairs of everything to be zipped
    archive_paths = []

    # First, stuff which is not in site-packages
    for key in ('data', 'headers', 'scripts'):
        if key not in paths:
            continue
        path = paths[key]
        if os.path.isdir(path):
            for root, dirs, files in os.walk(path):
                for fn in files:
                    p = fsdecode(os.path.join(root, fn))
                    rp = os.path.relpath(p, path)
                    ap = to_posix(os.path.join(data_dir, key, rp))
                    archive_paths.append((ap, p))
                    # Scripts (other than .exe files) have their shebang
                    # rewritten; the source file is modified in place.
                    if key == 'scripts' and not p.endswith('.exe'):
                        with open(p, 'rb') as f:
                            data = f.read()
                        data = self.process_shebang(data)
                        with open(p, 'wb') as f:
                            f.write(data)

    # Now, stuff which is in site-packages, other than the
    # distinfo stuff.
    path = libdir
    distinfo = None
    for root, dirs, files in os.walk(path):
        if root == path:
            # At the top level only, save distinfo for later
            # and skip it for now
            for i, dn in enumerate(dirs):
                dn = fsdecode(dn)
                if dn.endswith('.dist-info'):
                    distinfo = os.path.join(root, dn)
                    # removing it from dirs stops os.walk descending into it
                    del dirs[i]
                    break
            assert distinfo, '.dist-info directory expected, not found'

        for fn in files:
            # comment out next suite to leave .pyc files in
            if fsdecode(fn).endswith(('.pyc', '.pyo')):
                continue
            p = os.path.join(root, fn)
            rp = to_posix(os.path.relpath(p, path))
            archive_paths.append((rp, p))

    # Now distinfo. Assumed to be flat, i.e. os.listdir is enough.
    # RECORD and WHEEL are regenerated below; INSTALLER and SHARED are
    # left out of the archive.
    files = os.listdir(distinfo)
    for fn in files:
        if fn not in ('RECORD', 'INSTALLER', 'SHARED', 'WHEEL'):
            p = fsdecode(os.path.join(distinfo, fn))
            ap = to_posix(os.path.join(info_dir, fn))
            archive_paths.append((ap, p))

    # Contents of the WHEEL file: fixed headers, then one Tag line per
    # (pyver, abi, arch) combination.
    wheel_metadata = [
        'Wheel-Version: %d.%d' % (wheel_version or self.wheel_version),
        'Generator: distlib %s' % __version__,
        'Root-Is-Purelib: %s' % is_pure,
    ]
    for pyver, abi, arch in self.tags:
        wheel_metadata.append('Tag: %s-%s-%s' % (pyver, abi, arch))
    # The WHEEL file is written into the dist-info directory on disk
    p = os.path.join(distinfo, 'WHEEL')
    with open(p, 'w') as f:
        f.write('\n'.join(wheel_metadata))
    ap = to_posix(os.path.join(info_dir, 'WHEEL'))
    archive_paths.append((ap, p))

    # sort the entries by archive path. Not needed by any spec, but it
    # keeps the archive listing and RECORD tidier than they would otherwise
    # be. Use the number of path segments to keep directory entries together,
    # and keep the dist-info stuff at the end.
    def sorter(t):
        ap = t[0]
        n = ap.count('/')
        if '.dist-info' in ap:
            n += 10000
        return (n, ap)

    archive_paths = sorted(archive_paths, key=sorter)

    # Now, at last, RECORD.
    # Paths in here are archive paths - nothing else makes sense.
    # (write_records also appends the RECORD entry to archive_paths.)
    self.write_records((distinfo, info_dir), libdir, archive_paths)
    # Now, ready to build the zip file
    pathname = os.path.join(self.dirname, self.filename)
    self.build_zip(pathname, archive_paths)
    return pathname
|
| 488 |
+
|
| 489 |
+
def skip_entry(self, arcname):
    """
    Determine whether an archive entry should be skipped when verifying
    or installing.
    """
    # The signature file won't be in RECORD, and we currently don't do
    # anything with it.  Directories are skipped as well, as they won't be
    # in RECORD either. See:
    #
    # https://github.com/pypa/wheel/issues/294
    # https://github.com/pypa/wheel/issues/287
    # https://github.com/pypa/wheel/pull/289
    #
    is_directory = arcname.endswith('/')
    is_signature = arcname.endswith('/RECORD.jws')
    return is_directory or is_signature
|
| 504 |
+
|
| 505 |
+
def install(self, paths, maker, **kwargs):
|
| 506 |
+
"""
|
| 507 |
+
Install a wheel to the specified paths. If kwarg ``warner`` is
|
| 508 |
+
specified, it should be a callable, which will be called with two
|
| 509 |
+
tuples indicating the wheel version of this software and the wheel
|
| 510 |
+
version in the file, if there is a discrepancy in the versions.
|
| 511 |
+
This can be used to issue any warnings to raise any exceptions.
|
| 512 |
+
If kwarg ``lib_only`` is True, only the purelib/platlib files are
|
| 513 |
+
installed, and the headers, scripts, data and dist-info metadata are
|
| 514 |
+
not written. If kwarg ``bytecode_hashed_invalidation`` is True, written
|
| 515 |
+
bytecode will try to use file-hash based invalidation (PEP-552) on
|
| 516 |
+
supported interpreter versions (CPython 2.7+).
|
| 517 |
+
|
| 518 |
+
The return value is a :class:`InstalledDistribution` instance unless
|
| 519 |
+
``options.lib_only`` is True, in which case the return value is ``None``.
|
| 520 |
+
"""
|
| 521 |
+
|
| 522 |
+
dry_run = maker.dry_run
|
| 523 |
+
warner = kwargs.get('warner')
|
| 524 |
+
lib_only = kwargs.get('lib_only', False)
|
| 525 |
+
bc_hashed_invalidation = kwargs.get('bytecode_hashed_invalidation',
|
| 526 |
+
False)
|
| 527 |
+
|
| 528 |
+
pathname = os.path.join(self.dirname, self.filename)
|
| 529 |
+
name_ver = '%s-%s' % (self.name, self.version)
|
| 530 |
+
data_dir = '%s.data' % name_ver
|
| 531 |
+
info_dir = '%s.dist-info' % name_ver
|
| 532 |
+
|
| 533 |
+
metadata_name = posixpath.join(info_dir, LEGACY_METADATA_FILENAME)
|
| 534 |
+
wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
|
| 535 |
+
record_name = posixpath.join(info_dir, 'RECORD')
|
| 536 |
+
|
| 537 |
+
wrapper = codecs.getreader('utf-8')
|
| 538 |
+
|
| 539 |
+
with ZipFile(pathname, 'r') as zf:
|
| 540 |
+
with zf.open(wheel_metadata_name) as bwf:
|
| 541 |
+
wf = wrapper(bwf)
|
| 542 |
+
message = message_from_file(wf)
|
| 543 |
+
wv = message['Wheel-Version'].split('.', 1)
|
| 544 |
+
file_version = tuple([int(i) for i in wv])
|
| 545 |
+
if (file_version != self.wheel_version) and warner:
|
| 546 |
+
warner(self.wheel_version, file_version)
|
| 547 |
+
|
| 548 |
+
if message['Root-Is-Purelib'] == 'true':
|
| 549 |
+
libdir = paths['purelib']
|
| 550 |
+
else:
|
| 551 |
+
libdir = paths['platlib']
|
| 552 |
+
|
| 553 |
+
records = {}
|
| 554 |
+
with zf.open(record_name) as bf:
|
| 555 |
+
with CSVReader(stream=bf) as reader:
|
| 556 |
+
for row in reader:
|
| 557 |
+
p = row[0]
|
| 558 |
+
records[p] = row
|
| 559 |
+
|
| 560 |
+
data_pfx = posixpath.join(data_dir, '')
|
| 561 |
+
info_pfx = posixpath.join(info_dir, '')
|
| 562 |
+
script_pfx = posixpath.join(data_dir, 'scripts', '')
|
| 563 |
+
|
| 564 |
+
# make a new instance rather than a copy of maker's,
|
| 565 |
+
# as we mutate it
|
| 566 |
+
fileop = FileOperator(dry_run=dry_run)
|
| 567 |
+
fileop.record = True # so we can rollback if needed
|
| 568 |
+
|
| 569 |
+
bc = not sys.dont_write_bytecode # Double negatives. Lovely!
|
| 570 |
+
|
| 571 |
+
outfiles = [] # for RECORD writing
|
| 572 |
+
|
| 573 |
+
# for script copying/shebang processing
|
| 574 |
+
workdir = tempfile.mkdtemp()
|
| 575 |
+
# set target dir later
|
| 576 |
+
# we default add_launchers to False, as the
|
| 577 |
+
# Python Launcher should be used instead
|
| 578 |
+
maker.source_dir = workdir
|
| 579 |
+
maker.target_dir = None
|
| 580 |
+
try:
|
| 581 |
+
for zinfo in zf.infolist():
|
| 582 |
+
arcname = zinfo.filename
|
| 583 |
+
if isinstance(arcname, text_type):
|
| 584 |
+
u_arcname = arcname
|
| 585 |
+
else:
|
| 586 |
+
u_arcname = arcname.decode('utf-8')
|
| 587 |
+
if self.skip_entry(u_arcname):
|
| 588 |
+
continue
|
| 589 |
+
row = records[u_arcname]
|
| 590 |
+
if row[2] and str(zinfo.file_size) != row[2]:
|
| 591 |
+
raise DistlibException('size mismatch for '
|
| 592 |
+
'%s' % u_arcname)
|
| 593 |
+
if row[1]:
|
| 594 |
+
kind, value = row[1].split('=', 1)
|
| 595 |
+
with zf.open(arcname) as bf:
|
| 596 |
+
data = bf.read()
|
| 597 |
+
_, digest = self.get_hash(data, kind)
|
| 598 |
+
if digest != value:
|
| 599 |
+
raise DistlibException('digest mismatch for '
|
| 600 |
+
'%s' % arcname)
|
| 601 |
+
|
| 602 |
+
if lib_only and u_arcname.startswith((info_pfx, data_pfx)):
|
| 603 |
+
logger.debug('lib_only: skipping %s', u_arcname)
|
| 604 |
+
continue
|
| 605 |
+
is_script = (u_arcname.startswith(script_pfx)
|
| 606 |
+
and not u_arcname.endswith('.exe'))
|
| 607 |
+
|
| 608 |
+
if u_arcname.startswith(data_pfx):
|
| 609 |
+
_, where, rp = u_arcname.split('/', 2)
|
| 610 |
+
outfile = os.path.join(paths[where], convert_path(rp))
|
| 611 |
+
else:
|
| 612 |
+
# meant for site-packages.
|
| 613 |
+
if u_arcname in (wheel_metadata_name, record_name):
|
| 614 |
+
continue
|
| 615 |
+
outfile = os.path.join(libdir, convert_path(u_arcname))
|
| 616 |
+
if not is_script:
|
| 617 |
+
with zf.open(arcname) as bf:
|
| 618 |
+
fileop.copy_stream(bf, outfile)
|
| 619 |
+
# Issue #147: permission bits aren't preserved. Using
|
| 620 |
+
# zf.extract(zinfo, libdir) should have worked, but didn't,
|
| 621 |
+
# see https://www.thetopsites.net/article/53834422.shtml
|
| 622 |
+
# So ... manually preserve permission bits as given in zinfo
|
| 623 |
+
if os.name == 'posix':
|
| 624 |
+
# just set the normal permission bits
|
| 625 |
+
os.chmod(outfile,
|
| 626 |
+
(zinfo.external_attr >> 16) & 0x1FF)
|
| 627 |
+
outfiles.append(outfile)
|
| 628 |
+
# Double check the digest of the written file
|
| 629 |
+
if not dry_run and row[1]:
|
| 630 |
+
with open(outfile, 'rb') as bf:
|
| 631 |
+
data = bf.read()
|
| 632 |
+
_, newdigest = self.get_hash(data, kind)
|
| 633 |
+
if newdigest != digest:
|
| 634 |
+
raise DistlibException('digest mismatch '
|
| 635 |
+
'on write for '
|
| 636 |
+
'%s' % outfile)
|
| 637 |
+
if bc and outfile.endswith('.py'):
|
| 638 |
+
try:
|
| 639 |
+
pyc = fileop.byte_compile(
|
| 640 |
+
outfile,
|
| 641 |
+
hashed_invalidation=bc_hashed_invalidation)
|
| 642 |
+
outfiles.append(pyc)
|
| 643 |
+
except Exception:
|
| 644 |
+
# Don't give up if byte-compilation fails,
|
| 645 |
+
# but log it and perhaps warn the user
|
| 646 |
+
logger.warning('Byte-compilation failed',
|
| 647 |
+
exc_info=True)
|
| 648 |
+
else:
|
| 649 |
+
fn = os.path.basename(convert_path(arcname))
|
| 650 |
+
workname = os.path.join(workdir, fn)
|
| 651 |
+
with zf.open(arcname) as bf:
|
| 652 |
+
fileop.copy_stream(bf, workname)
|
| 653 |
+
|
| 654 |
+
dn, fn = os.path.split(outfile)
|
| 655 |
+
maker.target_dir = dn
|
| 656 |
+
filenames = maker.make(fn)
|
| 657 |
+
fileop.set_executable_mode(filenames)
|
| 658 |
+
outfiles.extend(filenames)
|
| 659 |
+
|
| 660 |
+
if lib_only:
|
| 661 |
+
logger.debug('lib_only: returning None')
|
| 662 |
+
dist = None
|
| 663 |
+
else:
|
| 664 |
+
# Generate scripts
|
| 665 |
+
|
| 666 |
+
# Try to get pydist.json so we can see if there are
|
| 667 |
+
# any commands to generate. If this fails (e.g. because
|
| 668 |
+
# of a legacy wheel), log a warning but don't give up.
|
| 669 |
+
commands = None
|
| 670 |
+
file_version = self.info['Wheel-Version']
|
| 671 |
+
if file_version == '1.0':
|
| 672 |
+
# Use legacy info
|
| 673 |
+
ep = posixpath.join(info_dir, 'entry_points.txt')
|
| 674 |
+
try:
|
| 675 |
+
with zf.open(ep) as bwf:
|
| 676 |
+
epdata = read_exports(bwf)
|
| 677 |
+
commands = {}
|
| 678 |
+
for key in ('console', 'gui'):
|
| 679 |
+
k = '%s_scripts' % key
|
| 680 |
+
if k in epdata:
|
| 681 |
+
commands['wrap_%s' % key] = d = {}
|
| 682 |
+
for v in epdata[k].values():
|
| 683 |
+
s = '%s:%s' % (v.prefix, v.suffix)
|
| 684 |
+
if v.flags:
|
| 685 |
+
s += ' [%s]' % ','.join(v.flags)
|
| 686 |
+
d[v.name] = s
|
| 687 |
+
except Exception:
|
| 688 |
+
logger.warning('Unable to read legacy script '
|
| 689 |
+
'metadata, so cannot generate '
|
| 690 |
+
'scripts')
|
| 691 |
+
else:
|
| 692 |
+
try:
|
| 693 |
+
with zf.open(metadata_name) as bwf:
|
| 694 |
+
wf = wrapper(bwf)
|
| 695 |
+
commands = json.load(wf).get('extensions')
|
| 696 |
+
if commands:
|
| 697 |
+
commands = commands.get('python.commands')
|
| 698 |
+
except Exception:
|
| 699 |
+
logger.warning('Unable to read JSON metadata, so '
|
| 700 |
+
'cannot generate scripts')
|
| 701 |
+
if commands:
|
| 702 |
+
console_scripts = commands.get('wrap_console', {})
|
| 703 |
+
gui_scripts = commands.get('wrap_gui', {})
|
| 704 |
+
if console_scripts or gui_scripts:
|
| 705 |
+
script_dir = paths.get('scripts', '')
|
| 706 |
+
if not os.path.isdir(script_dir):
|
| 707 |
+
raise ValueError('Valid script path not '
|
| 708 |
+
'specified')
|
| 709 |
+
maker.target_dir = script_dir
|
| 710 |
+
for k, v in console_scripts.items():
|
| 711 |
+
script = '%s = %s' % (k, v)
|
| 712 |
+
filenames = maker.make(script)
|
| 713 |
+
fileop.set_executable_mode(filenames)
|
| 714 |
+
|
| 715 |
+
if gui_scripts:
|
| 716 |
+
options = {'gui': True}
|
| 717 |
+
for k, v in gui_scripts.items():
|
| 718 |
+
script = '%s = %s' % (k, v)
|
| 719 |
+
filenames = maker.make(script, options)
|
| 720 |
+
fileop.set_executable_mode(filenames)
|
| 721 |
+
|
| 722 |
+
p = os.path.join(libdir, info_dir)
|
| 723 |
+
dist = InstalledDistribution(p)
|
| 724 |
+
|
| 725 |
+
# Write SHARED
|
| 726 |
+
paths = dict(paths) # don't change passed in dict
|
| 727 |
+
del paths['purelib']
|
| 728 |
+
del paths['platlib']
|
| 729 |
+
paths['lib'] = libdir
|
| 730 |
+
p = dist.write_shared_locations(paths, dry_run)
|
| 731 |
+
if p:
|
| 732 |
+
outfiles.append(p)
|
| 733 |
+
|
| 734 |
+
# Write RECORD
|
| 735 |
+
dist.write_installed_files(outfiles, paths['prefix'],
|
| 736 |
+
dry_run)
|
| 737 |
+
return dist
|
| 738 |
+
except Exception: # pragma: no cover
|
| 739 |
+
logger.exception('installation failed.')
|
| 740 |
+
fileop.rollback()
|
| 741 |
+
raise
|
| 742 |
+
finally:
|
| 743 |
+
shutil.rmtree(workdir)
|
| 744 |
+
|
| 745 |
+
def _get_dylib_cache(self):
    """
    Return the module-wide dylib cache, creating it on first use.

    The cache object is stored in the module-level ``cache`` global, so
    every caller shares one instance.
    """
    global cache
    if cache is not None:
        return cache
    cache_root = get_cache_base()
    # Use native string to avoid issues on 2.x: see Python #20140.
    subdir = str('dylib-cache')
    # One sub-directory per interpreter "major.minor" version.
    py_tag = '%s.%s' % sys.version_info[:2]
    cache = Cache(os.path.join(cache_root, subdir, py_tag))
    return cache
|
| 753 |
+
|
| 754 |
+
def _get_extensions(self):
    """
    Extract the extension modules listed in the wheel's EXTENSIONS entry.

    EXTENSIONS is read as a JSON mapping of name -> archive path. Each
    listed file is extracted beneath this wheel's directory in the dylib
    cache unless a copy at least as new as the archive member is already
    there.

    Returns a list of ``(name, extracted_path)`` tuples; the list is empty
    when the wheel has no EXTENSIONS entry.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    ext_entry = posixpath.join(info_dir, 'EXTENSIONS')
    utf8_reader = codecs.getreader('utf-8')
    found = []
    with ZipFile(wheel_path, 'r') as zf:
        try:
            with zf.open(ext_entry) as raw:
                extensions = json.load(utf8_reader(raw))
                dylib_cache = self._get_dylib_cache()
                wheel_subdir = dylib_cache.prefix_to_dir(wheel_path)
                cache_base = os.path.join(dylib_cache.base, wheel_subdir)
                if not os.path.isdir(cache_base):
                    os.makedirs(cache_base)
                for name, relpath in extensions.items():
                    dest = os.path.join(cache_base, convert_path(relpath))
                    needs_extract = True
                    if os.path.exists(dest):
                        # Re-extract only when the archive member is newer
                        # than the copy already on disk.
                        on_disk = datetime.datetime.fromtimestamp(
                            os.stat(dest).st_mtime)
                        member = zf.getinfo(relpath)
                        in_wheel = datetime.datetime(*member.date_time)
                        needs_extract = in_wheel > on_disk
                    if needs_extract:
                        zf.extract(relpath, cache_base)
                    found.append((name, dest))
        except KeyError:
            # A KeyError raised anywhere above (e.g. the archive has no
            # EXTENSIONS member) ends processing; whatever was collected
            # so far is returned.
            pass
    return found
|
| 788 |
+
|
| 789 |
+
def is_compatible(self):
    """
    Report whether this wheel is compatible with the running system.

    Delegates to the ``is_compatible`` function with this wheel as its
    only argument.
    """
    # NOTE(review): assumes this def sits in a class body, so the bare name
    # below resolves to the module-level function rather than this method.
    compatible = is_compatible(self)
    return compatible
|
| 794 |
+
|
| 795 |
+
def is_mountable(self):
    """
    Report whether the wheel's metadata asserts that it can be mounted.

    No metadata is consulted yet, so every wheel is reported as mountable.
    """
    # for now - metadata details TBD
    mountable = True
    return mountable
|
| 800 |
+
|
| 801 |
+
def mount(self, append=False):
    """
    Make the wheel importable by putting its absolute path on ``sys.path``.

    :param append: if true the path is appended to ``sys.path``; otherwise
                   it is inserted at the front.
    :raises DistlibException: if the wheel is not compatible with this
                              Python or is not mountable.

    If the wheel provides extensions, the module-level import hook is
    registered on ``sys.meta_path`` (once) and told about them. Nothing is
    done when the path is already on ``sys.path``.
    """
    wheel_path = os.path.abspath(os.path.join(self.dirname, self.filename))
    if not self.is_compatible():
        raise DistlibException(
            'Wheel %s not compatible with this Python.' % wheel_path)
    if not self.is_mountable():
        raise DistlibException(
            'Wheel %s is marked as not mountable.' % wheel_path)
    if wheel_path in sys.path:
        logger.debug('%s already in path', wheel_path)
        return
    if append:
        sys.path.append(wheel_path)
    else:
        sys.path.insert(0, wheel_path)
    extensions = self._get_extensions()
    if not extensions:
        return
    if _hook not in sys.meta_path:
        sys.meta_path.append(_hook)
    _hook.add(wheel_path, extensions)
|
| 821 |
+
|
| 822 |
+
def unmount(self):
    """
    Undo :meth:`mount`: take the wheel's absolute path off ``sys.path``.

    The wheel is also removed from the import hook if the hook knows it,
    and the hook itself is taken off ``sys.meta_path`` once it no longer
    tracks any wheel. Nothing is done when the path is not on ``sys.path``.
    """
    wheel_path = os.path.abspath(os.path.join(self.dirname, self.filename))
    if wheel_path not in sys.path:
        logger.debug('%s not in path', wheel_path)
        return
    sys.path.remove(wheel_path)
    if wheel_path in _hook.impure_wheels:
        _hook.remove(wheel_path)
    # Drop the hook only when it has nothing left to serve.
    if not _hook.impure_wheels and _hook in sys.meta_path:
        sys.meta_path.remove(_hook)
|
| 833 |
+
|
| 834 |
+
def verify(self):
    """
    Check the archive's entries against the wheel's own RECORD file.

    For every archive entry that ``skip_entry`` does not exclude, the size
    and digest listed in RECORD (when present) are compared with the
    actual archive member.

    :raises DistlibException: for an entry with ``..`` as a path
                              component, a size mismatch or a digest
                              mismatch.
    """
    wheel_path = os.path.join(self.dirname, self.filename)
    info_dir = '%s-%s.dist-info' % (self.name, self.version)
    wheel_metadata_name = posixpath.join(info_dir, 'WHEEL')
    record_name = posixpath.join(info_dir, 'RECORD')
    utf8_reader = codecs.getreader('utf-8')

    with ZipFile(wheel_path, 'r') as zf:
        # The WHEEL file is only parsed; its contents are not inspected.
        with zf.open(wheel_metadata_name) as raw:
            message_from_file(utf8_reader(raw))
            # TODO version verification

        # Index the RECORD rows by their first column (the entry path).
        with zf.open(record_name) as raw:
            with CSVReader(stream=raw) as reader:
                records = {row[0]: row for row in reader}

        for zinfo in zf.infolist():
            arcname = zinfo.filename
            if isinstance(arcname, text_type):
                u_arcname = arcname
            else:
                u_arcname = arcname.decode('utf-8')
            # See issue #115: some wheels have .. in their entries, but
            # in the filename ... e.g. __main__..py ! So only the path
            # components are checked, not the raw string.
            if '..' in u_arcname.split('/'):
                raise DistlibException(
                    'invalid entry in wheel: %r' % u_arcname)

            if self.skip_entry(u_arcname):
                continue
            # An entry missing from RECORD surfaces as a KeyError here.
            row = records[u_arcname]
            recorded_size = row[2]
            if recorded_size and str(zinfo.file_size) != recorded_size:
                raise DistlibException('size mismatch for %s' % u_arcname)
            recorded_hash = row[1]
            if not recorded_hash:
                continue
            kind, value = recorded_hash.split('=', 1)
            with zf.open(arcname) as member:
                data = member.read()
            _, digest = self.get_hash(data, kind)
            if digest != value:
                raise DistlibException('digest mismatch for %s' % arcname)
|
| 889 |
+
|
| 890 |
+
def update(self, modifier, dest_dir=None, **kwargs):
    """
    Update the contents of a wheel in a generic way. The modifier should
    be a callable which expects a dictionary argument: its keys are
    archive-entry paths, and its values are absolute filesystem paths
    where the contents of the corresponding archive entries can be found.
    The modifier is free to change the contents of the files pointed to,
    add new entries and remove entries, before returning. This method will
    extract the entire contents of the wheel to a temporary location, call
    the modifier, and then use the passed (and possibly updated)
    dictionary to write a new wheel. If ``dest_dir`` is specified, the new
    wheel is written there -- otherwise, the original wheel is overwritten.

    The modifier should return True if it updated the wheel, else False.
    This method returns the same value the modifier returns.

    Extra keyword arguments are passed through to the modifier.

    :raises DistlibException: if an archive entry has ``..`` as a path
                              component, or if ``dest_dir`` is given but
                              is not a directory.
    """

    def get_version(path_map, info_dir):
        # Return (version, metadata file path) from the extracted metadata
        # file, or (None, None) when neither candidate file is present.
        version = path = None
        key = '%s/%s' % (info_dir, LEGACY_METADATA_FILENAME)
        if key not in path_map:
            key = '%s/PKG-INFO' % info_dir
        if key in path_map:
            path = path_map[key]
            version = Metadata(path=path).version
        return version, path

    def update_version(version, path):
        # Bump (or add) a local version segment in the metadata file at
        # ``path`` so the rebuilt wheel is distinguishable from the original.
        updated = None
        try:
            NormalizedVersion(version)
            # NOTE(review): the existing segment is looked up after '-'
            # while the rewritten version joins it with '+'; confirm this
            # asymmetry is intended.
            i = version.find('-')
            if i < 0:
                updated = '%s+1' % version
            else:
                parts = [int(s) for s in version[i + 1:].split('.')]
                parts[-1] += 1
                updated = '%s+%s' % (version[:i], '.'.join(
                    str(n) for n in parts))
        except UnsupportedVersionError:
            logger.debug(
                'Cannot update non-compliant (PEP-440) '
                'version %r', version)
        if updated:
            md = Metadata(path=path)
            md.version = updated
            legacy = path.endswith(LEGACY_METADATA_FILENAME)
            md.write(path=path, legacy=legacy)
            logger.debug('Version updated from %r to %r', version, updated)

    pathname = os.path.join(self.dirname, self.filename)
    name_ver = '%s-%s' % (self.name, self.version)
    info_dir = '%s.dist-info' % name_ver
    record_name = posixpath.join(info_dir, 'RECORD')
    with tempdir() as workdir:
        with ZipFile(pathname, 'r') as zf:
            path_map = {}
            for zinfo in zf.infolist():
                arcname = zinfo.filename
                if isinstance(arcname, text_type):
                    u_arcname = arcname
                else:
                    u_arcname = arcname.decode('utf-8')
                # RECORD is regenerated below, so it is not extracted.
                if u_arcname == record_name:
                    continue
                # See issue #115: only a '..' path *component* is a
                # traversal attempt; names that merely contain two dots
                # (e.g. __main__..py) are legitimate. This is the same
                # component-wise check that verify() performs.
                if '..' in u_arcname.split('/'):
                    raise DistlibException('invalid entry in '
                                           'wheel: %r' % u_arcname)
                zf.extract(zinfo, workdir)
                path = os.path.join(workdir, convert_path(u_arcname))
                path_map[u_arcname] = path

        # Remember the version.
        original_version, _ = get_version(path_map, info_dir)
        # Files extracted. Call the modifier.
        modified = modifier(path_map, **kwargs)
        if modified:
            # Something changed - need to build a new wheel.
            current_version, path = get_version(path_map, info_dir)
            if current_version and (current_version == original_version):
                # Add or update local version to signify changes.
                update_version(current_version, path)
            # Decide where the new wheel goes.
            if dest_dir is None:
                # Build in the work directory, then copy over the original.
                fd, newpath = tempfile.mkstemp(suffix='.whl',
                                               prefix='wheel-update-',
                                               dir=workdir)
                os.close(fd)
            else:
                if not os.path.isdir(dest_dir):
                    raise DistlibException('Not a directory: %r' %
                                           dest_dir)
                newpath = os.path.join(dest_dir, self.filename)
            archive_paths = list(path_map.items())
            distinfo = os.path.join(workdir, info_dir)
            info = distinfo, info_dir
            self.write_records(info, workdir, archive_paths)
            self.build_zip(newpath, archive_paths)
            if dest_dir is None:
                shutil.copyfile(newpath, pathname)
    return modified
|
| 991 |
+
|
| 992 |
+
|
| 993 |
+
def _get_glibc_version():
|
| 994 |
+
import platform
|
| 995 |
+
ver = platform.libc_ver()
|
| 996 |
+
result = []
|
| 997 |
+
if ver[0] == 'glibc':
|
| 998 |
+
for s in ver[1].split('.'):
|
| 999 |
+
result.append(int(s) if s.isdigit() else 0)
|
| 1000 |
+
result = tuple(result)
|
| 1001 |
+
return result
|
| 1002 |
+
|
| 1003 |
+
|
| 1004 |
+
def compatible_tags():
    """
    Return (pyver, abi, arch) tuples compatible with this Python.

    The result is a set built from the module constants ``VER_SUFFIX``,
    ``ABI``, ``ARCH`` and ``IMP_PREFIX``, the ``.abi*`` entries returned by
    ``_get_suffixes()``, and (on Linux) the glibc version.
    """
    # This Python's version suffix first, then every earlier minor version
    # of the same major version, newest to oldest.
    versions = [VER_SUFFIX]
    major = VER_SUFFIX[0]
    for minor in range(sys.version_info[1] - 1, -1, -1):
        versions.append(''.join([major, str(minor)]))

    # ABI tags: the interpreter's own ABI first, then the sorted '.abi*'
    # suffixes, then the catch-all 'none'.
    abis = sorted(suffix.split('.', 2)[1]
                  for suffix in _get_suffixes()
                  if suffix.startswith('.abi'))
    if ABI != 'none':
        abis.insert(0, ABI)
    abis.append('none')
    result = []

    arches = [ARCH]
    if sys.platform == 'darwin':
        m = re.match(r'(\w+)_(\d+)_(\d+)_(\w+)$', ARCH)
        if m:
            name, major, minor, arch = m.groups()
            minor = int(minor)
            matches = [arch]
            if arch in ('i386', 'ppc'):
                matches.append('fat')
            if arch in ('i386', 'ppc', 'x86_64'):
                matches.append('fat3')
            if arch in ('ppc64', 'x86_64'):
                matches.append('fat64')
            if arch in ('i386', 'x86_64'):
                matches.append('intel')
            if arch in ('i386', 'x86_64', 'intel', 'ppc', 'ppc64'):
                matches.append('universal')
            # Every equal-or-lower minor OS version, for each matching arch.
            while minor >= 0:
                for match in matches:
                    s = '%s_%s_%s_%s' % (name, major, minor, match)
                    if s != ARCH:  # already there
                        arches.append(s)
                minor -= 1

    # Most specific - our Python version, ABI and arch
    impl_tag = ''.join((IMP_PREFIX, versions[0]))
    on_linux = sys.platform.startswith('linux')
    # The glibc version does not change between iterations, so it is
    # resolved at most once, and only if a manylinux tag is actually needed.
    glibc = None
    for abi in abis:
        for arch in arches:
            result.append((impl_tag, abi, arch))
            # manylinux
            if abi != 'none' and on_linux:
                arch = arch.replace('linux_', '')
                if glibc is None:
                    glibc = _get_glibc_version()
                if len(glibc) == 2:
                    if glibc >= (2, 5):
                        result.append((impl_tag, abi,
                                       'manylinux1_%s' % arch))
                    if glibc >= (2, 12):
                        result.append((impl_tag, abi,
                                       'manylinux2010_%s' % arch))
                    if glibc >= (2, 17):
                        result.append((impl_tag, abi,
                                       'manylinux2014_%s' % arch))
                    result.append(
                        (impl_tag, abi,
                         'manylinux_%s_%s_%s' % (glibc[0], glibc[1], arch)))

    # where no ABI / arch dependency, but IMP_PREFIX dependency
    for i, version in enumerate(versions):
        result.append((''.join((IMP_PREFIX, version)), 'none', 'any'))
        if i == 0:
            # Also the major-version-only tag.
            result.append((''.join((IMP_PREFIX, version[0])), 'none', 'any'))

    # no IMP_PREFIX, ABI or arch dependency
    for i, version in enumerate(versions):
        result.append((''.join(('py', version)), 'none', 'any'))
        if i == 0:
            result.append((''.join(('py', version[0])), 'none', 'any'))

    return set(result)


# Computed once at import time; the builder is then removed from the module
# namespace.
COMPATIBLE_TAGS = compatible_tags()

del compatible_tags
|
| 1087 |
+
|
| 1088 |
+
|
| 1089 |
+
def is_compatible(wheel, tags=None):
    """
    Report whether ``wheel`` matches at least one compatibility tag.

    :param wheel: a ``Wheel`` instance; anything else is passed to the
                  ``Wheel`` constructor (assumed to be a filename).
    :param tags: iterable of ``(pyver, abi, arch)`` triples; defaults to
                 ``COMPATIBLE_TAGS`` when ``None``.
    :return: ``True`` if, for some triple, each element is contained in
             the wheel's ``pyver``, ``abi`` and ``arch`` respectively.
    """
    if not isinstance(wheel, Wheel):
        wheel = Wheel(wheel)  # assume it's a filename
    candidates = COMPATIBLE_TAGS if tags is None else tags
    return any(
        ver in wheel.pyver and abi in wheel.abi and arch in wheel.arch
        for ver, abi, arch in candidates
    )
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/__init__.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# module pyparsing.py
|
| 2 |
+
#
|
| 3 |
+
# Copyright (c) 2003-2022 Paul T. McGuire
|
| 4 |
+
#
|
| 5 |
+
# Permission is hereby granted, free of charge, to any person obtaining
|
| 6 |
+
# a copy of this software and associated documentation files (the
|
| 7 |
+
# "Software"), to deal in the Software without restriction, including
|
| 8 |
+
# without limitation the rights to use, copy, modify, merge, publish,
|
| 9 |
+
# distribute, sublicense, and/or sell copies of the Software, and to
|
| 10 |
+
# permit persons to whom the Software is furnished to do so, subject to
|
| 11 |
+
# the following conditions:
|
| 12 |
+
#
|
| 13 |
+
# The above copyright notice and this permission notice shall be
|
| 14 |
+
# included in all copies or substantial portions of the Software.
|
| 15 |
+
#
|
| 16 |
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 17 |
+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 18 |
+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
| 19 |
+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
| 20 |
+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
| 21 |
+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
| 22 |
+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 23 |
+
#
|
| 24 |
+
|
| 25 |
+
__doc__ = """
|
| 26 |
+
pyparsing module - Classes and methods to define and execute parsing grammars
|
| 27 |
+
=============================================================================
|
| 28 |
+
|
| 29 |
+
The pyparsing module is an alternative approach to creating and
|
| 30 |
+
executing simple grammars, vs. the traditional lex/yacc approach, or the
|
| 31 |
+
use of regular expressions. With pyparsing, you don't need to learn
|
| 32 |
+
a new syntax for defining grammars or matching expressions - the parsing
|
| 33 |
+
module provides a library of classes that you use to construct the
|
| 34 |
+
grammar directly in Python.
|
| 35 |
+
|
| 36 |
+
Here is a program to parse "Hello, World!" (or any greeting of the form
|
| 37 |
+
``"<salutation>, <addressee>!"``), built up using :class:`Word`,
|
| 38 |
+
:class:`Literal`, and :class:`And` elements
|
| 39 |
+
(the :meth:`'+'<ParserElement.__add__>` operators create :class:`And` expressions,
|
| 40 |
+
and the strings are auto-converted to :class:`Literal` expressions)::
|
| 41 |
+
|
| 42 |
+
from pip._vendor.pyparsing import Word, alphas
|
| 43 |
+
|
| 44 |
+
# define grammar of a greeting
|
| 45 |
+
greet = Word(alphas) + "," + Word(alphas) + "!"
|
| 46 |
+
|
| 47 |
+
hello = "Hello, World!"
|
| 48 |
+
print(hello, "->", greet.parse_string(hello))
|
| 49 |
+
|
| 50 |
+
The program outputs the following::
|
| 51 |
+
|
| 52 |
+
Hello, World! -> ['Hello', ',', 'World', '!']
|
| 53 |
+
|
| 54 |
+
The Python representation of the grammar is quite readable, owing to the
|
| 55 |
+
self-explanatory class names, and the use of :class:`'+'<And>`,
|
| 56 |
+
:class:`'|'<MatchFirst>`, :class:`'^'<Or>` and :class:`'&'<Each>` operators.
|
| 57 |
+
|
| 58 |
+
The :class:`ParseResults` object returned from
|
| 59 |
+
:class:`ParserElement.parse_string` can be
|
| 60 |
+
accessed as a nested list, a dictionary, or an object with named
|
| 61 |
+
attributes.
|
| 62 |
+
|
| 63 |
+
The pyparsing module handles some of the problems that are typically
|
| 64 |
+
vexing when writing text parsers:
|
| 65 |
+
|
| 66 |
+
- extra or missing whitespace (the above program will also handle
|
| 67 |
+
"Hello,World!", "Hello , World !", etc.)
|
| 68 |
+
- quoted strings
|
| 69 |
+
- embedded comments
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
Getting Started -
|
| 73 |
+
-----------------
|
| 74 |
+
Visit the classes :class:`ParserElement` and :class:`ParseResults` to
|
| 75 |
+
see the base classes that most other pyparsing
|
| 76 |
+
classes inherit from. Use the docstrings for examples of how to:
|
| 77 |
+
|
| 78 |
+
- construct literal match expressions from :class:`Literal` and
|
| 79 |
+
:class:`CaselessLiteral` classes
|
| 80 |
+
- construct character word-group expressions using the :class:`Word`
|
| 81 |
+
class
|
| 82 |
+
- see how to create repetitive expressions using :class:`ZeroOrMore`
|
| 83 |
+
and :class:`OneOrMore` classes
|
| 84 |
+
- use :class:`'+'<And>`, :class:`'|'<MatchFirst>`, :class:`'^'<Or>`,
|
| 85 |
+
and :class:`'&'<Each>` operators to combine simple expressions into
|
| 86 |
+
more complex ones
|
| 87 |
+
- associate names with your parsed results using
|
| 88 |
+
:class:`ParserElement.set_results_name`
|
| 89 |
+
- access the parsed data, which is returned as a :class:`ParseResults`
|
| 90 |
+
object
|
| 91 |
+
- find some helpful expression short-cuts like :class:`DelimitedList`
|
| 92 |
+
and :class:`one_of`
|
| 93 |
+
- find more useful common expressions in the :class:`pyparsing_common`
|
| 94 |
+
namespace class
|
| 95 |
+
"""
|
| 96 |
+
from typing import NamedTuple
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
class version_info(NamedTuple):
    """
    Structured package version: ``(major, minor, micro, releaselevel, serial)``.
    """

    major: int
    minor: int
    micro: int
    releaselevel: str
    serial: int

    @property
    def __version__(self) -> str:
        """
        Version string: ``major.minor.micro`` plus a pre-release suffix.

        A ``"final"`` release has no suffix. Otherwise the suffix is the
        first letter of ``releaselevel`` followed by ``serial``, with an
        ``r`` prepended when that letter is ``c`` (e.g. ``rc2``).
        """
        base = f"{self.major}.{self.minor}.{self.micro}"
        if self.releaselevel == "final":
            return base
        level = self.releaselevel[0]
        prefix = "r" if level == "c" else ""
        return f"{base}{prefix}{level}{self.serial}"

    def __str__(self) -> str:
        return f"{__name__} {self.__version__} / {__version_time__}"

    def __repr__(self) -> str:
        fields = ", ".join(
            f"{name}={value!r}" for name, value in zip(self._fields, self)
        )
        return f"{__name__}.{type(self).__name__}({fields})"


__version_info__ = version_info(3, 1, 0, "final", 1)
__version_time__ = "18 Jun 2023 14:05 UTC"
__version__ = __version_info__.__version__
# pre-PEP8 spelling of __version_time__
__versionTime__ = __version_time__
__author__ = "Paul McGuire <ptmcg.gm+pyparsing@gmail.com>"
|
| 128 |
+
|
| 129 |
+
from .util import *
|
| 130 |
+
from .exceptions import *
|
| 131 |
+
from .actions import *
|
| 132 |
+
from .core import __diag__, __compat__
|
| 133 |
+
from .results import *
|
| 134 |
+
from .core import * # type: ignore[misc, assignment]
|
| 135 |
+
from .core import _builtin_exprs as core_builtin_exprs
|
| 136 |
+
from .helpers import * # type: ignore[misc, assignment]
|
| 137 |
+
from .helpers import _builtin_exprs as helper_builtin_exprs
|
| 138 |
+
|
| 139 |
+
from .unicode import unicode_set, UnicodeRangeList, pyparsing_unicode as unicode
|
| 140 |
+
from .testing import pyparsing_test as testing
|
| 141 |
+
from .common import (
|
| 142 |
+
pyparsing_common as common,
|
| 143 |
+
_builtin_exprs as common_builtin_exprs,
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
# define backward compat synonyms
|
| 147 |
+
if "pyparsing_unicode" not in globals():
|
| 148 |
+
pyparsing_unicode = unicode # type: ignore[misc]
|
| 149 |
+
if "pyparsing_common" not in globals():
|
| 150 |
+
pyparsing_common = common # type: ignore[misc]
|
| 151 |
+
if "pyparsing_test" not in globals():
|
| 152 |
+
pyparsing_test = testing # type: ignore[misc]
|
| 153 |
+
|
| 154 |
+
core_builtin_exprs += common_builtin_exprs + helper_builtin_exprs
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
__all__ = [
|
| 158 |
+
"__version__",
|
| 159 |
+
"__version_time__",
|
| 160 |
+
"__author__",
|
| 161 |
+
"__compat__",
|
| 162 |
+
"__diag__",
|
| 163 |
+
"And",
|
| 164 |
+
"AtLineStart",
|
| 165 |
+
"AtStringStart",
|
| 166 |
+
"CaselessKeyword",
|
| 167 |
+
"CaselessLiteral",
|
| 168 |
+
"CharsNotIn",
|
| 169 |
+
"CloseMatch",
|
| 170 |
+
"Combine",
|
| 171 |
+
"DelimitedList",
|
| 172 |
+
"Dict",
|
| 173 |
+
"Each",
|
| 174 |
+
"Empty",
|
| 175 |
+
"FollowedBy",
|
| 176 |
+
"Forward",
|
| 177 |
+
"GoToColumn",
|
| 178 |
+
"Group",
|
| 179 |
+
"IndentedBlock",
|
| 180 |
+
"Keyword",
|
| 181 |
+
"LineEnd",
|
| 182 |
+
"LineStart",
|
| 183 |
+
"Literal",
|
| 184 |
+
"Located",
|
| 185 |
+
"PrecededBy",
|
| 186 |
+
"MatchFirst",
|
| 187 |
+
"NoMatch",
|
| 188 |
+
"NotAny",
|
| 189 |
+
"OneOrMore",
|
| 190 |
+
"OnlyOnce",
|
| 191 |
+
"OpAssoc",
|
| 192 |
+
"Opt",
|
| 193 |
+
"Optional",
|
| 194 |
+
"Or",
|
| 195 |
+
"ParseBaseException",
|
| 196 |
+
"ParseElementEnhance",
|
| 197 |
+
"ParseException",
|
| 198 |
+
"ParseExpression",
|
| 199 |
+
"ParseFatalException",
|
| 200 |
+
"ParseResults",
|
| 201 |
+
"ParseSyntaxException",
|
| 202 |
+
"ParserElement",
|
| 203 |
+
"PositionToken",
|
| 204 |
+
"QuotedString",
|
| 205 |
+
"RecursiveGrammarException",
|
| 206 |
+
"Regex",
|
| 207 |
+
"SkipTo",
|
| 208 |
+
"StringEnd",
|
| 209 |
+
"StringStart",
|
| 210 |
+
"Suppress",
|
| 211 |
+
"Token",
|
| 212 |
+
"TokenConverter",
|
| 213 |
+
"White",
|
| 214 |
+
"Word",
|
| 215 |
+
"WordEnd",
|
| 216 |
+
"WordStart",
|
| 217 |
+
"ZeroOrMore",
|
| 218 |
+
"Char",
|
| 219 |
+
"alphanums",
|
| 220 |
+
"alphas",
|
| 221 |
+
"alphas8bit",
|
| 222 |
+
"any_close_tag",
|
| 223 |
+
"any_open_tag",
|
| 224 |
+
"autoname_elements",
|
| 225 |
+
"c_style_comment",
|
| 226 |
+
"col",
|
| 227 |
+
"common_html_entity",
|
| 228 |
+
"condition_as_parse_action",
|
| 229 |
+
"counted_array",
|
| 230 |
+
"cpp_style_comment",
|
| 231 |
+
"dbl_quoted_string",
|
| 232 |
+
"dbl_slash_comment",
|
| 233 |
+
"delimited_list",
|
| 234 |
+
"dict_of",
|
| 235 |
+
"empty",
|
| 236 |
+
"hexnums",
|
| 237 |
+
"html_comment",
|
| 238 |
+
"identchars",
|
| 239 |
+
"identbodychars",
|
| 240 |
+
"infix_notation",
|
| 241 |
+
"java_style_comment",
|
| 242 |
+
"line",
|
| 243 |
+
"line_end",
|
| 244 |
+
"line_start",
|
| 245 |
+
"lineno",
|
| 246 |
+
"make_html_tags",
|
| 247 |
+
"make_xml_tags",
|
| 248 |
+
"match_only_at_col",
|
| 249 |
+
"match_previous_expr",
|
| 250 |
+
"match_previous_literal",
|
| 251 |
+
"nested_expr",
|
| 252 |
+
"null_debug_action",
|
| 253 |
+
"nums",
|
| 254 |
+
"one_of",
|
| 255 |
+
"original_text_for",
|
| 256 |
+
"printables",
|
| 257 |
+
"punc8bit",
|
| 258 |
+
"pyparsing_common",
|
| 259 |
+
"pyparsing_test",
|
| 260 |
+
"pyparsing_unicode",
|
| 261 |
+
"python_style_comment",
|
| 262 |
+
"quoted_string",
|
| 263 |
+
"remove_quotes",
|
| 264 |
+
"replace_with",
|
| 265 |
+
"replace_html_entity",
|
| 266 |
+
"rest_of_line",
|
| 267 |
+
"sgl_quoted_string",
|
| 268 |
+
"srange",
|
| 269 |
+
"string_end",
|
| 270 |
+
"string_start",
|
| 271 |
+
"token_map",
|
| 272 |
+
"trace_parse_action",
|
| 273 |
+
"ungroup",
|
| 274 |
+
"unicode_set",
|
| 275 |
+
"unicode_string",
|
| 276 |
+
"with_attribute",
|
| 277 |
+
"with_class",
|
| 278 |
+
# pre-PEP8 compatibility names
|
| 279 |
+
"__versionTime__",
|
| 280 |
+
"anyCloseTag",
|
| 281 |
+
"anyOpenTag",
|
| 282 |
+
"cStyleComment",
|
| 283 |
+
"commonHTMLEntity",
|
| 284 |
+
"conditionAsParseAction",
|
| 285 |
+
"countedArray",
|
| 286 |
+
"cppStyleComment",
|
| 287 |
+
"dblQuotedString",
|
| 288 |
+
"dblSlashComment",
|
| 289 |
+
"delimitedList",
|
| 290 |
+
"dictOf",
|
| 291 |
+
"htmlComment",
|
| 292 |
+
"indentedBlock",
|
| 293 |
+
"infixNotation",
|
| 294 |
+
"javaStyleComment",
|
| 295 |
+
"lineEnd",
|
| 296 |
+
"lineStart",
|
| 297 |
+
"locatedExpr",
|
| 298 |
+
"makeHTMLTags",
|
| 299 |
+
"makeXMLTags",
|
| 300 |
+
"matchOnlyAtCol",
|
| 301 |
+
"matchPreviousExpr",
|
| 302 |
+
"matchPreviousLiteral",
|
| 303 |
+
"nestedExpr",
|
| 304 |
+
"nullDebugAction",
|
| 305 |
+
"oneOf",
|
| 306 |
+
"opAssoc",
|
| 307 |
+
"originalTextFor",
|
| 308 |
+
"pythonStyleComment",
|
| 309 |
+
"quotedString",
|
| 310 |
+
"removeQuotes",
|
| 311 |
+
"replaceHTMLEntity",
|
| 312 |
+
"replaceWith",
|
| 313 |
+
"restOfLine",
|
| 314 |
+
"sglQuotedString",
|
| 315 |
+
"stringEnd",
|
| 316 |
+
"stringStart",
|
| 317 |
+
"tokenMap",
|
| 318 |
+
"traceParseAction",
|
| 319 |
+
"unicodeString",
|
| 320 |
+
"withAttribute",
|
| 321 |
+
"withClass",
|
| 322 |
+
]
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/actions.py
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# actions.py
|
| 2 |
+
|
| 3 |
+
from .exceptions import ParseException
|
| 4 |
+
from .util import col, replaced_by_pep8
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
class OnlyOnce:
|
| 8 |
+
"""
|
| 9 |
+
Wrapper for parse actions, to ensure they are only called once.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
def __init__(self, method_call):
|
| 13 |
+
from .core import _trim_arity
|
| 14 |
+
|
| 15 |
+
self.callable = _trim_arity(method_call)
|
| 16 |
+
self.called = False
|
| 17 |
+
|
| 18 |
+
def __call__(self, s, l, t):
|
| 19 |
+
if not self.called:
|
| 20 |
+
results = self.callable(s, l, t)
|
| 21 |
+
self.called = True
|
| 22 |
+
return results
|
| 23 |
+
raise ParseException(s, l, "OnlyOnce obj called multiple times w/out reset")
|
| 24 |
+
|
| 25 |
+
def reset(self):
|
| 26 |
+
"""
|
| 27 |
+
Allow the associated parse action to be called once more.
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
self.called = False
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def match_only_at_col(n):
|
| 34 |
+
"""
|
| 35 |
+
Helper method for defining parse actions that require matching at
|
| 36 |
+
a specific column in the input text.
|
| 37 |
+
"""
|
| 38 |
+
|
| 39 |
+
def verify_col(strg, locn, toks):
|
| 40 |
+
if col(locn, strg) != n:
|
| 41 |
+
raise ParseException(strg, locn, f"matched token not at column {n}")
|
| 42 |
+
|
| 43 |
+
return verify_col
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def replace_with(repl_str):
|
| 47 |
+
"""
|
| 48 |
+
Helper method for common parse actions that simply return
|
| 49 |
+
a literal value. Especially useful when used with
|
| 50 |
+
:class:`transform_string<ParserElement.transform_string>` ().
|
| 51 |
+
|
| 52 |
+
Example::
|
| 53 |
+
|
| 54 |
+
num = Word(nums).set_parse_action(lambda toks: int(toks[0]))
|
| 55 |
+
na = one_of("N/A NA").set_parse_action(replace_with(math.nan))
|
| 56 |
+
term = na | num
|
| 57 |
+
|
| 58 |
+
term[1, ...].parse_string("324 234 N/A 234") # -> [324, 234, nan, 234]
|
| 59 |
+
"""
|
| 60 |
+
return lambda s, l, t: [repl_str]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def remove_quotes(s, l, t):
|
| 64 |
+
"""
|
| 65 |
+
Helper parse action for removing quotation marks from parsed
|
| 66 |
+
quoted strings.
|
| 67 |
+
|
| 68 |
+
Example::
|
| 69 |
+
|
| 70 |
+
# by default, quotation marks are included in parsed results
|
| 71 |
+
quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["'Now is the Winter of our Discontent'"]
|
| 72 |
+
|
| 73 |
+
# use remove_quotes to strip quotation marks from parsed results
|
| 74 |
+
quoted_string.set_parse_action(remove_quotes)
|
| 75 |
+
quoted_string.parse_string("'Now is the Winter of our Discontent'") # -> ["Now is the Winter of our Discontent"]
|
| 76 |
+
"""
|
| 77 |
+
return t[0][1:-1]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def with_attribute(*args, **attr_dict):
|
| 81 |
+
"""
|
| 82 |
+
Helper to create a validating parse action to be used with start
|
| 83 |
+
tags created with :class:`make_xml_tags` or
|
| 84 |
+
:class:`make_html_tags`. Use ``with_attribute`` to qualify
|
| 85 |
+
a starting tag with a required attribute value, to avoid false
|
| 86 |
+
matches on common tags such as ``<TD>`` or ``<DIV>``.
|
| 87 |
+
|
| 88 |
+
Call ``with_attribute`` with a series of attribute names and
|
| 89 |
+
values. Specify the list of filter attributes names and values as:
|
| 90 |
+
|
| 91 |
+
- keyword arguments, as in ``(align="right")``, or
|
| 92 |
+
- as an explicit dict with ``**`` operator, when an attribute
|
| 93 |
+
name is also a Python reserved word, as in ``**{"class":"Customer", "align":"right"}``
|
| 94 |
+
- a list of name-value tuples, as in ``(("ns1:class", "Customer"), ("ns2:align", "right"))``
|
| 95 |
+
|
| 96 |
+
For attribute names with a namespace prefix, you must use the second
|
| 97 |
+
form. Attribute names are matched insensitive to upper/lower case.
|
| 98 |
+
|
| 99 |
+
If just testing for ``class`` (with or without a namespace), use
|
| 100 |
+
:class:`with_class`.
|
| 101 |
+
|
| 102 |
+
To verify that the attribute exists, but without specifying a value,
|
| 103 |
+
pass ``with_attribute.ANY_VALUE`` as the value.
|
| 104 |
+
|
| 105 |
+
Example::
|
| 106 |
+
|
| 107 |
+
html = '''
|
| 108 |
+
<div>
|
| 109 |
+
Some text
|
| 110 |
+
<div type="grid">1 4 0 1 0</div>
|
| 111 |
+
<div type="graph">1,3 2,3 1,1</div>
|
| 112 |
+
<div>this has no type</div>
|
| 113 |
+
</div>
|
| 114 |
+
|
| 115 |
+
'''
|
| 116 |
+
div,div_end = make_html_tags("div")
|
| 117 |
+
|
| 118 |
+
# only match div tag having a type attribute with value "grid"
|
| 119 |
+
div_grid = div().set_parse_action(with_attribute(type="grid"))
|
| 120 |
+
grid_expr = div_grid + SkipTo(div | div_end)("body")
|
| 121 |
+
for grid_header in grid_expr.search_string(html):
|
| 122 |
+
print(grid_header.body)
|
| 123 |
+
|
| 124 |
+
# construct a match with any div tag having a type attribute, regardless of the value
|
| 125 |
+
div_any_type = div().set_parse_action(with_attribute(type=with_attribute.ANY_VALUE))
|
| 126 |
+
div_expr = div_any_type + SkipTo(div | div_end)("body")
|
| 127 |
+
for div_header in div_expr.search_string(html):
|
| 128 |
+
print(div_header.body)
|
| 129 |
+
|
| 130 |
+
prints::
|
| 131 |
+
|
| 132 |
+
1 4 0 1 0
|
| 133 |
+
|
| 134 |
+
1 4 0 1 0
|
| 135 |
+
1,3 2,3 1,1
|
| 136 |
+
"""
|
| 137 |
+
if args:
|
| 138 |
+
attrs = args[:]
|
| 139 |
+
else:
|
| 140 |
+
attrs = attr_dict.items()
|
| 141 |
+
attrs = [(k, v) for k, v in attrs]
|
| 142 |
+
|
| 143 |
+
def pa(s, l, tokens):
|
| 144 |
+
for attrName, attrValue in attrs:
|
| 145 |
+
if attrName not in tokens:
|
| 146 |
+
raise ParseException(s, l, "no matching attribute " + attrName)
|
| 147 |
+
if attrValue != with_attribute.ANY_VALUE and tokens[attrName] != attrValue:
|
| 148 |
+
raise ParseException(
|
| 149 |
+
s,
|
| 150 |
+
l,
|
| 151 |
+
f"attribute {attrName!r} has value {tokens[attrName]!r}, must be {attrValue!r}",
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
+
return pa
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
with_attribute.ANY_VALUE = object() # type: ignore [attr-defined]
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def with_class(classname, namespace=""):
|
| 161 |
+
"""
|
| 162 |
+
Simplified version of :class:`with_attribute` when
|
| 163 |
+
matching on a div class - made difficult because ``class`` is
|
| 164 |
+
a reserved word in Python.
|
| 165 |
+
|
| 166 |
+
Example::
|
| 167 |
+
|
| 168 |
+
html = '''
|
| 169 |
+
<div>
|
| 170 |
+
Some text
|
| 171 |
+
<div class="grid">1 4 0 1 0</div>
|
| 172 |
+
<div class="graph">1,3 2,3 1,1</div>
|
| 173 |
+
<div>this <div> has no class</div>
|
| 174 |
+
</div>
|
| 175 |
+
|
| 176 |
+
'''
|
| 177 |
+
div,div_end = make_html_tags("div")
|
| 178 |
+
div_grid = div().set_parse_action(with_class("grid"))
|
| 179 |
+
|
| 180 |
+
grid_expr = div_grid + SkipTo(div | div_end)("body")
|
| 181 |
+
for grid_header in grid_expr.search_string(html):
|
| 182 |
+
print(grid_header.body)
|
| 183 |
+
|
| 184 |
+
div_any_type = div().set_parse_action(with_class(withAttribute.ANY_VALUE))
|
| 185 |
+
div_expr = div_any_type + SkipTo(div | div_end)("body")
|
| 186 |
+
for div_header in div_expr.search_string(html):
|
| 187 |
+
print(div_header.body)
|
| 188 |
+
|
| 189 |
+
prints::
|
| 190 |
+
|
| 191 |
+
1 4 0 1 0
|
| 192 |
+
|
| 193 |
+
1 4 0 1 0
|
| 194 |
+
1,3 2,3 1,1
|
| 195 |
+
"""
|
| 196 |
+
classattr = f"{namespace}:class" if namespace else "class"
|
| 197 |
+
return with_attribute(**{classattr: classname})
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
# pre-PEP8 compatibility symbols
|
| 201 |
+
# fmt: off
|
| 202 |
+
@replaced_by_pep8(replace_with)
|
| 203 |
+
def replaceWith(): ...
|
| 204 |
+
|
| 205 |
+
@replaced_by_pep8(remove_quotes)
|
| 206 |
+
def removeQuotes(): ...
|
| 207 |
+
|
| 208 |
+
@replaced_by_pep8(with_attribute)
|
| 209 |
+
def withAttribute(): ...
|
| 210 |
+
|
| 211 |
+
@replaced_by_pep8(with_class)
|
| 212 |
+
def withClass(): ...
|
| 213 |
+
|
| 214 |
+
@replaced_by_pep8(match_only_at_col)
|
| 215 |
+
def matchOnlyAtCol(): ...
|
| 216 |
+
|
| 217 |
+
# fmt: on
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/common.py
ADDED
|
@@ -0,0 +1,432 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# common.py
|
| 2 |
+
from .core import *
|
| 3 |
+
from .helpers import DelimitedList, any_open_tag, any_close_tag
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
# some other useful expressions - using lower-case class name since we are really using this as a namespace
|
| 8 |
+
class pyparsing_common:
|
| 9 |
+
"""Here are some common low-level expressions that may be useful in
|
| 10 |
+
jump-starting parser development:
|
| 11 |
+
|
| 12 |
+
- numeric forms (:class:`integers<integer>`, :class:`reals<real>`,
|
| 13 |
+
:class:`scientific notation<sci_real>`)
|
| 14 |
+
- common :class:`programming identifiers<identifier>`
|
| 15 |
+
- network addresses (:class:`MAC<mac_address>`,
|
| 16 |
+
:class:`IPv4<ipv4_address>`, :class:`IPv6<ipv6_address>`)
|
| 17 |
+
- ISO8601 :class:`dates<iso8601_date>` and
|
| 18 |
+
:class:`datetime<iso8601_datetime>`
|
| 19 |
+
- :class:`UUID<uuid>`
|
| 20 |
+
- :class:`comma-separated list<comma_separated_list>`
|
| 21 |
+
- :class:`url`
|
| 22 |
+
|
| 23 |
+
Parse actions:
|
| 24 |
+
|
| 25 |
+
- :class:`convert_to_integer`
|
| 26 |
+
- :class:`convert_to_float`
|
| 27 |
+
- :class:`convert_to_date`
|
| 28 |
+
- :class:`convert_to_datetime`
|
| 29 |
+
- :class:`strip_html_tags`
|
| 30 |
+
- :class:`upcase_tokens`
|
| 31 |
+
- :class:`downcase_tokens`
|
| 32 |
+
|
| 33 |
+
Example::
|
| 34 |
+
|
| 35 |
+
pyparsing_common.number.run_tests('''
|
| 36 |
+
# any int or real number, returned as the appropriate type
|
| 37 |
+
100
|
| 38 |
+
-100
|
| 39 |
+
+100
|
| 40 |
+
3.14159
|
| 41 |
+
6.02e23
|
| 42 |
+
1e-12
|
| 43 |
+
''')
|
| 44 |
+
|
| 45 |
+
pyparsing_common.fnumber.run_tests('''
|
| 46 |
+
# any int or real number, returned as float
|
| 47 |
+
100
|
| 48 |
+
-100
|
| 49 |
+
+100
|
| 50 |
+
3.14159
|
| 51 |
+
6.02e23
|
| 52 |
+
1e-12
|
| 53 |
+
''')
|
| 54 |
+
|
| 55 |
+
pyparsing_common.hex_integer.run_tests('''
|
| 56 |
+
# hex numbers
|
| 57 |
+
100
|
| 58 |
+
FF
|
| 59 |
+
''')
|
| 60 |
+
|
| 61 |
+
pyparsing_common.fraction.run_tests('''
|
| 62 |
+
# fractions
|
| 63 |
+
1/2
|
| 64 |
+
-3/4
|
| 65 |
+
''')
|
| 66 |
+
|
| 67 |
+
pyparsing_common.mixed_integer.run_tests('''
|
| 68 |
+
# mixed fractions
|
| 69 |
+
1
|
| 70 |
+
1/2
|
| 71 |
+
-3/4
|
| 72 |
+
1-3/4
|
| 73 |
+
''')
|
| 74 |
+
|
| 75 |
+
import uuid
|
| 76 |
+
pyparsing_common.uuid.set_parse_action(token_map(uuid.UUID))
|
| 77 |
+
pyparsing_common.uuid.run_tests('''
|
| 78 |
+
# uuid
|
| 79 |
+
12345678-1234-5678-1234-567812345678
|
| 80 |
+
''')
|
| 81 |
+
|
| 82 |
+
prints::
|
| 83 |
+
|
| 84 |
+
# any int or real number, returned as the appropriate type
|
| 85 |
+
100
|
| 86 |
+
[100]
|
| 87 |
+
|
| 88 |
+
-100
|
| 89 |
+
[-100]
|
| 90 |
+
|
| 91 |
+
+100
|
| 92 |
+
[100]
|
| 93 |
+
|
| 94 |
+
3.14159
|
| 95 |
+
[3.14159]
|
| 96 |
+
|
| 97 |
+
6.02e23
|
| 98 |
+
[6.02e+23]
|
| 99 |
+
|
| 100 |
+
1e-12
|
| 101 |
+
[1e-12]
|
| 102 |
+
|
| 103 |
+
# any int or real number, returned as float
|
| 104 |
+
100
|
| 105 |
+
[100.0]
|
| 106 |
+
|
| 107 |
+
-100
|
| 108 |
+
[-100.0]
|
| 109 |
+
|
| 110 |
+
+100
|
| 111 |
+
[100.0]
|
| 112 |
+
|
| 113 |
+
3.14159
|
| 114 |
+
[3.14159]
|
| 115 |
+
|
| 116 |
+
6.02e23
|
| 117 |
+
[6.02e+23]
|
| 118 |
+
|
| 119 |
+
1e-12
|
| 120 |
+
[1e-12]
|
| 121 |
+
|
| 122 |
+
# hex numbers
|
| 123 |
+
100
|
| 124 |
+
[256]
|
| 125 |
+
|
| 126 |
+
FF
|
| 127 |
+
[255]
|
| 128 |
+
|
| 129 |
+
# fractions
|
| 130 |
+
1/2
|
| 131 |
+
[0.5]
|
| 132 |
+
|
| 133 |
+
-3/4
|
| 134 |
+
[-0.75]
|
| 135 |
+
|
| 136 |
+
# mixed fractions
|
| 137 |
+
1
|
| 138 |
+
[1]
|
| 139 |
+
|
| 140 |
+
1/2
|
| 141 |
+
[0.5]
|
| 142 |
+
|
| 143 |
+
-3/4
|
| 144 |
+
[-0.75]
|
| 145 |
+
|
| 146 |
+
1-3/4
|
| 147 |
+
[1.75]
|
| 148 |
+
|
| 149 |
+
# uuid
|
| 150 |
+
12345678-1234-5678-1234-567812345678
|
| 151 |
+
[UUID('12345678-1234-5678-1234-567812345678')]
|
| 152 |
+
"""
|
| 153 |
+
|
| 154 |
+
convert_to_integer = token_map(int)
|
| 155 |
+
"""
|
| 156 |
+
Parse action for converting parsed integers to Python int
|
| 157 |
+
"""
|
| 158 |
+
|
| 159 |
+
convert_to_float = token_map(float)
|
| 160 |
+
"""
|
| 161 |
+
Parse action for converting parsed numbers to Python float
|
| 162 |
+
"""
|
| 163 |
+
|
| 164 |
+
integer = Word(nums).set_name("integer").set_parse_action(convert_to_integer)
|
| 165 |
+
"""expression that parses an unsigned integer, returns an int"""
|
| 166 |
+
|
| 167 |
+
hex_integer = (
|
| 168 |
+
Word(hexnums).set_name("hex integer").set_parse_action(token_map(int, 16))
|
| 169 |
+
)
|
| 170 |
+
"""expression that parses a hexadecimal integer, returns an int"""
|
| 171 |
+
|
| 172 |
+
signed_integer = (
|
| 173 |
+
Regex(r"[+-]?\d+")
|
| 174 |
+
.set_name("signed integer")
|
| 175 |
+
.set_parse_action(convert_to_integer)
|
| 176 |
+
)
|
| 177 |
+
"""expression that parses an integer with optional leading sign, returns an int"""
|
| 178 |
+
|
| 179 |
+
fraction = (
|
| 180 |
+
signed_integer().set_parse_action(convert_to_float)
|
| 181 |
+
+ "/"
|
| 182 |
+
+ signed_integer().set_parse_action(convert_to_float)
|
| 183 |
+
).set_name("fraction")
|
| 184 |
+
"""fractional expression of an integer divided by an integer, returns a float"""
|
| 185 |
+
fraction.add_parse_action(lambda tt: tt[0] / tt[-1])
|
| 186 |
+
|
| 187 |
+
mixed_integer = (
|
| 188 |
+
fraction | signed_integer + Opt(Opt("-").suppress() + fraction)
|
| 189 |
+
).set_name("fraction or mixed integer-fraction")
|
| 190 |
+
"""mixed integer of the form 'integer - fraction', with optional leading integer, returns float"""
|
| 191 |
+
mixed_integer.add_parse_action(sum)
|
| 192 |
+
|
| 193 |
+
real = (
|
| 194 |
+
Regex(r"[+-]?(?:\d+\.\d*|\.\d+)")
|
| 195 |
+
.set_name("real number")
|
| 196 |
+
.set_parse_action(convert_to_float)
|
| 197 |
+
)
|
| 198 |
+
"""expression that parses a floating point number and returns a float"""
|
| 199 |
+
|
| 200 |
+
sci_real = (
|
| 201 |
+
Regex(r"[+-]?(?:\d+(?:[eE][+-]?\d+)|(?:\d+\.\d*|\.\d+)(?:[eE][+-]?\d+)?)")
|
| 202 |
+
.set_name("real number with scientific notation")
|
| 203 |
+
.set_parse_action(convert_to_float)
|
| 204 |
+
)
|
| 205 |
+
"""expression that parses a floating point number with optional
|
| 206 |
+
scientific notation and returns a float"""
|
| 207 |
+
|
| 208 |
+
# streamlining this expression makes the docs nicer-looking
|
| 209 |
+
number = (sci_real | real | signed_integer).setName("number").streamline()
|
| 210 |
+
"""any numeric expression, returns the corresponding Python type"""
|
| 211 |
+
|
| 212 |
+
fnumber = (
|
| 213 |
+
Regex(r"[+-]?\d+\.?\d*([eE][+-]?\d+)?")
|
| 214 |
+
.set_name("fnumber")
|
| 215 |
+
.set_parse_action(convert_to_float)
|
| 216 |
+
)
|
| 217 |
+
"""any int or real number, returned as float"""
|
| 218 |
+
|
| 219 |
+
identifier = Word(identchars, identbodychars).set_name("identifier")
|
| 220 |
+
"""typical code identifier (leading alpha or '_', followed by 0 or more alphas, nums, or '_')"""
|
| 221 |
+
|
| 222 |
+
ipv4_address = Regex(
|
| 223 |
+
r"(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})(\.(25[0-5]|2[0-4][0-9]|1?[0-9]{1,2})){3}"
|
| 224 |
+
).set_name("IPv4 address")
|
| 225 |
+
"IPv4 address (``0.0.0.0 - 255.255.255.255``)"
|
| 226 |
+
|
| 227 |
+
_ipv6_part = Regex(r"[0-9a-fA-F]{1,4}").set_name("hex_integer")
|
| 228 |
+
_full_ipv6_address = (_ipv6_part + (":" + _ipv6_part) * 7).set_name(
|
| 229 |
+
"full IPv6 address"
|
| 230 |
+
)
|
| 231 |
+
_short_ipv6_address = (
|
| 232 |
+
Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
| 233 |
+
+ "::"
|
| 234 |
+
+ Opt(_ipv6_part + (":" + _ipv6_part) * (0, 6))
|
| 235 |
+
).set_name("short IPv6 address")
|
| 236 |
+
_short_ipv6_address.add_condition(
|
| 237 |
+
lambda t: sum(1 for tt in t if pyparsing_common._ipv6_part.matches(tt)) < 8
|
| 238 |
+
)
|
| 239 |
+
_mixed_ipv6_address = ("::ffff:" + ipv4_address).set_name("mixed IPv6 address")
|
| 240 |
+
ipv6_address = Combine(
|
| 241 |
+
(_full_ipv6_address | _mixed_ipv6_address | _short_ipv6_address).set_name(
|
| 242 |
+
"IPv6 address"
|
| 243 |
+
)
|
| 244 |
+
).set_name("IPv6 address")
|
| 245 |
+
"IPv6 address (long, short, or mixed form)"
|
| 246 |
+
|
| 247 |
+
mac_address = Regex(
|
| 248 |
+
r"[0-9a-fA-F]{2}([:.-])[0-9a-fA-F]{2}(?:\1[0-9a-fA-F]{2}){4}"
|
| 249 |
+
).set_name("MAC address")
|
| 250 |
+
"MAC address xx:xx:xx:xx:xx (may also have '-' or '.' delimiters)"
|
| 251 |
+
|
| 252 |
+
@staticmethod
|
| 253 |
+
def convert_to_date(fmt: str = "%Y-%m-%d"):
|
| 254 |
+
"""
|
| 255 |
+
Helper to create a parse action for converting parsed date string to Python datetime.date
|
| 256 |
+
|
| 257 |
+
Params -
|
| 258 |
+
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%d"``)
|
| 259 |
+
|
| 260 |
+
Example::
|
| 261 |
+
|
| 262 |
+
date_expr = pyparsing_common.iso8601_date.copy()
|
| 263 |
+
date_expr.set_parse_action(pyparsing_common.convert_to_date())
|
| 264 |
+
print(date_expr.parse_string("1999-12-31"))
|
| 265 |
+
|
| 266 |
+
prints::
|
| 267 |
+
|
| 268 |
+
[datetime.date(1999, 12, 31)]
|
| 269 |
+
"""
|
| 270 |
+
|
| 271 |
+
def cvt_fn(ss, ll, tt):
|
| 272 |
+
try:
|
| 273 |
+
return datetime.strptime(tt[0], fmt).date()
|
| 274 |
+
except ValueError as ve:
|
| 275 |
+
raise ParseException(ss, ll, str(ve))
|
| 276 |
+
|
| 277 |
+
return cvt_fn
|
| 278 |
+
|
| 279 |
+
@staticmethod
|
| 280 |
+
def convert_to_datetime(fmt: str = "%Y-%m-%dT%H:%M:%S.%f"):
|
| 281 |
+
"""Helper to create a parse action for converting parsed
|
| 282 |
+
datetime string to Python datetime.datetime
|
| 283 |
+
|
| 284 |
+
Params -
|
| 285 |
+
- fmt - format to be passed to datetime.strptime (default= ``"%Y-%m-%dT%H:%M:%S.%f"``)
|
| 286 |
+
|
| 287 |
+
Example::
|
| 288 |
+
|
| 289 |
+
dt_expr = pyparsing_common.iso8601_datetime.copy()
|
| 290 |
+
dt_expr.set_parse_action(pyparsing_common.convert_to_datetime())
|
| 291 |
+
print(dt_expr.parse_string("1999-12-31T23:59:59.999"))
|
| 292 |
+
|
| 293 |
+
prints::
|
| 294 |
+
|
| 295 |
+
[datetime.datetime(1999, 12, 31, 23, 59, 59, 999000)]
|
| 296 |
+
"""
|
| 297 |
+
|
| 298 |
+
def cvt_fn(s, l, t):
|
| 299 |
+
try:
|
| 300 |
+
return datetime.strptime(t[0], fmt)
|
| 301 |
+
except ValueError as ve:
|
| 302 |
+
raise ParseException(s, l, str(ve))
|
| 303 |
+
|
| 304 |
+
return cvt_fn
|
| 305 |
+
|
| 306 |
+
iso8601_date = Regex(
|
| 307 |
+
r"(?P<year>\d{4})(?:-(?P<month>\d\d)(?:-(?P<day>\d\d))?)?"
|
| 308 |
+
).set_name("ISO8601 date")
|
| 309 |
+
"ISO8601 date (``yyyy-mm-dd``)"
|
| 310 |
+
|
| 311 |
+
iso8601_datetime = Regex(
|
| 312 |
+
r"(?P<year>\d{4})-(?P<month>\d\d)-(?P<day>\d\d)[T ](?P<hour>\d\d):(?P<minute>\d\d)(:(?P<second>\d\d(\.\d*)?)?)?(?P<tz>Z|[+-]\d\d:?\d\d)?"
|
| 313 |
+
).set_name("ISO8601 datetime")
|
| 314 |
+
"ISO8601 datetime (``yyyy-mm-ddThh:mm:ss.s(Z|+-00:00)``) - trailing seconds, milliseconds, and timezone optional; accepts separating ``'T'`` or ``' '``"
|
| 315 |
+
|
| 316 |
+
uuid = Regex(r"[0-9a-fA-F]{8}(-[0-9a-fA-F]{4}){3}-[0-9a-fA-F]{12}").set_name("UUID")
|
| 317 |
+
"UUID (``xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx``)"
|
| 318 |
+
|
| 319 |
+
_html_stripper = any_open_tag.suppress() | any_close_tag.suppress()
|
| 320 |
+
|
| 321 |
+
@staticmethod
|
| 322 |
+
def strip_html_tags(s: str, l: int, tokens: ParseResults):
|
| 323 |
+
"""Parse action to remove HTML tags from web page HTML source
|
| 324 |
+
|
| 325 |
+
Example::
|
| 326 |
+
|
| 327 |
+
# strip HTML links from normal text
|
| 328 |
+
text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
|
| 329 |
+
td, td_end = make_html_tags("TD")
|
| 330 |
+
table_text = td + SkipTo(td_end).set_parse_action(pyparsing_common.strip_html_tags)("body") + td_end
|
| 331 |
+
print(table_text.parse_string(text).body)
|
| 332 |
+
|
| 333 |
+
Prints::
|
| 334 |
+
|
| 335 |
+
More info at the pyparsing wiki page
|
| 336 |
+
"""
|
| 337 |
+
return pyparsing_common._html_stripper.transform_string(tokens[0])
|
| 338 |
+
|
| 339 |
+
_commasepitem = (
|
| 340 |
+
Combine(
|
| 341 |
+
OneOrMore(
|
| 342 |
+
~Literal(",")
|
| 343 |
+
+ ~LineEnd()
|
| 344 |
+
+ Word(printables, exclude_chars=",")
|
| 345 |
+
+ Opt(White(" \t") + ~FollowedBy(LineEnd() | ","))
|
| 346 |
+
)
|
| 347 |
+
)
|
| 348 |
+
.streamline()
|
| 349 |
+
.set_name("commaItem")
|
| 350 |
+
)
|
| 351 |
+
comma_separated_list = DelimitedList(
|
| 352 |
+
Opt(quoted_string.copy() | _commasepitem, default="")
|
| 353 |
+
).set_name("comma separated list")
|
| 354 |
+
"""Predefined expression of 1 or more printable words or quoted strings, separated by commas."""
|
| 355 |
+
|
| 356 |
+
upcase_tokens = staticmethod(token_map(lambda t: t.upper()))
|
| 357 |
+
"""Parse action to convert tokens to upper case."""
|
| 358 |
+
|
| 359 |
+
downcase_tokens = staticmethod(token_map(lambda t: t.lower()))
|
| 360 |
+
"""Parse action to convert tokens to lower case."""
|
| 361 |
+
|
| 362 |
+
# fmt: off
|
| 363 |
+
url = Regex(
|
| 364 |
+
# https://mathiasbynens.be/demo/url-regex
|
| 365 |
+
# https://gist.github.com/dperini/729294
|
| 366 |
+
r"(?P<url>" +
|
| 367 |
+
# protocol identifier (optional)
|
| 368 |
+
# short syntax // still required
|
| 369 |
+
r"(?:(?:(?P<scheme>https?|ftp):)?\/\/)" +
|
| 370 |
+
# user:pass BasicAuth (optional)
|
| 371 |
+
r"(?:(?P<auth>\S+(?::\S*)?)@)?" +
|
| 372 |
+
r"(?P<host>" +
|
| 373 |
+
# IP address exclusion
|
| 374 |
+
# private & local networks
|
| 375 |
+
r"(?!(?:10|127)(?:\.\d{1,3}){3})" +
|
| 376 |
+
r"(?!(?:169\.254|192\.168)(?:\.\d{1,3}){2})" +
|
| 377 |
+
r"(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})" +
|
| 378 |
+
# IP address dotted notation octets
|
| 379 |
+
# excludes loopback network 0.0.0.0
|
| 380 |
+
# excludes reserved space >= 224.0.0.0
|
| 381 |
+
# excludes network & broadcast addresses
|
| 382 |
+
# (first & last IP address of each class)
|
| 383 |
+
r"(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])" +
|
| 384 |
+
r"(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}" +
|
| 385 |
+
r"(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))" +
|
| 386 |
+
r"|" +
|
| 387 |
+
# host & domain names, may end with dot
|
| 388 |
+
# can be replaced by a shortest alternative
|
| 389 |
+
# (?![-_])(?:[-\w\u00a1-\uffff]{0,63}[^-_]\.)+
|
| 390 |
+
r"(?:" +
|
| 391 |
+
r"(?:" +
|
| 392 |
+
r"[a-z0-9\u00a1-\uffff]" +
|
| 393 |
+
r"[a-z0-9\u00a1-\uffff_-]{0,62}" +
|
| 394 |
+
r")?" +
|
| 395 |
+
r"[a-z0-9\u00a1-\uffff]\." +
|
| 396 |
+
r")+" +
|
| 397 |
+
# TLD identifier name, may end with dot
|
| 398 |
+
r"(?:[a-z\u00a1-\uffff]{2,}\.?)" +
|
| 399 |
+
r")" +
|
| 400 |
+
# port number (optional)
|
| 401 |
+
r"(:(?P<port>\d{2,5}))?" +
|
| 402 |
+
# resource path (optional)
|
| 403 |
+
r"(?P<path>\/[^?# ]*)?" +
|
| 404 |
+
# query string (optional)
|
| 405 |
+
r"(\?(?P<query>[^#]*))?" +
|
| 406 |
+
# fragment (optional)
|
| 407 |
+
r"(#(?P<fragment>\S*))?" +
|
| 408 |
+
r")"
|
| 409 |
+
).set_name("url")
|
| 410 |
+
"""URL (http/https/ftp scheme)"""
|
| 411 |
+
# fmt: on
|
| 412 |
+
|
| 413 |
+
# pre-PEP8 compatibility names
|
| 414 |
+
convertToInteger = convert_to_integer
|
| 415 |
+
"""Deprecated - use :class:`convert_to_integer`"""
|
| 416 |
+
convertToFloat = convert_to_float
|
| 417 |
+
"""Deprecated - use :class:`convert_to_float`"""
|
| 418 |
+
convertToDate = convert_to_date
|
| 419 |
+
"""Deprecated - use :class:`convert_to_date`"""
|
| 420 |
+
convertToDatetime = convert_to_datetime
|
| 421 |
+
"""Deprecated - use :class:`convert_to_datetime`"""
|
| 422 |
+
stripHTMLTags = strip_html_tags
|
| 423 |
+
"""Deprecated - use :class:`strip_html_tags`"""
|
| 424 |
+
upcaseTokens = upcase_tokens
|
| 425 |
+
"""Deprecated - use :class:`upcase_tokens`"""
|
| 426 |
+
downcaseTokens = downcase_tokens
|
| 427 |
+
"""Deprecated - use :class:`downcase_tokens`"""
|
| 428 |
+
|
| 429 |
+
|
| 430 |
+
_builtin_exprs = [
|
| 431 |
+
v for v in vars(pyparsing_common).values() if isinstance(v, ParserElement)
|
| 432 |
+
]
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/core.py
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/diagram/__init__.py
ADDED
|
@@ -0,0 +1,656 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# mypy: ignore-errors
|
| 2 |
+
import railroad
|
| 3 |
+
from pip._vendor import pyparsing
|
| 4 |
+
import typing
|
| 5 |
+
from typing import (
|
| 6 |
+
List,
|
| 7 |
+
NamedTuple,
|
| 8 |
+
Generic,
|
| 9 |
+
TypeVar,
|
| 10 |
+
Dict,
|
| 11 |
+
Callable,
|
| 12 |
+
Set,
|
| 13 |
+
Iterable,
|
| 14 |
+
)
|
| 15 |
+
from jinja2 import Template
|
| 16 |
+
from io import StringIO
|
| 17 |
+
import inspect
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# Jinja2 template used to render the diagrams page. Variables read by the template:
#   embed    - when truthy, the <!DOCTYPE>/<html>/<head>/<body> wrapper is left out
#   head     - when set, emitted through the ``safe`` filter instead of the default <style> block
#   body     - emitted through the ``safe`` filter ahead of the diagrams
#   diagrams - iterable of items exposing ``title``, ``text`` and ``svg``
jinja2_template_source = """\
{% if not embed %}
<!DOCTYPE html>
<html>
<head>
{% endif %}
{% if not head %}
<style>
.railroad-heading {
font-family: monospace;
}
</style>
{% else %}
{{ head | safe }}
{% endif %}
{% if not embed %}
</head>
<body>
{% endif %}
{{ body | safe }}
{% for diagram in diagrams %}
<div class="railroad-group">
<h1 class="railroad-heading">{{ diagram.title }}</h1>
<div class="railroad-description">{{ diagram.text }}</div>
<div class="railroad-svg">
{{ diagram.svg }}
</div>
</div>
{% endfor %}
{% if not embed %}
</body>
</html>
{% endif %}
"""

# Compiled once at import time and reused by every railroad_to_html() call
template = Template(jinja2_template_source)
|
| 56 |
+
|
| 57 |
+
# Note: ideally this would be a dataclass, but we're supporting Python 3.5+ so we can't do this yet
|
| 58 |
+
# Fields:
#   name    - text used as the diagram's heading
#   diagram - the railroad item to draw, or None
#   index   - ordering key; diagrams are sorted by it and index 0 is labelled "(root)"
NamedDiagram = NamedTuple(
    "NamedDiagram",
    [("name", str), ("diagram", typing.Optional[railroad.DiagramItem]), ("index", int)],
)
"""
A simple structure for associating a name with a railroad diagram
"""
|
| 65 |
+
|
| 66 |
+
# Type variable for the value that an EditablePartial produces when it is called
T = TypeVar("T")
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class EachItem(railroad.Group):
    """
    Railroad item used to draw an Each expression.

    The given items are placed in a Choice, the Choice is wrapped in a
    OneOrMore, and that is wrapped in a Group whose label signals that
    all of the items must be matched.
    """

    all_label = "[ALL]"

    def __init__(self, *items):
        # Choice receives the index of the last item as its first argument
        last_index = len(items) - 1
        repeated_choice = railroad.OneOrMore(
            item=railroad.Choice(last_index, *items)
        )
        super().__init__(repeated_choice, label=self.all_label)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class AnnotatedItem(railroad.Group):
    """
    Group whose label is the given annotation wrapped in square brackets
    """

    def __init__(self, label: str, item):
        # A falsy label is handed through unchanged instead of becoming "[]"
        if label:
            shown_label = "[{}]".format(label)
        else:
            shown_label = label
        super().__init__(item=item, label=shown_label)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class EditablePartial(Generic[T]):
    """
    Acts like a functools.partial, but can be edited. In other words, it represents a type that hasn't yet been
    constructed.
    """

    # We need this here because the railroad constructors actually transform the data, so can't be called until the
    # entire tree is assembled

    def __init__(self, func: Callable[..., T], args: list, kwargs: dict):
        """
        :param func: the callable that will eventually be invoked
        :param args: positional arguments for ``func``; stored as given and editable afterwards
        :param kwargs: keyword arguments for ``func``; stored as given and editable afterwards
        """
        self.func = func
        self.args = args
        self.kwargs = kwargs

    @classmethod
    def from_call(cls, func: Callable[..., T], *args, **kwargs) -> "EditablePartial[T]":
        """
        If you call this function in the same way that you would call the constructor, it will store the arguments
        as you expect. For example EditablePartial.from_call(Fraction, 1, 3)() == Fraction(1, 3)
        """
        # Build through ``cls`` so that a subclass gets an instance of itself
        return cls(func=func, args=list(args), kwargs=kwargs)

    @property
    def name(self):
        """
        The ``name`` keyword argument stored for the call (KeyError if there is none)
        """
        return self.kwargs["name"]

    def __call__(self) -> T:
        """
        Evaluate the partial and return the result
        """
        args = self.args.copy()
        kwargs = self.kwargs.copy()

        # This is a helpful hack to allow you to specify varargs parameters (e.g. *args) as keyword args (e.g.
        # args=['list', 'of', 'things'])
        try:
            varargs_name = inspect.getfullargspec(self.func).varargs
        except TypeError:
            # getfullargspec cannot introspect every callable; in that case there is nothing to expand
            varargs_name = None
        if varargs_name is not None and varargs_name in kwargs:
            args += kwargs.pop(varargs_name)

        return self.func(*args, **kwargs)
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def railroad_to_html(diagrams: List[NamedDiagram], embed=False, **kwargs) -> str:
    """
    Given a list of NamedDiagram, produce a single HTML string that visualises those diagrams

    :param diagrams: the diagrams to render; entries whose ``diagram`` is None are skipped
    :param embed: forwarded to the template as ``embed``
    :param kwargs: kwargs to be passed in to the template; the ``css`` entry (if any) is also
        handed to ``writeStandalone``
    :returns: the rendered template
    """
    # The stylesheet does not change between diagrams, so look it up once
    css = kwargs.get("css")

    data = []
    for diagram in diagrams:
        if diagram.diagram is None:
            continue
        buffer = StringIO()
        try:
            diagram.diagram.writeStandalone(buffer.write, css=css)
        except AttributeError:
            # Fall back to writeSvg. Use a fresh buffer so that anything written before the
            # AttributeError was raised cannot end up in front of the fallback output.
            buffer = StringIO()
            diagram.diagram.writeSvg(buffer.write)
        title = diagram.name
        if diagram.index == 0:
            title += " (root)"
        data.append({"title": title, "text": "", "svg": buffer.getvalue()})

    return template.render(diagrams=data, embed=embed, **kwargs)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def resolve_partial(partial: "EditablePartial[T]") -> T:
    """
    Walk a structure of EditablePartials, lists and dicts and evaluate every
    partial found, returning the fully constructed result.

    Lists and dicts are rebuilt with their members resolved; any other value
    is returned unchanged.
    """
    if isinstance(partial, EditablePartial):
        # Resolve the arguments first so the callable only ever sees finished values
        partial.args = resolve_partial(partial.args)
        partial.kwargs = resolve_partial(partial.kwargs)
        return partial()

    if isinstance(partial, list):
        return [resolve_partial(member) for member in partial]

    if isinstance(partial, dict):
        return {key: resolve_partial(member) for key, member in partial.items()}

    return partial
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def to_railroad(
    element: pyparsing.ParserElement,
    diagram_kwargs: typing.Optional[dict] = None,
    vertical: int = 3,
    show_results_names: bool = False,
    show_groups: bool = False,
) -> List[NamedDiagram]:
    """
    Convert a pyparsing element tree into a list of diagrams. This is the recommended entrypoint to diagram
    creation if you want to access the Railroad tree before it is converted to HTML

    :param element: base element of the parser being diagrammed
    :param diagram_kwargs: kwargs to pass to the Diagram() constructor
    :param vertical: (optional) - int - limit at which number of alternatives should be
       shown vertically instead of horizontally
    :param show_results_names: bool to indicate whether results name annotations should be
       included in the diagram
    :param show_groups: bool to indicate whether groups should be highlighted with an unlabeled
       surrounding box
    :returns: the resolved diagrams, sorted by their ``index``
    """
    # Walk everything below the root, collecting state as we go
    lookup = ConverterState(diagram_kwargs=diagram_kwargs or {})
    _to_diagram_element(
        element,
        lookup=lookup,
        parent=None,
        vertical=vertical,
        show_results_names=show_results_names,
        show_groups=show_groups,
    )

    # If the root is still tracked it has not been turned into a diagram yet, so force it now
    root_id = id(element)
    if root_id in lookup:
        root_state = lookup[root_id]
        if not element.customName:
            root_state.name = ""
        root_state.mark_for_extraction(root_id, lookup, force=True)

    # The tree is finished, so the intermediate partials can become real Railroad elements
    pending = list(lookup.diagrams.values())
    if len(pending) > 1:
        # Keep only the first diagram for each name
        names_seen = set()
        unique = []
        for candidate in pending:
            title = candidate.name
            # "..." diagrams come from SkipTo elements, which are uninformative as subdiagrams
            if title == "..." or title is None or title in names_seen:
                continue
            names_seen.add(title)
            unique.append(candidate)
        pending = unique
    # A lone diagram is always kept, even when it has no name

    resolved = [resolve_partial(partial) for partial in pending]
    return sorted(resolved, key=lambda diag: diag.index)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
def _should_vertical(
    specification: int, exprs: Iterable[pyparsing.ParserElement]
) -> bool:
    """
    Decide whether a list of elements should be laid out vertically.

    Returns False when ``specification`` is None; otherwise returns whether the
    number of visible expressions in ``exprs`` reaches ``specification``.
    """
    if specification is not None:
        return len(_visible_exprs(exprs)) >= specification
    return False
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
class ElementState:
    """
    Bookkeeping kept for one pyparsing element while its tree is being converted
    """

    # Note: this should be a dataclass, but we have to support Python 3.5
    def __init__(
        self,
        element: pyparsing.ParserElement,
        converted: EditablePartial,
        parent: EditablePartial,
        number: int,
        name: str = None,
        parent_index: typing.Optional[int] = None,
    ):
        #: The pyparsing element that this represents
        self.element: pyparsing.ParserElement = element
        #: The output Railroad element in an unconverted state
        self.converted: EditablePartial = converted
        #: The parent Railroad element, which we store so that we can extract this if it's duplicated
        self.parent: EditablePartial = parent
        #: The index of this inside its parent
        self.parent_index: typing.Optional[int] = parent_index
        #: The order in which we found this element, used for sorting diagrams if this is extracted into a diagram
        self.number: int = number
        #: The name of the element
        self.name: typing.Optional[str] = name
        #: If true, we should extract this out into a subdiagram
        self.extract: bool = False
        #: If true, all of this element's children have been filled out
        self.complete: bool = False

    def mark_for_extraction(
        self, el_id: int, state: "ConverterState", name: str = None, force: bool = False
    ):
        """
        Flag this element for extraction into its own sub-diagram, and perform the extraction
        straight away when that is already possible.

        :param el_id: id of the element
        :param state: element/diagram state tracker
        :param name: name to use for this element's text
        :param force: If true, force extraction now, regardless of the state of this. Only useful for extracting the
            root element when we know we're finished
        """
        self.extract = True

        # An existing name wins; otherwise prefer the supplied name, then the element's custom name
        if not self.name:
            self.name = name or self.element.customName or ""

        # Being flagged does not mean extraction can happen yet: children may still be missing.
        # Elements that are not worth a diagram of their own (string literals etc.) are left in place.
        if force:
            state.extract_into_diagram(el_id)
        elif self.complete and _worth_extracting(self.element):
            state.extract_into_diagram(el_id)
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
class ConverterState:
    """
    Shared state carried through the recursive walk of the element tree
    """

    def __init__(self, diagram_kwargs: typing.Optional[dict] = None):
        #: A dictionary mapping ParserElements to state relating to them
        self._element_diagram_states: Dict[int, ElementState] = {}
        #: A dictionary mapping ParserElement IDs to subdiagrams generated from them
        self.diagrams: Dict[int, EditablePartial[NamedDiagram]] = {}
        #: The index of the next unnamed element
        self.unnamed_index: int = 1
        #: The index of the next element. This is used for sorting
        self.index: int = 0
        #: Shared kwargs that are used to customize the construction of diagrams
        self.diagram_kwargs: dict = diagram_kwargs or {}
        self.extracted_diagram_names: Set[str] = set()

    # The item protocol below simply delegates to the per-element state dictionary

    def __setitem__(self, key: int, value: ElementState):
        self._element_diagram_states[key] = value

    def __getitem__(self, key: int) -> ElementState:
        return self._element_diagram_states[key]

    def __delitem__(self, key: int):
        del self._element_diagram_states[key]

    def __contains__(self, key: int):
        return key in self._element_diagram_states

    def generate_unnamed(self) -> int:
        """
        Advance the unnamed-diagram counter and return its new value
        """
        self.unnamed_index += 1
        return self.unnamed_index

    def generate_index(self) -> int:
        """
        Advance the diagram index counter and return its new value
        """
        self.index += 1
        return self.index

    def extract_into_diagram(self, el_id: int):
        """
        Move the element with the given id out into a diagram of its own.

        The element's slot in its parent (if it has one) is replaced by a NonTerminal
        carrying the element's name, a NamedDiagram partial is stored under ``el_id``,
        and the element's state entry is removed.
        """
        position = self[el_id]

        # Put a named reference where the element's definition used to be
        parent = position.parent
        if parent:
            placeholder = EditablePartial.from_call(
                railroad.NonTerminal, text=position.name
            )
            parent_kwargs = parent.kwargs
            if "item" in parent_kwargs:
                parent_kwargs["item"] = placeholder
            elif "items" in parent_kwargs:
                parent_kwargs["items"][position.parent_index] = placeholder

        # For a Group, diagram its content directly; the diagram's name serves as the title
        converted = position.converted
        content = converted.kwargs["item"] if converted.func == railroad.Group else converted

        diagram = EditablePartial.from_call(
            railroad.Diagram, content, **self.diagram_kwargs
        )
        self.diagrams[el_id] = EditablePartial.from_call(
            NamedDiagram,
            name=position.name,
            diagram=diagram,
            index=position.number,
        )

        del self[el_id]
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
def _worth_extracting(element: pyparsing.ParserElement) -> bool:
    """
    Tell whether an element is complex enough to get a sub-diagram of its own,
    which is the case when at least one of its children has children itself.
    """
    for child in element.recurse():
        if child.recurse():
            return True
    return False
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
def _apply_diagram_item_enhancements(fn):
    """
    decorator to ensure enhancements to a diagram item (such as results name annotations)
    get applied on return from _to_diagram_element (we do this since there are several
    returns in _to_diagram_element)

    :param fn: the conversion function to wrap; it is called with the same eight arguments,
        positionally
    :returns: the wrapping function
    """
    # Imported locally so this block does not depend on a module-level import
    import functools

    # wraps() keeps the wrapped function's name and docstring on the wrapper
    @functools.wraps(fn)
    def _inner(
        element: pyparsing.ParserElement,
        parent: typing.Optional[EditablePartial],
        lookup: ConverterState = None,
        vertical: int = None,
        index: int = 0,
        name_hint: str = None,
        show_results_names: bool = False,
        show_groups: bool = False,
    ) -> typing.Optional[EditablePartial]:
        ret = fn(
            element,
            parent,
            lookup,
            vertical,
            index,
            name_hint,
            show_results_names,
            show_groups,
        )

        # apply annotation for results name, if present
        if show_results_names and ret is not None:
            element_results_name = element.resultsName
            if element_results_name:
                # add "*" to indicate if this is a "list all results" name
                element_results_name += "" if element.modalResults else "*"
                ret = EditablePartial.from_call(
                    railroad.Group, item=ret, label=element_results_name
                )

        return ret

    return _inner
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def _visible_exprs(exprs: Iterable[pyparsing.ParserElement]):
    """
    Return, as a list, the expressions that have neither a custom name nor a
    results name and are not instances of one of the non-diagramming types.
    """
    non_diagramming_exprs = (
        pyparsing.ParseElementEnhance,
        pyparsing.PositionToken,
        pyparsing.And._ErrorStop,
    )
    visible = []
    for expr in exprs:
        if expr.customName or expr.resultsName:
            continue
        if isinstance(expr, non_diagramming_exprs):
            continue
        visible.append(expr)
    return visible
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
@_apply_diagram_item_enhancements
def _to_diagram_element(
    element: pyparsing.ParserElement,
    parent: typing.Optional[EditablePartial],
    lookup: ConverterState = None,
    vertical: int = None,
    index: int = 0,
    name_hint: str = None,
    show_results_names: bool = False,
    show_groups: bool = False,
) -> typing.Optional[EditablePartial]:
    """
    Recursively converts a PyParsing Element to a railroad Element

    :param element: The pyparsing element to convert
    :param parent: The parent of this element in the output tree
    :param lookup: The shared converter state that keeps track of useful things
    :param vertical: Threshold passed to ``_should_vertical``: a list of elements is laid out vertically once
        it has at least this many visible expressions. If None, lists are never laid out vertically
    :param index: The index of this element within the parent
    :param name_hint: If provided, this will override the generated name
    :param show_results_names: bool flag indicating whether to add annotations for results names
    :param show_groups: bool flag indicating whether to show groups using bounding box
    :returns: The converted version of the input element, but as a Partial that hasn't yet been constructed,
        or None if the element is not to be shown in the diagram
    """
    exprs = element.recurse()
    name = name_hint or element.customName or element.__class__.__name__

    # Python's id() is used to provide a unique identifier for elements
    el_id = id(element)

    element_results_name = element.resultsName

    # Here we basically bypass processing certain wrapper elements if they contribute nothing to the diagram
    if not element.customName:
        if isinstance(
            element,
            (
                # pyparsing.TokenConverter,
                # pyparsing.Forward,
                pyparsing.Located,
            ),
        ):
            # However, if this element has a useful custom name, and its child does not, we can pass it on to the child
            if exprs:
                if not exprs[0].customName:
                    propagated_name = name
                else:
                    propagated_name = None

                return _to_diagram_element(
                    element.expr,
                    parent=parent,
                    lookup=lookup,
                    vertical=vertical,
                    index=index,
                    name_hint=propagated_name,
                    show_results_names=show_results_names,
                    show_groups=show_groups,
                )

    # If the element isn't worth extracting, we always treat it as the first time we say it
    if _worth_extracting(element):
        if el_id in lookup:
            # If we've seen this element exactly once before, we are only just now finding out that it's a duplicate,
            # so we have to extract it into a new diagram.
            looked_up = lookup[el_id]
            looked_up.mark_for_extraction(el_id, lookup, name=name_hint)
            ret = EditablePartial.from_call(railroad.NonTerminal, text=looked_up.name)
            return ret

        elif el_id in lookup.diagrams:
            # If we have seen the element at least twice before, and have already extracted it into a subdiagram, we
            # just put in a marker element that refers to the sub-diagram
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )
            return ret

    # Recursively convert child elements
    # Here we find the most relevant Railroad element for matching pyparsing Element
    # We use ``items=[]`` here to hold the place for where the child elements will go once created
    if isinstance(element, pyparsing.And):
        # detect And's created with ``expr*N`` notation - for these use a OneOrMore with a repeat
        # (all will have the same name, and resultsName)
        if not exprs:
            return None
        if len(set((e.name, e.resultsName) for e in exprs)) == 1:
            ret = EditablePartial.from_call(
                railroad.OneOrMore, item="", repeat=str(len(exprs))
            )
        elif _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Stack, items=[])
        else:
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif isinstance(element, (pyparsing.Or, pyparsing.MatchFirst)):
        if not exprs:
            return None
        if _should_vertical(vertical, exprs):
            ret = EditablePartial.from_call(railroad.Choice, 0, items=[])
        else:
            ret = EditablePartial.from_call(railroad.HorizontalChoice, items=[])
    elif isinstance(element, pyparsing.Each):
        if not exprs:
            return None
        ret = EditablePartial.from_call(EachItem, items=[])
    elif isinstance(element, pyparsing.NotAny):
        ret = EditablePartial.from_call(AnnotatedItem, label="NOT", item="")
    elif isinstance(element, pyparsing.FollowedBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKAHEAD", item="")
    elif isinstance(element, pyparsing.PrecededBy):
        ret = EditablePartial.from_call(AnnotatedItem, label="LOOKBEHIND", item="")
    elif isinstance(element, pyparsing.Group):
        if show_groups:
            ret = EditablePartial.from_call(AnnotatedItem, label="", item="")
        else:
            ret = EditablePartial.from_call(railroad.Group, label="", item="")
    elif isinstance(element, pyparsing.TokenConverter):
        label = type(element).__name__.lower()
        if label == "tokenconverter":
            ret = EditablePartial.from_call(railroad.Sequence, items=[])
        else:
            ret = EditablePartial.from_call(AnnotatedItem, label=label, item="")
    elif isinstance(element, pyparsing.Opt):
        ret = EditablePartial.from_call(railroad.Optional, item="")
    elif isinstance(element, pyparsing.OneOrMore):
        ret = EditablePartial.from_call(railroad.OneOrMore, item="")
    elif isinstance(element, pyparsing.ZeroOrMore):
        ret = EditablePartial.from_call(railroad.ZeroOrMore, item="")
    elif isinstance(element, pyparsing.Empty) and not element.customName:
        # Skip unnamed "Empty" elements
        ret = None
    elif isinstance(element, pyparsing.ParseElementEnhance):
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    elif len(exprs) > 0 and not element_results_name:
        ret = EditablePartial.from_call(railroad.Group, item="", label=name)
    elif len(exprs) > 0:
        ret = EditablePartial.from_call(railroad.Sequence, items=[])
    else:
        terminal = EditablePartial.from_call(railroad.Terminal, element.defaultName)
        ret = terminal

    if ret is None:
        return None

    # Indicate this element's position in the tree so we can extract it if necessary
    lookup[el_id] = ElementState(
        element=element,
        converted=ret,
        parent=parent,
        parent_index=index,
        number=lookup.generate_index(),
    )
    if element.customName:
        lookup[el_id].mark_for_extraction(el_id, lookup, element.customName)

    i = 0
    for expr in exprs:
        # Add a placeholder index in case we have to extract the child before we even add it to the parent
        if "items" in ret.kwargs:
            ret.kwargs["items"].insert(i, None)

        item = _to_diagram_element(
            expr,
            parent=ret,
            lookup=lookup,
            vertical=vertical,
            index=i,
            show_results_names=show_results_names,
            show_groups=show_groups,
        )

        # Some elements don't need to be shown in the diagram
        if item is not None:
            if "item" in ret.kwargs:
                ret.kwargs["item"] = item
            elif "items" in ret.kwargs:
                # If we've already extracted the child, don't touch this index, since it's occupied by a nonterminal
                ret.kwargs["items"][i] = item
                i += 1
        elif "items" in ret.kwargs:
            # If we're supposed to skip this element, remove it from the parent
            del ret.kwargs["items"][i]

    # If all this items children are none, skip this item
    if ret and (
        ("items" in ret.kwargs and len(ret.kwargs["items"]) == 0)
        or ("item" in ret.kwargs and ret.kwargs["item"] is None)
    ):
        ret = EditablePartial.from_call(railroad.Terminal, name)

    # Mark this element as "complete", ie it has all of its children
    if el_id in lookup:
        lookup[el_id].complete = True

    if el_id in lookup and lookup[el_id].extract and lookup[el_id].complete:
        lookup.extract_into_diagram(el_id)
        if ret is not None:
            ret = EditablePartial.from_call(
                railroad.NonTerminal, text=lookup.diagrams[el_id].kwargs["name"]
            )

    return ret
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/exceptions.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# exceptions.py
|
| 2 |
+
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
import typing
|
| 6 |
+
|
| 7 |
+
from .util import (
|
| 8 |
+
col,
|
| 9 |
+
line,
|
| 10 |
+
lineno,
|
| 11 |
+
_collapse_string_to_ranges,
|
| 12 |
+
replaced_by_pep8,
|
| 13 |
+
)
|
| 14 |
+
from .unicode import pyparsing_unicode as ppu
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class ExceptionWordUnicode(ppu.Latin1, ppu.LatinA, ppu.LatinB, ppu.Greek, ppu.Cyrillic):
    """Combination of the Latin1, LatinA, LatinB, Greek and Cyrillic unicode sets."""
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Alphanumerics of the combined unicode sets, collapsed for use inside a regex character class
_extract_alphanums = _collapse_string_to_ranges(ExceptionWordUnicode.alphanums)
# Matches either a run of 1 to 16 of those alphanumerics (captured as group 1) or one character matched by "."
_exception_word_extractor = re.compile("([" + _extract_alphanums + "]{1,16})|.")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class ParseBaseException(Exception):
    """Root of the exception hierarchy raised while parsing at runtime.

    Each instance stores the parsed string (``pstr``), the failure offset
    (``loc``), the failure message (``msg``) and the parser element that was
    passed to the constructor (``parser_element``).
    """

    loc: int
    msg: str
    pstr: str
    parser_element: typing.Any  # "ParserElement"
    args: typing.Tuple[str, int, typing.Optional[str]]

    __slots__ = (
        "loc",
        "msg",
        "pstr",
        "parser_element",
        "args",
    )

    # Performance tuning: a great many of these get constructed, so the
    # constructor is deliberately nothing but plain attribute assignments.
    def __init__(
        self,
        pstr: str,
        loc: int = 0,
        msg: typing.Optional[str] = None,
        elem=None,
    ):
        if msg is None:
            # only one string was supplied: treat it as the message and
            # leave the parsed text empty
            self.msg = pstr
            self.pstr = ""
        else:
            self.msg = msg
            self.pstr = pstr
        self.loc = loc
        self.parser_element = elem
        self.args = (pstr, loc, msg)

    @staticmethod
    def explain_exception(exc, depth=16):
        """
        Build a readable report for ``exc`` by walking its traceback and naming
        the pyparsing expressions (and other callables) found along the way.

        Parameters:

        - exc - exception raised during parsing (it does not have to be a
          ParseException, so exceptions raised inside parse actions are accepted)
        - depth (default=16) - how many levels of the stack trace to list; ``None``
          lists the full stack trace; ``0`` shows only the failing input line, the
          position marker, and the exception string

        Returns a multi-line string; the stack entries are ParserElements and/or
        function names taken from the exception's traceback.
        """
        import inspect
        from .core import ParserElement

        if depth is None:
            depth = sys.getrecursionlimit()

        report = []
        if isinstance(exc, ParseBaseException):
            # failing input line followed by a caret under the failing column
            report.append(exc.line)
            report.append(" " * (exc.column - 1) + "^")
        report.append(f"{type(exc).__name__}: {exc}")

        if depth <= 0:
            return "\n".join(report)

        frames = inspect.getinnerframes(exc.__traceback__, context=depth)
        reported_ids = set()
        remaining = depth
        for frame_info in frames[-depth:]:
            frame = frame_info[0]
            owner = frame.f_locals.get("self", None)

            if isinstance(owner, ParserElement):
                # list each parser element once, and only from its parse methods
                if not frame.f_code.co_name.startswith(
                    ("parseImpl", "_parseNoCache")
                ):
                    continue
                if id(owner) in reported_ids:
                    continue
                reported_ids.add(id(owner))

                owner_type = type(owner)
                report.append(
                    f"{owner_type.__module__}.{owner_type.__name__} - {owner}"
                )
            elif owner is not None:
                owner_type = type(owner)
                report.append(f"{owner_type.__module__}.{owner_type.__name__}")
            else:
                func_name = frame.f_code.co_name
                if func_name in ("wrapper", "<module>"):
                    continue
                report.append(func_name)

            # only entries that were actually listed count against the limit
            remaining -= 1
            if not remaining:
                break

        return "\n".join(report)

    @classmethod
    def _from_exception(cls, pe):
        """
        Internal factory: create an instance of ``cls`` carrying the same
        ``pstr``, ``loc``, ``msg`` and ``parser_element`` as ``pe``, so that
        subclasses need not agree on ``__init__`` signatures.
        """
        return cls(pe.pstr, pe.loc, pe.msg, pe.parser_element)

    @property
    def line(self) -> str:
        """
        The line of input text in which the exception occurred.
        """
        return line(self.loc, self.pstr)

    @property
    def lineno(self) -> int:
        """
        The 1-based number of the input line in which the exception occurred.
        """
        return lineno(self.loc, self.pstr)

    @property
    def col(self) -> int:
        """
        The 1-based column, within its line, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    @property
    def column(self) -> int:
        """
        The 1-based column, within its line, at which the exception occurred.
        """
        return col(self.loc, self.pstr)

    # pre-PEP8 compatibility
    @property
    def parserElement(self):
        return self.parser_element

    @parserElement.setter
    def parserElement(self, elem):
        self.parser_element = elem

    def __str__(self) -> str:
        if not self.pstr:
            found_text = ""
        elif self.loc >= len(self.pstr):
            found_text = ", found end of text"
        else:
            # report the word (or single character) sitting at the error location
            word = _exception_word_extractor.match(self.pstr, self.loc)
            if word is None:
                found = self.pstr[self.loc : self.loc + 1]
            else:
                found = word.group(0)
            found_text = (", found %r" % found).replace(r"\\", "\\")
        return f"{self.msg}{found_text} (at char {self.loc}), (line:{self.lineno}, col:{self.column})"

    def __repr__(self):
        return str(self)

    def mark_input_line(
        self, marker_string: typing.Optional[str] = None, *, markerString: str = ">!<"
    ) -> str:
        """
        Return the input line on which the exception occurred, with a marker
        string inserted at the exception's column and surrounding whitespace
        stripped. ``marker_string`` takes precedence over the pre-PEP8
        ``markerString`` whenever it is not ``None``.
        """
        marker = markerString if marker_string is None else marker_string
        text = self.line
        offset = self.column - 1
        if marker:
            text = text[:offset] + marker + text[offset:]
        return text.strip()

    def explain(self, depth=16) -> str:
        """
        Describe this exception as a list of the pyparsing expressions found in
        its Python traceback; delegates to :meth:`explain_exception`.

        Parameters:

        - depth (default=16) - how many levels of the stack trace to list; ``None``
          lists the full stack trace; ``0`` shows only the failing input line, the
          position marker, and the exception string

        Returns a multi-line string; the stack entries are ParserElements and/or
        function names taken from the exception's traceback.

        Example::

            expr = pp.Word(pp.nums) * 3
            try:
                expr.parse_string("123 456 A789")
            except pp.ParseException as pe:
                print(pe.explain(depth=0))

        prints::

            123 456 A789
                    ^
            ParseException: Expected W:(0-9), found 'A' (at char 8), (line:1, col:9)

        Note: the report contains the string forms of the expressions that failed
        to parse. Giving expressions identifiable names with `set_name` makes the
        report easier to read; the default string forms may be cryptic.

        Note: pyparsing's default truncation of exception tracebacks may also
        shorten the stack of expressions shown by ``explain``. To get the full
        listing you may have to set ``ParserElement.verbose_stacktrace = True``
        """
        return self.explain_exception(self, depth)

    # fmt: off
    @replaced_by_pep8(mark_input_line)
    def markInputline(self): ...
    # fmt: on
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
class ParseException(ParseBaseException):
    """
    Raised when a parse expression fails to match the input string.

    Example::

        try:
            Word(nums).set_name("integer").parse_string("ABC")
        except ParseException as pe:
            print(pe)
            print("column: {}".format(pe.column))

    prints::

        Expected integer (at char 0), (line:1, col:1)
        column: 1

    """
|
| 270 |
+
|
| 271 |
+
|
| 272 |
+
class ParseFatalException(ParseBaseException):
    """
    Exception that user code can raise when it detects inconsistent
    parse content; all parsing stops immediately.
    """
|
| 277 |
+
|
| 278 |
+
|
| 279 |
+
class ParseSyntaxException(ParseFatalException):
    """
    Variant of :class:`ParseFatalException` that is raised internally
    when an :class:`ErrorStop<And._ErrorStop>` ('-' operator) signals
    that an unbacktrackable syntax error has been found and parsing
    must stop immediately.
    """
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
class RecursiveGrammarException(Exception):
    """
    Raised by :class:`ParserElement.validate` when the grammar could be
    left-recursive; the parser may need left recursion enabled via
    :class:`ParserElement.enable_left_recursion<ParserElement.enable_left_recursion>`
    """

    def __init__(self, parseElementList):
        # keep the given element trace so that __str__ can report it
        self.parseElementTrace = parseElementList

    def __str__(self) -> str:
        return "RecursiveGrammarException: {}".format(self.parseElementTrace)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/helpers.py
ADDED
|
@@ -0,0 +1,1100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# helpers.py
|
| 2 |
+
import html.entities
|
| 3 |
+
import re
|
| 4 |
+
import sys
|
| 5 |
+
import typing
|
| 6 |
+
|
| 7 |
+
from . import __diag__
|
| 8 |
+
from .core import *
|
| 9 |
+
from .util import (
|
| 10 |
+
_bslash,
|
| 11 |
+
_flatten,
|
| 12 |
+
_escape_regex_range_chars,
|
| 13 |
+
replaced_by_pep8,
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
#
|
| 18 |
+
# global helpers
|
| 19 |
+
#
|
| 20 |
+
def counted_array(
    expr: ParserElement,
    int_expr: typing.Optional[ParserElement] = None,
    *,
    intExpr: typing.Optional[ParserElement] = None,
) -> ParserElement:
    """Build an expression for a count-prefixed list of ``expr`` matches.

    The generated pattern has the form::

        integer expr expr expr...

    in which the leading integer says how many ``expr`` items follow.
    The count token itself is removed from the results, leaving the
    matched ``expr`` tokens as a list.

    ``int_expr``, when given, should be a pyparsing expression that
    produces an integer value; it is copied before being modified. When
    omitted, a run of decimal digits converted with ``int`` is used.
    ``intExpr`` is the pre-PEP8 spelling of the same argument.

    Example::

        counted_array(Word(alphas)).parse_string('2 ab cd ef')  # -> ['ab', 'cd']

        # in this parser, the leading integer value is given in binary,
        # '10' indicating that 2 values are in the array
        binary_constant = Word('01').set_parse_action(lambda t: int(t[0], 2))
        counted_array(Word(alphas), int_expr=binary_constant).parse_string('10 ab cd ef')  # -> ['ab', 'cd']

        # if other fields must be parsed after the count but before the
        # list items, give the fields results names and they will
        # be preserved in the returned ParseResults:
        count_with_metadata = integer + Word(alphas)("type")
        typed_array = counted_array(Word(alphanums), int_expr=count_with_metadata)("items")
        result = typed_array.parse_string("3 bool True True False")
        print(result.dump())

        # prints
        # ['True', 'True', 'False']
        # - items: ['True', 'True', 'False']
        # - type: 'bool'
    """
    count_expr = intExpr or int_expr
    array_expr = Forward()

    def count_field_parse_action(instring, loc, tokens):
        # redefine the array expression from the count that was just parsed
        nonlocal array_expr
        count = tokens[0]
        if count:
            array_expr <<= expr * count
        else:
            array_expr <<= Empty()
        # empty the token list in place; named results are left alone
        del tokens[:]

    def to_int(tokens):
        return int(tokens[0])

    if count_expr is None:
        count_expr = Word(nums).set_parse_action(to_int)
    else:
        # work on a copy so the caller's expression is not modified
        count_expr = count_expr.copy()
    count_expr.set_name("arrayLen")
    count_expr.add_parse_action(count_field_parse_action, call_during_try=True)

    counted = count_expr + array_expr
    return counted.set_name("(len) " + str(expr) + "...")
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def match_previous_literal(expr: ParserElement) -> ParserElement:
    """Return an expression that matches a literal repeat of whatever
    ``expr`` matched earlier in the parse. For example::

        first = Word(nums)
        second = match_previous_literal(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Since the repeat is matched
    as a literal, it also matches the leading ``"1:1"`` in ``"1:10"``.
    Use :class:`match_previous_expr` if that is not wanted. Do *not* use
    with packrat parsing enabled.
    """
    rep = Forward()

    def copy_token_to_repeater(instring, loc, tokens):
        # point the repeater at the literal form of what was just matched
        if not tokens:
            rep << Empty()
        elif len(tokens) == 1:
            rep << tokens[0]
        else:
            # several (possibly nested) tokens: match them in sequence
            flat_tokens = _flatten(tokens.as_list())
            rep << And(Literal(tok) for tok in flat_tokens)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def match_previous_expr(expr: ParserElement) -> ParserElement:
    """Helper to define an expression that is indirectly defined from
    the tokens matched in a previous expression, that is, it looks for
    a 'repeat' of a previous expression. For example::

        first = Word(nums)
        second = match_previous_expr(first)
        match_expr = first + ":" + second

    will match ``"1:1"``, but not ``"1:2"``. Because this
    matches by expressions, will *not* match the leading ``"1:1"``
    in ``"1:10"``; the expressions are evaluated first, and then
    compared, so ``"1"`` is compared with ``"10"``. Do *not* use
    with packrat parsing enabled.

    Parameters:

    - ``expr`` - the earlier expression whose matched tokens must be
      repeated; a parse action is added to it

    Returns a :class:`Forward` wrapping a copy of ``expr``.

    Raises (at parse time) :class:`ParseException` when the tokens matched
    by the returned expression differ from those last matched by ``expr``.
    """
    rep = Forward()
    # the repeater parses with a copy of expr; the token comparison is
    # installed later as a parse action on the Forward itself
    e2 = expr.copy()
    rep <<= e2

    def copy_token_to_repeater(s, l, t):
        # remember (flattened) what expr matched this time
        matchTokens = _flatten(t.as_list())

        def must_match_these_tokens(s, l, t):
            theseTokens = _flatten(t.as_list())
            if theseTokens != matchTokens:
                # fixed: the message previously lacked the space after "found"
                raise ParseException(
                    s, l, f"Expected {matchTokens}, found {theseTokens}"
                )

        # every new match of expr replaces the comparison against the old one
        rep.set_parse_action(must_match_these_tokens, callDuringTry=True)

    expr.add_parse_action(copy_token_to_repeater, callDuringTry=True)
    rep.set_name("(prev) " + str(expr))
    return rep
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def one_of(
    strs: Union[typing.Iterable[str], str],
    caseless: bool = False,
    use_regex: bool = True,
    as_keyword: bool = False,
    *,
    useRegex: bool = True,
    asKeyword: bool = False,
) -> ParserElement:
    """Build an expression matching any one of a set of literal strings.

    Alternatives are reordered where needed so that a shorter string never
    hides a longer one that starts with it, whatever order they were given
    in, and duplicates are dropped.

    Parameters:

    - ``strs`` - a string of space-delimited literals, or a collection of
      string literals
    - ``caseless`` - treat all literals as caseless - (default= ``False``)
    - ``use_regex`` - when true, try to build a single :class:`Regex` for the
      alternatives; if that raises ``re.error``, or when false, a
      :class:`MatchFirst` of individual literals is built instead -
      (default= ``True``)
    - ``as_keyword`` - enforce :class:`Keyword`-style matching on the
      generated expressions - (default= ``False``)
    - ``asKeyword`` and ``useRegex`` are retained for pre-PEP8 compatibility,
      but will be removed in a future release

    Returns :class:`NoMatch` when no symbols are given. Raises ``TypeError``
    when ``strs`` is neither a string nor an iterable.

    Example::

        comp_oper = one_of("< = > <= >= !=")
        var = Word(alphas)
        number = Word(nums)
        term = var | number
        comparison_expr = term + comp_oper + term
        print(comparison_expr.search_string("B = 12 AA=23 B<=AA AA>12"))

    prints::

        [['B', '=', '12'], ['AA', '=', '23'], ['B', '<=', 'AA'], ['AA', '>', '12']]
    """
    # merge the PEP8 and pre-PEP8 spellings of the flags
    asKeyword = asKeyword or as_keyword
    useRegex = useRegex and use_regex

    if (
        isinstance(caseless, str_type)
        and __diag__.warn_on_multiple_string_args_to_oneof
    ):
        warnings.warn(
            "More than one string argument passed to one_of, pass"
            " choices as a list or space-delimited string",
            stacklevel=2,
        )

    if caseless:

        def isequal(a, b):
            return a.upper() == b.upper()

        def masks(a, b):
            return b.upper().startswith(a.upper())

        parseElementClass = CaselessKeyword if asKeyword else CaselessLiteral
    else:

        def isequal(a, b):
            return a == b

        def masks(a, b):
            return b.startswith(a)

        parseElementClass = Keyword if asKeyword else Literal

    symbols: List[str] = []
    if isinstance(strs, str_type):
        strs = typing.cast(str, strs)
        symbols = strs.split()
    elif isinstance(strs, Iterable):
        symbols = list(strs)
    else:
        raise TypeError("Invalid argument to one_of, expected string or iterable")
    if not symbols:
        return NoMatch()

    # Drop duplicates and move any longer symbol ahead of a shorter one that
    # would mask it. Nothing to do when every symbol is a single character.
    if any(len(sym) > 1 for sym in symbols):
        idx = 0
        while idx < len(symbols) - 1:
            cur = symbols[idx]
            for pos, other in enumerate(symbols[idx + 1 :], start=idx + 1):
                if isequal(other, cur):
                    del symbols[pos]
                    break
                if masks(cur, other):
                    del symbols[pos]
                    symbols.insert(idx, other)
                    break
            else:
                # nothing after this position conflicts with it; move on
                idx += 1

    if useRegex:
        re_flags: int = re.IGNORECASE if caseless else 0

        try:
            if all(len(sym) == 1 for sym in symbols):
                # only single characters: a character-class pattern suffices
                escaped = "".join(_escape_regex_range_chars(sym) for sym in symbols)
                patt = f"[{escaped}]"
            else:
                patt = "|".join(re.escape(sym) for sym in symbols)

            # wrap with \b word break markers if defining as keywords
            if asKeyword:
                patt = rf"\b(?:{patt})\b"

            ret = Regex(patt, flags=re_flags).set_name(" | ".join(symbols))

            if caseless:
                # report each symbol as it was specified, not with the
                # casing that happened to be found in the input string
                symbol_map = {sym.lower(): sym for sym in symbols}
                ret.add_parse_action(lambda s, l, t: symbol_map[t[0].lower()])

            return ret

        except re.error:
            warnings.warn(
                "Exception creating Regex for one_of, building MatchFirst", stacklevel=2
            )

    # last resort, just use MatchFirst
    alternatives = MatchFirst(parseElementClass(sym) for sym in symbols)
    return alternatives.set_name(" | ".join(symbols))
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def dict_of(key: ParserElement, value: ParserElement) -> ParserElement:
    """Define a dictionary expression from a key pattern and a value pattern.

    Wraps the patterns in :class:`Group`, :class:`OneOrMore` and
    :class:`Dict`, in that nesting order, so the caller does not have to.
    The key pattern can include delimiting markers or punctuation, as long
    as they are suppressed, thereby leaving the significant key text. The
    value pattern can include named results, so that the :class:`Dict`
    results can include named token fields.

    Example::

        text = "shape: SQUARE posn: upper left color: light blue texture: burlap"
        attr_expr = (label + Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join))
        print(attr_expr[1, ...].parse_string(text).dump())

        attr_label = label
        attr_value = Suppress(':') + OneOrMore(data_word, stop_on=label).set_parse_action(' '.join)

        # similar to Dict, but simpler call format
        result = dict_of(attr_label, attr_value).parse_string(text)
        print(result.dump())
        print(result['shape'])
        print(result.shape)  # object attribute access works too
        print(result.as_dict())

    prints::

        [['shape', 'SQUARE'], ['posn', 'upper left'], ['color', 'light blue'], ['texture', 'burlap']]
        - color: 'light blue'
        - posn: 'upper left'
        - shape: 'SQUARE'
        - texture: 'burlap'
        SQUARE
        SQUARE
        {'color': 'light blue', 'shape': 'SQUARE', 'posn': 'upper left', 'texture': 'burlap'}
    """
    # one grouped key/value pair, repeated one or more times
    entry = Group(key + value)
    return Dict(OneOrMore(entry))
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
def original_text_for(
    expr: ParserElement, as_string: bool = True, *, asString: bool = True
) -> ParserElement:
    """Wrap ``expr`` so that it yields the original, untokenized input text
    it matched. Useful to restore the parsed fields of an HTML start
    tag into the raw tag text itself, or to revert separate tokens with
    intervening whitespace back to the original matching input text. By
    default, the result is a string containing the original parsed text.

    If ``as_string`` is passed as ``False``, the result is instead a
    :class:`ParseResults` holding any results names that were originally
    matched, plus a single token with the original matched text from the
    input string. So if the expression passed to
    :class:`original_text_for` contains expressions with defined
    results names, you must set ``as_string`` to ``False`` if you
    want to preserve those results name values.

    The ``asString`` pre-PEP8 argument is retained for compatibility,
    but will be removed in a future release.

    Example::

        src = "this is test <b> bold <i>text</i> </b> normal text "
        for tag in ("b", "i"):
            opener, closer = make_html_tags(tag)
            patt = original_text_for(opener + ... + closer)
            print(patt.search_string(src)[0])

    prints::

        ['<b> bold <i>text</i> </b>']
        ['<i>text</i>']
    """
    asString = asString and as_string

    def current_location(s, loc, t):
        return loc

    # empty markers that record the location before and after expr
    start_marker = Empty().set_parse_action(current_location)
    end_marker = start_marker.copy()
    end_marker.callPreparse = False
    matchExpr = start_marker("_original_start") + expr + end_marker("_original_end")

    def text_as_string(s, l, t):
        return s[t._original_start : t._original_end]

    def text_in_results(s, l, t):
        # drop the two marker names and replace the token list in place
        start = t.pop("_original_start")
        end = t.pop("_original_end")
        t[:] = [s[start:end]]

    extractText = text_as_string if asString else text_in_results

    matchExpr.set_parse_action(extractText)
    matchExpr.ignoreExprs = expr.ignoreExprs
    matchExpr.suppress_warning(Diagnostics.warn_ungrouped_named_tokens_in_collection)
    return matchExpr
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def ungroup(expr: ParserElement) -> ParserElement:
    """Helper that undoes pyparsing's default grouping of And expressions
    by returning just the first token of ``expr``'s results.
    """
    unwrapped = TokenConverter(expr)
    unwrapped.add_parse_action(lambda tokens: tokens[0])
    return unwrapped
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
def locatedExpr(expr: ParserElement) -> ParserElement:
    """
    (DEPRECATED - future code should use the :class:`Located` class)
    Wrap ``expr`` so that its results carry the starting and ending
    locations of the match in the input string.

    The returned group defines these results names:

    - ``locn_start`` - location where matched expression begins
    - ``locn_end`` - location where matched expression ends
    - ``value`` - the actual parsed results

    Be careful if the input text contains ``<TAB>`` characters, you
    may want to call :class:`ParserElement.parse_with_tabs`

    Example::

        wd = Word(alphas)
        for match in locatedExpr(wd).search_string("ljsdf123lksdjjf123lkkjj1222"):
            print(match)

    prints::

        [[0, 'ljsdf', 5]]
        [[8, 'lksdjjf', 15]]
        [[18, 'lkkjj', 23]]
    """
    # an empty match whose only token is the current parse location
    locator = Empty().set_parse_action(lambda ss, ll, tt: ll)
    start_marker = locator("locn_start")
    value = expr("value")
    end_marker = locator.copy().leaveWhitespace()("locn_end")
    return Group(start_marker + value + end_marker)
|
| 408 |
+
|
| 409 |
+
|
| 410 |
+
def nested_expr(
    opener: Union[str, ParserElement] = "(",
    closer: Union[str, ParserElement] = ")",
    content: typing.Optional[ParserElement] = None,
    ignore_expr: ParserElement = quoted_string(),
    *,
    ignoreExpr: ParserElement = quoted_string(),
) -> ParserElement:
    """Build an expression for nested lists enclosed in opening and closing
    delimiters (``"("`` and ``")"`` by default).

    Parameters:

    - ``opener`` - opening character for a nested list
      (default= ``"("``); can also be a pyparsing expression
    - ``closer`` - closing character for a nested list
      (default= ``")"``); can also be a pyparsing expression
    - ``content`` - expression for items within the nested lists
      (default= ``None``)
    - ``ignore_expr`` - expression for ignoring opening and closing delimiters
      (default= :class:`quoted_string`)
    - ``ignoreExpr`` - this pre-PEP8 argument is retained for compatibility
      but will be removed in a future release

    When no ``content`` expression is given, ``opener`` and ``closer`` must
    both be strings; a default content expression is then built that captures
    the whitespace-delimited text between delimiters as separate values.

    Use the ``ignore_expr`` argument to define expressions that may
    contain opening or closing characters that should not be treated as
    opening or closing characters for nesting, such as quoted_string or
    a comment expression.  Specify multiple expressions using an
    :class:`Or` or :class:`MatchFirst`. The default is
    :class:`quoted_string`, but if no expressions are to be ignored, then
    pass ``None`` for this argument.

    Raises ``ValueError`` if ``opener == closer``, or if ``content`` is
    omitted and ``opener``/``closer`` are not both strings.

    Example::

        data_type = one_of("void int short long char float double")
        decl_data_type = Combine(data_type + Opt(Word('*')))
        ident = Word(alphas+'_', alphanums+'_')
        number = pyparsing_common.number
        arg = Group(decl_data_type + ident)
        LPAR, RPAR = map(Suppress, "()")

        code_body = nested_expr('{', '}', ignore_expr=(quoted_string | c_style_comment))

        c_function = (decl_data_type("type")
                      + ident("name")
                      + LPAR + Opt(DelimitedList(arg), [])("args") + RPAR
                      + code_body("body"))
        c_function.ignore(c_style_comment)

        source_code = '''
            int is_odd(int x) {
                return (x%2);
            }

            int dec_to_hex(char hchar) {
                if (hchar >= '0' && hchar <= '9') {
                    return (ord(hchar)-ord('0'));
                } else {
                    return (10+ord(hchar)-ord('A'));
                }
            }
        '''
        for func in c_function.search_string(source_code):
            print("%(name)s (%(type)s) args: %(args)s" % func)


    prints::

        is_odd (int) args: [['int', 'x']]
        dec_to_hex (int) args: [['char', 'hchar']]
    """
    # Reconcile the PEP8 and pre-PEP8 spellings of the ignore argument.
    if ignoreExpr != ignore_expr:
        ignoreExpr = ignore_expr if ignoreExpr == quoted_string() else ignoreExpr

    if opener == closer:
        raise ValueError("opening and closing strings cannot be the same")

    if content is None:
        # A default content expression can only be derived from string delimiters.
        if not (isinstance(opener, str_type) and isinstance(closer, str_type)):
            raise ValueError(
                "opening and closing arguments must be strings if no content expression is given"
            )
        opener = typing.cast(str, opener)
        closer = typing.cast(str, closer)

        single_char_delims = len(opener) == 1 and len(closer) == 1
        has_ignore = ignoreExpr is not None

        if single_char_delims and has_ignore:
            content = Combine(
                OneOrMore(
                    ~ignoreExpr
                    + CharsNotIn(
                        opener + closer + ParserElement.DEFAULT_WHITE_CHARS,
                        exact=1,
                    )
                )
            ).set_parse_action(lambda t: t[0].strip())
        elif single_char_delims:
            content = empty.copy() + CharsNotIn(
                opener + closer + ParserElement.DEFAULT_WHITE_CHARS
            ).set_parse_action(lambda t: t[0].strip())
        elif has_ignore:
            # Multi-character delimiters: use negative lookahead on each one.
            content = Combine(
                OneOrMore(
                    ~ignoreExpr
                    + ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                )
            ).set_parse_action(lambda t: t[0].strip())
        else:
            content = Combine(
                OneOrMore(
                    ~Literal(opener)
                    + ~Literal(closer)
                    + CharsNotIn(ParserElement.DEFAULT_WHITE_CHARS, exact=1)
                )
            ).set_parse_action(lambda t: t[0].strip())

    ret = Forward()
    # One item inside the delimiters: an ignored expression (if any), a nested
    # occurrence of this same expression, or a piece of content.
    if ignoreExpr is not None:
        item = ignoreExpr | ret | content
    else:
        item = ret | content
    ret <<= Group(Suppress(opener) + ZeroOrMore(item) + Suppress(closer))
    ret.set_name("nested %s%s expression" % (opener, closer))
    return ret
|
| 539 |
+
|
| 540 |
+
|
| 541 |
+
def _makeTags(tagStr, xml, suppress_LT=Suppress("<"), suppress_GT=Suppress(">")):
    """Internal helper: build the (open tag, close tag) expression pair for a tag name.

    ``tagStr`` may be a string (converted to a :class:`Keyword`, caseless
    unless ``xml`` is true) or an existing expression, whose ``name`` is then
    used for results names and display names.  Both returned expressions get a
    ``tag`` attribute, and the open tag additionally gets a ``tag_body``
    attribute holding ``SkipTo`` the close tag.
    """
    if isinstance(tagStr, str_type):
        resname = tagStr
        tagStr = Keyword(tagStr, caseless=not xml)
    else:
        resname = tagStr.name

    tagAttrName = Word(alphas, alphanums + "_-:")
    if xml:
        # XML: attribute values must be double-quoted and are always present.
        tagAttrValue = dbl_quoted_string.copy().set_parse_action(remove_quotes)
        attr_pair = Group(tagAttrName + Suppress("=") + tagAttrValue)
    else:
        # HTML: any quoting or an unquoted word; the value is optional and
        # attribute names are lower-cased.
        tagAttrValue = quoted_string.copy().set_parse_action(remove_quotes) | Word(
            printables, exclude_chars=">"
        )
        attr_pair = Group(
            tagAttrName.set_parse_action(lambda t: t[0].lower())
            + Opt(Suppress("=") + tagAttrValue)
        )

    # Optional "/" before ">" is reported as a boolean under the name "empty".
    self_closing = Opt("/", default=[False])("empty").set_parse_action(
        lambda s, l, t: t[0] == "/"
    )
    openTag = (
        suppress_LT
        + tagStr("tag")
        + Dict(ZeroOrMore(attr_pair))
        + self_closing
        + suppress_GT
    )
    closeTag = Combine(Literal("</") + tagStr + ">", adjacent=False)

    openTag.set_name("<%s>" % resname)

    # Results-name suffix: tag name with ":" separators removed and each part title-cased.
    name_suffix = "".join(resname.replace(":", " ").title().split())

    # add start<tagname> results name in parse action now that ungrouped names are not reported at two levels
    openTag.add_parse_action(
        lambda t: t.__setitem__("start" + name_suffix, t.copy())
    )
    closeTag = closeTag("end" + name_suffix).set_name("</%s>" % resname)

    openTag.tag = resname
    closeTag.tag = resname
    openTag.tag_body = SkipTo(closeTag())
    return openTag, closeTag
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
def make_html_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Build the opening and closing tag expressions for an HTML tag name.

    Delegates to ``_makeTags`` with ``xml`` set to ``False``, so a string tag
    name is matched caselessly, attribute names are lower-cased, and attribute
    values may be quoted or unquoted.

    Example::

        text = '<td>More info at the <a href="https://github.com/pyparsing/pyparsing/wiki">pyparsing</a> wiki page</td>'
        # make_html_tags returns pyparsing expressions for the opening and
        # closing tags as a 2-tuple
        a, a_end = make_html_tags("A")
        link_expr = a + SkipTo(a_end)("link_text") + a_end

        for link in link_expr.search_string(text):
            # attributes in the <A> tag (like "href" shown here) are
            # also accessible as named results
            print(link.link_text, '->', link.href)

    prints::

        pyparsing -> https://github.com/pyparsing/pyparsing/wiki
    """
    tag_pair = _makeTags(tag_str, xml=False)
    return tag_pair
|
| 624 |
+
|
| 625 |
+
|
| 626 |
+
def make_xml_tags(
    tag_str: Union[str, ParserElement]
) -> Tuple[ParserElement, ParserElement]:
    """Build the opening and closing tag expressions for an XML tag name.

    Delegates to ``_makeTags`` with ``xml`` set to ``True``, so a string tag
    name is matched only in the given upper/lower case.

    Example: similar to :class:`make_html_tags`
    """
    tag_pair = _makeTags(tag_str, xml=True)
    return tag_pair
|
| 635 |
+
|
| 636 |
+
|
| 637 |
+
# Generic open/close tag expressions: built by make_html_tags from a
# Word-based tag-name expression (named "any tag") instead of a fixed name.
any_open_tag: ParserElement
any_close_tag: ParserElement
any_open_tag, any_close_tag = make_html_tags(
    Word(alphas, alphanums + "_:").set_name("any tag")
)

# Entity name (with any trailing ";" stripped) -> replacement text, taken
# from the standard library's html.entities.html5 table.
_htmlEntityMap = {k.rstrip(";"): v for k, v in html.entities.html5.items()}
# Matches "&<name>;" for every name in _htmlEntityMap; the name is captured
# in the named group "entity".
common_html_entity = Regex("&(?P<entity>" + "|".join(_htmlEntityMap) + ");").set_name(
    "common HTML entity"
)
|
| 647 |
+
|
| 648 |
+
|
| 649 |
+
def replace_html_entity(s, l, t):
    """Parse action that maps a matched HTML entity to its replacement character(s).

    Looks up ``t.entity`` in ``_htmlEntityMap`` and returns the mapped value,
    or ``None`` when the name is not in the map.
    """
    entity_name = t.entity
    return _htmlEntityMap.get(entity_name)
|
| 652 |
+
|
| 653 |
+
|
| 654 |
+
class OpAssoc(Enum):
    """Operator associativity markers.

    Used as the associativity member of each operator tuple passed to
    :class:`infix_notation` (see ``InfixNotationOperatorSpec``).
    """

    # operator groups left-to-right
    LEFT = 1
    # operator groups right-to-left
    RIGHT = 2
|
| 660 |
+
|
| 661 |
+
|
| 662 |
+
# Type of the operator member of an infix_notation operator tuple: a single
# expression or string, or a 2-tuple of them (infix_notation requires the
# 2-tuple form when the operator takes 3 operands).
InfixNotationOperatorArgType = Union[
    ParserElement, str, Tuple[Union[ParserElement, str], Union[ParserElement, str]]
]
# Type of one entry of infix_notation's ``op_list``:
# (operator, number of operands, associativity[, parse action]).
InfixNotationOperatorSpec = Union[
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
        typing.Optional[ParseAction],
    ],
    Tuple[
        InfixNotationOperatorArgType,
        int,
        OpAssoc,
    ],
]
|
| 678 |
+
|
| 679 |
+
|
| 680 |
+
def infix_notation(
    base_expr: ParserElement,
    op_list: List[InfixNotationOperatorSpec],
    lpar: Union[str, ParserElement] = Suppress("("),
    rpar: Union[str, ParserElement] = Suppress(")"),
) -> ParserElement:
    """Helper method for constructing grammars of expressions made up of
    operators working in a precedence hierarchy.  Operators may be unary
    or binary, left- or right-associative.  Parse actions can also be
    attached to operator expressions. The generated parser will also
    recognize the use of parentheses to override operator precedences
    (see example below).

    Note: if you define a deep operator list, you may see performance
    issues when using infix_notation. See
    :class:`ParserElement.enable_packrat` for a mechanism to potentially
    improve your parser performance.

    Parameters:

    - ``base_expr`` - expression representing the most basic operand to
      be used in the expression
    - ``op_list`` - list of tuples, one for each operator precedence level
      in the expression grammar; each tuple is of the form ``(op_expr,
      num_operands, right_left_assoc, (optional)parse_action)``, where:

      - ``op_expr`` is the pyparsing expression for the operator; may also
        be a string, which will be converted to a Literal; if ``num_operands``
        is 3, ``op_expr`` is a tuple of two expressions, for the two
        operators separating the 3 terms
      - ``num_operands`` is the number of terms for this operator (must be 1,
        2, or 3)
      - ``right_left_assoc`` is the indicator whether the operator is right
        or left associative, using the pyparsing-defined constants
        ``OpAssoc.RIGHT`` and ``OpAssoc.LEFT``.
      - ``parse_action`` is the parse action to be associated with
        expressions matching this operator expression (the parse action
        tuple member may be omitted); if the parse action is passed
        a tuple or list of functions, this is equivalent to calling
        ``set_parse_action(*fn)``
        (:class:`ParserElement.set_parse_action`)
    - ``lpar`` - expression for matching left-parentheses; if passed as a
      str, then will be parsed as ``Suppress(lpar)``. If lpar is passed as
      an expression (such as ``Literal('(')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress('(')``)
    - ``rpar`` - expression for matching right-parentheses; if passed as a
      str, then will be parsed as ``Suppress(rpar)``. If rpar is passed as
      an expression (such as ``Literal(')')``), then it will be kept in
      the parsed results, and grouped with them. (default= ``Suppress(')')``)

    Raises ``ValueError`` if an operator tuple has a number of operands
    outside 1..3, an associativity other than ``OpAssoc.LEFT`` /
    ``OpAssoc.RIGHT``, or (for 3 operands) an ``op_expr`` that is not a
    tuple or list of two expressions.

    Example::

        # simple example of four-function arithmetic with ints and
        # variable names
        integer = pyparsing_common.signed_integer
        varname = pyparsing_common.identifier

        arith_expr = infix_notation(integer | varname,
            [
            ('-', 1, OpAssoc.RIGHT),
            (one_of('* /'), 2, OpAssoc.LEFT),
            (one_of('+ -'), 2, OpAssoc.LEFT),
            ])

        arith_expr.run_tests('''
            5+3*6
            (5+3)*6
            (5+x)*y
            -2--11
            ''', full_dump=False)

    prints::

        5+3*6
        [[5, '+', [3, '*', 6]]]

        (5+3)*6
        [[[5, '+', 3], '*', 6]]

        (5+x)*y
        [[[5, '+', 'x'], '*', 'y']]

        -2--11
        [[['-', 2], '-', ['-', 11]]]
    """

    # captive version of FollowedBy that does not do parse actions or capture results names
    class _FB(FollowedBy):
        def parseImpl(self, instring, loc, doActions=True):
            self.expr.try_parse(instring, loc)
            return loc, []

    _FB.__name__ = "FollowedBy>"

    ret = Forward()
    if isinstance(lpar, str):
        lpar = Suppress(lpar)
    if isinstance(rpar, str):
        rpar = Suppress(rpar)

    # if lpar and rpar are not suppressed, wrap in group
    # (both delimiters must be checked: previously rpar was tested twice, so a
    # non-suppressed lpar paired with a suppressed rpar was left ungrouped)
    if not (isinstance(lpar, Suppress) and isinstance(rpar, Suppress)):
        lastExpr = base_expr | Group(lpar + ret + rpar)
    else:
        lastExpr = base_expr | (lpar + ret + rpar)

    arity: int
    rightLeftAssoc: OpAssoc
    pa: typing.Optional[ParseAction]
    opExpr1: ParserElement
    opExpr2: ParserElement
    for operDef in op_list:
        # pad with None so the optional parse action may be omitted
        opExpr, arity, rightLeftAssoc, pa = (operDef + (None,))[:4]  # type: ignore[assignment]
        if isinstance(opExpr, str_type):
            opExpr = ParserElement._literalStringClass(opExpr)
        opExpr = typing.cast(ParserElement, opExpr)
        if arity == 3:
            if not isinstance(opExpr, (tuple, list)) or len(opExpr) != 2:
                raise ValueError(
                    "if numterms=3, opExpr must be a tuple or list of two expressions"
                )
            opExpr1, opExpr2 = opExpr
            term_name = f"{opExpr1}{opExpr2} term"
        else:
            term_name = f"{opExpr} term"

        if not 1 <= arity <= 3:
            raise ValueError("operator must be unary (1), binary (2), or ternary (3)")

        if rightLeftAssoc not in (OpAssoc.LEFT, OpAssoc.RIGHT):
            raise ValueError("operator must indicate right or left associativity")

        # thisExpr is this precedence level; lastExpr is the next-tighter level
        thisExpr: ParserElement = Forward().set_name(term_name)
        thisExpr = typing.cast(Forward, thisExpr)
        if rightLeftAssoc is OpAssoc.LEFT:
            if arity == 1:
                matchExpr = _FB(lastExpr + opExpr) + Group(lastExpr + opExpr[1, ...])
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + lastExpr) + Group(
                        lastExpr + (opExpr + lastExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + lastExpr) + Group(lastExpr[2, ...])
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + lastExpr + opExpr2 + lastExpr
                ) + Group(lastExpr + OneOrMore(opExpr1 + lastExpr + opExpr2 + lastExpr))
        elif rightLeftAssoc is OpAssoc.RIGHT:
            if arity == 1:
                # try to avoid LR with this extra test
                if not isinstance(opExpr, Opt):
                    opExpr = Opt(opExpr)
                matchExpr = _FB(opExpr.expr + thisExpr) + Group(opExpr + thisExpr)
            elif arity == 2:
                if opExpr is not None:
                    matchExpr = _FB(lastExpr + opExpr + thisExpr) + Group(
                        lastExpr + (opExpr + thisExpr)[1, ...]
                    )
                else:
                    matchExpr = _FB(lastExpr + thisExpr) + Group(
                        lastExpr + thisExpr[1, ...]
                    )
            elif arity == 3:
                matchExpr = _FB(
                    lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr
                ) + Group(lastExpr + opExpr1 + thisExpr + opExpr2 + thisExpr)
        if pa:
            if isinstance(pa, (tuple, list)):
                matchExpr.set_parse_action(*pa)
            else:
                matchExpr.set_parse_action(pa)
        thisExpr <<= (matchExpr | lastExpr).set_name(term_name)
        lastExpr = thisExpr
    ret <<= lastExpr
    return ret
|
| 855 |
+
|
| 856 |
+
|
| 857 |
+
def indentedBlock(blockStatementExpr, indentStack, indent=True, backup_stacks=[]):
    """
    (DEPRECATED - use :class:`IndentedBlock` class instead)
    Helper method for defining space-delimited indentation blocks,
    such as those used to define block statements in Python source code.

    Parameters:

    - ``blockStatementExpr`` - expression defining syntax of statement that
      is repeated within the indented block
    - ``indentStack`` - list created by caller to manage indentation stack
      (multiple ``statementWithIndentedBlock`` expressions within a single
      grammar should share a common ``indentStack``)
    - ``indent`` - boolean indicating whether block must be indented beyond
      the current level; set to ``False`` for block of left-most statements
      (default= ``True``)
    - ``backup_stacks`` - list of saved copies of ``indentStack``; a copy is
      appended on every call, the last entry is used to restore
      ``indentStack`` when the block fails to parse, and an entry is popped
      when the block parses successfully

    A valid block must contain at least one ``blockStatement``.

    (Note that indentedBlock uses internal parse actions which make it
    incompatible with packrat parsing.)

    Example::

        data = '''
        def A(z):
          A1
          B = 100
          G = A2
          A2
          A3
        B
        def BB(a,b,c):
          BB1
          def BBA():
            bba1
            bba2
            bba3
        C
        D
        def spam(x,y):
             def eggs(z):
                 pass
        '''


        indentStack = [1]
        stmt = Forward()

        identifier = Word(alphas, alphanums)
        funcDecl = ("def" + identifier + Group("(" + Opt(delimitedList(identifier)) + ")") + ":")
        func_body = indentedBlock(stmt, indentStack)
        funcDef = Group(funcDecl + func_body)

        rvalue = Forward()
        funcCall = Group(identifier + "(" + Opt(delimitedList(rvalue)) + ")")
        rvalue << (funcCall | identifier | Word(nums))
        assignment = Group(identifier + "=" + rvalue)
        stmt << (funcDef | assignment | identifier)

        module_body = stmt[1, ...]

        parseTree = module_body.parseString(data)
        parseTree.pprint()

    prints::

        [['def',
          'A',
          ['(', 'z', ')'],
          ':',
          [['A1'], [['B', '=', '100']], [['G', '=', 'A2']], ['A2'], ['A3']]],
         'B',
         ['def',
          'BB',
          ['(', 'a', 'b', 'c', ')'],
          ':',
          [['BB1'], [['def', 'BBA', ['(', ')'], ':', [['bba1'], ['bba2'], ['bba3']]]]]],
         'C',
         'D',
         ['def',
          'spam',
          ['(', 'x', 'y', ')'],
          ':',
          [[['def', 'eggs', ['(', 'z', ')'], ':', [['pass']]]]]]]
    """
    # NOTE(review): ``backup_stacks`` has a mutable default, so calls that omit
    # it all append to the same list object -- confirm whether that sharing is
    # relied upon before changing it.
    # Snapshot the current indentation stack so a failed parse can restore it.
    backup_stacks.append(indentStack[:])

    def reset_stack():
        # Restore indentStack in place from the most recent snapshot.
        indentStack[:] = backup_stacks[-1]

    def checkPeerIndent(s, l, t):
        # Parse action: the statement must start at exactly the current indent column.
        if l >= len(s):
            return
        curCol = col(l, s)
        if curCol != indentStack[-1]:
            if curCol > indentStack[-1]:
                raise ParseException(s, l, "illegal nesting")
            raise ParseException(s, l, "not a peer entry")

    def checkSubIndent(s, l, t):
        # Parse action: a deeper column opens a new level and is pushed on the stack.
        curCol = col(l, s)
        if curCol > indentStack[-1]:
            indentStack.append(curCol)
        else:
            raise ParseException(s, l, "not a subentry")

    def checkUnindent(s, l, t):
        # Parse action: the column must be one already on the stack; if it is
        # shallower than the current level, that level is popped.
        if l >= len(s):
            return
        curCol = col(l, s)
        if not (indentStack and curCol in indentStack):
            raise ParseException(s, l, "not an unindent")
        if curCol < indentStack[-1]:
            indentStack.pop()

    # One or more line ends; only tab and space are skipped as whitespace so
    # that newlines themselves are matched.
    NL = OneOrMore(LineEnd().set_whitespace_chars("\t ").suppress())
    INDENT = (Empty() + Empty().set_parse_action(checkSubIndent)).set_name("INDENT")
    PEER = Empty().set_parse_action(checkPeerIndent).set_name("")
    UNDENT = Empty().set_parse_action(checkUnindent).set_name("UNINDENT")
    if indent:
        # Indented block: requires an INDENT before and an UNDENT after the statements.
        smExpr = Group(
            Opt(NL)
            + INDENT
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + UNDENT
        )
    else:
        # Left-most block: no INDENT, and the trailing UNDENT is optional.
        smExpr = Group(
            Opt(NL)
            + OneOrMore(PEER + Group(blockStatementExpr) + Opt(NL))
            + Opt(UNDENT)
        )

    # add a parse action to remove backup_stack from list of backups
    smExpr.add_parse_action(
        lambda: backup_stacks.pop(-1) and None if backup_stacks else None
    )
    # On failure, roll indentStack back to the last snapshot.
    smExpr.set_fail_action(lambda a, b, c, d: reset_stack())
    # Ignore a backslash followed by a line end inside block statements.
    blockStatementExpr.ignore(_bslash + LineEnd())
    return smExpr.set_name("indented block")
|
| 998 |
+
|
| 999 |
+
|
| 1000 |
+
# it's easy to get these comment structures wrong - they're very common, so may as well make them available
|
| 1001 |
+
# The body regex consumes the opening "/*" and then any character that is not
# "*", or a "*" that is not followed by "/"; the closing "*/" is matched as a
# separate literal and combined with it.
c_style_comment = Combine(Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/").set_name(
    "C style comment"
)
"Comment of the form ``/* ... */``"

# Non-greedy match, so the comment ends at the first "-->".
html_comment = Regex(r"<!--[\s\S]*?-->").set_name("HTML comment")
"Comment of the form ``<!-- ... -->``"

rest_of_line = Regex(r".*").leave_whitespace().set_name("rest of line")
# A backslash immediately followed by a newline is consumed as part of the
# comment, so such a comment continues onto the next line.
dbl_slash_comment = Regex(r"//(?:\\\n|[^\n])*").set_name("// comment")
"Comment of the form ``// ... (to end of line)``"

# "+" binds tighter than "|": this is (block comment body + "*/") OR a "//" comment.
cpp_style_comment = Combine(
    Regex(r"/\*(?:[^*]|\*(?!/))*") + "*/" | dbl_slash_comment
).set_name("C++ style comment")
"Comment of either form :class:`c_style_comment` or :class:`dbl_slash_comment`"

# Same object as cpp_style_comment, not a copy.
java_style_comment = cpp_style_comment
"Same as :class:`cpp_style_comment`"

python_style_comment = Regex(r"#.*").set_name("Python style comment")
"Comment of the form ``# ... (to end of line)``"


# build list of built-in expressions, for future reference if a global default value
# gets updated
# (collects every ParserElement bound in this namespace at this point)
_builtin_exprs: List[ParserElement] = [
    v for v in vars().values() if isinstance(v, ParserElement)
]
|
| 1030 |
+
|
| 1031 |
+
|
| 1032 |
+
# compatibility function, superseded by DelimitedList class
|
| 1033 |
+
def delimited_list(
    expr: Union[str, ParserElement],
    delim: Union[str, ParserElement] = ",",
    combine: bool = False,
    min: typing.Optional[int] = None,
    max: typing.Optional[int] = None,
    *,
    allow_trailing_delim: bool = False,
) -> ParserElement:
    """(DEPRECATED - use :class:`DelimitedList` class)

    Forwards every argument unchanged to :class:`DelimitedList` and returns
    the resulting expression.
    """
    positional_args = (expr, delim, combine, min, max)
    return DelimitedList(
        *positional_args, allow_trailing_delim=allow_trailing_delim
    )
|
| 1046 |
+
|
| 1047 |
+
|
| 1048 |
+
# pre-PEP8 compatible names
# Each camelCase name below is bound to the same object as its snake_case
# counterpart defined earlier in this module.
# fmt: off
opAssoc = OpAssoc
anyOpenTag = any_open_tag
anyCloseTag = any_close_tag
commonHTMLEntity = common_html_entity
cStyleComment = c_style_comment
htmlComment = html_comment
restOfLine = rest_of_line
dblSlashComment = dbl_slash_comment
cppStyleComment = cpp_style_comment
javaStyleComment = java_style_comment
pythonStyleComment = python_style_comment

# The stubs below have empty bodies; the name is bound to whatever
# replaced_by_pep8 returns for the given target.
# NOTE(review): presumably a wrapper that forwards to the target -- confirm
# against the replaced_by_pep8 implementation.
@replaced_by_pep8(DelimitedList)
def delimitedList(): ...

# NOTE(review): this rebinds ``delimited_list``, replacing the function of the
# same name defined earlier in this module.
@replaced_by_pep8(DelimitedList)
def delimited_list(): ...

@replaced_by_pep8(counted_array)
def countedArray(): ...

@replaced_by_pep8(match_previous_literal)
def matchPreviousLiteral(): ...

@replaced_by_pep8(match_previous_expr)
def matchPreviousExpr(): ...

@replaced_by_pep8(one_of)
def oneOf(): ...

@replaced_by_pep8(dict_of)
def dictOf(): ...

@replaced_by_pep8(original_text_for)
def originalTextFor(): ...

@replaced_by_pep8(nested_expr)
def nestedExpr(): ...

@replaced_by_pep8(make_html_tags)
def makeHTMLTags(): ...

@replaced_by_pep8(make_xml_tags)
def makeXMLTags(): ...

@replaced_by_pep8(replace_html_entity)
def replaceHTMLEntity(): ...

@replaced_by_pep8(infix_notation)
def infixNotation(): ...
# fmt: on
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/results.py
ADDED
|
@@ -0,0 +1,796 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# results.py
|
| 2 |
+
from collections.abc import (
|
| 3 |
+
MutableMapping,
|
| 4 |
+
Mapping,
|
| 5 |
+
MutableSequence,
|
| 6 |
+
Iterator,
|
| 7 |
+
Sequence,
|
| 8 |
+
Container,
|
| 9 |
+
)
|
| 10 |
+
import pprint
|
| 11 |
+
from typing import Tuple, Any, Dict, Set, List
|
| 12 |
+
|
| 13 |
+
str_type: Tuple[type, ...] = (str, bytes)
|
| 14 |
+
_generator_type = type((_ for _ in ()))
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class _ParseResultsWithOffset:
|
| 18 |
+
tup: Tuple["ParseResults", int]
|
| 19 |
+
__slots__ = ["tup"]
|
| 20 |
+
|
| 21 |
+
def __init__(self, p1: "ParseResults", p2: int):
|
| 22 |
+
self.tup: Tuple[ParseResults, int] = (p1, p2)
|
| 23 |
+
|
| 24 |
+
def __getitem__(self, i):
|
| 25 |
+
return self.tup[i]
|
| 26 |
+
|
| 27 |
+
def __getstate__(self):
|
| 28 |
+
return self.tup
|
| 29 |
+
|
| 30 |
+
def __setstate__(self, *args):
|
| 31 |
+
self.tup = args[0]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class ParseResults:
|
| 35 |
+
"""Structured parse results, to provide multiple means of access to
|
| 36 |
+
the parsed data:
|
| 37 |
+
|
| 38 |
+
- as a list (``len(results)``)
|
| 39 |
+
- by list index (``results[0], results[1]``, etc.)
|
| 40 |
+
- by attribute (``results.<results_name>`` - see :class:`ParserElement.set_results_name`)
|
| 41 |
+
|
| 42 |
+
Example::
|
| 43 |
+
|
| 44 |
+
integer = Word(nums)
|
| 45 |
+
date_str = (integer.set_results_name("year") + '/'
|
| 46 |
+
+ integer.set_results_name("month") + '/'
|
| 47 |
+
+ integer.set_results_name("day"))
|
| 48 |
+
# equivalent form:
|
| 49 |
+
# date_str = (integer("year") + '/'
|
| 50 |
+
# + integer("month") + '/'
|
| 51 |
+
# + integer("day"))
|
| 52 |
+
|
| 53 |
+
# parse_string returns a ParseResults object
|
| 54 |
+
result = date_str.parse_string("1999/12/31")
|
| 55 |
+
|
| 56 |
+
def test(s, fn=repr):
|
| 57 |
+
print(f"{s} -> {fn(eval(s))}")
|
| 58 |
+
test("list(result)")
|
| 59 |
+
test("result[0]")
|
| 60 |
+
test("result['month']")
|
| 61 |
+
test("result.day")
|
| 62 |
+
test("'month' in result")
|
| 63 |
+
test("'minutes' in result")
|
| 64 |
+
test("result.dump()", str)
|
| 65 |
+
|
| 66 |
+
prints::
|
| 67 |
+
|
| 68 |
+
list(result) -> ['1999', '/', '12', '/', '31']
|
| 69 |
+
result[0] -> '1999'
|
| 70 |
+
result['month'] -> '12'
|
| 71 |
+
result.day -> '31'
|
| 72 |
+
'month' in result -> True
|
| 73 |
+
'minutes' in result -> False
|
| 74 |
+
result.dump() -> ['1999', '/', '12', '/', '31']
|
| 75 |
+
- day: '31'
|
| 76 |
+
- month: '12'
|
| 77 |
+
- year: '1999'
|
| 78 |
+
"""
|
| 79 |
+
|
| 80 |
+
_null_values: Tuple[Any, ...] = (None, [], ())
|
| 81 |
+
|
| 82 |
+
_name: str
|
| 83 |
+
_parent: "ParseResults"
|
| 84 |
+
_all_names: Set[str]
|
| 85 |
+
_modal: bool
|
| 86 |
+
_toklist: List[Any]
|
| 87 |
+
_tokdict: Dict[str, Any]
|
| 88 |
+
|
| 89 |
+
__slots__ = (
|
| 90 |
+
"_name",
|
| 91 |
+
"_parent",
|
| 92 |
+
"_all_names",
|
| 93 |
+
"_modal",
|
| 94 |
+
"_toklist",
|
| 95 |
+
"_tokdict",
|
| 96 |
+
)
|
| 97 |
+
|
| 98 |
+
class List(list):
|
| 99 |
+
"""
|
| 100 |
+
Simple wrapper class to distinguish parsed list results that should be preserved
|
| 101 |
+
as actual Python lists, instead of being converted to :class:`ParseResults`::
|
| 102 |
+
|
| 103 |
+
LBRACK, RBRACK = map(pp.Suppress, "[]")
|
| 104 |
+
element = pp.Forward()
|
| 105 |
+
item = ppc.integer
|
| 106 |
+
element_list = LBRACK + pp.DelimitedList(element) + RBRACK
|
| 107 |
+
|
| 108 |
+
# add parse actions to convert from ParseResults to actual Python collection types
|
| 109 |
+
def as_python_list(t):
|
| 110 |
+
return pp.ParseResults.List(t.as_list())
|
| 111 |
+
element_list.add_parse_action(as_python_list)
|
| 112 |
+
|
| 113 |
+
element <<= item | element_list
|
| 114 |
+
|
| 115 |
+
element.run_tests('''
|
| 116 |
+
100
|
| 117 |
+
[2,3,4]
|
| 118 |
+
[[2, 1],3,4]
|
| 119 |
+
[(2, 1),3,4]
|
| 120 |
+
(2,3,4)
|
| 121 |
+
''', post_parse=lambda s, r: (r[0], type(r[0])))
|
| 122 |
+
|
| 123 |
+
prints::
|
| 124 |
+
|
| 125 |
+
100
|
| 126 |
+
(100, <class 'int'>)
|
| 127 |
+
|
| 128 |
+
[2,3,4]
|
| 129 |
+
([2, 3, 4], <class 'list'>)
|
| 130 |
+
|
| 131 |
+
[[2, 1],3,4]
|
| 132 |
+
([[2, 1], 3, 4], <class 'list'>)
|
| 133 |
+
|
| 134 |
+
(Used internally by :class:`Group` when `aslist=True`.)
|
| 135 |
+
"""
|
| 136 |
+
|
| 137 |
+
def __new__(cls, contained=None):
|
| 138 |
+
if contained is None:
|
| 139 |
+
contained = []
|
| 140 |
+
|
| 141 |
+
if not isinstance(contained, list):
|
| 142 |
+
raise TypeError(
|
| 143 |
+
f"{cls.__name__} may only be constructed with a list, not {type(contained).__name__}"
|
| 144 |
+
)
|
| 145 |
+
|
| 146 |
+
return list.__new__(cls)
|
| 147 |
+
|
| 148 |
+
def __new__(cls, toklist=None, name=None, **kwargs):
|
| 149 |
+
if isinstance(toklist, ParseResults):
|
| 150 |
+
return toklist
|
| 151 |
+
self = object.__new__(cls)
|
| 152 |
+
self._name = None
|
| 153 |
+
self._parent = None
|
| 154 |
+
self._all_names = set()
|
| 155 |
+
|
| 156 |
+
if toklist is None:
|
| 157 |
+
self._toklist = []
|
| 158 |
+
elif isinstance(toklist, (list, _generator_type)):
|
| 159 |
+
self._toklist = (
|
| 160 |
+
[toklist[:]]
|
| 161 |
+
if isinstance(toklist, ParseResults.List)
|
| 162 |
+
else list(toklist)
|
| 163 |
+
)
|
| 164 |
+
else:
|
| 165 |
+
self._toklist = [toklist]
|
| 166 |
+
self._tokdict = dict()
|
| 167 |
+
return self
|
| 168 |
+
|
| 169 |
+
# Performance tuning: we construct a *lot* of these, so keep this
|
| 170 |
+
# constructor as small and fast as possible
|
| 171 |
+
def __init__(
|
| 172 |
+
self, toklist=None, name=None, asList=True, modal=True, isinstance=isinstance
|
| 173 |
+
):
|
| 174 |
+
self._tokdict: Dict[str, _ParseResultsWithOffset]
|
| 175 |
+
self._modal = modal
|
| 176 |
+
if name is not None and name != "":
|
| 177 |
+
if isinstance(name, int):
|
| 178 |
+
name = str(name)
|
| 179 |
+
if not modal:
|
| 180 |
+
self._all_names = {name}
|
| 181 |
+
self._name = name
|
| 182 |
+
if toklist not in self._null_values:
|
| 183 |
+
if isinstance(toklist, (str_type, type)):
|
| 184 |
+
toklist = [toklist]
|
| 185 |
+
if asList:
|
| 186 |
+
if isinstance(toklist, ParseResults):
|
| 187 |
+
self[name] = _ParseResultsWithOffset(
|
| 188 |
+
ParseResults(toklist._toklist), 0
|
| 189 |
+
)
|
| 190 |
+
else:
|
| 191 |
+
self[name] = _ParseResultsWithOffset(
|
| 192 |
+
ParseResults(toklist[0]), 0
|
| 193 |
+
)
|
| 194 |
+
self[name]._name = name
|
| 195 |
+
else:
|
| 196 |
+
try:
|
| 197 |
+
self[name] = toklist[0]
|
| 198 |
+
except (KeyError, TypeError, IndexError):
|
| 199 |
+
if toklist is not self:
|
| 200 |
+
self[name] = toklist
|
| 201 |
+
else:
|
| 202 |
+
self._name = name
|
| 203 |
+
|
| 204 |
+
def __getitem__(self, i):
|
| 205 |
+
if isinstance(i, (int, slice)):
|
| 206 |
+
return self._toklist[i]
|
| 207 |
+
else:
|
| 208 |
+
if i not in self._all_names:
|
| 209 |
+
return self._tokdict[i][-1][0]
|
| 210 |
+
else:
|
| 211 |
+
return ParseResults([v[0] for v in self._tokdict[i]])
|
| 212 |
+
|
| 213 |
+
def __setitem__(self, k, v, isinstance=isinstance):
|
| 214 |
+
if isinstance(v, _ParseResultsWithOffset):
|
| 215 |
+
self._tokdict[k] = self._tokdict.get(k, list()) + [v]
|
| 216 |
+
sub = v[0]
|
| 217 |
+
elif isinstance(k, (int, slice)):
|
| 218 |
+
self._toklist[k] = v
|
| 219 |
+
sub = v
|
| 220 |
+
else:
|
| 221 |
+
self._tokdict[k] = self._tokdict.get(k, list()) + [
|
| 222 |
+
_ParseResultsWithOffset(v, 0)
|
| 223 |
+
]
|
| 224 |
+
sub = v
|
| 225 |
+
if isinstance(sub, ParseResults):
|
| 226 |
+
sub._parent = self
|
| 227 |
+
|
| 228 |
+
def __delitem__(self, i):
|
| 229 |
+
if isinstance(i, (int, slice)):
|
| 230 |
+
mylen = len(self._toklist)
|
| 231 |
+
del self._toklist[i]
|
| 232 |
+
|
| 233 |
+
# convert int to slice
|
| 234 |
+
if isinstance(i, int):
|
| 235 |
+
if i < 0:
|
| 236 |
+
i += mylen
|
| 237 |
+
i = slice(i, i + 1)
|
| 238 |
+
# get removed indices
|
| 239 |
+
removed = list(range(*i.indices(mylen)))
|
| 240 |
+
removed.reverse()
|
| 241 |
+
# fixup indices in token dictionary
|
| 242 |
+
for name, occurrences in self._tokdict.items():
|
| 243 |
+
for j in removed:
|
| 244 |
+
for k, (value, position) in enumerate(occurrences):
|
| 245 |
+
occurrences[k] = _ParseResultsWithOffset(
|
| 246 |
+
value, position - (position > j)
|
| 247 |
+
)
|
| 248 |
+
else:
|
| 249 |
+
del self._tokdict[i]
|
| 250 |
+
|
| 251 |
+
def __contains__(self, k) -> bool:
|
| 252 |
+
return k in self._tokdict
|
| 253 |
+
|
| 254 |
+
def __len__(self) -> int:
|
| 255 |
+
return len(self._toklist)
|
| 256 |
+
|
| 257 |
+
def __bool__(self) -> bool:
|
| 258 |
+
return not not (self._toklist or self._tokdict)
|
| 259 |
+
|
| 260 |
+
def __iter__(self) -> Iterator:
|
| 261 |
+
return iter(self._toklist)
|
| 262 |
+
|
| 263 |
+
def __reversed__(self) -> Iterator:
|
| 264 |
+
return iter(self._toklist[::-1])
|
| 265 |
+
|
| 266 |
+
def keys(self):
|
| 267 |
+
return iter(self._tokdict)
|
| 268 |
+
|
| 269 |
+
def values(self):
|
| 270 |
+
return (self[k] for k in self.keys())
|
| 271 |
+
|
| 272 |
+
def items(self):
|
| 273 |
+
return ((k, self[k]) for k in self.keys())
|
| 274 |
+
|
| 275 |
+
def haskeys(self) -> bool:
|
| 276 |
+
"""
|
| 277 |
+
Since ``keys()`` returns an iterator, this method is helpful in bypassing
|
| 278 |
+
code that looks for the existence of any defined results names."""
|
| 279 |
+
return not not self._tokdict
|
| 280 |
+
|
| 281 |
+
def pop(self, *args, **kwargs):
|
| 282 |
+
"""
|
| 283 |
+
Removes and returns item at specified index (default= ``last``).
|
| 284 |
+
Supports both ``list`` and ``dict`` semantics for ``pop()``. If
|
| 285 |
+
passed no argument or an integer argument, it will use ``list``
|
| 286 |
+
semantics and pop tokens from the list of parsed tokens. If passed
|
| 287 |
+
a non-integer argument (most likely a string), it will use ``dict``
|
| 288 |
+
semantics and pop the corresponding value from any defined results
|
| 289 |
+
names. A second default return value argument is supported, just as in
|
| 290 |
+
``dict.pop()``.
|
| 291 |
+
|
| 292 |
+
Example::
|
| 293 |
+
|
| 294 |
+
numlist = Word(nums)[...]
|
| 295 |
+
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
|
| 296 |
+
|
| 297 |
+
def remove_first(tokens):
|
| 298 |
+
tokens.pop(0)
|
| 299 |
+
numlist.add_parse_action(remove_first)
|
| 300 |
+
print(numlist.parse_string("0 123 321")) # -> ['123', '321']
|
| 301 |
+
|
| 302 |
+
label = Word(alphas)
|
| 303 |
+
patt = label("LABEL") + Word(nums)[1, ...]
|
| 304 |
+
print(patt.parse_string("AAB 123 321").dump())
|
| 305 |
+
|
| 306 |
+
# Use pop() in a parse action to remove named result (note that corresponding value is not
|
| 307 |
+
# removed from list form of results)
|
| 308 |
+
def remove_LABEL(tokens):
|
| 309 |
+
tokens.pop("LABEL")
|
| 310 |
+
return tokens
|
| 311 |
+
patt.add_parse_action(remove_LABEL)
|
| 312 |
+
print(patt.parse_string("AAB 123 321").dump())
|
| 313 |
+
|
| 314 |
+
prints::
|
| 315 |
+
|
| 316 |
+
['AAB', '123', '321']
|
| 317 |
+
- LABEL: 'AAB'
|
| 318 |
+
|
| 319 |
+
['AAB', '123', '321']
|
| 320 |
+
"""
|
| 321 |
+
if not args:
|
| 322 |
+
args = [-1]
|
| 323 |
+
for k, v in kwargs.items():
|
| 324 |
+
if k == "default":
|
| 325 |
+
args = (args[0], v)
|
| 326 |
+
else:
|
| 327 |
+
raise TypeError(f"pop() got an unexpected keyword argument {k!r}")
|
| 328 |
+
if isinstance(args[0], int) or len(args) == 1 or args[0] in self:
|
| 329 |
+
index = args[0]
|
| 330 |
+
ret = self[index]
|
| 331 |
+
del self[index]
|
| 332 |
+
return ret
|
| 333 |
+
else:
|
| 334 |
+
defaultvalue = args[1]
|
| 335 |
+
return defaultvalue
|
| 336 |
+
|
| 337 |
+
def get(self, key, default_value=None):
|
| 338 |
+
"""
|
| 339 |
+
Returns named result matching the given key, or if there is no
|
| 340 |
+
such name, then returns the given ``default_value`` or ``None`` if no
|
| 341 |
+
``default_value`` is specified.
|
| 342 |
+
|
| 343 |
+
Similar to ``dict.get()``.
|
| 344 |
+
|
| 345 |
+
Example::
|
| 346 |
+
|
| 347 |
+
integer = Word(nums)
|
| 348 |
+
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
|
| 349 |
+
|
| 350 |
+
result = date_str.parse_string("1999/12/31")
|
| 351 |
+
print(result.get("year")) # -> '1999'
|
| 352 |
+
print(result.get("hour", "not specified")) # -> 'not specified'
|
| 353 |
+
print(result.get("hour")) # -> None
|
| 354 |
+
"""
|
| 355 |
+
if key in self:
|
| 356 |
+
return self[key]
|
| 357 |
+
else:
|
| 358 |
+
return default_value
|
| 359 |
+
|
| 360 |
+
def insert(self, index, ins_string):
|
| 361 |
+
"""
|
| 362 |
+
Inserts new element at location index in the list of parsed tokens.
|
| 363 |
+
|
| 364 |
+
Similar to ``list.insert()``.
|
| 365 |
+
|
| 366 |
+
Example::
|
| 367 |
+
|
| 368 |
+
numlist = Word(nums)[...]
|
| 369 |
+
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
|
| 370 |
+
|
| 371 |
+
# use a parse action to insert the parse location in the front of the parsed results
|
| 372 |
+
def insert_locn(locn, tokens):
|
| 373 |
+
tokens.insert(0, locn)
|
| 374 |
+
numlist.add_parse_action(insert_locn)
|
| 375 |
+
print(numlist.parse_string("0 123 321")) # -> [0, '0', '123', '321']
|
| 376 |
+
"""
|
| 377 |
+
self._toklist.insert(index, ins_string)
|
| 378 |
+
# fixup indices in token dictionary
|
| 379 |
+
for name, occurrences in self._tokdict.items():
|
| 380 |
+
for k, (value, position) in enumerate(occurrences):
|
| 381 |
+
occurrences[k] = _ParseResultsWithOffset(
|
| 382 |
+
value, position + (position > index)
|
| 383 |
+
)
|
| 384 |
+
|
| 385 |
+
def append(self, item):
|
| 386 |
+
"""
|
| 387 |
+
Add single element to end of ``ParseResults`` list of elements.
|
| 388 |
+
|
| 389 |
+
Example::
|
| 390 |
+
|
| 391 |
+
numlist = Word(nums)[...]
|
| 392 |
+
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321']
|
| 393 |
+
|
| 394 |
+
# use a parse action to compute the sum of the parsed integers, and add it to the end
|
| 395 |
+
def append_sum(tokens):
|
| 396 |
+
tokens.append(sum(map(int, tokens)))
|
| 397 |
+
numlist.add_parse_action(append_sum)
|
| 398 |
+
print(numlist.parse_string("0 123 321")) # -> ['0', '123', '321', 444]
|
| 399 |
+
"""
|
| 400 |
+
self._toklist.append(item)
|
| 401 |
+
|
| 402 |
+
def extend(self, itemseq):
|
| 403 |
+
"""
|
| 404 |
+
Add sequence of elements to end of ``ParseResults`` list of elements.
|
| 405 |
+
|
| 406 |
+
Example::
|
| 407 |
+
|
| 408 |
+
patt = Word(alphas)[1, ...]
|
| 409 |
+
|
| 410 |
+
# use a parse action to append the reverse of the matched strings, to make a palindrome
|
| 411 |
+
def make_palindrome(tokens):
|
| 412 |
+
tokens.extend(reversed([t[::-1] for t in tokens]))
|
| 413 |
+
return ''.join(tokens)
|
| 414 |
+
patt.add_parse_action(make_palindrome)
|
| 415 |
+
print(patt.parse_string("lskdj sdlkjf lksd")) # -> 'lskdjsdlkjflksddsklfjkldsjdksl'
|
| 416 |
+
"""
|
| 417 |
+
if isinstance(itemseq, ParseResults):
|
| 418 |
+
self.__iadd__(itemseq)
|
| 419 |
+
else:
|
| 420 |
+
self._toklist.extend(itemseq)
|
| 421 |
+
|
| 422 |
+
def clear(self):
|
| 423 |
+
"""
|
| 424 |
+
Clear all elements and results names.
|
| 425 |
+
"""
|
| 426 |
+
del self._toklist[:]
|
| 427 |
+
self._tokdict.clear()
|
| 428 |
+
|
| 429 |
+
def __getattr__(self, name):
|
| 430 |
+
try:
|
| 431 |
+
return self[name]
|
| 432 |
+
except KeyError:
|
| 433 |
+
if name.startswith("__"):
|
| 434 |
+
raise AttributeError(name)
|
| 435 |
+
return ""
|
| 436 |
+
|
| 437 |
+
def __add__(self, other: "ParseResults") -> "ParseResults":
|
| 438 |
+
ret = self.copy()
|
| 439 |
+
ret += other
|
| 440 |
+
return ret
|
| 441 |
+
|
| 442 |
+
def __iadd__(self, other: "ParseResults") -> "ParseResults":
|
| 443 |
+
if not other:
|
| 444 |
+
return self
|
| 445 |
+
|
| 446 |
+
if other._tokdict:
|
| 447 |
+
offset = len(self._toklist)
|
| 448 |
+
addoffset = lambda a: offset if a < 0 else a + offset
|
| 449 |
+
otheritems = other._tokdict.items()
|
| 450 |
+
otherdictitems = [
|
| 451 |
+
(k, _ParseResultsWithOffset(v[0], addoffset(v[1])))
|
| 452 |
+
for k, vlist in otheritems
|
| 453 |
+
for v in vlist
|
| 454 |
+
]
|
| 455 |
+
for k, v in otherdictitems:
|
| 456 |
+
self[k] = v
|
| 457 |
+
if isinstance(v[0], ParseResults):
|
| 458 |
+
v[0]._parent = self
|
| 459 |
+
|
| 460 |
+
self._toklist += other._toklist
|
| 461 |
+
self._all_names |= other._all_names
|
| 462 |
+
return self
|
| 463 |
+
|
| 464 |
+
def __radd__(self, other) -> "ParseResults":
|
| 465 |
+
if isinstance(other, int) and other == 0:
|
| 466 |
+
# useful for merging many ParseResults using sum() builtin
|
| 467 |
+
return self.copy()
|
| 468 |
+
else:
|
| 469 |
+
# this may raise a TypeError - so be it
|
| 470 |
+
return other + self
|
| 471 |
+
|
| 472 |
+
def __repr__(self) -> str:
|
| 473 |
+
return f"{type(self).__name__}({self._toklist!r}, {self.as_dict()})"
|
| 474 |
+
|
| 475 |
+
def __str__(self) -> str:
|
| 476 |
+
return (
|
| 477 |
+
"["
|
| 478 |
+
+ ", ".join(
|
| 479 |
+
[
|
| 480 |
+
str(i) if isinstance(i, ParseResults) else repr(i)
|
| 481 |
+
for i in self._toklist
|
| 482 |
+
]
|
| 483 |
+
)
|
| 484 |
+
+ "]"
|
| 485 |
+
)
|
| 486 |
+
|
| 487 |
+
def _asStringList(self, sep=""):
|
| 488 |
+
out = []
|
| 489 |
+
for item in self._toklist:
|
| 490 |
+
if out and sep:
|
| 491 |
+
out.append(sep)
|
| 492 |
+
if isinstance(item, ParseResults):
|
| 493 |
+
out += item._asStringList()
|
| 494 |
+
else:
|
| 495 |
+
out.append(str(item))
|
| 496 |
+
return out
|
| 497 |
+
|
| 498 |
+
def as_list(self) -> list:
|
| 499 |
+
"""
|
| 500 |
+
Returns the parse results as a nested list of matching tokens, all converted to strings.
|
| 501 |
+
|
| 502 |
+
Example::
|
| 503 |
+
|
| 504 |
+
patt = Word(alphas)[1, ...]
|
| 505 |
+
result = patt.parse_string("sldkj lsdkj sldkj")
|
| 506 |
+
# even though the result prints in string-like form, it is actually a pyparsing ParseResults
|
| 507 |
+
print(type(result), result) # -> <class 'pyparsing.ParseResults'> ['sldkj', 'lsdkj', 'sldkj']
|
| 508 |
+
|
| 509 |
+
# Use as_list() to create an actual list
|
| 510 |
+
result_list = result.as_list()
|
| 511 |
+
print(type(result_list), result_list) # -> <class 'list'> ['sldkj', 'lsdkj', 'sldkj']
|
| 512 |
+
"""
|
| 513 |
+
return [
|
| 514 |
+
res.as_list() if isinstance(res, ParseResults) else res
|
| 515 |
+
for res in self._toklist
|
| 516 |
+
]
|
| 517 |
+
|
| 518 |
+
def as_dict(self) -> dict:
|
| 519 |
+
"""
|
| 520 |
+
Returns the named parse results as a nested dictionary.
|
| 521 |
+
|
| 522 |
+
Example::
|
| 523 |
+
|
| 524 |
+
integer = Word(nums)
|
| 525 |
+
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
|
| 526 |
+
|
| 527 |
+
result = date_str.parse_string('12/31/1999')
|
| 528 |
+
print(type(result), repr(result)) # -> <class 'pyparsing.ParseResults'> (['12', '/', '31', '/', '1999'], {'day': [('1999', 4)], 'year': [('12', 0)], 'month': [('31', 2)]})
|
| 529 |
+
|
| 530 |
+
result_dict = result.as_dict()
|
| 531 |
+
print(type(result_dict), repr(result_dict)) # -> <class 'dict'> {'day': '1999', 'year': '12', 'month': '31'}
|
| 532 |
+
|
| 533 |
+
# even though a ParseResults supports dict-like access, sometime you just need to have a dict
|
| 534 |
+
import json
|
| 535 |
+
print(json.dumps(result)) # -> Exception: TypeError: ... is not JSON serializable
|
| 536 |
+
print(json.dumps(result.as_dict())) # -> {"month": "31", "day": "1999", "year": "12"}
|
| 537 |
+
"""
|
| 538 |
+
|
| 539 |
+
def to_item(obj):
|
| 540 |
+
if isinstance(obj, ParseResults):
|
| 541 |
+
return obj.as_dict() if obj.haskeys() else [to_item(v) for v in obj]
|
| 542 |
+
else:
|
| 543 |
+
return obj
|
| 544 |
+
|
| 545 |
+
return dict((k, to_item(v)) for k, v in self.items())
|
| 546 |
+
|
| 547 |
+
def copy(self) -> "ParseResults":
|
| 548 |
+
"""
|
| 549 |
+
Returns a new shallow copy of a :class:`ParseResults` object. `ParseResults`
|
| 550 |
+
items contained within the source are shared with the copy. Use
|
| 551 |
+
:class:`ParseResults.deepcopy()` to create a copy with its own separate
|
| 552 |
+
content values.
|
| 553 |
+
"""
|
| 554 |
+
ret = ParseResults(self._toklist)
|
| 555 |
+
ret._tokdict = self._tokdict.copy()
|
| 556 |
+
ret._parent = self._parent
|
| 557 |
+
ret._all_names |= self._all_names
|
| 558 |
+
ret._name = self._name
|
| 559 |
+
return ret
|
| 560 |
+
|
| 561 |
+
def deepcopy(self) -> "ParseResults":
|
| 562 |
+
"""
|
| 563 |
+
Returns a new deep copy of a :class:`ParseResults` object.
|
| 564 |
+
"""
|
| 565 |
+
ret = self.copy()
|
| 566 |
+
# replace values with copies if they are of known mutable types
|
| 567 |
+
for i, obj in enumerate(self._toklist):
|
| 568 |
+
if isinstance(obj, ParseResults):
|
| 569 |
+
self._toklist[i] = obj.deepcopy()
|
| 570 |
+
elif isinstance(obj, (str, bytes)):
|
| 571 |
+
pass
|
| 572 |
+
elif isinstance(obj, MutableMapping):
|
| 573 |
+
self._toklist[i] = dest = type(obj)()
|
| 574 |
+
for k, v in obj.items():
|
| 575 |
+
dest[k] = v.deepcopy() if isinstance(v, ParseResults) else v
|
| 576 |
+
elif isinstance(obj, Container):
|
| 577 |
+
self._toklist[i] = type(obj)(
|
| 578 |
+
v.deepcopy() if isinstance(v, ParseResults) else v for v in obj
|
| 579 |
+
)
|
| 580 |
+
return ret
|
| 581 |
+
|
| 582 |
+
def get_name(self):
|
| 583 |
+
r"""
|
| 584 |
+
Returns the results name for this token expression. Useful when several
|
| 585 |
+
different expressions might match at a particular location.
|
| 586 |
+
|
| 587 |
+
Example::
|
| 588 |
+
|
| 589 |
+
integer = Word(nums)
|
| 590 |
+
ssn_expr = Regex(r"\d\d\d-\d\d-\d\d\d\d")
|
| 591 |
+
house_number_expr = Suppress('#') + Word(nums, alphanums)
|
| 592 |
+
user_data = (Group(house_number_expr)("house_number")
|
| 593 |
+
| Group(ssn_expr)("ssn")
|
| 594 |
+
| Group(integer)("age"))
|
| 595 |
+
user_info = user_data[1, ...]
|
| 596 |
+
|
| 597 |
+
result = user_info.parse_string("22 111-22-3333 #221B")
|
| 598 |
+
for item in result:
|
| 599 |
+
print(item.get_name(), ':', item[0])
|
| 600 |
+
|
| 601 |
+
prints::
|
| 602 |
+
|
| 603 |
+
age : 22
|
| 604 |
+
ssn : 111-22-3333
|
| 605 |
+
house_number : 221B
|
| 606 |
+
"""
|
| 607 |
+
if self._name:
|
| 608 |
+
return self._name
|
| 609 |
+
elif self._parent:
|
| 610 |
+
par: "ParseResults" = self._parent
|
| 611 |
+
parent_tokdict_items = par._tokdict.items()
|
| 612 |
+
return next(
|
| 613 |
+
(
|
| 614 |
+
k
|
| 615 |
+
for k, vlist in parent_tokdict_items
|
| 616 |
+
for v, loc in vlist
|
| 617 |
+
if v is self
|
| 618 |
+
),
|
| 619 |
+
None,
|
| 620 |
+
)
|
| 621 |
+
elif (
|
| 622 |
+
len(self) == 1
|
| 623 |
+
and len(self._tokdict) == 1
|
| 624 |
+
and next(iter(self._tokdict.values()))[0][1] in (0, -1)
|
| 625 |
+
):
|
| 626 |
+
return next(iter(self._tokdict.keys()))
|
| 627 |
+
else:
|
| 628 |
+
return None
|
| 629 |
+
|
| 630 |
+
def dump(self, indent="", full=True, include_list=True, _depth=0) -> str:
|
| 631 |
+
"""
|
| 632 |
+
Diagnostic method for listing out the contents of
|
| 633 |
+
a :class:`ParseResults`. Accepts an optional ``indent`` argument so
|
| 634 |
+
that this string can be embedded in a nested display of other data.
|
| 635 |
+
|
| 636 |
+
Example::
|
| 637 |
+
|
| 638 |
+
integer = Word(nums)
|
| 639 |
+
date_str = integer("year") + '/' + integer("month") + '/' + integer("day")
|
| 640 |
+
|
| 641 |
+
result = date_str.parse_string('1999/12/31')
|
| 642 |
+
print(result.dump())
|
| 643 |
+
|
| 644 |
+
prints::
|
| 645 |
+
|
| 646 |
+
['1999', '/', '12', '/', '31']
|
| 647 |
+
- day: '31'
|
| 648 |
+
- month: '12'
|
| 649 |
+
- year: '1999'
|
| 650 |
+
"""
|
| 651 |
+
out = []
|
| 652 |
+
NL = "\n"
|
| 653 |
+
out.append(indent + str(self.as_list()) if include_list else "")
|
| 654 |
+
|
| 655 |
+
if full:
|
| 656 |
+
if self.haskeys():
|
| 657 |
+
items = sorted((str(k), v) for k, v in self.items())
|
| 658 |
+
for k, v in items:
|
| 659 |
+
if out:
|
| 660 |
+
out.append(NL)
|
| 661 |
+
out.append(f"{indent}{(' ' * _depth)}- {k}: ")
|
| 662 |
+
if isinstance(v, ParseResults):
|
| 663 |
+
if v:
|
| 664 |
+
out.append(
|
| 665 |
+
v.dump(
|
| 666 |
+
indent=indent,
|
| 667 |
+
full=full,
|
| 668 |
+
include_list=include_list,
|
| 669 |
+
_depth=_depth + 1,
|
| 670 |
+
)
|
| 671 |
+
)
|
| 672 |
+
else:
|
| 673 |
+
out.append(str(v))
|
| 674 |
+
else:
|
| 675 |
+
out.append(repr(v))
|
| 676 |
+
if any(isinstance(vv, ParseResults) for vv in self):
|
| 677 |
+
v = self
|
| 678 |
+
for i, vv in enumerate(v):
|
| 679 |
+
if isinstance(vv, ParseResults):
|
| 680 |
+
out.append(
|
| 681 |
+
"\n{}{}[{}]:\n{}{}{}".format(
|
| 682 |
+
indent,
|
| 683 |
+
(" " * (_depth)),
|
| 684 |
+
i,
|
| 685 |
+
indent,
|
| 686 |
+
(" " * (_depth + 1)),
|
| 687 |
+
vv.dump(
|
| 688 |
+
indent=indent,
|
| 689 |
+
full=full,
|
| 690 |
+
include_list=include_list,
|
| 691 |
+
_depth=_depth + 1,
|
| 692 |
+
),
|
| 693 |
+
)
|
| 694 |
+
)
|
| 695 |
+
else:
|
| 696 |
+
out.append(
|
| 697 |
+
"\n%s%s[%d]:\n%s%s%s"
|
| 698 |
+
% (
|
| 699 |
+
indent,
|
| 700 |
+
(" " * (_depth)),
|
| 701 |
+
i,
|
| 702 |
+
indent,
|
| 703 |
+
(" " * (_depth + 1)),
|
| 704 |
+
str(vv),
|
| 705 |
+
)
|
| 706 |
+
)
|
| 707 |
+
|
| 708 |
+
return "".join(out)
|
| 709 |
+
|
| 710 |
+
def pprint(self, *args, **kwargs):
|
| 711 |
+
"""
|
| 712 |
+
Pretty-printer for parsed results as a list, using the
|
| 713 |
+
`pprint <https://docs.python.org/3/library/pprint.html>`_ module.
|
| 714 |
+
Accepts additional positional or keyword args as defined for
|
| 715 |
+
`pprint.pprint <https://docs.python.org/3/library/pprint.html#pprint.pprint>`_ .
|
| 716 |
+
|
| 717 |
+
Example::
|
| 718 |
+
|
| 719 |
+
ident = Word(alphas, alphanums)
|
| 720 |
+
num = Word(nums)
|
| 721 |
+
func = Forward()
|
| 722 |
+
term = ident | num | Group('(' + func + ')')
|
| 723 |
+
func <<= ident + Group(Optional(DelimitedList(term)))
|
| 724 |
+
result = func.parse_string("fna a,b,(fnb c,d,200),100")
|
| 725 |
+
result.pprint(width=40)
|
| 726 |
+
|
| 727 |
+
prints::
|
| 728 |
+
|
| 729 |
+
['fna',
|
| 730 |
+
['a',
|
| 731 |
+
'b',
|
| 732 |
+
['(', 'fnb', ['c', 'd', '200'], ')'],
|
| 733 |
+
'100']]
|
| 734 |
+
"""
|
| 735 |
+
pprint.pprint(self.as_list(), *args, **kwargs)
|
| 736 |
+
|
| 737 |
+
# add support for pickle protocol
|
| 738 |
+
def __getstate__(self):
|
| 739 |
+
return (
|
| 740 |
+
self._toklist,
|
| 741 |
+
(
|
| 742 |
+
self._tokdict.copy(),
|
| 743 |
+
None,
|
| 744 |
+
self._all_names,
|
| 745 |
+
self._name,
|
| 746 |
+
),
|
| 747 |
+
)
|
| 748 |
+
|
| 749 |
+
def __setstate__(self, state):
|
| 750 |
+
self._toklist, (self._tokdict, par, inAccumNames, self._name) = state
|
| 751 |
+
self._all_names = set(inAccumNames)
|
| 752 |
+
self._parent = None
|
| 753 |
+
|
| 754 |
+
def __getnewargs__(self):
|
| 755 |
+
return self._toklist, self._name
|
| 756 |
+
|
| 757 |
+
def __dir__(self):
|
| 758 |
+
return dir(type(self)) + list(self.keys())
|
| 759 |
+
|
| 760 |
+
@classmethod
|
| 761 |
+
def from_dict(cls, other, name=None) -> "ParseResults":
|
| 762 |
+
"""
|
| 763 |
+
Helper classmethod to construct a ``ParseResults`` from a ``dict``, preserving the
|
| 764 |
+
name-value relations as results names. If an optional ``name`` argument is
|
| 765 |
+
given, a nested ``ParseResults`` will be returned.
|
| 766 |
+
"""
|
| 767 |
+
|
| 768 |
+
def is_iterable(obj):
|
| 769 |
+
try:
|
| 770 |
+
iter(obj)
|
| 771 |
+
except Exception:
|
| 772 |
+
return False
|
| 773 |
+
# str's are iterable, but in pyparsing, we don't want to iterate over them
|
| 774 |
+
else:
|
| 775 |
+
return not isinstance(obj, str_type)
|
| 776 |
+
|
| 777 |
+
ret = cls([])
|
| 778 |
+
for k, v in other.items():
|
| 779 |
+
if isinstance(v, Mapping):
|
| 780 |
+
ret += cls.from_dict(v, name=k)
|
| 781 |
+
else:
|
| 782 |
+
ret += cls([v], name=k, asList=is_iterable(v))
|
| 783 |
+
if name is not None:
|
| 784 |
+
ret = cls([ret], name=name)
|
| 785 |
+
return ret
|
| 786 |
+
|
| 787 |
+
asList = as_list
|
| 788 |
+
"""Deprecated - use :class:`as_list`"""
|
| 789 |
+
asDict = as_dict
|
| 790 |
+
"""Deprecated - use :class:`as_dict`"""
|
| 791 |
+
getName = get_name
|
| 792 |
+
"""Deprecated - use :class:`get_name`"""
|
| 793 |
+
|
| 794 |
+
|
| 795 |
+
MutableMapping.register(ParseResults)
|
| 796 |
+
MutableSequence.register(ParseResults)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/testing.py
ADDED
|
@@ -0,0 +1,331 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# testing.py
|
| 2 |
+
|
| 3 |
+
from contextlib import contextmanager
|
| 4 |
+
import typing
|
| 5 |
+
|
| 6 |
+
from .core import (
|
| 7 |
+
ParserElement,
|
| 8 |
+
ParseException,
|
| 9 |
+
Keyword,
|
| 10 |
+
__diag__,
|
| 11 |
+
__compat__,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class pyparsing_test:
|
| 16 |
+
"""
|
| 17 |
+
namespace class for classes useful in writing unit tests
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
class reset_pyparsing_context:
    """
    Context manager to be used when writing unit tests that modify pyparsing config values:
    - packrat parsing
    - bounded recursion parsing
    - default whitespace characters.
    - default keyword characters
    - literal string auto-conversion class
    - __diag__ settings
    - __compat__.collect_all_And_tokens

    Example::

        with reset_pyparsing_context():
            # test that literals used to construct a grammar are automatically suppressed
            ParserElement.inlineLiteralsUsing(Suppress)

            term = Word(alphas) | Word(nums)
            group = Group('(' + term[...] + ')')

            # assert that the '()' characters are not included in the parsed tokens
            self.assertParseAndCheckList(group, "(abc 123 def)", ['abc', '123', 'def'])

        # after exiting context manager, literals are converted to Literal expressions again
    """

    def __init__(self):
        # Snapshot of the global settings; populated by save().
        self._save_context = {}

    def save(self):
        """Record the current pyparsing global settings and return ``self``."""
        self._save_context["default_whitespace"] = ParserElement.DEFAULT_WHITE_CHARS
        self._save_context["default_keyword_chars"] = Keyword.DEFAULT_KEYWORD_CHARS

        self._save_context[
            "literal_string_class"
        ] = ParserElement._literalStringClass

        self._save_context["verbose_stacktrace"] = ParserElement.verbose_stacktrace

        self._save_context["packrat_enabled"] = ParserElement._packratEnabled
        if ParserElement._packratEnabled:
            self._save_context[
                "packrat_cache_size"
            ] = ParserElement.packrat_cache.size
        else:
            self._save_context["packrat_cache_size"] = None
        self._save_context["packrat_parse"] = ParserElement._parse
        self._save_context[
            "recursion_enabled"
        ] = ParserElement._left_recursion_enabled

        self._save_context["__diag__"] = {
            name: getattr(__diag__, name) for name in __diag__._all_names
        }

        self._save_context["__compat__"] = {
            "collect_all_And_tokens": __compat__.collect_all_And_tokens
        }

        return self

    def restore(self):
        """Write the settings captured by save() back and return ``self``."""
        # reset pyparsing global state
        if (
            ParserElement.DEFAULT_WHITE_CHARS
            != self._save_context["default_whitespace"]
        ):
            ParserElement.set_default_whitespace_chars(
                self._save_context["default_whitespace"]
            )

        ParserElement.verbose_stacktrace = self._save_context["verbose_stacktrace"]

        Keyword.DEFAULT_KEYWORD_CHARS = self._save_context["default_keyword_chars"]
        ParserElement.inlineLiteralsUsing(
            self._save_context["literal_string_class"]
        )

        for name, value in self._save_context["__diag__"].items():
            (__diag__.enable if value else __diag__.disable)(name)

        # Clear the flag first so enable_packrat() (if called) starts from a
        # "not enabled" state.
        ParserElement._packratEnabled = False
        if self._save_context["packrat_enabled"]:
            ParserElement.enable_packrat(self._save_context["packrat_cache_size"])
        else:
            ParserElement._parse = self._save_context["packrat_parse"]
        ParserElement._left_recursion_enabled = self._save_context[
            "recursion_enabled"
        ]

        # save() stores a dict keyed by flag name; restore the flag's own value,
        # not the enclosing dict (which would always be truthy).
        __compat__.collect_all_And_tokens = self._save_context["__compat__"][
            "collect_all_And_tokens"
        ]

        return self

    def copy(self):
        """Return a new context object holding a shallow copy of the saved settings."""
        ret = type(self)()
        ret._save_context.update(self._save_context)
        return ret

    def __enter__(self):
        return self.save()

    def __exit__(self, *args):
        self.restore()
|
| 123 |
+
|
| 124 |
+
class TestParseResultsAsserts:
    """
    A mixin class to add parse results assertion methods to normal unittest.TestCase classes.
    """

    def assertParseResultsEquals(
        self, result, expected_list=None, expected_dict=None, msg=None
    ):
        """
        Unit test assertion to compare a :class:`ParseResults` object with an optional ``expected_list``,
        and compare any defined results names with an optional ``expected_dict``.
        """
        if expected_list is not None:
            self.assertEqual(expected_list, result.as_list(), msg=msg)
        if expected_dict is not None:
            self.assertEqual(expected_dict, result.as_dict(), msg=msg)

    def assertParseAndCheckList(
        self, expr, test_string, expected_list, msg=None, verbose=True
    ):
        """
        Convenience wrapper assert to test a parser element and input string, and assert that
        the resulting ``ParseResults.as_list()`` is equal to the ``expected_list``.
        """
        result = expr.parse_string(test_string, parse_all=True)
        if verbose:
            print(result.dump())
        else:
            print(result.as_list())
        self.assertParseResultsEquals(result, expected_list=expected_list, msg=msg)

    def assertParseAndCheckDict(
        self, expr, test_string, expected_dict, msg=None, verbose=True
    ):
        """
        Convenience wrapper assert to test a parser element and input string, and assert that
        the resulting ``ParseResults.as_dict()`` is equal to the ``expected_dict``.
        """
        # Use the same ``parse_all`` spelling as assertParseAndCheckList.
        result = expr.parse_string(test_string, parse_all=True)
        if verbose:
            print(result.dump())
        else:
            print(result.as_list())
        self.assertParseResultsEquals(result, expected_dict=expected_dict, msg=msg)

    def assertRunTestResults(
        self, run_tests_report, expected_parse_results=None, msg=None
    ):
        """
        Unit test assertion to evaluate output of ``ParserElement.runTests()``. If a list of
        list-dict tuples is given as the ``expected_parse_results`` argument, then these are zipped
        with the report tuples returned by ``runTests`` and evaluated using ``assertParseResultsEquals``.
        Finally, asserts that the overall ``runTests()`` success value is ``True``.

        :param run_tests_report: tuple(bool, [tuple(str, ParseResults or Exception)]) returned from runTests
        :param expected_parse_results (optional): [tuple(str, list, dict, Exception)]
        """
        run_test_success, run_test_results = run_tests_report

        if expected_parse_results is not None:
            merged = [
                (*rpt, expected)
                for rpt, expected in zip(run_test_results, expected_parse_results)
            ]
            for test_string, result, expected in merged:
                # expected should be a tuple containing a list and/or a dict or an exception,
                # and optional failure message string
                # an empty tuple will skip any result validation
                fail_msg = next(
                    (exp for exp in expected if isinstance(exp, str)), None
                )
                expected_exception = next(
                    (
                        exp
                        for exp in expected
                        if isinstance(exp, type) and issubclass(exp, Exception)
                    ),
                    None,
                )
                if expected_exception is not None:
                    # Re-raise the recorded exception inside assertRaises so its
                    # type is checked against the expected exception class.
                    with self.assertRaises(
                        expected_exception=expected_exception, msg=fail_msg or msg
                    ):
                        if isinstance(result, Exception):
                            raise result
                else:
                    expected_list = next(
                        (exp for exp in expected if isinstance(exp, list)), None
                    )
                    expected_dict = next(
                        (exp for exp in expected if isinstance(exp, dict)), None
                    )
                    if (expected_list, expected_dict) != (None, None):
                        self.assertParseResultsEquals(
                            result,
                            expected_list=expected_list,
                            expected_dict=expected_dict,
                            msg=fail_msg or msg,
                        )
                    else:
                        # warning here maybe?
                        print(f"no validation for {test_string!r}")

        # do this last, in case some specific test results can be reported instead
        self.assertTrue(
            run_test_success, msg=msg if msg is not None else "failed runTests"
        )

    @contextmanager
    def assertRaisesParseException(self, exc_type=ParseException, msg=None):
        """Context manager asserting that the enclosed code raises ``exc_type``."""
        with self.assertRaises(exc_type, msg=msg):
            yield
|
| 236 |
+
|
| 237 |
+
@staticmethod
|
| 238 |
+
def with_line_numbers(
|
| 239 |
+
s: str,
|
| 240 |
+
start_line: typing.Optional[int] = None,
|
| 241 |
+
end_line: typing.Optional[int] = None,
|
| 242 |
+
expand_tabs: bool = True,
|
| 243 |
+
eol_mark: str = "|",
|
| 244 |
+
mark_spaces: typing.Optional[str] = None,
|
| 245 |
+
mark_control: typing.Optional[str] = None,
|
| 246 |
+
) -> str:
|
| 247 |
+
"""
|
| 248 |
+
Helpful method for debugging a parser - prints a string with line and column numbers.
|
| 249 |
+
(Line and column numbers are 1-based.)
|
| 250 |
+
|
| 251 |
+
:param s: tuple(bool, str - string to be printed with line and column numbers
|
| 252 |
+
:param start_line: int - (optional) starting line number in s to print (default=1)
|
| 253 |
+
:param end_line: int - (optional) ending line number in s to print (default=len(s))
|
| 254 |
+
:param expand_tabs: bool - (optional) expand tabs to spaces, to match the pyparsing default
|
| 255 |
+
:param eol_mark: str - (optional) string to mark the end of lines, helps visualize trailing spaces (default="|")
|
| 256 |
+
:param mark_spaces: str - (optional) special character to display in place of spaces
|
| 257 |
+
:param mark_control: str - (optional) convert non-printing control characters to a placeholding
|
| 258 |
+
character; valid values:
|
| 259 |
+
- "unicode" - replaces control chars with Unicode symbols, such as "␍" and "␊"
|
| 260 |
+
- any single character string - replace control characters with given string
|
| 261 |
+
- None (default) - string is displayed as-is
|
| 262 |
+
|
| 263 |
+
:return: str - input string with leading line numbers and column number headers
|
| 264 |
+
"""
|
| 265 |
+
if expand_tabs:
|
| 266 |
+
s = s.expandtabs()
|
| 267 |
+
if mark_control is not None:
|
| 268 |
+
mark_control = typing.cast(str, mark_control)
|
| 269 |
+
if mark_control == "unicode":
|
| 270 |
+
transtable_map = {
|
| 271 |
+
c: u for c, u in zip(range(0, 33), range(0x2400, 0x2433))
|
| 272 |
+
}
|
| 273 |
+
transtable_map[127] = 0x2421
|
| 274 |
+
tbl = str.maketrans(transtable_map)
|
| 275 |
+
eol_mark = ""
|
| 276 |
+
else:
|
| 277 |
+
ord_mark_control = ord(mark_control)
|
| 278 |
+
tbl = str.maketrans(
|
| 279 |
+
{c: ord_mark_control for c in list(range(0, 32)) + [127]}
|
| 280 |
+
)
|
| 281 |
+
s = s.translate(tbl)
|
| 282 |
+
if mark_spaces is not None and mark_spaces != " ":
|
| 283 |
+
if mark_spaces == "unicode":
|
| 284 |
+
tbl = str.maketrans({9: 0x2409, 32: 0x2423})
|
| 285 |
+
s = s.translate(tbl)
|
| 286 |
+
else:
|
| 287 |
+
s = s.replace(" ", mark_spaces)
|
| 288 |
+
if start_line is None:
|
| 289 |
+
start_line = 1
|
| 290 |
+
if end_line is None:
|
| 291 |
+
end_line = len(s)
|
| 292 |
+
end_line = min(end_line, len(s))
|
| 293 |
+
start_line = min(max(1, start_line), end_line)
|
| 294 |
+
|
| 295 |
+
if mark_control != "unicode":
|
| 296 |
+
s_lines = s.splitlines()[start_line - 1 : end_line]
|
| 297 |
+
else:
|
| 298 |
+
s_lines = [line + "␊" for line in s.split("␊")[start_line - 1 : end_line]]
|
| 299 |
+
if not s_lines:
|
| 300 |
+
return ""
|
| 301 |
+
|
| 302 |
+
lineno_width = len(str(end_line))
|
| 303 |
+
max_line_len = max(len(line) for line in s_lines)
|
| 304 |
+
lead = " " * (lineno_width + 1)
|
| 305 |
+
if max_line_len >= 99:
|
| 306 |
+
header0 = (
|
| 307 |
+
lead
|
| 308 |
+
+ "".join(
|
| 309 |
+
f"{' ' * 99}{(i + 1) % 100}"
|
| 310 |
+
for i in range(max(max_line_len // 100, 1))
|
| 311 |
+
)
|
| 312 |
+
+ "\n"
|
| 313 |
+
)
|
| 314 |
+
else:
|
| 315 |
+
header0 = ""
|
| 316 |
+
header1 = (
|
| 317 |
+
header0
|
| 318 |
+
+ lead
|
| 319 |
+
+ "".join(f" {(i + 1) % 10}" for i in range(-(-max_line_len // 10)))
|
| 320 |
+
+ "\n"
|
| 321 |
+
)
|
| 322 |
+
header2 = lead + "1234567890" * (-(-max_line_len // 10)) + "\n"
|
| 323 |
+
return (
|
| 324 |
+
header1
|
| 325 |
+
+ header2
|
| 326 |
+
+ "\n".join(
|
| 327 |
+
f"{i:{lineno_width}d}:{line}{eol_mark}"
|
| 328 |
+
for i, line in enumerate(s_lines, start=start_line)
|
| 329 |
+
)
|
| 330 |
+
+ "\n"
|
| 331 |
+
)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/unicode.py
ADDED
|
@@ -0,0 +1,361 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# unicode.py
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
from itertools import filterfalse
|
| 5 |
+
from typing import List, Tuple, Union
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class _lazyclassproperty:
    """Descriptor that computes a class-level value once per class and caches it.

    The wrapped function is called with the owning class; its result is stored
    in that class's own ``_intern`` dict under the function's name.
    """

    def __init__(self, fn):
        self.fn = fn
        self.__doc__ = fn.__doc__
        self.__name__ = fn.__name__

    def __get__(self, obj, cls):
        owner = type(obj) if cls is None else cls

        # Give the class its own cache dict when it has none, or when the one
        # it sees is merely inherited from a class further up the MRO.
        needs_own_cache = not hasattr(owner, "_intern")
        if not needs_own_cache:
            for ancestor in owner.__mro__[1:]:
                # The fresh-list default can never be identical to owner._intern.
                if owner._intern is getattr(ancestor, "_intern", []):
                    needs_own_cache = True
                    break
        if needs_own_cache:
            owner._intern = {}

        key = self.fn.__name__
        if key not in owner._intern:
            owner._intern[key] = self.fn(owner)
        return owner._intern[key]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
UnicodeRangeList = List[Union[Tuple[int, int], Tuple[int]]]
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class unicode_set:
    """
    A set of Unicode characters, for language-specific strings for
    ``alphas``, ``nums``, ``alphanums``, and ``printables``.
    A unicode_set is defined by a list of ranges in the Unicode character
    set, in a class attribute ``_ranges``. Ranges can be specified using
    2-tuples or a 1-tuple, such as::

        _ranges = [
            (0x0020, 0x007e),
            (0x00a0, 0x00ff),
            (0x0100,),
        ]

    Ranges are left- and right-inclusive. A 1-tuple of (x,) is treated as (x, x).

    A unicode set can also be defined using multiple inheritance of other unicode sets::

        class CJK(Chinese, Japanese, Korean):
            pass
    """

    _ranges: UnicodeRangeList = []

    @_lazyclassproperty
    def _chars_for_ranges(cls):
        # Collect code points from this class and its bases, walking the MRO
        # until the unicode_set root is reached.
        codepoints = set()
        for klass in cls.__mro__:
            if klass is unicode_set:
                break
            for bounds in getattr(klass, "_ranges", ()):
                # bounds[-1] is bounds[0] for a 1-tuple, so (x,) means (x, x)
                codepoints.update(range(bounds[0], bounds[-1] + 1))
        return [chr(cp) for cp in sorted(codepoints)]

    @_lazyclassproperty
    def printables(cls):
        """all non-whitespace characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if not ch.isspace())

    @_lazyclassproperty
    def alphas(cls):
        """all alphabetic characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isalpha())

    @_lazyclassproperty
    def nums(cls):
        """all numeric digit characters in this range"""
        return "".join(ch for ch in cls._chars_for_ranges if ch.isdigit())

    @_lazyclassproperty
    def alphanums(cls):
        """all alphanumeric characters in this range"""
        return "".join([cls.alphas, cls.nums])

    @_lazyclassproperty
    def identchars(cls):
        """all characters in this range that are valid identifier characters, plus underscore '_'"""
        members = {ch for ch in cls._chars_for_ranges if ch.isidentifier()}
        members.update("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyzªµº")
        members.update(
            "ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõöøùúûüýþÿ"
        )
        members.add("_")
        return "".join(sorted(members))

    @_lazyclassproperty
    def identbodychars(cls):
        """
        all characters in this range that are valid identifier body characters,
        plus the digits 0-9, and · (Unicode MIDDLE DOT)
        """
        members = set(cls.identchars)
        members.update("0123456789·")
        # A character is a valid identifier body character if it may follow
        # a leading underscore.
        members.update(
            ch for ch in cls._chars_for_ranges if ("_" + ch).isidentifier()
        )
        return "".join(sorted(members))

    @_lazyclassproperty
    def identifier(cls):
        """
        a pyparsing Word expression for an identifier using this range's definitions for
        identchars and identbodychars
        """
        from pip._vendor.pyparsing import Word

        leading = cls.identchars
        body = cls.identbodychars
        return Word(leading, body)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
class pyparsing_unicode(unicode_set):
    """
    A namespace class for defining common language unicode_sets.
    """

    # fmt: off

    # define ranges in language character sets
    # The namespace class itself spans U+0020 through the highest code point.
    _ranges: UnicodeRangeList = [
        (0x0020, sys.maxunicode),
    ]

    class BasicMultilingualPlane(unicode_set):
        """Unicode set for the Basic Multilingual Plane"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0xFFFF),
        ]

    class Latin1(unicode_set):
        """Unicode set for Latin-1 Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0020, 0x007E),
            (0x00A0, 0x00FF),
        ]

    class LatinA(unicode_set):
        """Unicode set for Latin-A Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0100, 0x017F),
        ]

    class LatinB(unicode_set):
        """Unicode set for Latin-B Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0180, 0x024F),
        ]

    class Greek(unicode_set):
        """Unicode set for Greek Unicode Character Ranges"""
        _ranges: UnicodeRangeList = [
            (0x0342, 0x0345),
            (0x0370, 0x0377),
            (0x037A, 0x037F),
            (0x0384, 0x038A),
            (0x038C,),
            (0x038E, 0x03A1),
            (0x03A3, 0x03E1),
            (0x03F0, 0x03FF),
            (0x1D26, 0x1D2A),
            (0x1D5E,),
            (0x1D60,),
            (0x1D66, 0x1D6A),
            (0x1F00, 0x1F15),
            (0x1F18, 0x1F1D),
            (0x1F20, 0x1F45),
            (0x1F48, 0x1F4D),
            (0x1F50, 0x1F57),
            (0x1F59,),
            (0x1F5B,),
            (0x1F5D,),
            (0x1F5F, 0x1F7D),
            (0x1F80, 0x1FB4),
            (0x1FB6, 0x1FC4),
            (0x1FC6, 0x1FD3),
            (0x1FD6, 0x1FDB),
            (0x1FDD, 0x1FEF),
            (0x1FF2, 0x1FF4),
            (0x1FF6, 0x1FFE),
            (0x2129,),
            (0x2719, 0x271A),
            (0xAB65,),
            (0x10140, 0x1018D),
            (0x101A0,),
            (0x1D200, 0x1D245),
            (0x1F7A1, 0x1F7A7),
        ]

    class Cyrillic(unicode_set):
        """Unicode set for Cyrillic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0400, 0x052F),
            (0x1C80, 0x1C88),
            (0x1D2B,),
            (0x1D78,),
            (0x2DE0, 0x2DFF),
            (0xA640, 0xA672),
            (0xA674, 0xA69F),
            (0xFE2E, 0xFE2F),
        ]

    class Chinese(unicode_set):
        """Unicode set for Chinese Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x2E80, 0x2E99),
            (0x2E9B, 0x2EF3),
            (0x31C0, 0x31E3),
            (0x3400, 0x4DB5),
            (0x4E00, 0x9FEF),
            (0xA700, 0xA707),
            (0xF900, 0xFA6D),
            (0xFA70, 0xFAD9),
            (0x16FE2, 0x16FE3),
            (0x1F210, 0x1F212),
            (0x1F214, 0x1F23B),
            (0x1F240, 0x1F248),
            (0x20000, 0x2A6D6),
            (0x2A700, 0x2B734),
            (0x2B740, 0x2B81D),
            (0x2B820, 0x2CEA1),
            (0x2CEB0, 0x2EBE0),
            (0x2F800, 0x2FA1D),
        ]

    class Japanese(unicode_set):
        """Unicode set for Japanese Unicode Character Range, combining Kanji, Hiragana, and Katakana ranges"""

        class Kanji(unicode_set):
            "Unicode set for Kanji Unicode Character Range"
            _ranges: UnicodeRangeList = [
                (0x4E00, 0x9FBF),
                (0x3000, 0x303F),
            ]

        class Hiragana(unicode_set):
            """Unicode set for Hiragana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3041, 0x3096),
                (0x3099, 0x30A0),
                (0x30FC,),
                (0xFF70,),
                (0x1B001,),
                (0x1B150, 0x1B152),
                (0x1F200,),
            ]

        class Katakana(unicode_set):
            """Unicode set for Katakana Unicode Character Range"""
            _ranges: UnicodeRangeList = [
                (0x3099, 0x309C),
                (0x30A0, 0x30FF),
                (0x31F0, 0x31FF),
                (0x32D0, 0x32FE),
                (0xFF65, 0xFF9F),
                (0x1B000,),
                (0x1B164, 0x1B167),
                (0x1F201, 0x1F202),
                (0x1F213,),
            ]

        # Native-script aliases for the three nested sets.
        漢字 = Kanji
        カタカナ = Katakana
        ひらがな = Hiragana

        # Japanese itself is the concatenation of its three nested range lists.
        _ranges = (
            Kanji._ranges
            + Hiragana._ranges
            + Katakana._ranges
        )

    class Hangul(unicode_set):
        """Unicode set for Hangul (Korean) Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x1100, 0x11FF),
            (0x302E, 0x302F),
            (0x3131, 0x318E),
            (0x3200, 0x321C),
            (0x3260, 0x327B),
            (0x327E,),
            (0xA960, 0xA97C),
            (0xAC00, 0xD7A3),
            (0xD7B0, 0xD7C6),
            (0xD7CB, 0xD7FB),
            (0xFFA0, 0xFFBE),
            (0xFFC2, 0xFFC7),
            (0xFFCA, 0xFFCF),
            (0xFFD2, 0xFFD7),
            (0xFFDA, 0xFFDC),
        ]

    # Korean is an alias of Hangul, not a separate set.
    Korean = Hangul

    # CJK declares no _ranges of its own; it only names its three base sets.
    class CJK(Chinese, Japanese, Hangul):
        """Unicode set for combined Chinese, Japanese, and Korean (CJK) Unicode Character Range"""

    class Thai(unicode_set):
        """Unicode set for Thai Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0E01, 0x0E3A),
            (0x0E3F, 0x0E5B)
        ]

    class Arabic(unicode_set):
        """Unicode set for Arabic Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0600, 0x061B),
            (0x061E, 0x06FF),
            (0x0700, 0x077F),
        ]

    class Hebrew(unicode_set):
        """Unicode set for Hebrew Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0591, 0x05C7),
            (0x05D0, 0x05EA),
            (0x05EF, 0x05F4),
            (0xFB1D, 0xFB36),
            (0xFB38, 0xFB3C),
            (0xFB3E,),
            (0xFB40, 0xFB41),
            (0xFB43, 0xFB44),
            (0xFB46, 0xFB4F),
        ]

    class Devanagari(unicode_set):
        """Unicode set for Devanagari Unicode Character Range"""
        _ranges: UnicodeRangeList = [
            (0x0900, 0x097F),
            (0xA8E0, 0xA8FF)
        ]

    # Short alias for BasicMultilingualPlane.
    BMP = BasicMultilingualPlane

    # add language identifiers using language Unicode
    العربية = Arabic
    中文 = Chinese
    кириллица = Cyrillic
    Ελληνικά = Greek
    עִברִית = Hebrew
    日本語 = Japanese
    한국어 = Korean
    ไทย = Thai
    देवनागरी = Devanagari

    # fmt: on
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/pyparsing/util.py
ADDED
|
@@ -0,0 +1,284 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# util.py
|
| 2 |
+
import inspect
|
| 3 |
+
import warnings
|
| 4 |
+
import types
|
| 5 |
+
import collections
|
| 6 |
+
import itertools
|
| 7 |
+
from functools import lru_cache, wraps
|
| 8 |
+
from typing import Callable, List, Union, Iterable, TypeVar, cast
|
| 9 |
+
|
| 10 |
+
_bslash = chr(92)
|
| 11 |
+
C = TypeVar("C", bound=Callable)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class __config_flags:
    """Internal class for defining compatibility and debugging flags"""

    _all_names: List[str] = []
    _fixed_names: List[str] = []
    _type_desc = "configuration"

    @classmethod
    def _set(cls, dname, value):
        """Set flag ``dname`` to ``value``.

        Fixed flags are left unchanged and only produce a warning; a name not
        listed in ``_all_names`` raises ``ValueError``.
        """
        if dname in cls._fixed_names:
            current = str(getattr(cls, dname)).upper()
            message = (
                f"{cls.__name__}.{dname} {cls._type_desc} is {current}"
                f" and cannot be overridden"
            )
            # stacklevel=3 attributes the warning to the caller of
            # enable()/disable(), which sit one frame above _set().
            warnings.warn(message, stacklevel=3)
            return
        if dname not in cls._all_names:
            raise ValueError(f"no such {cls._type_desc} {dname!r}")
        setattr(cls, dname, value)

    @classmethod
    def enable(cls, name):
        return cls._set(name, True)

    @classmethod
    def disable(cls, name):
        return cls._set(name, False)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@lru_cache(maxsize=128)
def col(loc: int, strg: str) -> int:
    """
    Return the 1-based column of position ``loc`` within ``strg``, where
    newlines separate lines.

    Note: the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See
    :class:`ParserElement.parse_string` for more
    information on parsing strings containing ``<TAB>`` s, and suggested
    methods to maintain a consistent view of the parsed string, the parse
    location, and line and column positions within the parsed string.
    """
    # A position immediately after a newline is the first column of its line.
    if 0 < loc < len(strg) and strg[loc - 1] == "\n":
        return 1
    # Otherwise measure the distance back to the previous newline
    # (rfind gives -1 when there is none, i.e. the first line).
    previous_newline = strg.rfind("\n", 0, loc)
    return loc - previous_newline
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@lru_cache(maxsize=128)
def lineno(loc: int, strg: str) -> int:
    """
    Return the 1-based line number of position ``loc`` within ``strg``,
    where newlines separate lines.

    Note - the default parsing behavior is to expand tabs in the input string
    before starting the parsing process.  See :class:`ParserElement.parse_string`
    for more information on parsing strings containing ``<TAB>`` s, and
    suggested methods to maintain a consistent view of the parsed string, the
    parse location, and line and column positions within the parsed string.
    """
    newlines_before = strg.count("\n", 0, loc)
    return newlines_before + 1
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
@lru_cache(maxsize=128)
def line(loc: int, strg: str) -> str:
    """
    Return the text of the line that contains position ``loc`` in ``strg``,
    where newlines separate lines. The newline itself is not included.
    """
    # Start just after the previous newline (index 0 when rfind returns -1).
    begin = strg.rfind("\n", 0, loc) + 1
    finish = strg.find("\n", loc)
    if finish < 0:
        # No later newline: the line runs to the end of the string.
        return strg[begin:]
    return strg[begin:finish]
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class _UnboundedCache:
    """Key/value cache with no size limit.

    ``get`` returns the ``not_in_cache`` sentinel for a missing key. The
    backing dict lives in a closure rather than on the instance.
    """

    def __init__(self):
        store = {}
        lookup = store.get
        missing = object()

        def fetch(_, key):
            return lookup(key, missing)

        def put(_, key, value):
            store[key] = value

        def reset(_):
            store.clear()

        self.not_in_cache = missing
        # size of None marks the cache as unbounded
        self.size = None
        # Bind the closures as per-instance methods.
        for attr, func in (("get", fetch), ("set", put), ("clear", reset)):
            setattr(self, attr, types.MethodType(func, self))
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
class _FifoCache:
    """Key/value cache holding at most ``size`` entries, evicting first-in first-out.

    ``get`` returns the ``not_in_cache`` sentinel for a missing key. Eviction
    order is the order in which keys were first inserted; setting an existing
    key again updates its value without consuming another slot.
    """

    def __init__(self, size):
        self.not_in_cache = not_in_cache = object()
        cache = {}
        cache_get = cache.get
        cache_pop = cache.pop

        def get(_, key):
            return cache_get(key, not_in_cache)

        def set_(_, key, value):
            cache[key] = value
            # dicts preserve insertion order, so the first key is the oldest.
            # Evicting by actual entry count (rather than by a fixed ring of
            # slots) means a key that is set twice cannot be dropped early.
            while len(cache) > size:
                cache_pop(next(iter(cache)))

        def clear(_):
            cache.clear()

        self.size = size
        self.get = types.MethodType(get, self)
        self.set = types.MethodType(set_, self)
        self.clear = types.MethodType(clear, self)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
class LRUMemo:
    """
    Memoizing mapping that keeps up to ``capacity`` *deleted* items around.

    Items that are set live in an "active" dict. Deleting an active item moves
    it into a retained store ordered by access; once that store already holds
    ``capacity`` items, the least recently used ones are dropped to make room.
    Lookups fall back to the retained store and refresh the item's recency.
    """

    def __init__(self, capacity):
        self._capacity = capacity
        self._active = {}
        self._memory = collections.OrderedDict()

    def __getitem__(self, key):
        try:
            return self._active[key]
        except KeyError:
            # Not active: serve it from the retained store (KeyError if it is
            # not there either) and mark it as most recently used.
            retained = self._memory
            retained.move_to_end(key)
            return retained[key]

    def __setitem__(self, key, value):
        # A key is either active or retained, never both.
        self._memory.pop(key, None)
        self._active[key] = value

    def __delitem__(self, key):
        if key not in self._active:
            # Deleting an unknown (or already retained) key is a no-op.
            return
        value = self._active.pop(key)
        retained = self._memory
        while len(retained) >= self._capacity:
            retained.popitem(last=False)
        retained[key] = value

    def clear(self):
        self._active.clear()
        self._memory.clear()
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class UnboundedMemo(dict):
    """
    Memoizing mapping that never forgets: deleting a key is a no-op.
    """

    def __delitem__(self, key):
        """Ignore the deletion request; every stored item is retained."""
        return None
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _escape_regex_range_chars(s: str) -> str:
    """
    Escape characters that are special inside a regex ``[...]`` range.

    Backslash, ``^``, ``-``, ``[`` and ``]`` are each prefixed with a
    backslash; newline and tab are rewritten as the two-character sequences
    ``\\n`` and ``\\t``.
    """
    # One translation pass gives the same result as the sequential replaces:
    # every substitution acts only on characters of the original input.
    substitutions = {ch: _bslash + ch for ch in r"\^-[]"}
    substitutions["\n"] = r"\n"
    substitutions["\t"] = r"\t"
    return str(s.translate(str.maketrans(substitutions)))
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def _collapse_string_to_ranges(
|
| 187 |
+
s: Union[str, Iterable[str]], re_escape: bool = True
|
| 188 |
+
) -> str:
|
| 189 |
+
def is_consecutive(c):
|
| 190 |
+
c_int = ord(c)
|
| 191 |
+
is_consecutive.prev, prev = c_int, is_consecutive.prev
|
| 192 |
+
if c_int - prev > 1:
|
| 193 |
+
is_consecutive.value = next(is_consecutive.counter)
|
| 194 |
+
return is_consecutive.value
|
| 195 |
+
|
| 196 |
+
is_consecutive.prev = 0 # type: ignore [attr-defined]
|
| 197 |
+
is_consecutive.counter = itertools.count() # type: ignore [attr-defined]
|
| 198 |
+
is_consecutive.value = -1 # type: ignore [attr-defined]
|
| 199 |
+
|
| 200 |
+
def escape_re_range_char(c):
|
| 201 |
+
return "\\" + c if c in r"\^-][" else c
|
| 202 |
+
|
| 203 |
+
def no_escape_re_range_char(c):
|
| 204 |
+
return c
|
| 205 |
+
|
| 206 |
+
if not re_escape:
|
| 207 |
+
escape_re_range_char = no_escape_re_range_char
|
| 208 |
+
|
| 209 |
+
ret = []
|
| 210 |
+
s = "".join(sorted(set(s)))
|
| 211 |
+
if len(s) > 3:
|
| 212 |
+
for _, chars in itertools.groupby(s, key=is_consecutive):
|
| 213 |
+
first = last = next(chars)
|
| 214 |
+
last = collections.deque(
|
| 215 |
+
itertools.chain(iter([last]), chars), maxlen=1
|
| 216 |
+
).pop()
|
| 217 |
+
if first == last:
|
| 218 |
+
ret.append(escape_re_range_char(first))
|
| 219 |
+
else:
|
| 220 |
+
sep = "" if ord(last) == ord(first) + 1 else "-"
|
| 221 |
+
ret.append(
|
| 222 |
+
f"{escape_re_range_char(first)}{sep}{escape_re_range_char(last)}"
|
| 223 |
+
)
|
| 224 |
+
else:
|
| 225 |
+
ret = [escape_re_range_char(c) for c in s]
|
| 226 |
+
|
| 227 |
+
return "".join(ret)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def _flatten(ll: list) -> list:
|
| 231 |
+
ret = []
|
| 232 |
+
for i in ll:
|
| 233 |
+
if isinstance(i, list):
|
| 234 |
+
ret.extend(_flatten(i))
|
| 235 |
+
else:
|
| 236 |
+
ret.append(i)
|
| 237 |
+
return ret
|
| 238 |
+
|
| 239 |
+
|
| 240 |
+
def _make_synonym_function(compat_name: str, fn: C) -> C:
    """
    Build a backward-compatibility synonym named ``compat_name`` that calls ``fn``.

    The returned wrapper forwards all arguments to ``fn`` and carries ``fn``'s
    annotations, keyword-only defaults and qualified name. Its ``__name__`` is
    ``compat_name`` and its docstring points at the replacement.

    :param compat_name: name the synonym should expose as ``__name__``.
    :param fn: replacement callable; a ``staticmethod``/``classmethod`` object
        is unwrapped to its underlying function first.
    :returns: the wrapper, typed as ``fn``'s type.
    """
    # In a future version, uncomment the code in the internal _inner() functions
    # to begin emitting DeprecationWarnings.

    # Unwrap staticmethod/classmethod
    fn = getattr(fn, "__func__", fn)

    # (Presence of 'self' arg in signature is used by explain_exception() methods, so we take
    # some extra steps to add it if present in decorated function.)
    # Compare against a one-element slice rather than indexing [0], so that a
    # callable with no parameters does not raise IndexError.
    if ["self"] == list(inspect.signature(fn).parameters)[:1]:

        @wraps(fn)
        def _inner(self, *args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=3
            # )
            return fn(self, *args, **kwargs)

    else:

        @wraps(fn)
        def _inner(*args, **kwargs):
            # warnings.warn(
            #     f"Deprecated - use {fn.__name__}", DeprecationWarning, stacklevel=3
            # )
            return fn(*args, **kwargs)

    _inner.__doc__ = f"""Deprecated - use :class:`{fn.__name__}`"""
    _inner.__name__ = compat_name
    _inner.__annotations__ = fn.__annotations__
    if isinstance(fn, types.FunctionType):
        _inner.__kwdefaults__ = fn.__kwdefaults__
    elif isinstance(fn, type) and hasattr(fn, "__init__"):
        # For a class, mirror the keyword-only defaults of its initializer.
        _inner.__kwdefaults__ = fn.__init__.__kwdefaults__
    else:
        _inner.__kwdefaults__ = None
    _inner.__qualname__ = fn.__qualname__
    return cast(C, _inner)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
def replaced_by_pep8(fn: C) -> Callable[[Callable], C]:
    """
    Decorator factory for pre-PEP8 compatibility names.

    Applied to a legacy-named placeholder, it discards the placeholder's body
    and returns a synonym that carries the placeholder's name and calls ``fn``.
    """

    def decorate(other):
        return _make_synonym_function(other.__name__, fn)

    return decorate
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/__init__.py
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Python HTTP library with thread-safe connection pooling, file post support, user friendly, and more
|
| 3 |
+
"""
|
| 4 |
+
from __future__ import absolute_import
|
| 5 |
+
|
| 6 |
+
# Set default logging handler to avoid "No handler found" warnings.
|
| 7 |
+
import logging
|
| 8 |
+
import warnings
|
| 9 |
+
from logging import NullHandler
|
| 10 |
+
|
| 11 |
+
from . import exceptions
|
| 12 |
+
from ._version import __version__
|
| 13 |
+
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, connection_from_url
|
| 14 |
+
from .filepost import encode_multipart_formdata
|
| 15 |
+
from .poolmanager import PoolManager, ProxyManager, proxy_from_url
|
| 16 |
+
from .response import HTTPResponse
|
| 17 |
+
from .util.request import make_headers
|
| 18 |
+
from .util.retry import Retry
|
| 19 |
+
from .util.timeout import Timeout
|
| 20 |
+
from .util.url import get_host
|
| 21 |
+
|
| 22 |
+
# === NOTE TO REPACKAGERS AND VENDORS ===
|
| 23 |
+
# Please delete this block, this logic is only
|
| 24 |
+
# for urllib3 being distributed via PyPI.
|
| 25 |
+
# See: https://github.com/urllib3/urllib3/issues/2680
|
| 26 |
+
try:
|
| 27 |
+
import urllib3_secure_extra # type: ignore # noqa: F401
|
| 28 |
+
except ImportError:
|
| 29 |
+
pass
|
| 30 |
+
else:
|
| 31 |
+
warnings.warn(
|
| 32 |
+
"'urllib3[secure]' extra is deprecated and will be removed "
|
| 33 |
+
"in a future release of urllib3 2.x. Read more in this issue: "
|
| 34 |
+
"https://github.com/urllib3/urllib3/issues/2680",
|
| 35 |
+
category=DeprecationWarning,
|
| 36 |
+
stacklevel=2,
|
| 37 |
+
)
|
| 38 |
+
|
| 39 |
+
__author__ = "Andrey Petrov (andrey.petrov@shazow.net)"
|
| 40 |
+
__license__ = "MIT"
|
| 41 |
+
__version__ = __version__
|
| 42 |
+
|
| 43 |
+
__all__ = (
|
| 44 |
+
"HTTPConnectionPool",
|
| 45 |
+
"HTTPSConnectionPool",
|
| 46 |
+
"PoolManager",
|
| 47 |
+
"ProxyManager",
|
| 48 |
+
"HTTPResponse",
|
| 49 |
+
"Retry",
|
| 50 |
+
"Timeout",
|
| 51 |
+
"add_stderr_logger",
|
| 52 |
+
"connection_from_url",
|
| 53 |
+
"disable_warnings",
|
| 54 |
+
"encode_multipart_formdata",
|
| 55 |
+
"get_host",
|
| 56 |
+
"make_headers",
|
| 57 |
+
"proxy_from_url",
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
logging.getLogger(__name__).addHandler(NullHandler())
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def add_stderr_logger(level=logging.DEBUG):
    """
    Attach a ``StreamHandler`` to this package's logger for quick debugging.

    The logger's level is set to ``level`` and a debug message announcing the
    new handler is emitted.

    Returns the handler after adding it.
    """
    # This method needs to be in this __init__.py to get the __name__ correct
    # even if urllib3 is vendored within another package.
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter("%(asctime)s %(levelname)s %(message)s")
    )

    package_logger = logging.getLogger(__name__)
    package_logger.addHandler(stream_handler)
    package_logger.setLevel(level)
    package_logger.debug("Added a stderr logging handler to logger: %s", __name__)
    return stream_handler
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ... Clean up.
|
| 82 |
+
del NullHandler
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
# All warning filters *must* be appended unless you're really certain that they
|
| 86 |
+
# shouldn't be: otherwise, it's very hard for users to use most Python
|
| 87 |
+
# mechanisms to silence them.
|
| 88 |
+
# SecurityWarning's always go off by default.
|
| 89 |
+
warnings.simplefilter("always", exceptions.SecurityWarning, append=True)
|
| 90 |
+
# SubjectAltNameWarning's should go off once per host
|
| 91 |
+
warnings.simplefilter("default", exceptions.SubjectAltNameWarning, append=True)
|
| 92 |
+
# InsecurePlatformWarning's don't vary between requests, so we keep it default.
|
| 93 |
+
warnings.simplefilter("default", exceptions.InsecurePlatformWarning, append=True)
|
| 94 |
+
# SNIMissingWarnings should go off only once.
|
| 95 |
+
warnings.simplefilter("default", exceptions.SNIMissingWarning, append=True)
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def disable_warnings(category=exceptions.HTTPWarning):
    """
    Install an "ignore" warnings filter for ``category``.

    With the default category this silences every warning class derived from
    ``exceptions.HTTPWarning``.
    """
    warnings.simplefilter("ignore", category=category)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_collections.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
try:
|
| 4 |
+
from collections.abc import Mapping, MutableMapping
|
| 5 |
+
except ImportError:
|
| 6 |
+
from collections import Mapping, MutableMapping
|
| 7 |
+
try:
|
| 8 |
+
from threading import RLock
|
| 9 |
+
except ImportError: # Platform-specific: No threads available
|
| 10 |
+
|
| 11 |
+
class RLock:
|
| 12 |
+
def __enter__(self):
|
| 13 |
+
pass
|
| 14 |
+
|
| 15 |
+
def __exit__(self, exc_type, exc_value, traceback):
|
| 16 |
+
pass
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
from collections import OrderedDict
|
| 20 |
+
|
| 21 |
+
from .exceptions import InvalidHeader
|
| 22 |
+
from .packages import six
|
| 23 |
+
from .packages.six import iterkeys, itervalues
|
| 24 |
+
|
| 25 |
+
__all__ = ["RecentlyUsedContainer", "HTTPHeaderDict"]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
_Null = object()
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
class RecentlyUsedContainer(MutableMapping):
    """
    Dict-like container that keeps at most ``maxsize`` keys, discarding the
    least recently used key when a new one pushes it over the limit. Every
    access to the underlying mapping is made while holding ``self.lock``.

    :param maxsize:
        Maximum number of recent elements to retain.

    :param dispose_func:
        Optional callback. ``dispose_func(value)`` is called for every value
        that leaves the container: on eviction, on overwrite of an existing
        key, on deletion and on ``clear()``. It is invoked after the lock has
        been released.
    """

    ContainerCls = OrderedDict

    def __init__(self, maxsize=10, dispose_func=None):
        self._maxsize = maxsize
        self.dispose_func = dispose_func

        self._container = self.ContainerCls()
        self.lock = RLock()

    def __getitem__(self, key):
        # Pop and re-insert so the key moves to the end of the eviction line.
        with self.lock:
            value = self._container.pop(key)
            self._container[key] = value
            return value

    def __setitem__(self, key, value):
        with self.lock:
            # An existing value under this key is displaced by the new one.
            displaced = self._container.get(key, _Null)
            self._container[key] = value

            # A brand-new key may push us over the limit; if so, the oldest
            # entry at the front of the container is the one that goes.
            if len(self._container) > self._maxsize:
                displaced = self._container.popitem(last=False)[1]

        if self.dispose_func and displaced is not _Null:
            self.dispose_func(displaced)

    def __delitem__(self, key):
        with self.lock:
            removed = self._container.pop(key)

        if self.dispose_func:
            self.dispose_func(removed)

    def __len__(self):
        with self.lock:
            return len(self._container)

    def __iter__(self):
        raise NotImplementedError(
            "Iteration over this class is unlikely to be threadsafe."
        )

    def clear(self):
        with self.lock:
            # Snapshot the values first, then empty the mapping.
            leftovers = list(itervalues(self._container))
            self._container.clear()

        if self.dispose_func:
            for value in leftovers:
                self.dispose_func(value)

    def keys(self):
        with self.lock:
            return list(iterkeys(self._container))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
class HTTPHeaderDict(MutableMapping):
    """
    A ``dict``-like container for HTTP headers.

    :param headers:
        An iterable of field-value pairs. Must not contain multiple field names
        when compared case-insensitively.

    :param kwargs:
        Additional field-value pairs to pass in to ``dict.update``.

    Field names are stored and compared case-insensitively in compliance with
    RFC 7230. Iteration yields the first-seen casing of each field name.

    Item assignment (``headers[name] = value``) replaces every value stored
    under a name that compares equal case-insensitively, mirroring ``dict``.
    To accumulate several values under one name, use ``.add`` instead.

    If multiple fields that are equal case-insensitively are passed to the
    constructor or ``.update``, the behavior is undefined and some will be
    lost.

    >>> headers = HTTPHeaderDict()
    >>> headers.add('Set-Cookie', 'foo=bar')
    >>> headers.add('set-cookie', 'baz=quxx')
    >>> headers['content-length'] = '7'
    >>> headers['SET-cookie']
    'foo=bar, baz=quxx'
    >>> headers['Content-Length']
    '7'
    """

    def __init__(self, headers=None, **kwargs):
        super(HTTPHeaderDict, self).__init__()
        # Maps lower-cased name -> [original-cased name, value, value, ...]
        self._container = OrderedDict()
        if isinstance(headers, HTTPHeaderDict):
            self._copy_from(headers)
        elif headers is not None:
            self.extend(headers)
        if kwargs:
            self.extend(kwargs)

    def __setitem__(self, key, val):
        folded = key.lower()
        self._container[folded] = [key, val]
        return self._container[folded]

    def __getitem__(self, key):
        entry = self._container[key.lower()]
        return ", ".join(entry[1:])

    def __delitem__(self, key):
        del self._container[key.lower()]

    def __contains__(self, key):
        return key.lower() in self._container

    def __eq__(self, other):
        mapping_like = isinstance(other, Mapping) or hasattr(other, "keys")
        if not mapping_like:
            return False
        if not isinstance(other, type(self)):
            other = type(self)(other)
        mine = {name.lower(): value for name, value in self.itermerged()}
        theirs = {name.lower(): value for name, value in other.itermerged()}
        return mine == theirs

    def __ne__(self, other):
        return not self.__eq__(other)

    if six.PY2:  # Python 2
        iterkeys = MutableMapping.iterkeys
        itervalues = MutableMapping.itervalues

    # Private sentinel distinguishing "no default supplied" from ``None``.
    __marker = object()

    def __len__(self):
        return len(self._container)

    def __iter__(self):
        # Only provide the originally cased names
        for entry in self._container.values():
            yield entry[0]

    def pop(self, key, default=__marker):
        """D.pop(k[,d]) -> v, remove specified key and return the corresponding value.
        If key is not found, d is returned if given, otherwise KeyError is raised.
        """
        # MutableMapping.pop cannot be reused because of the private marker,
        # and dict.pop on the container would leak the internal list layout,
        # so the lookup/delete pair is spelled out here.
        try:
            merged = self[key]
        except KeyError:
            if default is self.__marker:
                raise
            return default
        del self[key]
        return merged

    def discard(self, key):
        """Remove ``key`` if present; do nothing when it is absent."""
        try:
            del self[key]
        except KeyError:
            pass

    def add(self, key, val):
        """Adds a (name, value) pair, doesn't overwrite the value if it already
        exists.

        >>> headers = HTTPHeaderDict(foo='bar')
        >>> headers.add('Foo', 'baz')
        >>> headers['foo']
        'bar, baz'
        """
        fresh_entry = [key, val]
        # setdefault keeps the common "name not present yet" case to one call.
        stored_entry = self._container.setdefault(key.lower(), fresh_entry)
        if stored_entry is not fresh_entry:
            stored_entry.append(val)

    def extend(self, *args, **kwargs):
        """Generic import function for any type of header-like object.
        Adapted version of MutableMapping.update in order to insert items
        with self.add instead of self.__setitem__
        """
        if len(args) > 1:
            raise TypeError(
                "extend() takes at most 1 positional "
                "arguments ({0} given)".format(len(args))
            )
        other = args[0] if args else ()

        if isinstance(other, HTTPHeaderDict):
            # Preserve every individual value, duplicates included.
            for name, value in other.iteritems():
                self.add(name, value)
        elif isinstance(other, Mapping):
            for name in other:
                self.add(name, other[name])
        elif hasattr(other, "keys"):
            for name in other.keys():
                self.add(name, other[name])
        else:
            for name, value in other:
                self.add(name, value)

        for name, value in kwargs.items():
            self.add(name, value)

    def getlist(self, key, default=__marker):
        """Returns a list of all the values for the named field. Returns an
        empty list if the key doesn't exist."""
        try:
            entry = self._container[key.lower()]
        except KeyError:
            return [] if default is self.__marker else default
        return entry[1:]

    # Backwards compatibility for httplib
    getheaders = getlist
    getallmatchingheaders = getlist
    iget = getlist

    # Backwards compatibility for http.cookiejar
    get_all = getlist

    def __repr__(self):
        return "%s(%s)" % (type(self).__name__, dict(self.itermerged()))

    def _copy_from(self, other):
        for name in other:
            values = other.getlist(name)
            if isinstance(values, list):
                # Don't need to convert tuples
                values = list(values)
            self._container[name.lower()] = [name] + values

    def copy(self):
        duplicate = type(self)()
        duplicate._copy_from(self)
        return duplicate

    def iteritems(self):
        """Iterate over all header lines, including duplicate ones."""
        for name in self:
            entry = self._container[name.lower()]
            for value in entry[1:]:
                yield entry[0], value

    def itermerged(self):
        """Iterate over all headers, merging duplicate ones together."""
        for name in self:
            entry = self._container[name.lower()]
            yield entry[0], ", ".join(entry[1:])

    def items(self):
        return list(self.iteritems())

    @classmethod
    def from_httplib(cls, message):  # Python 2
        """Read headers from a Python 2 httplib message object."""
        # python2.7 does not expose a proper API for exporting multiheaders
        # efficiently. This function re-reads raw lines from the message
        # object and extracts the multiheaders properly.
        obs_fold_continued_leaders = (" ", "\t")
        headers = []

        for line in message.headers:
            if not line.startswith(obs_fold_continued_leaders):
                key, value = line.split(":", 1)
                headers.append((key, value.strip()))
                continue

            if not headers:
                # We received a header line that starts with OWS as described
                # in RFC-7230 S3.2.4. This indicates a multiline header, but
                # there exists no previous header to which we can attach it.
                raise InvalidHeader(
                    "Header continuation with no previous header: %s" % line
                )
            # Fold the continuation line into the previous header's value.
            key, value = headers[-1]
            headers[-1] = (key, value + " " + line.strip())

        return cls(headers)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/_version.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# This file is protected via CODEOWNERS
|
| 2 |
+
__version__ = "1.26.17"
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connection.py
ADDED
|
@@ -0,0 +1,572 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import datetime
|
| 4 |
+
import logging
|
| 5 |
+
import os
|
| 6 |
+
import re
|
| 7 |
+
import socket
|
| 8 |
+
import warnings
|
| 9 |
+
from socket import error as SocketError
|
| 10 |
+
from socket import timeout as SocketTimeout
|
| 11 |
+
|
| 12 |
+
from .packages import six
|
| 13 |
+
from .packages.six.moves.http_client import HTTPConnection as _HTTPConnection
|
| 14 |
+
from .packages.six.moves.http_client import HTTPException # noqa: F401
|
| 15 |
+
from .util.proxy import create_proxy_ssl_context
|
| 16 |
+
|
| 17 |
+
try: # Compiled with SSL?
|
| 18 |
+
import ssl
|
| 19 |
+
|
| 20 |
+
BaseSSLError = ssl.SSLError
|
| 21 |
+
except (ImportError, AttributeError): # Platform-specific: No SSL.
|
| 22 |
+
ssl = None
|
| 23 |
+
|
| 24 |
+
class BaseSSLError(BaseException):
|
| 25 |
+
pass
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
try:
|
| 29 |
+
# Python 3: not a no-op, we're adding this to the namespace so it can be imported.
|
| 30 |
+
ConnectionError = ConnectionError
|
| 31 |
+
except NameError:
|
| 32 |
+
# Python 2
|
| 33 |
+
class ConnectionError(Exception):
|
| 34 |
+
pass
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
try: # Python 3:
|
| 38 |
+
# Not a no-op, we're adding this to the namespace so it can be imported.
|
| 39 |
+
BrokenPipeError = BrokenPipeError
|
| 40 |
+
except NameError: # Python 2:
|
| 41 |
+
|
| 42 |
+
class BrokenPipeError(Exception):
|
| 43 |
+
pass
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
from ._collections import HTTPHeaderDict # noqa (historical, removed in v2)
|
| 47 |
+
from ._version import __version__
|
| 48 |
+
from .exceptions import (
|
| 49 |
+
ConnectTimeoutError,
|
| 50 |
+
NewConnectionError,
|
| 51 |
+
SubjectAltNameWarning,
|
| 52 |
+
SystemTimeWarning,
|
| 53 |
+
)
|
| 54 |
+
from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection
|
| 55 |
+
from .util.ssl_ import (
|
| 56 |
+
assert_fingerprint,
|
| 57 |
+
create_urllib3_context,
|
| 58 |
+
is_ipaddress,
|
| 59 |
+
resolve_cert_reqs,
|
| 60 |
+
resolve_ssl_version,
|
| 61 |
+
ssl_wrap_socket,
|
| 62 |
+
)
|
| 63 |
+
from .util.ssl_match_hostname import CertificateError, match_hostname
|
| 64 |
+
|
| 65 |
+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Default TCP port for each URL scheme; used for the ``default_port`` class
# attributes of the connection classes below.
port_by_scheme = {"http": 80, "https": 443}

# HTTPSConnection.connect() warns (SystemTimeWarning) when today's date is
# earlier than this value.
# When it comes time to update this value as a part of regular maintenance
# (ie test_recent_date is failing) update it to ~6 months before the current date.
RECENT_DATE = datetime.date(2022, 1, 1)

# Matches any single character outside the listed set (ASCII letters, digits
# and ``-!#$%&'*+.^_`|~``). HTTPConnection.putrequest() uses it to reject
# request methods that contain such "non-token" characters.
_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]")
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
class HTTPConnection(_HTTPConnection, object):
    """
    Wrapper around :class:`http.client.HTTPConnection` whose constructor acts as
    a backwards-compatibility layer between older and newer Pythons.

    Keyword arguments beyond those of the base class configure attributes of
    the connection. Accepted parameters include:

    - ``strict``: See the documentation on :class:`urllib3.connectionpool.HTTPConnectionPool`
    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. When
      omitted, ``HTTPConnection.default_socket_options`` is used, which includes
      disabling Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection
      is behind a proxy.

      To enable TCP Keep Alive in addition to the defaults, you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Passing an empty list (e.g., ``[]``) disables the defaults.
    """

    default_port = port_by_scheme["http"]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options = [(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]

    #: Whether this connection verifies the host's certificate.
    is_verified = False

    #: Whether this proxy connection (if used) verifies the proxy host's
    #: certificate.
    proxy_is_verified = None

    def __init__(self, *args, **kw):
        # ``strict`` is only forwarded to the base constructor on Python 2.
        if not six.PY2:
            kw.pop("strict", None)

        # Read without removing: the base constructor receives it as well.
        self.source_address = kw.get("source_address")

        #: The socket options provided by the user. If no options are
        #: provided, we use the default options.
        self.socket_options = kw.pop("socket_options", self.default_socket_options)

        # Proxy options provided by the user; the base class does not know them.
        for option in ("proxy", "proxy_config"):
            setattr(self, option, kw.pop(option, None))

        _HTTPConnection.__init__(self, *args, **kw)

    @property
    def host(self):
        """
        Hostname with any trailing dots (the FQDN marker) removed.

        SSL certificates generally do not carry the trailing dot of a
        fully-qualified domain name, so validation against a dotted name
        fails, and some servers do not expect to receive it either.

        The dotted form still matters for DNS resolution: a lookup with the
        trailing dot resolves only the exact FQDN, whereas a lookup without it
        goes through the system's search domain list. The original value is
        therefore kept in ``_dns_host`` and used only where that is
        appropriate, i.e. for the DNS lookup made when establishing the TCP
        connection the HTTP requests are sent over.
        """
        dns_name = self._dns_host
        return dns_name.rstrip(".")

    @host.setter
    def host(self, value):
        """
        Store ``value`` untouched as ``_dns_host``.

        The assumption is that only urllib3 uses ``_dns_host``; httplib itself
        only uses ``host``, and other libraries are expected to do likewise.
        """
        self._dns_host = value

    def _new_conn(self):
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        """
        # Forward only the options that are actually set (truthy).
        options = {}
        for name in ("source_address", "socket_options"):
            value = getattr(self, name)
            if value:
                options[name] = value

        try:
            sock = connection.create_connection(
                (self._dns_host, self.port), self.timeout, **options
            )
        except SocketTimeout:
            message = "Connection to %s timed out. (connect timeout=%s)" % (
                self.host,
                self.timeout,
            )
            raise ConnectTimeoutError(self, message)
        except SocketError as e:
            message = "Failed to establish a new connection: %s" % e
            raise NewConnectionError(self, message)
        else:
            return sock

    def _is_using_tunnel(self):
        # Google App Engine's httplib does not define _tunnel_host
        return getattr(self, "_tunnel_host", None)

    def _prepare_conn(self, conn):
        self.sock = conn
        if not self._is_using_tunnel():
            return

        # TODO: Fix tunnel so it doesn't depend on self.sock state.
        self._tunnel()
        # Mark this connection as not reusable
        self.auto_open = 0

    def connect(self):
        self._prepare_conn(self._new_conn())

    def putrequest(self, method, url, *args, **kwargs):
        """ """
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        offending = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if offending is not None:
            raise ValueError(
                "Method cannot contain non-token characters %r (found at least %r)"
                % (method, offending.group())
            )

        return _HTTPConnection.putrequest(self, method, url, *args, **kwargs)

    def putheader(self, header, *values):
        """ """
        skip_requested = any(isinstance(v, str) and v == SKIP_HEADER for v in values)
        if not skip_requested:
            _HTTPConnection.putheader(self, header, *values)
            return

        # The header is being skipped: that is only allowed for a fixed set.
        if six.ensure_str(header.lower()) in SKIPPABLE_HEADERS:
            return

        allowed = "', '".join(map(str.title, sorted(SKIPPABLE_HEADERS)))
        raise ValueError("urllib3.util.SKIP_HEADER only supports '%s'" % (allowed,))

    def request(self, method, url, body=None, headers=None):
        # Update the inner socket's timeout value to send the request.
        # This only triggers if the connection is re-used.
        live_sock = getattr(self, "sock", None)
        if live_sock is not None:
            live_sock.settimeout(self.timeout)

        # Work on a copy so the mapping passed into .request() is not modified.
        headers = {} if headers is None else headers.copy()

        has_user_agent = any(
            six.ensure_str(key.lower()) == "user-agent" for key in headers
        )
        if not has_user_agent:
            headers["User-Agent"] = _get_default_user_agent()
        super(HTTPConnection, self).request(method, url, body=body, headers=headers)

    def request_chunked(self, method, url, body=None, headers=None):
        """
        Variant of the regular request method that sends the body using
        chunked transfer encoding rather than as a single block.
        """
        headers = headers or {}
        lowered = {six.ensure_str(name.lower()) for name in headers}

        self.putrequest(
            method,
            url,
            skip_accept_encoding="accept-encoding" in lowered,
            skip_host="host" in lowered,
        )
        if "user-agent" not in lowered:
            self.putheader("User-Agent", _get_default_user_agent())
        for name, value in headers.items():
            self.putheader(name, value)
        if "transfer-encoding" not in lowered:
            self.putheader("Transfer-Encoding", "chunked")
        self.endheaders()

        if body is not None:
            # A single str/bytes body is treated as a one-element sequence.
            if isinstance(body, six.string_types + (bytes,)):
                body = (body,)
            for piece in body:
                if not piece:
                    # An empty chunk would terminate the body prematurely.
                    continue
                if not isinstance(piece, bytes):
                    piece = piece.encode("utf8")
                # Frame layout: <hex length>CRLF<payload>CRLF
                frame = bytearray(hex(len(piece))[2:].encode())
                frame.extend(b"\r\n")
                frame.extend(piece)
                frame.extend(b"\r\n")
                self.send(frame)

        # Sent unconditionally so the body is always terminated.
        self.send(b"0\r\n\r\n")
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
class HTTPSConnection(HTTPConnection):
    """
    HTTPS variant of :class:`HTTPConnection`.

    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
    """

    default_port = port_by_scheme["https"]

    # Verification settings. These are class-level defaults so connect() can
    # read them on instances where set_cert() was never called.
    cert_reqs = None
    ca_certs = None
    ca_cert_dir = None
    ca_cert_data = None
    ssl_version = None
    assert_fingerprint = None
    # connect() reads ``self.assert_hostname``; without this default an
    # instance that skipped set_cert() raised AttributeError at that point.
    assert_hostname = None
    tls_in_tls_required = False

    def __init__(
        self,
        host,
        port=None,
        key_file=None,
        cert_file=None,
        key_password=None,
        strict=None,
        timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
        ssl_context=None,
        server_hostname=None,
        **kw
    ):
        """
        Initialise the underlying :class:`HTTPConnection` and record the TLS
        settings for later use by :meth:`connect`.

        ``host``, ``port``, ``strict``, ``timeout`` and any extra keyword
        arguments are forwarded to ``HTTPConnection.__init__``; the remaining
        parameters are stored on the instance unchanged.
        """

        HTTPConnection.__init__(self, host, port, strict=strict, timeout=timeout, **kw)

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname

        # Required property for Google AppEngine 1.9.0 which otherwise causes
        # HTTPS requests to go out as HTTP. (See Issue #356)
        self._protocol = "https"

    def set_cert(
        self,
        key_file=None,
        cert_file=None,
        cert_reqs=None,
        key_password=None,
        ca_certs=None,
        assert_hostname=None,
        assert_fingerprint=None,
        ca_cert_dir=None,
        ca_cert_data=None,
    ):
        """
        Store the certificate and verification settings used by :meth:`connect`.

        This method should only be called once, before the connection is used.
        Every argument overwrites the corresponding instance attribute, so
        values omitted here reset earlier ones (including ``key_file``,
        ``cert_file`` and ``key_password`` given to the constructor).
        """
        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        # ``x and f(x)`` keeps falsy values (None, "") as they are and only
        # expands "~" in paths that were actually given.
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self):
        """
        Open the socket, wrap it in TLS and verify the peer.

        Steps, in order: create the TCP connection; if a tunnel is configured,
        optionally establish TLS to the proxy first and then set up the
        tunnel; build a default SSL context when none was supplied; wrap the
        socket; finally check the certificate fingerprint or hostname and
        record the outcome in ``is_verified``.
        """
        # Add certificate verification
        self.sock = conn = self._new_conn()
        hostname = self.host
        tls_in_tls = False

        if self._is_using_tunnel():
            if self.tls_in_tls_required:
                self.sock = conn = self._connect_tls_proxy(hostname, conn)
                tls_in_tls = True

            # Calls self._set_hostport(), so self.host is
            # self._tunnel_host below.
            self._tunnel()
            # Mark this connection as not reusable
            self.auto_open = 0

            # Override the host with the one we're requesting data from.
            hostname = self._tunnel_host

        # An explicitly configured server_hostname takes precedence over the
        # hostname derived from the connection / tunnel.
        server_hostname = hostname
        if self.server_hostname is not None:
            server_hostname = self.server_hostname

        is_time_off = datetime.date.today() < RECENT_DATE
        if is_time_off:
            warnings.warn(
                (
                    "System time is way off (before {0}). This will probably "
                    "lead to SSL verification errors"
                ).format(RECENT_DATE),
                SystemTimeWarning,
            )

        # Wrap socket using verification with the root certs in
        # trusted_root_certs
        default_ssl_context = False
        if self.ssl_context is None:
            default_ssl_context = True
            self.ssl_context = create_urllib3_context(
                ssl_version=resolve_ssl_version(self.ssl_version),
                cert_reqs=resolve_cert_reqs(self.cert_reqs),
            )

        context = self.ssl_context
        context.verify_mode = resolve_cert_reqs(self.cert_reqs)

        # Try to load OS default certs if none are given.
        # Works well on Windows (requires Python3.4+)
        if (
            not self.ca_certs
            and not self.ca_cert_dir
            and not self.ca_cert_data
            and default_ssl_context
            and hasattr(context, "load_default_certs")
        ):
            context.load_default_certs()

        self.sock = ssl_wrap_socket(
            sock=conn,
            keyfile=self.key_file,
            certfile=self.cert_file,
            key_password=self.key_password,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=server_hostname,
            ssl_context=context,
            tls_in_tls=tls_in_tls,
        )

        # If we're using all defaults and the connection
        # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
        # for the host.
        if (
            default_ssl_context
            and self.ssl_version is None
            and hasattr(self.sock, "version")
            and self.sock.version() in {"TLSv1", "TLSv1.1"}
        ):
            warnings.warn(
                "Negotiating TLSv1/TLSv1.1 by default is deprecated "
                "and will be disabled in urllib3 v2.0.0. Connecting to "
                "'%s' with '%s' can be enabled by explicitly opting-in "
                "with 'ssl_version'" % (self.host, self.sock.version()),
                DeprecationWarning,
            )

        # A configured fingerprint replaces hostname matching entirely.
        if self.assert_fingerprint:
            assert_fingerprint(
                self.sock.getpeercert(binary_form=True), self.assert_fingerprint
            )
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not getattr(context, "check_hostname", False)
            and self.assert_hostname is not False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = self.sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, self.assert_hostname or server_hostname)

        self.is_verified = (
            context.verify_mode == ssl.CERT_REQUIRED
            or self.assert_fingerprint is not None
        )

    def _connect_tls_proxy(self, hostname, conn):
        """
        Establish a TLS connection to the proxy using the provided SSL context.

        :param hostname: Proxy hostname, used as the TLS server name and for
            certificate hostname matching.
        :param conn: Already-connected socket to the proxy.
        :return: The TLS-wrapped socket.
        """
        proxy_config = self.proxy_config
        ssl_context = proxy_config.ssl_context
        if ssl_context:
            # If the user provided a proxy context, we assume CA and client
            # certificates have already been set
            return ssl_wrap_socket(
                sock=conn,
                server_hostname=hostname,
                ssl_context=ssl_context,
            )

        ssl_context = create_proxy_ssl_context(
            self.ssl_version,
            self.cert_reqs,
            self.ca_certs,
            self.ca_cert_dir,
            self.ca_cert_data,
        )

        # If no cert was provided, use only the default options for server
        # certificate validation
        # (The local is deliberately not called ``socket`` so it does not
        # shadow the ``socket`` module.)
        tls_sock = ssl_wrap_socket(
            sock=conn,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
        )

        if ssl_context.verify_mode != ssl.CERT_NONE and not getattr(
            ssl_context, "check_hostname", False
        ):
            # While urllib3 attempts to always turn off hostname matching from
            # the TLS library, this cannot always be done. So we check whether
            # the TLS Library still thinks it's matching hostnames.
            cert = tls_sock.getpeercert()
            if not cert.get("subjectAltName", ()):
                warnings.warn(
                    (
                        "Certificate for {0} has no `subjectAltName`, falling back to check for a "
                        "`commonName` for now. This feature is being removed by major browsers and "
                        "deprecated by RFC 2818. (See https://github.com/urllib3/urllib3/issues/497 "
                        "for details.)".format(hostname)
                    ),
                    SubjectAltNameWarning,
                )
            _match_hostname(cert, hostname)

        self.proxy_is_verified = ssl_context.verify_mode == ssl.CERT_REQUIRED
        return tls_sock
|
| 534 |
+
|
| 535 |
+
|
| 536 |
+
def _match_hostname(cert, asserted_hostname):
    """
    Check that ``cert`` is valid for ``asserted_hostname``.

    :param cert: Peer certificate, passed through to ``match_hostname``.
    :param asserted_hostname: Hostname the certificate is expected to match.
        An IP address literal may be wrapped in square brackets.
    :raises CertificateError: If the certificate does not match. The
        certificate is attached to the exception as ``_peer_cert`` before it
        is re-raised.
    """
    # Our upstream implementation of ssl.match_hostname()
    # only applies this normalization to IP addresses so it doesn't
    # match DNS SANs so we do the same thing!
    #
    # str.strip() takes a set of characters, so only the square brackets are
    # listed. Including "u" as well would also strip leading/trailing "u"
    # characters, making e.g. "u10.0.0.1" verify as the IP address 10.0.0.1.
    stripped_hostname = asserted_hostname.strip("[]")
    if is_ipaddress(stripped_hostname):
        asserted_hostname = stripped_hostname

    try:
        match_hostname(cert, asserted_hostname)
    except CertificateError as e:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Add cert to exception and reraise so client code can inspect
        # the cert when catching the exception, if they want to
        e._peer_cert = cert
        raise
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
def _get_default_user_agent():
    """Return the ``User-Agent`` value sent when the caller supplies none."""
    user_agent = "python-urllib3/%s" % __version__
    return user_agent
|
| 560 |
+
|
| 561 |
+
|
| 562 |
+
class DummyConnection(object):
    """Placeholder type used to detect a failed ConnectionCls import."""
|
| 566 |
+
|
| 567 |
+
|
| 568 |
+
# When the ``ssl`` module could not be imported (``ssl`` is None), replace the
# real HTTPS class with the placeholder so the failed import can be detected.
if not ssl:
    HTTPSConnection = DummyConnection  # noqa: F811


# Second name for whatever ``HTTPSConnection`` is bound to at this point
# (the real class, or DummyConnection when SSL is unavailable).
VerifiedHTTPSConnection = HTTPSConnection
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/connectionpool.py
ADDED
|
@@ -0,0 +1,1132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import errno
|
| 4 |
+
import logging
|
| 5 |
+
import re
|
| 6 |
+
import socket
|
| 7 |
+
import sys
|
| 8 |
+
import warnings
|
| 9 |
+
from socket import error as SocketError
|
| 10 |
+
from socket import timeout as SocketTimeout
|
| 11 |
+
|
| 12 |
+
from .connection import (
|
| 13 |
+
BaseSSLError,
|
| 14 |
+
BrokenPipeError,
|
| 15 |
+
DummyConnection,
|
| 16 |
+
HTTPConnection,
|
| 17 |
+
HTTPException,
|
| 18 |
+
HTTPSConnection,
|
| 19 |
+
VerifiedHTTPSConnection,
|
| 20 |
+
port_by_scheme,
|
| 21 |
+
)
|
| 22 |
+
from .exceptions import (
|
| 23 |
+
ClosedPoolError,
|
| 24 |
+
EmptyPoolError,
|
| 25 |
+
HeaderParsingError,
|
| 26 |
+
HostChangedError,
|
| 27 |
+
InsecureRequestWarning,
|
| 28 |
+
LocationValueError,
|
| 29 |
+
MaxRetryError,
|
| 30 |
+
NewConnectionError,
|
| 31 |
+
ProtocolError,
|
| 32 |
+
ProxyError,
|
| 33 |
+
ReadTimeoutError,
|
| 34 |
+
SSLError,
|
| 35 |
+
TimeoutError,
|
| 36 |
+
)
|
| 37 |
+
from .packages import six
|
| 38 |
+
from .packages.six.moves import queue
|
| 39 |
+
from .request import RequestMethods
|
| 40 |
+
from .response import HTTPResponse
|
| 41 |
+
from .util.connection import is_connection_dropped
|
| 42 |
+
from .util.proxy import connection_requires_http_tunnel
|
| 43 |
+
from .util.queue import LifoQueue
|
| 44 |
+
from .util.request import set_file_position
|
| 45 |
+
from .util.response import assert_header_parsing
|
| 46 |
+
from .util.retry import Retry
|
| 47 |
+
from .util.ssl_match_hostname import CertificateError
|
| 48 |
+
from .util.timeout import Timeout
|
| 49 |
+
from .util.url import Url, _encode_target
|
| 50 |
+
from .util.url import _normalize_host as normalize_host
|
| 51 |
+
from .util.url import get_host, parse_url
|
| 52 |
+
|
| 53 |
+
# Bind ``weakref_finalize`` to the stdlib implementation when it exists;
# fall back to the bundled backport when ``weakref`` has no ``finalize``.
try:  # Platform-specific: Python 3
    import weakref

    weakref_finalize = weakref.finalize
except AttributeError:  # Platform-specific: Python 2
    from .packages.backports.weakref_finalize import weakref_finalize

# ``xrange`` as provided by six for the running interpreter.
xrange = six.moves.xrange

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Unique sentinel object; presumably marks "argument not supplied" where None
# is itself a meaningful value -- its uses lie outside this excerpt.
_Default = object()
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
# Pool objects
|
| 68 |
+
class ConnectionPool(object):
    """
    Common base class of the connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
       ConnectionPool.urlopen() does not normalize or percent-encode target URIs
       which is useful if your target server doesn't support percent-encoded
       target URIs.
    """

    scheme = None
    QueueCls = LifoQueue

    def __init__(self, host, port=None):
        if not host:
            raise LocationValueError("No host specified.")

        # ``host`` holds the normalized form; ``_proxy_host`` only lower-cases
        # the value exactly as it was given.
        normalized = _normalize_host(host, scheme=self.scheme)
        self.host = normalized
        self._proxy_host = host.lower()
        self.port = port

    def __str__(self):
        pool_name = type(self).__name__
        return "%s(host=%r, port=%r)" % (pool_name, self.host, self.port)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()
        # A false return value lets any pending exception propagate.
        return False

    def close(self):
        """
        Close all pooled connections and disable the pool.

        The base implementation does nothing.
        """
        pass
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
# errno codes EAGAIN and EWOULDBLOCK, collected in one set for membership tests.
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`http.client.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`http.client.HTTPConnection`.

    :param strict:
        Causes BadStatusLine to be raised if the status line can't be parsed
        as a valid HTTP/1.0 or 1.1 status line, passed into
        :class:`http.client.HTTPConnection`.

        .. note::
           Only works in Python 2. This parameter is ignored in Python 3.

    :param timeout:
        Socket timeout in seconds for each individual connection. This can
        be a float or integer, which sets the timeout for the HTTP request,
        or an instance of :class:`urllib3.util.Timeout` which gives you more
        fine-grained control over request timeouts. After the constructor has
        been parsed, this is always a `urllib3.util.Timeout` object.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to False, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param retries:
        Retry configuration to use by default with requests in this pool.

    :param _proxy:
        Parsed proxy URL, should not be used directly, instead, see
        :class:`urllib3.ProxyManager`

    :param _proxy_headers:
        A dictionary with proxy headers, should not be used directly,
        instead, see :class:`urllib3.ProxyManager`

    :param _proxy_config:
        Proxy settings kept on the pool as ``proxy_config`` and, when
        ``_proxy`` is set, handed to every new connection as the
        ``proxy_config`` keyword. Like ``_proxy``, presumably meant to be
        supplied by :class:`urllib3.ProxyManager` rather than used directly.

    :param \\**conn_kw:
        Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
        :class:`urllib3.connection.HTTPSConnection` instances.
    """

    # URL scheme served by this pool; compared in ``is_same_host`` and used by
    # ``_absolute_url``.
    scheme = "http"
    # Class instantiated by ``_new_conn`` for every fresh connection.
    ConnectionCls = HTTPConnection
    # Class whose ``from_httplib`` wraps the raw response in ``urlopen``.
    ResponseCls = HTTPResponse
|
| 175 |
+
|
| 176 |
+
def __init__(
    self,
    host,
    port=None,
    strict=False,
    timeout=Timeout.DEFAULT_TIMEOUT,
    maxsize=1,
    block=False,
    headers=None,
    retries=None,
    _proxy=None,
    _proxy_headers=None,
    _proxy_config=None,
    **conn_kw
):
    """Set up the pool state, pre-fill the queue and register a finalizer."""
    ConnectionPool.__init__(self, host, port)
    RequestMethods.__init__(self, headers)

    self.strict = strict

    # Plain numbers are promoted to a Timeout; a missing retry policy falls
    # back to the library default.
    self.timeout = (
        timeout if isinstance(timeout, Timeout) else Timeout.from_float(timeout)
    )
    self.retries = Retry.DEFAULT if retries is None else retries

    conn_queue = self.QueueCls(maxsize)
    self.pool = conn_queue
    self.block = block

    self.proxy = _proxy
    self.proxy_headers = _proxy_headers or {}
    self.proxy_config = _proxy_config

    # Load the queue with ``maxsize`` empty slots so that a later get() on
    # it blocks properly.
    for _slot in xrange(maxsize):
        conn_queue.put(None)

    # Counters kept mostly for testing and debugging.
    self.num_connections = 0
    self.num_requests = 0
    self.conn_kw = conn_kw

    if self.proxy:
        extra_kw = self.conn_kw
        # Proxied connections default to an empty socket-option list, which
        # keeps Nagle's algorithm enabled and avoids packet fragmentation.
        # setdefault() is used because the caller may already have supplied
        # options, and those must not be replaced.
        extra_kw.setdefault("socket_options", [])
        extra_kw["proxy"] = self.proxy
        extra_kw["proxy_config"] = self.proxy_config

    # Make sure pooled HTTPConnections get closed before this pool object is
    # garbage collected. The callback receives only the queue, never
    # ``self``: a reference to ``self`` held by the finalizer would keep the
    # pool alive forever (a leak).
    weakref_finalize(self, _close_pool_connections, conn_queue)
|
| 239 |
+
|
| 240 |
+
def _new_conn(self):
    """
    Create and return a brand-new :class:`HTTPConnection`.
    """
    created = self.num_connections + 1
    self.num_connections = created

    log.debug(
        "Starting new HTTP connection (%d): %s:%s",
        created,
        self.host,
        self.port or "80",
    )

    # Extra constructor arguments collected by the pool ride along unchanged.
    return self.ConnectionCls(
        host=self.host,
        port=self.port,
        timeout=self.timeout.connect_timeout,
        strict=self.strict,
        **self.conn_kw
    )
|
| 260 |
+
|
| 261 |
+
def _get_conn(self, timeout=None):
    """
    Hand out a connection, preferring one that is already pooled.

    When nothing is pooled and :prop:`.block` is ``False``, a fresh
    connection is created and returned instead.

    :param timeout:
        Seconds to wait before giving up and raising
        :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
        :prop:`.block` is ``True``.
    """
    pooled = None
    try:
        pooled = self.pool.get(block=self.block, timeout=timeout)
    except AttributeError:
        # self.pool is None, i.e. the pool has been closed.
        raise ClosedPoolError(self, "Pool is closed.")
    except queue.Empty:
        if self.block:
            raise EmptyPoolError(
                self,
                "Pool reached maximum size and no more connections are allowed.",
            )
        # Non-blocking pool: carry on and create a new connection below.

    if not pooled:
        return self._new_conn()

    # A persistent connection may have been dropped while it sat in the pool.
    if is_connection_dropped(pooled):
        log.debug("Resetting dropped connection: %s", self.host)
        pooled.close()
        if getattr(pooled, "auto_open", 1) == 0:
            # This is a proxied connection that has been mutated by
            # http.client._tunnel() and cannot be reused (since it would
            # attempt to bypass the proxy)
            return self._new_conn()

    return pooled
|
| 299 |
+
|
| 300 |
+
def _put_conn(self, conn):
    """
    Return a connection to the pool.

    :param conn:
        Connection object for the current host and port as returned by
        :meth:`._new_conn` or :meth:`._get_conn`.

    A connection that does not fit because the pool is already full is
    closed and thrown away (maxsize was exceeded). If that happens often,
    maxsize should be increased.

    A connection offered to a closed pool is closed and thrown away as well.
    """
    try:
        self.pool.put(conn, block=False)
    except AttributeError:
        # self.pool is None.
        pass
    except queue.Full:
        # This should never happen if self.block == True
        log.warning(
            "Connection pool is full, discarding connection: %s. Connection pool size: %s",
            self.host,
            self.pool.qsize(),
        )
    else:
        # Stored successfully, nothing left to do.
        return

    # The connection never made it back into the pool, so close it.
    if conn:
        conn.close()
|
| 330 |
+
|
| 331 |
+
def _validate_conn(self, conn):
    """
    Hook invoked just before a request is sent, once the socket exists.

    This implementation performs no validation.
    """
    return None
|
| 336 |
+
|
| 337 |
+
def _prepare_proxy(self, conn):
    # Plain HTTP connections need no proxy preparation, so this is a no-op.
    return None
|
| 340 |
+
|
| 341 |
+
def _get_timeout(self, timeout):
    """Normalize ``timeout`` so the result is always a :class:`urllib3.util.Timeout`"""
    if timeout is _Default:
        # Nothing was passed: start from the pool-wide timeout.
        template = self.timeout
    elif isinstance(timeout, Timeout):
        template = timeout
    else:
        # User passed us an int/float. This is for backwards compatibility,
        # can be removed later
        return Timeout.from_float(timeout)

    # Work on a copy so the caller's (or the pool's) object stays untouched.
    return template.clone()
|
| 352 |
+
|
| 353 |
+
def _raise_timeout(self, err, url, timeout_value):
    """Raise a ReadTimeoutError if ``err`` is really a timeout, else do nothing"""

    def _read_timeout_error():
        # All three detection paths below raise this same error.
        return ReadTimeoutError(
            self, url, "Read timed out. (read timeout=%s)" % timeout_value
        )

    # A plain socket timeout.
    if isinstance(err, SocketTimeout):
        raise _read_timeout_error()

    # In Python 2 a blocking errno (EAGAIN / EWOULDBLOCK) has to be caught
    # specifically and turned into the timeout error.
    if hasattr(err, "errno") and err.errno in _blocking_errnos:
        raise _read_timeout_error()

    # Read timeouts may also surface as SSL errors, recognisable only by
    # their message; anything else is left for the caller to rethrow. See
    # http://bugs.python.org/issue10272
    message = str(err)
    if "timed out" in message or "did not complete (read)" in message:
        # The second phrase is the Python < 2.7.4 wording.
        raise _read_timeout_error()
|
| 377 |
+
|
| 378 |
+
def _make_request(
    self, conn, method, url, timeout=_Default, chunked=False, **httplib_request_kw
):
    """
    Perform a request on a given urllib connection object taken from our
    pool.

    :param conn:
        a connection from one of our connection pools

    :param method:
        HTTP method, passed straight to ``conn.request`` /
        ``conn.request_chunked``.

    :param url:
        Request target, passed straight to ``conn.request`` /
        ``conn.request_chunked``; also used in error and log messages.

    :param timeout:
        Socket timeout in seconds for the request. This can be a
        float or integer, which will set the same timeout value for
        the socket connect and the socket read, or an instance of
        :class:`urllib3.util.Timeout`, which gives you more fine-grained
        control over your timeouts.

    :param chunked:
        If true the request is sent with ``conn.request_chunked``,
        otherwise with ``conn.request``.

    :param \\**httplib_request_kw:
        Forwarded unchanged to ``conn.request`` / ``conn.request_chunked``.

    :return:
        The object returned by ``conn.getresponse()``.

    :raises ReadTimeoutError:
        If the read timeout is ``0`` on a connection that has a socket, or
        if :meth:`_raise_timeout` classifies a socket/SSL error as a timeout.
    """
    self.num_requests += 1

    # Normalize the timeout, start its connect clock and apply the connect
    # timeout to the connection before anything touches the network.
    timeout_obj = self._get_timeout(timeout)
    timeout_obj.start_connect()
    conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

    # Trigger any extra validation we need to do.
    try:
        self._validate_conn(conn)
    except (SocketTimeout, BaseSSLError) as e:
        # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
        # _raise_timeout raises ReadTimeoutError when ``e`` is a timeout;
        # if it returns, the original exception is re-raised as is.
        self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
        raise

    # conn.request() calls http.client.*.request, not the method in
    # urllib3.request. It also calls makefile (recv) on the socket.
    try:
        if chunked:
            conn.request_chunked(method, url, **httplib_request_kw)
        else:
            conn.request(method, url, **httplib_request_kw)

    # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
    # legitimately able to close the connection after sending a valid response.
    # With this behaviour, the received response is still readable.
    except BrokenPipeError:
        # Python 3
        pass
    except IOError as e:
        # Python 2 and macOS/Linux
        # EPIPE and ESHUTDOWN are BrokenPipeError on Python 2, and EPROTOTYPE is needed on macOS
        # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
        # Any other errno is a real failure and propagates.
        if e.errno not in {
            errno.EPIPE,
            errno.ESHUTDOWN,
            errno.EPROTOTYPE,
        }:
            raise

    # Reset the timeout for the recv() on the socket
    read_timeout = timeout_obj.read_timeout

    # App Engine doesn't have a sock attr
    if getattr(conn, "sock", None):
        # In Python 3 socket.py will catch EAGAIN and return None when you
        # try and read into the file pointer created by http.client, which
        # instead raises a BadStatusLine exception. Instead of catching
        # the exception and assuming all BadStatusLine exceptions are read
        # timeouts, check for a zero timeout before making the request.
        if read_timeout == 0:
            raise ReadTimeoutError(
                self, url, "Read timed out. (read timeout=%s)" % read_timeout
            )
        # The DEFAULT_TIMEOUT sentinel means "use the process-wide socket
        # default"; everything else is applied to the socket directly.
        if read_timeout is Timeout.DEFAULT_TIMEOUT:
            conn.sock.settimeout(socket.getdefaulttimeout())
        else:  # None or a value
            conn.sock.settimeout(read_timeout)

    # Receive the response from the server
    try:
        try:
            # Python 2.7, use buffering of HTTP responses
            httplib_response = conn.getresponse(buffering=True)
        except TypeError:
            # Python 3
            try:
                httplib_response = conn.getresponse()
            except BaseException as e:
                # Remove the TypeError from the exception chain in
                # Python 3 (including for exceptions like SystemExit).
                # Otherwise it looks like a bug in the code.
                six.raise_from(e, None)
    except (SocketTimeout, BaseSSLError, SocketError) as e:
        # Convert timeouts into ReadTimeoutError; otherwise re-raise as is.
        self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
        raise

    # AppEngine doesn't have a version attr.
    http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
    log.debug(
        '%s://%s:%s "%s %s %s" %s %s',
        self.scheme,
        self.host,
        self.port,
        method,
        url,
        http_version,
        httplib_response.status,
        httplib_response.length,
    )

    # Header parsing problems are only logged; the response is still returned.
    try:
        assert_header_parsing(httplib_response.msg)
    except (HeaderParsingError, TypeError) as hpe:  # Platform-specific: Python 3
        log.warning(
            "Failed to parse headers (url=%s): %s",
            self._absolute_url(url),
            hpe,
            exc_info=True,
        )

    return httplib_response
|
| 496 |
+
|
| 497 |
+
def _absolute_url(self, path):
    """Combine this pool's scheme, host and port with ``path`` into a full URL string."""
    absolute = Url(scheme=self.scheme, host=self.host, port=self.port, path=path)
    return absolute.url
|
| 499 |
+
|
| 500 |
+
def close(self):
    """
    Shut the pool down: close every pooled connection and disable the pool.

    Calling this on a pool that is already closed does nothing.
    """
    active_pool = self.pool
    if active_pool is not None:
        # Detach the queue first so the pool is no longer reachable, then
        # close all the HTTPConnections it still holds.
        self.pool = None
        _close_pool_connections(active_pool)
|
| 511 |
+
|
| 512 |
+
def is_same_host(self, url):
    """
    Tell whether ``url`` points at the same host as this connection pool.

    Relative URLs (those starting with ``/``) always count as the same host.
    """
    if url.startswith("/"):
        return True

    # TODO: Add optional support for socket.gethostbyname checking.
    url_scheme, url_host, url_port = get_host(url)
    if url_host is not None:
        url_host = _normalize_host(url_host, scheme=url_scheme)

    # Make the two ports comparable: fill in the scheme's default port when
    # only the pool names one, and drop an explicit default port when the
    # pool names none.
    scheme_default = port_by_scheme.get(url_scheme)
    if self.port:
        if not url_port:
            url_port = scheme_default
    elif url_port == scheme_default:
        url_port = None

    candidate = (url_scheme, url_host, url_port)
    own = (self.scheme, self.host, self.port)
    return candidate == own
|
| 532 |
+
|
| 533 |
+
def urlopen(
    self,
    method,
    url,
    body=None,
    headers=None,
    retries=None,
    redirect=True,
    assert_same_host=True,
    timeout=_Default,
    pool_timeout=None,
    release_conn=None,
    chunked=False,
    body_pos=None,
    **response_kw
):
    """
    Get a connection from the pool and perform an HTTP request. This is the
    lowest level call for making a request, so you'll need to specify all
    the raw details.

    .. note::

       More commonly, it's appropriate to use a convenience method provided
       by :class:`.RequestMethods`, such as :meth:`request`.

    .. note::

       `release_conn` will only behave as expected if
       `preload_content=False` because we want to make
       `preload_content=False` the default behaviour someday soon without
       breaking backwards compatibility.

    :param method:
        HTTP request method (such as GET, POST, PUT, etc.)

    :param url:
        The URL to perform the request on.

    :param body:
        Data to send in the request body, either :class:`str`, :class:`bytes`,
        an iterable of :class:`str`/:class:`bytes`, or a file-like object.

    :param headers:
        Dictionary of custom headers to send, such as User-Agent,
        If-None-Match, etc. If None, pool headers are used. If provided,
        these headers completely replace any pool-specific headers.

    :param retries:
        Configure the number of retries to allow before raising a
        :class:`~urllib3.exceptions.MaxRetryError` exception.

        Pass ``None`` to retry until you receive a response. Pass a
        :class:`~urllib3.util.retry.Retry` object for fine-grained control
        over different types of retries.
        Pass an integer number to retry connection errors that many times,
        but no other types of errors. Pass zero to never retry.

        If ``False``, then retries are disabled and any exception is raised
        immediately. Also, instead of raising a MaxRetryError on redirects,
        the redirect response will be returned.

    :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

    :param redirect:
        If True, automatically handle redirects (status codes 301, 302,
        303, 307, 308). Each redirect counts as a retry. Disabling retries
        will disable redirect, too.

    :param assert_same_host:
        If ``True``, will make sure that the host of the pool requests is
        consistent else will raise HostChangedError. When ``False``, you can
        use the pool on an HTTP proxy and request foreign hosts.

    :param timeout:
        If specified, overrides the default timeout for this one
        request. It may be a float (in seconds) or an instance of
        :class:`urllib3.util.Timeout`.

    :param pool_timeout:
        If set and the pool is set to block=True, then this method will
        block for ``pool_timeout`` seconds and raise EmptyPoolError if no
        connection is available within the time period.

    :param release_conn:
        If False, then the urlopen call will not release the connection
        back into the pool once a response is received (but will release if
        you read the entire contents of the response such as when
        `preload_content=True`). This is useful if you're not preloading
        the response's content immediately. You will need to call
        ``r.release_conn()`` on the response ``r`` to return the connection
        back into the pool. If None, it takes the value of
        ``response_kw.get('preload_content', True)``.

    :param chunked:
        If True, urllib3 will send the body using chunked transfer
        encoding. Otherwise, urllib3 will send the body using the standard
        content-length form. Defaults to False.

    :param int body_pos:
        Position to seek to in file-like body in the event of a retry or
        redirect. Typically this won't need to be set because urllib3 will
        auto-populate the value when needed.

    :param \\**response_kw:
        Additional parameters are passed to
        :meth:`urllib3.response.HTTPResponse.from_httplib`

    :return:
        The response object built by ``self.ResponseCls.from_httplib``,
        possibly obtained from a recursive call after a retry or redirect.

    :raises HostChangedError:
        If ``assert_same_host`` is true and ``url`` is not on this pool's host.
    """

    parsed_url = parse_url(url)
    destination_scheme = parsed_url.scheme

    if headers is None:
        headers = self.headers

    # Anything that is not already a Retry object is converted, with the
    # pool's own retry configuration passed along as the default.
    # NOTE(review): how ``None`` is interpreted is decided inside
    # Retry.from_int, which is not visible here -- confirm against the
    # docstring's "retry until you receive a response" wording.
    if not isinstance(retries, Retry):
        retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

    if release_conn is None:
        release_conn = response_kw.get("preload_content", True)

    # Check host
    if assert_same_host and not self.is_same_host(url):
        raise HostChangedError(self, url, retries)

    # Ensure that the URL we're connecting to is properly encoded
    if url.startswith("/"):
        url = six.ensure_str(_encode_target(url))
    else:
        url = six.ensure_str(parsed_url.url)

    conn = None

    # Track whether `conn` needs to be released before
    # returning/raising/recursing. Update this variable if necessary, and
    # leave `release_conn` constant throughout the function. That way, if
    # the function recurses, the original value of `release_conn` will be
    # passed down into the recursive call, and its value will be respected.
    #
    # See issue #651 [1] for details.
    #
    # [1] <https://github.com/urllib3/urllib3/issues/651>
    release_this_conn = release_conn

    http_tunnel_required = connection_requires_http_tunnel(
        self.proxy, self.proxy_config, destination_scheme
    )

    # Merge the proxy headers. Only done when not using HTTP CONNECT. We
    # have to copy the headers dict so we can safely change it without those
    # changes being reflected in anyone else's copy.
    if not http_tunnel_required:
        headers = headers.copy()
        headers.update(self.proxy_headers)

    # Must keep the exception bound to a separate variable or else Python 3
    # complains about UnboundLocalError.
    err = None

    # Keep track of whether we cleanly exited the except block. This
    # ensures we do proper cleanup in finally.
    clean_exit = False

    # Rewind body position, if needed. Record current position
    # for future rewinds in the event of a redirect/retry.
    body_pos = set_file_position(body, body_pos)

    try:
        # Request a connection from the queue.
        timeout_obj = self._get_timeout(timeout)
        conn = self._get_conn(timeout=pool_timeout)

        conn.timeout = timeout_obj.connect_timeout

        # A proxied connection without a socket yet has not been set up;
        # give the subclass hook a chance to prepare the tunnel.
        is_new_proxy_conn = self.proxy is not None and not getattr(
            conn, "sock", None
        )
        if is_new_proxy_conn and http_tunnel_required:
            self._prepare_proxy(conn)

        # Make the request on the httplib connection object.
        httplib_response = self._make_request(
            conn,
            method,
            url,
            timeout=timeout_obj,
            body=body,
            headers=headers,
            chunked=chunked,
        )

        # If we're going to release the connection in ``finally:``, then
        # the response doesn't need to know about the connection. Otherwise
        # it will also try to release it and we'll have a double-release
        # mess.
        response_conn = conn if not release_conn else None

        # Pass method to Response for length checking
        response_kw["request_method"] = method

        # Import httplib's response into our own wrapper object
        response = self.ResponseCls.from_httplib(
            httplib_response,
            pool=self,
            connection=response_conn,
            retries=retries,
            **response_kw
        )

        # Everything went great!
        clean_exit = True

    except EmptyPoolError:
        # Didn't get a connection from the pool, no need to clean up
        clean_exit = True
        release_this_conn = False
        raise

    except (
        TimeoutError,
        HTTPException,
        SocketError,
        ProtocolError,
        BaseSSLError,
        SSLError,
        CertificateError,
    ) as e:
        # Discard the connection for these exceptions. It will be
        # replaced during the next _get_conn() call.
        clean_exit = False

        def _is_ssl_error_message_from_http_proxy(ssl_error):
            # We're trying to detect the message 'WRONG_VERSION_NUMBER' but
            # SSLErrors are kinda all over the place when it comes to the message,
            # so we try to cover our bases here!
            message = " ".join(re.split("[^a-z]", str(ssl_error).lower()))
            return (
                "wrong version number" in message or "unknown protocol" in message
            )

        # Try to detect a common user error with proxies which is to
        # set an HTTP proxy to be HTTPS when it should be 'http://'
        # (ie {'http': 'http://proxy', 'https': 'https://proxy'})
        # Instead we add a nice error message and point to a URL.
        if (
            isinstance(e, BaseSSLError)
            and self.proxy
            and _is_ssl_error_message_from_http_proxy(e)
            and conn.proxy
            and conn.proxy.scheme == "https"
        ):
            e = ProxyError(
                "Your proxy appears to only use HTTP and not HTTPS, "
                "try changing your proxy URL to be HTTP. See: "
                "https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
                "#https-proxy-error-http-proxy",
                SSLError(e),
            )
        elif isinstance(e, (BaseSSLError, CertificateError)):
            e = SSLError(e)
        elif isinstance(e, (SocketError, NewConnectionError)) and self.proxy:
            e = ProxyError("Cannot connect to proxy.", e)
        elif isinstance(e, (SocketError, HTTPException)):
            e = ProtocolError("Connection aborted.", e)

        # Record the failure against the retry budget; the (possibly
        # wrapped) error is handed over together with the current traceback.
        retries = retries.increment(
            method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
        )
        retries.sleep()

        # Keep track of the error for the retry warning.
        err = e

    finally:
        if not clean_exit:
            # We hit some kind of exception, handled or otherwise. We need
            # to throw the connection away unless explicitly told not to.
            # Close the connection, set the variable to None, and make sure
            # we put the None back in the pool to avoid leaking it.
            # (``conn and conn.close()`` leaves ``conn`` falsy either way:
            # it stays None if there was no connection, otherwise it
            # becomes whatever ``close()`` returns.)
            conn = conn and conn.close()
            release_this_conn = True

        if release_this_conn:
            # Put the connection back to be reused. If the connection is
            # expired then it will be None, which will get replaced with a
            # fresh connection during _get_conn.
            self._put_conn(conn)

    if not conn:
        # Try again
        # Reaching this point with a falsy ``conn`` means the handled-error
        # branch above ran: recurse with the updated ``retries`` object.
        log.warning(
            "Retrying (%r) after connection broken by '%r': %s", retries, err, url
        )
        return self.urlopen(
            method,
            url,
            body,
            headers,
            retries,
            redirect,
            assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    # Handle redirect?
    redirect_location = redirect and response.get_redirect_location()
    if redirect_location:
        # A 303 response switches the follow-up request to GET.
        if response.status == 303:
            method = "GET"

        try:
            retries = retries.increment(method, url, response=response, _pool=self)
        except MaxRetryError:
            # Retry budget exhausted: either propagate the error or hand
            # back the redirect response itself, depending on the policy.
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        response.drain_conn()
        retries.sleep_for_retry(response)
        log.debug("Redirecting %s -> %s", url, redirect_location)
        return self.urlopen(
            method,
            redirect_location,
            body,
            headers,
            retries=retries,
            redirect=redirect,
            assert_same_host=assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    # Check if we should retry the HTTP response.
    has_retry_after = bool(response.headers.get("Retry-After"))
    if retries.is_retry(method, response.status, has_retry_after):
        try:
            retries = retries.increment(method, url, response=response, _pool=self)
        except MaxRetryError:
            # Same policy split as for redirects, keyed on raise_on_status.
            if retries.raise_on_status:
                response.drain_conn()
                raise
            return response

        response.drain_conn()
        retries.sleep(response)
        log.debug("Retry: %s", url)
        return self.urlopen(
            method,
            url,
            body,
            headers,
            retries=retries,
            redirect=redirect,
            assert_same_host=assert_same_host,
            timeout=timeout,
            pool_timeout=pool_timeout,
            release_conn=release_conn,
            chunked=chunked,
            body_pos=body_pos,
            **response_kw
        )

    return response
|
| 906 |
+
|
| 907 |
+
|
| 908 |
+
class HTTPSConnectionPool(HTTPConnectionPool):
|
| 909 |
+
"""
|
| 910 |
+
Same as :class:`.HTTPConnectionPool`, but HTTPS.
|
| 911 |
+
|
| 912 |
+
:class:`.HTTPSConnection` uses one of ``assert_fingerprint``,
|
| 913 |
+
``assert_hostname`` and ``host`` in this order to verify connections.
|
| 914 |
+
If ``assert_hostname`` is False, no verification is done.
|
| 915 |
+
|
| 916 |
+
The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
|
| 917 |
+
``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl`
|
| 918 |
+
is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
|
| 919 |
+
the connection socket into an SSL socket.
|
| 920 |
+
"""
|
| 921 |
+
|
| 922 |
+
scheme = "https"
|
| 923 |
+
ConnectionCls = HTTPSConnection
|
| 924 |
+
|
| 925 |
+
def __init__(
|
| 926 |
+
self,
|
| 927 |
+
host,
|
| 928 |
+
port=None,
|
| 929 |
+
strict=False,
|
| 930 |
+
timeout=Timeout.DEFAULT_TIMEOUT,
|
| 931 |
+
maxsize=1,
|
| 932 |
+
block=False,
|
| 933 |
+
headers=None,
|
| 934 |
+
retries=None,
|
| 935 |
+
_proxy=None,
|
| 936 |
+
_proxy_headers=None,
|
| 937 |
+
key_file=None,
|
| 938 |
+
cert_file=None,
|
| 939 |
+
cert_reqs=None,
|
| 940 |
+
key_password=None,
|
| 941 |
+
ca_certs=None,
|
| 942 |
+
ssl_version=None,
|
| 943 |
+
assert_hostname=None,
|
| 944 |
+
assert_fingerprint=None,
|
| 945 |
+
ca_cert_dir=None,
|
| 946 |
+
**conn_kw
|
| 947 |
+
):
|
| 948 |
+
|
| 949 |
+
HTTPConnectionPool.__init__(
|
| 950 |
+
self,
|
| 951 |
+
host,
|
| 952 |
+
port,
|
| 953 |
+
strict,
|
| 954 |
+
timeout,
|
| 955 |
+
maxsize,
|
| 956 |
+
block,
|
| 957 |
+
headers,
|
| 958 |
+
retries,
|
| 959 |
+
_proxy,
|
| 960 |
+
_proxy_headers,
|
| 961 |
+
**conn_kw
|
| 962 |
+
)
|
| 963 |
+
|
| 964 |
+
self.key_file = key_file
|
| 965 |
+
self.cert_file = cert_file
|
| 966 |
+
self.cert_reqs = cert_reqs
|
| 967 |
+
self.key_password = key_password
|
| 968 |
+
self.ca_certs = ca_certs
|
| 969 |
+
self.ca_cert_dir = ca_cert_dir
|
| 970 |
+
self.ssl_version = ssl_version
|
| 971 |
+
self.assert_hostname = assert_hostname
|
| 972 |
+
self.assert_fingerprint = assert_fingerprint
|
| 973 |
+
|
| 974 |
+
def _prepare_conn(self, conn):
|
| 975 |
+
"""
|
| 976 |
+
Prepare the ``connection`` for :meth:`urllib3.util.ssl_wrap_socket`
|
| 977 |
+
and establish the tunnel if proxy is used.
|
| 978 |
+
"""
|
| 979 |
+
|
| 980 |
+
if isinstance(conn, VerifiedHTTPSConnection):
|
| 981 |
+
conn.set_cert(
|
| 982 |
+
key_file=self.key_file,
|
| 983 |
+
key_password=self.key_password,
|
| 984 |
+
cert_file=self.cert_file,
|
| 985 |
+
cert_reqs=self.cert_reqs,
|
| 986 |
+
ca_certs=self.ca_certs,
|
| 987 |
+
ca_cert_dir=self.ca_cert_dir,
|
| 988 |
+
assert_hostname=self.assert_hostname,
|
| 989 |
+
assert_fingerprint=self.assert_fingerprint,
|
| 990 |
+
)
|
| 991 |
+
conn.ssl_version = self.ssl_version
|
| 992 |
+
return conn
|
| 993 |
+
|
| 994 |
+
def _prepare_proxy(self, conn):
|
| 995 |
+
"""
|
| 996 |
+
Establishes a tunnel connection through HTTP CONNECT.
|
| 997 |
+
|
| 998 |
+
Tunnel connection is established early because otherwise httplib would
|
| 999 |
+
improperly set Host: header to proxy's IP:port.
|
| 1000 |
+
"""
|
| 1001 |
+
|
| 1002 |
+
conn.set_tunnel(self._proxy_host, self.port, self.proxy_headers)
|
| 1003 |
+
|
| 1004 |
+
if self.proxy.scheme == "https":
|
| 1005 |
+
conn.tls_in_tls_required = True
|
| 1006 |
+
|
| 1007 |
+
conn.connect()
|
| 1008 |
+
|
| 1009 |
+
def _new_conn(self):
|
| 1010 |
+
"""
|
| 1011 |
+
Return a fresh :class:`http.client.HTTPSConnection`.
|
| 1012 |
+
"""
|
| 1013 |
+
self.num_connections += 1
|
| 1014 |
+
log.debug(
|
| 1015 |
+
"Starting new HTTPS connection (%d): %s:%s",
|
| 1016 |
+
self.num_connections,
|
| 1017 |
+
self.host,
|
| 1018 |
+
self.port or "443",
|
| 1019 |
+
)
|
| 1020 |
+
|
| 1021 |
+
if not self.ConnectionCls or self.ConnectionCls is DummyConnection:
|
| 1022 |
+
raise SSLError(
|
| 1023 |
+
"Can't connect to HTTPS URL because the SSL module is not available."
|
| 1024 |
+
)
|
| 1025 |
+
|
| 1026 |
+
actual_host = self.host
|
| 1027 |
+
actual_port = self.port
|
| 1028 |
+
if self.proxy is not None:
|
| 1029 |
+
actual_host = self.proxy.host
|
| 1030 |
+
actual_port = self.proxy.port
|
| 1031 |
+
|
| 1032 |
+
conn = self.ConnectionCls(
|
| 1033 |
+
host=actual_host,
|
| 1034 |
+
port=actual_port,
|
| 1035 |
+
timeout=self.timeout.connect_timeout,
|
| 1036 |
+
strict=self.strict,
|
| 1037 |
+
cert_file=self.cert_file,
|
| 1038 |
+
key_file=self.key_file,
|
| 1039 |
+
key_password=self.key_password,
|
| 1040 |
+
**self.conn_kw
|
| 1041 |
+
)
|
| 1042 |
+
|
| 1043 |
+
return self._prepare_conn(conn)
|
| 1044 |
+
|
| 1045 |
+
def _validate_conn(self, conn):
|
| 1046 |
+
"""
|
| 1047 |
+
Called right before a request is made, after the socket is created.
|
| 1048 |
+
"""
|
| 1049 |
+
super(HTTPSConnectionPool, self)._validate_conn(conn)
|
| 1050 |
+
|
| 1051 |
+
# Force connect early to allow us to validate the connection.
|
| 1052 |
+
if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
|
| 1053 |
+
conn.connect()
|
| 1054 |
+
|
| 1055 |
+
if not conn.is_verified:
|
| 1056 |
+
warnings.warn(
|
| 1057 |
+
(
|
| 1058 |
+
"Unverified HTTPS request is being made to host '%s'. "
|
| 1059 |
+
"Adding certificate verification is strongly advised. See: "
|
| 1060 |
+
"https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
|
| 1061 |
+
"#ssl-warnings" % conn.host
|
| 1062 |
+
),
|
| 1063 |
+
InsecureRequestWarning,
|
| 1064 |
+
)
|
| 1065 |
+
|
| 1066 |
+
if getattr(conn, "proxy_is_verified", None) is False:
|
| 1067 |
+
warnings.warn(
|
| 1068 |
+
(
|
| 1069 |
+
"Unverified HTTPS connection done to an HTTPS proxy. "
|
| 1070 |
+
"Adding certificate verification is strongly advised. See: "
|
| 1071 |
+
"https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html"
|
| 1072 |
+
"#ssl-warnings"
|
| 1073 |
+
),
|
| 1074 |
+
InsecureRequestWarning,
|
| 1075 |
+
)
|
| 1076 |
+
|
| 1077 |
+
|
| 1078 |
+
def connection_from_url(url, **kw):
|
| 1079 |
+
"""
|
| 1080 |
+
Given a url, return an :class:`.ConnectionPool` instance of its host.
|
| 1081 |
+
|
| 1082 |
+
This is a shortcut for not having to parse out the scheme, host, and port
|
| 1083 |
+
of the url before creating an :class:`.ConnectionPool` instance.
|
| 1084 |
+
|
| 1085 |
+
:param url:
|
| 1086 |
+
Absolute URL string that must include the scheme. Port is optional.
|
| 1087 |
+
|
| 1088 |
+
:param \\**kw:
|
| 1089 |
+
Passes additional parameters to the constructor of the appropriate
|
| 1090 |
+
:class:`.ConnectionPool`. Useful for specifying things like
|
| 1091 |
+
timeout, maxsize, headers, etc.
|
| 1092 |
+
|
| 1093 |
+
Example::
|
| 1094 |
+
|
| 1095 |
+
>>> conn = connection_from_url('http://google.com/')
|
| 1096 |
+
>>> r = conn.request('GET', '/')
|
| 1097 |
+
"""
|
| 1098 |
+
scheme, host, port = get_host(url)
|
| 1099 |
+
port = port or port_by_scheme.get(scheme, 80)
|
| 1100 |
+
if scheme == "https":
|
| 1101 |
+
return HTTPSConnectionPool(host, port=port, **kw)
|
| 1102 |
+
else:
|
| 1103 |
+
return HTTPConnectionPool(host, port=port, **kw)
|
| 1104 |
+
|
| 1105 |
+
|
| 1106 |
+
def _normalize_host(host, scheme):
|
| 1107 |
+
"""
|
| 1108 |
+
Normalize hosts for comparisons and use with sockets.
|
| 1109 |
+
"""
|
| 1110 |
+
|
| 1111 |
+
host = normalize_host(host, scheme)
|
| 1112 |
+
|
| 1113 |
+
# httplib doesn't like it when we include brackets in IPv6 addresses
|
| 1114 |
+
# Specifically, if we include brackets but also pass the port then
|
| 1115 |
+
# httplib crazily doubles up the square brackets on the Host header.
|
| 1116 |
+
# Instead, we need to make sure we never pass ``None`` as the port.
|
| 1117 |
+
# However, for backward compatibility reasons we can't actually
|
| 1118 |
+
# *assert* that. See http://bugs.python.org/issue28539
|
| 1119 |
+
if host.startswith("[") and host.endswith("]"):
|
| 1120 |
+
host = host[1:-1]
|
| 1121 |
+
return host
|
| 1122 |
+
|
| 1123 |
+
|
| 1124 |
+
def _close_pool_connections(pool):
|
| 1125 |
+
"""Drains a queue of connections and closes each one."""
|
| 1126 |
+
try:
|
| 1127 |
+
while True:
|
| 1128 |
+
conn = pool.get(block=False)
|
| 1129 |
+
if conn:
|
| 1130 |
+
conn.close()
|
| 1131 |
+
except queue.Empty:
|
| 1132 |
+
pass # Done.
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/exceptions.py
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
from .packages.six.moves.http_client import IncompleteRead as httplib_IncompleteRead
|
| 4 |
+
|
| 5 |
+
# Base Exceptions
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
class HTTPError(Exception):
|
| 9 |
+
"""Base exception used by this module."""
|
| 10 |
+
|
| 11 |
+
pass
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class HTTPWarning(Warning):
|
| 15 |
+
"""Base warning used by this module."""
|
| 16 |
+
|
| 17 |
+
pass
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class PoolError(HTTPError):
|
| 21 |
+
"""Base exception for errors caused within a pool."""
|
| 22 |
+
|
| 23 |
+
def __init__(self, pool, message):
|
| 24 |
+
self.pool = pool
|
| 25 |
+
HTTPError.__init__(self, "%s: %s" % (pool, message))
|
| 26 |
+
|
| 27 |
+
def __reduce__(self):
|
| 28 |
+
# For pickling purposes.
|
| 29 |
+
return self.__class__, (None, None)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class RequestError(PoolError):
|
| 33 |
+
"""Base exception for PoolErrors that have associated URLs."""
|
| 34 |
+
|
| 35 |
+
def __init__(self, pool, url, message):
|
| 36 |
+
self.url = url
|
| 37 |
+
PoolError.__init__(self, pool, message)
|
| 38 |
+
|
| 39 |
+
def __reduce__(self):
|
| 40 |
+
# For pickling purposes.
|
| 41 |
+
return self.__class__, (None, self.url, None)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class SSLError(HTTPError):
|
| 45 |
+
"""Raised when SSL certificate fails in an HTTPS connection."""
|
| 46 |
+
|
| 47 |
+
pass
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class ProxyError(HTTPError):
|
| 51 |
+
"""Raised when the connection to a proxy fails."""
|
| 52 |
+
|
| 53 |
+
def __init__(self, message, error, *args):
|
| 54 |
+
super(ProxyError, self).__init__(message, error, *args)
|
| 55 |
+
self.original_error = error
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class DecodeError(HTTPError):
|
| 59 |
+
"""Raised when automatic decoding based on Content-Type fails."""
|
| 60 |
+
|
| 61 |
+
pass
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class ProtocolError(HTTPError):
|
| 65 |
+
"""Raised when something unexpected happens mid-request/response."""
|
| 66 |
+
|
| 67 |
+
pass
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
#: Renamed to ProtocolError but aliased for backwards compatibility.
|
| 71 |
+
ConnectionError = ProtocolError
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# Leaf Exceptions
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class MaxRetryError(RequestError):
|
| 78 |
+
"""Raised when the maximum number of retries is exceeded.
|
| 79 |
+
|
| 80 |
+
:param pool: The connection pool
|
| 81 |
+
:type pool: :class:`~urllib3.connectionpool.HTTPConnectionPool`
|
| 82 |
+
:param string url: The requested Url
|
| 83 |
+
:param exceptions.Exception reason: The underlying error
|
| 84 |
+
|
| 85 |
+
"""
|
| 86 |
+
|
| 87 |
+
def __init__(self, pool, url, reason=None):
|
| 88 |
+
self.reason = reason
|
| 89 |
+
|
| 90 |
+
message = "Max retries exceeded with url: %s (Caused by %r)" % (url, reason)
|
| 91 |
+
|
| 92 |
+
RequestError.__init__(self, pool, url, message)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class HostChangedError(RequestError):
|
| 96 |
+
"""Raised when an existing pool gets a request for a foreign host."""
|
| 97 |
+
|
| 98 |
+
def __init__(self, pool, url, retries=3):
|
| 99 |
+
message = "Tried to open a foreign host with url: %s" % url
|
| 100 |
+
RequestError.__init__(self, pool, url, message)
|
| 101 |
+
self.retries = retries
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class TimeoutStateError(HTTPError):
|
| 105 |
+
"""Raised when passing an invalid state to a timeout"""
|
| 106 |
+
|
| 107 |
+
pass
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class TimeoutError(HTTPError):
|
| 111 |
+
"""Raised when a socket timeout error occurs.
|
| 112 |
+
|
| 113 |
+
Catching this error will catch both :exc:`ReadTimeoutErrors
|
| 114 |
+
<ReadTimeoutError>` and :exc:`ConnectTimeoutErrors <ConnectTimeoutError>`.
|
| 115 |
+
"""
|
| 116 |
+
|
| 117 |
+
pass
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
class ReadTimeoutError(TimeoutError, RequestError):
|
| 121 |
+
"""Raised when a socket timeout occurs while receiving data from a server"""
|
| 122 |
+
|
| 123 |
+
pass
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# This timeout error does not have a URL attached and needs to inherit from the
|
| 127 |
+
# base HTTPError
|
| 128 |
+
class ConnectTimeoutError(TimeoutError):
|
| 129 |
+
"""Raised when a socket timeout occurs while connecting to a server"""
|
| 130 |
+
|
| 131 |
+
pass
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
class NewConnectionError(ConnectTimeoutError, PoolError):
|
| 135 |
+
"""Raised when we fail to establish a new connection. Usually ECONNREFUSED."""
|
| 136 |
+
|
| 137 |
+
pass
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class EmptyPoolError(PoolError):
|
| 141 |
+
"""Raised when a pool runs out of connections and no more are allowed."""
|
| 142 |
+
|
| 143 |
+
pass
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
class ClosedPoolError(PoolError):
|
| 147 |
+
"""Raised when a request enters a pool after the pool has been closed."""
|
| 148 |
+
|
| 149 |
+
pass
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
class LocationValueError(ValueError, HTTPError):
|
| 153 |
+
"""Raised when there is something wrong with a given URL input."""
|
| 154 |
+
|
| 155 |
+
pass
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class LocationParseError(LocationValueError):
|
| 159 |
+
"""Raised when get_host or similar fails to parse the URL input."""
|
| 160 |
+
|
| 161 |
+
def __init__(self, location):
|
| 162 |
+
message = "Failed to parse: %s" % location
|
| 163 |
+
HTTPError.__init__(self, message)
|
| 164 |
+
|
| 165 |
+
self.location = location
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
class URLSchemeUnknown(LocationValueError):
|
| 169 |
+
"""Raised when a URL input has an unsupported scheme."""
|
| 170 |
+
|
| 171 |
+
def __init__(self, scheme):
|
| 172 |
+
message = "Not supported URL scheme %s" % scheme
|
| 173 |
+
super(URLSchemeUnknown, self).__init__(message)
|
| 174 |
+
|
| 175 |
+
self.scheme = scheme
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
class ResponseError(HTTPError):
|
| 179 |
+
"""Used as a container for an error reason supplied in a MaxRetryError."""
|
| 180 |
+
|
| 181 |
+
GENERIC_ERROR = "too many error responses"
|
| 182 |
+
SPECIFIC_ERROR = "too many {status_code} error responses"
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
class SecurityWarning(HTTPWarning):
|
| 186 |
+
"""Warned when performing security reducing actions"""
|
| 187 |
+
|
| 188 |
+
pass
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
class SubjectAltNameWarning(SecurityWarning):
|
| 192 |
+
"""Warned when connecting to a host with a certificate missing a SAN."""
|
| 193 |
+
|
| 194 |
+
pass
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
class InsecureRequestWarning(SecurityWarning):
|
| 198 |
+
"""Warned when making an unverified HTTPS request."""
|
| 199 |
+
|
| 200 |
+
pass
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
class SystemTimeWarning(SecurityWarning):
|
| 204 |
+
"""Warned when system time is suspected to be wrong"""
|
| 205 |
+
|
| 206 |
+
pass
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
class InsecurePlatformWarning(SecurityWarning):
|
| 210 |
+
"""Warned when certain TLS/SSL configuration is not available on a platform."""
|
| 211 |
+
|
| 212 |
+
pass
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
class SNIMissingWarning(HTTPWarning):
|
| 216 |
+
"""Warned when making a HTTPS request without SNI available."""
|
| 217 |
+
|
| 218 |
+
pass
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
class DependencyWarning(HTTPWarning):
|
| 222 |
+
"""
|
| 223 |
+
Warned when an attempt is made to import a module with missing optional
|
| 224 |
+
dependencies.
|
| 225 |
+
"""
|
| 226 |
+
|
| 227 |
+
pass
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
class ResponseNotChunked(ProtocolError, ValueError):
|
| 231 |
+
"""Response needs to be chunked in order to read it as chunks."""
|
| 232 |
+
|
| 233 |
+
pass
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
class BodyNotHttplibCompatible(HTTPError):
|
| 237 |
+
"""
|
| 238 |
+
Body should be :class:`http.client.HTTPResponse` like
|
| 239 |
+
(have an fp attribute which returns raw chunks) for read_chunked().
|
| 240 |
+
"""
|
| 241 |
+
|
| 242 |
+
pass
|
| 243 |
+
|
| 244 |
+
|
| 245 |
+
class IncompleteRead(HTTPError, httplib_IncompleteRead):
|
| 246 |
+
"""
|
| 247 |
+
Response length doesn't match expected Content-Length
|
| 248 |
+
|
| 249 |
+
Subclass of :class:`http.client.IncompleteRead` to allow int value
|
| 250 |
+
for ``partial`` to avoid creating large objects on streamed reads.
|
| 251 |
+
"""
|
| 252 |
+
|
| 253 |
+
def __init__(self, partial, expected):
|
| 254 |
+
super(IncompleteRead, self).__init__(partial, expected)
|
| 255 |
+
|
| 256 |
+
def __repr__(self):
|
| 257 |
+
return "IncompleteRead(%i bytes read, %i more expected)" % (
|
| 258 |
+
self.partial,
|
| 259 |
+
self.expected,
|
| 260 |
+
)
|
| 261 |
+
|
| 262 |
+
|
| 263 |
+
class InvalidChunkLength(HTTPError, httplib_IncompleteRead):
|
| 264 |
+
"""Invalid chunk length in a chunked response."""
|
| 265 |
+
|
| 266 |
+
def __init__(self, response, length):
|
| 267 |
+
super(InvalidChunkLength, self).__init__(
|
| 268 |
+
response.tell(), response.length_remaining
|
| 269 |
+
)
|
| 270 |
+
self.response = response
|
| 271 |
+
self.length = length
|
| 272 |
+
|
| 273 |
+
def __repr__(self):
|
| 274 |
+
return "InvalidChunkLength(got length %r, %i bytes read)" % (
|
| 275 |
+
self.length,
|
| 276 |
+
self.partial,
|
| 277 |
+
)
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
class InvalidHeader(HTTPError):
|
| 281 |
+
"""The header provided was somehow invalid."""
|
| 282 |
+
|
| 283 |
+
pass
|
| 284 |
+
|
| 285 |
+
|
| 286 |
+
class ProxySchemeUnknown(AssertionError, URLSchemeUnknown):
|
| 287 |
+
"""ProxyManager does not support the supplied scheme"""
|
| 288 |
+
|
| 289 |
+
# TODO(t-8ch): Stop inheriting from AssertionError in v2.0.
|
| 290 |
+
|
| 291 |
+
def __init__(self, scheme):
|
| 292 |
+
# 'localhost' is here because our URL parser parses
|
| 293 |
+
# localhost:8080 -> scheme=localhost, remove if we fix this.
|
| 294 |
+
if scheme == "localhost":
|
| 295 |
+
scheme = None
|
| 296 |
+
if scheme is None:
|
| 297 |
+
message = "Proxy URL had no scheme, should start with http:// or https://"
|
| 298 |
+
else:
|
| 299 |
+
message = (
|
| 300 |
+
"Proxy URL had unsupported scheme %s, should use http:// or https://"
|
| 301 |
+
% scheme
|
| 302 |
+
)
|
| 303 |
+
super(ProxySchemeUnknown, self).__init__(message)
|
| 304 |
+
|
| 305 |
+
|
| 306 |
+
class ProxySchemeUnsupported(ValueError):
|
| 307 |
+
"""Fetching HTTPS resources through HTTPS proxies is unsupported"""
|
| 308 |
+
|
| 309 |
+
pass
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
class HeaderParsingError(HTTPError):
|
| 313 |
+
"""Raised by assert_header_parsing, but we convert it to a log.warning statement."""
|
| 314 |
+
|
| 315 |
+
def __init__(self, defects, unparsed_data):
|
| 316 |
+
message = "%s, unparsed data: %r" % (defects or "Unknown", unparsed_data)
|
| 317 |
+
super(HeaderParsingError, self).__init__(message)
|
| 318 |
+
|
| 319 |
+
|
| 320 |
+
class UnrewindableBodyError(HTTPError):
|
| 321 |
+
"""urllib3 encountered an error when trying to rewind a body"""
|
| 322 |
+
|
| 323 |
+
pass
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/fields.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import email.utils
|
| 4 |
+
import mimetypes
|
| 5 |
+
import re
|
| 6 |
+
|
| 7 |
+
from .packages import six
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def guess_content_type(filename, default="application/octet-stream"):
|
| 11 |
+
"""
|
| 12 |
+
Guess the "Content-Type" of a file.
|
| 13 |
+
|
| 14 |
+
:param filename:
|
| 15 |
+
The filename to guess the "Content-Type" of using :mod:`mimetypes`.
|
| 16 |
+
:param default:
|
| 17 |
+
If no "Content-Type" can be guessed, default to `default`.
|
| 18 |
+
"""
|
| 19 |
+
if filename:
|
| 20 |
+
return mimetypes.guess_type(filename)[0] or default
|
| 21 |
+
return default
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def format_header_param_rfc2231(name, value):
|
| 25 |
+
"""
|
| 26 |
+
Helper function to format and quote a single header parameter using the
|
| 27 |
+
strategy defined in RFC 2231.
|
| 28 |
+
|
| 29 |
+
Particularly useful for header parameters which might contain
|
| 30 |
+
non-ASCII values, like file names. This follows
|
| 31 |
+
`RFC 2388 Section 4.4 <https://tools.ietf.org/html/rfc2388#section-4.4>`_.
|
| 32 |
+
|
| 33 |
+
:param name:
|
| 34 |
+
The name of the parameter, a string expected to be ASCII only.
|
| 35 |
+
:param value:
|
| 36 |
+
The value of the parameter, provided as ``bytes`` or `str``.
|
| 37 |
+
:ret:
|
| 38 |
+
An RFC-2231-formatted unicode string.
|
| 39 |
+
"""
|
| 40 |
+
if isinstance(value, six.binary_type):
|
| 41 |
+
value = value.decode("utf-8")
|
| 42 |
+
|
| 43 |
+
if not any(ch in value for ch in '"\\\r\n'):
|
| 44 |
+
result = u'%s="%s"' % (name, value)
|
| 45 |
+
try:
|
| 46 |
+
result.encode("ascii")
|
| 47 |
+
except (UnicodeEncodeError, UnicodeDecodeError):
|
| 48 |
+
pass
|
| 49 |
+
else:
|
| 50 |
+
return result
|
| 51 |
+
|
| 52 |
+
if six.PY2: # Python 2:
|
| 53 |
+
value = value.encode("utf-8")
|
| 54 |
+
|
| 55 |
+
# encode_rfc2231 accepts an encoded string and returns an ascii-encoded
|
| 56 |
+
# string in Python 2 but accepts and returns unicode strings in Python 3
|
| 57 |
+
value = email.utils.encode_rfc2231(value, "utf-8")
|
| 58 |
+
value = "%s*=%s" % (name, value)
|
| 59 |
+
|
| 60 |
+
if six.PY2: # Python 2:
|
| 61 |
+
value = value.decode("utf-8")
|
| 62 |
+
|
| 63 |
+
return value
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
_HTML5_REPLACEMENTS = {
|
| 67 |
+
u"\u0022": u"%22",
|
| 68 |
+
# Replace "\" with "\\".
|
| 69 |
+
u"\u005C": u"\u005C\u005C",
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
# All control characters from 0x00 to 0x1F *except* 0x1B.
|
| 73 |
+
_HTML5_REPLACEMENTS.update(
|
| 74 |
+
{
|
| 75 |
+
six.unichr(cc): u"%{:02X}".format(cc)
|
| 76 |
+
for cc in range(0x00, 0x1F + 1)
|
| 77 |
+
if cc not in (0x1B,)
|
| 78 |
+
}
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _replace_multiple(value, needles_and_replacements):
|
| 83 |
+
def replacer(match):
|
| 84 |
+
return needles_and_replacements[match.group(0)]
|
| 85 |
+
|
| 86 |
+
pattern = re.compile(
|
| 87 |
+
r"|".join([re.escape(needle) for needle in needles_and_replacements.keys()])
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
result = pattern.sub(replacer, value)
|
| 91 |
+
|
| 92 |
+
return result
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def format_header_param_html5(name, value):
|
| 96 |
+
"""
|
| 97 |
+
Helper function to format and quote a single header parameter using the
|
| 98 |
+
HTML5 strategy.
|
| 99 |
+
|
| 100 |
+
Particularly useful for header parameters which might contain
|
| 101 |
+
non-ASCII values, like file names. This follows the `HTML5 Working Draft
|
| 102 |
+
Section 4.10.22.7`_ and matches the behavior of curl and modern browsers.
|
| 103 |
+
|
| 104 |
+
.. _HTML5 Working Draft Section 4.10.22.7:
|
| 105 |
+
https://w3c.github.io/html/sec-forms.html#multipart-form-data
|
| 106 |
+
|
| 107 |
+
:param name:
|
| 108 |
+
The name of the parameter, a string expected to be ASCII only.
|
| 109 |
+
:param value:
|
| 110 |
+
The value of the parameter, provided as ``bytes`` or `str``.
|
| 111 |
+
:ret:
|
| 112 |
+
A unicode string, stripped of troublesome characters.
|
| 113 |
+
"""
|
| 114 |
+
if isinstance(value, six.binary_type):
|
| 115 |
+
value = value.decode("utf-8")
|
| 116 |
+
|
| 117 |
+
value = _replace_multiple(value, _HTML5_REPLACEMENTS)
|
| 118 |
+
|
| 119 |
+
return u'%s="%s"' % (name, value)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
# For backwards-compatibility.
|
| 123 |
+
format_header_param = format_header_param_html5
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
class RequestField(object):
|
| 127 |
+
"""
|
| 128 |
+
A data container for request body parameters.
|
| 129 |
+
|
| 130 |
+
:param name:
|
| 131 |
+
The name of this request field. Must be unicode.
|
| 132 |
+
:param data:
|
| 133 |
+
The data/value body.
|
| 134 |
+
:param filename:
|
| 135 |
+
An optional filename of the request field. Must be unicode.
|
| 136 |
+
:param headers:
|
| 137 |
+
An optional dict-like object of headers to initially use for the field.
|
| 138 |
+
:param header_formatter:
|
| 139 |
+
An optional callable that is used to encode and format the headers. By
|
| 140 |
+
default, this is :func:`format_header_param_html5`.
|
| 141 |
+
"""
|
| 142 |
+
|
| 143 |
+
def __init__(
|
| 144 |
+
self,
|
| 145 |
+
name,
|
| 146 |
+
data,
|
| 147 |
+
filename=None,
|
| 148 |
+
headers=None,
|
| 149 |
+
header_formatter=format_header_param_html5,
|
| 150 |
+
):
|
| 151 |
+
self._name = name
|
| 152 |
+
self._filename = filename
|
| 153 |
+
self.data = data
|
| 154 |
+
self.headers = {}
|
| 155 |
+
if headers:
|
| 156 |
+
self.headers = dict(headers)
|
| 157 |
+
self.header_formatter = header_formatter
|
| 158 |
+
|
| 159 |
+
@classmethod
|
| 160 |
+
def from_tuples(cls, fieldname, value, header_formatter=format_header_param_html5):
|
| 161 |
+
"""
|
| 162 |
+
A :class:`~urllib3.fields.RequestField` factory from old-style tuple parameters.
|
| 163 |
+
|
| 164 |
+
Supports constructing :class:`~urllib3.fields.RequestField` from
|
| 165 |
+
parameter of key/value strings AND key/filetuple. A filetuple is a
|
| 166 |
+
(filename, data, MIME type) tuple where the MIME type is optional.
|
| 167 |
+
For example::
|
| 168 |
+
|
| 169 |
+
'foo': 'bar',
|
| 170 |
+
'fakefile': ('foofile.txt', 'contents of foofile'),
|
| 171 |
+
'realfile': ('barfile.txt', open('realfile').read()),
|
| 172 |
+
'typedfile': ('bazfile.bin', open('bazfile').read(), 'image/jpeg'),
|
| 173 |
+
'nonamefile': 'contents of nonamefile field',
|
| 174 |
+
|
| 175 |
+
Field names and filenames must be unicode.
|
| 176 |
+
"""
|
| 177 |
+
if isinstance(value, tuple):
|
| 178 |
+
if len(value) == 3:
|
| 179 |
+
filename, data, content_type = value
|
| 180 |
+
else:
|
| 181 |
+
filename, data = value
|
| 182 |
+
content_type = guess_content_type(filename)
|
| 183 |
+
else:
|
| 184 |
+
filename = None
|
| 185 |
+
content_type = None
|
| 186 |
+
data = value
|
| 187 |
+
|
| 188 |
+
request_param = cls(
|
| 189 |
+
fieldname, data, filename=filename, header_formatter=header_formatter
|
| 190 |
+
)
|
| 191 |
+
request_param.make_multipart(content_type=content_type)
|
| 192 |
+
|
| 193 |
+
return request_param
|
| 194 |
+
|
| 195 |
+
def _render_part(self, name, value):
|
| 196 |
+
"""
|
| 197 |
+
Overridable helper function to format a single header parameter. By
|
| 198 |
+
default, this calls ``self.header_formatter``.
|
| 199 |
+
|
| 200 |
+
:param name:
|
| 201 |
+
The name of the parameter, a string expected to be ASCII only.
|
| 202 |
+
:param value:
|
| 203 |
+
The value of the parameter, provided as a unicode string.
|
| 204 |
+
"""
|
| 205 |
+
|
| 206 |
+
return self.header_formatter(name, value)
|
| 207 |
+
|
| 208 |
+
def _render_parts(self, header_parts):
|
| 209 |
+
"""
|
| 210 |
+
Helper function to format and quote a single header.
|
| 211 |
+
|
| 212 |
+
Useful for single headers that are composed of multiple items. E.g.,
|
| 213 |
+
'Content-Disposition' fields.
|
| 214 |
+
|
| 215 |
+
:param header_parts:
|
| 216 |
+
A sequence of (k, v) tuples or a :class:`dict` of (k, v) to format
|
| 217 |
+
as `k1="v1"; k2="v2"; ...`.
|
| 218 |
+
"""
|
| 219 |
+
parts = []
|
| 220 |
+
iterable = header_parts
|
| 221 |
+
if isinstance(header_parts, dict):
|
| 222 |
+
iterable = header_parts.items()
|
| 223 |
+
|
| 224 |
+
for name, value in iterable:
|
| 225 |
+
if value is not None:
|
| 226 |
+
parts.append(self._render_part(name, value))
|
| 227 |
+
|
| 228 |
+
return u"; ".join(parts)
|
| 229 |
+
|
| 230 |
+
def render_headers(self):
    """
    Render this request field's headers as a single text block.

    'Content-Disposition', 'Content-Type' and 'Content-Location' come first
    (in that order), followed by every other header in the mapping's own
    iteration order. Headers with a falsy value are omitted. The result
    ends with a blank line (``"\\r\\n\\r\\n"`` after the last header).
    """
    priority = ["Content-Disposition", "Content-Type", "Content-Location"]

    rendered = [
        u"%s: %s" % (key, self.headers[key])
        for key in priority
        if self.headers.get(key, False)
    ]
    rendered.extend(
        u"%s: %s" % (key, val)
        for key, val in self.headers.items()
        if key not in priority and val
    )

    # A trailing CRLF element makes the join emit the terminating blank line.
    rendered.append(u"\r\n")
    return u"\r\n".join(rendered)
|
| 248 |
+
|
| 249 |
+
def make_multipart(
    self, content_disposition=None, content_type=None, content_location=None
):
    """
    Turn this request field into a multipart request field.

    Overwrites the "Content-Disposition", "Content-Type" and
    "Content-Location" headers of the request parameter.

    :param content_disposition:
        The 'Content-Disposition' of the request body; ``form-data`` is
        used when this is falsy. The field's name and filename parameters
        are appended to it.
    :param content_type:
        The 'Content-Type' of the request body.
    :param content_location:
        The 'Content-Location' of the request body.
    """
    headers = self.headers
    headers["Content-Disposition"] = content_disposition or u"form-data"

    # name/filename parameters; a None filename is dropped by _render_parts.
    disposition_params = self._render_parts(
        ((u"name", self._name), (u"filename", self._filename))
    )
    headers["Content-Disposition"] += u"; " + disposition_params

    headers["Content-Type"] = content_type
    headers["Content-Location"] = content_location
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/filepost.py
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import binascii
|
| 4 |
+
import codecs
|
| 5 |
+
import os
|
| 6 |
+
from io import BytesIO
|
| 7 |
+
|
| 8 |
+
from .fields import RequestField
|
| 9 |
+
from .packages import six
|
| 10 |
+
from .packages.six import b
|
| 11 |
+
|
| 12 |
+
writer = codecs.lookup("utf-8")[3]
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def choose_boundary():
    """
    Our embarrassingly-simple replacement for mimetools.choose_boundary.

    Returns 16 random bytes rendered as 32 hex characters: a native ``str``
    on both Python 2 (bytes) and Python 3 (decoded text).
    """
    token = binascii.hexlify(os.urandom(16))
    # hexlify yields bytes; only Python 3 needs decoding to get a native str.
    return token if six.PY2 else token.decode("ascii")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def iter_field_objects(fields):
    """
    Iterate over fields, yielding :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts, and lists of
    :class:`~urllib3.fields.RequestField`. Anything that is not already a
    ``RequestField`` is unpacked into ``RequestField.from_tuples``.
    """
    entries = six.iteritems(fields) if isinstance(fields, dict) else iter(fields)

    for entry in entries:
        if not isinstance(entry, RequestField):
            entry = RequestField.from_tuples(*entry)
        yield entry
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def iter_fields(fields):
    """
    .. deprecated:: 1.6

    Iterate over fields as ``(key, value)`` pairs.

    The addition of :class:`~urllib3.fields.RequestField` makes this function
    obsolete. Instead, use :func:`iter_field_objects`, which returns
    :class:`~urllib3.fields.RequestField` objects.

    Supports list of (k, v) tuples and dicts.
    """
    pairs = six.iteritems(fields) if isinstance(fields, dict) else fields
    return ((key, value) for key, value in pairs)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def encode_multipart_formdata(fields, boundary=None):
    """
    Encode a dictionary of ``fields`` using the multipart/form-data MIME format.

    :param fields:
        Dictionary of fields or list of (key, :class:`~urllib3.fields.RequestField`).

    :param boundary:
        If not specified, then a random boundary will be generated using
        :func:`urllib3.filepost.choose_boundary`.

    :return:
        A ``(body, content_type)`` tuple: the encoded body bytes and the
        matching ``multipart/form-data`` Content-Type header value.
    """
    if boundary is None:
        boundary = choose_boundary()

    buf = BytesIO()
    # UTF-8 stream writer used for every piece of text written to the body.
    text_out = writer(buf)

    for field in iter_field_objects(fields):
        buf.write(b("--%s\r\n" % (boundary)))
        text_out.write(field.render_headers())

        payload = field.data
        if isinstance(payload, int):
            payload = str(payload)  # Backwards compatibility

        if isinstance(payload, six.text_type):
            text_out.write(payload)
        else:
            buf.write(payload)

        buf.write(b"\r\n")

    # Closing delimiter.
    buf.write(b("--%s--\r\n" % (boundary)))

    return buf.getvalue(), str("multipart/form-data; boundary=%s" % boundary)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/poolmanager.py
ADDED
|
@@ -0,0 +1,537 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import collections
|
| 4 |
+
import functools
|
| 5 |
+
import logging
|
| 6 |
+
|
| 7 |
+
from ._collections import RecentlyUsedContainer
|
| 8 |
+
from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
|
| 9 |
+
from .exceptions import (
|
| 10 |
+
LocationValueError,
|
| 11 |
+
MaxRetryError,
|
| 12 |
+
ProxySchemeUnknown,
|
| 13 |
+
ProxySchemeUnsupported,
|
| 14 |
+
URLSchemeUnknown,
|
| 15 |
+
)
|
| 16 |
+
from .packages import six
|
| 17 |
+
from .packages.six.moves.urllib.parse import urljoin
|
| 18 |
+
from .request import RequestMethods
|
| 19 |
+
from .util.proxy import connection_requires_http_tunnel
|
| 20 |
+
from .util.retry import Retry
|
| 21 |
+
from .util.url import parse_url
|
| 22 |
+
|
| 23 |
+
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
log = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
SSL_KEYWORDS = (
|
| 29 |
+
"key_file",
|
| 30 |
+
"cert_file",
|
| 31 |
+
"cert_reqs",
|
| 32 |
+
"ca_certs",
|
| 33 |
+
"ssl_version",
|
| 34 |
+
"ca_cert_dir",
|
| 35 |
+
"ssl_context",
|
| 36 |
+
"key_password",
|
| 37 |
+
"server_hostname",
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
# All known keyword arguments that could be provided to the pool manager, its
|
| 41 |
+
# pools, or the underlying connections. This is used to construct a pool key.
|
| 42 |
+
_key_fields = (
|
| 43 |
+
"key_scheme", # str
|
| 44 |
+
"key_host", # str
|
| 45 |
+
"key_port", # int
|
| 46 |
+
"key_timeout", # int or float or Timeout
|
| 47 |
+
"key_retries", # int or Retry
|
| 48 |
+
"key_strict", # bool
|
| 49 |
+
"key_block", # bool
|
| 50 |
+
"key_source_address", # str
|
| 51 |
+
"key_key_file", # str
|
| 52 |
+
"key_key_password", # str
|
| 53 |
+
"key_cert_file", # str
|
| 54 |
+
"key_cert_reqs", # str
|
| 55 |
+
"key_ca_certs", # str
|
| 56 |
+
"key_ssl_version", # str
|
| 57 |
+
"key_ca_cert_dir", # str
|
| 58 |
+
"key_ssl_context", # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
|
| 59 |
+
"key_maxsize", # int
|
| 60 |
+
"key_headers", # dict
|
| 61 |
+
"key__proxy", # parsed proxy url
|
| 62 |
+
"key__proxy_headers", # dict
|
| 63 |
+
"key__proxy_config", # class
|
| 64 |
+
"key_socket_options", # list of (level (int), optname (int), value (int or str)) tuples
|
| 65 |
+
"key__socks_options", # dict
|
| 66 |
+
"key_assert_hostname", # bool or string
|
| 67 |
+
"key_assert_fingerprint", # str
|
| 68 |
+
"key_server_hostname", # str
|
| 69 |
+
)
|
| 70 |
+
|
| 71 |
+
#: The namedtuple class used to construct keys for the connection pool.
|
| 72 |
+
#: All custom key schemes should include the fields in this key at a minimum.
|
| 73 |
+
PoolKey = collections.namedtuple("PoolKey", _key_fields)
|
| 74 |
+
|
| 75 |
+
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
|
| 76 |
+
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _default_key_normalizer(key_class, request_context):
    """
    Create a pool key out of a request context dictionary.

    According to RFC 3986, both the scheme and host are case-insensitive.
    Therefore, this function lower-cases both before constructing the pool
    key. If you wish to change this behaviour, provide alternate callables
    to ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a namedtuple
        whose field names are the context keys prefixed with ``key_``.
    :type  key_class: namedtuple
    :param request_context:
        A dictionary-like object that contain the context for a request.
        It is copied, never modified.
    :type  request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype:  PoolKey
    """
    # Work on a copy so the caller's context stays untouched.
    ctx = request_context.copy()
    for name in ("scheme", "host"):
        ctx[name] = ctx[name].lower()

    # Dictionary-valued entries become frozensets of their items.
    for name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = ctx.get(name)
        if mapping is not None:
            ctx[name] = frozenset(mapping.items())

    # socket_options may be a list; store it as a tuple instead.
    opts = ctx.get("socket_options")
    if opts is not None:
        ctx["socket_options"] = tuple(opts)

    # namedtuple fields can't start with '_', so every key gets a "key_" prefix.
    for name in list(ctx):
        ctx["key_" + name] = ctx.pop(name)

    # Fields the context did not supply default to None.
    for field in key_class._fields:
        ctx.setdefault(field, None)

    return key_class(**ctx)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
#: A dictionary that maps a scheme to a callable that creates a pool key.
|
| 129 |
+
#: This can be used to alter the way pool keys are constructed, if desired.
|
| 130 |
+
#: Each PoolManager makes a copy of this dictionary so they can be configured
|
| 131 |
+
#: globally here, or individually on the instance.
|
| 132 |
+
key_fn_by_scheme = {
|
| 133 |
+
"http": functools.partial(_default_key_normalizer, PoolKey),
|
| 134 |
+
"https": functools.partial(_default_key_normalizer, PoolKey),
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # Class-level defaults; ProxyManager sets these per instance.
    proxy = None
    proxy_config = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        Note that a provided ``request_context`` is modified in place: the
        ``scheme``, ``host`` and ``port`` keys (and, for plain HTTP, the SSL
        keywords) are removed from it.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools are not given TLS-only keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown: if no key function is registered for the scheme.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is read (``scheme``, ``host``, ``port``) whenever
        no pool is cached for ``pool_key`` and a new one has to be created.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    try:
                        del base_pool_kwargs[key]
                    except KeyError:
                        pass
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url):
        """
        Indicates if the proxy requires the complete destination URL in the
        request.  Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Validates that we're not attempting to do TLS in TLS connections on
        Python2 or with unsupported SSL implementations.

        :raises ProxySchemeUnsupported:
            on Python 2 when an HTTPS destination is requested through an
            HTTPS proxy without ``use_forwarding_for_https``.
        """
        if self.proxy is None or url_scheme != "https":
            return

        if self.proxy.scheme != "https":
            return

        if six.PY2 and not self.proxy_config.use_forwarding_for_https:
            raise ProxySchemeUnsupported(
                "Contacting HTTPS destinations through HTTPS proxies "
                "'via CONNECT tunnels' is not supported in Python 2"
            )

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        A ``headers`` mapping passed by the caller is never modified: when
        headers have to be stripped for a cross-host redirect, that happens
        on a copy.
        """
        u = parse_url(url)
        self._validate_proxy_scheme_url_selection(u.scheme)

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here (possibly across hosts), not by the pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 7231, Section 6.4.4
        if response.status == 303:
            method = "GET"

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Strip from a copy: ``kw["headers"]`` may be the very mapping the
            # caller handed in, and the caller must not lose e.g. its
            # Authorization header just because one request was redirected.
            new_headers = kw["headers"].copy()
            for header in list(six.iterkeys(kw["headers"])):
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private.  IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        proxy_ssl_context=None,
        use_forwarding_for_https=False,
        **connection_pool_kw
    ):
        # An existing connection pool may be given in place of a URL string.
        if isinstance(proxy_url, HTTPConnectionPool):
            pool = proxy_url
            proxy_url = "%s://%s:%i" % (pool.scheme, pool.host, pool.port)

        parsed = parse_url(proxy_url)
        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        # Fill in the scheme's default port when the URL carries none.
        if not parsed.port:
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

        # Hand the proxy settings down to every pool this manager creates.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Return the pool to use for ``host``.

        For ``https`` the pool is keyed on the requested host, port and
        scheme; for any other scheme the pool is keyed on the proxy's own
        host, port and scheme.
        """
        if scheme != "https":
            host, port, scheme = self.proxy.host, self.proxy.port, self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dict; ``headers`` itself is not modified.
        """
        merged = {"Accept": "*/*"}

        host = parse_url(url).netloc
        if host:
            merged["Host"] = host

        # User-supplied headers win over the defaults above.
        if headers:
            merged.update(headers)
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        target = parse_url(url)
        tunnelled = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, target.scheme
        )
        if not tunnelled:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
|
| 534 |
+
|
| 535 |
+
|
| 536 |
+
def proxy_from_url(url, **kw):
    """
    Build a :class:`ProxyManager` for the proxy at ``url``.

    All keyword arguments are passed through to the ``ProxyManager``
    constructor unchanged.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/request.py
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import sys
|
| 4 |
+
|
| 5 |
+
from .filepost import encode_multipart_formdata
|
| 6 |
+
from .packages import six
|
| 7 |
+
from .packages.six.moves.urllib.parse import urlencode
|
| 8 |
+
|
| 9 |
+
__all__ = ["RequestMethods"]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class RequestMethods(object):
    """
    Convenience mixin for classes that implement a :meth:`urlopen` method, such
    as :class:`urllib3.HTTPConnectionPool` and
    :class:`urllib3.PoolManager`.

    Provides behavior for making common types of HTTP request methods and
    decides which type of request field encoding to use.

    Specifically,

    :meth:`.request_encode_url` is for sending requests whose fields are
    encoded in the URL (such as GET, HEAD, DELETE).

    :meth:`.request_encode_body` is for sending requests whose fields are
    encoded in the *body* of the request using multipart or www-form-urlencoded
    (such as for POST, PUT, PATCH).

    :meth:`.request` is for making any kind of request, it will look up the
    appropriate encoding format and use one of the above two methods to make
    the request.

    Initializer parameters:

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.
    """

    # Methods whose ``fields`` are sent in the query string rather than the body.
    _encode_url_methods = {"DELETE", "GET", "HEAD", "OPTIONS"}

    def __init__(self, headers=None):
        self.headers = headers or {}

    def urlopen(
        self,
        method,
        url,
        body=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **kw
    ):  # Abstract
        raise NotImplementedError(
            "Classes extending RequestMethods must implement "
            "their own ``urlopen`` method."
        )

    def request(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the appropriate encoding of
        ``fields`` based on the ``method`` used.

        This is a convenience method that requires the least amount of manual
        effort. It can be used in most situations, while still having the
        option to drop down to more specific methods when necessary, such as
        :meth:`request_encode_url`, :meth:`request_encode_body`,
        or even the lowest level :meth:`urlopen`.
        """
        method = method.upper()

        # The un-encoded URL is always forwarded to ``urlopen``.
        urlopen_kw["request_url"] = url

        if method in self._encode_url_methods:
            return self.request_encode_url(
                method, url, fields=fields, headers=headers, **urlopen_kw
            )
        else:
            return self.request_encode_body(
                method, url, fields=fields, headers=headers, **urlopen_kw
            )

    def request_encode_url(self, method, url, fields=None, headers=None, **urlopen_kw):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the url. This is useful for request methods like GET, HEAD, DELETE, etc.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {"headers": headers}
        extra_kw.update(urlopen_kw)

        if fields:
            url += "?" + urlencode(fields)

        return self.urlopen(method, url, **extra_kw)

    def request_encode_body(
        self,
        method,
        url,
        fields=None,
        headers=None,
        encode_multipart=True,
        multipart_boundary=None,
        **urlopen_kw
    ):
        """
        Make a request using :meth:`urlopen` with the ``fields`` encoded in
        the body. This is useful for request methods like POST, PUT, PATCH, etc.

        When ``encode_multipart=True`` (default), then
        :func:`urllib3.encode_multipart_formdata` is used to encode
        the payload with the appropriate content type. Otherwise
        :func:`urllib.parse.urlencode` is used with the
        'application/x-www-form-urlencoded' content type.

        Multipart encoding must be used when posting files, and it's reasonably
        safe to use it in other times too. However, it may break request
        signing, such as with OAuth.

        Supports an optional ``fields`` parameter of key/value strings AND
        key/filetuple. A filetuple is a (filename, data, MIME type) tuple where
        the MIME type is optional. For example::

            fields = {
                'foo': 'bar',
                'fakefile': ('foofile.txt', 'contents of foofile'),
                'realfile': ('barfile.txt', open('realfile').read()),
                'typedfile': ('bazfile.bin', open('bazfile').read(),
                              'image/jpeg'),
                'nonamefile': 'contents of nonamefile field',
            }

        When uploading a file, providing a filename (the first parameter of the
        tuple) is optional but recommended to best mimic behavior of browsers.

        Note that when ``fields`` are given, a 'Content-Type' header is
        generated to match the encoded body (for multipart it carries the
        boundary string, which can be set explicitly with the
        ``multipart_boundary`` parameter). The caller's ``headers`` are merged
        in afterwards, so a 'Content-Type' key supplied there replaces the
        generated value.

        :raises TypeError: if both ``fields`` and a ``body`` keyword are given.
        """
        if headers is None:
            headers = self.headers

        extra_kw = {"headers": {}}

        if fields:
            if "body" in urlopen_kw:
                raise TypeError(
                    "request got values for both 'fields' and 'body', can only specify one."
                )

            if encode_multipart:
                body, content_type = encode_multipart_formdata(
                    fields, boundary=multipart_boundary
                )
            else:
                body, content_type = (
                    urlencode(fields),
                    "application/x-www-form-urlencoded",
                )

            extra_kw["body"] = body
            extra_kw["headers"] = {"Content-Type": content_type}

        # Caller headers are applied on top of the generated ones.
        extra_kw["headers"].update(headers)
        extra_kw.update(urlopen_kw)

        return self.urlopen(method, url, **extra_kw)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
if not six.PY2:

    class RequestModule(sys.modules[__name__].__class__):
        """Module type that makes calling this module raise a helpful error."""

        def __call__(self, *args, **kwargs):
            """
            If user tries to call this module directly urllib3 v2.x style raise an error to the user
            suggesting they may need urllib3 v2
            """
            raise TypeError(
                "'module' object is not callable\n"
                "urllib3.request() method is not supported in this release, "
                "upgrade to urllib3 v2 to use it\n"
                "see https://urllib3.readthedocs.io/en/stable/v2-migration-guide.html"
            )

    # Swap this module's class so that the __call__ hook above takes effect.
    sys.modules[__name__].__class__ = RequestModule
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/response.py
ADDED
|
@@ -0,0 +1,879 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import io
|
| 4 |
+
import logging
|
| 5 |
+
import sys
|
| 6 |
+
import warnings
|
| 7 |
+
import zlib
|
| 8 |
+
from contextlib import contextmanager
|
| 9 |
+
from socket import error as SocketError
|
| 10 |
+
from socket import timeout as SocketTimeout
|
| 11 |
+
|
| 12 |
+
brotli = None
|
| 13 |
+
|
| 14 |
+
from . import util
|
| 15 |
+
from ._collections import HTTPHeaderDict
|
| 16 |
+
from .connection import BaseSSLError, HTTPException
|
| 17 |
+
from .exceptions import (
|
| 18 |
+
BodyNotHttplibCompatible,
|
| 19 |
+
DecodeError,
|
| 20 |
+
HTTPError,
|
| 21 |
+
IncompleteRead,
|
| 22 |
+
InvalidChunkLength,
|
| 23 |
+
InvalidHeader,
|
| 24 |
+
ProtocolError,
|
| 25 |
+
ReadTimeoutError,
|
| 26 |
+
ResponseNotChunked,
|
| 27 |
+
SSLError,
|
| 28 |
+
)
|
| 29 |
+
from .packages import six
|
| 30 |
+
from .util.response import is_fp_closed, is_response_to_head
|
| 31 |
+
|
| 32 |
+
log = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class DeflateDecoder(object):
    """
    Decoder for ``deflate`` content.

    The first data is tried as a zlib stream. If that raises
    ``zlib.error``, everything buffered so far is retried as a raw deflate
    stream (negative window bits) and that mode is used from then on.
    """

    def __init__(self):
        self._first_try = True
        # Input seen while the stream format is still undecided.
        self._data = b""
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Anything not defined here is delegated to the zlib object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Decompress ``data``; empty input is returned unchanged."""
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                # Output was produced, so the zlib format is confirmed.
                self._first_try = False
                self._data = None
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class GzipDecoderState(object):
    """States used by :class:`GzipDecoder` while decoding a stream."""

    # Still decoding the first gzip member.
    FIRST_MEMBER = 0
    # Decoding a member after the first one.
    OTHER_MEMBERS = 1
    # A decode error occurred; all further input is ignored.
    SWALLOW_DATA = 2
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
class GzipDecoder(object):
    """
    Decoder for ``gzip`` content that accepts several concatenated members.

    A decode error in the first member is raised. A decode error in a later
    member is treated as trailing garbage: the data decoded so far is
    returned and all further input is ignored.
    """

    def __init__(self):
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Anything not defined here is delegated to the zlib object.
        return getattr(self._obj, name)

    def decompress(self, data):
        """Decompress ``data`` and return the decoded bytes."""
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            # Bytes left over after the end of a member start the next one.
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
if brotli is not None:

    class BrotliDecoder(object):
        """Decoder for ``br`` content, defined only when brotli is available."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self):
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                self.decompress = self._obj.decompress
            else:
                self.decompress = self._obj.process

        def flush(self):
            """Flush the decompressor if it supports it, else return ``b""``."""
            if hasattr(self._obj, "flush"):
                return self._obj.flush()
            return b""
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
class MultiDecoder(object):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes):
        # One decoder per comma-separated coding, in header order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self):
        """Flush the first decoder only and return its output."""
        return self._decoders[0].flush()

    def decompress(self, data):
        """Run ``data`` through the decoders in reverse header order."""
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def _get_decoder(mode):
    """
    Return a new decoder object for the content coding ``mode``.

    A value containing a comma yields a :class:`MultiDecoder`; ``"gzip"``
    yields a :class:`GzipDecoder`; ``"br"`` yields a ``BrotliDecoder`` when
    brotli is available; anything else yields a :class:`DeflateDecoder`.
    """
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    return DeflateDecoder()
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
class HTTPResponse(io.IOBase):
|
| 158 |
+
"""
|
| 159 |
+
HTTP Response container.
|
| 160 |
+
|
| 161 |
+
Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
|
| 162 |
+
loaded and decoded on-demand when the ``data`` property is accessed. This
|
| 163 |
+
class is also compatible with the Python standard library's :mod:`io`
|
| 164 |
+
module, and can hence be treated as a readable object in the context of that
|
| 165 |
+
framework.
|
| 166 |
+
|
| 167 |
+
Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
|
| 168 |
+
|
| 169 |
+
:param preload_content:
|
| 170 |
+
If True, the response's body will be preloaded during construction.
|
| 171 |
+
|
| 172 |
+
:param decode_content:
|
| 173 |
+
If True, will attempt to decode the body based on the
|
| 174 |
+
'content-encoding' header.
|
| 175 |
+
|
| 176 |
+
:param original_response:
|
| 177 |
+
When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
|
| 178 |
+
object, it's convenient to include the original for debug purposes. It's
|
| 179 |
+
otherwise unused.
|
| 180 |
+
|
| 181 |
+
:param retries:
|
| 182 |
+
The retries contains the last :class:`~urllib3.util.retry.Retry` that
|
| 183 |
+
was used during the request.
|
| 184 |
+
|
| 185 |
+
:param enforce_content_length:
|
| 186 |
+
Enforce content length checking. Body returned by server must match
|
| 187 |
+
value of Content-Length header, if present. Otherwise, raise error.
|
| 188 |
+
"""
|
| 189 |
+
|
| 190 |
+
CONTENT_DECODERS = ["gzip", "deflate"]
|
| 191 |
+
if brotli is not None:
|
| 192 |
+
CONTENT_DECODERS += ["br"]
|
| 193 |
+
REDIRECT_STATUSES = [301, 302, 303, 307, 308]
|
| 194 |
+
|
| 195 |
+
def __init__(
    self,
    body="",
    headers=None,
    status=0,
    version=0,
    reason=None,
    strict=0,
    preload_content=True,
    decode_content=True,
    original_response=None,
    pool=None,
    connection=None,
    msg=None,
    retries=None,
    enforce_content_length=False,
    request_method=None,
    request_url=None,
    auto_close=True,
):
    """
    Store the response metadata, classify ``body`` as either an in-memory
    string/bytes value or a file-like object, and optionally preload it.
    """

    if isinstance(headers, HTTPHeaderDict):
        self.headers = headers
    else:
        self.headers = HTTPHeaderDict(headers)
    self.status = status
    self.version = version
    self.reason = reason
    self.strict = strict
    self.decode_content = decode_content
    self.retries = retries
    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close

    self._decoder = None
    self._body = None
    self._fp = None
    self._original_response = original_response
    self._fp_bytes_read = 0
    self.msg = msg
    self._request_url = request_url

    # A non-empty string/bytes body is kept as the already-loaded content.
    if body and isinstance(body, (six.string_types, bytes)):
        self._body = body

    self._pool = pool
    self._connection = connection

    # Anything with a ``read`` attribute is treated as the stream to read from.
    if hasattr(body, "read"):
        self._fp = body

    # Are we using the chunked-style of transfer encoding?
    self.chunked = False
    self.chunk_left = None
    tr_enc = self.headers.get("transfer-encoding", "").lower()
    # Don't incur the penalty of creating a list and then discarding it
    encodings = (enc.strip() for enc in tr_enc.split(","))
    if "chunked" in encodings:
        self.chunked = True

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)
|
| 261 |
+
|
| 262 |
+
def get_redirect_location(self):
    """
    Should we redirect and where to?

    :returns: Truthy redirect location string if we got a redirect status
        code and valid location. ``None`` if redirect status and no
        location. ``False`` if not a redirect status code.
    """
    if self.status in self.REDIRECT_STATUSES:
        return self.headers.get("location")

    return False
|
| 274 |
+
|
| 275 |
+
def release_conn(self):
    """
    Hand the held connection back to the pool and forget it.

    Does nothing when there is no pool or no connection.
    """
    if not self._pool or not self._connection:
        return

    self._pool._put_conn(self._connection)
    self._connection = None
|
| 281 |
+
|
| 282 |
+
def drain_conn(self):
    """
    Read and discard any remaining HTTP response data in the response connection.

    Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
    """
    try:
        self.read()
    except (HTTPError, SocketError, BaseSSLError, HTTPException):
        # Best effort: errors while draining are deliberately ignored.
        pass
|
| 292 |
+
|
| 293 |
+
@property
def data(self):
    """
    The response body.

    Returns the cached body when it is truthy; otherwise reads (and caches)
    from the underlying file-like object when there is one. Falls through
    and returns ``None`` when neither is available.
    """
    # Kept for backwards compatibility with urllib3 0.4 and earlier.
    if self._body:
        return self._body

    if self._fp:
        return self.read(cache_content=True)
|
| 301 |
+
|
| 302 |
+
@property
def connection(self):
    """The connection currently held by this response, or ``None``."""
    return self._connection
|
| 305 |
+
|
| 306 |
+
def isclosed(self):
    """Return the result of ``is_fp_closed`` for the underlying file object."""
    return is_fp_closed(self._fp)
|
| 308 |
+
|
| 309 |
+
def tell(self):
    """
    Obtain the number of bytes pulled over the wire so far. May differ from
    the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
    if bytes are encoded on the wire (e.g, compressed).
    """
    return self._fp_bytes_read
|
| 316 |
+
|
| 317 |
+
def _init_length(self, request_method):
|
| 318 |
+
"""
|
| 319 |
+
Set initial length value for Response content if available.
|
| 320 |
+
"""
|
| 321 |
+
length = self.headers.get("content-length")
|
| 322 |
+
|
| 323 |
+
if length is not None:
|
| 324 |
+
if self.chunked:
|
| 325 |
+
# This Response will fail with an IncompleteRead if it can't be
|
| 326 |
+
# received as chunked. This method falls back to attempt reading
|
| 327 |
+
# the response before raising an exception.
|
| 328 |
+
log.warning(
|
| 329 |
+
"Received response with both Content-Length and "
|
| 330 |
+
"Transfer-Encoding set. This is expressly forbidden "
|
| 331 |
+
"by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
|
| 332 |
+
"attempting to process response as Transfer-Encoding: "
|
| 333 |
+
"chunked."
|
| 334 |
+
)
|
| 335 |
+
return None
|
| 336 |
+
|
| 337 |
+
try:
|
| 338 |
+
# RFC 7230 section 3.3.2 specifies multiple content lengths can
|
| 339 |
+
# be sent in a single Content-Length header
|
| 340 |
+
# (e.g. Content-Length: 42, 42). This line ensures the values
|
| 341 |
+
# are all valid ints and that as long as the `set` length is 1,
|
| 342 |
+
# all values are the same. Otherwise, the header is invalid.
|
| 343 |
+
lengths = set([int(val) for val in length.split(",")])
|
| 344 |
+
if len(lengths) > 1:
|
| 345 |
+
raise InvalidHeader(
|
| 346 |
+
"Content-Length contained multiple "
|
| 347 |
+
"unmatching values (%s)" % length
|
| 348 |
+
)
|
| 349 |
+
length = lengths.pop()
|
| 350 |
+
except ValueError:
|
| 351 |
+
length = None
|
| 352 |
+
else:
|
| 353 |
+
if length < 0:
|
| 354 |
+
length = None
|
| 355 |
+
|
| 356 |
+
# Convert status to int for comparison
|
| 357 |
+
# In some cases, httplib returns a status of "_UNKNOWN"
|
| 358 |
+
try:
|
| 359 |
+
status = int(self.status)
|
| 360 |
+
except ValueError:
|
| 361 |
+
status = 0
|
| 362 |
+
|
| 363 |
+
# Check for responses that shouldn't include a body
|
| 364 |
+
if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
|
| 365 |
+
length = 0
|
| 366 |
+
|
| 367 |
+
return length
|
| 368 |
+
|
| 369 |
+
def _init_decoder(self):
|
| 370 |
+
"""
|
| 371 |
+
Set-up the _decoder attribute if necessary.
|
| 372 |
+
"""
|
| 373 |
+
# Note: content-encoding value should be case-insensitive, per RFC 7230
|
| 374 |
+
# Section 3.2
|
| 375 |
+
content_encoding = self.headers.get("content-encoding", "").lower()
|
| 376 |
+
if self._decoder is None:
|
| 377 |
+
if content_encoding in self.CONTENT_DECODERS:
|
| 378 |
+
self._decoder = _get_decoder(content_encoding)
|
| 379 |
+
elif "," in content_encoding:
|
| 380 |
+
encodings = [
|
| 381 |
+
e.strip()
|
| 382 |
+
for e in content_encoding.split(",")
|
| 383 |
+
if e.strip() in self.CONTENT_DECODERS
|
| 384 |
+
]
|
| 385 |
+
if len(encodings):
|
| 386 |
+
self._decoder = _get_decoder(content_encoding)
|
| 387 |
+
|
| 388 |
+
# Exception types treated as decode failures; the brotli error type is
# appended only when a brotli module is available.
DECODER_ERROR_CLASSES = (IOError, zlib.error)
if brotli is not None:
    DECODER_ERROR_CLASSES += (brotli.error,)
|
| 391 |
+
|
| 392 |
+
def _decode(self, data, decode_content, flush_decoder):
|
| 393 |
+
"""
|
| 394 |
+
Decode the data passed in and potentially flush the decoder.
|
| 395 |
+
"""
|
| 396 |
+
if not decode_content:
|
| 397 |
+
return data
|
| 398 |
+
|
| 399 |
+
try:
|
| 400 |
+
if self._decoder:
|
| 401 |
+
data = self._decoder.decompress(data)
|
| 402 |
+
except self.DECODER_ERROR_CLASSES as e:
|
| 403 |
+
content_encoding = self.headers.get("content-encoding", "").lower()
|
| 404 |
+
raise DecodeError(
|
| 405 |
+
"Received response with content-encoding: %s, but "
|
| 406 |
+
"failed to decode it." % content_encoding,
|
| 407 |
+
e,
|
| 408 |
+
)
|
| 409 |
+
if flush_decoder:
|
| 410 |
+
data += self._flush_decoder()
|
| 411 |
+
|
| 412 |
+
return data
|
| 413 |
+
|
| 414 |
+
def _flush_decoder(self):
|
| 415 |
+
"""
|
| 416 |
+
Flushes the decoder. Should only be called if the decoder is actually
|
| 417 |
+
being used.
|
| 418 |
+
"""
|
| 419 |
+
if self._decoder:
|
| 420 |
+
buf = self._decoder.decompress(b"")
|
| 421 |
+
return buf + self._decoder.flush()
|
| 422 |
+
|
| 423 |
+
return b""
|
| 424 |
+
|
| 425 |
+
@contextmanager
|
| 426 |
+
def _error_catcher(self):
|
| 427 |
+
"""
|
| 428 |
+
Catch low-level python exceptions, instead re-raising urllib3
|
| 429 |
+
variants, so that low-level exceptions are not leaked in the
|
| 430 |
+
high-level api.
|
| 431 |
+
|
| 432 |
+
On exit, release the connection back to the pool.
|
| 433 |
+
"""
|
| 434 |
+
clean_exit = False
|
| 435 |
+
|
| 436 |
+
try:
|
| 437 |
+
try:
|
| 438 |
+
yield
|
| 439 |
+
|
| 440 |
+
except SocketTimeout:
|
| 441 |
+
# FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
|
| 442 |
+
# there is yet no clean way to get at it from this context.
|
| 443 |
+
raise ReadTimeoutError(self._pool, None, "Read timed out.")
|
| 444 |
+
|
| 445 |
+
except BaseSSLError as e:
|
| 446 |
+
# FIXME: Is there a better way to differentiate between SSLErrors?
|
| 447 |
+
if "read operation timed out" not in str(e):
|
| 448 |
+
# SSL errors related to framing/MAC get wrapped and reraised here
|
| 449 |
+
raise SSLError(e)
|
| 450 |
+
|
| 451 |
+
raise ReadTimeoutError(self._pool, None, "Read timed out.")
|
| 452 |
+
|
| 453 |
+
except (HTTPException, SocketError) as e:
|
| 454 |
+
# This includes IncompleteRead.
|
| 455 |
+
raise ProtocolError("Connection broken: %r" % e, e)
|
| 456 |
+
|
| 457 |
+
# If no exception is thrown, we should avoid cleaning up
|
| 458 |
+
# unnecessarily.
|
| 459 |
+
clean_exit = True
|
| 460 |
+
finally:
|
| 461 |
+
# If we didn't terminate cleanly, we need to throw away our
|
| 462 |
+
# connection.
|
| 463 |
+
if not clean_exit:
|
| 464 |
+
# The response may not be closed but we're not going to use it
|
| 465 |
+
# anymore so close it now to ensure that the connection is
|
| 466 |
+
# released back to the pool.
|
| 467 |
+
if self._original_response:
|
| 468 |
+
self._original_response.close()
|
| 469 |
+
|
| 470 |
+
# Closing the response may not actually be sufficient to close
|
| 471 |
+
# everything, so if we have a hold of the connection close that
|
| 472 |
+
# too.
|
| 473 |
+
if self._connection:
|
| 474 |
+
self._connection.close()
|
| 475 |
+
|
| 476 |
+
# If we hold the original response but it's closed now, we should
|
| 477 |
+
# return the connection back to the pool.
|
| 478 |
+
if self._original_response and self._original_response.isclosed():
|
| 479 |
+
self.release_conn()
|
| 480 |
+
|
| 481 |
+
def _fp_read(self, amt):
|
| 482 |
+
"""
|
| 483 |
+
Read a response with the thought that reading the number of bytes
|
| 484 |
+
larger than can fit in a 32-bit int at a time via SSL in some
|
| 485 |
+
known cases leads to an overflow error that has to be prevented
|
| 486 |
+
if `amt` or `self.length_remaining` indicate that a problem may
|
| 487 |
+
happen.
|
| 488 |
+
|
| 489 |
+
The known cases:
|
| 490 |
+
* 3.8 <= CPython < 3.9.7 because of a bug
|
| 491 |
+
https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
|
| 492 |
+
* urllib3 injected with pyOpenSSL-backed SSL-support.
|
| 493 |
+
* CPython < 3.10 only when `amt` does not fit 32-bit int.
|
| 494 |
+
"""
|
| 495 |
+
assert self._fp
|
| 496 |
+
c_int_max = 2 ** 31 - 1
|
| 497 |
+
if (
|
| 498 |
+
(
|
| 499 |
+
(amt and amt > c_int_max)
|
| 500 |
+
or (self.length_remaining and self.length_remaining > c_int_max)
|
| 501 |
+
)
|
| 502 |
+
and not util.IS_SECURETRANSPORT
|
| 503 |
+
and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
|
| 504 |
+
):
|
| 505 |
+
buffer = io.BytesIO()
|
| 506 |
+
# Besides `max_chunk_amt` being a maximum chunk size, it
|
| 507 |
+
# affects memory overhead of reading a response by this
|
| 508 |
+
# method in CPython.
|
| 509 |
+
# `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
|
| 510 |
+
# chunk size that does not lead to an overflow error, but
|
| 511 |
+
# 256 MiB is a compromise.
|
| 512 |
+
max_chunk_amt = 2 ** 28
|
| 513 |
+
while amt is None or amt != 0:
|
| 514 |
+
if amt is not None:
|
| 515 |
+
chunk_amt = min(amt, max_chunk_amt)
|
| 516 |
+
amt -= chunk_amt
|
| 517 |
+
else:
|
| 518 |
+
chunk_amt = max_chunk_amt
|
| 519 |
+
data = self._fp.read(chunk_amt)
|
| 520 |
+
if not data:
|
| 521 |
+
break
|
| 522 |
+
buffer.write(data)
|
| 523 |
+
del data # to reduce peak memory usage by `max_chunk_amt`.
|
| 524 |
+
return buffer.getvalue()
|
| 525 |
+
else:
|
| 526 |
+
# StringIO doesn't like amt=None
|
| 527 |
+
return self._fp.read(amt) if amt is not None else self._fp.read()
|
| 528 |
+
|
| 529 |
+
def read(self, amt=None, decode_content=None, cache_content=False):
    """
    Counterpart of :meth:`http.client.HTTPResponse.read` that additionally
    understands ``decode_content`` and ``cache_content``.

    :param amt:
        Number of bytes to read. A partial read is never cached, because
        it would not represent the full response.

    :param decode_content:
        If True, the body is decoded according to the
        'content-encoding' header. Defaults to ``self.decode_content``.

    :param cache_content:
        If True, the returned data is stored so ``.data`` keeps working
        after the file object was consumed. (Ignored when ``amt`` is set.)
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if self._fp is None:
        return

    already_closed = getattr(self._fp, "closed", False)
    # Reading everything means the decoder can be flushed afterwards.
    flush_decoder = amt is None

    with self._error_catcher():
        data = b"" if already_closed else self._fp_read(amt)
        if amt is not None:
            cache_content = False
            if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.
                # No data came back, so close the connection.
                #
                # httplib/http.client _should_ already do this, but Python
                # releases before December 15, 2012
                # (http://bugs.python.org/issue16298) miss some cases.
                # Closing twice is harmless.
                self._fp.close()
                flush_decoder = True
                if self.enforce_content_length and self.length_remaining not in (
                    0,
                    None,
                ):
                    # httplib left this edge case alone for backward
                    # compatibility; raising here makes sure a wrong
                    # Content-Length is detected while streaming too.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

    if not data:
        return data

    received = len(data)
    self._fp_bytes_read += received
    if self.length_remaining is not None:
        self.length_remaining -= received

    data = self._decode(data, decode_content, flush_decoder)

    if cache_content:
        self._body = data

    return data
|
| 600 |
+
|
| 601 |
+
def stream(self, amt=2 ** 16, decode_content=None):
    """
    Generator built on top of ``read()`` / ``read_chunked()``. Each step
    blocks until ``amt`` bytes were read from the connection or the
    connection is closed.

    :param amt:
        Upper bound on how much content to read per iteration. Less may
        be produced, which is particularly likely with compressed data,
        but an empty string is never yielded.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if self.chunked and self.supports_chunked_reads():
        for piece in self.read_chunked(amt, decode_content=decode_content):
            yield piece
        return

    while not is_fp_closed(self._fp):
        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece
|
| 626 |
+
|
| 627 |
+
@classmethod
def from_httplib(ResponseCls, r, **response_kw):
    """
    Build a :class:`urllib3.response.HTTPResponse` out of the
    :class:`http.client.HTTPResponse` instance ``r``.

    Any extra keyword arguments are forwarded to the HTTPResponse
    constructor, together with ``original_response=r``.
    """
    raw_headers = r.msg
    if isinstance(raw_headers, HTTPHeaderDict):
        headers = raw_headers
    elif six.PY2:
        # Python 2.7
        headers = HTTPHeaderDict.from_httplib(raw_headers)
    else:
        headers = HTTPHeaderDict(raw_headers.items())

    # Python 3 HTTPResponse objects carry no .strict attribute.
    strict = getattr(r, "strict", 0)
    return ResponseCls(
        body=r,
        headers=headers,
        status=r.status,
        version=r.version,
        reason=r.reason,
        strict=strict,
        original_response=r,
        **response_kw
    )
|
| 658 |
+
|
| 659 |
+
# Backwards-compatibility methods for http.client.HTTPResponse
|
| 660 |
+
def getheaders(self):
    """
    Deprecated http.client-compatible accessor: emit a
    DeprecationWarning and return ``self.headers``.
    """
    message = (
        "HTTPResponse.getheaders() is deprecated and will be removed "
        "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly."
    )
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return self.headers
|
| 668 |
+
|
| 669 |
+
def getheader(self, name, default=None):
    """
    Deprecated http.client-compatible lookup: emit a DeprecationWarning
    and return ``self.headers.get(name, default)``.
    """
    message = (
        "HTTPResponse.getheader() is deprecated and will be removed "
        "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default)."
    )
    warnings.warn(message, category=DeprecationWarning, stacklevel=2)
    return self.headers.get(name, default)
|
| 677 |
+
|
| 678 |
+
# Backwards compatibility for http.cookiejar
|
| 679 |
+
def info(self):
    """Return the response headers (the hook http.cookiejar expects)."""
    headers = self.headers
    return headers
|
| 681 |
+
|
| 682 |
+
# Overrides from io.IOBase
|
| 683 |
+
def close(self):
    """
    Close the wrapped file object (unless already closed) and the held
    connection, if any. When ``auto_close`` is off, the ``io.IOBase``
    closed state is updated as well.
    """
    if not self.closed:
        self._fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if not self.auto_close:
        io.IOBase.close(self)
|
| 692 |
+
|
| 693 |
+
@property
def closed(self):
    """
    Report whether the response is closed.

    With ``auto_close`` off, the ``io.IOBase`` state is returned.
    Otherwise the wrapped file object decides, via ``isclosed()`` or
    ``closed``; a missing file object, or one exposing neither, counts
    as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True
|
| 705 |
+
|
| 706 |
+
def fileno(self):
    """
    Return the file descriptor of the wrapped file object.

    :raises IOError:
        If there is no wrapped file object, or it has no ``fileno``.
    """
    fp = self._fp
    if fp is None:
        raise IOError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise IOError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()
|
| 716 |
+
|
| 717 |
+
def flush(self):
    """
    Flush the wrapped file object and return its result. Nothing happens
    (None is returned) when the file object is missing, has no ``flush``
    or reports itself as closed.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush"):
        return None
    if getattr(fp, "closed", False):
        return None
    return fp.flush()
|
| 724 |
+
|
| 725 |
+
def readable(self):
    """Always True; the `io` module needs this method for compatibility."""
    return True
|
| 728 |
+
|
| 729 |
+
def readinto(self, b):
    """
    Read up to ``len(b)`` bytes into the writable buffer ``b``.

    This method is required for `io` module compatibility.

    :param b:
        Pre-allocated, writable bytes-like object (e.g. ``bytearray``).

    :return:
        Number of bytes stored at the start of ``b``; 0 when no data is
        available.
    """
    chunk = self.read(len(b))
    # read() returns None once the underlying file object is gone; report
    # that as 0 bytes read instead of failing on len(None).
    if not chunk:
        return 0
    count = len(chunk)
    b[:count] = chunk
    return count
|
| 737 |
+
|
| 738 |
+
def supports_chunked_reads(self):
    """
    Tell whether the wrapped file-like object resembles an
    :class:`http.client.HTTPResponse`. The test is simply the presence
    of an ``fp`` attribute, which is then assumed to yield raw chunks
    in the form read_chunked() processes.
    """
    body = self._fp
    return hasattr(body, "fp")
|
| 746 |
+
|
| 747 |
+
def _update_chunk_length(self):
|
| 748 |
+
# First, we'll figure out length of a chunk and then
|
| 749 |
+
# we'll try to read it from socket.
|
| 750 |
+
if self.chunk_left is not None:
|
| 751 |
+
return
|
| 752 |
+
line = self._fp.fp.readline()
|
| 753 |
+
line = line.split(b";", 1)[0]
|
| 754 |
+
try:
|
| 755 |
+
self.chunk_left = int(line, 16)
|
| 756 |
+
except ValueError:
|
| 757 |
+
# Invalid chunked protocol response, abort.
|
| 758 |
+
self.close()
|
| 759 |
+
raise InvalidChunkLength(self, line)
|
| 760 |
+
|
| 761 |
+
def _handle_chunk(self, amt):
|
| 762 |
+
returned_chunk = None
|
| 763 |
+
if amt is None:
|
| 764 |
+
chunk = self._fp._safe_read(self.chunk_left)
|
| 765 |
+
returned_chunk = chunk
|
| 766 |
+
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
| 767 |
+
self.chunk_left = None
|
| 768 |
+
elif amt < self.chunk_left:
|
| 769 |
+
value = self._fp._safe_read(amt)
|
| 770 |
+
self.chunk_left = self.chunk_left - amt
|
| 771 |
+
returned_chunk = value
|
| 772 |
+
elif amt == self.chunk_left:
|
| 773 |
+
value = self._fp._safe_read(amt)
|
| 774 |
+
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
| 775 |
+
self.chunk_left = None
|
| 776 |
+
returned_chunk = value
|
| 777 |
+
else: # amt > self.chunk_left
|
| 778 |
+
returned_chunk = self._fp._safe_read(self.chunk_left)
|
| 779 |
+
self._fp._safe_read(2) # Toss the CRLF at the end of the chunk.
|
| 780 |
+
self.chunk_left = None
|
| 781 |
+
return returned_chunk
|
| 782 |
+
|
| 783 |
+
def read_chunked(self, amt=None, decode_content=None):
    """
    Generator over the body of a ``transfer-encoding: chunked`` response;
    like :meth:`HTTPResponse.read` with an extra ``decode_content``
    parameter.

    :param amt:
        Upper bound on the bytes requested from a chunk per step. If
        specified, caching is skipped because it doesn't make sense to
        cache partial content as the full response.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response is not chunked.

    :raises BodyNotHttplibCompatible:
        If the body object has no ``fp`` attribute.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        original = self._original_response

        # A HEAD response has no body worth reading.
        if original and is_response_to_head(original):
            original.close()
            return

        # Already read and closed: nothing left to produce.
        if self._fp.fp is None:
            return

        self._update_chunk_length()
        while self.chunk_left != 0:
            raw = self._handle_chunk(amt)
            decoded = self._decode(
                raw, decode_content=decode_content, flush_decoder=False
            )
            if decoded:
                yield decoded
            self._update_chunk_length()

        if decode_content:
            # CPython and PyPy should never need this flush, but Jython
            # *might*, so it is done defensively.
            leftover = self._flush_decoder()
            if leftover:  # Platform-specific: Jython.
                yield leftover

        # Discard the trailer up to the closing \r\n. Some sites omit it,
        # in which case the stream simply ends.
        while True:
            trailer = self._fp.fp.readline()
            if not trailer or trailer == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()
|
| 852 |
+
|
| 853 |
+
def geturl(self):
    """
    Return the URL this response came from. When the retry history is
    non-empty, that is the redirect location of its last entry;
    otherwise it is the original request URL.
    """
    retries = self.retries
    if retries is None or not len(retries.history):
        return self._request_url
    return retries.history[-1].redirect_location
|
| 863 |
+
|
| 864 |
+
def __iter__(self):
    """
    Iterate over the decoded body line by line. Every yielded item ends
    with ``b"\\n"`` except possibly the last one.
    """
    pending = []
    for chunk in self.stream(decode_content=True):
        if b"\n" not in chunk:
            # No line ends here; keep collecting.
            pending.append(chunk)
            continue

        pieces = chunk.split(b"\n")
        yield b"".join(pending) + pieces[0] + b"\n"
        for middle in pieces[1:-1]:
            yield middle + b"\n"

        # Whatever follows the last newline starts the next line.
        tail = pieces[-1]
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/__init__.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
# For backwards compatibility, provide imports that used to be here.
|
| 4 |
+
from .connection import is_connection_dropped
|
| 5 |
+
from .request import SKIP_HEADER, SKIPPABLE_HEADERS, make_headers
|
| 6 |
+
from .response import is_fp_closed
|
| 7 |
+
from .retry import Retry
|
| 8 |
+
from .ssl_ import (
|
| 9 |
+
ALPN_PROTOCOLS,
|
| 10 |
+
HAS_SNI,
|
| 11 |
+
IS_PYOPENSSL,
|
| 12 |
+
IS_SECURETRANSPORT,
|
| 13 |
+
PROTOCOL_TLS,
|
| 14 |
+
SSLContext,
|
| 15 |
+
assert_fingerprint,
|
| 16 |
+
resolve_cert_reqs,
|
| 17 |
+
resolve_ssl_version,
|
| 18 |
+
ssl_wrap_socket,
|
| 19 |
+
)
|
| 20 |
+
from .timeout import Timeout, current_time
|
| 21 |
+
from .url import Url, get_host, parse_url, split_first
|
| 22 |
+
from .wait import wait_for_read, wait_for_write
|
| 23 |
+
|
| 24 |
+
__all__ = (
|
| 25 |
+
"HAS_SNI",
|
| 26 |
+
"IS_PYOPENSSL",
|
| 27 |
+
"IS_SECURETRANSPORT",
|
| 28 |
+
"SSLContext",
|
| 29 |
+
"PROTOCOL_TLS",
|
| 30 |
+
"ALPN_PROTOCOLS",
|
| 31 |
+
"Retry",
|
| 32 |
+
"Timeout",
|
| 33 |
+
"Url",
|
| 34 |
+
"assert_fingerprint",
|
| 35 |
+
"current_time",
|
| 36 |
+
"is_connection_dropped",
|
| 37 |
+
"is_fp_closed",
|
| 38 |
+
"get_host",
|
| 39 |
+
"parse_url",
|
| 40 |
+
"make_headers",
|
| 41 |
+
"resolve_cert_reqs",
|
| 42 |
+
"resolve_ssl_version",
|
| 43 |
+
"split_first",
|
| 44 |
+
"ssl_wrap_socket",
|
| 45 |
+
"wait_for_read",
|
| 46 |
+
"wait_for_write",
|
| 47 |
+
"SKIP_HEADER",
|
| 48 |
+
"SKIPPABLE_HEADERS",
|
| 49 |
+
)
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/connection.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import absolute_import
|
| 2 |
+
|
| 3 |
+
import socket
|
| 4 |
+
|
| 5 |
+
from ..contrib import _appengine_environ
|
| 6 |
+
from ..exceptions import LocationParseError
|
| 7 |
+
from ..packages import six
|
| 8 |
+
from .wait import NoWayToWaitForSocketError, wait_for_read
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def is_connection_dropped(conn):  # Platform-specific
    """
    Tell whether ``conn`` has been dropped and should be closed.

    :param conn:
        :class:`http.client.HTTPConnection` object.

    Note: On platforms like AppEngine the answer is always ``False``, so
    that the platform can recycle connections transparently.
    """
    sock = getattr(conn, "sock", False)
    if sock is None:  # Connection already closed (such as by httplib).
        return True
    if sock is False:  # Platform-specific: AppEngine
        return False

    try:
        # An idle socket that is readable has been dropped.
        dropped = wait_for_read(sock, timeout=0.0)
    except NoWayToWaitForSocketError:  # Platform-specific: AppEngine
        return False
    return dropped
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# This function is copied from socket.py in the Python 2.7 standard
|
| 34 |
+
# library test suite. Added to its signature is only `socket_options`.
|
| 35 |
+
# One additional modification is that we avoid binding to IPv6 servers
|
| 36 |
+
# discovered in DNS if the system doesn't have IPv6 functionality.
|
| 37 |
+
def create_connection(
    address,
    timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
    source_address=None,
    socket_options=None,
):
    """Connect to *address* and return the socket object.

    Convenience function.  Connect to *address* (a 2-tuple ``(host,
    port)``) and return the socket object.  Passing the optional
    *timeout* parameter will set the timeout on the socket instance
    before attempting to connect.  If no *timeout* is supplied, the
    global default timeout setting returned by :func:`socket.getdefaulttimeout`
    is used.  If *source_address* is set it must be a tuple of (host, port)
    for the socket to bind as a source address before making the connection.
    A host of '' or port 0 tells the OS to use the default.

    :param socket_options:
        Optional iterable of argument tuples for ``setsockopt``, applied
        to each candidate socket before it connects.

    :raises LocationParseError:
        If the host cannot be IDNA-encoded.

    :raises socket.error:
        The error of the last failed attempt, or a generic error when
        ``getaddrinfo`` returns no addresses.
    """

    host, port = address
    if host.startswith("["):
        host = host.strip("[]")
    err = None

    # Using the value from allowed_gai_family() in the context of getaddrinfo lets
    # us select whether to work with IPv4 DNS records, IPv6 records, or both.
    # The original create_connection function always returns all records.
    family = allowed_gai_family()

    try:
        host.encode("idna")
    except UnicodeError:
        return six.raise_from(
            LocationParseError(u"'%s', label empty or too long" % host), None
        )

    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
        af, socktype, proto, canonname, sa = res
        sock = None
        try:
            sock = socket.socket(af, socktype, proto)

            # If provided, set socket level options before connecting.
            _set_socket_options(sock, socket_options)

            if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
                sock.settimeout(timeout)
            if source_address:
                sock.bind(source_address)
            sock.connect(sa)
            return sock

        except socket.error as e:
            # Remember the failure and try the next address.
            err = e
            if sock is not None:
                sock.close()
                sock = None

    if err is not None:
        try:
            raise err
        finally:
            # The raised exception's traceback references this frame, and
            # this frame references the exception through `err`. Drop the
            # local so the cycle does not keep sockets and buffers alive
            # until the next garbage collection.
            err = None

    raise socket.error("getaddrinfo returns an empty list")
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _set_socket_options(sock, options):
|
| 101 |
+
if options is None:
|
| 102 |
+
return
|
| 103 |
+
|
| 104 |
+
for opt in options:
|
| 105 |
+
sock.setsockopt(*opt)
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def allowed_gai_family():
    """Pick the address family to hand to getaddrinfo.

    ``socket.AF_UNSPEC`` (getaddrinfo's default, a DNS search for both
    IPv6 and IPv4 records) is returned when ``HAS_IPV6`` is set;
    otherwise the lookup is limited to ``socket.AF_INET``."""

    return socket.AF_UNSPEC if HAS_IPV6 else socket.AF_INET
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _has_ipv6(host):
    """Returns True if an IPv6 socket can be bound to ``host``."""
    # App Engine doesn't support IPV6 sockets and puts a quota on the
    # number of sockets in use, so bail out before creating one.
    # See https://github.com/urllib3/urllib3/issues/1446
    if _appengine_environ.is_appengine_sandbox():
        return False

    # socket.has_ipv6 only says that cPython was compiled with IPv6
    # support, not that the system has it enabled; an actual bind is
    # needed to find out.
    # https://github.com/urllib3/urllib3/pull/611
    # https://bugs.python.org/issue658327
    if not socket.has_ipv6:
        return False

    probe = None
    bound = False
    try:
        probe = socket.socket(socket.AF_INET6)
        probe.bind((host, 0))
        bound = True
    except Exception:
        # Any failure simply means IPv6 is not usable.
        pass

    if probe:
        probe.close()
    return bound
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
HAS_IPV6 = _has_ipv6("::1")
|
LTA_openwebtext_dualt/mini_owt_logdirichlet/.venv_qwen35/lib/python3.12/site-packages/pip/_vendor/urllib3/util/proxy.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .ssl_ import create_urllib3_context, resolve_cert_reqs, resolve_ssl_version
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def connection_requires_http_tunnel(
    proxy_url=None, proxy_config=None, destination_scheme=None
):
    """
    Decide whether the connection has to go through an HTTP CONNECT
    tunnel on the proxy.

    :param URL proxy_url:
        URL of the proxy.
    :param ProxyConfig proxy_config:
        Proxy configuration from poolmanager.py
    :param str destination_scheme:
        The scheme of the destination. (i.e https, http, etc)
    """
    # Without a proxy there is nothing to tunnel through.
    if proxy_url is None:
        return False

    # Plain HTTP destinations are always forwarded, never tunneled.
    if destination_scheme == "http":
        return False

    # An HTTPS proxy may be configured to forward HTTPS destinations;
    # every other combination uses a tunnel.
    forwards_https = (
        proxy_url.scheme == "https"
        and proxy_config
        and proxy_config.use_forwarding_for_https
    )
    return not forwards_https
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def create_proxy_ssl_context(
    ssl_version, cert_reqs, ca_certs=None, ca_cert_dir=None, ca_cert_data=None
):
    """
    Build the default SSL context for talking to a proxy, for use when
    the user has not supplied one. The default certificates are loaded
    only if no CA source was passed in and the context supports it.
    """
    context = create_urllib3_context(
        ssl_version=resolve_ssl_version(ssl_version),
        cert_reqs=resolve_cert_reqs(cert_reqs),
    )

    custom_ca_given = ca_certs or ca_cert_dir or ca_cert_data
    if not custom_ca_given and hasattr(context, "load_default_certs"):
        context.load_default_certs()

    return context
|