Joblib
ynuozhang commited on
Commit
e66c5e2
·
1 Parent(s): 78e29df

add light install

Browse files
README.md CHANGED
@@ -27,7 +27,21 @@ This is the repository for [PeptiVerse: A Unified Platform for Therapeutic Pepti
27
  - [Citation](#citation)
28
 
29
  ## Quick Start
 
 
 
 
 
 
 
30
 
 
 
 
 
 
 
 
31
  ```bash
32
  # Clone repository
33
  git clone https://huggingface.co/ChatterjeeLab/PeptiVerse
 
27
  - [Citation](#citation)
28
 
29
  ## Quick Start
30
+ - Lightweight start (basic models, no cuML; read below for details)
31
+ ```bash
32
+ # Ignore all LFS files
33
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/ChatterjeeLab/PeptiVerse
34
+
35
+ # Install basic pkgs
36
+ pip install -r requirements.txt
37
 
38
+ # Download basic model weights according to basic_models.txt. Adjust the config as needed.
39
+ python download_light.py
40
+
41
+ # Test in inference
42
+ python inference.py
43
+ ```
44
+ - Full model clone (will clone all model weights)
45
  ```bash
46
  # Clone repository
47
  git clone https://huggingface.co/ChatterjeeLab/PeptiVerse
basic_models.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Properties, Best_Model_WT, Best_Model_SMILES, Type, Threshold_WT, Threshold_SMILES,
2
+ Hemolysis, XGB, Transformer, Classifier, 0.2521, 0.4343 ,
3
+ Non-Fouling, MLP, XGB, Classifier, 0.57, 0.6969,
4
+ Solubility, CNN, -, Classifier, 0.377, -,
5
+ Permeability (Penetrance), XGB, -, Classifier, 0.5493, -,
6
+ Toxicity, -, Transformer, Classifier, -, 0.3401,
7
+ Binding_affinity, unpooled, unpooled, Regression, -, -,
8
+ Permeability_PAMPA, -, CNN, Regression, -, -,
9
+ Permeability_CACO2, -, SVR, Regression, -, -,
10
+ Halflife, Transformer, XGB, Regression, -, -,
download_light.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ from __future__ import annotations
3
+
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Dict, List, Optional, Tuple
7
+
8
+ from huggingface_hub import snapshot_download
9
+ from inference import (
10
+ PeptiVersePredictor,
11
+ read_best_manifest_csv,
12
+ canon_model,
13
+ )
14
+
15
+ # -----------------------------
16
+ # Config
17
+ # -----------------------------
18
+ MODEL_REPO = "ChatterjeeLab/PeptiVerse"
19
+ DEFAULT_ASSETS_DIR = Path("./") # where downloaded models live
20
+ DEFAULT_MANIFEST = Path("./basic_models.txt")
21
+
22
+ BANNED_MODELS = {"svm", "enet", "svm_gpu", "enet_gpu"}
23
+
24
+
25
+ def _norm_prop_disk(prop_key: str) -> str:
26
+ return "half_life" if prop_key == "halflife" else prop_key
27
+
28
+ def _resolve_expected_model_dir(prop_key: str, model_name: str, mode: str) -> str:
29
+ disk_prop = _norm_prop_disk(prop_key)
30
+ base = f"training_classifiers/{disk_prop}"
31
+
32
+ # binding affinity is special: its label is pooled/unpooled and folder uses wt_<mode>_<pooled|unpooled>
33
+ if prop_key == "binding_affinity":
34
+ pooled_or_unpooled = model_name # "pooled" or "unpooled"
35
+ return f"{base}/wt_{mode}_{pooled_or_unpooled}"
36
+
37
+ # halflife special folders
38
+ if prop_key == "halflife":
39
+ if model_name in {"xgb_wt_log", "xgb_smiles"}:
40
+ return f"{base}/{model_name}"
41
+ if mode == "wt" and model_name == "transformer":
42
+ return f"{base}/transformer_wt_log"
43
+ if model_name == "xgb":
44
+ return f"{base}/{'xgb_wt_log' if mode == 'wt' else 'xgb_smiles'}"
45
+
46
+ return f"{base}/{model_name}_{mode}"
47
+
48
+
49
def build_allow_patterns_from_manifest(manifest_path: Path) -> List[str]:
    """Collect ``snapshot_download`` allow-patterns for the manifest's best models.

    For each property the best WT and best SMILES models are resolved to their
    on-disk folders; models in BANNED_MODELS (cuML-backed families unavailable
    in the light install) are substituted with XGB. Only the minimal artifact
    files are requested, never whole folders.
    """
    manifest = read_best_manifest_csv(manifest_path)
    patterns: List[str] = []

    for prop_key, row in manifest.items():
        for mode, raw_label in (("wt", row.best_wt), ("smiles", row.best_smiles)):
            model = canon_model(raw_label)
            if model is None:
                continue

            # GPU-only (cuML) families fall back to XGB in the light install.
            if model in BANNED_MODELS:
                model = "xgb"

            folder = _resolve_expected_model_dir(prop_key, model, mode)

            # Fetch only the "basic" artifacts, not everything in the folder.
            patterns.extend(
                (
                    f"{folder}/best_model.json",
                    f"{folder}/best_model.pt",
                    f"{folder}/best_model*.joblib",
                    f"{folder}/best_model*.json",
                )
            )

    # De-duplicate while preserving first-seen order.
    return list(dict.fromkeys(patterns))
81
+
82
+
83
def download_assets(
    repo_id: str,
    manifest_path: Path,
    out_dir: Path,
) -> Path:
    """Download only the manifest's best-model artifacts from the HF repo.

    Creates ``out_dir`` if needed and returns its resolved path.

    NOTE(review): ``local_dir_use_symlinks`` is deprecated (and ignored) in
    recent huggingface_hub releases — confirm the pinned version before
    removing it.
    """
    target = out_dir.resolve()
    target.mkdir(parents=True, exist_ok=True)

    snapshot_download(
        repo_id=repo_id,
        local_dir=str(target),
        local_dir_use_symlinks=False,
        allow_patterns=build_allow_patterns_from_manifest(manifest_path),
    )
    return target
100
+
101
+
102
+ # -----------------------------
103
+ # Main
104
+ # -----------------------------
105
def main():
    """CLI entry point: download the lightweight model set.

    Parses flags, validates the manifest path, and fetches only the
    best-model artifacts listed in the manifest. The predictor flags
    (--device, --property, --mode, --input, --target_seq, --binder) are
    consumed only by the optional smoke-test code below, which is
    currently disabled.

    Raises:
        FileNotFoundError: if the manifest file does not exist.
    """
    import argparse

    ap = argparse.ArgumentParser(description="Lightweight PeptiVerse inference with on-demand model download.")
    ap.add_argument("--repo", default=MODEL_REPO, help="HF repo id containing weights/assets.")
    # Help text previously said "best_models.txt", which contradicted the
    # default (basic_models.txt) — the light install ships basic_models.txt.
    ap.add_argument("--manifest", default=str(DEFAULT_MANIFEST), help="Path to the manifest CSV (default: basic_models.txt)")
    ap.add_argument("--assets", default=str(DEFAULT_ASSETS_DIR), help="Where to store downloaded assets")
    ap.add_argument("--device", default=None, help="cuda / cpu / cuda:0, etc")

    ap.add_argument("--property", default="hemolysis", help="Property key (e.g. hemolysis, solubility, ...)")
    ap.add_argument("--mode", default="wt", choices=["wt", "smiles"], help="Input type: wt=AA sequence, smiles=SMILES")
    ap.add_argument("--input", default="GIGAVLKVLTTGLPALISWIKRKRQQ", help="Sequence or SMILES string")
    ap.add_argument("--target_seq", default=None, help="Target WT sequence for binding_affinity")
    ap.add_argument("--binder", default=None, help="Binder string (AA or SMILES) for binding_affinity")
    args = ap.parse_args()

    manifest_path = Path(args.manifest)
    if not manifest_path.exists():
        raise FileNotFoundError(f"Manifest not found: {manifest_path}")

    # assets_dir is only consumed by the disabled smoke test below.
    assets_dir = download_assets(args.repo, manifest_path=manifest_path, out_dir=Path(args.assets))

    # OPTIONAL TEST CODE (disabled) — smoke-test the downloaded weights:
    #
    # predictor = PeptiVersePredictor(
    #     manifest_path="basic_models.txt",  # use the downloaded copy to be consistent
    #     classifier_weight_root=str(assets_dir),
    #     device=args.device,
    # )
    #
    # if args.property == "binding_affinity":
    #     if not args.target_seq or not args.binder:
    #         raise ValueError("For binding_affinity, provide --target_seq and --binder.")
    #     out = predictor.predict_binding_affinity(args.mode, target_seq=args.target_seq, binder_str=args.binder)
    # else:
    #     out = predictor.predict_property(args.property, args.mode, args.input)
    #
    # print(out)


if __name__ == "__main__":
    main()
environment.yml DELETED
@@ -1,434 +0,0 @@
1
- name: metal
2
- channels:
3
- - conda-forge
4
- - omnia
5
- - defaults
6
- dependencies:
7
- - _libgcc_mutex=0.1=main
8
- - _openmp_mutex=5.1=1_gnu
9
- - bzip2=1.0.8=h5eee18b_6
10
- - ca-certificates=2025.11.12=hbd8a1cb_0
11
- - certifi=2025.11.12=pyhd8ed1ab_0
12
- - expat=2.7.1=h6a678d5_0
13
- - git-lfs=3.7.1=h6138981_0
14
- - ld_impl_linux-64=2.40=h12ee557_0
15
- - libffi=3.4.4=h6a678d5_1
16
- - libgcc-ng=11.2.0=h1234567_1
17
- - libgomp=11.2.0=h1234567_1
18
- - libstdcxx-ng=11.2.0=h1234567_1
19
- - libuuid=1.41.5=h5eee18b_0
20
- - libxcb=1.17.0=h9b100fa_0
21
- - ncurses=6.5=h7934f7d_0
22
- - openssl=3.0.17=h5eee18b_0
23
- - pip=25.1=pyhc872135_2
24
- - pthread-stubs=0.3=h0ce48e5_1
25
- - python=3.10.18=h1a3bd86_0
26
- - readline=8.3=hc2a1206_0
27
- - ripgrep=13.0.0=h2f28480_2
28
- - sqlite=3.50.2=hb25bd0a_1
29
- - tk=8.6.15=h54e0aa7_0
30
- - wheel=0.45.1=py310h06a4308_0
31
- - xorg-libx11=1.8.12=h9b100fa_1
32
- - xorg-libxau=1.0.12=h9b100fa_0
33
- - xorg-libxdmcp=1.1.5=h9b100fa_0
34
- - xorg-xorgproto=2024.1=h5eee18b_1
35
- - xz=5.6.4=h5eee18b_1
36
- - zlib=1.2.13=h5eee18b_1
37
- - pip:
38
- - about-time==4.2.1
39
- - absl-py==2.3.1
40
- - accelerate==1.10.0
41
- - aiofiles==23.2.1
42
- - aiohappyeyeballs==2.6.1
43
- - aiohttp==3.12.15
44
- - aiosignal==1.4.0
45
- - alembic==1.17.2
46
- - alive-progress==3.3.0
47
- - anndata==0.11.4
48
- - annotated-doc==0.0.3
49
- - annotated-types==0.7.0
50
- - antlr4-python3-runtime==4.9.3
51
- - anyio==4.10.0
52
- - argon2-cffi==25.1.0
53
- - argon2-cffi-bindings==25.1.0
54
- - array-api-compat==1.12.0
55
- - arrow==1.3.0
56
- - ase==3.26.0
57
- - astroid==3.3.11
58
- - asttokens==3.0.0
59
- - astunparse==1.6.3
60
- - async-lru==2.0.5
61
- - async-timeout==5.0.1
62
- - attrs==25.3.0
63
- - autograd==1.8.0
64
- - autopep8==2.0.4
65
- - babel==2.17.0
66
- - beautifulsoup4==4.13.4
67
- - biopython==1.85
68
- - biotite==0.41.2
69
- - biotraj==1.2.2
70
- - black==25.1.0
71
- - bleach==6.2.0
72
- - blosum==2.0.3
73
- - bokeh==3.6.3
74
- - brotli==1.2.0
75
- - cachetools==6.2.3
76
- - cffi==1.17.1
77
- - cftime==1.6.4.post1
78
- - charset-normalizer==3.4.3
79
- - click==8.2.1
80
- - cloudpathlib==0.23.0
81
- - cloudpickle==3.1.2
82
- - cma==4.4.0
83
- - colorama==0.4.6
84
- - colorcet==3.1.0
85
- - colorlog==6.10.1
86
- - comm==0.2.3
87
- - contourpy==1.3.2
88
- - cucim-cu12==25.12.0
89
- - cuda-bindings==12.9.4
90
- - cuda-core==0.3.2
91
- - cuda-pathfinder==1.3.3
92
- - cuda-python==12.9.4
93
- - cuda-toolkit==12.9.1
94
- - cudf-cu12==25.12.0
95
- - cugraph-cu12==25.12.2
96
- - cuml-cu12==25.12.0
97
- - cupy-cuda12x==13.6.0
98
- - cuvs-cu12==25.12.0
99
- - cuxfilter-cu12==25.12.0
100
- - cycler==0.12.1
101
- - dask==2025.9.1
102
- - dask-cuda==25.12.0
103
- - dask-cudf-cu12==25.12.0
104
- - datasets==4.0.0
105
- - datashader==0.18.2
106
- - debugpy==1.8.16
107
- - decorator==5.2.1
108
- - deeptime==0.4.5
109
- - defusedxml==0.7.1
110
- - deprecated==1.2.18
111
- - dill==0.3.8
112
- - distributed==2025.9.1
113
- - distributed-ucxx-cu12==0.47.0
114
- - docstring-to-markdown==0.17
115
- - einops==0.8.1
116
- - exceptiongroup==1.3.0
117
- - executing==2.2.0
118
- - fair-esm==2.0.0
119
- - fastapi==0.121.0
120
- - fastjsonschema==2.21.2
121
- - fastprogress==1.0.3
122
- - fastrlock==0.8.3
123
- - ffmpy==0.6.4
124
- - filelock==3.19.1
125
- - flake8==7.1.2
126
- - flatbuffers==25.9.23
127
- - fonttools==4.60.0
128
- - fqdn==1.5.1
129
- - frozenlist==1.7.0
130
- - fsspec==2025.3.0
131
- - gast==0.6.0
132
- - gensim==4.4.0
133
- - geopandas==1.1.1
134
- - gitdb==4.0.12
135
- - gitpython==3.1.45
136
- - google-pasta==0.2.0
137
- - gradio==5.20.1
138
- - gradio-client==1.7.2
139
- - graphemeu==0.7.2
140
- - greenlet==3.3.0
141
- - groovy==0.1.2
142
- - grpcio==1.76.0
143
- - h11==0.16.0
144
- - h5py==3.14.0
145
- - hf-xet==1.1.8
146
- - holoviews==1.20.2
147
- - httpcore==1.0.9
148
- - httpx==0.28.1
149
- - huggingface-hub==0.35.3
150
- - humanfriendly==10.0
151
- - hydra-core==1.3.2
152
- - idna==3.10
153
- - igraph==1.0.0
154
- - imageio==2.37.0
155
- - importlib-metadata==8.7.0
156
- - inquirerpy==0.3.4
157
- - ipykernel==6.30.1
158
- - ipython==8.37.0
159
- - isoduration==20.11.0
160
- - isort==6.0.1
161
- - iterative-stratification==0.1.9
162
- - jedi==0.19.2
163
- - jinja2==3.1.6
164
- - joblib==1.5.1
165
- - json5==0.12.1
166
- - jsonpointer==3.0.0
167
- - jsonschema==4.25.1
168
- - jsonschema-specifications==2025.4.1
169
- - jupyter-client==8.6.3
170
- - jupyter-core==5.8.1
171
- - jupyter-events==0.12.0
172
- - jupyter-lsp==2.2.6
173
- - jupyter-server==2.16.0
174
- - jupyter-server-mathjax==0.2.6
175
- - jupyter-server-proxy==4.4.0
176
- - jupyter-server-terminals==0.5.3
177
- - jupyterlab==4.4.6
178
- - jupyterlab-code-formatter==3.0.2
179
- - jupyterlab-git==0.51.2
180
- - jupyterlab-lsp==5.2.0
181
- - jupyterlab-pygments==0.3.0
182
- - jupyterlab-search-replace==1.1.0
183
- - jupyterlab-server==2.27.3
184
- - jupyterlab-spreadsheet-editor==0.7.2
185
- - keras==3.12.0
186
- - kiwisolver==1.4.9
187
- - lark==1.2.2
188
- - lazy-loader==0.4
189
- - lckr-jupyterlab-variableinspector==3.2.4
190
- - legacy-api-wrap==1.5
191
- - leidenalg==0.11.0
192
- - libclang==18.1.1
193
- - libcudf-cu12==25.12.0
194
- - libcugraph-cu12==25.12.2
195
- - libcuml-cu12==25.12.0
196
- - libcuvs-cu12==25.12.0
197
- - libkvikio-cu12==25.12.0
198
- - libraft-cu12==25.12.0
199
- - librmm-cu12==25.12.0
200
- - libucx-cu12==1.19.0
201
- - libucxx-cu12==0.47.0
202
- - lightning==2.5.5
203
- - lightning-utilities==0.15.2
204
- - linearboost==0.1.4
205
- - linkify-it-py==2.0.3
206
- - llvmlite==0.44.0
207
- - locket==1.0.0
208
- - loguru==0.7.3
209
- - mako==1.3.10
210
- - markdown==3.9
211
- - markdown-it-py==4.0.0
212
- - markupsafe==2.1.5
213
- - matplotlib==3.10.6
214
- - matplotlib-inline==0.1.7
215
- - mccabe==0.7.0
216
- - mdit-py-plugins==0.5.0
217
- - mdshare==0.4.2
218
- - mdtraj==1.10.3
219
- - mdurl==0.1.2
220
- - mistune==3.1.3
221
- - ml-dtypes==0.5.3
222
- - mpmath==1.3.0
223
- - msgpack==1.1.2
224
- - msgpack-numpy==0.4.8
225
- - multidict==6.6.4
226
- - multipledispatch==1.0.0
227
- - multiprocess==0.70.16
228
- - mypy-extensions==1.1.0
229
- - namex==0.1.0
230
- - natsort==8.4.0
231
- - nbclient==0.10.2
232
- - nbconvert==7.16.6
233
- - nbdime==4.0.2
234
- - nbformat==5.10.4
235
- - nest-asyncio==1.6.0
236
- - netcdf4==1.7.2
237
- - networkx==3.4.2
238
- - notebook==7.4.5
239
- - notebook-shim==0.2.4
240
- - numba==0.61.2
241
- - numba-cuda==0.19.1
242
- - numpy==1.26.4
243
- - nvidia-cublas-cu12==12.8.4.1
244
- - nvidia-cuda-cccl-cu12==12.9.27
245
- - nvidia-cuda-cupti-cu12==12.8.90
246
- - nvidia-cuda-nvcc-cu12==12.9.86
247
- - nvidia-cuda-nvrtc-cu12==12.8.93
248
- - nvidia-cuda-runtime-cu12==12.8.90
249
- - nvidia-cudnn-cu12==9.10.2.21
250
- - nvidia-cufft-cu12==11.3.3.83
251
- - nvidia-cufile-cu12==1.13.1.3
252
- - nvidia-curand-cu12==10.3.9.90
253
- - nvidia-cusolver-cu12==11.7.3.90
254
- - nvidia-cusparse-cu12==12.5.8.93
255
- - nvidia-cusparselt-cu12==0.7.1
256
- - nvidia-ml-py==13.590.44
257
- - nvidia-nccl-cu12==2.27.3
258
- - nvidia-nvimgcodec-cu12==0.6.1.37
259
- - nvidia-nvjitlink-cu12==12.8.93
260
- - nvidia-nvtx-cu12==12.8.90
261
- - nvtx==0.2.14
262
- - nx-cugraph-cu12==25.12.0
263
- - omegaconf==2.3.0
264
- - opt-einsum==3.4.0
265
- - optree==0.17.0
266
- - optuna==4.6.0
267
- - orjson==3.11.4
268
- - overrides==7.7.0
269
- - p2smi==1.1.1
270
- - packaging==25.0
271
- - pandas==2.3.2
272
- - pandocfilters==1.5.1
273
- - panel==1.7.5
274
- - param==2.3.1
275
- - paretoflow==0.1.5
276
- - parso==0.8.4
277
- - partd==1.4.2
278
- - pathos==0.3.2
279
- - pathspec==0.12.1
280
- - patsy==1.0.2
281
- - peft==0.17.1
282
- - pexpect==4.9.0
283
- - pfzy==0.3.4
284
- - pillow==11.3.0
285
- - platformdirs==4.3.8
286
- - pluggy==1.6.0
287
- - pox==0.3.6
288
- - ppft==1.7.7
289
- - prdc==0.2
290
- - prometheus-client==0.22.1
291
- - prompt-toolkit==3.0.51
292
- - propcache==0.3.2
293
- - protobuf==6.32.0
294
- - psutil==7.0.0
295
- - ptyprocess==0.7.0
296
- - pure-eval==0.2.3
297
- - pyarrow==21.0.0
298
- - pycodestyle==2.12.1
299
- - pycparser==2.22
300
- - pyct==0.6.0
301
- - pydantic==2.11.9
302
- - pydantic-core==2.33.2
303
- - pydocstyle==6.3.0
304
- - pydub==0.25.1
305
- - pyemma==2.5.12
306
- - pyflakes==3.2.0
307
- - pygments==2.19.2
308
- - pylibcudf-cu12==25.12.0
309
- - pylibcugraph-cu12==25.12.2
310
- - pylibraft-cu12==25.12.0
311
- - pylint==3.3.8
312
- - pymoo==0.6.1.5
313
- - pynndescent==0.5.13
314
- - pyogrio==0.12.1
315
- - pyparsing==3.2.5
316
- - pyproj==3.7.1
317
- - python-dateutil==2.9.0.post0
318
- - python-json-logger==3.3.0
319
- - python-lsp-jsonrpc==1.1.2
320
- - python-lsp-server==1.13.0
321
- - python-multipart==0.0.20
322
- - pytoolconfig==1.3.1
323
- - pytorch-lightning==2.5.5
324
- - pytorch-lightning-bolts==0.3.2.post1
325
- - pytorch-metric-learning==2.9.0
326
- - pytz==2025.2
327
- - pyviz-comms==3.0.6
328
- - pyyaml==6.0.2
329
- - pyzmq==27.0.1
330
- - raft-dask-cu12==25.12.0
331
- - rapids-dask-dependency==25.12.0
332
- - rapids-logger==0.2.3
333
- - rdkit==2025.9.1
334
- - referencing==0.36.2
335
- - regex==2025.7.34
336
- - requests==2.32.5
337
- - requests-toolbelt==1.0.0
338
- - rfc3339-validator==0.1.4
339
- - rfc3986-validator==0.1.1
340
- - rfc3987-syntax==1.1.0
341
- - rich==14.1.0
342
- - rmm-cu12==25.12.0
343
- - rope==1.14.0
344
- - rpds-py==0.27.0
345
- - ruff==0.14.3
346
- - safehttpx==0.1.7
347
- - safetensors==0.6.2
348
- - scanpy==1.11.5
349
- - schedulefree==1.4.1
350
- - scikit-image==0.25.2
351
- - scikit-learn==1.7.1
352
- - scipy==1.15.3
353
- - seaborn==0.13.2
354
- - semantic-version==2.10.0
355
- - send2trash==1.8.3
356
- - sentry-sdk==2.35.0
357
- - session-info2==0.2.3
358
- - setuptools==80.9.0
359
- - shapely==2.0.7
360
- - shellingham==1.5.4
361
- - simpervisor==1.0.0
362
- - six==1.17.0
363
- - smart-open==7.4.3
364
- - smilespe==0.0.3
365
- - smmap==5.0.2
366
- - sniffio==1.3.1
367
- - snowballstemmer==3.0.1
368
- - sortedcontainers==2.4.0
369
- - soupsieve==2.7
370
- - sqlalchemy==2.0.45
371
- - stack-data==0.6.3
372
- - starlette==0.49.3
373
- - statsmodels==0.14.5
374
- - sympy==1.14.0
375
- - tblib==3.2.2
376
- - tenacity==9.1.2
377
- - tensorboard==2.20.0
378
- - tensorboard-data-server==0.7.2
379
- - tensorflow==2.20.0
380
- - termcolor==3.2.0
381
- - terminado==0.18.1
382
- - texttable==1.7.0
383
- - threadpoolctl==3.6.0
384
- - tifffile==2025.5.10
385
- - timm==1.0.22
386
- - tinycss2==1.4.0
387
- - tokenizers==0.20.3
388
- - tomli==2.2.1
389
- - tomlkit==0.13.3
390
- - toolz==1.1.0
391
- - torch==2.8.0
392
- - torch-geometric==2.6.1
393
- - torchaudio==2.8.0+cu128
394
- - torchmetrics==1.8.2
395
- - torchtext==0.18.0
396
- - torchvision==0.23.0+cu128
397
- - tornado==6.5.2
398
- - tqdm==4.67.1
399
- - traitlets==5.14.3
400
- - transformers==4.46.0
401
- - treelite==4.6.1
402
- - triton==3.4.0
403
- - typer==0.20.0
404
- - types-python-dateutil==2.9.0.20250809
405
- - typing-extensions==4.15.0
406
- - typing-inspection==0.4.1
407
- - tzdata==2025.2
408
- - uc-micro-py==1.0.3
409
- - ucxx-cu12==0.47.0
410
- - ujson==5.11.0
411
- - umap-learn==0.5.9.post2
412
- - uri-template==1.3.0
413
- - urllib3==2.5.0
414
- - uvicorn==0.38.0
415
- - vampnet==0.1.4.dev16+gc88ed3f0f.d20251028
416
- - wandb==0.21.1
417
- - wcwidth==0.2.13
418
- - webcolors==24.11.1
419
- - webencodings==0.5.1
420
- - websocket-client==1.8.0
421
- - websockets==15.0.1
422
- - werkzeug==3.1.3
423
- - whatthepatch==1.0.7
424
- - wrapt==1.17.3
425
- - xarray==2025.6.1
426
- - xgboost==3.0.4
427
- - xxhash==3.5.0
428
- - xyzservices==2025.11.0
429
- - yapf==0.43.0
430
- - yarl==1.20.1
431
- - zict==3.0.0
432
- - zipp==3.23.0
433
- - zstd==1.5.7.2
434
- prefix: /vast/projects/pranam/lab/yz927/envs/metal
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py CHANGED
@@ -965,7 +965,7 @@ class PeptiVersePredictor:
965
 
966
  if __name__ == "__main__":
967
  predictor = PeptiVersePredictor(
968
- manifest_path="best_models.txt",
969
  classifier_weight_root="./"
970
  )
971
  print(predictor.predict_property("hemolysis", "wt", "GIGAVLKVLTTGLPALISWIKRKRQQ"))
 
965
 
966
  if __name__ == "__main__":
967
  predictor = PeptiVersePredictor(
968
+ manifest_path="basic_models.txt",
969
  classifier_weight_root="./"
970
  )
971
  print(predictor.predict_property("hemolysis", "wt", "GIGAVLKVLTTGLPALISWIKRKRQQ"))
tokenizer/.ipynb_checkpoints/my_tokenizers-checkpoint.py DELETED
@@ -1,398 +0,0 @@
1
- import collections
2
- import logging
3
- import os
4
- import re
5
- import codecs
6
- import unicodedata
7
- from typing import List, Optional
8
- from transformers import PreTrainedTokenizer
9
- from SmilesPE.tokenizer import SPE_Tokenizer
10
-
11
- def load_vocab(vocab_file):
12
- """Loads a vocabulary file into a dictionary."""
13
- vocab = collections.OrderedDict()
14
- with open(vocab_file, "r", encoding="utf-8") as reader:
15
- tokens = reader.readlines()
16
- for index, token in enumerate(tokens):
17
- token = token.rstrip("\n")
18
- vocab[token] = index
19
- return vocab
20
-
21
- class Atomwise_Tokenizer(object):
22
- """Run atom-level SMILES tokenization"""
23
-
24
- def __init__(self):
25
- """ Constructs a atom-level Tokenizer.
26
- """
27
- # self.regex_pattern = r"(\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
28
- self.regex_pattern = r"(\([^\(\)]{0,4}\)|\[[^\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\(|\)|\.|=|#|-|\+|\\|\/\/?|:|~|@|\?|>>?|\*|\$|\%[0-9]{2}|[0-9])"
29
-
30
- self.regex = re.compile(self.regex_pattern)
31
-
32
- def tokenize(self, text):
33
- """ Basic Tokenization of a SMILES.
34
- """
35
- tokens = [token for token in self.regex.findall(text)]
36
- return tokens
37
-
38
- class SMILES_SPE_Tokenizer(PreTrainedTokenizer):
39
- r"""
40
- Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE).
41
- This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
42
- should refer to the superclass for more information regarding methods.
43
- Args:
44
- vocab_file (:obj:`string`):
45
- File containing the vocabulary.
46
- spe_file (:obj:`string`):
47
- File containing the trained SMILES Pair Encoding vocabulary.
48
- unk_token (:obj:`string`, `optional`, defaults to "[UNK]"):
49
- The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
50
- token instead.
51
- sep_token (:obj:`string`, `optional`, defaults to "[SEP]"):
52
- The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
53
- for sequence classification or for a text and a question for question answering.
54
- It is also used as the last token of a sequence built with special tokens.
55
- pad_token (:obj:`string`, `optional`, defaults to "[PAD]"):
56
- The token used for padding, for example when batching sequences of different lengths.
57
- cls_token (:obj:`string`, `optional`, defaults to "[CLS]"):
58
- The classifier token which is used when doing sequence classification (classification of the whole
59
- sequence instead of per-token classification). It is the first token of the sequence when built with
60
- special tokens.
61
- mask_token (:obj:`string`, `optional`, defaults to "[MASK]"):
62
- The token used for masking values. This is the token used when training this model with masked language
63
- modeling. This is the token which the model will try to predict.
64
- """
65
-
66
- def __init__(self, vocab_file, spe_file,
67
- unk_token="[UNK]",
68
- sep_token="[SEP]",
69
- pad_token="[PAD]",
70
- cls_token="[CLS]",
71
- mask_token="[MASK]",
72
- **kwargs):
73
- if not os.path.isfile(vocab_file):
74
- raise ValueError("Can't find a vocabulary file at path '{}'.".format(vocab_file))
75
- if not os.path.isfile(spe_file):
76
- raise ValueError("Can't find a SPE vocabulary file at path '{}'.".format(spe_file))
77
-
78
- self.vocab = load_vocab(vocab_file)
79
- self.spe_vocab = open(spe_file, 'r', encoding='utf-8')
80
- self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
81
- self.spe_tokenizer = SPE_Tokenizer(self.spe_vocab)
82
-
83
- super().__init__(
84
- unk_token=unk_token,
85
- sep_token=sep_token,
86
- pad_token=pad_token,
87
- cls_token=cls_token,
88
- mask_token=mask_token,
89
- **kwargs)
90
-
91
- @property
92
- def vocab_size(self):
93
- return len(self.vocab)
94
-
95
- def get_vocab(self):
96
- return dict(self.vocab, **self.added_tokens_encoder)
97
-
98
- def _tokenize(self, text):
99
- return self.spe_tokenizer.tokenize(text).split(' ')
100
-
101
- def _convert_token_to_id(self, token):
102
- """ Converts a token (str) in an id using the vocab. """
103
- return self.vocab.get(token, self.vocab.get(self.unk_token))
104
-
105
- def decode(self, token_ids, skip_special_tokens=False, clean_up_tokenization_spaces=True):
106
- text = self.convert_ids_to_tokens(token_ids, skip_special_tokens=skip_special_tokens)
107
- return self.convert_tokens_to_string(text)
108
-
109
- def _convert_id_to_token(self, index):
110
- """Converts an index (integer) in a token (str) using the vocab."""
111
- return self.ids_to_tokens.get(index, self.unk_token)
112
-
113
- def convert_tokens_to_string(self, tokens):
114
- """ Converts a sequence of tokens (string) in a single string. """
115
- out_string = " ".join(tokens).replace(" ##", "").strip()
116
- return out_string
117
-
118
- def build_inputs_with_special_tokens(
119
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
120
- ) -> List[int]:
121
- """
122
- Build model inputs from a sequence or a pair of sequence for sequence classification tasks
123
- by concatenating and adding special tokens.
124
- A BERT sequence has the following format:
125
- - single sequence: ``[CLS] X [SEP]``
126
- - pair of sequences: ``[CLS] A [SEP] B [SEP]``
127
- Args:
128
- token_ids_0 (:obj:`List[int]`):
129
- List of IDs to which the special tokens will be added
130
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
131
- Optional second list of IDs for sequence pairs.
132
- Returns:
133
- :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
134
- """
135
- if token_ids_1 is None:
136
- return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
137
- cls = [self.cls_token_id]
138
- sep = [self.sep_token_id]
139
- return cls + token_ids_0 + sep + token_ids_1 + sep
140
-
141
- def get_special_tokens_mask(
142
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
143
- ) -> List[int]:
144
- """
145
- Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
146
- special tokens using the tokenizer ``prepare_for_model`` method.
147
- Args:
148
- token_ids_0 (:obj:`List[int]`):
149
- List of ids.
150
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
151
- Optional second list of IDs for sequence pairs.
152
- already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
153
- Set to True if the token list is already formatted with special tokens for the model
154
- Returns:
155
- :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
156
- """
157
-
158
- if already_has_special_tokens:
159
- if token_ids_1 is not None:
160
- raise ValueError(
161
- "You should not supply a second sequence if the provided sequence of "
162
- "ids is already formated with special tokens for the model."
163
- )
164
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
165
-
166
- if token_ids_1 is not None:
167
- return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
168
- return [1] + ([0] * len(token_ids_0)) + [1]
169
-
170
- def create_token_type_ids_from_sequences(
171
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
172
- ) -> List[int]:
173
- """
174
- Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
175
- A BERT sequence pair mask has the following format:
176
- ::
177
- 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
178
- | first sequence | second sequence |
179
- if token_ids_1 is None, only returns the first portion of the mask (0's).
180
- Args:
181
- token_ids_0 (:obj:`List[int]`):
182
- List of ids.
183
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
184
- Optional second list of IDs for sequence pairs.
185
- Returns:
186
- :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
187
- sequence(s).
188
- """
189
- sep = [self.sep_token_id]
190
- cls = [self.cls_token_id]
191
- if token_ids_1 is None:
192
- return len(cls + token_ids_0 + sep) * [0]
193
- return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]
194
-
195
- def save_vocabulary(self, vocab_path):
196
- """
197
- Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory.
198
- Args:
199
- vocab_path (:obj:`str`):
200
- The directory in which to save the vocabulary.
201
- Returns:
202
- :obj:`Tuple(str)`: Paths to the files saved.
203
- """
204
- index = 0
205
- if os.path.isdir(vocab_path):
206
- vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"])
207
- else:
208
- vocab_file = vocab_path
209
- with open(vocab_file, "w", encoding="utf-8") as writer:
210
- for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
211
- if index != token_index:
212
- logger.warning(
213
- "Saving vocabulary to {}: vocabulary indices are not consecutive."
214
- " Please check that the vocabulary is not corrupted!".format(vocab_file)
215
- )
216
- index = token_index
217
- writer.write(token + "\n")
218
- index += 1
219
- return (vocab_file,)
220
-
221
- class SMILES_Atomwise_Tokenizer(PreTrainedTokenizer):
222
- r"""
223
- Constructs a SMILES tokenizer. Based on SMILES Pair Encoding (https://github.com/XinhaoLi74/SmilesPE).
224
- This tokenizer inherits from :class:`~transformers.PreTrainedTokenizer` which contains most of the methods. Users
225
- should refer to the superclass for more information regarding methods.
226
- Args:
227
- vocab_file (:obj:`string`):
228
- File containing the vocabulary.
229
- unk_token (:obj:`string`, `optional`, defaults to "[UNK]"):
230
- The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
231
- token instead.
232
- sep_token (:obj:`string`, `optional`, defaults to "[SEP]"):
233
- The separator token, which is used when building a sequence from multiple sequences, e.g. two sequences
234
- for sequence classification or for a text and a question for question answering.
235
- It is also used as the last token of a sequence built with special tokens.
236
- pad_token (:obj:`string`, `optional`, defaults to "[PAD]"):
237
- The token used for padding, for example when batching sequences of different lengths.
238
- cls_token (:obj:`string`, `optional`, defaults to "[CLS]"):
239
- The classifier token which is used when doing sequence classification (classification of the whole
240
- sequence instead of per-token classification). It is the first token of the sequence when built with
241
- special tokens.
242
- mask_token (:obj:`string`, `optional`, defaults to "[MASK]"):
243
- The token used for masking values. This is the token used when training this model with masked language
244
- modeling. This is the token which the model will try to predict.
245
- """
246
-
247
- def __init__(
248
- self,
249
- vocab_file,
250
- unk_token="[UNK]",
251
- sep_token="[SEP]",
252
- pad_token="[PAD]",
253
- cls_token="[CLS]",
254
- mask_token="[MASK]",
255
- **kwargs
256
- ):
257
- super().__init__(
258
- unk_token=unk_token,
259
- sep_token=sep_token,
260
- pad_token=pad_token,
261
- cls_token=cls_token,
262
- mask_token=mask_token,
263
- **kwargs,
264
- )
265
-
266
- if not os.path.isfile(vocab_file):
267
- raise ValueError(
268
- "Can't find a vocabulary file at path '{}'.".format(vocab_file)
269
- )
270
- self.vocab = load_vocab(vocab_file)
271
- self.ids_to_tokens = collections.OrderedDict([(ids, tok) for tok, ids in self.vocab.items()])
272
- self.tokenizer = Atomwise_Tokenizer()
273
-
274
- @property
275
- def vocab_size(self):
276
- return len(self.vocab)
277
-
278
- def get_vocab(self):
279
- return dict(self.vocab, **self.added_tokens_encoder)
280
-
281
- def _tokenize(self, text):
282
- return self.tokenizer.tokenize(text)
283
-
284
- def _convert_token_to_id(self, token):
285
- """ Converts a token (str) in an id using the vocab. """
286
- return self.vocab.get(token, self.vocab.get(self.unk_token))
287
-
288
- def _convert_id_to_token(self, index):
289
- """Converts an index (integer) in a token (str) using the vocab."""
290
- return self.ids_to_tokens.get(index, self.unk_token)
291
-
292
- def convert_tokens_to_string(self, tokens):
293
- """ Converts a sequence of tokens (string) in a single string. """
294
- out_string = " ".join(tokens).replace(" ##", "").strip()
295
- return out_string
296
-
297
- def build_inputs_with_special_tokens(
298
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
299
- ) -> List[int]:
300
- """
301
- Build model inputs from a sequence or a pair of sequence for sequence classification tasks
302
- by concatenating and adding special tokens.
303
- A BERT sequence has the following format:
304
- - single sequence: ``[CLS] X [SEP]``
305
- - pair of sequences: ``[CLS] A [SEP] B [SEP]``
306
- Args:
307
- token_ids_0 (:obj:`List[int]`):
308
- List of IDs to which the special tokens will be added
309
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
310
- Optional second list of IDs for sequence pairs.
311
- Returns:
312
- :obj:`List[int]`: list of `input IDs <../glossary.html#input-ids>`__ with the appropriate special tokens.
313
- """
314
- if token_ids_1 is None:
315
- return [self.cls_token_id] + token_ids_0 + [self.sep_token_id]
316
- cls = [self.cls_token_id]
317
- sep = [self.sep_token_id]
318
- return cls + token_ids_0 + sep + token_ids_1 + sep
319
-
320
- def get_special_tokens_mask(
321
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
322
- ) -> List[int]:
323
- """
324
- Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
325
- special tokens using the tokenizer ``prepare_for_model`` method.
326
- Args:
327
- token_ids_0 (:obj:`List[int]`):
328
- List of ids.
329
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
330
- Optional second list of IDs for sequence pairs.
331
- already_has_special_tokens (:obj:`bool`, `optional`, defaults to :obj:`False`):
332
- Set to True if the token list is already formatted with special tokens for the model
333
- Returns:
334
- :obj:`List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
335
- """
336
-
337
- if already_has_special_tokens:
338
- if token_ids_1 is not None:
339
- raise ValueError(
340
- "You should not supply a second sequence if the provided sequence of "
341
- "ids is already formated with special tokens for the model."
342
- )
343
- return list(map(lambda x: 1 if x in [self.sep_token_id, self.cls_token_id] else 0, token_ids_0))
344
-
345
- if token_ids_1 is not None:
346
- return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1)) + [1]
347
- return [1] + ([0] * len(token_ids_0)) + [1]
348
-
349
- def create_token_type_ids_from_sequences(
350
- self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
351
- ) -> List[int]:
352
- """
353
- Creates a mask from the two sequences passed to be used in a sequence-pair classification task.
354
- A BERT sequence pair mask has the following format:
355
- ::
356
- 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
357
- | first sequence | second sequence |
358
- if token_ids_1 is None, only returns the first portion of the mask (0's).
359
- Args:
360
- token_ids_0 (:obj:`List[int]`):
361
- List of ids.
362
- token_ids_1 (:obj:`List[int]`, `optional`, defaults to :obj:`None`):
363
- Optional second list of IDs for sequence pairs.
364
- Returns:
365
- :obj:`List[int]`: List of `token type IDs <../glossary.html#token-type-ids>`_ according to the given
366
- sequence(s).
367
- """
368
- sep = [self.sep_token_id]
369
- cls = [self.cls_token_id]
370
- if token_ids_1 is None:
371
- return len(cls + token_ids_0 + sep) * [0]
372
- return len(cls + token_ids_0 + sep) * [0] + len(token_ids_1 + sep) * [1]
373
-
374
- def save_vocabulary(self, vocab_path):
375
- """
376
- Save the sentencepiece vocabulary (copy original file) and special tokens file to a directory.
377
- Args:
378
- vocab_path (:obj:`str`):
379
- The directory in which to save the vocabulary.
380
- Returns:
381
- :obj:`Tuple(str)`: Paths to the files saved.
382
- """
383
- index = 0
384
- if os.path.isdir(vocab_path):
385
- vocab_file = os.path.join(vocab_path, VOCAB_FILES_NAMES["vocab_file"])
386
- else:
387
- vocab_file = vocab_path
388
- with open(vocab_file, "w", encoding="utf-8") as writer:
389
- for token, token_index in sorted(self.vocab.items(), key=lambda kv: kv[1]):
390
- if index != token_index:
391
- logger.warning(
392
- "Saving vocabulary to {}: vocabulary indices are not consecutive."
393
- " Please check that the vocabulary is not corrupted!".format(vocab_file)
394
- )
395
- index = token_index
396
- writer.write(token + "\n")
397
- index += 1
398
- return (vocab_file,)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer/__pycache__/my_tokenizers.cpython-310.pyc DELETED
Binary file (15.5 kB)