yuccaaa commited on
Commit
dcb9c13
·
verified ·
1 Parent(s): 4d12519

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. data/protein-text/eval_assist.zipltqrmb6_.tmp +3 -0
  3. llm_model/model-00003-of-00004.safetensors +3 -0
  4. model/__pycache__/blip2.cpython-310.pyc +0 -0
  5. model/__pycache__/blip2.cpython-311.pyc +0 -0
  6. model/__pycache__/blip2.cpython-38.pyc +0 -0
  7. model/__pycache__/blip2_opt.cpython-310.pyc +0 -0
  8. model/__pycache__/blip2_opt.cpython-311.pyc +0 -0
  9. model/__pycache__/blip2_stage1.cpython-310.pyc +0 -0
  10. model/__pycache__/blip2_stage1.cpython-311.pyc +0 -0
  11. model/__pycache__/blip2_stage1.cpython-38.pyc +0 -0
  12. model/__pycache__/blip2_stage2.cpython-310.pyc +0 -0
  13. model/__pycache__/blip2_stage3.cpython-310.pyc +0 -0
  14. model/__pycache__/blip2_stage3.cpython-311.pyc +0 -0
  15. model/__pycache__/blip2qformer.cpython-310.pyc +0 -0
  16. model/__pycache__/blip2qformer.cpython-311.pyc +0 -0
  17. model/__pycache__/blip2qformer.cpython-38.pyc +0 -0
  18. model/__pycache__/dist_funs.cpython-310.pyc +0 -0
  19. model/__pycache__/help_funcs.cpython-310.pyc +0 -0
  20. model/__pycache__/opt_flash_attention.cpython-310.pyc +0 -0
  21. model/__pycache__/opt_flash_attention.cpython-311.pyc +0 -0
  22. model/__pycache__/opt_flash_attention.cpython-313.pyc +0 -0
  23. model/test.py +34 -0
  24. plm_model/esm2-150m/.gitattributes +33 -0
  25. plm_model/esm2-150m/README.md +20 -0
  26. plm_model/esm2-150m/config.json +30 -0
  27. plm_model/esm2-150m/model.safetensors +3 -0
  28. plm_model/esm2-150m/pytorch_model.bin +3 -0
  29. plm_model/esm2-150m/special_tokens_map.json +7 -0
  30. plm_model/esm2-150m/tf_model.h5 +3 -0
  31. plm_model/esm2-150m/tokenizer_config.json +4 -0
  32. plm_model/esm2-150m/vocab.txt +33 -0
  33. plm_model/microsoft/.gitattributes +9 -0
  34. plm_model/microsoft/LICENSE.md +21 -0
  35. plm_model/microsoft/README.md +38 -0
  36. plm_model/microsoft/config.json +17 -0
  37. plm_model/microsoft/flax_model.msgpack +3 -0
  38. plm_model/microsoft/pytorch_model.bin +3 -0
  39. plm_model/microsoft/tokenizer_config.json +3 -0
  40. plm_model/microsoft/vocab.txt +0 -0
  41. results/2datasets_construct_predictions1.txt +0 -0
  42. results/2datasets_qweninstruct_predictions.txt +0 -0
  43. results/aav_07252307_predictions.txt +0 -0
  44. results/ablation_deeplocbinary_predictions.txt +0 -0
  45. results/ablation_fluorescence_predictions.txt +0 -0
  46. results/ablation_gb1.0_predictions.txt +0 -0
  47. results/ablation_gb1_predictions.txt +0 -0
  48. results/ablation_material_predictions.txt +0 -0
  49. results/ablation_metallonbinding_predictions.txt +0 -0
  50. results/antibiotic_07262045_predictions.txt +0 -0
.gitattributes CHANGED
@@ -54,3 +54,4 @@ data_small/OntoProteinDatasetV2/train.txt filter=lfs diff=lfs merge=lfs -text
54
  data_small/PDBDataset/abstract.json filter=lfs diff=lfs merge=lfs -text
55
  data_small/PDBDataset/qa_all.json filter=lfs diff=lfs merge=lfs -text
56
  data_small/SwissProtV3/train_set_.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
54
  data_small/PDBDataset/abstract.json filter=lfs diff=lfs merge=lfs -text
55
  data_small/PDBDataset/qa_all.json filter=lfs diff=lfs merge=lfs -text
56
  data_small/SwissProtV3/train_set_.jsonl filter=lfs diff=lfs merge=lfs -text
57
+ data/protein-text/eval_assist.zipltqrmb6_.tmp filter=lfs diff=lfs merge=lfs -text
data/protein-text/eval_assist.zipltqrmb6_.tmp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6092fb4729236af66e921aff8d7330012bc0eb1428240856251cd14ed23b45e9
3
+ size 8291512692
llm_model/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5e22df05cdc66407dd05e130ce0e47ddf56e2d3ee6ef6eef9249b07ecb4e9d7
3
+ size 4333083488
model/__pycache__/blip2.cpython-310.pyc ADDED
Binary file (3.17 kB). View file
 
model/__pycache__/blip2.cpython-311.pyc ADDED
Binary file (5.11 kB). View file
 
model/__pycache__/blip2.cpython-38.pyc ADDED
Binary file (3.11 kB). View file
 
model/__pycache__/blip2_opt.cpython-310.pyc ADDED
Binary file (9.74 kB). View file
 
model/__pycache__/blip2_opt.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
model/__pycache__/blip2_stage1.cpython-310.pyc ADDED
Binary file (11.9 kB). View file
 
model/__pycache__/blip2_stage1.cpython-311.pyc ADDED
Binary file (33.7 kB). View file
 
model/__pycache__/blip2_stage1.cpython-38.pyc ADDED
Binary file (13.7 kB). View file
 
model/__pycache__/blip2_stage2.cpython-310.pyc ADDED
Binary file (14.1 kB). View file
 
model/__pycache__/blip2_stage3.cpython-310.pyc ADDED
Binary file (10.9 kB). View file
 
model/__pycache__/blip2_stage3.cpython-311.pyc ADDED
Binary file (21.1 kB). View file
 
model/__pycache__/blip2qformer.cpython-310.pyc ADDED
Binary file (7.54 kB). View file
 
model/__pycache__/blip2qformer.cpython-311.pyc ADDED
Binary file (15.8 kB). View file
 
model/__pycache__/blip2qformer.cpython-38.pyc ADDED
Binary file (7.46 kB). View file
 
model/__pycache__/dist_funs.cpython-310.pyc ADDED
Binary file (6.71 kB). View file
 
model/__pycache__/help_funcs.cpython-310.pyc ADDED
Binary file (3.96 kB). View file
 
model/__pycache__/opt_flash_attention.cpython-310.pyc ADDED
Binary file (7.2 kB). View file
 
model/__pycache__/opt_flash_attention.cpython-311.pyc ADDED
Binary file (15.8 kB). View file
 
model/__pycache__/opt_flash_attention.cpython-313.pyc ADDED
Binary file (15.2 kB). View file
 
model/test.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Smoke test: generate an answer from a causal LM checkpoint via ``inputs_embeds``.

The script loads a tokenizer and model from a local checkpoint directory,
embeds a single multiple-choice prompt with the model's own input embedding
layer, and calls ``generate`` on those embeddings instead of on token ids.
The decoded generations are printed to stdout.
"""
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Single source of truth for the checkpoint; tokenizer and model must come
# from the same directory.
CHECKPOINT_PATH = "/oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300"

llm_tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH, use_fast=False, padding_side='right')
llm_tokenizer.add_special_tokens({'pad_token': '<pad>'})

llm_model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, torch_dtype=torch.bfloat16)
# Keep the embedding matrix in sync with the tokenizer after registering the
# pad token above.
llm_model.resize_token_embeddings(len(llm_tokenizer))

text = "You need to answer the following question directly, which means you can only give the number of the option in the answer. For example: <ANSWER>option number</ANSWER> Based on the following protein\'s amino acid sequence, is the protein located on the membrane? Swiss-Prot description for P86987,Options:\n0.Yes\n1.No"

# Step 1: encode the prompt into input_ids and attention_mask (as tensors).
inputs = llm_tokenizer(text, return_tensors="pt")

input_ids = inputs["input_ids"]  # shape: [1, L]
attention_mask = inputs["attention_mask"]  # shape: [1, L]

# Step 2: obtain inputs_embeds from the model's input embedding layer.
with torch.no_grad():
    inputs_embeds = llm_model.get_input_embeddings()(input_ids)

outputs = llm_model.generate(
    inputs_embeds=inputs_embeds,
    attention_mask=attention_mask,
    max_length=128,
    min_length=1,
    use_cache=True,
    # NOTE(review): "hybrid" cache support is model-dependent -- confirm it
    # is valid for this checkpoint's architecture.
    cache_implementation="hybrid",
)

# This is a flat script, not a method: use the module-level tokenizer.
# (The previous `self.llm_tokenizer` raised NameError here.)
output_text = llm_tokenizer.batch_decode(outputs, skip_special_tokens=True)
output_text = [decoded.strip() for decoded in output_text]
print(output_text)
plm_model/esm2-150m/.gitattributes ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
11
+ *.model filter=lfs diff=lfs merge=lfs -text
12
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
13
+ *.npy filter=lfs diff=lfs merge=lfs -text
14
+ *.npz filter=lfs diff=lfs merge=lfs -text
15
+ *.onnx filter=lfs diff=lfs merge=lfs -text
16
+ *.ot filter=lfs diff=lfs merge=lfs -text
17
+ *.parquet filter=lfs diff=lfs merge=lfs -text
18
+ *.pb filter=lfs diff=lfs merge=lfs -text
19
+ *.pickle filter=lfs diff=lfs merge=lfs -text
20
+ *.pkl filter=lfs diff=lfs merge=lfs -text
21
+ *.pt filter=lfs diff=lfs merge=lfs -text
22
+ *.pth filter=lfs diff=lfs merge=lfs -text
23
+ *.rar filter=lfs diff=lfs merge=lfs -text
24
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
26
+ *.tflite filter=lfs diff=lfs merge=lfs -text
27
+ *.tgz filter=lfs diff=lfs merge=lfs -text
28
+ *.wasm filter=lfs diff=lfs merge=lfs -text
29
+ *.xz filter=lfs diff=lfs merge=lfs -text
30
+ *.zip filter=lfs diff=lfs merge=lfs -text
31
+ *.zst filter=lfs diff=lfs merge=lfs -text
32
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
33
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
plm_model/esm2-150m/README.md ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ widget:
4
+ - text: "MQIFVKTLTGKTITLEVEPS<mask>TIENVKAKIQDKEGIPPDQQRLIFAGKQLEDGRTLSDYNIQKESTLHLVLRLRGG"
5
+ ---
6
+
7
+ ## ESM-2
8
+
9
+ ESM-2 is a state-of-the-art protein model trained on a masked language modelling objective. It is suitable for fine-tuning on a wide range of tasks that take protein sequences as input. For detailed information on the model architecture and training data, please refer to the [accompanying paper](https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2). You may also be interested in some demo notebooks ([PyTorch](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/protein_language_modeling.ipynb), [TensorFlow](https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/protein_language_modeling-tf.ipynb)) which demonstrate how to fine-tune ESM-2 models on your tasks of interest.
10
+
11
+ Several ESM-2 checkpoints are available in the Hub with varying sizes. Larger sizes generally have somewhat better accuracy, but require much more memory and time to train:
12
+
13
+ | Checkpoint name | Num layers | Num parameters |
14
+ |------------------------------|----|----------|
15
+ | [esm2_t48_15B_UR50D](https://huggingface.co/facebook/esm2_t48_15B_UR50D) | 48 | 15B |
16
+ | [esm2_t36_3B_UR50D](https://huggingface.co/facebook/esm2_t36_3B_UR50D) | 36 | 3B |
17
+ | [esm2_t33_650M_UR50D](https://huggingface.co/facebook/esm2_t33_650M_UR50D) | 33 | 650M |
18
+ | [esm2_t30_150M_UR50D](https://huggingface.co/facebook/esm2_t30_150M_UR50D) | 30 | 150M |
19
+ | [esm2_t12_35M_UR50D](https://huggingface.co/facebook/esm2_t12_35M_UR50D) | 12 | 35M |
20
+ | [esm2_t6_8M_UR50D](https://huggingface.co/facebook/esm2_t6_8M_UR50D) | 6 | 8M |
plm_model/esm2-150m/config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/tmp/facebook/esm2_t30_150M_UR50D",
3
+ "architectures": [
4
+ "EsmForMaskedLM"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "classifier_dropout": null,
8
+ "emb_layer_norm_before": false,
9
+ "esmfold_config": null,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.0,
12
+ "hidden_size": 640,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 2560,
15
+ "is_folding_model": false,
16
+ "layer_norm_eps": 1e-05,
17
+ "mask_token_id": 32,
18
+ "max_position_embeddings": 1026,
19
+ "model_type": "esm",
20
+ "num_attention_heads": 20,
21
+ "num_hidden_layers": 30,
22
+ "pad_token_id": 1,
23
+ "position_embedding_type": "rotary",
24
+ "token_dropout": true,
25
+ "torch_dtype": "float32",
26
+ "transformers_version": "4.25.0.dev0",
27
+ "use_cache": true,
28
+ "vocab_list": null,
29
+ "vocab_size": 33
30
+ }
plm_model/esm2-150m/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3f1da8aea53bddd32c246c86168c23b9fd72341fb9db9a94436f855f5053566
3
+ size 595257706
plm_model/esm2-150m/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88feb574b9f4e31c45762961d1b2ddba95796db86fb480d207b4d15e6ec8aab
3
+ size 595364077
plm_model/esm2-150m/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "<cls>",
3
+ "eos_token": "<eos>",
4
+ "mask_token": "<mask>",
5
+ "pad_token": "<pad>",
6
+ "unk_token": "<unk>"
7
+ }
plm_model/esm2-150m/tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b04a25f0ded4bd3a7665d4589863984117384b239cee6de97c46aefec4a1b5
3
+ size 593355136
plm_model/esm2-150m/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "model_max_length": 1000000000000000019884624838656,
3
+ "tokenizer_class": "EsmTokenizer"
4
+ }
plm_model/esm2-150m/vocab.txt ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <cls>
2
+ <pad>
3
+ <eos>
4
+ <unk>
5
+ L
6
+ A
7
+ G
8
+ V
9
+ S
10
+ E
11
+ R
12
+ T
13
+ I
14
+ D
15
+ P
16
+ K
17
+ Q
18
+ N
19
+ F
20
+ Y
21
+ M
22
+ H
23
+ W
24
+ C
25
+ X
26
+ B
27
+ U
28
+ Z
29
+ O
30
+ .
31
+ -
32
+ <null_1>
33
+ <mask>
plm_model/microsoft/.gitattributes ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
2
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.h5 filter=lfs diff=lfs merge=lfs -text
5
+ *.tflite filter=lfs diff=lfs merge=lfs -text
6
+ *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.ot filter=lfs diff=lfs merge=lfs -text
8
+ *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
plm_model/microsoft/LICENSE.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) Microsoft Corporation
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
19
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21
+ OR OTHER DEALINGS IN THE SOFTWARE.
plm_model/microsoft/README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language: en
3
+ tags:
4
+ - exbert
5
+ license: mit
6
+ widget:
7
+ - text: "[MASK] is a tumor suppressor gene."
8
+ ---
9
+
10
+ ## MSR BiomedBERT (abstracts + full text)
11
+
12
+ <div style="border: 2px solid orange; border-radius:10px; padding:0px 10px; width: fit-content;">
13
+
14
+ * This model was previously named **"PubMedBERT (abstracts + full text)"**.
15
+ * You can either adopt the new model name "microsoft/BiomedNLP-BiomedBERT-base-uncased-abstract-fulltext" or update your `transformers` library to version 4.22+ if you need to refer to the old name.
16
+
17
+ </div>
18
+
19
+ Pretraining large neural language models, such as BERT, has led to impressive gains on many natural language processing (NLP) tasks. However, most pretraining efforts focus on general domain corpora, such as newswire and Web. A prevailing assumption is that even domain-specific pretraining can benefit by starting from general-domain language models. [Recent work](https://arxiv.org/abs/2007.15779) shows that for domains with abundant unlabeled text, such as biomedicine, pretraining language models from scratch results in substantial gains over continual pretraining of general-domain language models.
20
+
21
+ BiomedBERT is pretrained from scratch using _abstracts_ from [PubMed](https://pubmed.ncbi.nlm.nih.gov/) and _full-text_ articles from [PubMedCentral](https://www.ncbi.nlm.nih.gov/pmc/). This model achieves state-of-the-art performance on many biomedical NLP tasks, and currently holds the top score on the [Biomedical Language Understanding and Reasoning Benchmark](https://aka.ms/BLURB).
22
+
23
+ ## Citation
24
+
25
+ If you find BiomedBERT useful in your research, please cite the following paper:
26
+
27
+ ```latex
28
+ @misc{pubmedbert,
29
+ author = {Yu Gu and Robert Tinn and Hao Cheng and Michael Lucas and Naoto Usuyama and Xiaodong Liu and Tristan Naumann and Jianfeng Gao and Hoifung Poon},
30
+ title = {Domain-Specific Language Model Pretraining for Biomedical Natural Language Processing},
31
+ year = {2020},
32
+ eprint = {arXiv:2007.15779},
33
+ }
34
+ ```
35
+
36
+ <a href="https://huggingface.co/exbert/?model=microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext&modelKind=bidirectional&sentence=Gefitinib%20is%20an%20EGFR%20tyrosine%20kinase%20inhibitor,%20which%20is%20often%20used%20for%20breast%20cancer%20and%20NSCLC%20treatment.&layer=3&heads=..0,1,2,3,4,5,6,7,8,9,10,11&threshold=0.7&tokenInd=17&tokenSide=right&maskInds=..&hideClsSep=true">
37
+ <img width="300px" src="https://cdn-media.huggingface.co/exbert/button.png">
38
+ </a>
plm_model/microsoft/config.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "BertForMaskedLM"
4
+ ],
5
+ "model_type": "bert",
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 3072,
12
+ "max_position_embeddings": 512,
13
+ "num_attention_heads": 12,
14
+ "num_hidden_layers": 12,
15
+ "type_vocab_size": 2,
16
+ "vocab_size": 30522
17
+ }
plm_model/microsoft/flax_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84761403b655e7d865093297cc57d574c5ec7ce705917f9d7011683c79f5fc41
3
+ size 437936109
plm_model/microsoft/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7bbb66376cfd6b2db3447192b034efe016337cbef135c35c411fd61b13c193
3
+ size 440474434
plm_model/microsoft/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "do_lower_case": true
3
+ }
plm_model/microsoft/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/2datasets_construct_predictions1.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/2datasets_qweninstruct_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/aav_07252307_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_deeplocbinary_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_fluorescence_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_gb1.0_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_gb1_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_material_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/ablation_metallonbinding_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
results/antibiotic_07262045_predictions.txt ADDED
The diff for this file is too large to render. See raw diff