pavm595 committed on
Commit
b712bbf
·
verified ·
1 Parent(s): 165c578

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +20 -3
  2. config.json +21 -0
  3. pytorch_model.bin +3 -0
  4. spiece.model +3 -0
README.md CHANGED
@@ -1,3 +1,20 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ProtAlbert
2
+
3
+ ```python
4
+ from transformers import AutoModel, AlbertTokenizer, pipeline
5
+ import re
6
+
7
+ tokenizer = AlbertTokenizer.from_pretrained("Rostlab/prot_albert", do_lower_case=False)
8
+
9
+ model = AutoModel.from_pretrained("Rostlab/prot_albert")
10
+
11
+ fe = pipeline('feature-extraction', model=model, tokenizer=tokenizer, device=0)
12
+
13
+ sequences_Example = ["A E T C Z A O", "S K T Z P"]
14
+
15
+ sequences_Example = [re.sub(r"[UZOB]", "X", sequence) for sequence in sequences_Example]
16
+
17
+ embedding = fe(sequences_Example)
18
+
19
+ print(embedding)
20
+ ```
config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "attention_probs_dropout_prob": 0,
3
+ "hidden_act": "gelu",
4
+ "hidden_dropout_prob": 0,
5
+ "embedding_size": 128,
6
+ "hidden_size": 4096,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 16384,
9
+ "max_position_embeddings": 40000,
10
+ "num_attention_heads": 64,
11
+ "num_hidden_layers": 12,
12
+ "num_hidden_groups": 1,
13
+ "net_structure_type": 0,
14
+ "layers_to_keep": [],
15
+ "gap_size": 0,
16
+ "num_memory_blocks": 0,
17
+ "inner_group_num": 1,
18
+ "down_scale_factor": 1,
19
+ "type_vocab_size": 2,
20
+ "vocab_size": 34
21
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f616acbd5007236453cef55029264af6da30d8f4491a4e5f63641055e53d2b6
3
+ size 897396780
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2bd947f6400f10d02c65a9ca396e607bf89e076cfd3f9775e9ef053539998d9
3
+ size 238187