Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -51,11 +51,12 @@ embedding_dict = model.embed_dataset(
|
|
| 51 |
sequences=[
|
| 52 |
'MALWMRLLPLLALLALWGPDPAAA', ... # list of protein sequences
|
| 53 |
],
|
|
|
|
| 54 |
batch_size=2, # adjust for your GPU memory
|
| 55 |
max_len=512, # adjust for your needs
|
| 56 |
full_embeddings=False, # if True, no pooling is performed
|
| 57 |
embed_dtype=torch.float32, # dtype to cast the embeddings to
|
| 58 |
-
|
| 59 |
num_workers=0, # if you have many CPU cores, we find that num_workers=4 is fast for large datasets
|
| 60 |
sql=False, # if True, embeddings will be stored in a SQLite database
|
| 61 |
sql_db_path='embeddings.db',
|
|
|
|
| 51 |
sequences=[
|
| 52 |
'MALWMRLLPLLALLALWGPDPAAA', ... # list of protein sequences
|
| 53 |
],
|
| 54 |
+
tokenizer=model.tokenizer,
|
| 55 |
batch_size=2, # adjust for your GPU memory
|
| 56 |
max_len=512, # adjust for your needs
|
| 57 |
full_embeddings=False, # if True, no pooling is performed
|
| 58 |
embed_dtype=torch.float32, # dtype to cast the embeddings to
|
| 59 |
+
pooling_types=['mean', 'cls'], # if more than one pooling type is given, the pooled embeddings are concatenated together
|
| 60 |
num_workers=0, # if you have many CPU cores, we find that num_workers=4 is fast for large datasets
|
| 61 |
sql=False, # if True, embeddings will be stored in a SQLite database
|
| 62 |
sql_db_path='embeddings.db',
|