CyrilMa committed (verified)
Commit d96d1d4 · 1 Parent(s): 67f3981

Upload folder using huggingface_hub
config.json ADDED
@@ -0,0 +1,52 @@
+ {
+ "activation": "gelu",
+ "architectures": [
+ "ProteomeLMForMaskedLM"
+ ],
+ "attention_dropout": 0.1,
+ "batch_size": 64,
+ "beta1": 0.9,
+ "beta2": 0.999,
+ "dataloader_num_workers": 8,
+ "db_path": "/home/malbrank/training/",
+ "dim": 128,
+ "dropout": 0.1,
+ "dtype": "bfloat16",
+ "eval_accumulation_steps": 1,
+ "eval_epochs": 1,
+ "gradient_accumulation_steps": 1,
+ "gradient_checkpointing": false,
+ "hidden_dim": 128,
+ "initializer_range": 0.02,
+ "input_size": 1152,
+ "learning_rate": 0.0003,
+ "logging_steps": 10,
+ "loss_choice": "polar",
+ "loss_type": "mse",
+ "mask_fraction": 0.5,
+ "max_grad_norm": 1.0,
+ "max_position_embeddings": 512,
+ "max_steps": 1000000,
+ "min_taxid_size": 200,
+ "model_type": "distilbert",
+ "n_heads": 8,
+ "n_inner": null,
+ "n_layers": 6,
+ "namedir": "ProteomeLM-XXS-Kuma",
+ "num_epochs": 2000,
+ "output_dir": "/home/malbrank/ProteomeLM/output/",
+ "pad_token_id": 0,
+ "qa_dropout": 0.1,
+ "save_epochs": 15,
+ "scheduler": "cosine",
+ "seq_classif_dropout": 0.2,
+ "sinusoidal_pos_embds": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.51.1",
+ "use_cache": true,
+ "use_one_gpu": "0",
+ "vocab_size": 30522,
+ "wandb_project": "proteomelm",
+ "warmup_steps": 500,
+ "weight_decay": 0.01
+ }
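
For reference, a config like this is consumed by the transformers AutoConfig/AutoModel machinery. A minimal loading sketch, assuming the checkpoint is published under a hypothetical repo id "CyrilMa/ProteomeLM-XXS-Kuma" (taken from "namedir" above) and that the custom ProteomeLMForMaskedLM class ships with the repo, hence trust_remote_code=True:

from transformers import AutoConfig, AutoModelForMaskedLM

# Hypothetical repo id inferred from "namedir"; adjust to the actual Hub path.
repo_id = "CyrilMa/ProteomeLM-XXS-Kuma"

config = AutoConfig.from_pretrained(repo_id)
print(config.dim, config.n_layers, config.n_heads)  # 128, 6, 8 per this commit

# ProteomeLMForMaskedLM is a custom architecture, so remote code is
# likely required for the Auto class to resolve it.
model = AutoModelForMaskedLM.from_pretrained(repo_id, trust_remote_code=True)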
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86307b3130acb78a073df385abbd0ee2ea077c5d62e9e07eb7340c08c3b498e1
+ size 11339908
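
The three lines above are a Git LFS pointer, not the weights themselves: the roughly 11.3 MB safetensors blob is stored in LFS and addressed by its SHA-256. A minimal sketch of fetching the resolved file with huggingface_hub (the repo id is again an assumption):

from huggingface_hub import hf_hub_download

# Downloads the LFS-backed file and returns a local cache path.
weights_path = hf_hub_download(
    repo_id="CyrilMa/ProteomeLM-XXS-Kuma",  # hypothetical repo id
    filename="model.safetensors",
)
print(weights_path)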
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9ac13792d86a35055882ac2630a6050da43d85199f99258f8380fc7e79aab78c
+ size 4246337
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e21574f7b3c3648309222a1ccfb91e1cec2979cabcd91e89380e9973b46f6b57
+ size 14443
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b8adca72c79a24af6c7c1fc23ff8437f4bac3cf4189b3d54558f42c0a773a1c
+ size 1263
trainer_state.json ADDED
The diff for this file is too large to render.
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:902e67761854588e8a4d111789440b874e556eaca2883c5806427b5a14b150f6
+ size 5571