ltuzova commited on
Commit
28f1857
·
verified ·
1 Parent(s): 33feaec

Training in progress, epoch 0, checkpoint

Browse files
last-checkpoint/default/head_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "activation_function": "gelu",
4
+ "bias": true,
5
+ "embedding_size": 768,
6
+ "head_type": "masked_lm",
7
+ "label2id": null,
8
+ "layer_norm": true,
9
+ "layers": 2,
10
+ "shift_labels": false,
11
+ "vocab_size": 50265
12
+ },
13
+ "hidden_size": 768,
14
+ "model_class": "RobertaAdapterModel",
15
+ "model_name": "roberta-base",
16
+ "model_type": "roberta",
17
+ "name": "default",
18
+ "version": "0.1.2"
19
+ }
last-checkpoint/default/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdff7928366b173ef2d74fcdf04581a7c98c13c3bd11623203f922ffcc7248c5
3
+ size 156986358
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a46534c06e096874f8b363e21727329e759dd5a284a12b8ca6b4a90aacf4531
3
+ size 93571514
last-checkpoint/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba062fb616648fe95a7387c720bd7229051939a15b5efb41d6bab65bc4443944
3
+ size 14244
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42cfbbb71aacede8d916cd2c75ee19ec5e636b312ce77040498932f13f3d26d9
3
+ size 1064
last-checkpoint/tapt_unipelt/adapter_config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "architecture": "union",
4
+ "configs": [
5
+ {
6
+ "architecture": "prefix_tuning",
7
+ "bottleneck_size": 512,
8
+ "cross_prefix": true,
9
+ "dropout": 0.0,
10
+ "encoder_prefix": true,
11
+ "flat": false,
12
+ "leave_out": [],
13
+ "non_linearity": "tanh",
14
+ "prefix_length": 10,
15
+ "shared_gating": true,
16
+ "use_gating": true
17
+ },
18
+ {
19
+ "adapter_residual_before_ln": false,
20
+ "cross_adapter": false,
21
+ "factorized_phm_W": true,
22
+ "factorized_phm_rule": false,
23
+ "hypercomplex_nonlinearity": "glorot-uniform",
24
+ "init_weights": "bert",
25
+ "inv_adapter": null,
26
+ "inv_adapter_reduction_factor": null,
27
+ "is_parallel": false,
28
+ "learn_phm": true,
29
+ "leave_out": [],
30
+ "ln_after": false,
31
+ "ln_before": false,
32
+ "mh_adapter": false,
33
+ "non_linearity": "relu",
34
+ "original_ln_after": true,
35
+ "original_ln_before": true,
36
+ "output_adapter": true,
37
+ "phm_bias": true,
38
+ "phm_c_init": "normal",
39
+ "phm_dim": 4,
40
+ "phm_init_range": 0.0001,
41
+ "phm_layer": false,
42
+ "phm_rank": 1,
43
+ "reduction_factor": 16,
44
+ "residual_before_ln": true,
45
+ "scaling": 1.0,
46
+ "shared_W_phm": false,
47
+ "shared_phm_rule": true,
48
+ "use_gating": true
49
+ },
50
+ {
51
+ "alpha": 8,
52
+ "architecture": "lora",
53
+ "attn_matrices": [
54
+ "q",
55
+ "v"
56
+ ],
57
+ "composition_mode": "add",
58
+ "dropout": 0.0,
59
+ "init_weights": "lora",
60
+ "intermediate_lora": false,
61
+ "leave_out": [],
62
+ "output_lora": false,
63
+ "r": 8,
64
+ "selfattn_lora": true,
65
+ "use_gating": true
66
+ }
67
+ ]
68
+ },
69
+ "config_id": "7ee253f8cb7be91f",
70
+ "hidden_size": 768,
71
+ "model_class": "RobertaAdapterModel",
72
+ "model_name": "roberta-base",
73
+ "model_type": "roberta",
74
+ "name": "tapt_unipelt",
75
+ "version": "0.1.2"
76
+ }
last-checkpoint/tapt_unipelt/head_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "activation_function": "gelu",
4
+ "bias": true,
5
+ "embedding_size": 768,
6
+ "head_type": "masked_lm",
7
+ "label2id": null,
8
+ "layer_norm": true,
9
+ "layers": 2,
10
+ "shift_labels": false,
11
+ "vocab_size": 50265
12
+ },
13
+ "hidden_size": 768,
14
+ "model_class": "RobertaAdapterModel",
15
+ "model_name": "roberta-base",
16
+ "model_type": "roberta",
17
+ "name": "tapt_unipelt",
18
+ "version": "0.1.2"
19
+ }
last-checkpoint/tapt_unipelt/pytorch_adapter.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:584c2f7c9333fc4cf4afe6763b017083f7a4e52495f4877babd8553dadbce535
3
+ size 44418864
last-checkpoint/tapt_unipelt/pytorch_model_head.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0768c9d31497289722df2637a035da1d47556c67bd5fa1be54e9e12718fc7833
3
+ size 156986358
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.5050151348114014,
3
+ "best_model_checkpoint": "tapt_helpfulness_unipelt_pretraining_model/checkpoint-232",
4
+ "epoch": 0.9978494623655914,
5
+ "eval_steps": 500,
6
+ "global_step": 232,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "learning_rate": 9.900000000000001e-05,
14
+ "loss": 4.7846,
15
+ "step": 232
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_loss": 2.5050151348114014,
20
+ "eval_runtime": 27.8974,
21
+ "eval_samples_per_second": 70.974,
22
+ "eval_steps_per_second": 3.405,
23
+ "step": 232
24
+ }
25
+ ],
26
+ "logging_steps": 500,
27
+ "max_steps": 23200,
28
+ "num_input_tokens_seen": 0,
29
+ "num_train_epochs": 100,
30
+ "save_steps": 500,
31
+ "total_flos": 2936712191373312.0,
32
+ "train_batch_size": 21,
33
+ "trial_name": null,
34
+ "trial_params": null
35
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b04ee6a722e0a2aa2bcbdc779e700fcf9833b5a2a4670f81b1cb465375f90f3
3
+ size 4728