mokcho committed on
Commit
1ca0e13
·
verified ·
1 Parent(s): 96de65f

Upload folder using huggingface_hub

Browse files
UD_English-EWT/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "bert-base-multilingual-cased",
3
  "adapters": {
4
  "adapters": {},
5
  "config_map": {},
@@ -8,11 +8,12 @@
8
  "fusions": {}
9
  },
10
  "architectures": [
11
- "BertAdapterModel"
12
  ],
13
  "attention_probs_dropout_prob": 0.1,
 
14
  "classifier_dropout": null,
15
- "directionality": "bidi",
16
  "hidden_act": "gelu",
17
  "hidden_dropout_prob": 0.1,
18
  "hidden_size": 768,
@@ -106,17 +107,13 @@
106
  "vocative": 36,
107
  "xcomp": 37
108
  },
109
- "layer_norm_eps": 1e-12,
110
- "max_position_embeddings": 512,
111
- "model_type": "bert",
112
  "num_attention_heads": 12,
113
  "num_hidden_layers": 12,
 
114
  "pad_token_id": -1,
115
- "pooler_fc_size": 768,
116
- "pooler_num_attention_heads": 12,
117
- "pooler_num_fc_layers": 3,
118
- "pooler_size_per_head": 128,
119
- "pooler_type": "first_token_transform",
120
  "position_embedding_type": "absolute",
121
  "prediction_heads": {
122
  "default": {
@@ -128,7 +125,7 @@
128
  "layer_norm": true,
129
  "layers": 2,
130
  "shift_labels": false,
131
- "vocab_size": 119547
132
  },
133
  "ud_UD_English-EWT": {
134
  "head_type": "dependency_parsing",
@@ -181,7 +178,7 @@
181
  },
182
  "torch_dtype": "float32",
183
  "transformers_version": "4.47.1",
184
- "type_vocab_size": 2,
185
  "use_cache": true,
186
- "vocab_size": 119547
187
  }
 
1
  {
2
+ "_name_or_path": "xlm-roberta-base",
3
  "adapters": {
4
  "adapters": {},
5
  "config_map": {},
 
8
  "fusions": {}
9
  },
10
  "architectures": [
11
+ "XLMRobertaAdapterModel"
12
  ],
13
  "attention_probs_dropout_prob": 0.1,
14
+ "bos_token_id": 0,
15
  "classifier_dropout": null,
16
+ "eos_token_id": 2,
17
  "hidden_act": "gelu",
18
  "hidden_dropout_prob": 0.1,
19
  "hidden_size": 768,
 
107
  "vocative": 36,
108
  "xcomp": 37
109
  },
110
+ "layer_norm_eps": 1e-05,
111
+ "max_position_embeddings": 514,
112
+ "model_type": "xlm-roberta",
113
  "num_attention_heads": 12,
114
  "num_hidden_layers": 12,
115
+ "output_past": true,
116
  "pad_token_id": -1,
 
 
 
 
 
117
  "position_embedding_type": "absolute",
118
  "prediction_heads": {
119
  "default": {
 
125
  "layer_norm": true,
126
  "layers": 2,
127
  "shift_labels": false,
128
+ "vocab_size": 250002
129
  },
130
  "ud_UD_English-EWT": {
131
  "head_type": "dependency_parsing",
 
178
  },
179
  "torch_dtype": "float32",
180
  "transformers_version": "4.47.1",
181
+ "type_vocab_size": 1,
182
  "use_cache": true,
183
+ "vocab_size": 250002
184
  }
UD_English-EWT/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72ec06c21166f029da5b02c649c27bfc33cb757d3191ce240434bcf663afd350
3
- size 815995556
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f19b14ce7a3371db8bb3bfb107904d19c83374b3f6b7e02a10a1774968485d6
3
+ size 1217279040
UD_English-EWT/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddfe22ad1fdf8c284588a548048b8dcfae9fefd285e1d2149c648f5f0e71595d
3
- size 1632115130
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd638c5dcd3d84d0357114b75398d0c563543286e672d9143d2e7401b104d93f
3
+ size 2434680506
UD_English-EWT/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:318e9c3d0f08aee56211408354272de367b86cfa59039ef95f34a2492203921e
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:158cac9ee55d327b0b16f27e590bb67852f21d1bbb6885543a4bf8b0525f54cb
3
  size 14244
UD_English-EWT/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff13e8b65c0e6da1ed6e58b28e262db591ac75fc1eb083dac3a0bf96640bbb3a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5d476c12da381846e3dd23747ce593bb7d9068ba7200583b801a8ee542adb9c
3
  size 1064
UD_English-EWT/trainer_state.json CHANGED
@@ -1,33 +1,47 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.5510204081632653,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.559650621761287,
14
- "las": 86.17440057258739,
15
  "step": 196,
16
- "uas": 89.10095828859994
17
  },
18
  {
19
  "epoch": 2.0,
20
- "eval_loss": 0.5288873102464196,
21
- "las": 87.89216271024692,
22
  "step": 392,
23
- "uas": 90.58014235158456
24
  },
25
  {
26
  "epoch": 2.5510204081632653,
27
- "grad_norm": 4.740634441375732,
28
  "learning_rate": 9.79591836734694e-05,
29
- "loss": 0.6275,
30
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  }
32
  ],
33
  "logging_steps": 500,
@@ -42,12 +56,12 @@
42
  "should_evaluate": false,
43
  "should_log": false,
44
  "should_save": true,
45
- "should_training_stop": false
46
  },
47
  "attributes": {}
48
  }
49
  },
50
- "total_flos": 5494506504192000.0,
51
  "train_batch_size": 64,
52
  "trial_name": null,
53
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
  "eval_steps": 500,
6
+ "global_step": 980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.6601235683238695,
14
+ "las": 84.1703447453179,
15
  "step": 196,
16
+ "uas": 87.77684997415403
17
  },
18
  {
19
  "epoch": 2.0,
20
+ "eval_loss": 0.6943420922708889,
21
+ "las": 85.18430156268639,
22
  "step": 392,
23
+ "uas": 88.57608652431507
24
  },
25
  {
26
  "epoch": 2.5510204081632653,
27
+ "grad_norm": 10.005887031555176,
28
  "learning_rate": 9.79591836734694e-05,
29
+ "loss": 0.8481,
30
  "step": 500
31
+ },
32
+ {
33
+ "epoch": 3.0,
34
+ "eval_loss": 0.542496694828428,
35
+ "las": 88.15459859238935,
36
+ "step": 588,
37
+ "uas": 91.02151178973318
38
+ },
39
+ {
40
+ "epoch": 4.0,
41
+ "eval_loss": 0.5330029517589581,
42
+ "las": 88.73513857409837,
43
+ "step": 784,
44
+ "uas": 91.52650204779515
45
  }
46
  ],
47
  "logging_steps": 500,
 
56
  "should_evaluate": false,
57
  "should_log": false,
58
  "should_save": true,
59
+ "should_training_stop": true
60
  },
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 1.078180051156992e+16,
65
  "train_batch_size": 64,
66
  "trial_name": null,
67
  "trial_params": null
UD_English-EWT/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e4e83cf0cfb6fc9ab8de20143e6c6325b21f1ea7c67e92a0a9c44f68e5e18cc7
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bbfa0b662d2f2a575d8e0fe7d3a40efcea245c8cd2a4bd8807e3916cd3d86ea
3
  size 5432