Upload folder using huggingface_hub
Browse files
- UD_English-EWT/config.json +11 -14
- UD_English-EWT/model.safetensors +2 -2
- UD_English-EWT/optimizer.pt +2 -2
- UD_English-EWT/rng_state.pth +1 -1
- UD_English-EWT/scheduler.pt +1 -1
- UD_English-EWT/trainer_state.json +26 -12
- UD_English-EWT/training_args.bin +1 -1
UD_English-EWT/config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"adapters": {
|
| 4 |
"adapters": {},
|
| 5 |
"config_map": {},
|
|
@@ -8,11 +8,12 @@
|
|
| 8 |
"fusions": {}
|
| 9 |
},
|
| 10 |
"architectures": [
|
| 11 |
-
"
|
| 12 |
],
|
| 13 |
"attention_probs_dropout_prob": 0.1,
|
|
|
|
| 14 |
"classifier_dropout": null,
|
| 15 |
-
"
|
| 16 |
"hidden_act": "gelu",
|
| 17 |
"hidden_dropout_prob": 0.1,
|
| 18 |
"hidden_size": 768,
|
|
@@ -106,17 +107,13 @@
|
|
| 106 |
"vocative": 36,
|
| 107 |
"xcomp": 37
|
| 108 |
},
|
| 109 |
-
"layer_norm_eps": 1e-
|
| 110 |
-
"max_position_embeddings":
|
| 111 |
-
"model_type": "
|
| 112 |
"num_attention_heads": 12,
|
| 113 |
"num_hidden_layers": 12,
|
|
|
|
| 114 |
"pad_token_id": -1,
|
| 115 |
-
"pooler_fc_size": 768,
|
| 116 |
-
"pooler_num_attention_heads": 12,
|
| 117 |
-
"pooler_num_fc_layers": 3,
|
| 118 |
-
"pooler_size_per_head": 128,
|
| 119 |
-
"pooler_type": "first_token_transform",
|
| 120 |
"position_embedding_type": "absolute",
|
| 121 |
"prediction_heads": {
|
| 122 |
"default": {
|
|
@@ -128,7 +125,7 @@
|
|
| 128 |
"layer_norm": true,
|
| 129 |
"layers": 2,
|
| 130 |
"shift_labels": false,
|
| 131 |
-
"vocab_size":
|
| 132 |
},
|
| 133 |
"ud_UD_English-EWT": {
|
| 134 |
"head_type": "dependency_parsing",
|
|
@@ -181,7 +178,7 @@
|
|
| 181 |
},
|
| 182 |
"torch_dtype": "float32",
|
| 183 |
"transformers_version": "4.47.1",
|
| 184 |
-
"type_vocab_size":
|
| 185 |
"use_cache": true,
|
| 186 |
-
"vocab_size":
|
| 187 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "xlm-roberta-base",
|
| 3 |
"adapters": {
|
| 4 |
"adapters": {},
|
| 5 |
"config_map": {},
|
|
|
|
| 8 |
"fusions": {}
|
| 9 |
},
|
| 10 |
"architectures": [
|
| 11 |
+
"XLMRobertaAdapterModel"
|
| 12 |
],
|
| 13 |
"attention_probs_dropout_prob": 0.1,
|
| 14 |
+
"bos_token_id": 0,
|
| 15 |
"classifier_dropout": null,
|
| 16 |
+
"eos_token_id": 2,
|
| 17 |
"hidden_act": "gelu",
|
| 18 |
"hidden_dropout_prob": 0.1,
|
| 19 |
"hidden_size": 768,
|
|
|
|
| 107 |
"vocative": 36,
|
| 108 |
"xcomp": 37
|
| 109 |
},
|
| 110 |
+
"layer_norm_eps": 1e-05,
|
| 111 |
+
"max_position_embeddings": 514,
|
| 112 |
+
"model_type": "xlm-roberta",
|
| 113 |
"num_attention_heads": 12,
|
| 114 |
"num_hidden_layers": 12,
|
| 115 |
+
"output_past": true,
|
| 116 |
"pad_token_id": -1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
"position_embedding_type": "absolute",
|
| 118 |
"prediction_heads": {
|
| 119 |
"default": {
|
|
|
|
| 125 |
"layer_norm": true,
|
| 126 |
"layers": 2,
|
| 127 |
"shift_labels": false,
|
| 128 |
+
"vocab_size": 250002
|
| 129 |
},
|
| 130 |
"ud_UD_English-EWT": {
|
| 131 |
"head_type": "dependency_parsing",
|
|
|
|
| 178 |
},
|
| 179 |
"torch_dtype": "float32",
|
| 180 |
"transformers_version": "4.47.1",
|
| 181 |
+
"type_vocab_size": 1,
|
| 182 |
"use_cache": true,
|
| 183 |
+
"vocab_size": 250002
|
| 184 |
}
|
UD_English-EWT/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4f19b14ce7a3371db8bb3bfb107904d19c83374b3f6b7e02a10a1774968485d6
|
| 3 |
+
size 1217279040
|
UD_English-EWT/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fd638c5dcd3d84d0357114b75398d0c563543286e672d9143d2e7401b104d93f
|
| 3 |
+
size 2434680506
|
UD_English-EWT/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:158cac9ee55d327b0b16f27e590bb67852f21d1bbb6885543a4bf8b0525f54cb
|
| 3 |
size 14244
|
UD_English-EWT/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5d476c12da381846e3dd23747ce593bb7d9068ba7200583b801a8ee542adb9c
|
| 3 |
size 1064
|
UD_English-EWT/trainer_state.json
CHANGED
|
@@ -1,33 +1,47 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 1.0,
|
| 13 |
-
"eval_loss": 0.
|
| 14 |
-
"las":
|
| 15 |
"step": 196,
|
| 16 |
-
"uas":
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 2.0,
|
| 20 |
-
"eval_loss": 0.
|
| 21 |
-
"las":
|
| 22 |
"step": 392,
|
| 23 |
-
"uas":
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 2.5510204081632653,
|
| 27 |
-
"grad_norm":
|
| 28 |
"learning_rate": 9.79591836734694e-05,
|
| 29 |
-
"loss": 0.
|
| 30 |
"step": 500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
}
|
| 32 |
],
|
| 33 |
"logging_steps": 500,
|
|
@@ -42,12 +56,12 @@
|
|
| 42 |
"should_evaluate": false,
|
| 43 |
"should_log": false,
|
| 44 |
"should_save": true,
|
| 45 |
-
"should_training_stop":
|
| 46 |
},
|
| 47 |
"attributes": {}
|
| 48 |
}
|
| 49 |
},
|
| 50 |
-
"total_flos":
|
| 51 |
"train_batch_size": 64,
|
| 52 |
"trial_name": null,
|
| 53 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 5.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 980,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 1.0,
|
| 13 |
+
"eval_loss": 0.6601235683238695,
|
| 14 |
+
"las": 84.1703447453179,
|
| 15 |
"step": 196,
|
| 16 |
+
"uas": 87.77684997415403
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 2.0,
|
| 20 |
+
"eval_loss": 0.6943420922708889,
|
| 21 |
+
"las": 85.18430156268639,
|
| 22 |
"step": 392,
|
| 23 |
+
"uas": 88.57608652431507
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"epoch": 2.5510204081632653,
|
| 27 |
+
"grad_norm": 10.005887031555176,
|
| 28 |
"learning_rate": 9.79591836734694e-05,
|
| 29 |
+
"loss": 0.8481,
|
| 30 |
"step": 500
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 3.0,
|
| 34 |
+
"eval_loss": 0.542496694828428,
|
| 35 |
+
"las": 88.15459859238935,
|
| 36 |
+
"step": 588,
|
| 37 |
+
"uas": 91.02151178973318
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 4.0,
|
| 41 |
+
"eval_loss": 0.5330029517589581,
|
| 42 |
+
"las": 88.73513857409837,
|
| 43 |
+
"step": 784,
|
| 44 |
+
"uas": 91.52650204779515
|
| 45 |
}
|
| 46 |
],
|
| 47 |
"logging_steps": 500,
|
|
|
|
| 56 |
"should_evaluate": false,
|
| 57 |
"should_log": false,
|
| 58 |
"should_save": true,
|
| 59 |
+
"should_training_stop": true
|
| 60 |
},
|
| 61 |
"attributes": {}
|
| 62 |
}
|
| 63 |
},
|
| 64 |
+
"total_flos": 1.078180051156992e+16,
|
| 65 |
"train_batch_size": 64,
|
| 66 |
"trial_name": null,
|
| 67 |
"trial_params": null
|
UD_English-EWT/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7bbfa0b662d2f2a575d8e0fe7d3a40efcea245c8cd2a4bd8807e3916cd3d86ea
|
| 3 |
size 5432
|