koke143 committed on
Commit
e357580
·
verified ·
1 Parent(s): 4cc2561

Model save

Browse files
Files changed (46) hide show
  1. .gitattributes +1 -0
  2. README.md +63 -0
  3. all_results.json +15 -0
  4. config.json +121 -0
  5. eval_results.json +9 -0
  6. merges.txt +0 -0
  7. model.safetensors +3 -0
  8. runs/Jul28_19-46-17_766925b9bca5/events.out.tfevents.1722195992.766925b9bca5.538.0 +0 -0
  9. runs/Jul28_19-46-17_766925b9bca5/events.out.tfevents.1722195992.766925b9bca5.538.1 +0 -0
  10. runs/Jul28_19-47-00_766925b9bca5/events.out.tfevents.1722196027.766925b9bca5.538.2 +0 -0
  11. runs/Jul28_19-47-00_766925b9bca5/events.out.tfevents.1722196027.766925b9bca5.538.3 +0 -0
  12. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196157.766925b9bca5.538.4 +0 -0
  13. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196157.766925b9bca5.538.5 +0 -0
  14. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196282.766925b9bca5.538.6 +0 -0
  15. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196282.766925b9bca5.538.7 +0 -0
  16. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196305.766925b9bca5.538.8 +0 -0
  17. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196305.766925b9bca5.538.9 +0 -0
  18. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196469.766925b9bca5.538.10 +0 -0
  19. runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196469.766925b9bca5.538.11 +0 -0
  20. runs/Jul28_20-01-33_766925b9bca5/events.out.tfevents.1722196899.766925b9bca5.538.12 +0 -0
  21. runs/Jul28_20-01-33_766925b9bca5/events.out.tfevents.1722196899.766925b9bca5.538.13 +0 -0
  22. runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197338.766925b9bca5.538.14 +0 -0
  23. runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197338.766925b9bca5.538.15 +0 -0
  24. runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197474.766925b9bca5.538.16 +0 -0
  25. runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197474.766925b9bca5.538.17 +0 -0
  26. runs/Jul28_20-16-46_766925b9bca5/events.out.tfevents.1722197811.766925b9bca5.538.18 +0 -0
  27. runs/Jul28_20-16-46_766925b9bca5/events.out.tfevents.1722197811.766925b9bca5.538.19 +0 -0
  28. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197854.766925b9bca5.538.20 +0 -0
  29. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197854.766925b9bca5.538.21 +0 -0
  30. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197871.766925b9bca5.538.22 +0 -0
  31. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197871.766925b9bca5.538.23 +0 -0
  32. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198477.766925b9bca5.538.24 +0 -0
  33. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198477.766925b9bca5.538.25 +0 -0
  34. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198551.766925b9bca5.538.26 +0 -0
  35. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198551.766925b9bca5.538.27 +0 -0
  36. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198566.766925b9bca5.538.28 +0 -0
  37. runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198566.766925b9bca5.538.29 +0 -0
  38. runs/Jul28_20-43-34_766925b9bca5/events.out.tfevents.1722199435.766925b9bca5.538.30 +0 -0
  39. runs/Jul28_20-43-34_766925b9bca5/events.out.tfevents.1722199435.766925b9bca5.538.31 +0 -0
  40. special_tokens_map.json +51 -0
  41. tokenizer.json +0 -0
  42. tokenizer_config.json +59 -0
  43. train_results.json +9 -0
  44. trainer_state.json +186 -0
  45. training_args.bin +0 -0
  46. vocab.json +0 -0
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ base_model: microsoft/deberta-v3-base
4
+ tags:
5
+ - generated_from_trainer
6
+ metrics:
7
+ - f1
8
+ model-index:
9
+ - name: boe_classifier
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # boe_classifier
17
+
18
+ This model is a fine-tuned version of [microsoft/deberta-v3-base](https://huggingface.co/microsoft/deberta-v3-base) on an unknown dataset.
19
+ It achieves the following results on the evaluation set:
20
+ - F1: 0.3068
21
+ - Loss: 7.9021
22
+
23
+ ## Model description
24
+
25
+ More information needed
26
+
27
+ ## Intended uses & limitations
28
+
29
+ More information needed
30
+
31
+ ## Training and evaluation data
32
+
33
+ More information needed
34
+
35
+ ## Training procedure
36
+
37
+ ### Training hyperparameters
38
+
39
+ The following hyperparameters were used during training:
40
+ - learning_rate: 3e-05
41
+ - train_batch_size: 1
42
+ - eval_batch_size: 2
43
+ - seed: 42
44
+ - gradient_accumulation_steps: 10
45
+ - total_train_batch_size: 10
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: linear
48
+ - lr_scheduler_warmup_ratio: 0.03
49
+ - num_epochs: 1
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | F1 | Validation Loss |
54
+ |:-------------:|:------:|:----:|:------:|:---------------:|
55
+ | No log | 0.9195 | 8 | 0.2727 | 9.3126 |
56
+
57
+
58
+ ### Framework versions
59
+
60
+ - Transformers 4.42.4
61
+ - Pytorch 2.3.1+cu121
62
+ - Datasets 2.20.0
63
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.275862068965518,
3
+ "eval_f1": 0.31034482758620696,
4
+ "eval_loss": 7.982325553894043,
5
+ "eval_runtime": 0.1803,
6
+ "eval_samples": 10,
7
+ "eval_samples_per_second": 55.472,
8
+ "eval_steps_per_second": 27.736,
9
+ "total_flos": 202609940734866.0,
10
+ "train_loss": 0.0,
11
+ "train_runtime": 0.2425,
12
+ "train_samples": 87,
13
+ "train_samples_per_second": 358.716,
14
+ "train_steps_per_second": 32.985
15
+ }
config.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Leyes Organicas",
12
+ "1": "Reales Decretos y Reales Decretos-Leyes",
13
+ "2": "Tratados y Convenios Internacionales",
14
+ "3": "Leyes de Comunidades Autonomas",
15
+ "4": "Reglamentos y Normativas Generales",
16
+ "5": "Nombramientos y Ceses",
17
+ "6": "Promociones y Situaciones Especiales",
18
+ "7": "Convocatorias y Resultados de Oposiciones",
19
+ "8": "Anuncios de Concursos y Adjudicaciones de Plazas",
20
+ "9": "Ayudas",
21
+ "10": "Subvenciones y Becas",
22
+ "11": "Convenios Colectivos y Cartas de Servicio",
23
+ "12": "Planes de Estudio y Normativas Educativas",
24
+ "13": "Convenios Internacionales y Medidas Especiales",
25
+ "14": "Edictos y Notificaciones Judiciales",
26
+ "15": "Procedimientos y Citaciones Judiciales",
27
+ "16": "Licitaciones y Adjudicaciones Publicas",
28
+ "17": "Avisos y Notificaciones Oficiales",
29
+ "18": "Anuncios Comerciales y Convocatorias Privadas",
30
+ "19": "Sentencias y Autos del Tribunal Constitucional",
31
+ "20": "Orden de Publicaciones y Sumarios",
32
+ "21": "Publicaciones por Organo Emisor",
33
+ "22": "Jerarquia y Autenticidad de Normativas",
34
+ "23": "Publicaciones en Lenguas Cooficiales",
35
+ "24": "Interpretaciones y Documentos Oficiales",
36
+ "25": "Informes y Comunicaciones de Interes General",
37
+ "26": "Documentos y Estrategias Nacionales",
38
+ "27": "Medidas de Emergencia y Seguridad Nacional",
39
+ "28": "Anuncios de Regulaciones Especificas",
40
+ "29": "Normativas Temporales y Urgentes",
41
+ "30": "Medidas y Politicas Sectoriales",
42
+ "31": "Todos los Tipos de Leyes (Nacionales y Autonomicas)",
43
+ "32": "Todos los Tipos de Decretos (Legislativos y no Legislativos)",
44
+ "33": "Convocatorias y Resultados Generales (Empleo y Educacion)",
45
+ "34": "Anuncios y Avisos (Oficiales y Privados)",
46
+ "35": "Judicial y Procedimientos Legales",
47
+ "36": "Sentencias y Declaraciones Judiciales",
48
+ "37": "Publicaciones Multilingues y Cooficiales",
49
+ "38": "Informes y Estrategias de Politica",
50
+ "39": "Emergencias Nacionales y Medidas Excepcionales",
51
+ "40": "Documentos y Comunicaciones Especificas"
52
+ },
53
+ "initializer_range": 0.02,
54
+ "intermediate_size": 3072,
55
+ "label2id": {
56
+ "Anuncios Comerciales y Convocatorias Privadas": 18,
57
+ "Anuncios de Concursos y Adjudicaciones de Plazas": 8,
58
+ "Anuncios de Regulaciones Especificas": 28,
59
+ "Anuncios y Avisos (Oficiales y Privados)": 34,
60
+ "Avisos y Notificaciones Oficiales": 17,
61
+ "Ayudas": 9,
62
+ "Convenios Colectivos y Cartas de Servicio": 11,
63
+ "Convenios Internacionales y Medidas Especiales": 13,
64
+ "Convocatorias y Resultados Generales (Empleo y Educacion)": 33,
65
+ "Convocatorias y Resultados de Oposiciones": 7,
66
+ "Documentos y Comunicaciones Especificas": 40,
67
+ "Documentos y Estrategias Nacionales": 26,
68
+ "Edictos y Notificaciones Judiciales": 14,
69
+ "Emergencias Nacionales y Medidas Excepcionales": 39,
70
+ "Informes y Comunicaciones de Interes General": 25,
71
+ "Informes y Estrategias de Politica": 38,
72
+ "Interpretaciones y Documentos Oficiales": 24,
73
+ "Jerarquia y Autenticidad de Normativas": 22,
74
+ "Judicial y Procedimientos Legales": 35,
75
+ "Leyes Organicas": 0,
76
+ "Leyes de Comunidades Autonomas": 3,
77
+ "Licitaciones y Adjudicaciones Publicas": 16,
78
+ "Medidas de Emergencia y Seguridad Nacional": 27,
79
+ "Medidas y Politicas Sectoriales": 30,
80
+ "Nombramientos y Ceses": 5,
81
+ "Normativas Temporales y Urgentes": 29,
82
+ "Orden de Publicaciones y Sumarios": 20,
83
+ "Planes de Estudio y Normativas Educativas": 12,
84
+ "Procedimientos y Citaciones Judiciales": 15,
85
+ "Promociones y Situaciones Especiales": 6,
86
+ "Publicaciones Multilingues y Cooficiales": 37,
87
+ "Publicaciones en Lenguas Cooficiales": 23,
88
+ "Publicaciones por Organo Emisor": 21,
89
+ "Reales Decretos y Reales Decretos-Leyes": 1,
90
+ "Reglamentos y Normativas Generales": 4,
91
+ "Sentencias y Autos del Tribunal Constitucional": 19,
92
+ "Sentencias y Declaraciones Judiciales": 36,
93
+ "Subvenciones y Becas": 10,
94
+ "Todos los Tipos de Decretos (Legislativos y no Legislativos)": 32,
95
+ "Todos los Tipos de Leyes (Nacionales y Autonomicas)": 31,
96
+ "Tratados y Convenios Internacionales": 2
97
+ },
98
+ "layer_norm_eps": 1e-07,
99
+ "max_position_embeddings": 512,
100
+ "max_relative_positions": -1,
101
+ "model_type": "deberta-v2",
102
+ "norm_rel_ebd": "layer_norm",
103
+ "num_attention_heads": 12,
104
+ "num_hidden_layers": 12,
105
+ "pad_token_id": 0,
106
+ "pooler_dropout": 0,
107
+ "pooler_hidden_act": "gelu",
108
+ "pooler_hidden_size": 768,
109
+ "pos_att_type": [
110
+ "p2c",
111
+ "c2p"
112
+ ],
113
+ "position_biased_input": false,
114
+ "position_buckets": 256,
115
+ "relative_attention": true,
116
+ "share_att_key": true,
117
+ "torch_dtype": "float32",
118
+ "transformers_version": "4.42.4",
119
+ "type_vocab_size": 0,
120
+ "vocab_size": 128100
121
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.275862068965518,
3
+ "eval_f1": 0.31034482758620696,
4
+ "eval_loss": 7.982325553894043,
5
+ "eval_runtime": 0.1803,
6
+ "eval_samples": 10,
7
+ "eval_samples_per_second": 55.472,
8
+ "eval_steps_per_second": 27.736
9
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2951b0da7a268fe762a4a2af8a10d266416f85b51beecae72d5326be7773f0d3
3
+ size 737839252
runs/Jul28_19-46-17_766925b9bca5/events.out.tfevents.1722195992.766925b9bca5.538.0 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-46-17_766925b9bca5/events.out.tfevents.1722195992.766925b9bca5.538.1 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-47-00_766925b9bca5/events.out.tfevents.1722196027.766925b9bca5.538.2 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-47-00_766925b9bca5/events.out.tfevents.1722196027.766925b9bca5.538.3 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196157.766925b9bca5.538.4 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196157.766925b9bca5.538.5 ADDED
Binary file (9.12 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196282.766925b9bca5.538.6 ADDED
Binary file (9.87 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196282.766925b9bca5.538.7 ADDED
Binary file (9.87 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196305.766925b9bca5.538.8 ADDED
Binary file (10.2 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196305.766925b9bca5.538.9 ADDED
Binary file (10.2 kB). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196469.766925b9bca5.538.10 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_19-49-11_766925b9bca5/events.out.tfevents.1722196469.766925b9bca5.538.11 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-01-33_766925b9bca5/events.out.tfevents.1722196899.766925b9bca5.538.12 ADDED
Binary file (11.4 kB). View file
 
runs/Jul28_20-01-33_766925b9bca5/events.out.tfevents.1722196899.766925b9bca5.538.13 ADDED
Binary file (11.4 kB). View file
 
runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197338.766925b9bca5.538.14 ADDED
Binary file (12.3 kB). View file
 
runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197338.766925b9bca5.538.15 ADDED
Binary file (12.3 kB). View file
 
runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197474.766925b9bca5.538.16 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-08-52_766925b9bca5/events.out.tfevents.1722197474.766925b9bca5.538.17 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-16-46_766925b9bca5/events.out.tfevents.1722197811.766925b9bca5.538.18 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-16-46_766925b9bca5/events.out.tfevents.1722197811.766925b9bca5.538.19 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197854.766925b9bca5.538.20 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197854.766925b9bca5.538.21 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197871.766925b9bca5.538.22 ADDED
Binary file (9.89 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722197871.766925b9bca5.538.23 ADDED
Binary file (9.89 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198477.766925b9bca5.538.24 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198477.766925b9bca5.538.25 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198551.766925b9bca5.538.26 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198551.766925b9bca5.538.27 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198566.766925b9bca5.538.28 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-17-26_766925b9bca5/events.out.tfevents.1722198566.766925b9bca5.538.29 ADDED
Binary file (399 Bytes). View file
 
runs/Jul28_20-43-34_766925b9bca5/events.out.tfevents.1722199435.766925b9bca5.538.30 ADDED
Binary file (9.58 kB). View file
 
runs/Jul28_20-43-34_766925b9bca5/events.out.tfevents.1722199435.766925b9bca5.538.31 ADDED
Binary file (9.58 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "[CLS]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "[SEP]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "[MASK]",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "[PAD]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "[UNK]",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "[PAD]",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "[CLS]",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "[SEP]",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "3": {
30
+ "content": "[UNK]",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "50264": {
38
+ "content": "[MASK]",
39
+ "lstrip": true,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ }
45
+ },
46
+ "bos_token": "[CLS]",
47
+ "clean_up_tokenization_spaces": true,
48
+ "cls_token": "[CLS]",
49
+ "do_lower_case": false,
50
+ "eos_token": "[SEP]",
51
+ "errors": "replace",
52
+ "mask_token": "[MASK]",
53
+ "model_max_length": 1000000000000000019884624838656,
54
+ "pad_token": "[PAD]",
55
+ "sep_token": "[SEP]",
56
+ "tokenizer_class": "DebertaTokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "gpt2"
59
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 9.275862068965518,
3
+ "total_flos": 202609940734866.0,
4
+ "train_loss": 0.0,
5
+ "train_runtime": 0.2425,
6
+ "train_samples": 87,
7
+ "train_samples_per_second": 358.716,
8
+ "train_steps_per_second": 32.985
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.31034482758620696,
3
+ "best_model_checkpoint": "./models/checkpoint/checkpoint-68",
4
+ "epoch": 9.275862068965518,
5
+ "eval_steps": 500,
6
+ "global_step": 80,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.9195402298850575,
13
+ "eval_f1": 0.2727272727272727,
14
+ "eval_loss": 9.31263256072998,
15
+ "eval_runtime": 0.1958,
16
+ "eval_samples_per_second": 51.067,
17
+ "eval_steps_per_second": 25.534,
18
+ "step": 8
19
+ },
20
+ {
21
+ "epoch": 0.9195402298850575,
22
+ "step": 8,
23
+ "total_flos": 20939936811486.0,
24
+ "train_loss": 9.918171882629395,
25
+ "train_runtime": 21.4307,
26
+ "train_samples_per_second": 4.06,
27
+ "train_steps_per_second": 0.373
28
+ },
29
+ {
30
+ "epoch": 0.9195402298850575,
31
+ "step": 8,
32
+ "total_flos": 20939936811486.0,
33
+ "train_loss": 0.0,
34
+ "train_runtime": 0.5294,
35
+ "train_samples_per_second": 164.352,
36
+ "train_steps_per_second": 15.113
37
+ },
38
+ {
39
+ "epoch": 1.9195402298850575,
40
+ "eval_f1": 0.2530120481927711,
41
+ "eval_loss": 9.20499038696289,
42
+ "eval_runtime": 0.1794,
43
+ "eval_samples_per_second": 55.739,
44
+ "eval_steps_per_second": 27.87,
45
+ "step": 16
46
+ },
47
+ {
48
+ "epoch": 2.954022988505747,
49
+ "eval_f1": 0.25842696629213485,
50
+ "eval_loss": 8.635601997375488,
51
+ "eval_runtime": 0.1743,
52
+ "eval_samples_per_second": 57.382,
53
+ "eval_steps_per_second": 28.691,
54
+ "step": 25
55
+ },
56
+ {
57
+ "epoch": 3.9885057471264367,
58
+ "eval_f1": 0.30409356725146197,
59
+ "eval_loss": 8.560417175292969,
60
+ "eval_runtime": 0.1737,
61
+ "eval_samples_per_second": 57.572,
62
+ "eval_steps_per_second": 28.786,
63
+ "step": 34
64
+ },
65
+ {
66
+ "epoch": 4.908045977011494,
67
+ "eval_f1": 0.3076923076923077,
68
+ "eval_loss": 8.47392463684082,
69
+ "eval_runtime": 0.1856,
70
+ "eval_samples_per_second": 53.876,
71
+ "eval_steps_per_second": 26.938,
72
+ "step": 42
73
+ },
74
+ {
75
+ "epoch": 5.942528735632184,
76
+ "eval_f1": 0.30857142857142855,
77
+ "eval_loss": 8.22740364074707,
78
+ "eval_runtime": 0.1773,
79
+ "eval_samples_per_second": 56.411,
80
+ "eval_steps_per_second": 28.205,
81
+ "step": 51
82
+ },
83
+ {
84
+ "epoch": 6.977011494252873,
85
+ "eval_f1": 0.3,
86
+ "eval_loss": 8.102139472961426,
87
+ "eval_runtime": 0.1819,
88
+ "eval_samples_per_second": 54.979,
89
+ "eval_steps_per_second": 27.49,
90
+ "step": 60
91
+ },
92
+ {
93
+ "epoch": 7.896551724137931,
94
+ "eval_f1": 0.31034482758620696,
95
+ "eval_loss": 7.982325553894043,
96
+ "eval_runtime": 0.1721,
97
+ "eval_samples_per_second": 58.115,
98
+ "eval_steps_per_second": 29.057,
99
+ "step": 68
100
+ },
101
+ {
102
+ "epoch": 8.931034482758621,
103
+ "eval_f1": 0.30681818181818177,
104
+ "eval_loss": 7.987342834472656,
105
+ "eval_runtime": 0.1737,
106
+ "eval_samples_per_second": 57.583,
107
+ "eval_steps_per_second": 28.792,
108
+ "step": 77
109
+ },
110
+ {
111
+ "epoch": 9.275862068965518,
112
+ "eval_f1": 0.30681818181818177,
113
+ "eval_loss": 7.902113437652588,
114
+ "eval_runtime": 0.1822,
115
+ "eval_samples_per_second": 54.889,
116
+ "eval_steps_per_second": 27.444,
117
+ "step": 80
118
+ },
119
+ {
120
+ "epoch": 9.275862068965518,
121
+ "step": 80,
122
+ "total_flos": 202609940734866.0,
123
+ "train_loss": 7.418544769287109,
124
+ "train_runtime": 132.4614,
125
+ "train_samples_per_second": 6.568,
126
+ "train_steps_per_second": 0.604
127
+ },
128
+ {
129
+ "epoch": 9.275862068965518,
130
+ "step": 80,
131
+ "total_flos": 202609940734866.0,
132
+ "train_loss": 0.0,
133
+ "train_runtime": 0.245,
134
+ "train_samples_per_second": 355.061,
135
+ "train_steps_per_second": 32.649
136
+ },
137
+ {
138
+ "epoch": 9.275862068965518,
139
+ "step": 80,
140
+ "total_flos": 202609940734866.0,
141
+ "train_loss": 0.0,
142
+ "train_runtime": 0.244,
143
+ "train_samples_per_second": 356.519,
144
+ "train_steps_per_second": 32.783
145
+ },
146
+ {
147
+ "epoch": 9.275862068965518,
148
+ "step": 80,
149
+ "total_flos": 202609940734866.0,
150
+ "train_loss": 0.0,
151
+ "train_runtime": 0.2425,
152
+ "train_samples_per_second": 358.716,
153
+ "train_steps_per_second": 32.985
154
+ }
155
+ ],
156
+ "logging_steps": 500,
157
+ "max_steps": 8,
158
+ "num_input_tokens_seen": 0,
159
+ "num_train_epochs": 1,
160
+ "save_steps": 500,
161
+ "stateful_callbacks": {
162
+ "EarlyStoppingCallback": {
163
+ "args": {
164
+ "early_stopping_patience": 3,
165
+ "early_stopping_threshold": 0.0
166
+ },
167
+ "attributes": {
168
+ "early_stopping_patience_counter": 0
169
+ }
170
+ },
171
+ "TrainerControl": {
172
+ "args": {
173
+ "should_epoch_stop": false,
174
+ "should_evaluate": false,
175
+ "should_log": false,
176
+ "should_save": true,
177
+ "should_training_stop": true
178
+ },
179
+ "attributes": {}
180
+ }
181
+ },
182
+ "total_flos": 202609940734866.0,
183
+ "train_batch_size": 1,
184
+ "trial_name": null,
185
+ "trial_params": null
186
+ }
training_args.bin ADDED
Binary file (5.18 kB). View file
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff