MariaOls commited on
Commit
2572771
·
verified ·
1 Parent(s): fa0eaea

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. added_tokens.json +4 -0
  2. assets/gazetteer.json +168 -0
  3. assets/gazetteer.txt +161 -0
  4. checkpoint-1131/added_tokens.json +4 -0
  5. checkpoint-1131/config.json +32 -0
  6. checkpoint-1131/model.safetensors +3 -0
  7. checkpoint-1131/optimizer.pt +3 -0
  8. checkpoint-1131/rng_state.pth +3 -0
  9. checkpoint-1131/scheduler.pt +3 -0
  10. checkpoint-1131/special_tokens_map.json +23 -0
  11. checkpoint-1131/tokenizer.json +0 -0
  12. checkpoint-1131/tokenizer_config.json +78 -0
  13. checkpoint-1131/trainer_state.json +223 -0
  14. checkpoint-1131/training_args.bin +3 -0
  15. checkpoint-1131/vocab.txt +0 -0
  16. checkpoint-1508/added_tokens.json +4 -0
  17. checkpoint-1508/config.json +32 -0
  18. checkpoint-1508/model.safetensors +3 -0
  19. checkpoint-1508/optimizer.pt +3 -0
  20. checkpoint-1508/rng_state.pth +3 -0
  21. checkpoint-1508/scheduler.pt +3 -0
  22. checkpoint-1508/special_tokens_map.json +23 -0
  23. checkpoint-1508/tokenizer.json +0 -0
  24. checkpoint-1508/tokenizer_config.json +78 -0
  25. checkpoint-1508/trainer_state.json +291 -0
  26. checkpoint-1508/training_args.bin +3 -0
  27. checkpoint-1508/vocab.txt +0 -0
  28. checkpoint-377/added_tokens.json +4 -0
  29. checkpoint-377/config.json +32 -0
  30. checkpoint-377/model.safetensors +3 -0
  31. checkpoint-377/optimizer.pt +3 -0
  32. checkpoint-377/rng_state.pth +3 -0
  33. checkpoint-377/scheduler.pt +3 -0
  34. checkpoint-377/special_tokens_map.json +23 -0
  35. checkpoint-377/tokenizer.json +0 -0
  36. checkpoint-377/tokenizer_config.json +78 -0
  37. checkpoint-377/trainer_state.json +94 -0
  38. checkpoint-377/training_args.bin +3 -0
  39. checkpoint-377/vocab.txt +0 -0
  40. checkpoint-754/added_tokens.json +4 -0
  41. checkpoint-754/config.json +32 -0
  42. checkpoint-754/model.safetensors +3 -0
  43. checkpoint-754/optimizer.pt +3 -0
  44. checkpoint-754/rng_state.pth +3 -0
  45. checkpoint-754/scheduler.pt +3 -0
  46. checkpoint-754/special_tokens_map.json +23 -0
  47. checkpoint-754/tokenizer.json +0 -0
  48. checkpoint-754/tokenizer_config.json +78 -0
  49. checkpoint-754/trainer_state.json +162 -0
  50. checkpoint-754/training_args.bin +3 -0
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</cand>": 119548,
3
+ "<cand>": 119547
4
+ }
assets/gazetteer.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "source": "ruscorpora_вводныеСлова-DiMaDataset.xlsx",
4
+ "size": 161,
5
+ "items": [
6
+ "Соответственно",
7
+ "соответственно",
8
+ "Действительно",
9
+ "действительно",
10
+ "естесственно",
11
+ "естестевенно",
12
+ "В-четвертых",
13
+ "Естественно",
14
+ "Определенно",
15
+ "По-видимому",
16
+ "безусловнее",
17
+ "в-последних",
18
+ "в-четвертых",
19
+ "естественно",
20
+ "оказывается",
21
+ "по-видимому",
22
+ "Безусловно",
23
+ "З-з-значит",
24
+ "Наконец-то",
25
+ "Несомненно",
26
+ "Разумеется",
27
+ "Собственно",
28
+ "Фактически",
29
+ "безусловно",
30
+ "желательно",
31
+ "наконец-то",
32
+ "несомненно",
33
+ "разумеется",
34
+ "собственно",
35
+ "фактически",
36
+ "Бесспорно",
37
+ "В-третьих",
38
+ "Во первых",
39
+ "Во-вторых",
40
+ "Во-первых",
41
+ "Вообще-то",
42
+ "Наверняка",
43
+ "бесспорно",
44
+ "в-седьмых",
45
+ "в-третьих",
46
+ "во-вторых",
47
+ "во-первых",
48
+ "вообще-то",
49
+ "говорится",
50
+ "наверняка",
51
+ "В-шестых",
52
+ "Вероятно",
53
+ "Возможно",
54
+ "Допустим",
55
+ "Казалось",
56
+ "Наверное",
57
+ "Например",
58
+ "Напротив",
59
+ "Очевидно",
60
+ "По-моему",
61
+ "Случайно",
62
+ "вероятно",
63
+ "возможно",
64
+ "допустим",
65
+ "известно",
66
+ "казалось",
67
+ "наверное",
68
+ "наоборот",
69
+ "например",
70
+ "напротив",
71
+ "нооборот",
72
+ "очевидно",
73
+ "по-моему",
74
+ "случайно",
75
+ "В-пятых",
76
+ "Впрочем",
77
+ "Главное",
78
+ "Говорят",
79
+ "Дескать",
80
+ "КОНЕЧНО",
81
+ "Кажется",
82
+ "Конечно",
83
+ "Наверно",
84
+ "Наконец",
85
+ "Пожалуй",
86
+ "впрочем",
87
+ "главное",
88
+ "говорят",
89
+ "дескать",
90
+ "кажется",
91
+ "конечно",
92
+ "наверно",
93
+ "наконец",
94
+ "пожалуй",
95
+ "помойму",
96
+ "понятно",
97
+ "почитай",
98
+ "собссно",
99
+ "спасибо",
100
+ "Бывало",
101
+ "Вернее",
102
+ "Видать",
103
+ "Видимо",
104
+ "Вообще",
105
+ "Значит",
106
+ "Короче",
107
+ "Кстати",
108
+ "Однако",
109
+ "Похоже",
110
+ "Правда",
111
+ "Скажем",
112
+ "Словом",
113
+ "Точнее",
114
+ "ХОРОШО",
115
+ "Хорошо",
116
+ "Честно",
117
+ "бывало",
118
+ "ваапче",
119
+ "вернее",
120
+ "видать",
121
+ "видимо",
122
+ "вообще",
123
+ "впрямь",
124
+ "значит",
125
+ "канешн",
126
+ "короче",
127
+ "кстати",
128
+ "однако",
129
+ "первых",
130
+ "похоже",
131
+ "правда",
132
+ "правдо",
133
+ "просто",
134
+ "скажем",
135
+ "скорее",
136
+ "словом",
137
+ "точнее",
138
+ "хорошо",
139
+ "Верно",
140
+ "Видно",
141
+ "Жалко",
142
+ "Лучше",
143
+ "Может",
144
+ "Никак",
145
+ "Точно",
146
+ "верно",
147
+ "видно",
148
+ "вобще",
149
+ "жалко",
150
+ "знать",
151
+ "канеш",
152
+ "лучше",
153
+ "может",
154
+ "никак",
155
+ "право",
156
+ "точно",
157
+ "Жаль",
158
+ "ИМХО",
159
+ "Итак",
160
+ "ваще",
161
+ "жаль",
162
+ "имхо",
163
+ "МОЛ",
164
+ "Мол",
165
+ "мол",
166
+ "чай"
167
+ ]
168
+ }
assets/gazetteer.txt ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Соответственно
2
+ соответственно
3
+ Действительно
4
+ действительно
5
+ естесственно
6
+ естестевенно
7
+ В-четвертых
8
+ Естественно
9
+ Определенно
10
+ По-видимому
11
+ безусловнее
12
+ в-последних
13
+ в-четвертых
14
+ естественно
15
+ оказывается
16
+ по-видимому
17
+ Безусловно
18
+ З-з-значит
19
+ Наконец-то
20
+ Несомненно
21
+ Разумеется
22
+ Собственно
23
+ Фактически
24
+ безусловно
25
+ желательно
26
+ наконец-то
27
+ несомненно
28
+ разумеется
29
+ собственно
30
+ фактически
31
+ Бесспорно
32
+ В-третьих
33
+ Во первых
34
+ Во-вторых
35
+ Во-первых
36
+ Вообще-то
37
+ Наверняка
38
+ бесспорно
39
+ в-седьмых
40
+ в-третьих
41
+ во-вторых
42
+ во-первых
43
+ вообще-то
44
+ говорится
45
+ наверняка
46
+ В-шестых
47
+ Вероятно
48
+ Возможно
49
+ Допустим
50
+ Казалось
51
+ Наверное
52
+ Например
53
+ Напротив
54
+ Очевидно
55
+ По-моему
56
+ Случайно
57
+ вероятно
58
+ возможно
59
+ допустим
60
+ известно
61
+ казалось
62
+ наверное
63
+ наоборот
64
+ например
65
+ напротив
66
+ нооборот
67
+ очевидно
68
+ по-моему
69
+ случайно
70
+ В-пятых
71
+ Впрочем
72
+ Главное
73
+ Говорят
74
+ Дескать
75
+ КОНЕЧНО
76
+ Кажется
77
+ Конечно
78
+ Наверно
79
+ Наконец
80
+ Пожалуй
81
+ впрочем
82
+ главное
83
+ говорят
84
+ дескать
85
+ кажется
86
+ конечно
87
+ наверно
88
+ наконец
89
+ пожалуй
90
+ помойму
91
+ понятно
92
+ почитай
93
+ собссно
94
+ спасибо
95
+ Бывало
96
+ Вернее
97
+ Видать
98
+ Видимо
99
+ Вообще
100
+ Значит
101
+ Короче
102
+ Кстати
103
+ Однако
104
+ Похоже
105
+ Правда
106
+ Скажем
107
+ Словом
108
+ Точнее
109
+ ХОРОШО
110
+ Хорошо
111
+ Честно
112
+ бывало
113
+ ваапче
114
+ вернее
115
+ видать
116
+ видимо
117
+ вообще
118
+ впрямь
119
+ значит
120
+ канешн
121
+ короче
122
+ кстати
123
+ однако
124
+ первых
125
+ похоже
126
+ правда
127
+ правдо
128
+ просто
129
+ скажем
130
+ скорее
131
+ словом
132
+ точнее
133
+ хорошо
134
+ Верно
135
+ Видно
136
+ Жалко
137
+ Лучше
138
+ Может
139
+ Никак
140
+ Точно
141
+ верно
142
+ видно
143
+ вобще
144
+ жалко
145
+ знать
146
+ канеш
147
+ лучше
148
+ может
149
+ никак
150
+ право
151
+ точно
152
+ Жаль
153
+ ИМХО
154
+ Итак
155
+ ваще
156
+ жаль
157
+ имхо
158
+ МОЛ
159
+ Мол
160
+ мол
161
+ чай
checkpoint-1131/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</cand>": 119548,
3
+ "<cand>": 119547
4
+ }
checkpoint-1131/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "DeepPavlov/rubert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.49.0",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 119549
32
+ }
checkpoint-1131/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec9b4e02309731a17a0e550199dfc0b653b8b2efb72a87cbe399f458df4d060a
3
+ size 711449600
checkpoint-1131/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2cb20821891b7df7b9cffbd730ee591d833df2c136e22191a72e76023bd1592
3
+ size 1423014650
checkpoint-1131/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e6a15a956ca04b9fd80efa1f51e14a191a110a65c311c93eab6f494f62ceade
3
+ size 13990
checkpoint-1131/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62d3f58ffe473a793ea74276e88ca54ec1783dc62272aa59b827b2e2d708ac48
3
+ size 1064
checkpoint-1131/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<cand>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</cand>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "cls_token": "[CLS]",
19
+ "mask_token": "[MASK]",
20
+ "pad_token": "[PAD]",
21
+ "sep_token": "[SEP]",
22
+ "unk_token": "[UNK]"
23
+ }
checkpoint-1131/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1131/tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "119547": {
44
+ "content": "<cand>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "119548": {
52
+ "content": "</cand>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "additional_special_tokens": [
61
+ "<cand>",
62
+ "</cand>"
63
+ ],
64
+ "clean_up_tokenization_spaces": true,
65
+ "cls_token": "[CLS]",
66
+ "do_basic_tokenize": true,
67
+ "do_lower_case": false,
68
+ "extra_special_tokens": {},
69
+ "mask_token": "[MASK]",
70
+ "model_max_length": 1000000000000000019884624838656,
71
+ "never_split": null,
72
+ "pad_token": "[PAD]",
73
+ "sep_token": "[SEP]",
74
+ "strip_accents": null,
75
+ "tokenize_chinese_chars": true,
76
+ "tokenizer_class": "BertTokenizer",
77
+ "unk_token": "[UNK]"
78
+ }
checkpoint-1131/trainer_state.json ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9873248832555037,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1131,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13262599469496023,
13
+ "grad_norm": 8.819928169250488,
14
+ "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6464,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.26525198938992045,
20
+ "grad_norm": 6.598285675048828,
21
+ "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.388,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.3978779840848806,
27
+ "grad_norm": 0.30871227383613586,
28
+ "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1931,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.5305039787798409,
34
+ "grad_norm": 6.666228294372559,
35
+ "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1591,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.6631299734748011,
41
+ "grad_norm": 0.44178861379623413,
42
+ "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1984,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.7957559681697612,
48
+ "grad_norm": 0.37462666630744934,
49
+ "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1124,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.0416572205722332,
56
+ "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.0809,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.9812206572769953,
63
+ "eval_f1": 0.9865410497981157,
64
+ "eval_loss": 0.0956883653998375,
65
+ "eval_precision": 0.9932249322493225,
66
+ "eval_recall": 0.9799465240641712,
67
+ "eval_runtime": 60.5192,
68
+ "eval_samples_per_second": 17.598,
69
+ "eval_steps_per_second": 1.107,
70
+ "step": 377
71
+ },
72
+ {
73
+ "epoch": 1.0610079575596818,
74
+ "grad_norm": 8.403841018676758,
75
+ "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0611,
77
+ "step": 400
78
+ },
79
+ {
80
+ "epoch": 1.193633952254642,
81
+ "grad_norm": 0.022825542837381363,
82
+ "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0758,
84
+ "step": 450
85
+ },
86
+ {
87
+ "epoch": 1.3262599469496021,
88
+ "grad_norm": 97.80863952636719,
89
+ "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0747,
91
+ "step": 500
92
+ },
93
+ {
94
+ "epoch": 1.4588859416445623,
95
+ "grad_norm": 0.03205716982483864,
96
+ "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0719,
98
+ "step": 550
99
+ },
100
+ {
101
+ "epoch": 1.5915119363395225,
102
+ "grad_norm": 13.893011093139648,
103
+ "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1053,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.03504275158047676,
110
+ "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0494,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.11265891045331955,
117
+ "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0142,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.06097806990146637,
124
+ "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.048,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 2.0,
130
+ "eval_accuracy": 0.9784037558685446,
131
+ "eval_f1": 0.984778292521509,
132
+ "eval_loss": 0.17541147768497467,
133
+ "eval_precision": 0.9750982961992136,
134
+ "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 65.725,
136
+ "eval_samples_per_second": 16.204,
137
+ "eval_steps_per_second": 1.019,
138
+ "step": 754
139
+ },
140
+ {
141
+ "epoch": 2.1220159151193636,
142
+ "grad_norm": 0.010624129325151443,
143
+ "learning_rate": 1.0434782608695653e-05,
144
+ "loss": 0.0328,
145
+ "step": 800
146
+ },
147
+ {
148
+ "epoch": 2.2546419098143238,
149
+ "grad_norm": 0.009882211685180664,
150
+ "learning_rate": 9.697862932940311e-06,
151
+ "loss": 0.0254,
152
+ "step": 850
153
+ },
154
+ {
155
+ "epoch": 2.387267904509284,
156
+ "grad_norm": 0.006466939579695463,
157
+ "learning_rate": 8.960943257184968e-06,
158
+ "loss": 0.0412,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 2.519893899204244,
163
+ "grad_norm": 0.025009147822856903,
164
+ "learning_rate": 8.224023581429625e-06,
165
+ "loss": 0.0377,
166
+ "step": 950
167
+ },
168
+ {
169
+ "epoch": 2.6525198938992043,
170
+ "grad_norm": 16.0838565826416,
171
+ "learning_rate": 7.487103905674282e-06,
172
+ "loss": 0.0263,
173
+ "step": 1000
174
+ },
175
+ {
176
+ "epoch": 2.7851458885941645,
177
+ "grad_norm": 0.006907904986292124,
178
+ "learning_rate": 6.750184229918939e-06,
179
+ "loss": 0.0039,
180
+ "step": 1050
181
+ },
182
+ {
183
+ "epoch": 2.9177718832891246,
184
+ "grad_norm": 0.03146808221936226,
185
+ "learning_rate": 6.013264554163597e-06,
186
+ "loss": 0.0266,
187
+ "step": 1100
188
+ },
189
+ {
190
+ "epoch": 3.0,
191
+ "eval_accuracy": 0.9821596244131455,
192
+ "eval_f1": 0.9873248832555037,
193
+ "eval_loss": 0.12112097442150116,
194
+ "eval_precision": 0.9853528628495339,
195
+ "eval_recall": 0.9893048128342246,
196
+ "eval_runtime": 65.4812,
197
+ "eval_samples_per_second": 16.264,
198
+ "eval_steps_per_second": 1.023,
199
+ "step": 1131
200
+ }
201
+ ],
202
+ "logging_steps": 50,
203
+ "max_steps": 1508,
204
+ "num_input_tokens_seen": 0,
205
+ "num_train_epochs": 4,
206
+ "save_steps": 500,
207
+ "stateful_callbacks": {
208
+ "TrainerControl": {
209
+ "args": {
210
+ "should_epoch_stop": false,
211
+ "should_evaluate": false,
212
+ "should_log": false,
213
+ "should_save": true,
214
+ "should_training_stop": false
215
+ },
216
+ "attributes": {}
217
+ }
218
+ },
219
+ "total_flos": 639096753469440.0,
220
+ "train_batch_size": 16,
221
+ "trial_name": null,
222
+ "trial_params": null
223
+ }
checkpoint-1131/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
+ size 5304
checkpoint-1131/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1508/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</cand>": 119548,
3
+ "<cand>": 119547
4
+ }
checkpoint-1508/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "DeepPavlov/rubert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.49.0",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 119549
32
+ }
checkpoint-1508/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b30d84210b336753e94b844397015ae6635e4a978e6b132eaca6da156c50aead
3
+ size 711449600
checkpoint-1508/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9b811be240a41d4a804950f308a647956d67f40ae2709923fe949706ade9b7b
3
+ size 1423014650
checkpoint-1508/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19c5c01782d6444d26abf0ebe821fd3fa952a5be7b5f26ec6e1147e4c8612b4e
3
+ size 13990
checkpoint-1508/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a047f6a531e52dd15f099073182b2a42f5f21f7c304a459f1c93a142e9a0af0
3
+ size 1064
checkpoint-1508/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<cand>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</cand>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "cls_token": "[CLS]",
19
+ "mask_token": "[MASK]",
20
+ "pad_token": "[PAD]",
21
+ "sep_token": "[SEP]",
22
+ "unk_token": "[UNK]"
23
+ }
checkpoint-1508/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-1508/tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "119547": {
44
+ "content": "<cand>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "119548": {
52
+ "content": "</cand>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "additional_special_tokens": [
61
+ "<cand>",
62
+ "</cand>"
63
+ ],
64
+ "clean_up_tokenization_spaces": true,
65
+ "cls_token": "[CLS]",
66
+ "do_basic_tokenize": true,
67
+ "do_lower_case": false,
68
+ "extra_special_tokens": {},
69
+ "mask_token": "[MASK]",
70
+ "model_max_length": 1000000000000000019884624838656,
71
+ "never_split": null,
72
+ "pad_token": "[PAD]",
73
+ "sep_token": "[SEP]",
74
+ "strip_accents": null,
75
+ "tokenize_chinese_chars": true,
76
+ "tokenizer_class": "BertTokenizer",
77
+ "unk_token": "[UNK]"
78
+ }
checkpoint-1508/trainer_state.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9899665551839465,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1508",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1508,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13262599469496023,
13
+ "grad_norm": 8.819928169250488,
14
+ "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6464,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.26525198938992045,
20
+ "grad_norm": 6.598285675048828,
21
+ "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.388,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.3978779840848806,
27
+ "grad_norm": 0.30871227383613586,
28
+ "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1931,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.5305039787798409,
34
+ "grad_norm": 6.666228294372559,
35
+ "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1591,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.6631299734748011,
41
+ "grad_norm": 0.44178861379623413,
42
+ "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1984,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.7957559681697612,
48
+ "grad_norm": 0.37462666630744934,
49
+ "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1124,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.0416572205722332,
56
+ "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.0809,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.9812206572769953,
63
+ "eval_f1": 0.9865410497981157,
64
+ "eval_loss": 0.0956883653998375,
65
+ "eval_precision": 0.9932249322493225,
66
+ "eval_recall": 0.9799465240641712,
67
+ "eval_runtime": 60.5192,
68
+ "eval_samples_per_second": 17.598,
69
+ "eval_steps_per_second": 1.107,
70
+ "step": 377
71
+ },
72
+ {
73
+ "epoch": 1.0610079575596818,
74
+ "grad_norm": 8.403841018676758,
75
+ "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0611,
77
+ "step": 400
78
+ },
79
+ {
80
+ "epoch": 1.193633952254642,
81
+ "grad_norm": 0.022825542837381363,
82
+ "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0758,
84
+ "step": 450
85
+ },
86
+ {
87
+ "epoch": 1.3262599469496021,
88
+ "grad_norm": 97.80863952636719,
89
+ "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0747,
91
+ "step": 500
92
+ },
93
+ {
94
+ "epoch": 1.4588859416445623,
95
+ "grad_norm": 0.03205716982483864,
96
+ "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0719,
98
+ "step": 550
99
+ },
100
+ {
101
+ "epoch": 1.5915119363395225,
102
+ "grad_norm": 13.893011093139648,
103
+ "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1053,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.03504275158047676,
110
+ "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0494,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.11265891045331955,
117
+ "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0142,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.06097806990146637,
124
+ "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.048,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 2.0,
130
+ "eval_accuracy": 0.9784037558685446,
131
+ "eval_f1": 0.984778292521509,
132
+ "eval_loss": 0.17541147768497467,
133
+ "eval_precision": 0.9750982961992136,
134
+ "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 65.725,
136
+ "eval_samples_per_second": 16.204,
137
+ "eval_steps_per_second": 1.019,
138
+ "step": 754
139
+ },
140
+ {
141
+ "epoch": 2.1220159151193636,
142
+ "grad_norm": 0.010624129325151443,
143
+ "learning_rate": 1.0434782608695653e-05,
144
+ "loss": 0.0328,
145
+ "step": 800
146
+ },
147
+ {
148
+ "epoch": 2.2546419098143238,
149
+ "grad_norm": 0.009882211685180664,
150
+ "learning_rate": 9.697862932940311e-06,
151
+ "loss": 0.0254,
152
+ "step": 850
153
+ },
154
+ {
155
+ "epoch": 2.387267904509284,
156
+ "grad_norm": 0.006466939579695463,
157
+ "learning_rate": 8.960943257184968e-06,
158
+ "loss": 0.0412,
159
+ "step": 900
160
+ },
161
+ {
162
+ "epoch": 2.519893899204244,
163
+ "grad_norm": 0.025009147822856903,
164
+ "learning_rate": 8.224023581429625e-06,
165
+ "loss": 0.0377,
166
+ "step": 950
167
+ },
168
+ {
169
+ "epoch": 2.6525198938992043,
170
+ "grad_norm": 16.0838565826416,
171
+ "learning_rate": 7.487103905674282e-06,
172
+ "loss": 0.0263,
173
+ "step": 1000
174
+ },
175
+ {
176
+ "epoch": 2.7851458885941645,
177
+ "grad_norm": 0.006907904986292124,
178
+ "learning_rate": 6.750184229918939e-06,
179
+ "loss": 0.0039,
180
+ "step": 1050
181
+ },
182
+ {
183
+ "epoch": 2.9177718832891246,
184
+ "grad_norm": 0.03146808221936226,
185
+ "learning_rate": 6.013264554163597e-06,
186
+ "loss": 0.0266,
187
+ "step": 1100
188
+ },
189
+ {
190
+ "epoch": 3.0,
191
+ "eval_accuracy": 0.9821596244131455,
192
+ "eval_f1": 0.9873248832555037,
193
+ "eval_loss": 0.12112097442150116,
194
+ "eval_precision": 0.9853528628495339,
195
+ "eval_recall": 0.9893048128342246,
196
+ "eval_runtime": 65.4812,
197
+ "eval_samples_per_second": 16.264,
198
+ "eval_steps_per_second": 1.023,
199
+ "step": 1131
200
+ },
201
+ {
202
+ "epoch": 3.050397877984085,
203
+ "grad_norm": 0.00711169233545661,
204
+ "learning_rate": 5.276344878408254e-06,
205
+ "loss": 0.0191,
206
+ "step": 1150
207
+ },
208
+ {
209
+ "epoch": 3.183023872679045,
210
+ "grad_norm": 0.10712441056966782,
211
+ "learning_rate": 4.5394252026529115e-06,
212
+ "loss": 0.0079,
213
+ "step": 1200
214
+ },
215
+ {
216
+ "epoch": 3.315649867374005,
217
+ "grad_norm": 0.014097067527472973,
218
+ "learning_rate": 3.8025055268975686e-06,
219
+ "loss": 0.0218,
220
+ "step": 1250
221
+ },
222
+ {
223
+ "epoch": 3.4482758620689653,
224
+ "grad_norm": 0.08094095438718796,
225
+ "learning_rate": 3.065585851142226e-06,
226
+ "loss": 0.0053,
227
+ "step": 1300
228
+ },
229
+ {
230
+ "epoch": 3.5809018567639255,
231
+ "grad_norm": 0.012457519769668579,
232
+ "learning_rate": 2.328666175386883e-06,
233
+ "loss": 0.0003,
234
+ "step": 1350
235
+ },
236
+ {
237
+ "epoch": 3.713527851458886,
238
+ "grad_norm": 0.05693735554814339,
239
+ "learning_rate": 1.59174649963154e-06,
240
+ "loss": 0.0003,
241
+ "step": 1400
242
+ },
243
+ {
244
+ "epoch": 3.8461538461538463,
245
+ "grad_norm": 0.004445453640073538,
246
+ "learning_rate": 8.548268238761975e-07,
247
+ "loss": 0.0246,
248
+ "step": 1450
249
+ },
250
+ {
251
+ "epoch": 3.9787798408488064,
252
+ "grad_norm": 0.004754351451992989,
253
+ "learning_rate": 1.1790714812085484e-07,
254
+ "loss": 0.0111,
255
+ "step": 1500
256
+ },
257
+ {
258
+ "epoch": 4.0,
259
+ "eval_accuracy": 0.9859154929577465,
260
+ "eval_f1": 0.9899665551839465,
261
+ "eval_loss": 0.10738077014684677,
262
+ "eval_precision": 0.9906291834002677,
263
+ "eval_recall": 0.9893048128342246,
264
+ "eval_runtime": 65.4731,
265
+ "eval_samples_per_second": 16.266,
266
+ "eval_steps_per_second": 1.023,
267
+ "step": 1508
268
+ }
269
+ ],
270
+ "logging_steps": 50,
271
+ "max_steps": 1508,
272
+ "num_input_tokens_seen": 0,
273
+ "num_train_epochs": 4,
274
+ "save_steps": 500,
275
+ "stateful_callbacks": {
276
+ "TrainerControl": {
277
+ "args": {
278
+ "should_epoch_stop": false,
279
+ "should_evaluate": false,
280
+ "should_log": false,
281
+ "should_save": true,
282
+ "should_training_stop": true
283
+ },
284
+ "attributes": {}
285
+ }
286
+ },
287
+ "total_flos": 850572264215040.0,
288
+ "train_batch_size": 16,
289
+ "trial_name": null,
290
+ "trial_params": null
291
+ }
checkpoint-1508/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
+ size 5304
checkpoint-1508/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-377/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</cand>": 119548,
3
+ "<cand>": 119547
4
+ }
checkpoint-377/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "DeepPavlov/rubert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.49.0",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 119549
32
+ }
checkpoint-377/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ded7c527bf4f9cf448e7a1f8c244f442ee35e8ddf0b77ce3ce54bb9f8e4ce263
3
+ size 711449600
checkpoint-377/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155a5a5f11c545764eead711ae7536af829e153aa81aca1630679af82398d252
3
+ size 1423014650
checkpoint-377/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:779680481c3672208d95f7d276d71d8000a74b3b459885f98af7ca5ec5fc3b24
3
+ size 13990
checkpoint-377/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7699596f69ddac9b184d0c8e7b8faac5edb9ce845a40964370e105bb5de53f2
3
+ size 1064
checkpoint-377/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<cand>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</cand>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "cls_token": "[CLS]",
19
+ "mask_token": "[MASK]",
20
+ "pad_token": "[PAD]",
21
+ "sep_token": "[SEP]",
22
+ "unk_token": "[UNK]"
23
+ }
checkpoint-377/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-377/tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "119547": {
44
+ "content": "<cand>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "119548": {
52
+ "content": "</cand>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "additional_special_tokens": [
61
+ "<cand>",
62
+ "</cand>"
63
+ ],
64
+ "clean_up_tokenization_spaces": true,
65
+ "cls_token": "[CLS]",
66
+ "do_basic_tokenize": true,
67
+ "do_lower_case": false,
68
+ "extra_special_tokens": {},
69
+ "mask_token": "[MASK]",
70
+ "model_max_length": 1000000000000000019884624838656,
71
+ "never_split": null,
72
+ "pad_token": "[PAD]",
73
+ "sep_token": "[SEP]",
74
+ "strip_accents": null,
75
+ "tokenize_chinese_chars": true,
76
+ "tokenizer_class": "BertTokenizer",
77
+ "unk_token": "[UNK]"
78
+ }
checkpoint-377/trainer_state.json ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9865410497981157,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 377,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13262599469496023,
13
+ "grad_norm": 8.819928169250488,
14
+ "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6464,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.26525198938992045,
20
+ "grad_norm": 6.598285675048828,
21
+ "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.388,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.3978779840848806,
27
+ "grad_norm": 0.30871227383613586,
28
+ "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1931,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.5305039787798409,
34
+ "grad_norm": 6.666228294372559,
35
+ "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1591,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.6631299734748011,
41
+ "grad_norm": 0.44178861379623413,
42
+ "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1984,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.7957559681697612,
48
+ "grad_norm": 0.37462666630744934,
49
+ "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1124,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.0416572205722332,
56
+ "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.0809,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.9812206572769953,
63
+ "eval_f1": 0.9865410497981157,
64
+ "eval_loss": 0.0956883653998375,
65
+ "eval_precision": 0.9932249322493225,
66
+ "eval_recall": 0.9799465240641712,
67
+ "eval_runtime": 60.5192,
68
+ "eval_samples_per_second": 17.598,
69
+ "eval_steps_per_second": 1.107,
70
+ "step": 377
71
+ }
72
+ ],
73
+ "logging_steps": 50,
74
+ "max_steps": 1508,
75
+ "num_input_tokens_seen": 0,
76
+ "num_train_epochs": 4,
77
+ "save_steps": 500,
78
+ "stateful_callbacks": {
79
+ "TrainerControl": {
80
+ "args": {
81
+ "should_epoch_stop": false,
82
+ "should_evaluate": false,
83
+ "should_log": false,
84
+ "should_save": true,
85
+ "should_training_stop": false
86
+ },
87
+ "attributes": {}
88
+ }
89
+ },
90
+ "total_flos": 213580399188480.0,
91
+ "train_batch_size": 16,
92
+ "trial_name": null,
93
+ "trial_params": null
94
+ }
checkpoint-377/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
+ size 5304
checkpoint-377/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-754/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</cand>": 119548,
3
+ "<cand>": 119547
4
+ }
checkpoint-754/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "DeepPavlov/rubert-base-cased",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "directionality": "bidi",
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "output_past": true,
20
+ "pad_token_id": 0,
21
+ "pooler_fc_size": 768,
22
+ "pooler_num_attention_heads": 12,
23
+ "pooler_num_fc_layers": 3,
24
+ "pooler_size_per_head": 128,
25
+ "pooler_type": "first_token_transform",
26
+ "position_embedding_type": "absolute",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.49.0",
29
+ "type_vocab_size": 2,
30
+ "use_cache": true,
31
+ "vocab_size": 119549
32
+ }
checkpoint-754/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abe96bf17c5ab694697666cdeed273085c6e509493dc0d2f29322ae07db9ad68
3
+ size 711449600
checkpoint-754/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44199bd2dbf22a5f947f048391a17d003c7e0d73ef60c43dffd44b21ea64cde3
3
+ size 1423014650
checkpoint-754/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2a22678f170ec99a58395a30e6a3f31da12a92e843748aa42dcbf2fae10eeff
3
+ size 13990
checkpoint-754/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8f67f5a885f6273acf3a74afc30c00afa53b55a5312a9a33f7b64a37dbd79ca
3
+ size 1064
checkpoint-754/special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<cand>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</cand>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "cls_token": "[CLS]",
19
+ "mask_token": "[MASK]",
20
+ "pad_token": "[PAD]",
21
+ "sep_token": "[SEP]",
22
+ "unk_token": "[UNK]"
23
+ }
checkpoint-754/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-754/tokenizer_config.json ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "119547": {
44
+ "content": "<cand>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "119548": {
52
+ "content": "</cand>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ }
59
+ },
60
+ "additional_special_tokens": [
61
+ "<cand>",
62
+ "</cand>"
63
+ ],
64
+ "clean_up_tokenization_spaces": true,
65
+ "cls_token": "[CLS]",
66
+ "do_basic_tokenize": true,
67
+ "do_lower_case": false,
68
+ "extra_special_tokens": {},
69
+ "mask_token": "[MASK]",
70
+ "model_max_length": 1000000000000000019884624838656,
71
+ "never_split": null,
72
+ "pad_token": "[PAD]",
73
+ "sep_token": "[SEP]",
74
+ "strip_accents": null,
75
+ "tokenize_chinese_chars": true,
76
+ "tokenizer_class": "BertTokenizer",
77
+ "unk_token": "[UNK]"
78
+ }
checkpoint-754/trainer_state.json ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9865410497981157,
3
+ "best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 754,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.13262599469496023,
13
+ "grad_norm": 8.819928169250488,
14
+ "learning_rate": 6.622516556291392e-06,
15
+ "loss": 0.6464,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.26525198938992045,
20
+ "grad_norm": 6.598285675048828,
21
+ "learning_rate": 1.3245033112582784e-05,
22
+ "loss": 0.388,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.3978779840848806,
27
+ "grad_norm": 0.30871227383613586,
28
+ "learning_rate": 1.9867549668874173e-05,
29
+ "loss": 0.1931,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.5305039787798409,
34
+ "grad_norm": 6.666228294372559,
35
+ "learning_rate": 1.9277818717759768e-05,
36
+ "loss": 0.1591,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.6631299734748011,
41
+ "grad_norm": 0.44178861379623413,
42
+ "learning_rate": 1.8540899042004423e-05,
43
+ "loss": 0.1984,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.7957559681697612,
48
+ "grad_norm": 0.37462666630744934,
49
+ "learning_rate": 1.780397936624908e-05,
50
+ "loss": 0.1124,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.9283819628647215,
55
+ "grad_norm": 0.0416572205722332,
56
+ "learning_rate": 1.7067059690493736e-05,
57
+ "loss": 0.0809,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 1.0,
62
+ "eval_accuracy": 0.9812206572769953,
63
+ "eval_f1": 0.9865410497981157,
64
+ "eval_loss": 0.0956883653998375,
65
+ "eval_precision": 0.9932249322493225,
66
+ "eval_recall": 0.9799465240641712,
67
+ "eval_runtime": 60.5192,
68
+ "eval_samples_per_second": 17.598,
69
+ "eval_steps_per_second": 1.107,
70
+ "step": 377
71
+ },
72
+ {
73
+ "epoch": 1.0610079575596818,
74
+ "grad_norm": 8.403841018676758,
75
+ "learning_rate": 1.6330140014738394e-05,
76
+ "loss": 0.0611,
77
+ "step": 400
78
+ },
79
+ {
80
+ "epoch": 1.193633952254642,
81
+ "grad_norm": 0.022825542837381363,
82
+ "learning_rate": 1.5593220338983053e-05,
83
+ "loss": 0.0758,
84
+ "step": 450
85
+ },
86
+ {
87
+ "epoch": 1.3262599469496021,
88
+ "grad_norm": 97.80863952636719,
89
+ "learning_rate": 1.485630066322771e-05,
90
+ "loss": 0.0747,
91
+ "step": 500
92
+ },
93
+ {
94
+ "epoch": 1.4588859416445623,
95
+ "grad_norm": 0.03205716982483864,
96
+ "learning_rate": 1.4119380987472366e-05,
97
+ "loss": 0.0719,
98
+ "step": 550
99
+ },
100
+ {
101
+ "epoch": 1.5915119363395225,
102
+ "grad_norm": 13.893011093139648,
103
+ "learning_rate": 1.3382461311717023e-05,
104
+ "loss": 0.1053,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 1.7241379310344827,
109
+ "grad_norm": 0.03504275158047676,
110
+ "learning_rate": 1.2645541635961683e-05,
111
+ "loss": 0.0494,
112
+ "step": 650
113
+ },
114
+ {
115
+ "epoch": 1.8567639257294428,
116
+ "grad_norm": 0.11265891045331955,
117
+ "learning_rate": 1.190862196020634e-05,
118
+ "loss": 0.0142,
119
+ "step": 700
120
+ },
121
+ {
122
+ "epoch": 1.9893899204244032,
123
+ "grad_norm": 0.06097806990146637,
124
+ "learning_rate": 1.1171702284450996e-05,
125
+ "loss": 0.048,
126
+ "step": 750
127
+ },
128
+ {
129
+ "epoch": 2.0,
130
+ "eval_accuracy": 0.9784037558685446,
131
+ "eval_f1": 0.984778292521509,
132
+ "eval_loss": 0.17541147768497467,
133
+ "eval_precision": 0.9750982961992136,
134
+ "eval_recall": 0.9946524064171123,
135
+ "eval_runtime": 65.725,
136
+ "eval_samples_per_second": 16.204,
137
+ "eval_steps_per_second": 1.019,
138
+ "step": 754
139
+ }
140
+ ],
141
+ "logging_steps": 50,
142
+ "max_steps": 1508,
143
+ "num_input_tokens_seen": 0,
144
+ "num_train_epochs": 4,
145
+ "save_steps": 500,
146
+ "stateful_callbacks": {
147
+ "TrainerControl": {
148
+ "args": {
149
+ "should_epoch_stop": false,
150
+ "should_evaluate": false,
151
+ "should_log": false,
152
+ "should_save": true,
153
+ "should_training_stop": false
154
+ },
155
+ "attributes": {}
156
+ }
157
+ },
158
+ "total_flos": 427095020613120.0,
159
+ "train_batch_size": 16,
160
+ "trial_name": null,
161
+ "trial_params": null
162
+ }
checkpoint-754/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
3
+ size 5304