Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- added_tokens.json +4 -0
- assets/gazetteer.json +168 -0
- assets/gazetteer.txt +161 -0
- checkpoint-1131/added_tokens.json +4 -0
- checkpoint-1131/config.json +32 -0
- checkpoint-1131/model.safetensors +3 -0
- checkpoint-1131/optimizer.pt +3 -0
- checkpoint-1131/rng_state.pth +3 -0
- checkpoint-1131/scheduler.pt +3 -0
- checkpoint-1131/special_tokens_map.json +23 -0
- checkpoint-1131/tokenizer.json +0 -0
- checkpoint-1131/tokenizer_config.json +78 -0
- checkpoint-1131/trainer_state.json +223 -0
- checkpoint-1131/training_args.bin +3 -0
- checkpoint-1131/vocab.txt +0 -0
- checkpoint-1508/added_tokens.json +4 -0
- checkpoint-1508/config.json +32 -0
- checkpoint-1508/model.safetensors +3 -0
- checkpoint-1508/optimizer.pt +3 -0
- checkpoint-1508/rng_state.pth +3 -0
- checkpoint-1508/scheduler.pt +3 -0
- checkpoint-1508/special_tokens_map.json +23 -0
- checkpoint-1508/tokenizer.json +0 -0
- checkpoint-1508/tokenizer_config.json +78 -0
- checkpoint-1508/trainer_state.json +291 -0
- checkpoint-1508/training_args.bin +3 -0
- checkpoint-1508/vocab.txt +0 -0
- checkpoint-377/added_tokens.json +4 -0
- checkpoint-377/config.json +32 -0
- checkpoint-377/model.safetensors +3 -0
- checkpoint-377/optimizer.pt +3 -0
- checkpoint-377/rng_state.pth +3 -0
- checkpoint-377/scheduler.pt +3 -0
- checkpoint-377/special_tokens_map.json +23 -0
- checkpoint-377/tokenizer.json +0 -0
- checkpoint-377/tokenizer_config.json +78 -0
- checkpoint-377/trainer_state.json +94 -0
- checkpoint-377/training_args.bin +3 -0
- checkpoint-377/vocab.txt +0 -0
- checkpoint-754/added_tokens.json +4 -0
- checkpoint-754/config.json +32 -0
- checkpoint-754/model.safetensors +3 -0
- checkpoint-754/optimizer.pt +3 -0
- checkpoint-754/rng_state.pth +3 -0
- checkpoint-754/scheduler.pt +3 -0
- checkpoint-754/special_tokens_map.json +23 -0
- checkpoint-754/tokenizer.json +0 -0
- checkpoint-754/tokenizer_config.json +78 -0
- checkpoint-754/trainer_state.json +162 -0
- checkpoint-754/training_args.bin +3 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</cand>": 119548,
|
| 3 |
+
"<cand>": 119547
|
| 4 |
+
}
|
assets/gazetteer.json
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "1.0",
|
| 3 |
+
"source": "ruscorpora_вводныеСлова-DiMaDataset.xlsx",
|
| 4 |
+
"size": 161,
|
| 5 |
+
"items": [
|
| 6 |
+
"Соответственно",
|
| 7 |
+
"соответственно",
|
| 8 |
+
"Действительно",
|
| 9 |
+
"действительно",
|
| 10 |
+
"естесственно",
|
| 11 |
+
"естестевенно",
|
| 12 |
+
"В-четвертых",
|
| 13 |
+
"Естественно",
|
| 14 |
+
"Определенно",
|
| 15 |
+
"По-видимому",
|
| 16 |
+
"безусловнее",
|
| 17 |
+
"в-последних",
|
| 18 |
+
"в-четвертых",
|
| 19 |
+
"естественно",
|
| 20 |
+
"оказывается",
|
| 21 |
+
"по-видимому",
|
| 22 |
+
"Безусловно",
|
| 23 |
+
"З-з-значит",
|
| 24 |
+
"Наконец-то",
|
| 25 |
+
"Несомненно",
|
| 26 |
+
"Разумеется",
|
| 27 |
+
"Собственно",
|
| 28 |
+
"Фактически",
|
| 29 |
+
"безусловно",
|
| 30 |
+
"желательно",
|
| 31 |
+
"наконец-то",
|
| 32 |
+
"несомненно",
|
| 33 |
+
"разумеется",
|
| 34 |
+
"собственно",
|
| 35 |
+
"фактически",
|
| 36 |
+
"Бесспорно",
|
| 37 |
+
"В-третьих",
|
| 38 |
+
"Во первых",
|
| 39 |
+
"Во-вторых",
|
| 40 |
+
"Во-первых",
|
| 41 |
+
"Вообще-то",
|
| 42 |
+
"Наверняка",
|
| 43 |
+
"бесспорно",
|
| 44 |
+
"в-седьмых",
|
| 45 |
+
"в-третьих",
|
| 46 |
+
"во-вторых",
|
| 47 |
+
"во-первых",
|
| 48 |
+
"вообще-то",
|
| 49 |
+
"говорится",
|
| 50 |
+
"наверняка",
|
| 51 |
+
"В-шестых",
|
| 52 |
+
"Вероятно",
|
| 53 |
+
"Возможно",
|
| 54 |
+
"Допустим",
|
| 55 |
+
"Казалось",
|
| 56 |
+
"Наверное",
|
| 57 |
+
"Например",
|
| 58 |
+
"Напротив",
|
| 59 |
+
"Очевидно",
|
| 60 |
+
"По-моему",
|
| 61 |
+
"Случайно",
|
| 62 |
+
"вероятно",
|
| 63 |
+
"возможно",
|
| 64 |
+
"допустим",
|
| 65 |
+
"известно",
|
| 66 |
+
"казалось",
|
| 67 |
+
"наверное",
|
| 68 |
+
"наоборот",
|
| 69 |
+
"например",
|
| 70 |
+
"напротив",
|
| 71 |
+
"нооборот",
|
| 72 |
+
"очевидно",
|
| 73 |
+
"по-моему",
|
| 74 |
+
"случайно",
|
| 75 |
+
"В-пятых",
|
| 76 |
+
"Впрочем",
|
| 77 |
+
"Главное",
|
| 78 |
+
"Говорят",
|
| 79 |
+
"Дескать",
|
| 80 |
+
"КОНЕЧНО",
|
| 81 |
+
"Кажется",
|
| 82 |
+
"Конечно",
|
| 83 |
+
"Наверно",
|
| 84 |
+
"Наконец",
|
| 85 |
+
"Пожалуй",
|
| 86 |
+
"впрочем",
|
| 87 |
+
"главное",
|
| 88 |
+
"говорят",
|
| 89 |
+
"дескать",
|
| 90 |
+
"кажется",
|
| 91 |
+
"конечно",
|
| 92 |
+
"наверно",
|
| 93 |
+
"наконец",
|
| 94 |
+
"пожалуй",
|
| 95 |
+
"помойму",
|
| 96 |
+
"понятно",
|
| 97 |
+
"почитай",
|
| 98 |
+
"собссно",
|
| 99 |
+
"спасибо",
|
| 100 |
+
"Бывало",
|
| 101 |
+
"Вернее",
|
| 102 |
+
"Видать",
|
| 103 |
+
"Видимо",
|
| 104 |
+
"Вообще",
|
| 105 |
+
"Значит",
|
| 106 |
+
"Короче",
|
| 107 |
+
"Кстати",
|
| 108 |
+
"Однако",
|
| 109 |
+
"Похоже",
|
| 110 |
+
"Правда",
|
| 111 |
+
"Скажем",
|
| 112 |
+
"Словом",
|
| 113 |
+
"Точнее",
|
| 114 |
+
"ХОРОШО",
|
| 115 |
+
"Хорошо",
|
| 116 |
+
"Честно",
|
| 117 |
+
"бывало",
|
| 118 |
+
"ваапче",
|
| 119 |
+
"вернее",
|
| 120 |
+
"видать",
|
| 121 |
+
"видимо",
|
| 122 |
+
"вообще",
|
| 123 |
+
"впрямь",
|
| 124 |
+
"значит",
|
| 125 |
+
"канешн",
|
| 126 |
+
"короче",
|
| 127 |
+
"кстати",
|
| 128 |
+
"однако",
|
| 129 |
+
"первых",
|
| 130 |
+
"похоже",
|
| 131 |
+
"правда",
|
| 132 |
+
"правдо",
|
| 133 |
+
"просто",
|
| 134 |
+
"скажем",
|
| 135 |
+
"скорее",
|
| 136 |
+
"словом",
|
| 137 |
+
"точнее",
|
| 138 |
+
"хорошо",
|
| 139 |
+
"Верно",
|
| 140 |
+
"Видно",
|
| 141 |
+
"Жалко",
|
| 142 |
+
"Лучше",
|
| 143 |
+
"Может",
|
| 144 |
+
"Никак",
|
| 145 |
+
"Точно",
|
| 146 |
+
"верно",
|
| 147 |
+
"видно",
|
| 148 |
+
"вобще",
|
| 149 |
+
"жалко",
|
| 150 |
+
"знать",
|
| 151 |
+
"канеш",
|
| 152 |
+
"лучше",
|
| 153 |
+
"может",
|
| 154 |
+
"никак",
|
| 155 |
+
"право",
|
| 156 |
+
"точно",
|
| 157 |
+
"Жаль",
|
| 158 |
+
"ИМХО",
|
| 159 |
+
"Итак",
|
| 160 |
+
"ваще",
|
| 161 |
+
"жаль",
|
| 162 |
+
"имхо",
|
| 163 |
+
"МОЛ",
|
| 164 |
+
"Мол",
|
| 165 |
+
"мол",
|
| 166 |
+
"чай"
|
| 167 |
+
]
|
| 168 |
+
}
|
assets/gazetteer.txt
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Соответственно
|
| 2 |
+
соответственно
|
| 3 |
+
Действительно
|
| 4 |
+
действительно
|
| 5 |
+
естесственно
|
| 6 |
+
естестевенно
|
| 7 |
+
В-четвертых
|
| 8 |
+
Естественно
|
| 9 |
+
Определенно
|
| 10 |
+
По-видимому
|
| 11 |
+
безусловнее
|
| 12 |
+
в-последних
|
| 13 |
+
в-четвертых
|
| 14 |
+
естественно
|
| 15 |
+
оказывается
|
| 16 |
+
по-видимому
|
| 17 |
+
Безусловно
|
| 18 |
+
З-з-значит
|
| 19 |
+
Наконец-то
|
| 20 |
+
Несомненно
|
| 21 |
+
Разумеется
|
| 22 |
+
Собственно
|
| 23 |
+
Фактически
|
| 24 |
+
безусловно
|
| 25 |
+
желательно
|
| 26 |
+
наконец-то
|
| 27 |
+
несомненно
|
| 28 |
+
разумеется
|
| 29 |
+
собственно
|
| 30 |
+
фактически
|
| 31 |
+
Бесспорно
|
| 32 |
+
В-третьих
|
| 33 |
+
Во первых
|
| 34 |
+
Во-вторых
|
| 35 |
+
Во-первых
|
| 36 |
+
Вообще-то
|
| 37 |
+
Наверняка
|
| 38 |
+
бесспорно
|
| 39 |
+
в-седьмых
|
| 40 |
+
в-третьих
|
| 41 |
+
во-вторых
|
| 42 |
+
во-первых
|
| 43 |
+
вообще-то
|
| 44 |
+
говорится
|
| 45 |
+
наверняка
|
| 46 |
+
В-шестых
|
| 47 |
+
Вероятно
|
| 48 |
+
Возможно
|
| 49 |
+
Допустим
|
| 50 |
+
Казалось
|
| 51 |
+
Наверное
|
| 52 |
+
Например
|
| 53 |
+
Напротив
|
| 54 |
+
Очевидно
|
| 55 |
+
По-моему
|
| 56 |
+
Случайно
|
| 57 |
+
вероятно
|
| 58 |
+
возможно
|
| 59 |
+
допустим
|
| 60 |
+
известно
|
| 61 |
+
казалось
|
| 62 |
+
наверное
|
| 63 |
+
наоборот
|
| 64 |
+
например
|
| 65 |
+
напротив
|
| 66 |
+
нооборот
|
| 67 |
+
очевидно
|
| 68 |
+
по-моему
|
| 69 |
+
случайно
|
| 70 |
+
В-пятых
|
| 71 |
+
Впрочем
|
| 72 |
+
Главное
|
| 73 |
+
Говорят
|
| 74 |
+
Дескать
|
| 75 |
+
КОНЕЧНО
|
| 76 |
+
Кажется
|
| 77 |
+
Конечно
|
| 78 |
+
Наверно
|
| 79 |
+
Наконец
|
| 80 |
+
Пожалуй
|
| 81 |
+
впрочем
|
| 82 |
+
главное
|
| 83 |
+
говорят
|
| 84 |
+
дескать
|
| 85 |
+
кажется
|
| 86 |
+
конечно
|
| 87 |
+
наверно
|
| 88 |
+
наконец
|
| 89 |
+
пожалуй
|
| 90 |
+
помойму
|
| 91 |
+
понятно
|
| 92 |
+
почитай
|
| 93 |
+
собссно
|
| 94 |
+
спасибо
|
| 95 |
+
Бывало
|
| 96 |
+
Вернее
|
| 97 |
+
Видать
|
| 98 |
+
Видимо
|
| 99 |
+
Вообще
|
| 100 |
+
Значит
|
| 101 |
+
Короче
|
| 102 |
+
Кстати
|
| 103 |
+
Однако
|
| 104 |
+
Похоже
|
| 105 |
+
Правда
|
| 106 |
+
Скажем
|
| 107 |
+
Словом
|
| 108 |
+
Точнее
|
| 109 |
+
ХОРОШО
|
| 110 |
+
Хорошо
|
| 111 |
+
Честно
|
| 112 |
+
бывало
|
| 113 |
+
ваапче
|
| 114 |
+
вернее
|
| 115 |
+
видать
|
| 116 |
+
видимо
|
| 117 |
+
вообще
|
| 118 |
+
впрямь
|
| 119 |
+
значит
|
| 120 |
+
канешн
|
| 121 |
+
короче
|
| 122 |
+
кстати
|
| 123 |
+
однако
|
| 124 |
+
первых
|
| 125 |
+
похоже
|
| 126 |
+
правда
|
| 127 |
+
правдо
|
| 128 |
+
просто
|
| 129 |
+
скажем
|
| 130 |
+
скорее
|
| 131 |
+
словом
|
| 132 |
+
точнее
|
| 133 |
+
хорошо
|
| 134 |
+
Верно
|
| 135 |
+
Видно
|
| 136 |
+
Жалко
|
| 137 |
+
Лучше
|
| 138 |
+
Может
|
| 139 |
+
Никак
|
| 140 |
+
Точно
|
| 141 |
+
верно
|
| 142 |
+
видно
|
| 143 |
+
вобще
|
| 144 |
+
жалко
|
| 145 |
+
знать
|
| 146 |
+
канеш
|
| 147 |
+
лучше
|
| 148 |
+
может
|
| 149 |
+
никак
|
| 150 |
+
право
|
| 151 |
+
точно
|
| 152 |
+
Жаль
|
| 153 |
+
ИМХО
|
| 154 |
+
Итак
|
| 155 |
+
ваще
|
| 156 |
+
жаль
|
| 157 |
+
имхо
|
| 158 |
+
МОЛ
|
| 159 |
+
Мол
|
| 160 |
+
мол
|
| 161 |
+
чай
|
checkpoint-1131/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</cand>": 119548,
|
| 3 |
+
"<cand>": 119547
|
| 4 |
+
}
|
checkpoint-1131/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "DeepPavlov/rubert-base-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.49.0",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 119549
|
| 32 |
+
}
|
checkpoint-1131/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec9b4e02309731a17a0e550199dfc0b653b8b2efb72a87cbe399f458df4d060a
|
| 3 |
+
size 711449600
|
checkpoint-1131/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2cb20821891b7df7b9cffbd730ee591d833df2c136e22191a72e76023bd1592
|
| 3 |
+
size 1423014650
|
checkpoint-1131/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e6a15a956ca04b9fd80efa1f51e14a191a110a65c311c93eab6f494f62ceade
|
| 3 |
+
size 13990
|
checkpoint-1131/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62d3f58ffe473a793ea74276e88ca54ec1783dc62272aa59b827b2e2d708ac48
|
| 3 |
+
size 1064
|
checkpoint-1131/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "<cand>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"content": "</cand>",
|
| 12 |
+
"lstrip": false,
|
| 13 |
+
"normalized": false,
|
| 14 |
+
"rstrip": false,
|
| 15 |
+
"single_word": false
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"cls_token": "[CLS]",
|
| 19 |
+
"mask_token": "[MASK]",
|
| 20 |
+
"pad_token": "[PAD]",
|
| 21 |
+
"sep_token": "[SEP]",
|
| 22 |
+
"unk_token": "[UNK]"
|
| 23 |
+
}
|
checkpoint-1131/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-1131/tokenizer_config.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"119547": {
|
| 44 |
+
"content": "<cand>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"119548": {
|
| 52 |
+
"content": "</cand>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"additional_special_tokens": [
|
| 61 |
+
"<cand>",
|
| 62 |
+
"</cand>"
|
| 63 |
+
],
|
| 64 |
+
"clean_up_tokenization_spaces": true,
|
| 65 |
+
"cls_token": "[CLS]",
|
| 66 |
+
"do_basic_tokenize": true,
|
| 67 |
+
"do_lower_case": false,
|
| 68 |
+
"extra_special_tokens": {},
|
| 69 |
+
"mask_token": "[MASK]",
|
| 70 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 71 |
+
"never_split": null,
|
| 72 |
+
"pad_token": "[PAD]",
|
| 73 |
+
"sep_token": "[SEP]",
|
| 74 |
+
"strip_accents": null,
|
| 75 |
+
"tokenize_chinese_chars": true,
|
| 76 |
+
"tokenizer_class": "BertTokenizer",
|
| 77 |
+
"unk_token": "[UNK]"
|
| 78 |
+
}
|
checkpoint-1131/trainer_state.json
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.9873248832555037,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1131",
|
| 4 |
+
"epoch": 3.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 1131,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 8.819928169250488,
|
| 14 |
+
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6464,
|
| 16 |
+
"step": 50
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 6.598285675048828,
|
| 21 |
+
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.388,
|
| 23 |
+
"step": 100
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 0.30871227383613586,
|
| 28 |
+
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1931,
|
| 30 |
+
"step": 150
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 6.666228294372559,
|
| 35 |
+
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1591,
|
| 37 |
+
"step": 200
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 0.44178861379623413,
|
| 42 |
+
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1984,
|
| 44 |
+
"step": 250
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 0.37462666630744934,
|
| 49 |
+
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1124,
|
| 51 |
+
"step": 300
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.0416572205722332,
|
| 56 |
+
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.0809,
|
| 58 |
+
"step": 350
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9812206572769953,
|
| 63 |
+
"eval_f1": 0.9865410497981157,
|
| 64 |
+
"eval_loss": 0.0956883653998375,
|
| 65 |
+
"eval_precision": 0.9932249322493225,
|
| 66 |
+
"eval_recall": 0.9799465240641712,
|
| 67 |
+
"eval_runtime": 60.5192,
|
| 68 |
+
"eval_samples_per_second": 17.598,
|
| 69 |
+
"eval_steps_per_second": 1.107,
|
| 70 |
+
"step": 377
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 8.403841018676758,
|
| 75 |
+
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0611,
|
| 77 |
+
"step": 400
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.022825542837381363,
|
| 82 |
+
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0758,
|
| 84 |
+
"step": 450
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 97.80863952636719,
|
| 89 |
+
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0747,
|
| 91 |
+
"step": 500
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 0.03205716982483864,
|
| 96 |
+
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0719,
|
| 98 |
+
"step": 550
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 13.893011093139648,
|
| 103 |
+
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1053,
|
| 105 |
+
"step": 600
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.03504275158047676,
|
| 110 |
+
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0494,
|
| 112 |
+
"step": 650
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.11265891045331955,
|
| 117 |
+
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0142,
|
| 119 |
+
"step": 700
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.06097806990146637,
|
| 124 |
+
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.048,
|
| 126 |
+
"step": 750
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9784037558685446,
|
| 131 |
+
"eval_f1": 0.984778292521509,
|
| 132 |
+
"eval_loss": 0.17541147768497467,
|
| 133 |
+
"eval_precision": 0.9750982961992136,
|
| 134 |
+
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 65.725,
|
| 136 |
+
"eval_samples_per_second": 16.204,
|
| 137 |
+
"eval_steps_per_second": 1.019,
|
| 138 |
+
"step": 754
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 2.1220159151193636,
|
| 142 |
+
"grad_norm": 0.010624129325151443,
|
| 143 |
+
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
+
"loss": 0.0328,
|
| 145 |
+
"step": 800
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 2.2546419098143238,
|
| 149 |
+
"grad_norm": 0.009882211685180664,
|
| 150 |
+
"learning_rate": 9.697862932940311e-06,
|
| 151 |
+
"loss": 0.0254,
|
| 152 |
+
"step": 850
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"epoch": 2.387267904509284,
|
| 156 |
+
"grad_norm": 0.006466939579695463,
|
| 157 |
+
"learning_rate": 8.960943257184968e-06,
|
| 158 |
+
"loss": 0.0412,
|
| 159 |
+
"step": 900
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"epoch": 2.519893899204244,
|
| 163 |
+
"grad_norm": 0.025009147822856903,
|
| 164 |
+
"learning_rate": 8.224023581429625e-06,
|
| 165 |
+
"loss": 0.0377,
|
| 166 |
+
"step": 950
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"epoch": 2.6525198938992043,
|
| 170 |
+
"grad_norm": 16.0838565826416,
|
| 171 |
+
"learning_rate": 7.487103905674282e-06,
|
| 172 |
+
"loss": 0.0263,
|
| 173 |
+
"step": 1000
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 2.7851458885941645,
|
| 177 |
+
"grad_norm": 0.006907904986292124,
|
| 178 |
+
"learning_rate": 6.750184229918939e-06,
|
| 179 |
+
"loss": 0.0039,
|
| 180 |
+
"step": 1050
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 2.9177718832891246,
|
| 184 |
+
"grad_norm": 0.03146808221936226,
|
| 185 |
+
"learning_rate": 6.013264554163597e-06,
|
| 186 |
+
"loss": 0.0266,
|
| 187 |
+
"step": 1100
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"epoch": 3.0,
|
| 191 |
+
"eval_accuracy": 0.9821596244131455,
|
| 192 |
+
"eval_f1": 0.9873248832555037,
|
| 193 |
+
"eval_loss": 0.12112097442150116,
|
| 194 |
+
"eval_precision": 0.9853528628495339,
|
| 195 |
+
"eval_recall": 0.9893048128342246,
|
| 196 |
+
"eval_runtime": 65.4812,
|
| 197 |
+
"eval_samples_per_second": 16.264,
|
| 198 |
+
"eval_steps_per_second": 1.023,
|
| 199 |
+
"step": 1131
|
| 200 |
+
}
|
| 201 |
+
],
|
| 202 |
+
"logging_steps": 50,
|
| 203 |
+
"max_steps": 1508,
|
| 204 |
+
"num_input_tokens_seen": 0,
|
| 205 |
+
"num_train_epochs": 4,
|
| 206 |
+
"save_steps": 500,
|
| 207 |
+
"stateful_callbacks": {
|
| 208 |
+
"TrainerControl": {
|
| 209 |
+
"args": {
|
| 210 |
+
"should_epoch_stop": false,
|
| 211 |
+
"should_evaluate": false,
|
| 212 |
+
"should_log": false,
|
| 213 |
+
"should_save": true,
|
| 214 |
+
"should_training_stop": false
|
| 215 |
+
},
|
| 216 |
+
"attributes": {}
|
| 217 |
+
}
|
| 218 |
+
},
|
| 219 |
+
"total_flos": 639096753469440.0,
|
| 220 |
+
"train_batch_size": 16,
|
| 221 |
+
"trial_name": null,
|
| 222 |
+
"trial_params": null
|
| 223 |
+
}
|
checkpoint-1131/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
|
| 3 |
+
size 5304
|
checkpoint-1131/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-1508/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</cand>": 119548,
|
| 3 |
+
"<cand>": 119547
|
| 4 |
+
}
|
checkpoint-1508/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "DeepPavlov/rubert-base-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.49.0",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 119549
|
| 32 |
+
}
|
checkpoint-1508/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b30d84210b336753e94b844397015ae6635e4a978e6b132eaca6da156c50aead
|
| 3 |
+
size 711449600
|
checkpoint-1508/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9b811be240a41d4a804950f308a647956d67f40ae2709923fe949706ade9b7b
|
| 3 |
+
size 1423014650
|
checkpoint-1508/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19c5c01782d6444d26abf0ebe821fd3fa952a5be7b5f26ec6e1147e4c8612b4e
|
| 3 |
+
size 13990
|
checkpoint-1508/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a047f6a531e52dd15f099073182b2a42f5f21f7c304a459f1c93a142e9a0af0
|
| 3 |
+
size 1064
|
checkpoint-1508/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "<cand>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"content": "</cand>",
|
| 12 |
+
"lstrip": false,
|
| 13 |
+
"normalized": false,
|
| 14 |
+
"rstrip": false,
|
| 15 |
+
"single_word": false
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"cls_token": "[CLS]",
|
| 19 |
+
"mask_token": "[MASK]",
|
| 20 |
+
"pad_token": "[PAD]",
|
| 21 |
+
"sep_token": "[SEP]",
|
| 22 |
+
"unk_token": "[UNK]"
|
| 23 |
+
}
|
checkpoint-1508/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-1508/tokenizer_config.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"119547": {
|
| 44 |
+
"content": "<cand>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"119548": {
|
| 52 |
+
"content": "</cand>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"additional_special_tokens": [
|
| 61 |
+
"<cand>",
|
| 62 |
+
"</cand>"
|
| 63 |
+
],
|
| 64 |
+
"clean_up_tokenization_spaces": true,
|
| 65 |
+
"cls_token": "[CLS]",
|
| 66 |
+
"do_basic_tokenize": true,
|
| 67 |
+
"do_lower_case": false,
|
| 68 |
+
"extra_special_tokens": {},
|
| 69 |
+
"mask_token": "[MASK]",
|
| 70 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 71 |
+
"never_split": null,
|
| 72 |
+
"pad_token": "[PAD]",
|
| 73 |
+
"sep_token": "[SEP]",
|
| 74 |
+
"strip_accents": null,
|
| 75 |
+
"tokenize_chinese_chars": true,
|
| 76 |
+
"tokenizer_class": "BertTokenizer",
|
| 77 |
+
"unk_token": "[UNK]"
|
| 78 |
+
}
|
checkpoint-1508/trainer_state.json
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.9899665551839465,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-1508",
|
| 4 |
+
"epoch": 4.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 1508,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 8.819928169250488,
|
| 14 |
+
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6464,
|
| 16 |
+
"step": 50
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 6.598285675048828,
|
| 21 |
+
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.388,
|
| 23 |
+
"step": 100
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 0.30871227383613586,
|
| 28 |
+
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1931,
|
| 30 |
+
"step": 150
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 6.666228294372559,
|
| 35 |
+
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1591,
|
| 37 |
+
"step": 200
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 0.44178861379623413,
|
| 42 |
+
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1984,
|
| 44 |
+
"step": 250
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 0.37462666630744934,
|
| 49 |
+
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1124,
|
| 51 |
+
"step": 300
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.0416572205722332,
|
| 56 |
+
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.0809,
|
| 58 |
+
"step": 350
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9812206572769953,
|
| 63 |
+
"eval_f1": 0.9865410497981157,
|
| 64 |
+
"eval_loss": 0.0956883653998375,
|
| 65 |
+
"eval_precision": 0.9932249322493225,
|
| 66 |
+
"eval_recall": 0.9799465240641712,
|
| 67 |
+
"eval_runtime": 60.5192,
|
| 68 |
+
"eval_samples_per_second": 17.598,
|
| 69 |
+
"eval_steps_per_second": 1.107,
|
| 70 |
+
"step": 377
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 8.403841018676758,
|
| 75 |
+
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0611,
|
| 77 |
+
"step": 400
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.022825542837381363,
|
| 82 |
+
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0758,
|
| 84 |
+
"step": 450
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 97.80863952636719,
|
| 89 |
+
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0747,
|
| 91 |
+
"step": 500
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 0.03205716982483864,
|
| 96 |
+
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0719,
|
| 98 |
+
"step": 550
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 13.893011093139648,
|
| 103 |
+
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1053,
|
| 105 |
+
"step": 600
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.03504275158047676,
|
| 110 |
+
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0494,
|
| 112 |
+
"step": 650
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.11265891045331955,
|
| 117 |
+
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0142,
|
| 119 |
+
"step": 700
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.06097806990146637,
|
| 124 |
+
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.048,
|
| 126 |
+
"step": 750
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9784037558685446,
|
| 131 |
+
"eval_f1": 0.984778292521509,
|
| 132 |
+
"eval_loss": 0.17541147768497467,
|
| 133 |
+
"eval_precision": 0.9750982961992136,
|
| 134 |
+
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 65.725,
|
| 136 |
+
"eval_samples_per_second": 16.204,
|
| 137 |
+
"eval_steps_per_second": 1.019,
|
| 138 |
+
"step": 754
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"epoch": 2.1220159151193636,
|
| 142 |
+
"grad_norm": 0.010624129325151443,
|
| 143 |
+
"learning_rate": 1.0434782608695653e-05,
|
| 144 |
+
"loss": 0.0328,
|
| 145 |
+
"step": 800
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"epoch": 2.2546419098143238,
|
| 149 |
+
"grad_norm": 0.009882211685180664,
|
| 150 |
+
"learning_rate": 9.697862932940311e-06,
|
| 151 |
+
"loss": 0.0254,
|
| 152 |
+
"step": 850
|
| 153 |
+
},
|
| 154 |
+
{
|
| 155 |
+
"epoch": 2.387267904509284,
|
| 156 |
+
"grad_norm": 0.006466939579695463,
|
| 157 |
+
"learning_rate": 8.960943257184968e-06,
|
| 158 |
+
"loss": 0.0412,
|
| 159 |
+
"step": 900
|
| 160 |
+
},
|
| 161 |
+
{
|
| 162 |
+
"epoch": 2.519893899204244,
|
| 163 |
+
"grad_norm": 0.025009147822856903,
|
| 164 |
+
"learning_rate": 8.224023581429625e-06,
|
| 165 |
+
"loss": 0.0377,
|
| 166 |
+
"step": 950
|
| 167 |
+
},
|
| 168 |
+
{
|
| 169 |
+
"epoch": 2.6525198938992043,
|
| 170 |
+
"grad_norm": 16.0838565826416,
|
| 171 |
+
"learning_rate": 7.487103905674282e-06,
|
| 172 |
+
"loss": 0.0263,
|
| 173 |
+
"step": 1000
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"epoch": 2.7851458885941645,
|
| 177 |
+
"grad_norm": 0.006907904986292124,
|
| 178 |
+
"learning_rate": 6.750184229918939e-06,
|
| 179 |
+
"loss": 0.0039,
|
| 180 |
+
"step": 1050
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"epoch": 2.9177718832891246,
|
| 184 |
+
"grad_norm": 0.03146808221936226,
|
| 185 |
+
"learning_rate": 6.013264554163597e-06,
|
| 186 |
+
"loss": 0.0266,
|
| 187 |
+
"step": 1100
|
| 188 |
+
},
|
| 189 |
+
{
|
| 190 |
+
"epoch": 3.0,
|
| 191 |
+
"eval_accuracy": 0.9821596244131455,
|
| 192 |
+
"eval_f1": 0.9873248832555037,
|
| 193 |
+
"eval_loss": 0.12112097442150116,
|
| 194 |
+
"eval_precision": 0.9853528628495339,
|
| 195 |
+
"eval_recall": 0.9893048128342246,
|
| 196 |
+
"eval_runtime": 65.4812,
|
| 197 |
+
"eval_samples_per_second": 16.264,
|
| 198 |
+
"eval_steps_per_second": 1.023,
|
| 199 |
+
"step": 1131
|
| 200 |
+
},
|
| 201 |
+
{
|
| 202 |
+
"epoch": 3.050397877984085,
|
| 203 |
+
"grad_norm": 0.00711169233545661,
|
| 204 |
+
"learning_rate": 5.276344878408254e-06,
|
| 205 |
+
"loss": 0.0191,
|
| 206 |
+
"step": 1150
|
| 207 |
+
},
|
| 208 |
+
{
|
| 209 |
+
"epoch": 3.183023872679045,
|
| 210 |
+
"grad_norm": 0.10712441056966782,
|
| 211 |
+
"learning_rate": 4.5394252026529115e-06,
|
| 212 |
+
"loss": 0.0079,
|
| 213 |
+
"step": 1200
|
| 214 |
+
},
|
| 215 |
+
{
|
| 216 |
+
"epoch": 3.315649867374005,
|
| 217 |
+
"grad_norm": 0.014097067527472973,
|
| 218 |
+
"learning_rate": 3.8025055268975686e-06,
|
| 219 |
+
"loss": 0.0218,
|
| 220 |
+
"step": 1250
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"epoch": 3.4482758620689653,
|
| 224 |
+
"grad_norm": 0.08094095438718796,
|
| 225 |
+
"learning_rate": 3.065585851142226e-06,
|
| 226 |
+
"loss": 0.0053,
|
| 227 |
+
"step": 1300
|
| 228 |
+
},
|
| 229 |
+
{
|
| 230 |
+
"epoch": 3.5809018567639255,
|
| 231 |
+
"grad_norm": 0.012457519769668579,
|
| 232 |
+
"learning_rate": 2.328666175386883e-06,
|
| 233 |
+
"loss": 0.0003,
|
| 234 |
+
"step": 1350
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"epoch": 3.713527851458886,
|
| 238 |
+
"grad_norm": 0.05693735554814339,
|
| 239 |
+
"learning_rate": 1.59174649963154e-06,
|
| 240 |
+
"loss": 0.0003,
|
| 241 |
+
"step": 1400
|
| 242 |
+
},
|
| 243 |
+
{
|
| 244 |
+
"epoch": 3.8461538461538463,
|
| 245 |
+
"grad_norm": 0.004445453640073538,
|
| 246 |
+
"learning_rate": 8.548268238761975e-07,
|
| 247 |
+
"loss": 0.0246,
|
| 248 |
+
"step": 1450
|
| 249 |
+
},
|
| 250 |
+
{
|
| 251 |
+
"epoch": 3.9787798408488064,
|
| 252 |
+
"grad_norm": 0.004754351451992989,
|
| 253 |
+
"learning_rate": 1.1790714812085484e-07,
|
| 254 |
+
"loss": 0.0111,
|
| 255 |
+
"step": 1500
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"epoch": 4.0,
|
| 259 |
+
"eval_accuracy": 0.9859154929577465,
|
| 260 |
+
"eval_f1": 0.9899665551839465,
|
| 261 |
+
"eval_loss": 0.10738077014684677,
|
| 262 |
+
"eval_precision": 0.9906291834002677,
|
| 263 |
+
"eval_recall": 0.9893048128342246,
|
| 264 |
+
"eval_runtime": 65.4731,
|
| 265 |
+
"eval_samples_per_second": 16.266,
|
| 266 |
+
"eval_steps_per_second": 1.023,
|
| 267 |
+
"step": 1508
|
| 268 |
+
}
|
| 269 |
+
],
|
| 270 |
+
"logging_steps": 50,
|
| 271 |
+
"max_steps": 1508,
|
| 272 |
+
"num_input_tokens_seen": 0,
|
| 273 |
+
"num_train_epochs": 4,
|
| 274 |
+
"save_steps": 500,
|
| 275 |
+
"stateful_callbacks": {
|
| 276 |
+
"TrainerControl": {
|
| 277 |
+
"args": {
|
| 278 |
+
"should_epoch_stop": false,
|
| 279 |
+
"should_evaluate": false,
|
| 280 |
+
"should_log": false,
|
| 281 |
+
"should_save": true,
|
| 282 |
+
"should_training_stop": true
|
| 283 |
+
},
|
| 284 |
+
"attributes": {}
|
| 285 |
+
}
|
| 286 |
+
},
|
| 287 |
+
"total_flos": 850572264215040.0,
|
| 288 |
+
"train_batch_size": 16,
|
| 289 |
+
"trial_name": null,
|
| 290 |
+
"trial_params": null
|
| 291 |
+
}
|
checkpoint-1508/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
|
| 3 |
+
size 5304
|
checkpoint-1508/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-377/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</cand>": 119548,
|
| 3 |
+
"<cand>": 119547
|
| 4 |
+
}
|
checkpoint-377/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "DeepPavlov/rubert-base-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.49.0",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 119549
|
| 32 |
+
}
|
checkpoint-377/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ded7c527bf4f9cf448e7a1f8c244f442ee35e8ddf0b77ce3ce54bb9f8e4ce263
|
| 3 |
+
size 711449600
|
checkpoint-377/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:155a5a5f11c545764eead711ae7536af829e153aa81aca1630679af82398d252
|
| 3 |
+
size 1423014650
|
checkpoint-377/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:779680481c3672208d95f7d276d71d8000a74b3b459885f98af7ca5ec5fc3b24
|
| 3 |
+
size 13990
|
checkpoint-377/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c7699596f69ddac9b184d0c8e7b8faac5edb9ce845a40964370e105bb5de53f2
|
| 3 |
+
size 1064
|
checkpoint-377/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "<cand>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"content": "</cand>",
|
| 12 |
+
"lstrip": false,
|
| 13 |
+
"normalized": false,
|
| 14 |
+
"rstrip": false,
|
| 15 |
+
"single_word": false
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"cls_token": "[CLS]",
|
| 19 |
+
"mask_token": "[MASK]",
|
| 20 |
+
"pad_token": "[PAD]",
|
| 21 |
+
"sep_token": "[SEP]",
|
| 22 |
+
"unk_token": "[UNK]"
|
| 23 |
+
}
|
checkpoint-377/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-377/tokenizer_config.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"119547": {
|
| 44 |
+
"content": "<cand>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"119548": {
|
| 52 |
+
"content": "</cand>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"additional_special_tokens": [
|
| 61 |
+
"<cand>",
|
| 62 |
+
"</cand>"
|
| 63 |
+
],
|
| 64 |
+
"clean_up_tokenization_spaces": true,
|
| 65 |
+
"cls_token": "[CLS]",
|
| 66 |
+
"do_basic_tokenize": true,
|
| 67 |
+
"do_lower_case": false,
|
| 68 |
+
"extra_special_tokens": {},
|
| 69 |
+
"mask_token": "[MASK]",
|
| 70 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 71 |
+
"never_split": null,
|
| 72 |
+
"pad_token": "[PAD]",
|
| 73 |
+
"sep_token": "[SEP]",
|
| 74 |
+
"strip_accents": null,
|
| 75 |
+
"tokenize_chinese_chars": true,
|
| 76 |
+
"tokenizer_class": "BertTokenizer",
|
| 77 |
+
"unk_token": "[UNK]"
|
| 78 |
+
}
|
checkpoint-377/trainer_state.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.9865410497981157,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
|
| 4 |
+
"epoch": 1.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 377,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 8.819928169250488,
|
| 14 |
+
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6464,
|
| 16 |
+
"step": 50
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 6.598285675048828,
|
| 21 |
+
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.388,
|
| 23 |
+
"step": 100
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 0.30871227383613586,
|
| 28 |
+
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1931,
|
| 30 |
+
"step": 150
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 6.666228294372559,
|
| 35 |
+
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1591,
|
| 37 |
+
"step": 200
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 0.44178861379623413,
|
| 42 |
+
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1984,
|
| 44 |
+
"step": 250
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 0.37462666630744934,
|
| 49 |
+
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1124,
|
| 51 |
+
"step": 300
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.0416572205722332,
|
| 56 |
+
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.0809,
|
| 58 |
+
"step": 350
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9812206572769953,
|
| 63 |
+
"eval_f1": 0.9865410497981157,
|
| 64 |
+
"eval_loss": 0.0956883653998375,
|
| 65 |
+
"eval_precision": 0.9932249322493225,
|
| 66 |
+
"eval_recall": 0.9799465240641712,
|
| 67 |
+
"eval_runtime": 60.5192,
|
| 68 |
+
"eval_samples_per_second": 17.598,
|
| 69 |
+
"eval_steps_per_second": 1.107,
|
| 70 |
+
"step": 377
|
| 71 |
+
}
|
| 72 |
+
],
|
| 73 |
+
"logging_steps": 50,
|
| 74 |
+
"max_steps": 1508,
|
| 75 |
+
"num_input_tokens_seen": 0,
|
| 76 |
+
"num_train_epochs": 4,
|
| 77 |
+
"save_steps": 500,
|
| 78 |
+
"stateful_callbacks": {
|
| 79 |
+
"TrainerControl": {
|
| 80 |
+
"args": {
|
| 81 |
+
"should_epoch_stop": false,
|
| 82 |
+
"should_evaluate": false,
|
| 83 |
+
"should_log": false,
|
| 84 |
+
"should_save": true,
|
| 85 |
+
"should_training_stop": false
|
| 86 |
+
},
|
| 87 |
+
"attributes": {}
|
| 88 |
+
}
|
| 89 |
+
},
|
| 90 |
+
"total_flos": 213580399188480.0,
|
| 91 |
+
"train_batch_size": 16,
|
| 92 |
+
"trial_name": null,
|
| 93 |
+
"trial_params": null
|
| 94 |
+
}
|
checkpoint-377/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
|
| 3 |
+
size 5304
|
checkpoint-377/vocab.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-754/added_tokens.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</cand>": 119548,
|
| 3 |
+
"<cand>": 119547
|
| 4 |
+
}
|
checkpoint-754/config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "DeepPavlov/rubert-base-cased",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"BertForSequenceClassification"
|
| 5 |
+
],
|
| 6 |
+
"attention_probs_dropout_prob": 0.1,
|
| 7 |
+
"classifier_dropout": null,
|
| 8 |
+
"directionality": "bidi",
|
| 9 |
+
"hidden_act": "gelu",
|
| 10 |
+
"hidden_dropout_prob": 0.1,
|
| 11 |
+
"hidden_size": 768,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3072,
|
| 14 |
+
"layer_norm_eps": 1e-12,
|
| 15 |
+
"max_position_embeddings": 512,
|
| 16 |
+
"model_type": "bert",
|
| 17 |
+
"num_attention_heads": 12,
|
| 18 |
+
"num_hidden_layers": 12,
|
| 19 |
+
"output_past": true,
|
| 20 |
+
"pad_token_id": 0,
|
| 21 |
+
"pooler_fc_size": 768,
|
| 22 |
+
"pooler_num_attention_heads": 12,
|
| 23 |
+
"pooler_num_fc_layers": 3,
|
| 24 |
+
"pooler_size_per_head": 128,
|
| 25 |
+
"pooler_type": "first_token_transform",
|
| 26 |
+
"position_embedding_type": "absolute",
|
| 27 |
+
"torch_dtype": "float32",
|
| 28 |
+
"transformers_version": "4.49.0",
|
| 29 |
+
"type_vocab_size": 2,
|
| 30 |
+
"use_cache": true,
|
| 31 |
+
"vocab_size": 119549
|
| 32 |
+
}
|
checkpoint-754/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abe96bf17c5ab694697666cdeed273085c6e509493dc0d2f29322ae07db9ad68
|
| 3 |
+
size 711449600
|
checkpoint-754/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44199bd2dbf22a5f947f048391a17d003c7e0d73ef60c43dffd44b21ea64cde3
|
| 3 |
+
size 1423014650
|
checkpoint-754/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2a22678f170ec99a58395a30e6a3f31da12a92e843748aa42dcbf2fae10eeff
|
| 3 |
+
size 13990
|
checkpoint-754/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8f67f5a885f6273acf3a74afc30c00afa53b55a5312a9a33f7b64a37dbd79ca
|
| 3 |
+
size 1064
|
checkpoint-754/special_tokens_map.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
{
|
| 4 |
+
"content": "<cand>",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false
|
| 9 |
+
},
|
| 10 |
+
{
|
| 11 |
+
"content": "</cand>",
|
| 12 |
+
"lstrip": false,
|
| 13 |
+
"normalized": false,
|
| 14 |
+
"rstrip": false,
|
| 15 |
+
"single_word": false
|
| 16 |
+
}
|
| 17 |
+
],
|
| 18 |
+
"cls_token": "[CLS]",
|
| 19 |
+
"mask_token": "[MASK]",
|
| 20 |
+
"pad_token": "[PAD]",
|
| 21 |
+
"sep_token": "[SEP]",
|
| 22 |
+
"unk_token": "[UNK]"
|
| 23 |
+
}
|
checkpoint-754/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-754/tokenizer_config.json
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"added_tokens_decoder": {
|
| 3 |
+
"0": {
|
| 4 |
+
"content": "[PAD]",
|
| 5 |
+
"lstrip": false,
|
| 6 |
+
"normalized": false,
|
| 7 |
+
"rstrip": false,
|
| 8 |
+
"single_word": false,
|
| 9 |
+
"special": true
|
| 10 |
+
},
|
| 11 |
+
"100": {
|
| 12 |
+
"content": "[UNK]",
|
| 13 |
+
"lstrip": false,
|
| 14 |
+
"normalized": false,
|
| 15 |
+
"rstrip": false,
|
| 16 |
+
"single_word": false,
|
| 17 |
+
"special": true
|
| 18 |
+
},
|
| 19 |
+
"101": {
|
| 20 |
+
"content": "[CLS]",
|
| 21 |
+
"lstrip": false,
|
| 22 |
+
"normalized": false,
|
| 23 |
+
"rstrip": false,
|
| 24 |
+
"single_word": false,
|
| 25 |
+
"special": true
|
| 26 |
+
},
|
| 27 |
+
"102": {
|
| 28 |
+
"content": "[SEP]",
|
| 29 |
+
"lstrip": false,
|
| 30 |
+
"normalized": false,
|
| 31 |
+
"rstrip": false,
|
| 32 |
+
"single_word": false,
|
| 33 |
+
"special": true
|
| 34 |
+
},
|
| 35 |
+
"103": {
|
| 36 |
+
"content": "[MASK]",
|
| 37 |
+
"lstrip": false,
|
| 38 |
+
"normalized": false,
|
| 39 |
+
"rstrip": false,
|
| 40 |
+
"single_word": false,
|
| 41 |
+
"special": true
|
| 42 |
+
},
|
| 43 |
+
"119547": {
|
| 44 |
+
"content": "<cand>",
|
| 45 |
+
"lstrip": false,
|
| 46 |
+
"normalized": false,
|
| 47 |
+
"rstrip": false,
|
| 48 |
+
"single_word": false,
|
| 49 |
+
"special": true
|
| 50 |
+
},
|
| 51 |
+
"119548": {
|
| 52 |
+
"content": "</cand>",
|
| 53 |
+
"lstrip": false,
|
| 54 |
+
"normalized": false,
|
| 55 |
+
"rstrip": false,
|
| 56 |
+
"single_word": false,
|
| 57 |
+
"special": true
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"additional_special_tokens": [
|
| 61 |
+
"<cand>",
|
| 62 |
+
"</cand>"
|
| 63 |
+
],
|
| 64 |
+
"clean_up_tokenization_spaces": true,
|
| 65 |
+
"cls_token": "[CLS]",
|
| 66 |
+
"do_basic_tokenize": true,
|
| 67 |
+
"do_lower_case": false,
|
| 68 |
+
"extra_special_tokens": {},
|
| 69 |
+
"mask_token": "[MASK]",
|
| 70 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 71 |
+
"never_split": null,
|
| 72 |
+
"pad_token": "[PAD]",
|
| 73 |
+
"sep_token": "[SEP]",
|
| 74 |
+
"strip_accents": null,
|
| 75 |
+
"tokenize_chinese_chars": true,
|
| 76 |
+
"tokenizer_class": "BertTokenizer",
|
| 77 |
+
"unk_token": "[UNK]"
|
| 78 |
+
}
|
checkpoint-754/trainer_state.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": 0.9865410497981157,
|
| 3 |
+
"best_model_checkpoint": "./DiMa_new_artifacts\\checkpoint-377",
|
| 4 |
+
"epoch": 2.0,
|
| 5 |
+
"eval_steps": 500,
|
| 6 |
+
"global_step": 754,
|
| 7 |
+
"is_hyper_param_search": false,
|
| 8 |
+
"is_local_process_zero": true,
|
| 9 |
+
"is_world_process_zero": true,
|
| 10 |
+
"log_history": [
|
| 11 |
+
{
|
| 12 |
+
"epoch": 0.13262599469496023,
|
| 13 |
+
"grad_norm": 8.819928169250488,
|
| 14 |
+
"learning_rate": 6.622516556291392e-06,
|
| 15 |
+
"loss": 0.6464,
|
| 16 |
+
"step": 50
|
| 17 |
+
},
|
| 18 |
+
{
|
| 19 |
+
"epoch": 0.26525198938992045,
|
| 20 |
+
"grad_norm": 6.598285675048828,
|
| 21 |
+
"learning_rate": 1.3245033112582784e-05,
|
| 22 |
+
"loss": 0.388,
|
| 23 |
+
"step": 100
|
| 24 |
+
},
|
| 25 |
+
{
|
| 26 |
+
"epoch": 0.3978779840848806,
|
| 27 |
+
"grad_norm": 0.30871227383613586,
|
| 28 |
+
"learning_rate": 1.9867549668874173e-05,
|
| 29 |
+
"loss": 0.1931,
|
| 30 |
+
"step": 150
|
| 31 |
+
},
|
| 32 |
+
{
|
| 33 |
+
"epoch": 0.5305039787798409,
|
| 34 |
+
"grad_norm": 6.666228294372559,
|
| 35 |
+
"learning_rate": 1.9277818717759768e-05,
|
| 36 |
+
"loss": 0.1591,
|
| 37 |
+
"step": 200
|
| 38 |
+
},
|
| 39 |
+
{
|
| 40 |
+
"epoch": 0.6631299734748011,
|
| 41 |
+
"grad_norm": 0.44178861379623413,
|
| 42 |
+
"learning_rate": 1.8540899042004423e-05,
|
| 43 |
+
"loss": 0.1984,
|
| 44 |
+
"step": 250
|
| 45 |
+
},
|
| 46 |
+
{
|
| 47 |
+
"epoch": 0.7957559681697612,
|
| 48 |
+
"grad_norm": 0.37462666630744934,
|
| 49 |
+
"learning_rate": 1.780397936624908e-05,
|
| 50 |
+
"loss": 0.1124,
|
| 51 |
+
"step": 300
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"epoch": 0.9283819628647215,
|
| 55 |
+
"grad_norm": 0.0416572205722332,
|
| 56 |
+
"learning_rate": 1.7067059690493736e-05,
|
| 57 |
+
"loss": 0.0809,
|
| 58 |
+
"step": 350
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"epoch": 1.0,
|
| 62 |
+
"eval_accuracy": 0.9812206572769953,
|
| 63 |
+
"eval_f1": 0.9865410497981157,
|
| 64 |
+
"eval_loss": 0.0956883653998375,
|
| 65 |
+
"eval_precision": 0.9932249322493225,
|
| 66 |
+
"eval_recall": 0.9799465240641712,
|
| 67 |
+
"eval_runtime": 60.5192,
|
| 68 |
+
"eval_samples_per_second": 17.598,
|
| 69 |
+
"eval_steps_per_second": 1.107,
|
| 70 |
+
"step": 377
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 1.0610079575596818,
|
| 74 |
+
"grad_norm": 8.403841018676758,
|
| 75 |
+
"learning_rate": 1.6330140014738394e-05,
|
| 76 |
+
"loss": 0.0611,
|
| 77 |
+
"step": 400
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"epoch": 1.193633952254642,
|
| 81 |
+
"grad_norm": 0.022825542837381363,
|
| 82 |
+
"learning_rate": 1.5593220338983053e-05,
|
| 83 |
+
"loss": 0.0758,
|
| 84 |
+
"step": 450
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 1.3262599469496021,
|
| 88 |
+
"grad_norm": 97.80863952636719,
|
| 89 |
+
"learning_rate": 1.485630066322771e-05,
|
| 90 |
+
"loss": 0.0747,
|
| 91 |
+
"step": 500
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"epoch": 1.4588859416445623,
|
| 95 |
+
"grad_norm": 0.03205716982483864,
|
| 96 |
+
"learning_rate": 1.4119380987472366e-05,
|
| 97 |
+
"loss": 0.0719,
|
| 98 |
+
"step": 550
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"epoch": 1.5915119363395225,
|
| 102 |
+
"grad_norm": 13.893011093139648,
|
| 103 |
+
"learning_rate": 1.3382461311717023e-05,
|
| 104 |
+
"loss": 0.1053,
|
| 105 |
+
"step": 600
|
| 106 |
+
},
|
| 107 |
+
{
|
| 108 |
+
"epoch": 1.7241379310344827,
|
| 109 |
+
"grad_norm": 0.03504275158047676,
|
| 110 |
+
"learning_rate": 1.2645541635961683e-05,
|
| 111 |
+
"loss": 0.0494,
|
| 112 |
+
"step": 650
|
| 113 |
+
},
|
| 114 |
+
{
|
| 115 |
+
"epoch": 1.8567639257294428,
|
| 116 |
+
"grad_norm": 0.11265891045331955,
|
| 117 |
+
"learning_rate": 1.190862196020634e-05,
|
| 118 |
+
"loss": 0.0142,
|
| 119 |
+
"step": 700
|
| 120 |
+
},
|
| 121 |
+
{
|
| 122 |
+
"epoch": 1.9893899204244032,
|
| 123 |
+
"grad_norm": 0.06097806990146637,
|
| 124 |
+
"learning_rate": 1.1171702284450996e-05,
|
| 125 |
+
"loss": 0.048,
|
| 126 |
+
"step": 750
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"epoch": 2.0,
|
| 130 |
+
"eval_accuracy": 0.9784037558685446,
|
| 131 |
+
"eval_f1": 0.984778292521509,
|
| 132 |
+
"eval_loss": 0.17541147768497467,
|
| 133 |
+
"eval_precision": 0.9750982961992136,
|
| 134 |
+
"eval_recall": 0.9946524064171123,
|
| 135 |
+
"eval_runtime": 65.725,
|
| 136 |
+
"eval_samples_per_second": 16.204,
|
| 137 |
+
"eval_steps_per_second": 1.019,
|
| 138 |
+
"step": 754
|
| 139 |
+
}
|
| 140 |
+
],
|
| 141 |
+
"logging_steps": 50,
|
| 142 |
+
"max_steps": 1508,
|
| 143 |
+
"num_input_tokens_seen": 0,
|
| 144 |
+
"num_train_epochs": 4,
|
| 145 |
+
"save_steps": 500,
|
| 146 |
+
"stateful_callbacks": {
|
| 147 |
+
"TrainerControl": {
|
| 148 |
+
"args": {
|
| 149 |
+
"should_epoch_stop": false,
|
| 150 |
+
"should_evaluate": false,
|
| 151 |
+
"should_log": false,
|
| 152 |
+
"should_save": true,
|
| 153 |
+
"should_training_stop": false
|
| 154 |
+
},
|
| 155 |
+
"attributes": {}
|
| 156 |
+
}
|
| 157 |
+
},
|
| 158 |
+
"total_flos": 427095020613120.0,
|
| 159 |
+
"train_batch_size": 16,
|
| 160 |
+
"trial_name": null,
|
| 161 |
+
"trial_params": null
|
| 162 |
+
}
|
checkpoint-754/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be09fe3a5beb0d44eb74f02908e775d4761990fab8ae3b1d7435c5c9a50e5e93
|
| 3 |
+
size 5304
|