Initial upload of fine‑tuned Gemma + custom tokenizer
Browse files- model-00001-of-00005.safetensors +1 -1
- model-00002-of-00005.safetensors +1 -1
- model-00003-of-00005.safetensors +1 -1
- model-00004-of-00005.safetensors +1 -1
- model-00005-of-00005.safetensors +1 -1
- scheduler.pt +1 -1
- tokenizer_config.json +34 -41
- trainer_state.json +50 -50
- training_args.bin +1 -1
model-00001-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4979902192
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:482be2455e637e399de5e4c8afaec5d2675ec11f89137e526362b82b23cedb4b
|
| 3 |
size 4979902192
|
model-00002-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4931296592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43f80968dd4e284d30ed74416856e65c7344cb8ed822d3f833e711249692160e
|
| 3 |
size 4931296592
|
model-00003-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4931296656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eb6398002e8b6604cef8459094542dcccbfd027bacd105d4a9c82f95ea16192
|
| 3 |
size 4931296656
|
model-00004-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4931296656
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c9e1e56ed4e0108727c4c549ee28ec2af3da9f48369c8fad1527a2978d19e20
|
| 3 |
size 4931296656
|
model-00005-of-00005.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4601000928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48f435f220bcf30ac3f7e864f113cdf55f1c2d86b35674047ab703a957ec75a5
|
| 3 |
size 4601000928
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:81e125f2538e139197e1ffe42d2e924ff12ed600a2b3c103b6ba772812143b1d
|
| 3 |
size 1465
|
tokenizer_config.json
CHANGED
|
@@ -1107,7 +1107,7 @@
|
|
| 1107 |
"special": false
|
| 1108 |
},
|
| 1109 |
"138": {
|
| 1110 |
-
"content": "
|
| 1111 |
"lstrip": false,
|
| 1112 |
"normalized": false,
|
| 1113 |
"rstrip": false,
|
|
@@ -1115,7 +1115,7 @@
|
|
| 1115 |
"special": false
|
| 1116 |
},
|
| 1117 |
"139": {
|
| 1118 |
-
"content": "
|
| 1119 |
"lstrip": false,
|
| 1120 |
"normalized": false,
|
| 1121 |
"rstrip": false,
|
|
@@ -1123,7 +1123,7 @@
|
|
| 1123 |
"special": false
|
| 1124 |
},
|
| 1125 |
"140": {
|
| 1126 |
-
"content": "
|
| 1127 |
"lstrip": false,
|
| 1128 |
"normalized": false,
|
| 1129 |
"rstrip": false,
|
|
@@ -1131,7 +1131,7 @@
|
|
| 1131 |
"special": false
|
| 1132 |
},
|
| 1133 |
"141": {
|
| 1134 |
-
"content": "
|
| 1135 |
"lstrip": false,
|
| 1136 |
"normalized": false,
|
| 1137 |
"rstrip": false,
|
|
@@ -1139,7 +1139,7 @@
|
|
| 1139 |
"special": false
|
| 1140 |
},
|
| 1141 |
"142": {
|
| 1142 |
-
"content": "
|
| 1143 |
"lstrip": false,
|
| 1144 |
"normalized": false,
|
| 1145 |
"rstrip": false,
|
|
@@ -1147,7 +1147,7 @@
|
|
| 1147 |
"special": false
|
| 1148 |
},
|
| 1149 |
"143": {
|
| 1150 |
-
"content": "
|
| 1151 |
"lstrip": false,
|
| 1152 |
"normalized": false,
|
| 1153 |
"rstrip": false,
|
|
@@ -1155,7 +1155,7 @@
|
|
| 1155 |
"special": false
|
| 1156 |
},
|
| 1157 |
"144": {
|
| 1158 |
-
"content": "
|
| 1159 |
"lstrip": false,
|
| 1160 |
"normalized": false,
|
| 1161 |
"rstrip": false,
|
|
@@ -1163,7 +1163,7 @@
|
|
| 1163 |
"special": false
|
| 1164 |
},
|
| 1165 |
"145": {
|
| 1166 |
-
"content": "
|
| 1167 |
"lstrip": false,
|
| 1168 |
"normalized": false,
|
| 1169 |
"rstrip": false,
|
|
@@ -1171,7 +1171,7 @@
|
|
| 1171 |
"special": false
|
| 1172 |
},
|
| 1173 |
"146": {
|
| 1174 |
-
"content": "
|
| 1175 |
"lstrip": false,
|
| 1176 |
"normalized": false,
|
| 1177 |
"rstrip": false,
|
|
@@ -1179,7 +1179,7 @@
|
|
| 1179 |
"special": false
|
| 1180 |
},
|
| 1181 |
"147": {
|
| 1182 |
-
"content": "
|
| 1183 |
"lstrip": false,
|
| 1184 |
"normalized": false,
|
| 1185 |
"rstrip": false,
|
|
@@ -1187,7 +1187,7 @@
|
|
| 1187 |
"special": false
|
| 1188 |
},
|
| 1189 |
"148": {
|
| 1190 |
-
"content": "
|
| 1191 |
"lstrip": false,
|
| 1192 |
"normalized": false,
|
| 1193 |
"rstrip": false,
|
|
@@ -1195,7 +1195,7 @@
|
|
| 1195 |
"special": false
|
| 1196 |
},
|
| 1197 |
"149": {
|
| 1198 |
-
"content": "
|
| 1199 |
"lstrip": false,
|
| 1200 |
"normalized": false,
|
| 1201 |
"rstrip": false,
|
|
@@ -1203,7 +1203,7 @@
|
|
| 1203 |
"special": false
|
| 1204 |
},
|
| 1205 |
"150": {
|
| 1206 |
-
"content": "
|
| 1207 |
"lstrip": false,
|
| 1208 |
"normalized": false,
|
| 1209 |
"rstrip": false,
|
|
@@ -1211,7 +1211,7 @@
|
|
| 1211 |
"special": false
|
| 1212 |
},
|
| 1213 |
"151": {
|
| 1214 |
-
"content": "
|
| 1215 |
"lstrip": false,
|
| 1216 |
"normalized": false,
|
| 1217 |
"rstrip": false,
|
|
@@ -1219,7 +1219,7 @@
|
|
| 1219 |
"special": false
|
| 1220 |
},
|
| 1221 |
"152": {
|
| 1222 |
-
"content": "
|
| 1223 |
"lstrip": false,
|
| 1224 |
"normalized": false,
|
| 1225 |
"rstrip": false,
|
|
@@ -1227,7 +1227,7 @@
|
|
| 1227 |
"special": false
|
| 1228 |
},
|
| 1229 |
"153": {
|
| 1230 |
-
"content": "
|
| 1231 |
"lstrip": false,
|
| 1232 |
"normalized": false,
|
| 1233 |
"rstrip": false,
|
|
@@ -1235,7 +1235,7 @@
|
|
| 1235 |
"special": false
|
| 1236 |
},
|
| 1237 |
"154": {
|
| 1238 |
-
"content": "
|
| 1239 |
"lstrip": false,
|
| 1240 |
"normalized": false,
|
| 1241 |
"rstrip": false,
|
|
@@ -1243,7 +1243,7 @@
|
|
| 1243 |
"special": false
|
| 1244 |
},
|
| 1245 |
"155": {
|
| 1246 |
-
"content": "
|
| 1247 |
"lstrip": false,
|
| 1248 |
"normalized": false,
|
| 1249 |
"rstrip": false,
|
|
@@ -1251,7 +1251,7 @@
|
|
| 1251 |
"special": false
|
| 1252 |
},
|
| 1253 |
"156": {
|
| 1254 |
-
"content": "
|
| 1255 |
"lstrip": false,
|
| 1256 |
"normalized": false,
|
| 1257 |
"rstrip": false,
|
|
@@ -1259,7 +1259,7 @@
|
|
| 1259 |
"special": false
|
| 1260 |
},
|
| 1261 |
"157": {
|
| 1262 |
-
"content": "
|
| 1263 |
"lstrip": false,
|
| 1264 |
"normalized": false,
|
| 1265 |
"rstrip": false,
|
|
@@ -1267,7 +1267,7 @@
|
|
| 1267 |
"special": false
|
| 1268 |
},
|
| 1269 |
"158": {
|
| 1270 |
-
"content": "
|
| 1271 |
"lstrip": false,
|
| 1272 |
"normalized": false,
|
| 1273 |
"rstrip": false,
|
|
@@ -1275,7 +1275,7 @@
|
|
| 1275 |
"special": false
|
| 1276 |
},
|
| 1277 |
"159": {
|
| 1278 |
-
"content": "
|
| 1279 |
"lstrip": false,
|
| 1280 |
"normalized": false,
|
| 1281 |
"rstrip": false,
|
|
@@ -1283,7 +1283,7 @@
|
|
| 1283 |
"special": false
|
| 1284 |
},
|
| 1285 |
"160": {
|
| 1286 |
-
"content": "
|
| 1287 |
"lstrip": false,
|
| 1288 |
"normalized": false,
|
| 1289 |
"rstrip": false,
|
|
@@ -1291,7 +1291,7 @@
|
|
| 1291 |
"special": false
|
| 1292 |
},
|
| 1293 |
"161": {
|
| 1294 |
-
"content": "
|
| 1295 |
"lstrip": false,
|
| 1296 |
"normalized": false,
|
| 1297 |
"rstrip": false,
|
|
@@ -1299,7 +1299,7 @@
|
|
| 1299 |
"special": false
|
| 1300 |
},
|
| 1301 |
"162": {
|
| 1302 |
-
"content": "
|
| 1303 |
"lstrip": false,
|
| 1304 |
"normalized": false,
|
| 1305 |
"rstrip": false,
|
|
@@ -1307,7 +1307,7 @@
|
|
| 1307 |
"special": false
|
| 1308 |
},
|
| 1309 |
"163": {
|
| 1310 |
-
"content": "
|
| 1311 |
"lstrip": false,
|
| 1312 |
"normalized": false,
|
| 1313 |
"rstrip": false,
|
|
@@ -1315,7 +1315,7 @@
|
|
| 1315 |
"special": false
|
| 1316 |
},
|
| 1317 |
"164": {
|
| 1318 |
-
"content": "
|
| 1319 |
"lstrip": false,
|
| 1320 |
"normalized": false,
|
| 1321 |
"rstrip": false,
|
|
@@ -1323,7 +1323,7 @@
|
|
| 1323 |
"special": false
|
| 1324 |
},
|
| 1325 |
"165": {
|
| 1326 |
-
"content": "
|
| 1327 |
"lstrip": false,
|
| 1328 |
"normalized": false,
|
| 1329 |
"rstrip": false,
|
|
@@ -1331,7 +1331,7 @@
|
|
| 1331 |
"special": false
|
| 1332 |
},
|
| 1333 |
"166": {
|
| 1334 |
-
"content": "
|
| 1335 |
"lstrip": false,
|
| 1336 |
"normalized": false,
|
| 1337 |
"rstrip": false,
|
|
@@ -1339,7 +1339,7 @@
|
|
| 1339 |
"special": false
|
| 1340 |
},
|
| 1341 |
"167": {
|
| 1342 |
-
"content": "
|
| 1343 |
"lstrip": false,
|
| 1344 |
"normalized": false,
|
| 1345 |
"rstrip": false,
|
|
@@ -51326,7 +51326,6 @@
|
|
| 51326 |
"boi_token": "<start_of_image>",
|
| 51327 |
"bos_token": "<bos>",
|
| 51328 |
"clean_up_tokenization_spaces": false,
|
| 51329 |
-
"end_string": "<end_of_turn>",
|
| 51330 |
"eoi_token": "<end_of_image>",
|
| 51331 |
"eos_token": "<eos>",
|
| 51332 |
"extra_special_tokens": {
|
|
@@ -51337,16 +51336,10 @@
|
|
| 51337 |
"image_token": "<image_soft_token>",
|
| 51338 |
"model_max_length": 1000000000000000019884624838656,
|
| 51339 |
"pad_token": "<pad>",
|
|
|
|
| 51340 |
"sp_model_kwargs": null,
|
| 51341 |
"spaces_between_special_tokens": false,
|
| 51342 |
-
"
|
| 51343 |
-
"tokenizer_class": "GemmaSpecialTokenizer",
|
| 51344 |
"unk_token": "<unk>",
|
| 51345 |
-
"use_default_system_prompt": false
|
| 51346 |
-
|
| 51347 |
-
"AutoTokenizer": [
|
| 51348 |
-
"gemma_special_tokenizer.GemmaSpecialTokenizer",
|
| 51349 |
-
"gemma_special_tokenizer.GemmaSpecialTokenizer"
|
| 51350 |
-
]
|
| 51351 |
-
}
|
| 51352 |
-
}
|
|
|
|
| 1107 |
"special": false
|
| 1108 |
},
|
| 1109 |
"138": {
|
| 1110 |
+
"content": "▁▁",
|
| 1111 |
"lstrip": false,
|
| 1112 |
"normalized": false,
|
| 1113 |
"rstrip": false,
|
|
|
|
| 1115 |
"special": false
|
| 1116 |
},
|
| 1117 |
"139": {
|
| 1118 |
+
"content": "▁▁▁",
|
| 1119 |
"lstrip": false,
|
| 1120 |
"normalized": false,
|
| 1121 |
"rstrip": false,
|
|
|
|
| 1123 |
"special": false
|
| 1124 |
},
|
| 1125 |
"140": {
|
| 1126 |
+
"content": "▁▁▁▁",
|
| 1127 |
"lstrip": false,
|
| 1128 |
"normalized": false,
|
| 1129 |
"rstrip": false,
|
|
|
|
| 1131 |
"special": false
|
| 1132 |
},
|
| 1133 |
"141": {
|
| 1134 |
+
"content": "▁▁▁▁▁",
|
| 1135 |
"lstrip": false,
|
| 1136 |
"normalized": false,
|
| 1137 |
"rstrip": false,
|
|
|
|
| 1139 |
"special": false
|
| 1140 |
},
|
| 1141 |
"142": {
|
| 1142 |
+
"content": "▁▁▁▁▁▁",
|
| 1143 |
"lstrip": false,
|
| 1144 |
"normalized": false,
|
| 1145 |
"rstrip": false,
|
|
|
|
| 1147 |
"special": false
|
| 1148 |
},
|
| 1149 |
"143": {
|
| 1150 |
+
"content": "▁▁▁▁▁▁▁",
|
| 1151 |
"lstrip": false,
|
| 1152 |
"normalized": false,
|
| 1153 |
"rstrip": false,
|
|
|
|
| 1155 |
"special": false
|
| 1156 |
},
|
| 1157 |
"144": {
|
| 1158 |
+
"content": "▁▁▁▁▁▁▁▁",
|
| 1159 |
"lstrip": false,
|
| 1160 |
"normalized": false,
|
| 1161 |
"rstrip": false,
|
|
|
|
| 1163 |
"special": false
|
| 1164 |
},
|
| 1165 |
"145": {
|
| 1166 |
+
"content": "▁▁▁▁▁▁▁▁▁",
|
| 1167 |
"lstrip": false,
|
| 1168 |
"normalized": false,
|
| 1169 |
"rstrip": false,
|
|
|
|
| 1171 |
"special": false
|
| 1172 |
},
|
| 1173 |
"146": {
|
| 1174 |
+
"content": "▁▁▁▁▁▁▁▁▁▁",
|
| 1175 |
"lstrip": false,
|
| 1176 |
"normalized": false,
|
| 1177 |
"rstrip": false,
|
|
|
|
| 1179 |
"special": false
|
| 1180 |
},
|
| 1181 |
"147": {
|
| 1182 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁",
|
| 1183 |
"lstrip": false,
|
| 1184 |
"normalized": false,
|
| 1185 |
"rstrip": false,
|
|
|
|
| 1187 |
"special": false
|
| 1188 |
},
|
| 1189 |
"148": {
|
| 1190 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1191 |
"lstrip": false,
|
| 1192 |
"normalized": false,
|
| 1193 |
"rstrip": false,
|
|
|
|
| 1195 |
"special": false
|
| 1196 |
},
|
| 1197 |
"149": {
|
| 1198 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1199 |
"lstrip": false,
|
| 1200 |
"normalized": false,
|
| 1201 |
"rstrip": false,
|
|
|
|
| 1203 |
"special": false
|
| 1204 |
},
|
| 1205 |
"150": {
|
| 1206 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1207 |
"lstrip": false,
|
| 1208 |
"normalized": false,
|
| 1209 |
"rstrip": false,
|
|
|
|
| 1211 |
"special": false
|
| 1212 |
},
|
| 1213 |
"151": {
|
| 1214 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1215 |
"lstrip": false,
|
| 1216 |
"normalized": false,
|
| 1217 |
"rstrip": false,
|
|
|
|
| 1219 |
"special": false
|
| 1220 |
},
|
| 1221 |
"152": {
|
| 1222 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1223 |
"lstrip": false,
|
| 1224 |
"normalized": false,
|
| 1225 |
"rstrip": false,
|
|
|
|
| 1227 |
"special": false
|
| 1228 |
},
|
| 1229 |
"153": {
|
| 1230 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1231 |
"lstrip": false,
|
| 1232 |
"normalized": false,
|
| 1233 |
"rstrip": false,
|
|
|
|
| 1235 |
"special": false
|
| 1236 |
},
|
| 1237 |
"154": {
|
| 1238 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1239 |
"lstrip": false,
|
| 1240 |
"normalized": false,
|
| 1241 |
"rstrip": false,
|
|
|
|
| 1243 |
"special": false
|
| 1244 |
},
|
| 1245 |
"155": {
|
| 1246 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1247 |
"lstrip": false,
|
| 1248 |
"normalized": false,
|
| 1249 |
"rstrip": false,
|
|
|
|
| 1251 |
"special": false
|
| 1252 |
},
|
| 1253 |
"156": {
|
| 1254 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1255 |
"lstrip": false,
|
| 1256 |
"normalized": false,
|
| 1257 |
"rstrip": false,
|
|
|
|
| 1259 |
"special": false
|
| 1260 |
},
|
| 1261 |
"157": {
|
| 1262 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1263 |
"lstrip": false,
|
| 1264 |
"normalized": false,
|
| 1265 |
"rstrip": false,
|
|
|
|
| 1267 |
"special": false
|
| 1268 |
},
|
| 1269 |
"158": {
|
| 1270 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1271 |
"lstrip": false,
|
| 1272 |
"normalized": false,
|
| 1273 |
"rstrip": false,
|
|
|
|
| 1275 |
"special": false
|
| 1276 |
},
|
| 1277 |
"159": {
|
| 1278 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1279 |
"lstrip": false,
|
| 1280 |
"normalized": false,
|
| 1281 |
"rstrip": false,
|
|
|
|
| 1283 |
"special": false
|
| 1284 |
},
|
| 1285 |
"160": {
|
| 1286 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1287 |
"lstrip": false,
|
| 1288 |
"normalized": false,
|
| 1289 |
"rstrip": false,
|
|
|
|
| 1291 |
"special": false
|
| 1292 |
},
|
| 1293 |
"161": {
|
| 1294 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1295 |
"lstrip": false,
|
| 1296 |
"normalized": false,
|
| 1297 |
"rstrip": false,
|
|
|
|
| 1299 |
"special": false
|
| 1300 |
},
|
| 1301 |
"162": {
|
| 1302 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1303 |
"lstrip": false,
|
| 1304 |
"normalized": false,
|
| 1305 |
"rstrip": false,
|
|
|
|
| 1307 |
"special": false
|
| 1308 |
},
|
| 1309 |
"163": {
|
| 1310 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1311 |
"lstrip": false,
|
| 1312 |
"normalized": false,
|
| 1313 |
"rstrip": false,
|
|
|
|
| 1315 |
"special": false
|
| 1316 |
},
|
| 1317 |
"164": {
|
| 1318 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1319 |
"lstrip": false,
|
| 1320 |
"normalized": false,
|
| 1321 |
"rstrip": false,
|
|
|
|
| 1323 |
"special": false
|
| 1324 |
},
|
| 1325 |
"165": {
|
| 1326 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1327 |
"lstrip": false,
|
| 1328 |
"normalized": false,
|
| 1329 |
"rstrip": false,
|
|
|
|
| 1331 |
"special": false
|
| 1332 |
},
|
| 1333 |
"166": {
|
| 1334 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1335 |
"lstrip": false,
|
| 1336 |
"normalized": false,
|
| 1337 |
"rstrip": false,
|
|
|
|
| 1339 |
"special": false
|
| 1340 |
},
|
| 1341 |
"167": {
|
| 1342 |
+
"content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
| 1343 |
"lstrip": false,
|
| 1344 |
"normalized": false,
|
| 1345 |
"rstrip": false,
|
|
|
|
| 51326 |
"boi_token": "<start_of_image>",
|
| 51327 |
"bos_token": "<bos>",
|
| 51328 |
"clean_up_tokenization_spaces": false,
|
|
|
|
| 51329 |
"eoi_token": "<end_of_image>",
|
| 51330 |
"eos_token": "<eos>",
|
| 51331 |
"extra_special_tokens": {
|
|
|
|
| 51336 |
"image_token": "<image_soft_token>",
|
| 51337 |
"model_max_length": 1000000000000000019884624838656,
|
| 51338 |
"pad_token": "<pad>",
|
| 51339 |
+
"processor_class": "Gemma3Processor",
|
| 51340 |
"sp_model_kwargs": null,
|
| 51341 |
"spaces_between_special_tokens": false,
|
| 51342 |
+
"tokenizer_class": "GemmaTokenizerFast",
|
|
|
|
| 51343 |
"unk_token": "<unk>",
|
| 51344 |
+
"use_default_system_prompt": false
|
| 51345 |
+
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
trainer_state.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 32,
|
| 7 |
"global_step": 8,
|
| 8 |
"is_hyper_param_search": false,
|
|
@@ -10,80 +10,80 @@
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
-
"epoch": 0.
|
| 14 |
-
"grad_norm":
|
| 15 |
"learning_rate": 3e-06,
|
| 16 |
-
"loss":
|
| 17 |
-
"mean_token_accuracy": 0.
|
| 18 |
-
"num_tokens":
|
| 19 |
"step": 1
|
| 20 |
},
|
| 21 |
{
|
| 22 |
-
"epoch": 0.
|
| 23 |
-
"grad_norm":
|
| 24 |
-
"learning_rate": 2.
|
| 25 |
-
"loss":
|
| 26 |
-
"mean_token_accuracy": 0.
|
| 27 |
-
"num_tokens":
|
| 28 |
"step": 2
|
| 29 |
},
|
| 30 |
{
|
| 31 |
-
"epoch": 0.
|
| 32 |
-
"grad_norm":
|
| 33 |
-
"learning_rate": 2.
|
| 34 |
-
"loss":
|
| 35 |
-
"mean_token_accuracy": 0.
|
| 36 |
-
"num_tokens":
|
| 37 |
"step": 3
|
| 38 |
},
|
| 39 |
{
|
| 40 |
-
"epoch": 0.
|
| 41 |
-
"grad_norm":
|
| 42 |
-
"learning_rate": 2.
|
| 43 |
-
"loss":
|
| 44 |
-
"mean_token_accuracy": 0.
|
| 45 |
-
"num_tokens":
|
| 46 |
"step": 4
|
| 47 |
},
|
| 48 |
{
|
| 49 |
-
"epoch": 0.
|
| 50 |
-
"grad_norm":
|
| 51 |
-
"learning_rate": 2.
|
| 52 |
-
"loss":
|
| 53 |
-
"mean_token_accuracy": 0.
|
| 54 |
-
"num_tokens":
|
| 55 |
"step": 5
|
| 56 |
},
|
| 57 |
{
|
| 58 |
-
"epoch": 0.
|
| 59 |
-
"grad_norm":
|
| 60 |
-
"learning_rate":
|
| 61 |
-
"loss":
|
| 62 |
-
"mean_token_accuracy": 0.
|
| 63 |
-
"num_tokens":
|
| 64 |
"step": 6
|
| 65 |
},
|
| 66 |
{
|
| 67 |
-
"epoch": 0.
|
| 68 |
-
"grad_norm":
|
| 69 |
-
"learning_rate":
|
| 70 |
-
"loss":
|
| 71 |
-
"mean_token_accuracy": 0.
|
| 72 |
-
"num_tokens":
|
| 73 |
"step": 7
|
| 74 |
},
|
| 75 |
{
|
| 76 |
-
"epoch": 0.
|
| 77 |
-
"grad_norm":
|
| 78 |
-
"learning_rate":
|
| 79 |
-
"loss":
|
| 80 |
-
"mean_token_accuracy": 0.
|
| 81 |
-
"num_tokens":
|
| 82 |
"step": 8
|
| 83 |
}
|
| 84 |
],
|
| 85 |
"logging_steps": 1,
|
| 86 |
-
"max_steps":
|
| 87 |
"num_input_tokens_seen": 0,
|
| 88 |
"num_train_epochs": 1,
|
| 89 |
"save_steps": 8,
|
|
@@ -99,7 +99,7 @@
|
|
| 99 |
"attributes": {}
|
| 100 |
}
|
| 101 |
},
|
| 102 |
-
"total_flos":
|
| 103 |
"train_batch_size": 1,
|
| 104 |
"trial_name": null,
|
| 105 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.5877457310948486,
|
| 6 |
"eval_steps": 32,
|
| 7 |
"global_step": 8,
|
| 8 |
"is_hyper_param_search": false,
|
|
|
|
| 10 |
"is_world_process_zero": true,
|
| 11 |
"log_history": [
|
| 12 |
{
|
| 13 |
+
"epoch": 0.07346821638685608,
|
| 14 |
+
"grad_norm": 52224.03643740955,
|
| 15 |
"learning_rate": 3e-06,
|
| 16 |
+
"loss": 1232.5903,
|
| 17 |
+
"mean_token_accuracy": 0.5698598268500064,
|
| 18 |
+
"num_tokens": 2439700.0,
|
| 19 |
"step": 1
|
| 20 |
},
|
| 21 |
{
|
| 22 |
+
"epoch": 0.14693643277371216,
|
| 23 |
+
"grad_norm": 24564.28249818258,
|
| 24 |
+
"learning_rate": 2.7692307692307693e-06,
|
| 25 |
+
"loss": 1012.342,
|
| 26 |
+
"mean_token_accuracy": 0.5935460063046776,
|
| 27 |
+
"num_tokens": 4818537.0,
|
| 28 |
"step": 2
|
| 29 |
},
|
| 30 |
{
|
| 31 |
+
"epoch": 0.22040464916056823,
|
| 32 |
+
"grad_norm": 10030.685256076777,
|
| 33 |
+
"learning_rate": 2.5384615384615385e-06,
|
| 34 |
+
"loss": 771.7506,
|
| 35 |
+
"mean_token_accuracy": 0.6206552012590691,
|
| 36 |
+
"num_tokens": 7222999.0,
|
| 37 |
"step": 3
|
| 38 |
},
|
| 39 |
{
|
| 40 |
+
"epoch": 0.2938728655474243,
|
| 41 |
+
"grad_norm": 9797.578497343955,
|
| 42 |
+
"learning_rate": 2.307692307692308e-06,
|
| 43 |
+
"loss": 673.8949,
|
| 44 |
+
"mean_token_accuracy": 0.6229124862584285,
|
| 45 |
+
"num_tokens": 9603627.0,
|
| 46 |
"step": 4
|
| 47 |
},
|
| 48 |
{
|
| 49 |
+
"epoch": 0.3673410819342804,
|
| 50 |
+
"grad_norm": 6906.536059477202,
|
| 51 |
+
"learning_rate": 2.076923076923077e-06,
|
| 52 |
+
"loss": 647.7118,
|
| 53 |
+
"mean_token_accuracy": 0.6252844646223821,
|
| 54 |
+
"num_tokens": 12031078.0,
|
| 55 |
"step": 5
|
| 56 |
},
|
| 57 |
{
|
| 58 |
+
"epoch": 0.44080929832113647,
|
| 59 |
+
"grad_norm": 4121.501080449312,
|
| 60 |
+
"learning_rate": 1.8461538461538462e-06,
|
| 61 |
+
"loss": 616.8495,
|
| 62 |
+
"mean_token_accuracy": 0.6341593922115862,
|
| 63 |
+
"num_tokens": 14403431.0,
|
| 64 |
"step": 6
|
| 65 |
},
|
| 66 |
{
|
| 67 |
+
"epoch": 0.5142775147079925,
|
| 68 |
+
"grad_norm": 2026.657249791317,
|
| 69 |
+
"learning_rate": 1.6153846153846154e-06,
|
| 70 |
+
"loss": 601.5251,
|
| 71 |
+
"mean_token_accuracy": 0.6352307246997952,
|
| 72 |
+
"num_tokens": 16811979.0,
|
| 73 |
"step": 7
|
| 74 |
},
|
| 75 |
{
|
| 76 |
+
"epoch": 0.5877457310948486,
|
| 77 |
+
"grad_norm": 1892.3110141436696,
|
| 78 |
+
"learning_rate": 1.3846153846153846e-06,
|
| 79 |
+
"loss": 577.3281,
|
| 80 |
+
"mean_token_accuracy": 0.6342741788248532,
|
| 81 |
+
"num_tokens": 19217660.0,
|
| 82 |
"step": 8
|
| 83 |
}
|
| 84 |
],
|
| 85 |
"logging_steps": 1,
|
| 86 |
+
"max_steps": 13,
|
| 87 |
"num_input_tokens_seen": 0,
|
| 88 |
"num_train_epochs": 1,
|
| 89 |
"save_steps": 8,
|
|
|
|
| 99 |
"attributes": {}
|
| 100 |
}
|
| 101 |
},
|
| 102 |
+
"total_flos": 128626689310720.0,
|
| 103 |
"train_batch_size": 1,
|
| 104 |
"trial_name": null,
|
| 105 |
"trial_params": null
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7377
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e21928f1882517a4b05718e3914cb90eb7110650d66245962ef389df1995890
|
| 3 |
size 7377
|