Training in progress, step 50
Browse files- config.json +1 -1
- model.safetensors +1 -1
- trainer_log.jsonl +5 -44
- training_args.bin +2 -2
config.json
CHANGED
|
@@ -30,7 +30,7 @@
|
|
| 30 |
"sliding_window": 32768,
|
| 31 |
"tie_word_embeddings": true,
|
| 32 |
"torch_dtype": "bfloat16",
|
| 33 |
-
"transformers_version": "4.
|
| 34 |
"use_cache": false,
|
| 35 |
"use_sliding_window": false,
|
| 36 |
"video_token_id": 151656,
|
|
|
|
| 30 |
"sliding_window": 32768,
|
| 31 |
"tie_word_embeddings": true,
|
| 32 |
"torch_dtype": "bfloat16",
|
| 33 |
+
"transformers_version": "4.51.3",
|
| 34 |
"use_cache": false,
|
| 35 |
"use_sliding_window": false,
|
| 36 |
"video_token_id": 151656,
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4418050848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4315810f5922e766acf9c2032d61dc0caeebfe72e182c09d6d67df95f106f748
|
| 3 |
size 4418050848
|
trainer_log.jsonl
CHANGED
|
@@ -1,44 +1,5 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps":
|
| 2 |
-
{"current_steps": 20, "total_steps":
|
| 3 |
-
{"current_steps": 30, "total_steps":
|
| 4 |
-
{"current_steps": 40, "total_steps":
|
| 5 |
-
{"current_steps": 50, "total_steps":
|
| 6 |
-
{"current_steps": 60, "total_steps": 5571, "loss": 0.1434, "lr": 1.0752688172043011e-06, "epoch": 0.03231017770597738, "percentage": 1.08, "elapsed_time": "0:01:36", "remaining_time": "2:27:56"}
|
| 7 |
-
{"current_steps": 70, "total_steps": 5571, "loss": 0.0447, "lr": 1.2544802867383513e-06, "epoch": 0.03769520732364028, "percentage": 1.26, "elapsed_time": "0:01:48", "remaining_time": "2:22:29"}
|
| 8 |
-
{"current_steps": 80, "total_steps": 5571, "loss": 0.0384, "lr": 1.4336917562724014e-06, "epoch": 0.043080236941303175, "percentage": 1.44, "elapsed_time": "0:02:00", "remaining_time": "2:18:22"}
|
| 9 |
-
{"current_steps": 90, "total_steps": 5571, "loss": 0.0351, "lr": 1.6129032258064516e-06, "epoch": 0.048465266558966075, "percentage": 1.62, "elapsed_time": "0:02:13", "remaining_time": "2:15:08"}
|
| 10 |
-
{"current_steps": 100, "total_steps": 5571, "loss": 0.0316, "lr": 1.792114695340502e-06, "epoch": 0.053850296176628974, "percentage": 1.8, "elapsed_time": "0:02:25", "remaining_time": "2:12:31"}
|
| 11 |
-
{"current_steps": 110, "total_steps": 5571, "loss": 0.028, "lr": 1.9713261648745523e-06, "epoch": 0.05923532579429187, "percentage": 1.97, "elapsed_time": "0:02:46", "remaining_time": "2:17:33"}
|
| 12 |
-
{"current_steps": 120, "total_steps": 5571, "loss": 0.0332, "lr": 2.1505376344086023e-06, "epoch": 0.06462035541195477, "percentage": 2.15, "elapsed_time": "0:02:58", "remaining_time": "2:15:10"}
|
| 13 |
-
{"current_steps": 130, "total_steps": 5571, "loss": 0.0274, "lr": 2.3297491039426526e-06, "epoch": 0.07000538502961766, "percentage": 2.33, "elapsed_time": "0:03:10", "remaining_time": "2:13:03"}
|
| 14 |
-
{"current_steps": 140, "total_steps": 5571, "loss": 0.0285, "lr": 2.5089605734767026e-06, "epoch": 0.07539041464728057, "percentage": 2.51, "elapsed_time": "0:03:22", "remaining_time": "2:11:14"}
|
| 15 |
-
{"current_steps": 150, "total_steps": 5571, "loss": 0.0228, "lr": 2.688172043010753e-06, "epoch": 0.08077544426494346, "percentage": 2.69, "elapsed_time": "0:03:35", "remaining_time": "2:09:37"}
|
| 16 |
-
{"current_steps": 160, "total_steps": 5571, "loss": 0.0279, "lr": 2.867383512544803e-06, "epoch": 0.08616047388260635, "percentage": 2.87, "elapsed_time": "0:03:56", "remaining_time": "2:13:17"}
|
| 17 |
-
{"current_steps": 170, "total_steps": 5571, "loss": 0.0301, "lr": 3.0465949820788532e-06, "epoch": 0.09154550350026926, "percentage": 3.05, "elapsed_time": "0:04:08", "remaining_time": "2:11:40"}
|
| 18 |
-
{"current_steps": 180, "total_steps": 5571, "loss": 0.0209, "lr": 3.225806451612903e-06, "epoch": 0.09693053311793215, "percentage": 3.23, "elapsed_time": "0:04:20", "remaining_time": "2:10:14"}
|
| 19 |
-
{"current_steps": 190, "total_steps": 5571, "loss": 0.0197, "lr": 3.4050179211469536e-06, "epoch": 0.10231556273559504, "percentage": 3.41, "elapsed_time": "0:04:33", "remaining_time": "2:08:55"}
|
| 20 |
-
{"current_steps": 200, "total_steps": 5571, "loss": 0.0234, "lr": 3.584229390681004e-06, "epoch": 0.10770059235325795, "percentage": 3.59, "elapsed_time": "0:04:45", "remaining_time": "2:07:43"}
|
| 21 |
-
{"current_steps": 210, "total_steps": 5571, "loss": 0.0184, "lr": 3.763440860215054e-06, "epoch": 0.11308562197092084, "percentage": 3.77, "elapsed_time": "0:05:07", "remaining_time": "2:10:37"}
|
| 22 |
-
{"current_steps": 220, "total_steps": 5571, "loss": 0.0159, "lr": 3.942652329749105e-06, "epoch": 0.11847065158858373, "percentage": 3.95, "elapsed_time": "0:05:19", "remaining_time": "2:09:24"}
|
| 23 |
-
{"current_steps": 230, "total_steps": 5571, "loss": 0.0192, "lr": 4.121863799283155e-06, "epoch": 0.12385568120624664, "percentage": 4.13, "elapsed_time": "0:05:31", "remaining_time": "2:08:15"}
|
| 24 |
-
{"current_steps": 240, "total_steps": 5571, "loss": 0.0196, "lr": 4.3010752688172045e-06, "epoch": 0.12924071082390953, "percentage": 4.31, "elapsed_time": "0:05:43", "remaining_time": "2:07:12"}
|
| 25 |
-
{"current_steps": 250, "total_steps": 5571, "loss": 0.0199, "lr": 4.480286738351255e-06, "epoch": 0.13462574044157244, "percentage": 4.49, "elapsed_time": "0:05:55", "remaining_time": "2:06:13"}
|
| 26 |
-
{"current_steps": 260, "total_steps": 5571, "loss": 0.0177, "lr": 4.659498207885305e-06, "epoch": 0.14001077005923532, "percentage": 4.67, "elapsed_time": "0:06:17", "remaining_time": "2:08:24"}
|
| 27 |
-
{"current_steps": 270, "total_steps": 5571, "loss": 0.0178, "lr": 4.838709677419355e-06, "epoch": 0.14539579967689822, "percentage": 4.85, "elapsed_time": "0:06:29", "remaining_time": "2:07:24"}
|
| 28 |
-
{"current_steps": 280, "total_steps": 5571, "loss": 0.0157, "lr": 5.017921146953405e-06, "epoch": 0.15078082929456113, "percentage": 5.03, "elapsed_time": "0:06:41", "remaining_time": "2:06:27"}
|
| 29 |
-
{"current_steps": 290, "total_steps": 5571, "loss": 0.0145, "lr": 5.197132616487456e-06, "epoch": 0.156165858912224, "percentage": 5.21, "elapsed_time": "0:06:53", "remaining_time": "2:05:34"}
|
| 30 |
-
{"current_steps": 300, "total_steps": 5571, "loss": 0.0134, "lr": 5.376344086021506e-06, "epoch": 0.16155088852988692, "percentage": 5.39, "elapsed_time": "0:07:05", "remaining_time": "2:04:44"}
|
| 31 |
-
{"current_steps": 300, "total_steps": 5571, "eval_loss": 0.016489772126078606, "epoch": 0.16155088852988692, "percentage": 5.39, "elapsed_time": "0:07:19", "remaining_time": "2:08:42"}
|
| 32 |
-
{"current_steps": 310, "total_steps": 5571, "loss": 0.0137, "lr": 5.555555555555557e-06, "epoch": 0.16693591814754982, "percentage": 5.56, "elapsed_time": "0:07:40", "remaining_time": "2:10:18"}
|
| 33 |
-
{"current_steps": 320, "total_steps": 5571, "loss": 0.0155, "lr": 5.734767025089606e-06, "epoch": 0.1723209477652127, "percentage": 5.74, "elapsed_time": "0:07:52", "remaining_time": "2:09:18"}
|
| 34 |
-
{"current_steps": 330, "total_steps": 5571, "loss": 0.0175, "lr": 5.9139784946236566e-06, "epoch": 0.1777059773828756, "percentage": 5.92, "elapsed_time": "0:08:04", "remaining_time": "2:08:20"}
|
| 35 |
-
{"current_steps": 340, "total_steps": 5571, "loss": 0.0143, "lr": 6.0931899641577065e-06, "epoch": 0.1830910070005385, "percentage": 6.1, "elapsed_time": "0:08:16", "remaining_time": "2:07:25"}
|
| 36 |
-
{"current_steps": 350, "total_steps": 5571, "loss": 0.0145, "lr": 6.272401433691757e-06, "epoch": 0.1884760366182014, "percentage": 6.28, "elapsed_time": "0:08:28", "remaining_time": "2:06:32"}
|
| 37 |
-
{"current_steps": 360, "total_steps": 5571, "loss": 0.0124, "lr": 6.451612903225806e-06, "epoch": 0.1938610662358643, "percentage": 6.46, "elapsed_time": "0:08:50", "remaining_time": "2:07:55"}
|
| 38 |
-
{"current_steps": 370, "total_steps": 5571, "loss": 0.0128, "lr": 6.630824372759857e-06, "epoch": 0.1992460958535272, "percentage": 6.64, "elapsed_time": "0:09:02", "remaining_time": "2:07:03"}
|
| 39 |
-
{"current_steps": 380, "total_steps": 5571, "loss": 0.013, "lr": 6.810035842293907e-06, "epoch": 0.20463112547119008, "percentage": 6.82, "elapsed_time": "0:09:14", "remaining_time": "2:06:13"}
|
| 40 |
-
{"current_steps": 390, "total_steps": 5571, "loss": 0.0125, "lr": 6.989247311827958e-06, "epoch": 0.210016155088853, "percentage": 7.0, "elapsed_time": "0:09:26", "remaining_time": "2:05:25"}
|
| 41 |
-
{"current_steps": 400, "total_steps": 5571, "loss": 0.0136, "lr": 7.168458781362008e-06, "epoch": 0.2154011847065159, "percentage": 7.18, "elapsed_time": "0:09:38", "remaining_time": "2:04:38"}
|
| 42 |
-
{"current_steps": 410, "total_steps": 5571, "loss": 0.0116, "lr": 7.347670250896059e-06, "epoch": 0.22078621432417878, "percentage": 7.36, "elapsed_time": "0:10:00", "remaining_time": "2:05:55"}
|
| 43 |
-
{"current_steps": 420, "total_steps": 5571, "loss": 0.0107, "lr": 7.526881720430108e-06, "epoch": 0.22617124394184168, "percentage": 7.54, "elapsed_time": "0:10:12", "remaining_time": "2:05:08"}
|
| 44 |
-
{"current_steps": 430, "total_steps": 5571, "loss": 0.0117, "lr": 7.706093189964159e-06, "epoch": 0.2315562735595046, "percentage": 7.72, "elapsed_time": "0:10:24", "remaining_time": "2:04:23"}
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 1392, "loss": 2.5556, "lr": 6.428571428571428e-07, "epoch": 0.021528525296017224, "percentage": 0.72, "elapsed_time": "0:08:01", "remaining_time": "18:28:38"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 1392, "loss": 2.0585, "lr": 1.3571428571428572e-06, "epoch": 0.04305705059203445, "percentage": 1.44, "elapsed_time": "0:15:53", "remaining_time": "18:10:09"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 1392, "loss": 0.6843, "lr": 2.0714285714285717e-06, "epoch": 0.06458557588805167, "percentage": 2.16, "elapsed_time": "0:23:42", "remaining_time": "17:56:09"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 1392, "loss": 0.0841, "lr": 2.785714285714286e-06, "epoch": 0.0861141011840689, "percentage": 2.87, "elapsed_time": "0:31:34", "remaining_time": "17:47:01"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 1392, "loss": 0.06, "lr": 3.5e-06, "epoch": 0.10764262648008611, "percentage": 3.59, "elapsed_time": "0:39:26", "remaining_time": "17:38:41"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:925897f96af8dc61c1bc7febe3b29e5b7ba7263a5b9630883a51a65b99e58637
|
| 3 |
+
size 7889
|