Training in progress, step 1000
Browse files
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- trainer_log.jsonl +26 -0
- training_args.bin +1 -1
adapter_config.json
CHANGED
|
@@ -25,12 +25,12 @@
|
|
| 25 |
"revision": null,
|
| 26 |
"target_modules": [
|
| 27 |
"up_proj",
|
| 28 |
-
"down_proj",
|
| 29 |
-
"gate_proj",
|
| 30 |
"k_proj",
|
| 31 |
-
"
|
|
|
|
| 32 |
"q_proj",
|
| 33 |
-
"
|
|
|
|
| 34 |
],
|
| 35 |
"task_type": "CAUSAL_LM",
|
| 36 |
"trainable_token_indices": null,
|
|
|
|
| 25 |
"revision": null,
|
| 26 |
"target_modules": [
|
| 27 |
"up_proj",
|
|
|
|
|
|
|
| 28 |
"k_proj",
|
| 29 |
+
"down_proj",
|
| 30 |
+
"o_proj",
|
| 31 |
"q_proj",
|
| 32 |
+
"v_proj",
|
| 33 |
+
"gate_proj"
|
| 34 |
],
|
| 35 |
"task_type": "CAUSAL_LM",
|
| 36 |
"trainable_token_indices": null,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 295488936
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85928d986f2cd15fb887ee4e40bc18ea50570b9cc2b7c36dae87cce33f1f15c9
|
| 3 |
size 295488936
|
trainer_log.jsonl
CHANGED
|
@@ -88,3 +88,29 @@
|
|
| 88 |
{"current_steps": 790, "total_steps": 1455, "loss": 0.1214, "lr": 5.137982157606937e-05, "epoch": 1.6285419886656363, "percentage": 54.3, "elapsed_time": "3:33:30", "remaining_time": "2:59:43"}
|
| 89 |
{"current_steps": 800, "total_steps": 1455, "loss": 0.1371, "lr": 5.017999919028566e-05, "epoch": 1.6491499227202473, "percentage": 54.98, "elapsed_time": "3:39:20", "remaining_time": "2:59:34"}
|
| 90 |
{"current_steps": 800, "total_steps": 1455, "eval_loss": 0.19273868203163147, "epoch": 1.6491499227202473, "percentage": 54.98, "elapsed_time": "3:41:47", "remaining_time": "3:01:35"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
{"current_steps": 790, "total_steps": 1455, "loss": 0.1214, "lr": 5.137982157606937e-05, "epoch": 1.6285419886656363, "percentage": 54.3, "elapsed_time": "3:33:30", "remaining_time": "2:59:43"}
|
| 89 |
{"current_steps": 800, "total_steps": 1455, "loss": 0.1371, "lr": 5.017999919028566e-05, "epoch": 1.6491499227202473, "percentage": 54.98, "elapsed_time": "3:39:20", "remaining_time": "2:59:34"}
|
| 90 |
{"current_steps": 800, "total_steps": 1455, "eval_loss": 0.19273868203163147, "epoch": 1.6491499227202473, "percentage": 54.98, "elapsed_time": "3:41:47", "remaining_time": "3:01:35"}
|
| 91 |
+
{"current_steps": 810, "total_steps": 1455, "loss": 0.1359, "lr": 4.898007313042975e-05, "epoch": 1.6697578567748583, "percentage": 55.67, "elapsed_time": "3:47:22", "remaining_time": "3:01:03"}
|
| 92 |
+
{"current_steps": 820, "total_steps": 1455, "loss": 0.1306, "lr": 4.7780734517504985e-05, "epoch": 1.6903657908294694, "percentage": 56.36, "elapsed_time": "3:52:28", "remaining_time": "3:00:01"}
|
| 93 |
+
{"current_steps": 830, "total_steps": 1455, "loss": 0.1414, "lr": 4.658267413416326e-05, "epoch": 1.7109737248840804, "percentage": 57.04, "elapsed_time": "3:58:37", "remaining_time": "2:59:41"}
|
| 94 |
+
{"current_steps": 840, "total_steps": 1455, "loss": 0.1407, "lr": 4.5386582026834906e-05, "epoch": 1.7315816589386914, "percentage": 57.73, "elapsed_time": "4:03:51", "remaining_time": "2:58:32"}
|
| 95 |
+
{"current_steps": 810, "total_steps": 1455, "loss": 0.1359, "lr": 4.898007313042975e-05, "epoch": 1.6697578567748583, "percentage": 55.67, "elapsed_time": "0:05:33", "remaining_time": "0:04:25"}
|
| 96 |
+
{"current_steps": 820, "total_steps": 1455, "loss": 0.1306, "lr": 4.7780734517504985e-05, "epoch": 1.6903657908294694, "percentage": 56.36, "elapsed_time": "0:10:43", "remaining_time": "0:08:18"}
|
| 97 |
+
{"current_steps": 830, "total_steps": 1455, "loss": 0.1414, "lr": 4.658267413416326e-05, "epoch": 1.7109737248840804, "percentage": 57.04, "elapsed_time": "0:16:58", "remaining_time": "0:12:46"}
|
| 98 |
+
{"current_steps": 840, "total_steps": 1455, "loss": 0.1407, "lr": 4.5386582026834906e-05, "epoch": 1.7315816589386914, "percentage": 57.73, "elapsed_time": "0:22:16", "remaining_time": "0:16:18"}
|
| 99 |
+
{"current_steps": 850, "total_steps": 1455, "loss": 0.1291, "lr": 4.4193147108283016e-05, "epoch": 1.7521895929933025, "percentage": 58.42, "elapsed_time": "0:27:44", "remaining_time": "0:19:44"}
|
| 100 |
+
{"current_steps": 860, "total_steps": 1455, "loss": 0.1434, "lr": 4.300305676081057e-05, "epoch": 1.7727975270479135, "percentage": 59.11, "elapsed_time": "0:32:53", "remaining_time": "0:22:45"}
|
| 101 |
+
{"current_steps": 870, "total_steps": 1455, "loss": 0.1295, "lr": 4.1816996440349104e-05, "epoch": 1.7934054611025245, "percentage": 59.79, "elapsed_time": "0:38:04", "remaining_time": "0:25:36"}
|
| 102 |
+
{"current_steps": 880, "total_steps": 1455, "loss": 0.1325, "lr": 4.063564928165682e-05, "epoch": 1.8140133951571356, "percentage": 60.48, "elapsed_time": "0:43:14", "remaining_time": "0:28:15"}
|
| 103 |
+
{"current_steps": 890, "total_steps": 1455, "loss": 0.164, "lr": 3.9459695704853836e-05, "epoch": 1.8346213292117466, "percentage": 61.17, "elapsed_time": "0:49:06", "remaining_time": "0:31:10"}
|
| 104 |
+
{"current_steps": 900, "total_steps": 1455, "loss": 0.147, "lr": 3.828981302352065e-05, "epoch": 1.8552292632663576, "percentage": 61.86, "elapsed_time": "0:54:55", "remaining_time": "0:33:52"}
|
| 105 |
+
{"current_steps": 900, "total_steps": 1455, "eval_loss": 0.1883440464735031, "epoch": 1.8552292632663576, "percentage": 61.86, "elapsed_time": "0:57:24", "remaining_time": "0:35:24"}
|
| 106 |
+
{"current_steps": 910, "total_steps": 1455, "loss": 0.1228, "lr": 3.712667505458622e-05, "epoch": 1.8758371973209687, "percentage": 62.54, "elapsed_time": "1:02:23", "remaining_time": "0:37:21"}
|
| 107 |
+
{"current_steps": 920, "total_steps": 1455, "loss": 0.135, "lr": 3.5970951730229785e-05, "epoch": 1.8964451313755797, "percentage": 63.23, "elapsed_time": "1:07:28", "remaining_time": "0:39:14"}
|
| 108 |
+
{"current_steps": 930, "total_steps": 1455, "loss": 0.1197, "lr": 3.482330871202029e-05, "epoch": 1.9170530654301907, "percentage": 63.92, "elapsed_time": "1:12:55", "remaining_time": "0:41:09"}
|
| 109 |
+
{"current_steps": 940, "total_steps": 1455, "loss": 0.1395, "lr": 3.3684407007515484e-05, "epoch": 1.9376609994848017, "percentage": 64.6, "elapsed_time": "1:18:46", "remaining_time": "0:43:09"}
|
| 110 |
+
{"current_steps": 950, "total_steps": 1455, "loss": 0.1347, "lr": 3.255490258954167e-05, "epoch": 1.9582689335394128, "percentage": 65.29, "elapsed_time": "1:24:10", "remaining_time": "0:44:44"}
|
| 111 |
+
{"current_steps": 960, "total_steps": 1455, "loss": 0.12, "lr": 3.14354460183732e-05, "epoch": 1.9788768675940238, "percentage": 65.98, "elapsed_time": "1:29:31", "remaining_time": "0:46:09"}
|
| 112 |
+
{"current_steps": 970, "total_steps": 1455, "loss": 0.1289, "lr": 3.032668206702959e-05, "epoch": 1.9994848016486348, "percentage": 66.67, "elapsed_time": "1:35:03", "remaining_time": "0:47:31"}
|
| 113 |
+
{"current_steps": 980, "total_steps": 1455, "loss": 0.0908, "lr": 2.9229249349905684e-05, "epoch": 2.020607934054611, "percentage": 67.35, "elapsed_time": "1:40:36", "remaining_time": "0:48:45"}
|
| 114 |
+
{"current_steps": 990, "total_steps": 1455, "loss": 0.0718, "lr": 2.8143779954949267e-05, "epoch": 2.041215868109222, "percentage": 68.04, "elapsed_time": "1:46:12", "remaining_time": "0:49:53"}
|
| 115 |
+
{"current_steps": 1000, "total_steps": 1455, "loss": 0.0668, "lr": 2.70708990795975e-05, "epoch": 2.061823802163833, "percentage": 68.73, "elapsed_time": "1:51:32", "remaining_time": "0:50:45"}
|
| 116 |
+
{"current_steps": 1000, "total_steps": 1455, "eval_loss": 0.1961071491241455, "epoch": 2.061823802163833, "percentage": 68.73, "elapsed_time": "1:54:02", "remaining_time": "0:51:53"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5816
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:929586f453c837dd2c45da477cee81ff19a4b897cb7beb925285c835f78d9f3f
|
| 3 |
size 5816
|