Upload 13 files
Browse files- adapter_model.safetensors +1 -1
- optimizer.pt +2 -2
- rng_state_0.pth +1 -1
- rng_state_1.pth +1 -1
- rng_state_2.pth +1 -1
- rng_state_3.pth +1 -1
- rng_state_4.pth +1 -1
- rng_state_5.pth +1 -1
- rng_state_6.pth +1 -1
- rng_state_7.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -59
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 645975704
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:436f36528e0cbad453619703d49a29667fa0654be011985336d1c2b872f526f8
|
| 3 |
size 645975704
|
optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d2149a123e2aa314e8d5a4ff1af83d0915c9962275cc6c598c987ae667380d5
|
| 3 |
+
size 1292087115
|
rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16b1f30ed08c0cb1f6faa13f9adf57b5673e7f65618010fb242bd0a3c4bb0882
|
| 3 |
size 16389
|
rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7758845bdfcf8fb1634f7caf35462f13639e0941bd48712fadea60c710ba3072
|
| 3 |
size 16389
|
rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4cf8621df71a49229ce61172ca361f37587ea13b70aa2190bf8d87a833f8ee10
|
| 3 |
size 16389
|
rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8384fc2bd470fae280e01edd29cc9a2e4a86f8bc2f8d3a58a46b96c73349e6db
|
| 3 |
size 16389
|
rng_state_4.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddff82bc26542fb05efdcf2df590ed29e9d88dbb01de689d83c4414f8885608b
|
| 3 |
size 16389
|
rng_state_5.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:209b7d10ef86fe6e7f8d1749c24e88e06e11f4d90000c239905b4be42662d348
|
| 3 |
size 16389
|
rng_state_6.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5a2629cdd3b6c3a387977b70399124e4c1f9fe420cd68bca306733f6790d5e1
|
| 3 |
size 16389
|
rng_state_7.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 16389
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef570fd2e0fd44060a913d5dfadb2f9f6ae4f10cfb213d7e257e5f528ff6a899
|
| 3 |
size 16389
|
scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:13c878b5181f107dfcc1437e1d485b47bb6612c1058d82c3d92ef97b6425cab6
|
| 3 |
size 1465
|
trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch":
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -92,62 +92,6 @@
|
|
| 92 |
"eval_samples_per_second": 8.675,
|
| 93 |
"eval_steps_per_second": 1.105,
|
| 94 |
"step": 156
|
| 95 |
-
},
|
| 96 |
-
{
|
| 97 |
-
"epoch": 3.3662650602409636,
|
| 98 |
-
"grad_norm": 0.07343257963657379,
|
| 99 |
-
"learning_rate": 0.0002861234766624867,
|
| 100 |
-
"loss": 0.0345,
|
| 101 |
-
"mean_token_accuracy": 0.9456785363168573,
|
| 102 |
-
"num_tokens": 71027426.0,
|
| 103 |
-
"step": 175
|
| 104 |
-
},
|
| 105 |
-
{
|
| 106 |
-
"epoch": 3.8481927710843373,
|
| 107 |
-
"grad_norm": 0.04622579738497734,
|
| 108 |
-
"learning_rate": 0.0002293960964917063,
|
| 109 |
-
"loss": 0.0259,
|
| 110 |
-
"mean_token_accuracy": 0.9330729904770851,
|
| 111 |
-
"num_tokens": 81341930.0,
|
| 112 |
-
"step": 200
|
| 113 |
-
},
|
| 114 |
-
{
|
| 115 |
-
"epoch": 4.0,
|
| 116 |
-
"eval_loss": NaN,
|
| 117 |
-
"eval_mean_token_accuracy": 0.9852228583173549,
|
| 118 |
-
"eval_num_tokens": 84250831.0,
|
| 119 |
-
"eval_runtime": 41.6702,
|
| 120 |
-
"eval_samples_per_second": 8.855,
|
| 121 |
-
"eval_steps_per_second": 1.128,
|
| 122 |
-
"step": 208
|
| 123 |
-
},
|
| 124 |
-
{
|
| 125 |
-
"epoch": 4.327710843373494,
|
| 126 |
-
"grad_norm": 0.04485568404197693,
|
| 127 |
-
"learning_rate": 0.0001739556124639496,
|
| 128 |
-
"loss": 0.0208,
|
| 129 |
-
"mean_token_accuracy": 0.9436070158253962,
|
| 130 |
-
"num_tokens": 91589095.0,
|
| 131 |
-
"step": 225
|
| 132 |
-
},
|
| 133 |
-
{
|
| 134 |
-
"epoch": 4.809638554216868,
|
| 135 |
-
"grad_norm": 0.03364564850926399,
|
| 136 |
-
"learning_rate": 0.00012253518458496144,
|
| 137 |
-
"loss": 0.0185,
|
| 138 |
-
"mean_token_accuracy": 0.9403582978248596,
|
| 139 |
-
"num_tokens": 101826338.0,
|
| 140 |
-
"step": 250
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"epoch": 5.0,
|
| 144 |
-
"eval_loss": NaN,
|
| 145 |
-
"eval_mean_token_accuracy": 0.9858830913584283,
|
| 146 |
-
"eval_num_tokens": 105323752.0,
|
| 147 |
-
"eval_runtime": 41.5429,
|
| 148 |
-
"eval_samples_per_second": 8.882,
|
| 149 |
-
"eval_steps_per_second": 1.131,
|
| 150 |
-
"step": 260
|
| 151 |
}
|
| 152 |
],
|
| 153 |
"logging_steps": 25,
|
|
@@ -167,7 +111,7 @@
|
|
| 167 |
"attributes": {}
|
| 168 |
}
|
| 169 |
},
|
| 170 |
-
"total_flos":
|
| 171 |
"train_batch_size": 1,
|
| 172 |
"trial_name": null,
|
| 173 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 3.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 156,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 92 |
"eval_samples_per_second": 8.675,
|
| 93 |
"eval_steps_per_second": 1.105,
|
| 94 |
"step": 156
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
}
|
| 96 |
],
|
| 97 |
"logging_steps": 25,
|
|
|
|
| 111 |
"attributes": {}
|
| 112 |
}
|
| 113 |
},
|
| 114 |
+
"total_flos": 2.7746000370139136e+18,
|
| 115 |
"train_batch_size": 1,
|
| 116 |
"trial_name": null,
|
| 117 |
"trial_params": null
|