Training in progress, step 135, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 45118424
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f892944e1d6553c2988900130a3362ea080ce87049af159434348e43983a67f7
|
| 3 |
size 45118424
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 23159290
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f294a2b1a687df224adc5ac3e37eb23eab0bfe8458ff9f9b4712852a5997cf77
|
| 3 |
size 23159290
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbd0b0d00d8a6ce2af47f7a318c5367a4519b639c67ff4d1f9441e0f3c04db1f
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3002a39ac6502366eefa64e828fe85e0b7d2b42f2ce52a223a7439ad2a05fd9b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 8,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1053,6 +1053,41 @@
|
|
| 1053 |
"learning_rate": 9.903113209758096e-06,
|
| 1054 |
"loss": 1.0679,
|
| 1055 |
"step": 130
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1056 |
}
|
| 1057 |
],
|
| 1058 |
"logging_steps": 1,
|
|
@@ -1072,7 +1107,7 @@
|
|
| 1072 |
"attributes": {}
|
| 1073 |
}
|
| 1074 |
},
|
| 1075 |
-
"total_flos": 1.
|
| 1076 |
"train_batch_size": 10,
|
| 1077 |
"trial_name": null,
|
| 1078 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.8421052631578947,
|
| 5 |
"eval_steps": 8,
|
| 6 |
+
"global_step": 135,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1053 |
"learning_rate": 9.903113209758096e-06,
|
| 1054 |
"loss": 1.0679,
|
| 1055 |
"step": 130
|
| 1056 |
+
},
|
| 1057 |
+
{
|
| 1058 |
+
"epoch": 2.7578947368421054,
|
| 1059 |
+
"grad_norm": 0.5930253267288208,
|
| 1060 |
+
"learning_rate": 8.952245334118414e-06,
|
| 1061 |
+
"loss": 0.8819,
|
| 1062 |
+
"step": 131
|
| 1063 |
+
},
|
| 1064 |
+
{
|
| 1065 |
+
"epoch": 2.7789473684210524,
|
| 1066 |
+
"grad_norm": 0.6247056126594543,
|
| 1067 |
+
"learning_rate": 8.047222744854943e-06,
|
| 1068 |
+
"loss": 0.991,
|
| 1069 |
+
"step": 132
|
| 1070 |
+
},
|
| 1071 |
+
{
|
| 1072 |
+
"epoch": 2.8,
|
| 1073 |
+
"grad_norm": 0.5282688736915588,
|
| 1074 |
+
"learning_rate": 7.1885011480961164e-06,
|
| 1075 |
+
"loss": 0.9508,
|
| 1076 |
+
"step": 133
|
| 1077 |
+
},
|
| 1078 |
+
{
|
| 1079 |
+
"epoch": 2.8210526315789473,
|
| 1080 |
+
"grad_norm": 0.4279923141002655,
|
| 1081 |
+
"learning_rate": 6.37651293602628e-06,
|
| 1082 |
+
"loss": 0.9463,
|
| 1083 |
+
"step": 134
|
| 1084 |
+
},
|
| 1085 |
+
{
|
| 1086 |
+
"epoch": 2.8421052631578947,
|
| 1087 |
+
"grad_norm": 0.4681239426136017,
|
| 1088 |
+
"learning_rate": 5.611666969163243e-06,
|
| 1089 |
+
"loss": 1.1093,
|
| 1090 |
+
"step": 135
|
| 1091 |
}
|
| 1092 |
],
|
| 1093 |
"logging_steps": 1,
|
|
|
|
| 1107 |
"attributes": {}
|
| 1108 |
}
|
| 1109 |
},
|
| 1110 |
+
"total_flos": 1.366908129509376e+16,
|
| 1111 |
"train_batch_size": 10,
|
| 1112 |
"trial_name": null,
|
| 1113 |
"trial_params": null
|