Training in progress, step 300
Browse files- adapter_model.safetensors +1 -1
- metrics.json +1 -0
- state.json +18 -3
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 35668592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2161fd6cea778e72ee4a1f309af0fcaaf9a9e81b4f5c1b39f0d379d23ec50d10
|
| 3 |
size 35668592
|
metrics.json
CHANGED
|
@@ -3,3 +3,4 @@
|
|
| 3 |
{"Step":150,"eval_loss":0.789476037,"eval_runtime":56.6018,"eval_samples_per_second":1.767,"eval_steps_per_second":0.23,"epoch":0.2901353965}
|
| 4 |
{"Step":200,"eval_loss":0.7783958316,"eval_runtime":56.5799,"eval_samples_per_second":1.767,"eval_steps_per_second":0.23,"epoch":0.3868471954}
|
| 5 |
{"Step":250,"eval_loss":0.7721498013,"eval_runtime":57.9466,"eval_samples_per_second":1.726,"eval_steps_per_second":0.224,"epoch":0.4835589942}
|
|
|
|
|
|
| 3 |
{"Step":150,"eval_loss":0.789476037,"eval_runtime":56.6018,"eval_samples_per_second":1.767,"eval_steps_per_second":0.23,"epoch":0.2901353965}
|
| 4 |
{"Step":200,"eval_loss":0.7783958316,"eval_runtime":56.5799,"eval_samples_per_second":1.767,"eval_steps_per_second":0.23,"epoch":0.3868471954}
|
| 5 |
{"Step":250,"eval_loss":0.7721498013,"eval_runtime":57.9466,"eval_samples_per_second":1.726,"eval_steps_per_second":0.224,"epoch":0.4835589942}
|
| 6 |
+
{"Step":300,"eval_loss":0.7687731385,"eval_runtime":60.6867,"eval_samples_per_second":1.648,"eval_steps_per_second":0.214,"epoch":0.580270793}
|
state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 50,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -67,6 +67,21 @@
|
|
| 67 |
"eval_samples_per_second": 1.767,
|
| 68 |
"eval_steps_per_second": 0.23,
|
| 69 |
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
}
|
| 71 |
],
|
| 72 |
"logging_steps": 50,
|
|
@@ -86,7 +101,7 @@
|
|
| 86 |
"attributes": {}
|
| 87 |
}
|
| 88 |
},
|
| 89 |
-
"total_flos":
|
| 90 |
"train_batch_size": 8,
|
| 91 |
"trial_name": null,
|
| 92 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.5802707930367504,
|
| 5 |
"eval_steps": 50,
|
| 6 |
+
"global_step": 300,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 67 |
"eval_samples_per_second": 1.767,
|
| 68 |
"eval_steps_per_second": 0.23,
|
| 69 |
"step": 200
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"epoch": 0.4835589941972921,
|
| 73 |
+
"grad_norm": 0.03580320626497269,
|
| 74 |
+
"learning_rate": 0.00046630824372759856,
|
| 75 |
+
"loss": 0.7671,
|
| 76 |
+
"step": 250
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"epoch": 0.4835589941972921,
|
| 80 |
+
"eval_loss": 0.7721498012542725,
|
| 81 |
+
"eval_runtime": 57.9466,
|
| 82 |
+
"eval_samples_per_second": 1.726,
|
| 83 |
+
"eval_steps_per_second": 0.224,
|
| 84 |
+
"step": 250
|
| 85 |
}
|
| 86 |
],
|
| 87 |
"logging_steps": 50,
|
|
|
|
| 101 |
"attributes": {}
|
| 102 |
}
|
| 103 |
},
|
| 104 |
+
"total_flos": 3.250714490560512e+16,
|
| 105 |
"train_batch_size": 8,
|
| 106 |
"trial_name": null,
|
| 107 |
"trial_params": null
|