SM committed on
Commit ·
c3eb26d
1
Parent(s): 90db77c
With better accuracy
Browse files
- README.md +7 -7
- all_results.json +11 -11
- checkpoint-1000/model.safetensors +1 -1
- checkpoint-1000/optimizer.pt +2 -2
- checkpoint-1000/rng_state.pth +2 -2
- checkpoint-1000/scheduler.pt +1 -1
- checkpoint-1000/trainer_state.json +11 -11
- checkpoint-1000/training_args.bin +1 -1
- checkpoint-1500/model.safetensors +1 -1
- checkpoint-1500/optimizer.pt +2 -2
- checkpoint-1500/rng_state.pth +2 -2
- checkpoint-1500/scheduler.pt +1 -1
- checkpoint-1500/trainer_state.json +14 -14
- checkpoint-1500/training_args.bin +1 -1
- checkpoint-2000/model.safetensors +1 -1
- checkpoint-2000/optimizer.pt +2 -2
- checkpoint-2000/rng_state.pth +2 -2
- checkpoint-2000/scheduler.pt +1 -1
- checkpoint-2000/trainer_state.json +17 -17
- checkpoint-2000/training_args.bin +1 -1
- checkpoint-500/model.safetensors +1 -1
- checkpoint-500/optimizer.pt +2 -2
- checkpoint-500/rng_state.pth +2 -2
- checkpoint-500/scheduler.pt +1 -1
- checkpoint-500/trainer_state.json +8 -8
- checkpoint-500/training_args.bin +1 -1
- eval_results.json +7 -7
- model.safetensors +1 -1
- train_results.json +5 -5
- trainer_state.json +25 -31
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -17,8 +17,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 17 |
|
| 18 |
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
|
| 19 |
It achieves the following results on the evaluation set:
|
| 20 |
-
- Loss:
|
| 21 |
-
- Accuracy: 0.
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
@@ -38,12 +38,12 @@ More information needed
|
|
| 38 |
|
| 39 |
The following hyperparameters were used during training:
|
| 40 |
- learning_rate: 5e-05
|
| 41 |
-
- train_batch_size:
|
| 42 |
-
- eval_batch_size:
|
| 43 |
- seed: 42
|
| 44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 45 |
- lr_scheduler_type: linear
|
| 46 |
-
- num_epochs:
|
| 47 |
|
| 48 |
### Training results
|
| 49 |
|
|
@@ -52,6 +52,6 @@ The following hyperparameters were used during training:
|
|
| 52 |
### Framework versions
|
| 53 |
|
| 54 |
- Transformers 4.37.0.dev0
|
| 55 |
-
- Pytorch 2.1.2
|
| 56 |
-
- Datasets 2.
|
| 57 |
- Tokenizers 0.15.0
|
|
|
|
| 17 |
|
| 18 |
This model is a fine-tuned version of [gpt2](https://huggingface.co/gpt2) on an unknown dataset.
|
| 19 |
It achieves the following results on the evaluation set:
|
| 20 |
+
- Loss: 3.5093
|
| 21 |
+
- Accuracy: 0.4494
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
|
|
| 38 |
|
| 39 |
The following hyperparameters were used during training:
|
| 40 |
- learning_rate: 5e-05
|
| 41 |
+
- train_batch_size: 4
|
| 42 |
+
- eval_batch_size: 4
|
| 43 |
- seed: 42
|
| 44 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
| 45 |
- lr_scheduler_type: linear
|
| 46 |
+
- num_epochs: 60.0
|
| 47 |
|
| 48 |
### Training results
|
| 49 |
|
|
|
|
| 52 |
### Framework versions
|
| 53 |
|
| 54 |
- Transformers 4.37.0.dev0
|
| 55 |
+
- Pytorch 2.1.2+cu121
|
| 56 |
+
- Datasets 2.16.0
|
| 57 |
- Tokenizers 0.15.0
|
all_results.json
CHANGED
|
@@ -1,15 +1,15 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_loss":
|
| 5 |
-
"eval_runtime":
|
| 6 |
"eval_samples": 9,
|
| 7 |
-
"eval_samples_per_second":
|
| 8 |
-
"eval_steps_per_second":
|
| 9 |
-
"perplexity":
|
| 10 |
-
"train_loss":
|
| 11 |
-
"train_runtime":
|
| 12 |
"train_samples": 138,
|
| 13 |
-
"train_samples_per_second":
|
| 14 |
-
"train_steps_per_second":
|
| 15 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 60.0,
|
| 3 |
+
"eval_accuracy": 0.4494406429890301,
|
| 4 |
+
"eval_loss": 3.5092644691467285,
|
| 5 |
+
"eval_runtime": 0.6069,
|
| 6 |
"eval_samples": 9,
|
| 7 |
+
"eval_samples_per_second": 14.829,
|
| 8 |
+
"eval_steps_per_second": 4.943,
|
| 9 |
+
"perplexity": 33.423674596634115,
|
| 10 |
+
"train_loss": 0.9941332481020972,
|
| 11 |
+
"train_runtime": 1577.313,
|
| 12 |
"train_samples": 138,
|
| 13 |
+
"train_samples_per_second": 5.249,
|
| 14 |
+
"train_steps_per_second": 1.331
|
| 15 |
}
|
checkpoint-1000/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:12f996d0719a66b10d90e36ef43b31f9eaae1c31749508f6854af308bb085736
|
| 3 |
size 497774208
|
checkpoint-1000/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9adc9e7ab852a33f9fe84c647c87587bada024b6538c02088b10c9c8717806e2
|
| 3 |
+
size 995642298
|
checkpoint-1000/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0a6632d83b7bb45efc05bef8c034ed3b2854a29a949ed96a7ca5bd50bcb7d902
|
| 3 |
+
size 14244
|
checkpoint-1000/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eee3c9505132ed967b6539dd4a6fb45e2bc29520ec4ec39ac5c68d846d45dec5
|
| 3 |
size 1064
|
checkpoint-1000/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1000,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,25 +9,25 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"learning_rate":
|
| 14 |
-
"loss":
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
-
"epoch":
|
| 19 |
-
"learning_rate":
|
| 20 |
-
"loss":
|
| 21 |
"step": 1000
|
| 22 |
}
|
| 23 |
],
|
| 24 |
"logging_steps": 500,
|
| 25 |
-
"max_steps":
|
| 26 |
"num_input_tokens_seen": 0,
|
| 27 |
-
"num_train_epochs":
|
| 28 |
"save_steps": 500,
|
| 29 |
-
"total_flos":
|
| 30 |
-
"train_batch_size":
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
| 33 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 28.571428571428573,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1000,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 14.29,
|
| 13 |
+
"learning_rate": 3.809523809523809e-05,
|
| 14 |
+
"loss": 1.8052,
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
+
"epoch": 28.57,
|
| 19 |
+
"learning_rate": 2.6190476190476192e-05,
|
| 20 |
+
"loss": 1.0819,
|
| 21 |
"step": 1000
|
| 22 |
}
|
| 23 |
],
|
| 24 |
"logging_steps": 500,
|
| 25 |
+
"max_steps": 2100,
|
| 26 |
"num_input_tokens_seen": 0,
|
| 27 |
+
"num_train_epochs": 60,
|
| 28 |
"save_steps": 500,
|
| 29 |
+
"total_flos": 2061071548416000.0,
|
| 30 |
+
"train_batch_size": 4,
|
| 31 |
"trial_name": null,
|
| 32 |
"trial_params": null
|
| 33 |
}
|
checkpoint-1000/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
|
| 3 |
size 4664
|
checkpoint-1500/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6db548188c57c7cc26a03d2f3836dac8ae7b3f171ffc94f210669f0684391440
|
| 3 |
size 497774208
|
checkpoint-1500/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0857cce43798c01f9e56b21f550de159b359b827bbbe6664dc0920bb722a5373
|
| 3 |
+
size 995642298
|
checkpoint-1500/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00755da9ed0bcc7ab4ff6d1881daf48c315a760cde6596f5e5de6ebdb5140f8
|
| 3 |
+
size 14244
|
checkpoint-1500/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f136c5ddfbd5bfe9857c433dcc2fc706e931bc068d2eabf598b25c109d462906
|
| 3 |
size 1064
|
checkpoint-1500/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,31 +9,31 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"learning_rate":
|
| 14 |
-
"loss":
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
-
"epoch":
|
| 19 |
-
"learning_rate":
|
| 20 |
-
"loss":
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
-
"epoch":
|
| 25 |
-
"learning_rate":
|
| 26 |
-
"loss":
|
| 27 |
"step": 1500
|
| 28 |
}
|
| 29 |
],
|
| 30 |
"logging_steps": 500,
|
| 31 |
-
"max_steps":
|
| 32 |
"num_input_tokens_seen": 0,
|
| 33 |
-
"num_train_epochs":
|
| 34 |
"save_steps": 500,
|
| 35 |
-
"total_flos":
|
| 36 |
-
"train_batch_size":
|
| 37 |
"trial_name": null,
|
| 38 |
"trial_params": null
|
| 39 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 42.857142857142854,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 14.29,
|
| 13 |
+
"learning_rate": 3.809523809523809e-05,
|
| 14 |
+
"loss": 1.8052,
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
+
"epoch": 28.57,
|
| 19 |
+
"learning_rate": 2.6190476190476192e-05,
|
| 20 |
+
"loss": 1.0819,
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
+
"epoch": 42.86,
|
| 25 |
+
"learning_rate": 1.4285714285714285e-05,
|
| 26 |
+
"loss": 0.6781,
|
| 27 |
"step": 1500
|
| 28 |
}
|
| 29 |
],
|
| 30 |
"logging_steps": 500,
|
| 31 |
+
"max_steps": 2100,
|
| 32 |
"num_input_tokens_seen": 0,
|
| 33 |
+
"num_train_epochs": 60,
|
| 34 |
"save_steps": 500,
|
| 35 |
+
"total_flos": 3091607322624000.0,
|
| 36 |
+
"train_batch_size": 4,
|
| 37 |
"trial_name": null,
|
| 38 |
"trial_params": null
|
| 39 |
}
|
checkpoint-1500/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
|
| 3 |
size 4664
|
checkpoint-2000/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:49f835d6e202641570e280cfd02c7abc724ffcf647ef3c4919ddf9d0244fefb0
|
| 3 |
size 497774208
|
checkpoint-2000/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5953577a0a2b353baf20d78a8d3cafd7804195fc51c78ba605dbd587f53247e
|
| 3 |
+
size 995642298
|
checkpoint-2000/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54f33e1ce978f1c3e97263679165565a2045be7c9c46fe4e4856a58d36de3efa
|
| 3 |
+
size 14244
|
checkpoint-2000/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9389d2d6b649add7b167e5f96d84163bb4de87fbdbcea2e7d94c8fc162243048
|
| 3 |
size 1064
|
checkpoint-2000/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 2000,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,37 +9,37 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"learning_rate":
|
| 14 |
-
"loss":
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
-
"epoch":
|
| 19 |
-
"learning_rate":
|
| 20 |
-
"loss":
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
-
"epoch":
|
| 25 |
-
"learning_rate":
|
| 26 |
-
"loss":
|
| 27 |
"step": 1500
|
| 28 |
},
|
| 29 |
{
|
| 30 |
-
"epoch":
|
| 31 |
-
"learning_rate":
|
| 32 |
-
"loss":
|
| 33 |
"step": 2000
|
| 34 |
}
|
| 35 |
],
|
| 36 |
"logging_steps": 500,
|
| 37 |
-
"max_steps":
|
| 38 |
"num_input_tokens_seen": 0,
|
| 39 |
-
"num_train_epochs":
|
| 40 |
"save_steps": 500,
|
| 41 |
-
"total_flos":
|
| 42 |
-
"train_batch_size":
|
| 43 |
"trial_name": null,
|
| 44 |
"trial_params": null
|
| 45 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 57.142857142857146,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 2000,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 14.29,
|
| 13 |
+
"learning_rate": 3.809523809523809e-05,
|
| 14 |
+
"loss": 1.8052,
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
+
"epoch": 28.57,
|
| 19 |
+
"learning_rate": 2.6190476190476192e-05,
|
| 20 |
+
"loss": 1.0819,
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
+
"epoch": 42.86,
|
| 25 |
+
"learning_rate": 1.4285714285714285e-05,
|
| 26 |
+
"loss": 0.6781,
|
| 27 |
"step": 1500
|
| 28 |
},
|
| 29 |
{
|
| 30 |
+
"epoch": 57.14,
|
| 31 |
+
"learning_rate": 2.3809523809523808e-06,
|
| 32 |
+
"loss": 0.5142,
|
| 33 |
"step": 2000
|
| 34 |
}
|
| 35 |
],
|
| 36 |
"logging_steps": 500,
|
| 37 |
+
"max_steps": 2100,
|
| 38 |
"num_input_tokens_seen": 0,
|
| 39 |
+
"num_train_epochs": 60,
|
| 40 |
"save_steps": 500,
|
| 41 |
+
"total_flos": 4121097928704000.0,
|
| 42 |
+
"train_batch_size": 4,
|
| 43 |
"trial_name": null,
|
| 44 |
"trial_params": null
|
| 45 |
}
|
checkpoint-2000/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
|
| 3 |
size 4664
|
checkpoint-500/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b4dcf13f972feb06a7bc56ddc93ab0a631fe7d88e27643cbf9c0e042041fcff
|
| 3 |
size 497774208
|
checkpoint-500/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84d55291718bd821470b8b79351ab3e47b578d3bc202542399220a633dc12848
|
| 3 |
+
size 995642298
|
checkpoint-500/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e651e34957d21ef1419934c9c311ba824f956fd612f9f4b5bea9e4854d09d528
|
| 3 |
+
size 14244
|
checkpoint-500/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d84cb0a17b808448928ddd21fdfb54eabfda0598dfe3f0b7eebb6d442d67f65
|
| 3 |
size 1064
|
checkpoint-500/trainer_state.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 500,
|
| 7 |
"is_hyper_param_search": false,
|
|
@@ -9,19 +9,19 @@
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"learning_rate":
|
| 14 |
-
"loss":
|
| 15 |
"step": 500
|
| 16 |
}
|
| 17 |
],
|
| 18 |
"logging_steps": 500,
|
| 19 |
-
"max_steps":
|
| 20 |
"num_input_tokens_seen": 0,
|
| 21 |
-
"num_train_epochs":
|
| 22 |
"save_steps": 500,
|
| 23 |
-
"total_flos":
|
| 24 |
-
"train_batch_size":
|
| 25 |
"trial_name": null,
|
| 26 |
"trial_params": null
|
| 27 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 14.285714285714286,
|
| 5 |
"eval_steps": 500,
|
| 6 |
"global_step": 500,
|
| 7 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 14.29,
|
| 13 |
+
"learning_rate": 3.809523809523809e-05,
|
| 14 |
+
"loss": 1.8052,
|
| 15 |
"step": 500
|
| 16 |
}
|
| 17 |
],
|
| 18 |
"logging_steps": 500,
|
| 19 |
+
"max_steps": 2100,
|
| 20 |
"num_input_tokens_seen": 0,
|
| 21 |
+
"num_train_epochs": 60,
|
| 22 |
"save_steps": 500,
|
| 23 |
+
"total_flos": 1030535774208000.0,
|
| 24 |
+
"train_batch_size": 4,
|
| 25 |
"trial_name": null,
|
| 26 |
"trial_params": null
|
| 27 |
}
|
checkpoint-500/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
|
| 3 |
size 4664
|
eval_results.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"eval_accuracy": 0.
|
| 4 |
-
"eval_loss":
|
| 5 |
-
"eval_runtime":
|
| 6 |
"eval_samples": 9,
|
| 7 |
-
"eval_samples_per_second":
|
| 8 |
-
"eval_steps_per_second":
|
| 9 |
-
"perplexity":
|
| 10 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 60.0,
|
| 3 |
+
"eval_accuracy": 0.4494406429890301,
|
| 4 |
+
"eval_loss": 3.5092644691467285,
|
| 5 |
+
"eval_runtime": 0.6069,
|
| 6 |
"eval_samples": 9,
|
| 7 |
+
"eval_samples_per_second": 14.829,
|
| 8 |
+
"eval_steps_per_second": 4.943,
|
| 9 |
+
"perplexity": 33.423674596634115
|
| 10 |
}
|
model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 497774208
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5aaa8c132a2f82e4f3a057cd8336ab86a6452f68defee110eba5fbb71b1e662c
|
| 3 |
size 497774208
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
-
"epoch":
|
| 3 |
-
"train_loss":
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 138,
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second":
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"epoch": 60.0,
|
| 3 |
+
"train_loss": 0.9941332481020972,
|
| 4 |
+
"train_runtime": 1577.313,
|
| 5 |
"train_samples": 138,
|
| 6 |
+
"train_samples_per_second": 5.249,
|
| 7 |
+
"train_steps_per_second": 1.331
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -1,60 +1,54 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
-
"epoch":
|
| 13 |
-
"learning_rate":
|
| 14 |
-
"loss":
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
-
"epoch":
|
| 19 |
-
"learning_rate":
|
| 20 |
-
"loss":
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
-
"epoch":
|
| 25 |
-
"learning_rate":
|
| 26 |
-
"loss":
|
| 27 |
"step": 1500
|
| 28 |
},
|
| 29 |
{
|
| 30 |
-
"epoch":
|
| 31 |
-
"learning_rate":
|
| 32 |
-
"loss":
|
| 33 |
"step": 2000
|
| 34 |
},
|
| 35 |
{
|
| 36 |
-
"epoch":
|
| 37 |
-
"
|
| 38 |
-
"
|
| 39 |
-
"
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
"
|
| 43 |
-
"step": 2760,
|
| 44 |
-
"total_flos": 2884664033280000.0,
|
| 45 |
-
"train_loss": 57.43311643738677,
|
| 46 |
-
"train_runtime": 10482.6781,
|
| 47 |
-
"train_samples_per_second": 0.527,
|
| 48 |
-
"train_steps_per_second": 0.263
|
| 49 |
}
|
| 50 |
],
|
| 51 |
"logging_steps": 500,
|
| 52 |
-
"max_steps":
|
| 53 |
"num_input_tokens_seen": 0,
|
| 54 |
-
"num_train_epochs":
|
| 55 |
"save_steps": 500,
|
| 56 |
-
"total_flos":
|
| 57 |
-
"train_batch_size":
|
| 58 |
"trial_name": null,
|
| 59 |
"trial_params": null
|
| 60 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 60.0,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 2100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
+
"epoch": 14.29,
|
| 13 |
+
"learning_rate": 3.809523809523809e-05,
|
| 14 |
+
"loss": 1.8052,
|
| 15 |
"step": 500
|
| 16 |
},
|
| 17 |
{
|
| 18 |
+
"epoch": 28.57,
|
| 19 |
+
"learning_rate": 2.6190476190476192e-05,
|
| 20 |
+
"loss": 1.0819,
|
| 21 |
"step": 1000
|
| 22 |
},
|
| 23 |
{
|
| 24 |
+
"epoch": 42.86,
|
| 25 |
+
"learning_rate": 1.4285714285714285e-05,
|
| 26 |
+
"loss": 0.6781,
|
| 27 |
"step": 1500
|
| 28 |
},
|
| 29 |
{
|
| 30 |
+
"epoch": 57.14,
|
| 31 |
+
"learning_rate": 2.3809523809523808e-06,
|
| 32 |
+
"loss": 0.5142,
|
| 33 |
"step": 2000
|
| 34 |
},
|
| 35 |
{
|
| 36 |
+
"epoch": 60.0,
|
| 37 |
+
"step": 2100,
|
| 38 |
+
"total_flos": 4326996049920000.0,
|
| 39 |
+
"train_loss": 0.9941332481020972,
|
| 40 |
+
"train_runtime": 1577.313,
|
| 41 |
+
"train_samples_per_second": 5.249,
|
| 42 |
+
"train_steps_per_second": 1.331
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
}
|
| 44 |
],
|
| 45 |
"logging_steps": 500,
|
| 46 |
+
"max_steps": 2100,
|
| 47 |
"num_input_tokens_seen": 0,
|
| 48 |
+
"num_train_epochs": 60,
|
| 49 |
"save_steps": 500,
|
| 50 |
+
"total_flos": 4326996049920000.0,
|
| 51 |
+
"train_batch_size": 4,
|
| 52 |
"trial_name": null,
|
| 53 |
"trial_params": null
|
| 54 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4664
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fb1fd55327e691254314ff0db99123383d66b9482283a479aeb28d89c26938a
|
| 3 |
size 4664
|