Commit
·
4949bd1
1
Parent(s):
bc3cf6b
Training in progress, step 200
Browse files
- all_results.json +16 -16
- config.json +1 -1
- eval_results.json +6 -8
- pytorch_model.bin +2 -2
- runs/Feb13_11-49-17_node1/events.out.tfevents.1644724261.node1 +2 -2
- runs/Feb13_11-49-17_node1/events.out.tfevents.1644724681.node1 +3 -0
- runs/Feb13_17-52-43_node1/1644746076.9297862/events.out.tfevents.1644746076.node1 +3 -0
- runs/Feb13_17-52-43_node1/events.out.tfevents.1644746076.node1 +3 -0
- test_results.json +6 -6
- tokenizer.json +0 -0
- train_results.json +4 -4
- trainer_state.json +40 -45
- training_args.bin +1 -1
- training_config.json +1 -1
all_results.json
CHANGED
|
@@ -1,23 +1,23 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
-
"eval_accuracy":
|
| 4 |
-
"eval_average_metrics":
|
| 5 |
"eval_combined_score": 0.7652279521674141,
|
| 6 |
-
"eval_f1":
|
| 7 |
-
"eval_loss": 0.
|
| 8 |
-
"eval_runtime": 0.
|
| 9 |
"eval_samples": 408,
|
| 10 |
-
"eval_samples_per_second":
|
| 11 |
"eval_steps_per_second": 14.442,
|
| 12 |
-
"test_accuracy": 89.
|
| 13 |
-
"test_average_metrics":
|
| 14 |
-
"test_f1": 92.
|
| 15 |
-
"test_loss": 0.
|
| 16 |
-
"test_runtime":
|
| 17 |
-
"test_samples_per_second":
|
| 18 |
-
"train_loss": 0.
|
| 19 |
-
"train_runtime":
|
| 20 |
"train_samples": 3668,
|
| 21 |
-
"train_samples_per_second":
|
| 22 |
-
"train_steps_per_second":
|
| 23 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
+
"eval_accuracy": 88.23529411764706,
|
| 4 |
+
"eval_average_metrics": 89.89229494614747,
|
| 5 |
"eval_combined_score": 0.7652279521674141,
|
| 6 |
+
"eval_f1": 91.54929577464787,
|
| 7 |
+
"eval_loss": 0.16003645956516266,
|
| 8 |
+
"eval_runtime": 0.6995,
|
| 9 |
"eval_samples": 408,
|
| 10 |
+
"eval_samples_per_second": 291.653,
|
| 11 |
"eval_steps_per_second": 14.442,
|
| 12 |
+
"test_accuracy": 89.2156862745098,
|
| 13 |
+
"test_average_metrics": 90.86634653861545,
|
| 14 |
+
"test_f1": 92.51700680272108,
|
| 15 |
+
"test_loss": 0.14261329174041748,
|
| 16 |
+
"test_runtime": 0.7757,
|
| 17 |
+
"test_samples_per_second": 262.979,
|
| 18 |
+
"train_loss": 0.17493162812857793,
|
| 19 |
+
"train_runtime": 418.5318,
|
| 20 |
"train_samples": 3668,
|
| 21 |
+
"train_samples_per_second": 175.279,
|
| 22 |
+
"train_steps_per_second": 2.772
|
| 23 |
}
|
config.json
CHANGED
|
@@ -7,7 +7,7 @@
|
|
| 7 |
"d_kv": 64,
|
| 8 |
"d_model": 768,
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
-
"dropout_rate": 0.
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"feed_forward_proj": "relu",
|
| 13 |
"initializer_factor": 1.0,
|
|
|
|
| 7 |
"d_kv": 64,
|
| 8 |
"d_model": 768,
|
| 9 |
"decoder_start_token_id": 0,
|
| 10 |
+
"dropout_rate": 0.0,
|
| 11 |
"eos_token_id": 1,
|
| 12 |
"feed_forward_proj": "relu",
|
| 13 |
"initializer_factor": 1.0,
|
eval_results.json
CHANGED
|
@@ -1,11 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
-
"eval_accuracy":
|
| 4 |
-
"
|
| 5 |
-
"eval_f1":
|
| 6 |
-
"eval_loss": 0.
|
| 7 |
-
"eval_runtime": 0.
|
| 8 |
-
"
|
| 9 |
-
"eval_samples_per_second": 841.743,
|
| 10 |
-
"eval_steps_per_second": 14.442
|
| 11 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
+
"eval_accuracy": 88.23529411764706,
|
| 4 |
+
"eval_average_metrics": 89.89229494614747,
|
| 5 |
+
"eval_f1": 91.54929577464787,
|
| 6 |
+
"eval_loss": 0.16003645956516266,
|
| 7 |
+
"eval_runtime": 0.6995,
|
| 8 |
+
"eval_samples_per_second": 291.653
|
|
|
|
|
|
|
| 9 |
}
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b09e19d290866a921a7cc2616939f5dd9656a5f74c49c3237fddda3484eeed7f
|
| 3 |
+
size 7551621
|
runs/Feb13_11-49-17_node1/events.out.tfevents.1644724261.node1
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f396edc54350f58cc8d99f2bc02b68f5f30681e89c1a19e33aec3590944380e
|
| 3 |
+
size 6808
|
runs/Feb13_11-49-17_node1/events.out.tfevents.1644724681.node1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9050d4b6aa1251914583f3435db632594f03dddba0f359a1258077483c928cac
|
| 3 |
+
size 776
|
runs/Feb13_17-52-43_node1/1644746076.9297862/events.out.tfevents.1644746076.node1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19f0864fed08298ee6fc589e408d8adf75fc8ae3794ab37582d568adb62fb872
|
| 3 |
+
size 5011
|
runs/Feb13_17-52-43_node1/events.out.tfevents.1644746076.node1
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7846c8c79bfdfc70d23291dc0ae5e8136915ef0ecc078355b225793e40056682
|
| 3 |
+
size 4300
|
test_results.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
-
"test_accuracy": 89.
|
| 4 |
-
"test_average_metrics":
|
| 5 |
-
"test_f1": 92.
|
| 6 |
-
"test_loss": 0.
|
| 7 |
-
"test_runtime":
|
| 8 |
-
"test_samples_per_second":
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
+
"test_accuracy": 89.2156862745098,
|
| 4 |
+
"test_average_metrics": 90.86634653861545,
|
| 5 |
+
"test_f1": 92.51700680272108,
|
| 6 |
+
"test_loss": 0.14261329174041748,
|
| 7 |
+
"test_runtime": 0.7757,
|
| 8 |
+
"test_samples_per_second": 262.979
|
| 9 |
}
|
tokenizer.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
train_results.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
-
"train_loss": 0.
|
| 4 |
-
"train_runtime":
|
| 5 |
"train_samples": 3668,
|
| 6 |
-
"train_samples_per_second":
|
| 7 |
-
"train_steps_per_second":
|
| 8 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 20.0,
|
| 3 |
+
"train_loss": 0.17493162812857793,
|
| 4 |
+
"train_runtime": 418.5318,
|
| 5 |
"train_samples": 3668,
|
| 6 |
+
"train_samples_per_second": 175.279,
|
| 7 |
+
"train_steps_per_second": 2.772
|
| 8 |
}
|
trainer_state.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric":
|
| 3 |
-
"best_model_checkpoint": "outputs/
|
| 4 |
"epoch": 20.0,
|
| 5 |
"global_step": 1160,
|
| 6 |
"is_hyper_param_search": false,
|
|
@@ -9,84 +9,79 @@
|
|
| 9 |
"log_history": [
|
| 10 |
{
|
| 11 |
"epoch": 3.45,
|
| 12 |
-
"eval_accuracy":
|
| 13 |
-
"
|
| 14 |
-
"eval_f1":
|
| 15 |
-
"eval_loss": 0.
|
| 16 |
-
"eval_runtime": 1.
|
| 17 |
-
"eval_samples_per_second":
|
| 18 |
-
"eval_steps_per_second": 6.117,
|
| 19 |
"step": 200
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"epoch": 6.9,
|
| 23 |
-
"eval_accuracy":
|
| 24 |
-
"
|
| 25 |
-
"eval_f1":
|
| 26 |
-
"eval_loss": 0.
|
| 27 |
-
"eval_runtime": 0.
|
| 28 |
-
"eval_samples_per_second":
|
| 29 |
-
"eval_steps_per_second": 12.64,
|
| 30 |
"step": 400
|
| 31 |
},
|
| 32 |
{
|
| 33 |
"epoch": 8.62,
|
| 34 |
"learning_rate": 0.0001706896551724138,
|
| 35 |
-
"loss": 0.
|
| 36 |
"step": 500
|
| 37 |
},
|
| 38 |
{
|
| 39 |
"epoch": 10.34,
|
| 40 |
-
"eval_accuracy":
|
| 41 |
-
"
|
| 42 |
-
"eval_f1":
|
| 43 |
-
"eval_loss": 0.
|
| 44 |
-
"eval_runtime": 0.
|
| 45 |
-
"eval_samples_per_second":
|
| 46 |
-
"eval_steps_per_second": 11.503,
|
| 47 |
"step": 600
|
| 48 |
},
|
| 49 |
{
|
| 50 |
"epoch": 13.79,
|
| 51 |
-
"eval_accuracy":
|
| 52 |
-
"
|
| 53 |
-
"eval_f1":
|
| 54 |
-
"eval_loss": 0.
|
| 55 |
-
"eval_runtime": 0.
|
| 56 |
-
"eval_samples_per_second":
|
| 57 |
-
"eval_steps_per_second": 11.468,
|
| 58 |
"step": 800
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"epoch": 17.24,
|
| 62 |
"learning_rate": 4.137931034482758e-05,
|
| 63 |
-
"loss": 0.
|
| 64 |
"step": 1000
|
| 65 |
},
|
| 66 |
{
|
| 67 |
"epoch": 17.24,
|
| 68 |
-
"eval_accuracy":
|
| 69 |
-
"
|
| 70 |
-
"eval_f1": 0
|
| 71 |
-
"eval_loss": 0.
|
| 72 |
-
"eval_runtime": 0.
|
| 73 |
-
"eval_samples_per_second":
|
| 74 |
-
"eval_steps_per_second": 9.1,
|
| 75 |
"step": 1000
|
| 76 |
},
|
| 77 |
{
|
| 78 |
"epoch": 20.0,
|
| 79 |
"step": 1160,
|
| 80 |
-
"total_flos":
|
| 81 |
-
"train_loss": 0.
|
| 82 |
-
"train_runtime":
|
| 83 |
-
"train_samples_per_second":
|
| 84 |
-
"train_steps_per_second":
|
| 85 |
}
|
| 86 |
],
|
| 87 |
"max_steps": 1160,
|
| 88 |
"num_train_epochs": 20,
|
| 89 |
-
"total_flos":
|
| 90 |
"trial_name": null,
|
| 91 |
"trial_params": null
|
| 92 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 89.89229494614747,
|
| 3 |
+
"best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-600",
|
| 4 |
"epoch": 20.0,
|
| 5 |
"global_step": 1160,
|
| 6 |
"is_hyper_param_search": false,
|
|
|
|
| 9 |
"log_history": [
|
| 10 |
{
|
| 11 |
"epoch": 3.45,
|
| 12 |
+
"eval_accuracy": 87.25490196078431,
|
| 13 |
+
"eval_average_metrics": 89.04998619165977,
|
| 14 |
+
"eval_f1": 90.84507042253522,
|
| 15 |
+
"eval_loss": 0.15426486730575562,
|
| 16 |
+
"eval_runtime": 1.9622,
|
| 17 |
+
"eval_samples_per_second": 103.964,
|
|
|
|
| 18 |
"step": 200
|
| 19 |
},
|
| 20 |
{
|
| 21 |
"epoch": 6.9,
|
| 22 |
+
"eval_accuracy": 88.23529411764706,
|
| 23 |
+
"eval_average_metrics": 89.86232790988736,
|
| 24 |
+
"eval_f1": 91.48936170212765,
|
| 25 |
+
"eval_loss": 0.15000468492507935,
|
| 26 |
+
"eval_runtime": 0.758,
|
| 27 |
+
"eval_samples_per_second": 269.133,
|
|
|
|
| 28 |
"step": 400
|
| 29 |
},
|
| 30 |
{
|
| 31 |
"epoch": 8.62,
|
| 32 |
"learning_rate": 0.0001706896551724138,
|
| 33 |
+
"loss": 0.3075,
|
| 34 |
"step": 500
|
| 35 |
},
|
| 36 |
{
|
| 37 |
"epoch": 10.34,
|
| 38 |
+
"eval_accuracy": 88.23529411764706,
|
| 39 |
+
"eval_average_metrics": 89.89229494614747,
|
| 40 |
+
"eval_f1": 91.54929577464787,
|
| 41 |
+
"eval_loss": 0.16003645956516266,
|
| 42 |
+
"eval_runtime": 0.7968,
|
| 43 |
+
"eval_samples_per_second": 256.036,
|
|
|
|
| 44 |
"step": 600
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"epoch": 13.79,
|
| 48 |
+
"eval_accuracy": 88.23529411764706,
|
| 49 |
+
"eval_average_metrics": 89.89229494614747,
|
| 50 |
+
"eval_f1": 91.54929577464787,
|
| 51 |
+
"eval_loss": 0.17383529245853424,
|
| 52 |
+
"eval_runtime": 0.718,
|
| 53 |
+
"eval_samples_per_second": 284.125,
|
|
|
|
| 54 |
"step": 800
|
| 55 |
},
|
| 56 |
{
|
| 57 |
"epoch": 17.24,
|
| 58 |
"learning_rate": 4.137931034482758e-05,
|
| 59 |
+
"loss": 0.0775,
|
| 60 |
"step": 1000
|
| 61 |
},
|
| 62 |
{
|
| 63 |
"epoch": 17.24,
|
| 64 |
+
"eval_accuracy": 86.27450980392157,
|
| 65 |
+
"eval_average_metrics": 88.13725490196079,
|
| 66 |
+
"eval_f1": 90.0,
|
| 67 |
+
"eval_loss": 0.18229342997074127,
|
| 68 |
+
"eval_runtime": 0.9163,
|
| 69 |
+
"eval_samples_per_second": 222.63,
|
|
|
|
| 70 |
"step": 1000
|
| 71 |
},
|
| 72 |
{
|
| 73 |
"epoch": 20.0,
|
| 74 |
"step": 1160,
|
| 75 |
+
"total_flos": 9593848425259008.0,
|
| 76 |
+
"train_loss": 0.17493162812857793,
|
| 77 |
+
"train_runtime": 418.5318,
|
| 78 |
+
"train_samples_per_second": 175.279,
|
| 79 |
+
"train_steps_per_second": 2.772
|
| 80 |
}
|
| 81 |
],
|
| 82 |
"max_steps": 1160,
|
| 83 |
"num_train_epochs": 20,
|
| 84 |
+
"total_flos": 9593848425259008.0,
|
| 85 |
"trial_name": null,
|
| 86 |
"trial_params": null
|
| 87 |
}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3183
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17db249fae4619a2221659ee9b370dd6b54ccd58461e7019698d76ad354c5b1e
|
| 3 |
size 3183
|
training_config.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"dataset_config_name": ["en"], "delta_type": "
|
|
|
|
| 1 |
+
{"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/mrpc", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
|