ShengdingHu committed on
Commit
4949bd1
·
1 Parent(s): bc3cf6b

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,23 +1,23 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.7083333333333334,
4
- "eval_average_metrics": 90.18092691622104,
5
  "eval_combined_score": 0.7652279521674141,
6
- "eval_f1": 0.8221225710014948,
7
- "eval_loss": 0.5735756754875183,
8
- "eval_runtime": 0.4847,
9
  "eval_samples": 408,
10
- "eval_samples_per_second": 841.743,
11
  "eval_steps_per_second": 14.442,
12
- "test_accuracy": 89.70588235294117,
13
- "test_average_metrics": 91.19440459110473,
14
- "test_f1": 92.6829268292683,
15
- "test_loss": 0.2836102843284607,
16
- "test_runtime": 1.0233,
17
- "test_samples_per_second": 199.347,
18
- "train_loss": 0.5910721482901737,
19
- "train_runtime": 193.8371,
20
  "train_samples": 3668,
21
- "train_samples_per_second": 378.462,
22
- "train_steps_per_second": 5.984
23
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 88.23529411764706,
4
+ "eval_average_metrics": 89.89229494614747,
5
  "eval_combined_score": 0.7652279521674141,
6
+ "eval_f1": 91.54929577464787,
7
+ "eval_loss": 0.16003645956516266,
8
+ "eval_runtime": 0.6995,
9
  "eval_samples": 408,
10
+ "eval_samples_per_second": 291.653,
11
  "eval_steps_per_second": 14.442,
12
+ "test_accuracy": 89.2156862745098,
13
+ "test_average_metrics": 90.86634653861545,
14
+ "test_f1": 92.51700680272108,
15
+ "test_loss": 0.14261329174041748,
16
+ "test_runtime": 0.7757,
17
+ "test_samples_per_second": 262.979,
18
+ "train_loss": 0.17493162812857793,
19
+ "train_runtime": 418.5318,
20
  "train_samples": 3668,
21
+ "train_samples_per_second": 175.279,
22
+ "train_steps_per_second": 2.772
23
  }
config.json CHANGED
@@ -7,7 +7,7 @@
7
  "d_kv": 64,
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
- "dropout_rate": 0.1,
11
  "eos_token_id": 1,
12
  "feed_forward_proj": "relu",
13
  "initializer_factor": 1.0,
 
7
  "d_kv": 64,
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
+ "dropout_rate": 0.0,
11
  "eos_token_id": 1,
12
  "feed_forward_proj": "relu",
13
  "initializer_factor": 1.0,
eval_results.json CHANGED
@@ -1,11 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 0.7083333333333334,
4
- "eval_combined_score": 0.7652279521674141,
5
- "eval_f1": 0.8221225710014948,
6
- "eval_loss": 0.5735756754875183,
7
- "eval_runtime": 0.4847,
8
- "eval_samples": 408,
9
- "eval_samples_per_second": 841.743,
10
- "eval_steps_per_second": 14.442
11
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 88.23529411764706,
4
+ "eval_average_metrics": 89.89229494614747,
5
+ "eval_f1": 91.54929577464787,
6
+ "eval_loss": 0.16003645956516266,
7
+ "eval_runtime": 0.6995,
8
+ "eval_samples_per_second": 291.653
 
 
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fdcf757e2885b5216fad5766c7f89d23d581c1ad49d8816f3562354de90eba6
3
- size 2631685
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09e19d290866a921a7cc2616939f5dd9656a5f74c49c3237fddda3484eeed7f
3
+ size 7551621
runs/Feb13_11-49-17_node1/events.out.tfevents.1644724261.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7bf01c2a75a84f02ed7c4327c6b5e95250e39604ab320cd265dbb148a5a2c84
3
- size 6086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f396edc54350f58cc8d99f2bc02b68f5f30681e89c1a19e33aec3590944380e
3
+ size 6808
runs/Feb13_11-49-17_node1/events.out.tfevents.1644724681.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9050d4b6aa1251914583f3435db632594f03dddba0f359a1258077483c928cac
3
+ size 776
runs/Feb13_17-52-43_node1/1644746076.9297862/events.out.tfevents.1644746076.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19f0864fed08298ee6fc589e408d8adf75fc8ae3794ab37582d568adb62fb872
3
+ size 5011
runs/Feb13_17-52-43_node1/events.out.tfevents.1644746076.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7846c8c79bfdfc70d23291dc0ae5e8136915ef0ecc078355b225793e40056682
3
+ size 4300
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "test_accuracy": 89.70588235294117,
4
- "test_average_metrics": 91.19440459110473,
5
- "test_f1": 92.6829268292683,
6
- "test_loss": 0.2836102843284607,
7
- "test_runtime": 1.0233,
8
- "test_samples_per_second": 199.347
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_accuracy": 89.2156862745098,
4
+ "test_average_metrics": 90.86634653861545,
5
+ "test_f1": 92.51700680272108,
6
+ "test_loss": 0.14261329174041748,
7
+ "test_runtime": 0.7757,
8
+ "test_samples_per_second": 262.979
9
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.5910721482901737,
4
- "train_runtime": 193.8371,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 378.462,
7
- "train_steps_per_second": 5.984
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.17493162812857793,
4
+ "train_runtime": 418.5318,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 175.279,
7
+ "train_steps_per_second": 2.772
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.7083333333333334,
3
- "best_model_checkpoint": "outputs/prefix/roberta-base/mrpc/checkpoint-600",
4
  "epoch": 20.0,
5
  "global_step": 1160,
6
  "is_hyper_param_search": false,
@@ -9,84 +9,79 @@
9
  "log_history": [
10
  {
11
  "epoch": 3.45,
12
- "eval_accuracy": 0.7058823529411765,
13
- "eval_combined_score": 0.7631208171891511,
14
- "eval_f1": 0.8203592814371258,
15
- "eval_loss": 0.602976381778717,
16
- "eval_runtime": 1.1443,
17
- "eval_samples_per_second": 356.557,
18
- "eval_steps_per_second": 6.117,
19
  "step": 200
20
  },
21
  {
22
  "epoch": 6.9,
23
- "eval_accuracy": 0.7034313725490197,
24
- "eval_combined_score": 0.7620860566448802,
25
- "eval_f1": 0.8207407407407408,
26
- "eval_loss": 0.58584064245224,
27
- "eval_runtime": 0.5538,
28
- "eval_samples_per_second": 736.744,
29
- "eval_steps_per_second": 12.64,
30
  "step": 400
31
  },
32
  {
33
  "epoch": 8.62,
34
  "learning_rate": 0.0001706896551724138,
35
- "loss": 0.6074,
36
  "step": 500
37
  },
38
  {
39
  "epoch": 10.34,
40
- "eval_accuracy": 0.7083333333333334,
41
- "eval_combined_score": 0.7652279521674141,
42
- "eval_f1": 0.8221225710014948,
43
- "eval_loss": 0.5735756754875183,
44
- "eval_runtime": 0.6085,
45
- "eval_samples_per_second": 670.475,
46
- "eval_steps_per_second": 11.503,
47
  "step": 600
48
  },
49
  {
50
  "epoch": 13.79,
51
- "eval_accuracy": 0.7034313725490197,
52
- "eval_combined_score": 0.7612822034643454,
53
- "eval_f1": 0.8191330343796712,
54
- "eval_loss": 0.5688421726226807,
55
- "eval_runtime": 0.6104,
56
- "eval_samples_per_second": 668.411,
57
- "eval_steps_per_second": 11.468,
58
  "step": 800
59
  },
60
  {
61
  "epoch": 17.24,
62
  "learning_rate": 4.137931034482758e-05,
63
- "loss": 0.5801,
64
  "step": 1000
65
  },
66
  {
67
  "epoch": 17.24,
68
- "eval_accuracy": 0.7083333333333334,
69
- "eval_combined_score": 0.7649612693653174,
70
- "eval_f1": 0.8215892053973014,
71
- "eval_loss": 0.5650697350502014,
72
- "eval_runtime": 0.7692,
73
- "eval_samples_per_second": 530.398,
74
- "eval_steps_per_second": 9.1,
75
  "step": 1000
76
  },
77
  {
78
  "epoch": 20.0,
79
  "step": 1160,
80
- "total_flos": 4831687561666560.0,
81
- "train_loss": 0.5910721482901737,
82
- "train_runtime": 193.8371,
83
- "train_samples_per_second": 378.462,
84
- "train_steps_per_second": 5.984
85
  }
86
  ],
87
  "max_steps": 1160,
88
  "num_train_epochs": 20,
89
- "total_flos": 4831687561666560.0,
90
  "trial_name": null,
91
  "trial_params": null
92
  }
 
1
  {
2
+ "best_metric": 89.89229494614747,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-600",
4
  "epoch": 20.0,
5
  "global_step": 1160,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 3.45,
12
+ "eval_accuracy": 87.25490196078431,
13
+ "eval_average_metrics": 89.04998619165977,
14
+ "eval_f1": 90.84507042253522,
15
+ "eval_loss": 0.15426486730575562,
16
+ "eval_runtime": 1.9622,
17
+ "eval_samples_per_second": 103.964,
 
18
  "step": 200
19
  },
20
  {
21
  "epoch": 6.9,
22
+ "eval_accuracy": 88.23529411764706,
23
+ "eval_average_metrics": 89.86232790988736,
24
+ "eval_f1": 91.48936170212765,
25
+ "eval_loss": 0.15000468492507935,
26
+ "eval_runtime": 0.758,
27
+ "eval_samples_per_second": 269.133,
 
28
  "step": 400
29
  },
30
  {
31
  "epoch": 8.62,
32
  "learning_rate": 0.0001706896551724138,
33
+ "loss": 0.3075,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 10.34,
38
+ "eval_accuracy": 88.23529411764706,
39
+ "eval_average_metrics": 89.89229494614747,
40
+ "eval_f1": 91.54929577464787,
41
+ "eval_loss": 0.16003645956516266,
42
+ "eval_runtime": 0.7968,
43
+ "eval_samples_per_second": 256.036,
 
44
  "step": 600
45
  },
46
  {
47
  "epoch": 13.79,
48
+ "eval_accuracy": 88.23529411764706,
49
+ "eval_average_metrics": 89.89229494614747,
50
+ "eval_f1": 91.54929577464787,
51
+ "eval_loss": 0.17383529245853424,
52
+ "eval_runtime": 0.718,
53
+ "eval_samples_per_second": 284.125,
 
54
  "step": 800
55
  },
56
  {
57
  "epoch": 17.24,
58
  "learning_rate": 4.137931034482758e-05,
59
+ "loss": 0.0775,
60
  "step": 1000
61
  },
62
  {
63
  "epoch": 17.24,
64
+ "eval_accuracy": 86.27450980392157,
65
+ "eval_average_metrics": 88.13725490196079,
66
+ "eval_f1": 90.0,
67
+ "eval_loss": 0.18229342997074127,
68
+ "eval_runtime": 0.9163,
69
+ "eval_samples_per_second": 222.63,
 
70
  "step": 1000
71
  },
72
  {
73
  "epoch": 20.0,
74
  "step": 1160,
75
+ "total_flos": 9593848425259008.0,
76
+ "train_loss": 0.17493162812857793,
77
+ "train_runtime": 418.5318,
78
+ "train_samples_per_second": 175.279,
79
+ "train_steps_per_second": 2.772
80
  }
81
  ],
82
  "max_steps": 1160,
83
  "num_train_epochs": 20,
84
+ "total_flos": 9593848425259008.0,
85
  "trial_name": null,
86
  "trial_params": null
87
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bb604d10a0afd55461caf0e5622569d6a7a1fb19a6cbca10a0d50be7bb0eff4
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17db249fae4619a2221659ee9b370dd6b54ccd58461e7019698d76ad354c5b1e
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/mrpc", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
 
1
+ {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/mrpc", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}