ShengdingHu committed on
Commit
9d731d1
·
1 Parent(s): fb8e3c7

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 87.74509803921569,
4
- "eval_average_metrics": 89.42415044309539,
5
- "eval_f1": 91.1032028469751,
6
- "eval_loss": 0.16199710965156555,
7
- "eval_runtime": 0.7045,
8
- "eval_samples_per_second": 289.552,
9
  "test_accuracy": 89.70588235294117,
10
  "test_average_metrics": 91.19440459110473,
11
  "test_f1": 92.6829268292683,
12
- "test_loss": 0.14617003500461578,
13
- "test_runtime": 0.6901,
14
- "test_samples_per_second": 295.608,
15
- "train_loss": 0.7308501913740828,
16
- "train_runtime": 299.5389,
17
  "train_samples": 3668,
18
- "train_samples_per_second": 244.91,
19
- "train_steps_per_second": 2.47
20
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 88.72549019607843,
4
+ "eval_average_metrics": 90.18092691622104,
5
+ "eval_f1": 91.63636363636364,
6
+ "eval_loss": 0.3694871664047241,
7
+ "eval_runtime": 1.0502,
8
+ "eval_samples_per_second": 194.246,
9
  "test_accuracy": 89.70588235294117,
10
  "test_average_metrics": 91.19440459110473,
11
  "test_f1": 92.6829268292683,
12
+ "test_loss": 0.2836102843284607,
13
+ "test_runtime": 1.0233,
14
+ "test_samples_per_second": 199.347,
15
+ "train_loss": 0.04842971423397893,
16
+ "train_runtime": 850.7003,
17
  "train_samples": 3668,
18
+ "train_samples_per_second": 86.235,
19
+ "train_steps_per_second": 2.704
20
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 87.74509803921569,
4
- "eval_average_metrics": 89.42415044309539,
5
- "eval_f1": 91.1032028469751,
6
- "eval_loss": 0.16199710965156555,
7
- "eval_runtime": 0.7045,
8
- "eval_samples_per_second": 289.552
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 88.72549019607843,
4
+ "eval_average_metrics": 90.18092691622104,
5
+ "eval_f1": 91.63636363636364,
6
+ "eval_loss": 0.3694871664047241,
7
+ "eval_runtime": 1.0502,
8
+ "eval_samples_per_second": 194.246
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4db1132eb4d4f318ec092debf436b18cf21627157a309b8cdb6bbd77024446ef
3
- size 879301
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbfb2ad7e2adc1edf657a4adfdaf82ea2c9087f6f8350e7ca2bb99595c7c6f2a
3
+ size 2631685
runs/Feb01_08-31-08_node1/events.out.tfevents.1643675552.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:235576e4cfadbf745109bfb856f0ac034d1876902d77d22d0f544f712e3ee1b4
3
- size 8608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6227709f5aad7ed804ebeda155906a65bf9d2206d845790d900d4680c6306b70
3
+ size 9330
runs/Feb01_08-31-08_node1/events.out.tfevents.1643676404.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbed59f6129b8c350b99a41ea1f5747d45d6962f22bd70213e151be23b33b4b9
3
+ size 776
runs/Feb02_20-26-01_node1/1643804875.4911196/events.out.tfevents.1643804875.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88aadcc41bf26f9a1969a01ba4db1d50be135c2af990afb27bfac067bb62a2f
3
+ size 5011
runs/Feb02_20-26-01_node1/events.out.tfevents.1643804875.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3c52e35645d95ab5bfb62d2403be8cf974439c5594a5950d7f9ed895a66c778
3
+ size 4300
test_results.json CHANGED
@@ -3,7 +3,7 @@
3
  "test_accuracy": 89.70588235294117,
4
  "test_average_metrics": 91.19440459110473,
5
  "test_f1": 92.6829268292683,
6
- "test_loss": 0.14617003500461578,
7
- "test_runtime": 0.6901,
8
- "test_samples_per_second": 295.608
9
  }
 
3
  "test_accuracy": 89.70588235294117,
4
  "test_average_metrics": 91.19440459110473,
5
  "test_f1": 92.6829268292683,
6
+ "test_loss": 0.2836102843284607,
7
+ "test_runtime": 1.0233,
8
+ "test_samples_per_second": 199.347
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.7308501913740828,
4
- "train_runtime": 299.5389,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 244.91,
7
- "train_steps_per_second": 2.47
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.04842971423397893,
4
+ "train_runtime": 850.7003,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 86.235,
7
+ "train_steps_per_second": 2.704
8
  }
trainer_state.json CHANGED
@@ -1,61 +1,159 @@
1
  {
2
- "best_metric": 89.42415044309539,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-600",
4
  "epoch": 20.0,
5
- "global_step": 740,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 5.41,
12
- "eval_accuracy": 85.29411764705883,
13
- "eval_average_metrics": 87.36536868268433,
14
- "eval_f1": 89.43661971830986,
15
- "eval_loss": 0.16359843313694,
16
- "eval_runtime": 0.6875,
17
- "eval_samples_per_second": 296.737,
18
  "step": 200
19
  },
20
  {
21
- "epoch": 10.81,
22
- "eval_accuracy": 86.76470588235294,
23
- "eval_average_metrics": 88.47326203208556,
24
- "eval_f1": 90.18181818181817,
25
- "eval_loss": 0.15707039833068848,
26
- "eval_runtime": 0.6899,
27
- "eval_samples_per_second": 295.7,
28
  "step": 400
29
  },
30
  {
31
- "epoch": 13.51,
32
- "learning_rate": 0.0003,
33
- "loss": 1.0294,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 16.22,
38
- "eval_accuracy": 87.74509803921569,
39
- "eval_average_metrics": 89.42415044309539,
40
- "eval_f1": 91.1032028469751,
41
- "eval_loss": 0.16199710965156555,
42
- "eval_runtime": 0.7169,
43
- "eval_samples_per_second": 284.564,
44
  "step": 600
45
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  {
47
  "epoch": 20.0,
48
- "step": 740,
49
- "total_flos": 9835227628938144.0,
50
- "train_loss": 0.7308501913740828,
51
- "train_runtime": 299.5389,
52
- "train_samples_per_second": 244.91,
53
- "train_steps_per_second": 2.47
54
  }
55
  ],
56
- "max_steps": 740,
57
  "num_train_epochs": 20,
58
- "total_flos": 9835227628938144.0,
59
  "trial_name": null,
60
  "trial_params": null
61
  }
 
1
  {
2
+ "best_metric": 90.18092691622104,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-2200",
4
  "epoch": 20.0,
5
+ "global_step": 2300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.74,
12
+ "eval_accuracy": 86.76470588235294,
13
+ "eval_average_metrics": 88.54364326375712,
14
+ "eval_f1": 90.3225806451613,
15
+ "eval_loss": 0.1595430225133896,
16
+ "eval_runtime": 1.0042,
17
+ "eval_samples_per_second": 203.156,
18
  "step": 200
19
  },
20
  {
21
+ "epoch": 3.48,
22
+ "eval_accuracy": 87.25490196078431,
23
+ "eval_average_metrics": 89.1753961858716,
24
+ "eval_f1": 91.0958904109589,
25
+ "eval_loss": 0.19655147194862366,
26
+ "eval_runtime": 0.9978,
27
+ "eval_samples_per_second": 204.44,
28
  "step": 400
29
  },
30
  {
31
+ "epoch": 4.35,
32
+ "learning_rate": 0.00023478260869565215,
33
+ "loss": 0.1466,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 5.22,
38
+ "eval_accuracy": 88.23529411764706,
39
+ "eval_average_metrics": 89.83193277310924,
40
+ "eval_f1": 91.42857142857143,
41
+ "eval_loss": 0.20800147950649261,
42
+ "eval_runtime": 1.03,
43
+ "eval_samples_per_second": 198.057,
44
  "step": 600
45
  },
46
+ {
47
+ "epoch": 6.96,
48
+ "eval_accuracy": 87.25490196078431,
49
+ "eval_average_metrics": 88.91730605285592,
50
+ "eval_f1": 90.57971014492753,
51
+ "eval_loss": 0.25488224625587463,
52
+ "eval_runtime": 0.9975,
53
+ "eval_samples_per_second": 204.517,
54
+ "step": 800
55
+ },
56
+ {
57
+ "epoch": 8.7,
58
+ "learning_rate": 0.00016956521739130433,
59
+ "loss": 0.0433,
60
+ "step": 1000
61
+ },
62
+ {
63
+ "epoch": 8.7,
64
+ "eval_accuracy": 87.74509803921569,
65
+ "eval_average_metrics": 89.48658410732715,
66
+ "eval_f1": 91.2280701754386,
67
+ "eval_loss": 0.27675318717956543,
68
+ "eval_runtime": 1.004,
69
+ "eval_samples_per_second": 203.193,
70
+ "step": 1000
71
+ },
72
+ {
73
+ "epoch": 10.43,
74
+ "eval_accuracy": 88.23529411764706,
75
+ "eval_average_metrics": 89.83193277310924,
76
+ "eval_f1": 91.42857142857143,
77
+ "eval_loss": 0.3537355363368988,
78
+ "eval_runtime": 1.0419,
79
+ "eval_samples_per_second": 195.799,
80
+ "step": 1200
81
+ },
82
+ {
83
+ "epoch": 12.17,
84
+ "eval_accuracy": 86.76470588235294,
85
+ "eval_average_metrics": 88.43729799612153,
86
+ "eval_f1": 90.10989010989012,
87
+ "eval_loss": 0.4248361885547638,
88
+ "eval_runtime": 0.9784,
89
+ "eval_samples_per_second": 208.512,
90
+ "step": 1400
91
+ },
92
+ {
93
+ "epoch": 13.04,
94
+ "learning_rate": 0.00010434782608695651,
95
+ "loss": 0.0174,
96
+ "step": 1500
97
+ },
98
+ {
99
+ "epoch": 13.91,
100
+ "eval_accuracy": 87.25490196078431,
101
+ "eval_average_metrics": 88.9511919875864,
102
+ "eval_f1": 90.64748201438849,
103
+ "eval_loss": 0.35347217321395874,
104
+ "eval_runtime": 1.0071,
105
+ "eval_samples_per_second": 202.569,
106
+ "step": 1600
107
+ },
108
+ {
109
+ "epoch": 15.65,
110
+ "eval_accuracy": 88.23529411764706,
111
+ "eval_average_metrics": 89.7380850150279,
112
+ "eval_f1": 91.24087591240875,
113
+ "eval_loss": 0.37131425738334656,
114
+ "eval_runtime": 0.9929,
115
+ "eval_samples_per_second": 205.451,
116
+ "step": 1800
117
+ },
118
+ {
119
+ "epoch": 17.39,
120
+ "learning_rate": 3.913043478260869e-05,
121
+ "loss": 0.0119,
122
+ "step": 2000
123
+ },
124
+ {
125
+ "epoch": 17.39,
126
+ "eval_accuracy": 88.23529411764706,
127
+ "eval_average_metrics": 89.769820971867,
128
+ "eval_f1": 91.30434782608695,
129
+ "eval_loss": 0.3558318316936493,
130
+ "eval_runtime": 1.0675,
131
+ "eval_samples_per_second": 191.099,
132
+ "step": 2000
133
+ },
134
+ {
135
+ "epoch": 19.13,
136
+ "eval_accuracy": 88.72549019607843,
137
+ "eval_average_metrics": 90.18092691622104,
138
+ "eval_f1": 91.63636363636364,
139
+ "eval_loss": 0.3694871664047241,
140
+ "eval_runtime": 1.0401,
141
+ "eval_samples_per_second": 196.137,
142
+ "step": 2200
143
+ },
144
  {
145
  "epoch": 20.0,
146
+ "step": 2300,
147
+ "total_flos": 9229438723811328.0,
148
+ "train_loss": 0.04842971423397893,
149
+ "train_runtime": 850.7003,
150
+ "train_samples_per_second": 86.235,
151
+ "train_steps_per_second": 2.704
152
  }
153
  ],
154
+ "max_steps": 2300,
155
  "num_train_epochs": 20,
156
+ "total_flos": 9229438723811328.0,
157
  "trial_name": null,
158
  "trial_params": null
159
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62f92480d5f15a84117d30f2a6a088445639336c20d95f7392fa3071d4e4eb97
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:751e8fa023d89d0b3545748e209d604c31d9f00e9021244fa8bb26a386f798f0
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"dataset_config_name": ["en"], "delta_type": "compacter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "factorized_phm": true, "factorized_phm_rule": false, "gradient_clip": false, "greater_is_better": true, "hypercomplex_adapters": true, "hypercomplex_division": 4, "hypercomplex_nonlinearity": "glorot-uniform", "learn_phm": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "normalize_phm_weight": false, "num_train_epochs": 20, "output_dir": "outputs/lora/t5-base/mrpc", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "phm_c_init": "normal", "phm_clamp": false, "phm_init_range": 0.0001, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "shared_phm_rule": false, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "use_bias_down_sampler": true, "use_bias_up_sampler": true, "warmup_steps": 0}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "mrpc", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/mrpc", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "mrpc", "test_dataset_config_name": ["en"], "test_dataset_name": "mrpc", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}