ShengdingHu commited on
Commit
bd1a8d5
·
1 Parent(s): 88899d2

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,20 +1,23 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 88.72549019607843,
4
  "eval_average_metrics": 90.18092691622104,
5
- "eval_f1": 91.63636363636364,
6
- "eval_loss": 0.3694871664047241,
7
- "eval_runtime": 1.0502,
8
- "eval_samples_per_second": 194.246,
 
 
 
9
  "test_accuracy": 89.70588235294117,
10
  "test_average_metrics": 91.19440459110473,
11
  "test_f1": 92.6829268292683,
12
  "test_loss": 0.2836102843284607,
13
  "test_runtime": 1.0233,
14
  "test_samples_per_second": 199.347,
15
- "train_loss": 0.04842971423397893,
16
- "train_runtime": 850.7003,
17
  "train_samples": 3668,
18
- "train_samples_per_second": 86.235,
19
- "train_steps_per_second": 2.704
20
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.7083333333333334,
4
  "eval_average_metrics": 90.18092691622104,
5
+ "eval_combined_score": 0.7652279521674141,
6
+ "eval_f1": 0.8221225710014948,
7
+ "eval_loss": 0.5735756754875183,
8
+ "eval_runtime": 0.4847,
9
+ "eval_samples": 408,
10
+ "eval_samples_per_second": 841.743,
11
+ "eval_steps_per_second": 14.442,
12
  "test_accuracy": 89.70588235294117,
13
  "test_average_metrics": 91.19440459110473,
14
  "test_f1": 92.6829268292683,
15
  "test_loss": 0.2836102843284607,
16
  "test_runtime": 1.0233,
17
  "test_samples_per_second": 199.347,
18
+ "train_loss": 0.5910721482901737,
19
+ "train_runtime": 193.8371,
20
  "train_samples": 3668,
21
+ "train_samples_per_second": 378.462,
22
+ "train_steps_per_second": 5.984
23
  }
eval_results.json CHANGED
@@ -1,9 +1,11 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 88.72549019607843,
4
- "eval_average_metrics": 90.18092691622104,
5
- "eval_f1": 91.63636363636364,
6
- "eval_loss": 0.3694871664047241,
7
- "eval_runtime": 1.0502,
8
- "eval_samples_per_second": 194.246
 
 
9
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 0.7083333333333334,
4
+ "eval_combined_score": 0.7652279521674141,
5
+ "eval_f1": 0.8221225710014948,
6
+ "eval_loss": 0.5735756754875183,
7
+ "eval_runtime": 0.4847,
8
+ "eval_samples": 408,
9
+ "eval_samples_per_second": 841.743,
10
+ "eval_steps_per_second": 14.442
11
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:636e38716a1496de857a883086df4a681c1d638727ee2bcc063dbc37e14223c9
3
- size 2834001
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ea20482f4ef6da5b6daf69ce891f69b21c5ef0fc768a2ea5b9f90c3a92a3d81
3
+ size 41271819
runs/Feb12_11-11-44_node1/events.out.tfevents.1644635578.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:427123085ce51a80e53412b2a539079c62df93b916d4fcf8556e90505469741f
3
- size 5536
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11cc29a257317a9fb9172364c676a9aa44f65c9625e6da973d1a9ed814af1415
3
+ size 6317
runs/Feb12_11-11-44_node1/events.out.tfevents.1644635773.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3136b9d100428a4a7c558b04fb22957cd9b2568b26a29f316c4313decb5df276
3
+ size 467
runs/Feb12_11-26-13_node1/1644636416.8581543/events.out.tfevents.1644636416.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:413116e9ec2f144e058c72fd4402295ffd3f6cb63a706dbc7e2ef54668821cfb
3
+ size 4588
runs/Feb12_11-26-13_node1/events.out.tfevents.1644636416.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47cd20d02aa05775da961c47c7e7c1eae9e1a6f5f482031a893d11b5c7585429
3
+ size 3514
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.04842971423397893,
4
- "train_runtime": 850.7003,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 86.235,
7
- "train_steps_per_second": 2.704
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.5910721482901737,
4
+ "train_runtime": 193.8371,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 378.462,
7
+ "train_steps_per_second": 5.984
8
  }
trainer_state.json CHANGED
@@ -1,159 +1,92 @@
1
  {
2
- "best_metric": 90.18092691622104,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/mrpc/checkpoint-2200",
4
  "epoch": 20.0,
5
- "global_step": 2300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.74,
12
- "eval_accuracy": 86.76470588235294,
13
- "eval_average_metrics": 88.54364326375712,
14
- "eval_f1": 90.3225806451613,
15
- "eval_loss": 0.1595430225133896,
16
- "eval_runtime": 1.0042,
17
- "eval_samples_per_second": 203.156,
 
18
  "step": 200
19
  },
20
  {
21
- "epoch": 3.48,
22
- "eval_accuracy": 87.25490196078431,
23
- "eval_average_metrics": 89.1753961858716,
24
- "eval_f1": 91.0958904109589,
25
- "eval_loss": 0.19655147194862366,
26
- "eval_runtime": 0.9978,
27
- "eval_samples_per_second": 204.44,
 
28
  "step": 400
29
  },
30
  {
31
- "epoch": 4.35,
32
- "learning_rate": 0.00023478260869565215,
33
- "loss": 0.1466,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 5.22,
38
- "eval_accuracy": 88.23529411764706,
39
- "eval_average_metrics": 89.83193277310924,
40
- "eval_f1": 91.42857142857143,
41
- "eval_loss": 0.20800147950649261,
42
- "eval_runtime": 1.03,
43
- "eval_samples_per_second": 198.057,
 
44
  "step": 600
45
  },
46
  {
47
- "epoch": 6.96,
48
- "eval_accuracy": 87.25490196078431,
49
- "eval_average_metrics": 88.91730605285592,
50
- "eval_f1": 90.57971014492753,
51
- "eval_loss": 0.25488224625587463,
52
- "eval_runtime": 0.9975,
53
- "eval_samples_per_second": 204.517,
 
54
  "step": 800
55
  },
56
  {
57
- "epoch": 8.7,
58
- "learning_rate": 0.00016956521739130433,
59
- "loss": 0.0433,
60
  "step": 1000
61
  },
62
  {
63
- "epoch": 8.7,
64
- "eval_accuracy": 87.74509803921569,
65
- "eval_average_metrics": 89.48658410732715,
66
- "eval_f1": 91.2280701754386,
67
- "eval_loss": 0.27675318717956543,
68
- "eval_runtime": 1.004,
69
- "eval_samples_per_second": 203.193,
 
70
  "step": 1000
71
  },
72
- {
73
- "epoch": 10.43,
74
- "eval_accuracy": 88.23529411764706,
75
- "eval_average_metrics": 89.83193277310924,
76
- "eval_f1": 91.42857142857143,
77
- "eval_loss": 0.3537355363368988,
78
- "eval_runtime": 1.0419,
79
- "eval_samples_per_second": 195.799,
80
- "step": 1200
81
- },
82
- {
83
- "epoch": 12.17,
84
- "eval_accuracy": 86.76470588235294,
85
- "eval_average_metrics": 88.43729799612153,
86
- "eval_f1": 90.10989010989012,
87
- "eval_loss": 0.4248361885547638,
88
- "eval_runtime": 0.9784,
89
- "eval_samples_per_second": 208.512,
90
- "step": 1400
91
- },
92
- {
93
- "epoch": 13.04,
94
- "learning_rate": 0.00010434782608695651,
95
- "loss": 0.0174,
96
- "step": 1500
97
- },
98
- {
99
- "epoch": 13.91,
100
- "eval_accuracy": 87.25490196078431,
101
- "eval_average_metrics": 88.9511919875864,
102
- "eval_f1": 90.64748201438849,
103
- "eval_loss": 0.35347217321395874,
104
- "eval_runtime": 1.0071,
105
- "eval_samples_per_second": 202.569,
106
- "step": 1600
107
- },
108
- {
109
- "epoch": 15.65,
110
- "eval_accuracy": 88.23529411764706,
111
- "eval_average_metrics": 89.7380850150279,
112
- "eval_f1": 91.24087591240875,
113
- "eval_loss": 0.37131425738334656,
114
- "eval_runtime": 0.9929,
115
- "eval_samples_per_second": 205.451,
116
- "step": 1800
117
- },
118
- {
119
- "epoch": 17.39,
120
- "learning_rate": 3.913043478260869e-05,
121
- "loss": 0.0119,
122
- "step": 2000
123
- },
124
- {
125
- "epoch": 17.39,
126
- "eval_accuracy": 88.23529411764706,
127
- "eval_average_metrics": 89.769820971867,
128
- "eval_f1": 91.30434782608695,
129
- "eval_loss": 0.3558318316936493,
130
- "eval_runtime": 1.0675,
131
- "eval_samples_per_second": 191.099,
132
- "step": 2000
133
- },
134
- {
135
- "epoch": 19.13,
136
- "eval_accuracy": 88.72549019607843,
137
- "eval_average_metrics": 90.18092691622104,
138
- "eval_f1": 91.63636363636364,
139
- "eval_loss": 0.3694871664047241,
140
- "eval_runtime": 1.0401,
141
- "eval_samples_per_second": 196.137,
142
- "step": 2200
143
- },
144
  {
145
  "epoch": 20.0,
146
- "step": 2300,
147
- "total_flos": 9229438723811328.0,
148
- "train_loss": 0.04842971423397893,
149
- "train_runtime": 850.7003,
150
- "train_samples_per_second": 86.235,
151
- "train_steps_per_second": 2.704
152
  }
153
  ],
154
- "max_steps": 2300,
155
  "num_train_epochs": 20,
156
- "total_flos": 9229438723811328.0,
157
  "trial_name": null,
158
  "trial_params": null
159
  }
 
1
  {
2
+ "best_metric": 0.7083333333333334,
3
+ "best_model_checkpoint": "outputs/prefix/roberta-base/mrpc/checkpoint-600",
4
  "epoch": 20.0,
5
+ "global_step": 1160,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 3.45,
12
+ "eval_accuracy": 0.7058823529411765,
13
+ "eval_combined_score": 0.7631208171891511,
14
+ "eval_f1": 0.8203592814371258,
15
+ "eval_loss": 0.602976381778717,
16
+ "eval_runtime": 1.1443,
17
+ "eval_samples_per_second": 356.557,
18
+ "eval_steps_per_second": 6.117,
19
  "step": 200
20
  },
21
  {
22
+ "epoch": 6.9,
23
+ "eval_accuracy": 0.7034313725490197,
24
+ "eval_combined_score": 0.7620860566448802,
25
+ "eval_f1": 0.8207407407407408,
26
+ "eval_loss": 0.58584064245224,
27
+ "eval_runtime": 0.5538,
28
+ "eval_samples_per_second": 736.744,
29
+ "eval_steps_per_second": 12.64,
30
  "step": 400
31
  },
32
  {
33
+ "epoch": 8.62,
34
+ "learning_rate": 0.0001706896551724138,
35
+ "loss": 0.6074,
36
  "step": 500
37
  },
38
  {
39
+ "epoch": 10.34,
40
+ "eval_accuracy": 0.7083333333333334,
41
+ "eval_combined_score": 0.7652279521674141,
42
+ "eval_f1": 0.8221225710014948,
43
+ "eval_loss": 0.5735756754875183,
44
+ "eval_runtime": 0.6085,
45
+ "eval_samples_per_second": 670.475,
46
+ "eval_steps_per_second": 11.503,
47
  "step": 600
48
  },
49
  {
50
+ "epoch": 13.79,
51
+ "eval_accuracy": 0.7034313725490197,
52
+ "eval_combined_score": 0.7612822034643454,
53
+ "eval_f1": 0.8191330343796712,
54
+ "eval_loss": 0.5688421726226807,
55
+ "eval_runtime": 0.6104,
56
+ "eval_samples_per_second": 668.411,
57
+ "eval_steps_per_second": 11.468,
58
  "step": 800
59
  },
60
  {
61
+ "epoch": 17.24,
62
+ "learning_rate": 4.137931034482758e-05,
63
+ "loss": 0.5801,
64
  "step": 1000
65
  },
66
  {
67
+ "epoch": 17.24,
68
+ "eval_accuracy": 0.7083333333333334,
69
+ "eval_combined_score": 0.7649612693653174,
70
+ "eval_f1": 0.8215892053973014,
71
+ "eval_loss": 0.5650697350502014,
72
+ "eval_runtime": 0.7692,
73
+ "eval_samples_per_second": 530.398,
74
+ "eval_steps_per_second": 9.1,
75
  "step": 1000
76
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  {
78
  "epoch": 20.0,
79
+ "step": 1160,
80
+ "total_flos": 4831687561666560.0,
81
+ "train_loss": 0.5910721482901737,
82
+ "train_runtime": 193.8371,
83
+ "train_samples_per_second": 378.462,
84
+ "train_steps_per_second": 5.984
85
  }
86
  ],
87
+ "max_steps": 1160,
88
  "num_train_epochs": 20,
89
+ "total_flos": 4831687561666560.0,
90
  "trial_name": null,
91
  "trial_params": null
92
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6dd2861778bc36ab1ce0455b0ed185f4a6bd9fdabe4ba7aafabdd1fe22a2899c
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b636208b801856553c9b1677f10515a2d30edac604dfc118b64fa760241111b4
3
  size 2991