MHGanainy commited on
Commit
17e4d20
·
verified ·
1 Parent(s): 64e5f87

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -6
  2. train_results.json +6 -6
  3. trainer_state.json +96 -109
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 10.0,
3
  "eval_accuracy": 0.9177083333333333,
4
  "eval_f1": 0.2816188870151771,
5
  "eval_loss": 0.2582942843437195,
@@ -20,10 +20,10 @@
20
  "predict_samples": 50,
21
  "predict_samples_per_second": 17.575,
22
  "predict_steps_per_second": 4.57,
23
- "total_flos": 4.819469484490752e+16,
24
- "train_loss": 0.3849165085823305,
25
- "train_runtime": 337.1781,
26
  "train_samples": 247,
27
- "train_samples_per_second": 14.651,
28
- "train_steps_per_second": 3.678
29
  }
 
1
  {
2
+ "epoch": 9.0,
3
  "eval_accuracy": 0.9177083333333333,
4
  "eval_f1": 0.2816188870151771,
5
  "eval_loss": 0.2582942843437195,
 
20
  "predict_samples": 50,
21
  "predict_samples_per_second": 17.575,
22
  "predict_steps_per_second": 4.57,
23
+ "total_flos": 4.337522536041677e+16,
24
+ "train_loss": 0.8344365670262271,
25
+ "train_runtime": 303.7919,
26
  "train_samples": 247,
27
+ "train_samples_per_second": 16.261,
28
+ "train_steps_per_second": 4.082
29
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "total_flos": 4.819469484490752e+16,
4
- "train_loss": 0.3849165085823305,
5
- "train_runtime": 337.1781,
6
  "train_samples": 247,
7
- "train_samples_per_second": 14.651,
8
- "train_steps_per_second": 3.678
9
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "total_flos": 4.337522536041677e+16,
4
+ "train_loss": 0.8344365670262271,
5
+ "train_runtime": 303.7919,
6
  "train_samples": 247,
7
+ "train_samples_per_second": 16.261,
8
+ "train_steps_per_second": 4.082
9
  }
trainer_state.json CHANGED
@@ -1,158 +1,145 @@
1
  {
2
- "best_metric": 0.2816188870151771,
3
- "best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-434",
4
- "epoch": 10.0,
5
  "eval_steps": 500,
6
- "global_step": 620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.8779947916666667,
14
- "eval_f1": 0.09680365296803654,
15
- "eval_loss": 0.4225575029850006,
16
- "eval_micro-f1": 0.09680365296803654,
17
- "eval_precision": 0.08346456692913386,
18
- "eval_recall": 0.11521739130434783,
19
- "eval_runtime": 1.5383,
20
- "eval_samples_per_second": 19.503,
21
- "eval_steps_per_second": 5.201,
22
  "step": 62
23
  },
24
  {
25
  "epoch": 2.0,
26
- "eval_accuracy": 0.8930989583333333,
27
- "eval_f1": 0.19573796369376484,
28
- "eval_loss": 0.34750670194625854,
29
- "eval_micro-f1": 0.19573796369376484,
30
- "eval_precision": 0.1536555142503098,
31
- "eval_recall": 0.26956521739130435,
32
- "eval_runtime": 1.537,
33
- "eval_samples_per_second": 19.519,
34
- "eval_steps_per_second": 5.205,
35
  "step": 124
36
  },
37
  {
38
  "epoch": 3.0,
39
- "eval_accuracy": 0.9052083333333333,
40
- "eval_f1": 0.24148606811145507,
41
- "eval_loss": 0.2912306487560272,
42
- "eval_micro-f1": 0.24148606811145507,
43
- "eval_precision": 0.1875,
44
- "eval_recall": 0.3391304347826087,
45
- "eval_runtime": 1.5375,
46
- "eval_samples_per_second": 19.512,
47
- "eval_steps_per_second": 5.203,
48
  "step": 186
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9002604166666667,
53
- "eval_f1": 0.24856909239574815,
54
- "eval_loss": 0.29914453625679016,
55
- "eval_micro-f1": 0.24856909239574815,
56
- "eval_precision": 0.19921363040629095,
57
- "eval_recall": 0.33043478260869563,
58
- "eval_runtime": 1.5402,
59
- "eval_samples_per_second": 19.478,
60
- "eval_steps_per_second": 5.194,
61
  "step": 248
62
  },
63
  {
64
  "epoch": 5.0,
65
- "eval_accuracy": 0.906640625,
66
- "eval_f1": 0.24417009602194786,
67
- "eval_loss": 0.2833251357078552,
68
- "eval_micro-f1": 0.24417009602194786,
69
- "eval_precision": 0.17835671342685372,
70
- "eval_recall": 0.3869565217391304,
71
- "eval_runtime": 1.5382,
72
- "eval_samples_per_second": 19.504,
73
- "eval_steps_per_second": 5.201,
74
  "step": 310
75
  },
76
  {
77
  "epoch": 6.0,
78
- "eval_accuracy": 0.91484375,
79
- "eval_f1": 0.2718932443703086,
80
- "eval_loss": 0.26417481899261475,
81
- "eval_micro-f1": 0.2718932443703086,
82
- "eval_precision": 0.22056833558863329,
83
- "eval_recall": 0.35434782608695653,
84
- "eval_runtime": 1.5353,
85
- "eval_samples_per_second": 19.54,
86
- "eval_steps_per_second": 5.211,
87
  "step": 372
88
  },
89
  {
90
  "epoch": 7.0,
91
- "eval_accuracy": 0.9177083333333333,
92
- "eval_f1": 0.2816188870151771,
93
- "eval_loss": 0.2584071457386017,
94
- "eval_micro-f1": 0.2816188870151771,
95
- "eval_precision": 0.23002754820936638,
96
- "eval_recall": 0.3630434782608696,
97
- "eval_runtime": 1.5473,
98
- "eval_samples_per_second": 19.388,
99
- "eval_steps_per_second": 5.17,
100
  "step": 434
101
  },
102
  {
103
  "epoch": 8.0,
104
- "eval_accuracy": 0.9177083333333333,
105
- "eval_f1": 0.27419354838709675,
106
- "eval_loss": 0.25232046842575073,
107
- "eval_micro-f1": 0.27419354838709675,
108
- "eval_precision": 0.21794871794871795,
109
- "eval_recall": 0.3695652173913043,
110
- "eval_runtime": 1.5341,
111
- "eval_samples_per_second": 19.556,
112
- "eval_steps_per_second": 5.215,
113
  "step": 496
114
  },
115
  {
116
  "epoch": 8.064516129032258,
117
- "grad_norm": 2.654693603515625,
118
- "learning_rate": 1.7975806451612903e-05,
119
- "loss": 0.4245,
120
  "step": 500
121
  },
122
  {
123
  "epoch": 9.0,
124
- "eval_accuracy": 0.9166666666666666,
125
- "eval_f1": 0.25278810408921937,
126
- "eval_loss": 0.26304513216018677,
127
- "eval_micro-f1": 0.25278810408921937,
128
- "eval_precision": 0.192090395480226,
129
- "eval_recall": 0.3695652173913043,
130
- "eval_runtime": 2.1137,
131
- "eval_samples_per_second": 14.193,
132
- "eval_steps_per_second": 3.785,
133
  "step": 558
134
  },
135
  {
136
- "epoch": 10.0,
137
- "eval_accuracy": 0.919140625,
138
- "eval_f1": 0.25541795665634676,
139
- "eval_loss": 0.2640175521373749,
140
- "eval_micro-f1": 0.25541795665634676,
141
- "eval_precision": 0.19831730769230768,
142
- "eval_recall": 0.358695652173913,
143
- "eval_runtime": 1.5363,
144
- "eval_samples_per_second": 19.527,
145
- "eval_steps_per_second": 5.207,
146
- "step": 620
147
- },
148
- {
149
- "epoch": 10.0,
150
- "step": 620,
151
- "total_flos": 4.819469484490752e+16,
152
- "train_loss": 0.3849165085823305,
153
- "train_runtime": 337.1781,
154
- "train_samples_per_second": 14.651,
155
- "train_steps_per_second": 3.678
156
  }
157
  ],
158
  "logging_steps": 500,
@@ -181,7 +168,7 @@
181
  "attributes": {}
182
  }
183
  },
184
- "total_flos": 4.819469484490752e+16,
185
  "train_batch_size": 4,
186
  "trial_name": null,
187
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2831275720164609,
3
+ "best_model_checkpoint": "logs/indian_build_rr/roberta-base/seed_1/checkpoint-372",
4
+ "epoch": 9.0,
5
  "eval_steps": 500,
6
+ "global_step": 558,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6509204584925321,
14
+ "eval_f1": 0.0690423162583519,
15
+ "eval_loss": 1.1796680688858032,
16
+ "eval_micro-f1": 0.0690423162583519,
17
+ "eval_precision": 0.06981981981981981,
18
+ "eval_recall": 0.06828193832599119,
19
+ "eval_runtime": 1.4676,
20
+ "eval_samples_per_second": 20.442,
21
+ "eval_steps_per_second": 5.451,
22
  "step": 62
23
  },
24
  {
25
  "epoch": 2.0,
26
+ "eval_accuracy": 0.7349774227162209,
27
+ "eval_f1": 0.21954674220963172,
28
+ "eval_loss": 0.8353763222694397,
29
+ "eval_micro-f1": 0.21954674220963172,
30
+ "eval_precision": 0.1617954070981211,
31
+ "eval_recall": 0.34140969162995594,
32
+ "eval_runtime": 1.4604,
33
+ "eval_samples_per_second": 20.543,
34
+ "eval_steps_per_second": 5.478,
35
  "step": 124
36
  },
37
  {
38
  "epoch": 3.0,
39
+ "eval_accuracy": 0.7342827370614797,
40
+ "eval_f1": 0.2308802308802309,
41
+ "eval_loss": 0.8058456182479858,
42
+ "eval_micro-f1": 0.2308802308802309,
43
+ "eval_precision": 0.17167381974248927,
44
+ "eval_recall": 0.3524229074889868,
45
+ "eval_runtime": 1.461,
46
+ "eval_samples_per_second": 20.534,
47
+ "eval_steps_per_second": 5.476,
48
  "step": 186
49
  },
50
  {
51
  "epoch": 4.0,
52
+ "eval_accuracy": 0.7502605071205279,
53
+ "eval_f1": 0.2672,
54
+ "eval_loss": 0.7717716097831726,
55
+ "eval_micro-f1": 0.2672,
56
+ "eval_precision": 0.20979899497487436,
57
+ "eval_recall": 0.36784140969163,
58
+ "eval_runtime": 1.5174,
59
+ "eval_samples_per_second": 19.771,
60
+ "eval_steps_per_second": 5.272,
61
  "step": 248
62
  },
63
  {
64
  "epoch": 5.0,
65
+ "eval_accuracy": 0.7641542202153525,
66
+ "eval_f1": 0.250566037735849,
67
+ "eval_loss": 0.7306948900222778,
68
+ "eval_micro-f1": 0.250566037735849,
69
+ "eval_precision": 0.19058553386911595,
70
+ "eval_recall": 0.3656387665198238,
71
+ "eval_runtime": 1.4687,
72
+ "eval_samples_per_second": 20.426,
73
+ "eval_steps_per_second": 5.447,
74
  "step": 310
75
  },
76
  {
77
  "epoch": 6.0,
78
+ "eval_accuracy": 0.774574505036471,
79
+ "eval_f1": 0.2831275720164609,
80
+ "eval_loss": 0.7098783254623413,
81
+ "eval_micro-f1": 0.2831275720164609,
82
+ "eval_precision": 0.22601839684625494,
83
+ "eval_recall": 0.3788546255506608,
84
+ "eval_runtime": 1.4681,
85
+ "eval_samples_per_second": 20.435,
86
+ "eval_steps_per_second": 5.449,
87
  "step": 372
88
  },
89
  {
90
  "epoch": 7.0,
91
+ "eval_accuracy": 0.7811740187565127,
92
+ "eval_f1": 0.272,
93
+ "eval_loss": 0.7071970701217651,
94
+ "eval_micro-f1": 0.272,
95
+ "eval_precision": 0.22801788375558868,
96
+ "eval_recall": 0.3370044052863436,
97
+ "eval_runtime": 1.4714,
98
+ "eval_samples_per_second": 20.389,
99
+ "eval_steps_per_second": 5.437,
100
  "step": 434
101
  },
102
  {
103
  "epoch": 8.0,
104
+ "eval_accuracy": 0.7825633900659952,
105
+ "eval_f1": 0.27009113504556753,
106
+ "eval_loss": 0.6919089555740356,
107
+ "eval_micro-f1": 0.27009113504556753,
108
+ "eval_precision": 0.21646746347941567,
109
+ "eval_recall": 0.3590308370044053,
110
+ "eval_runtime": 1.471,
111
+ "eval_samples_per_second": 20.395,
112
+ "eval_steps_per_second": 5.439,
113
  "step": 496
114
  },
115
  {
116
  "epoch": 8.064516129032258,
117
+ "grad_norm": 7.422909736633301,
118
+ "learning_rate": 1.7951612903225806e-05,
119
+ "loss": 0.8758,
120
  "step": 500
121
  },
122
  {
123
  "epoch": 9.0,
124
+ "eval_accuracy": 0.7735324765543592,
125
+ "eval_f1": 0.26181818181818184,
126
+ "eval_loss": 0.7503196597099304,
127
+ "eval_micro-f1": 0.26181818181818184,
128
+ "eval_precision": 0.19543973941368079,
129
+ "eval_recall": 0.3964757709251101,
130
+ "eval_runtime": 1.4717,
131
+ "eval_samples_per_second": 20.385,
132
+ "eval_steps_per_second": 5.436,
133
  "step": 558
134
  },
135
  {
136
+ "epoch": 9.0,
137
+ "step": 558,
138
+ "total_flos": 4.337522536041677e+16,
139
+ "train_loss": 0.8344365670262271,
140
+ "train_runtime": 303.7919,
141
+ "train_samples_per_second": 16.261,
142
+ "train_steps_per_second": 4.082
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  }
144
  ],
145
  "logging_steps": 500,
 
168
  "attributes": {}
169
  }
170
  },
171
+ "total_flos": 4.337522536041677e+16,
172
  "train_batch_size": 4,
173
  "trial_name": null,
174
  "trial_params": null