Hartunka committed on
Commit
fe2faee
·
verified ·
1 Parent(s): fd1bf85

End of training

Browse files
README.md CHANGED
@@ -1,14 +1,32 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: Hartunka/bert_base_km_5_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - matthews_correlation
8
  - accuracy
9
  model-index:
10
  - name: bert_base_km_5_v1_cola
11
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,11 +34,11 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # bert_base_km_5_v1_cola
18
 
19
- This model is a fine-tuned version of [Hartunka/bert_base_km_5_v1](https://huggingface.co/Hartunka/bert_base_km_5_v1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.8536
22
- - Matthews Correlation: 0.1002
23
- - Accuracy: 0.6462
24
 
25
  ## Model description
26
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: Hartunka/bert_base_km_5_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - matthews_correlation
12
  - accuracy
13
  model-index:
14
  - name: bert_base_km_5_v1_cola
15
+ results:
16
+ - task:
17
+ name: Text Classification
18
+ type: text-classification
19
+ dataset:
20
+ name: GLUE COLA
21
+ type: glue
22
+ args: cola
23
+ metrics:
24
+ - name: Matthews Correlation
25
+ type: matthews_correlation
26
+ value: 0.04959298805408078
27
+ - name: Accuracy
28
+ type: accuracy
29
+ value: 0.6874400973320007
30
  ---
31
 
32
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  # bert_base_km_5_v1_cola
36
 
37
+ This model is a fine-tuned version of [Hartunka/bert_base_km_5_v1](https://huggingface.co/Hartunka/bert_base_km_5_v1) on the GLUE COLA dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.6169
40
+ - Matthews Correlation: 0.0496
41
+ - Accuracy: 0.6874
42
 
43
  ## Model description
44
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.693192720413208,
4
- "eval_loss": 0.6094943881034851,
5
- "eval_matthews_correlation": 0.060190597377967524,
6
- "eval_runtime": 0.6896,
7
  "eval_samples": 1043,
8
- "eval_samples_per_second": 1512.498,
9
- "eval_steps_per_second": 7.251,
10
  "total_flos": 8999450537533440.0,
11
- "train_loss": 0.4899906516075134,
12
- "train_runtime": 134.3561,
13
  "train_samples": 8551,
14
- "train_samples_per_second": 3182.216,
15
- "train_steps_per_second": 12.653
16
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.6874400973320007,
4
+ "eval_loss": 0.6168506741523743,
5
+ "eval_matthews_correlation": 0.04959298805408078,
6
+ "eval_runtime": 0.7317,
7
  "eval_samples": 1043,
8
+ "eval_samples_per_second": 1425.514,
9
+ "eval_steps_per_second": 6.834,
10
  "total_flos": 8999450537533440.0,
11
+ "train_loss": 0.47968793616575356,
12
+ "train_runtime": 135.9284,
13
  "train_samples": 8551,
14
+ "train_samples_per_second": 3145.406,
15
+ "train_steps_per_second": 12.507
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.693192720413208,
4
- "eval_loss": 0.6094943881034851,
5
- "eval_matthews_correlation": 0.060190597377967524,
6
- "eval_runtime": 0.6896,
7
  "eval_samples": 1043,
8
- "eval_samples_per_second": 1512.498,
9
- "eval_steps_per_second": 7.251
10
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.6874400973320007,
4
+ "eval_loss": 0.6168506741523743,
5
+ "eval_matthews_correlation": 0.04959298805408078,
6
+ "eval_runtime": 0.7317,
7
  "eval_samples": 1043,
8
+ "eval_samples_per_second": 1425.514,
9
+ "eval_steps_per_second": 6.834
10
  }
logs/events.out.tfevents.1744964173.s_005_m.2772371.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25cb565be9f59fb19e1c8ea6029211079cf1ecfad3a360cfae13a0b8d5a630a5
3
+ size 427
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 8999450537533440.0,
4
- "train_loss": 0.4899906516075134,
5
- "train_runtime": 134.3561,
6
  "train_samples": 8551,
7
- "train_samples_per_second": 3182.216,
8
- "train_steps_per_second": 12.653
9
  }
 
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 8999450537533440.0,
4
+ "train_loss": 0.47968793616575356,
5
+ "train_runtime": 135.9284,
6
  "train_samples": 8551,
7
+ "train_samples_per_second": 3145.406,
8
+ "train_steps_per_second": 12.507
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 102,
3
- "best_metric": 0.6094943881034851,
4
  "best_model_checkpoint": "bert_base_km_5_v1_cola/checkpoint-102",
5
  "epoch": 8.0,
6
  "eval_steps": 500,
@@ -11,148 +11,148 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 0.9332984089851379,
15
  "learning_rate": 4.9e-05,
16
- "loss": 0.6192,
17
  "step": 34
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.6912751793861389,
22
- "eval_loss": 0.6177230477333069,
23
- "eval_matthews_correlation": 0.0,
24
- "eval_runtime": 0.676,
25
- "eval_samples_per_second": 1542.856,
26
- "eval_steps_per_second": 7.396,
27
  "step": 34
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 1.2563631534576416,
32
  "learning_rate": 4.8e-05,
33
- "loss": 0.6088,
34
  "step": 68
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_accuracy": 0.6922339200973511,
39
- "eval_loss": 0.6191443800926208,
40
- "eval_matthews_correlation": 0.0463559874942472,
41
- "eval_runtime": 0.6989,
42
- "eval_samples_per_second": 1492.409,
43
- "eval_steps_per_second": 7.154,
44
  "step": 68
45
  },
46
  {
47
  "epoch": 3.0,
48
- "grad_norm": 0.9797320365905762,
49
  "learning_rate": 4.7e-05,
50
- "loss": 0.5837,
51
  "step": 102
52
  },
53
  {
54
  "epoch": 3.0,
55
- "eval_accuracy": 0.693192720413208,
56
- "eval_loss": 0.6094943881034851,
57
- "eval_matthews_correlation": 0.060190597377967524,
58
- "eval_runtime": 0.6754,
59
- "eval_samples_per_second": 1544.379,
60
- "eval_steps_per_second": 7.404,
61
  "step": 102
62
  },
63
  {
64
  "epoch": 4.0,
65
- "grad_norm": 2.736848831176758,
66
  "learning_rate": 4.600000000000001e-05,
67
- "loss": 0.5398,
68
  "step": 136
69
  },
70
  {
71
  "epoch": 4.0,
72
- "eval_accuracy": 0.6826462149620056,
73
- "eval_loss": 0.6479612588882446,
74
- "eval_matthews_correlation": 0.08323045697852056,
75
- "eval_runtime": 0.6667,
76
- "eval_samples_per_second": 1564.401,
77
- "eval_steps_per_second": 7.5,
78
  "step": 136
79
  },
80
  {
81
  "epoch": 5.0,
82
- "grad_norm": 1.7911914587020874,
83
  "learning_rate": 4.5e-05,
84
- "loss": 0.4723,
85
  "step": 170
86
  },
87
  {
88
  "epoch": 5.0,
89
- "eval_accuracy": 0.6433365345001221,
90
- "eval_loss": 0.6780942678451538,
91
- "eval_matthews_correlation": 0.13224881537677105,
92
- "eval_runtime": 0.6837,
93
- "eval_samples_per_second": 1525.525,
94
- "eval_steps_per_second": 7.313,
95
  "step": 170
96
  },
97
  {
98
  "epoch": 6.0,
99
- "grad_norm": 2.9178237915039062,
100
  "learning_rate": 4.4000000000000006e-05,
101
- "loss": 0.4133,
102
  "step": 204
103
  },
104
  {
105
  "epoch": 6.0,
106
- "eval_accuracy": 0.6155321002006531,
107
- "eval_loss": 0.781400740146637,
108
- "eval_matthews_correlation": 0.10001682090743959,
109
- "eval_runtime": 0.6758,
110
- "eval_samples_per_second": 1543.271,
111
- "eval_steps_per_second": 7.398,
112
  "step": 204
113
  },
114
  {
115
  "epoch": 7.0,
116
- "grad_norm": 2.418649196624756,
117
  "learning_rate": 4.3e-05,
118
- "loss": 0.3619,
119
  "step": 238
120
  },
121
  {
122
  "epoch": 7.0,
123
- "eval_accuracy": 0.6289549469947815,
124
- "eval_loss": 0.8527724146842957,
125
- "eval_matthews_correlation": 0.11484818571489955,
126
- "eval_runtime": 0.7193,
127
- "eval_samples_per_second": 1450.052,
128
- "eval_steps_per_second": 6.951,
129
  "step": 238
130
  },
131
  {
132
  "epoch": 8.0,
133
- "grad_norm": 4.517303943634033,
134
  "learning_rate": 4.2e-05,
135
- "loss": 0.321,
136
  "step": 272
137
  },
138
  {
139
  "epoch": 8.0,
140
  "eval_accuracy": 0.6462128758430481,
141
- "eval_loss": 0.9528720378875732,
142
- "eval_matthews_correlation": 0.12205331754627759,
143
- "eval_runtime": 0.6661,
144
- "eval_samples_per_second": 1565.727,
145
- "eval_steps_per_second": 7.506,
146
  "step": 272
147
  },
148
  {
149
  "epoch": 8.0,
150
  "step": 272,
151
  "total_flos": 8999450537533440.0,
152
- "train_loss": 0.4899906516075134,
153
- "train_runtime": 134.3561,
154
- "train_samples_per_second": 3182.216,
155
- "train_steps_per_second": 12.653
156
  }
157
  ],
158
  "logging_steps": 1,
 
1
  {
2
  "best_global_step": 102,
3
+ "best_metric": 0.6168506741523743,
4
  "best_model_checkpoint": "bert_base_km_5_v1_cola/checkpoint-102",
5
  "epoch": 8.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 2.0184812545776367,
15
  "learning_rate": 4.9e-05,
16
+ "loss": 0.6154,
17
  "step": 34
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.6893576383590698,
22
+ "eval_loss": 0.6183784604072571,
23
+ "eval_matthews_correlation": -0.007887379670285008,
24
+ "eval_runtime": 0.6744,
25
+ "eval_samples_per_second": 1546.637,
26
+ "eval_steps_per_second": 7.414,
27
  "step": 34
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 2.073906183242798,
32
  "learning_rate": 4.8e-05,
33
+ "loss": 0.5959,
34
  "step": 68
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.6740172505378723,
39
+ "eval_loss": 0.6171430349349976,
40
+ "eval_matthews_correlation": 0.005476740094736749,
41
+ "eval_runtime": 0.7236,
42
+ "eval_samples_per_second": 1441.409,
43
+ "eval_steps_per_second": 6.91,
44
  "step": 68
45
  },
46
  {
47
  "epoch": 3.0,
48
+ "grad_norm": 1.9242302179336548,
49
  "learning_rate": 4.7e-05,
50
+ "loss": 0.5583,
51
  "step": 102
52
  },
53
  {
54
  "epoch": 3.0,
55
+ "eval_accuracy": 0.6874400973320007,
56
+ "eval_loss": 0.6168506741523743,
57
+ "eval_matthews_correlation": 0.04959298805408078,
58
+ "eval_runtime": 0.6807,
59
+ "eval_samples_per_second": 1532.323,
60
+ "eval_steps_per_second": 7.346,
61
  "step": 102
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "grad_norm": 3.0941638946533203,
66
  "learning_rate": 4.600000000000001e-05,
67
+ "loss": 0.5145,
68
  "step": 136
69
  },
70
  {
71
  "epoch": 4.0,
72
+ "eval_accuracy": 0.6634707450866699,
73
+ "eval_loss": 0.6423744559288025,
74
+ "eval_matthews_correlation": 0.09532212745017962,
75
+ "eval_runtime": 0.6687,
76
+ "eval_samples_per_second": 1559.856,
77
+ "eval_steps_per_second": 7.478,
78
  "step": 136
79
  },
80
  {
81
  "epoch": 5.0,
82
+ "grad_norm": 2.465773105621338,
83
  "learning_rate": 4.5e-05,
84
+ "loss": 0.4591,
85
  "step": 170
86
  },
87
  {
88
  "epoch": 5.0,
89
+ "eval_accuracy": 0.6490891575813293,
90
+ "eval_loss": 0.6875084638595581,
91
+ "eval_matthews_correlation": 0.09892672481640027,
92
+ "eval_runtime": 0.6908,
93
+ "eval_samples_per_second": 1509.867,
94
+ "eval_steps_per_second": 7.238,
95
  "step": 170
96
  },
97
  {
98
  "epoch": 6.0,
99
+ "grad_norm": 4.0577287673950195,
100
  "learning_rate": 4.4000000000000006e-05,
101
+ "loss": 0.412,
102
  "step": 204
103
  },
104
  {
105
  "epoch": 6.0,
106
+ "eval_accuracy": 0.6308724880218506,
107
+ "eval_loss": 0.7450611591339111,
108
+ "eval_matthews_correlation": 0.06511003868382283,
109
+ "eval_runtime": 0.6768,
110
+ "eval_samples_per_second": 1541.14,
111
+ "eval_steps_per_second": 7.388,
112
  "step": 204
113
  },
114
  {
115
  "epoch": 7.0,
116
+ "grad_norm": 2.787646532058716,
117
  "learning_rate": 4.3e-05,
118
+ "loss": 0.3633,
119
  "step": 238
120
  },
121
  {
122
  "epoch": 7.0,
123
+ "eval_accuracy": 0.6203259825706482,
124
+ "eval_loss": 0.7966165542602539,
125
+ "eval_matthews_correlation": 0.10894429086010696,
126
+ "eval_runtime": 0.6801,
127
+ "eval_samples_per_second": 1533.561,
128
+ "eval_steps_per_second": 7.352,
129
  "step": 238
130
  },
131
  {
132
  "epoch": 8.0,
133
+ "grad_norm": 3.5457634925842285,
134
  "learning_rate": 4.2e-05,
135
+ "loss": 0.3189,
136
  "step": 272
137
  },
138
  {
139
  "epoch": 8.0,
140
  "eval_accuracy": 0.6462128758430481,
141
+ "eval_loss": 0.8535791635513306,
142
+ "eval_matthews_correlation": 0.10018630520281055,
143
+ "eval_runtime": 0.7183,
144
+ "eval_samples_per_second": 1452.028,
145
+ "eval_steps_per_second": 6.961,
146
  "step": 272
147
  },
148
  {
149
  "epoch": 8.0,
150
  "step": 272,
151
  "total_flos": 8999450537533440.0,
152
+ "train_loss": 0.47968793616575356,
153
+ "train_runtime": 135.9284,
154
+ "train_samples_per_second": 3145.406,
155
+ "train_steps_per_second": 12.507
156
  }
157
  ],
158
  "logging_steps": 1,