Hartunka committed on
Commit
7380137
·
verified ·
1 Parent(s): a30aa77

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: Hartunka/bert_base_km_20_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: bert_base_km_20_v1_rte
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # bert_base_km_20_v1_rte
17
 
18
- This model is a fine-tuned version of [Hartunka/bert_base_km_20_v1](https://huggingface.co/Hartunka/bert_base_km_20_v1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.4270
21
- - Accuracy: 0.4946
22
 
23
  ## Model description
24
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: Hartunka/bert_base_km_20_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  model-index:
13
  - name: bert_base_km_20_v1_rte
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE RTE
20
+ type: glue
21
+ args: rte
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.48736462093862815
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # bert_base_km_20_v1_rte
32
 
33
+ This model is a fine-tuned version of [Hartunka/bert_base_km_20_v1](https://huggingface.co/Hartunka/bert_base_km_20_v1) on the GLUE RTE dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.7144
36
+ - Accuracy: 0.4874
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.49458483754512633,
4
- "eval_loss": 0.7144136428833008,
5
- "eval_runtime": 0.2138,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 1295.412,
8
- "eval_steps_per_second": 9.353,
9
  "total_flos": 2293012847462400.0,
10
- "train_loss": 0.47900971174240115,
11
- "train_runtime": 45.1124,
12
  "train_samples": 2490,
13
- "train_samples_per_second": 2759.772,
14
- "train_steps_per_second": 11.083
15
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.48736462093862815,
4
+ "eval_loss": 0.7143642902374268,
5
+ "eval_runtime": 0.2188,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 1265.975,
8
+ "eval_steps_per_second": 9.141,
9
  "total_flos": 2293012847462400.0,
10
+ "train_loss": 0.47949272564479284,
11
+ "train_runtime": 44.6997,
12
  "train_samples": 2490,
13
+ "train_samples_per_second": 2785.254,
14
+ "train_steps_per_second": 11.186
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.49458483754512633,
4
- "eval_loss": 0.7144136428833008,
5
- "eval_runtime": 0.2138,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 1295.412,
8
- "eval_steps_per_second": 9.353
9
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.48736462093862815,
4
+ "eval_loss": 0.7143642902374268,
5
+ "eval_runtime": 0.2188,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 1265.975,
8
+ "eval_steps_per_second": 9.141
9
  }
logs/events.out.tfevents.1745049936.s_005_m.2788007.9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15df30b4fa7702da063dfb491ce8d1b7f187b42a5cb4249e9d8e3c29fa4217ed
3
+ size 357
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 7.0,
3
  "total_flos": 2293012847462400.0,
4
- "train_loss": 0.47900971174240115,
5
- "train_runtime": 45.1124,
6
  "train_samples": 2490,
7
- "train_samples_per_second": 2759.772,
8
- "train_steps_per_second": 11.083
9
  }
 
1
  {
2
  "epoch": 7.0,
3
  "total_flos": 2293012847462400.0,
4
+ "train_loss": 0.47949272564479284,
5
+ "train_runtime": 44.6997,
6
  "train_samples": 2490,
7
+ "train_samples_per_second": 2785.254,
8
+ "train_steps_per_second": 11.186
9
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 20,
3
- "best_metric": 0.7144136428833008,
4
  "best_model_checkpoint": "bert_base_km_20_v1_rte/checkpoint-20",
5
  "epoch": 7.0,
6
  "eval_steps": 500,
@@ -11,124 +11,124 @@
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 1.612154245376587,
15
  "learning_rate": 4.9e-05,
16
  "loss": 0.7172,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.4548736462093863,
22
- "eval_loss": 0.7189333438873291,
23
- "eval_runtime": 0.1891,
24
- "eval_samples_per_second": 1465.169,
25
- "eval_steps_per_second": 10.579,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 2.0,
30
- "grad_norm": 1.070043683052063,
31
  "learning_rate": 4.8e-05,
32
- "loss": 0.6657,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.49458483754512633,
38
- "eval_loss": 0.7144136428833008,
39
- "eval_runtime": 0.1939,
40
- "eval_samples_per_second": 1428.509,
41
- "eval_steps_per_second": 10.314,
42
  "step": 20
43
  },
44
  {
45
  "epoch": 3.0,
46
- "grad_norm": 1.0873299837112427,
47
  "learning_rate": 4.7e-05,
48
- "loss": 0.6167,
49
  "step": 30
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_accuracy": 0.48014440433212996,
54
- "eval_loss": 0.7387924194335938,
55
- "eval_runtime": 0.1956,
56
- "eval_samples_per_second": 1415.945,
57
- "eval_steps_per_second": 10.223,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 4.0,
62
- "grad_norm": 1.5245287418365479,
63
  "learning_rate": 4.600000000000001e-05,
64
- "loss": 0.5298,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.4981949458483754,
70
- "eval_loss": 0.7878444194793701,
71
- "eval_runtime": 0.1917,
72
- "eval_samples_per_second": 1445.232,
73
- "eval_steps_per_second": 10.435,
74
  "step": 40
75
  },
76
  {
77
  "epoch": 5.0,
78
- "grad_norm": 2.408193588256836,
79
  "learning_rate": 4.5e-05,
80
- "loss": 0.4001,
81
  "step": 50
82
  },
83
  {
84
  "epoch": 5.0,
85
  "eval_accuracy": 0.4657039711191336,
86
- "eval_loss": 0.9474132657051086,
87
- "eval_runtime": 0.1934,
88
- "eval_samples_per_second": 1432.46,
89
- "eval_steps_per_second": 10.343,
90
  "step": 50
91
  },
92
  {
93
  "epoch": 6.0,
94
- "grad_norm": 5.390678405761719,
95
  "learning_rate": 4.4000000000000006e-05,
96
- "loss": 0.2708,
97
  "step": 60
98
  },
99
  {
100
  "epoch": 6.0,
101
  "eval_accuracy": 0.48736462093862815,
102
- "eval_loss": 1.1789220571517944,
103
- "eval_runtime": 0.1869,
104
- "eval_samples_per_second": 1482.292,
105
- "eval_steps_per_second": 10.702,
106
  "step": 60
107
  },
108
  {
109
  "epoch": 7.0,
110
- "grad_norm": 3.910654306411743,
111
  "learning_rate": 4.3e-05,
112
- "loss": 0.1527,
113
  "step": 70
114
  },
115
  {
116
  "epoch": 7.0,
117
- "eval_accuracy": 0.48375451263537905,
118
- "eval_loss": 1.4288209676742554,
119
- "eval_runtime": 0.2156,
120
- "eval_samples_per_second": 1284.848,
121
- "eval_steps_per_second": 9.277,
122
  "step": 70
123
  },
124
  {
125
  "epoch": 7.0,
126
  "step": 70,
127
  "total_flos": 2293012847462400.0,
128
- "train_loss": 0.47900971174240115,
129
- "train_runtime": 45.1124,
130
- "train_samples_per_second": 2759.772,
131
- "train_steps_per_second": 11.083
132
  }
133
  ],
134
  "logging_steps": 1,
 
1
  {
2
  "best_global_step": 20,
3
+ "best_metric": 0.7143642902374268,
4
  "best_model_checkpoint": "bert_base_km_20_v1_rte/checkpoint-20",
5
  "epoch": 7.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 1.6082864999771118,
15
  "learning_rate": 4.9e-05,
16
  "loss": 0.7172,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.45126353790613716,
22
+ "eval_loss": 0.7190461158752441,
23
+ "eval_runtime": 0.2048,
24
+ "eval_samples_per_second": 1352.543,
25
+ "eval_steps_per_second": 9.766,
26
  "step": 10
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "grad_norm": 1.069560170173645,
31
  "learning_rate": 4.8e-05,
32
+ "loss": 0.6658,
33
  "step": 20
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.48736462093862815,
38
+ "eval_loss": 0.7143642902374268,
39
+ "eval_runtime": 0.1966,
40
+ "eval_samples_per_second": 1408.66,
41
+ "eval_steps_per_second": 10.171,
42
  "step": 20
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "grad_norm": 1.088363766670227,
47
  "learning_rate": 4.7e-05,
48
+ "loss": 0.6169,
49
  "step": 30
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.48375451263537905,
54
+ "eval_loss": 0.7388594150543213,
55
+ "eval_runtime": 0.2029,
56
+ "eval_samples_per_second": 1365.048,
57
+ "eval_steps_per_second": 9.856,
58
  "step": 30
59
  },
60
  {
61
  "epoch": 4.0,
62
+ "grad_norm": 1.4970088005065918,
63
  "learning_rate": 4.600000000000001e-05,
64
+ "loss": 0.5307,
65
  "step": 40
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.49458483754512633,
70
+ "eval_loss": 0.7875006794929504,
71
+ "eval_runtime": 0.1976,
72
+ "eval_samples_per_second": 1401.985,
73
+ "eval_steps_per_second": 10.123,
74
  "step": 40
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "grad_norm": 2.31677508354187,
79
  "learning_rate": 4.5e-05,
80
+ "loss": 0.4009,
81
  "step": 50
82
  },
83
  {
84
  "epoch": 5.0,
85
  "eval_accuracy": 0.4657039711191336,
86
+ "eval_loss": 0.947138249874115,
87
+ "eval_runtime": 0.2288,
88
+ "eval_samples_per_second": 1210.554,
89
+ "eval_steps_per_second": 8.74,
90
  "step": 50
91
  },
92
  {
93
  "epoch": 6.0,
94
+ "grad_norm": 5.374250411987305,
95
  "learning_rate": 4.4000000000000006e-05,
96
+ "loss": 0.2719,
97
  "step": 60
98
  },
99
  {
100
  "epoch": 6.0,
101
  "eval_accuracy": 0.48736462093862815,
102
+ "eval_loss": 1.1685609817504883,
103
+ "eval_runtime": 0.1915,
104
+ "eval_samples_per_second": 1446.654,
105
+ "eval_steps_per_second": 10.445,
106
  "step": 60
107
  },
108
  {
109
  "epoch": 7.0,
110
+ "grad_norm": 3.844560384750366,
111
  "learning_rate": 4.3e-05,
112
+ "loss": 0.153,
113
  "step": 70
114
  },
115
  {
116
  "epoch": 7.0,
117
+ "eval_accuracy": 0.49458483754512633,
118
+ "eval_loss": 1.4269626140594482,
119
+ "eval_runtime": 0.1935,
120
+ "eval_samples_per_second": 1431.625,
121
+ "eval_steps_per_second": 10.337,
122
  "step": 70
123
  },
124
  {
125
  "epoch": 7.0,
126
  "step": 70,
127
  "total_flos": 2293012847462400.0,
128
+ "train_loss": 0.47949272564479284,
129
+ "train_runtime": 44.6997,
130
+ "train_samples_per_second": 2785.254,
131
+ "train_steps_per_second": 11.186
132
  }
133
  ],
134
  "logging_steps": 1,