Hartunka committed on
Commit
89beee1
·
verified ·
1 Parent(s): f0f0f88

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: Hartunka/bert_base_km_5_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: bert_base_km_5_v1_qnli
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # bert_base_km_5_v1_qnli
17
 
18
- This model is a fine-tuned version of [Hartunka/bert_base_km_5_v1](https://huggingface.co/Hartunka/bert_base_km_5_v1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.0977
21
- - Accuracy: 0.7205
22
 
23
  ## Model description
24
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: Hartunka/bert_base_km_5_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  model-index:
13
  - name: bert_base_km_5_v1_qnli
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE QNLI
20
+ type: glue
21
+ args: qnli
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.7230459454512173
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # bert_base_km_5_v1_qnli
32
 
33
+ This model is a fine-tuned version of [Hartunka/bert_base_km_5_v1](https://huggingface.co/Hartunka/bert_base_km_5_v1) on the GLUE QNLI dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.5546
36
+ - Accuracy: 0.7230
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.6355482335712979,
4
- "eval_loss": 0.6360806822776794,
5
- "eval_runtime": 3.4105,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 1601.841,
8
- "eval_steps_per_second": 6.451,
9
- "total_flos": 9.645664445050368e+16,
10
- "train_loss": 0.4762156921812051,
11
- "train_runtime": 1213.2441,
12
  "train_samples": 104743,
13
- "train_samples_per_second": 4316.65,
14
- "train_steps_per_second": 16.897
15
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "eval_accuracy": 0.7230459454512173,
4
+ "eval_loss": 0.5545818209648132,
5
+ "eval_runtime": 3.4801,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 1569.798,
8
+ "eval_steps_per_second": 6.322,
9
+ "total_flos": 1.1023616508628992e+17,
10
+ "train_loss": 0.3288262250946789,
11
+ "train_runtime": 1406.5934,
12
  "train_samples": 104743,
13
+ "train_samples_per_second": 3723.286,
14
+ "train_steps_per_second": 14.574
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.0,
3
- "eval_accuracy": 0.6355482335712979,
4
- "eval_loss": 0.6360806822776794,
5
- "eval_runtime": 3.4105,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 1601.841,
8
- "eval_steps_per_second": 6.451
9
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "eval_accuracy": 0.7230459454512173,
4
+ "eval_loss": 0.5545818209648132,
5
+ "eval_runtime": 3.4801,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 1569.798,
8
+ "eval_steps_per_second": 6.322
9
  }
logs/events.out.tfevents.1744965771.s_005_m.2772371.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6e8649c32aa70f691649e18cd338ed1c4ab2f93d0b772d8e5b63ea4e38fca0a
3
+ size 363
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.0,
3
- "total_flos": 9.645664445050368e+16,
4
- "train_loss": 0.4762156921812051,
5
- "train_runtime": 1213.2441,
6
  "train_samples": 104743,
7
- "train_samples_per_second": 4316.65,
8
- "train_steps_per_second": 16.897
9
  }
 
1
  {
2
+ "epoch": 8.0,
3
+ "total_flos": 1.1023616508628992e+17,
4
+ "train_loss": 0.3288262250946789,
5
+ "train_runtime": 1406.5934,
6
  "train_samples": 104743,
7
+ "train_samples_per_second": 3723.286,
8
+ "train_steps_per_second": 14.574
9
  }
trainer_state.json CHANGED
@@ -1,134 +1,150 @@
1
  {
2
- "best_global_step": 820,
3
- "best_metric": 0.6360806822776794,
4
- "best_model_checkpoint": "bert_base_km_5_v1_qnli/checkpoint-820",
5
- "epoch": 7.0,
6
  "eval_steps": 500,
7
- "global_step": 2870,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "grad_norm": 1.828690767288208,
15
  "learning_rate": 4.9e-05,
16
- "loss": 0.6649,
17
  "step": 410
18
  },
19
  {
20
  "epoch": 1.0,
21
- "eval_accuracy": 0.628226249313564,
22
- "eval_loss": 0.6430281400680542,
23
- "eval_runtime": 3.466,
24
- "eval_samples_per_second": 1576.185,
25
- "eval_steps_per_second": 6.347,
26
  "step": 410
27
  },
28
  {
29
  "epoch": 2.0,
30
- "grad_norm": 1.3895260095596313,
31
  "learning_rate": 4.8e-05,
32
- "loss": 0.6306,
33
  "step": 820
34
  },
35
  {
36
  "epoch": 2.0,
37
- "eval_accuracy": 0.6355482335712979,
38
- "eval_loss": 0.6360806822776794,
39
- "eval_runtime": 3.4177,
40
- "eval_samples_per_second": 1598.464,
41
- "eval_steps_per_second": 6.437,
42
  "step": 820
43
  },
44
  {
45
  "epoch": 3.0,
46
- "grad_norm": 2.440150737762451,
47
  "learning_rate": 4.7e-05,
48
- "loss": 0.5825,
49
  "step": 1230
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_accuracy": 0.6346329855390811,
54
- "eval_loss": 0.6825335025787354,
55
- "eval_runtime": 3.4179,
56
- "eval_samples_per_second": 1598.346,
57
- "eval_steps_per_second": 6.437,
58
  "step": 1230
59
  },
60
  {
61
  "epoch": 4.0,
62
- "grad_norm": 3.394643545150757,
63
  "learning_rate": 4.600000000000001e-05,
64
- "loss": 0.5099,
65
  "step": 1640
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.6339007871133077,
70
- "eval_loss": 0.6797336935997009,
71
- "eval_runtime": 3.4223,
72
- "eval_samples_per_second": 1596.287,
73
- "eval_steps_per_second": 6.428,
74
  "step": 1640
75
  },
76
  {
77
  "epoch": 5.0,
78
- "grad_norm": 4.846761703491211,
79
  "learning_rate": 4.5e-05,
80
- "loss": 0.413,
81
  "step": 2050
82
  },
83
  {
84
  "epoch": 5.0,
85
- "eval_accuracy": 0.6326194398682042,
86
- "eval_loss": 0.7664294242858887,
87
- "eval_runtime": 3.4103,
88
- "eval_samples_per_second": 1601.897,
89
- "eval_steps_per_second": 6.451,
90
  "step": 2050
91
  },
92
  {
93
  "epoch": 6.0,
94
- "grad_norm": 5.692334175109863,
95
  "learning_rate": 4.4000000000000006e-05,
96
- "loss": 0.3084,
97
  "step": 2460
98
  },
99
  {
100
  "epoch": 6.0,
101
- "eval_accuracy": 0.6306058941973275,
102
- "eval_loss": 0.9365506172180176,
103
- "eval_runtime": 3.4155,
104
- "eval_samples_per_second": 1599.487,
105
- "eval_steps_per_second": 6.441,
106
  "step": 2460
107
  },
108
  {
109
  "epoch": 7.0,
110
- "grad_norm": 9.478910446166992,
111
  "learning_rate": 4.3e-05,
112
- "loss": 0.2241,
113
  "step": 2870
114
  },
115
  {
116
  "epoch": 7.0,
117
- "eval_accuracy": 0.6220025626944902,
118
- "eval_loss": 1.1781185865402222,
119
- "eval_runtime": 3.4223,
120
- "eval_samples_per_second": 1596.288,
121
- "eval_steps_per_second": 6.428,
122
  "step": 2870
123
  },
124
  {
125
- "epoch": 7.0,
126
- "step": 2870,
127
- "total_flos": 9.645664445050368e+16,
128
- "train_loss": 0.4762156921812051,
129
- "train_runtime": 1213.2441,
130
- "train_samples_per_second": 4316.65,
131
- "train_steps_per_second": 16.897
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  }
133
  ],
134
  "logging_steps": 1,
@@ -157,7 +173,7 @@
157
  "attributes": {}
158
  }
159
  },
160
- "total_flos": 9.645664445050368e+16,
161
  "train_batch_size": 256,
162
  "trial_name": null,
163
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1230,
3
+ "best_metric": 0.5545818209648132,
4
+ "best_model_checkpoint": "bert_base_km_5_v1_qnli/checkpoint-1230",
5
+ "epoch": 8.0,
6
  "eval_steps": 500,
7
+ "global_step": 3280,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "grad_norm": 3.1364433765411377,
15
  "learning_rate": 4.9e-05,
16
+ "loss": 0.6587,
17
  "step": 410
18
  },
19
  {
20
  "epoch": 1.0,
21
+ "eval_accuracy": 0.653120995789859,
22
+ "eval_loss": 0.6231846809387207,
23
+ "eval_runtime": 3.4944,
24
+ "eval_samples_per_second": 1563.373,
25
+ "eval_steps_per_second": 6.296,
26
  "step": 410
27
  },
28
  {
29
  "epoch": 2.0,
30
+ "grad_norm": 3.362776279449463,
31
  "learning_rate": 4.8e-05,
32
+ "loss": 0.5937,
33
  "step": 820
34
  },
35
  {
36
  "epoch": 2.0,
37
+ "eval_accuracy": 0.7095002745744097,
38
+ "eval_loss": 0.5632237792015076,
39
+ "eval_runtime": 3.4737,
40
+ "eval_samples_per_second": 1572.683,
41
+ "eval_steps_per_second": 6.333,
42
  "step": 820
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "grad_norm": 4.1579790115356445,
47
  "learning_rate": 4.7e-05,
48
+ "loss": 0.4625,
49
  "step": 1230
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.7230459454512173,
54
+ "eval_loss": 0.5545818209648132,
55
+ "eval_runtime": 3.4852,
56
+ "eval_samples_per_second": 1567.485,
57
+ "eval_steps_per_second": 6.312,
58
  "step": 1230
59
  },
60
  {
61
  "epoch": 4.0,
62
+ "grad_norm": 5.969952583312988,
63
  "learning_rate": 4.600000000000001e-05,
64
+ "loss": 0.3374,
65
  "step": 1640
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.7349441698700347,
70
+ "eval_loss": 0.6059415936470032,
71
+ "eval_runtime": 3.4663,
72
+ "eval_samples_per_second": 1576.053,
73
+ "eval_steps_per_second": 6.347,
74
  "step": 1640
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "grad_norm": 5.252902507781982,
79
  "learning_rate": 4.5e-05,
80
+ "loss": 0.2281,
81
  "step": 2050
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "eval_accuracy": 0.7358594179022515,
86
+ "eval_loss": 0.6952667832374573,
87
+ "eval_runtime": 3.4644,
88
+ "eval_samples_per_second": 1576.908,
89
+ "eval_steps_per_second": 6.35,
90
  "step": 2050
91
  },
92
  {
93
  "epoch": 6.0,
94
+ "grad_norm": 9.05676555633545,
95
  "learning_rate": 4.4000000000000006e-05,
96
+ "loss": 0.1522,
97
  "step": 2460
98
  },
99
  {
100
  "epoch": 6.0,
101
+ "eval_accuracy": 0.7316492769540546,
102
+ "eval_loss": 0.8388772010803223,
103
+ "eval_runtime": 3.4941,
104
+ "eval_samples_per_second": 1563.513,
105
+ "eval_steps_per_second": 6.296,
106
  "step": 2460
107
  },
108
  {
109
  "epoch": 7.0,
110
+ "grad_norm": 11.310493469238281,
111
  "learning_rate": 4.3e-05,
112
+ "loss": 0.113,
113
  "step": 2870
114
  },
115
  {
116
  "epoch": 7.0,
117
+ "eval_accuracy": 0.7254255903349808,
118
+ "eval_loss": 0.987945556640625,
119
+ "eval_runtime": 3.4671,
120
+ "eval_samples_per_second": 1575.652,
121
+ "eval_steps_per_second": 6.345,
122
  "step": 2870
123
  },
124
  {
125
+ "epoch": 8.0,
126
+ "grad_norm": 6.990315914154053,
127
+ "learning_rate": 4.2e-05,
128
+ "loss": 0.0849,
129
+ "step": 3280
130
+ },
131
+ {
132
+ "epoch": 8.0,
133
+ "eval_accuracy": 0.7204832509610104,
134
+ "eval_loss": 1.0977376699447632,
135
+ "eval_runtime": 3.4638,
136
+ "eval_samples_per_second": 1577.179,
137
+ "eval_steps_per_second": 6.351,
138
+ "step": 3280
139
+ },
140
+ {
141
+ "epoch": 8.0,
142
+ "step": 3280,
143
+ "total_flos": 1.1023616508628992e+17,
144
+ "train_loss": 0.3288262250946789,
145
+ "train_runtime": 1406.5934,
146
+ "train_samples_per_second": 3723.286,
147
+ "train_steps_per_second": 14.574
148
  }
149
  ],
150
  "logging_steps": 1,
 
173
  "attributes": {}
174
  }
175
  },
176
+ "total_flos": 1.1023616508628992e+17,
177
  "train_batch_size": 256,
178
  "trial_name": null,
179
  "trial_params": null