Hartunka committed on
Commit
20d0a25
·
verified ·
1 Parent(s): f0f1747

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: Hartunka/tiny_bert_rand_10_v2
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: tiny_bert_rand_10_v2_qnli
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # tiny_bert_rand_10_v2_qnli
17
 
18
- This model is a fine-tuned version of [Hartunka/tiny_bert_rand_10_v2](https://huggingface.co/Hartunka/tiny_bert_rand_10_v2) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.0601
21
- - Accuracy: 0.5973
22
 
23
  ## Model description
24
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: Hartunka/tiny_bert_rand_10_v2
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  model-index:
13
  - name: tiny_bert_rand_10_v2_qnli
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE QNLI
20
+ type: glue
21
+ args: qnli
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.6218195130880468
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # tiny_bert_rand_10_v2_qnli
32
 
33
+ This model is a fine-tuned version of [Hartunka/tiny_bert_rand_10_v2](https://huggingface.co/Hartunka/tiny_bert_rand_10_v2) on the GLUE QNLI dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.6458
36
+ - Accuracy: 0.6218
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -2,14 +2,14 @@
2
  "epoch": 7.0,
3
  "eval_accuracy": 0.6218195130880468,
4
  "eval_loss": 0.6458103060722351,
5
- "eval_runtime": 1.5759,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 3466.588,
8
- "eval_steps_per_second": 13.96,
9
  "total_flos": 1.922715369529651e+16,
10
- "train_loss": 0.521182724158523,
11
- "train_runtime": 397.0763,
12
  "train_samples": 104743,
13
- "train_samples_per_second": 13189.279,
14
- "train_steps_per_second": 51.627
15
  }
 
2
  "epoch": 7.0,
3
  "eval_accuracy": 0.6218195130880468,
4
  "eval_loss": 0.6458103060722351,
5
+ "eval_runtime": 1.5854,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 3445.79,
8
+ "eval_steps_per_second": 13.877,
9
  "total_flos": 1.922715369529651e+16,
10
+ "train_loss": 0.5211214391196647,
11
+ "train_runtime": 400.0726,
12
  "train_samples": 104743,
13
+ "train_samples_per_second": 13090.498,
14
+ "train_steps_per_second": 51.241
15
  }
eval_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 7.0,
3
  "eval_accuracy": 0.6218195130880468,
4
  "eval_loss": 0.6458103060722351,
5
- "eval_runtime": 1.5759,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 3466.588,
8
- "eval_steps_per_second": 13.96
9
  }
 
2
  "epoch": 7.0,
3
  "eval_accuracy": 0.6218195130880468,
4
  "eval_loss": 0.6458103060722351,
5
+ "eval_runtime": 1.5854,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 3445.79,
8
+ "eval_steps_per_second": 13.877
9
  }
logs/events.out.tfevents.1745307862.s_005_m.2850172.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d7df05aa0d17de1938ac4e3113f749dd4ad9a4e31f29a9ec9d1df041d468dfa
3
+ size 363
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 7.0,
3
  "total_flos": 1.922715369529651e+16,
4
- "train_loss": 0.521182724158523,
5
- "train_runtime": 397.0763,
6
  "train_samples": 104743,
7
- "train_samples_per_second": 13189.279,
8
- "train_steps_per_second": 51.627
9
  }
 
1
  {
2
  "epoch": 7.0,
3
  "total_flos": 1.922715369529651e+16,
4
+ "train_loss": 0.5211214391196647,
5
+ "train_runtime": 400.0726,
6
  "train_samples": 104743,
7
+ "train_samples_per_second": 13090.498,
8
+ "train_steps_per_second": 51.241
9
  }
trainer_state.json CHANGED
@@ -20,9 +20,9 @@
20
  "epoch": 1.0,
21
  "eval_accuracy": 0.6201720666300568,
22
  "eval_loss": 0.6485046148300171,
23
- "eval_runtime": 1.5703,
24
- "eval_samples_per_second": 3478.947,
25
- "eval_steps_per_second": 14.01,
26
  "step": 410
27
  },
28
  {
@@ -36,99 +36,99 @@
36
  "epoch": 2.0,
37
  "eval_accuracy": 0.6218195130880468,
38
  "eval_loss": 0.6458103060722351,
39
- "eval_runtime": 1.5605,
40
- "eval_samples_per_second": 3500.742,
41
- "eval_steps_per_second": 14.098,
42
  "step": 820
43
  },
44
  {
45
  "epoch": 3.0,
46
- "grad_norm": 2.3196091651916504,
47
  "learning_rate": 4.7e-05,
48
- "loss": 0.594,
49
  "step": 1230
50
  },
51
  {
52
  "epoch": 3.0,
53
- "eval_accuracy": 0.6238330587589237,
54
- "eval_loss": 0.6675131916999817,
55
- "eval_runtime": 1.5875,
56
- "eval_samples_per_second": 3441.203,
57
- "eval_steps_per_second": 13.858,
58
  "step": 1230
59
  },
60
  {
61
  "epoch": 4.0,
62
- "grad_norm": 2.5276989936828613,
63
  "learning_rate": 4.600000000000001e-05,
64
  "loss": 0.5378,
65
  "step": 1640
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.6183415705656233,
70
- "eval_loss": 0.6989854574203491,
71
- "eval_runtime": 1.5603,
72
- "eval_samples_per_second": 3501.193,
73
- "eval_steps_per_second": 14.1,
74
  "step": 1640
75
  },
76
  {
77
  "epoch": 5.0,
78
- "grad_norm": 3.2850587368011475,
79
  "learning_rate": 4.5e-05,
80
- "loss": 0.4719,
81
  "step": 2050
82
  },
83
  {
84
  "epoch": 5.0,
85
- "eval_accuracy": 0.613948380010983,
86
- "eval_loss": 0.8101333975791931,
87
- "eval_runtime": 1.5459,
88
- "eval_samples_per_second": 3533.951,
89
- "eval_steps_per_second": 14.232,
90
  "step": 2050
91
  },
92
  {
93
  "epoch": 6.0,
94
- "grad_norm": 4.961696624755859,
95
  "learning_rate": 4.4000000000000006e-05,
96
- "loss": 0.4033,
97
  "step": 2460
98
  },
99
  {
100
  "epoch": 6.0,
101
- "eval_accuracy": 0.6086399414241259,
102
- "eval_loss": 0.9288338422775269,
103
- "eval_runtime": 1.5731,
104
- "eval_samples_per_second": 3472.797,
105
- "eval_steps_per_second": 13.985,
106
  "step": 2460
107
  },
108
  {
109
  "epoch": 7.0,
110
- "grad_norm": 5.6496782302856445,
111
  "learning_rate": 4.3e-05,
112
- "loss": 0.3409,
113
  "step": 2870
114
  },
115
  {
116
  "epoch": 7.0,
117
- "eval_accuracy": 0.5980230642504119,
118
- "eval_loss": 1.0519371032714844,
119
- "eval_runtime": 1.5521,
120
- "eval_samples_per_second": 3519.723,
121
- "eval_steps_per_second": 14.174,
122
  "step": 2870
123
  },
124
  {
125
  "epoch": 7.0,
126
  "step": 2870,
127
  "total_flos": 1.922715369529651e+16,
128
- "train_loss": 0.521182724158523,
129
- "train_runtime": 397.0763,
130
- "train_samples_per_second": 13189.279,
131
- "train_steps_per_second": 51.627
132
  }
133
  ],
134
  "logging_steps": 1,
 
20
  "epoch": 1.0,
21
  "eval_accuracy": 0.6201720666300568,
22
  "eval_loss": 0.6485046148300171,
23
+ "eval_runtime": 1.6382,
24
+ "eval_samples_per_second": 3334.701,
25
+ "eval_steps_per_second": 13.429,
26
  "step": 410
27
  },
28
  {
 
36
  "epoch": 2.0,
37
  "eval_accuracy": 0.6218195130880468,
38
  "eval_loss": 0.6458103060722351,
39
+ "eval_runtime": 1.5547,
40
+ "eval_samples_per_second": 3513.962,
41
+ "eval_steps_per_second": 14.151,
42
  "step": 820
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "grad_norm": 2.337428569793701,
47
  "learning_rate": 4.7e-05,
48
+ "loss": 0.5941,
49
  "step": 1230
50
  },
51
  {
52
  "epoch": 3.0,
53
+ "eval_accuracy": 0.6231008603331503,
54
+ "eval_loss": 0.6681211590766907,
55
+ "eval_runtime": 1.5513,
56
+ "eval_samples_per_second": 3521.628,
57
+ "eval_steps_per_second": 14.182,
58
  "step": 1230
59
  },
60
  {
61
  "epoch": 4.0,
62
+ "grad_norm": 2.5682601928710938,
63
  "learning_rate": 4.600000000000001e-05,
64
  "loss": 0.5378,
65
  "step": 1640
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.6198059674171701,
70
+ "eval_loss": 0.7004396915435791,
71
+ "eval_runtime": 1.5729,
72
+ "eval_samples_per_second": 3473.155,
73
+ "eval_steps_per_second": 13.987,
74
  "step": 1640
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "grad_norm": 3.3654308319091797,
79
  "learning_rate": 4.5e-05,
80
+ "loss": 0.4716,
81
  "step": 2050
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "eval_accuracy": 0.6159619256818598,
86
+ "eval_loss": 0.8119640946388245,
87
+ "eval_runtime": 1.567,
88
+ "eval_samples_per_second": 3486.19,
89
+ "eval_steps_per_second": 14.039,
90
  "step": 2050
91
  },
92
  {
93
  "epoch": 6.0,
94
+ "grad_norm": 5.089620590209961,
95
  "learning_rate": 4.4000000000000006e-05,
96
+ "loss": 0.4032,
97
  "step": 2460
98
  },
99
  {
100
  "epoch": 6.0,
101
+ "eval_accuracy": 0.6075416437854658,
102
+ "eval_loss": 0.9388319253921509,
103
+ "eval_runtime": 1.592,
104
+ "eval_samples_per_second": 3431.577,
105
+ "eval_steps_per_second": 13.819,
106
  "step": 2460
107
  },
108
  {
109
  "epoch": 7.0,
110
+ "grad_norm": 5.411346435546875,
111
  "learning_rate": 4.3e-05,
112
+ "loss": 0.3408,
113
  "step": 2870
114
  },
115
  {
116
  "epoch": 7.0,
117
+ "eval_accuracy": 0.5972908658246385,
118
+ "eval_loss": 1.0600956678390503,
119
+ "eval_runtime": 1.5662,
120
+ "eval_samples_per_second": 3487.966,
121
+ "eval_steps_per_second": 14.046,
122
  "step": 2870
123
  },
124
  {
125
  "epoch": 7.0,
126
  "step": 2870,
127
  "total_flos": 1.922715369529651e+16,
128
+ "train_loss": 0.5211214391196647,
129
+ "train_runtime": 400.0726,
130
+ "train_samples_per_second": 13090.498,
131
+ "train_steps_per_second": 51.241
132
  }
133
  ],
134
  "logging_steps": 1,