edloginovad committed on
Commit
3635006
·
verified ·
1 Parent(s): 12d6cbe

Model save

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [DedalusHealthCare/tinybert-mlm-en](https://huggingface.co/DedalusHealthCare/tinybert-mlm-en) on the None dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.6019
25
  - Accuracy: 0.9816
26
  - F1: 0.0
27
  - Precision: 0.0
@@ -58,17 +58,17 @@ The following hyperparameters were used during training:
58
 
59
  ### Training results
60
 
61
- | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
- |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
63
- | 0.7327 | 0.2857 | 1 | 0.7513 | 0.0341 | 0.0366 | 0.0187 | 1.0 |
64
- | 0.7327 | 0.5714 | 2 | 0.7464 | 0.0525 | 0.0373 | 0.0190 | 1.0 |
65
- | 0.7327 | 0.8571 | 3 | 0.7368 | 0.0892 | 0.0388 | 0.0198 | 1.0 |
66
- | 0.7327 | 1.1429 | 4 | 0.7225 | 0.1916 | 0.0375 | 0.0192 | 0.8571 |
67
- | 0.7327 | 1.4286 | 5 | 0.7037 | 0.3885 | 0.0251 | 0.0129 | 0.4286 |
68
- | 0.7327 | 1.7143 | 6 | 0.6806 | 0.6325 | 0.0 | 0.0 | 0.0 |
69
- | 0.7327 | 2.0 | 7 | 0.6538 | 0.8688 | 0.0 | 0.0 | 0.0 |
70
- | 0.7327 | 2.2857 | 8 | 0.6274 | 0.9738 | 0.0 | 0.0 | 0.0 |
71
- | 0.7327 | 2.5714 | 9 | 0.6019 | 0.9816 | 0.0 | 0.0 | 0.0 |
72
 
73
 
74
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [DedalusHealthCare/tinybert-mlm-en](https://huggingface.co/DedalusHealthCare/tinybert-mlm-en) on the None dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.5007
25
  - Accuracy: 0.9816
26
  - F1: 0.0
27
  - Precision: 0.0
 
58
 
59
  ### Training results
60
 
61
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:---:|:---------:|:------:|
63
+ | 0.6651 | 0.2857 | 1 | 0.6412 | 0.9003 | 0.0 | 0.0 | 0.0 |
64
+ | 0.6651 | 0.5714 | 2 | 0.6363 | 0.9239 | 0.0 | 0.0 | 0.0 |
65
+ | 0.6651 | 0.8571 | 3 | 0.6264 | 0.9475 | 0.0 | 0.0 | 0.0 |
66
+ | 0.6651 | 1.1429 | 4 | 0.6125 | 0.9606 | 0.0 | 0.0 | 0.0 |
67
+ | 0.6651 | 1.4286 | 5 | 0.5944 | 0.9685 | 0.0 | 0.0 | 0.0 |
68
+ | 0.6651 | 1.7143 | 6 | 0.5722 | 0.9738 | 0.0 | 0.0 | 0.0 |
69
+ | 0.6651 | 2.0 | 7 | 0.5467 | 0.9790 | 0.0 | 0.0 | 0.0 |
70
+ | 0.6651 | 2.2857 | 8 | 0.5230 | 0.9816 | 0.0 | 0.0 | 0.0 |
71
+ | 0.6651 | 2.5714 | 9 | 0.5007 | 0.9816 | 0.0 | 0.0 | 0.0 |
72
 
73
 
74
  ### Framework versions
logs/events.out.tfevents.1760011961.ip-172-31-12-22.10247.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6725d7513e0d6bed9233b2693f6fdf0f3eb35ffea312bd982e7d86e7b9efe1a8
3
- size 9102
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a11e83f00225c05834b318ee327c15a0a7afbf22b06b17388b80551fff119d6c
3
+ size 9913
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9973c72a478da0220790b93125c8a455209f350fceb5c666924c1784696ae544
3
  size 46634832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c270f4d5425845329b57b5a00429e669c0149bcf21a9e0c8303b71b8aa504852
3
  size 46634832
training_artifacts/training_history.csv CHANGED
@@ -1,12 +1,12 @@
1
  loss;grad_norm;learning_rate;epoch;step;eval_loss;eval_accuracy;eval_f1;eval_precision;eval_recall;eval_runtime;eval_samples_per_second;eval_steps_per_second;train_runtime;train_samples_per_second;train_steps_per_second;total_flos;train_loss
2
- 0.7327;2.7726080417633057;3.3333333333333333e-06;0.2857142857142857;1;;;;;;;;;;;;;
3
- ;;;0.2857142857142857;1;0.7512667775154114;0.03412073490813648;0.03664921465968587;0.018666666666666668;1.0;5.3956;70.613;4.448;;;;;
4
- ;;;0.5714285714285714;2;0.7464109659194946;0.05249343832020997;0.037333333333333336;0.019021739130434784;1.0;5.5933;68.117;4.291;;;;;
5
- ;;;0.8571428571428571;3;0.7368069291114807;0.08923884514435695;0.038781163434903045;0.01977401129943503;1.0;5.5621;68.5;4.315;;;;;
6
- ;;;1.1428571428571428;4;0.7224730849266052;0.19160104986876642;0.03749999999999999;0.019169329073482427;0.8571428571428571;5.6279;67.698;4.264;;;;;
7
- ;;;1.4285714285714286;5;0.7036912441253662;0.3884514435695538;0.02510460251046025;0.01293103448275862;0.42857142857142855;5.703;66.807;4.208;;;;;
8
- ;;;1.7142857142857144;6;0.6806334853172302;0.6325459317585301;0.0;0.0;0.0;5.773;65.997;4.157;;;;;
9
- ;;;2.0;7;0.6538448929786682;0.868766404199475;0.0;0.0;0.0;5.6817;67.057;4.224;;;;;
10
- ;;;2.2857142857142856;8;0.6274169683456421;0.973753280839895;0.0;0.0;0.0;5.8624;64.991;4.094;;;;;
11
- ;;;2.571428571428571;9;0.6018883585929871;0.9816272965879265;0.0;0.0;0.0;5.644;67.505;4.252;;;;;
12
- ;;;2.571428571428571;9;;;;;;;;;101.6832;41.698;0.59;2156353020864.0;0.6984957986407809
 
1
  loss;grad_norm;learning_rate;epoch;step;eval_loss;eval_accuracy;eval_f1;eval_precision;eval_recall;eval_runtime;eval_samples_per_second;eval_steps_per_second;train_runtime;train_samples_per_second;train_steps_per_second;total_flos;train_loss
2
+ 0.6651;2.0492517948150635;3.3333333333333333e-06;0.2857142857142857;1;;;;;;;;;;;;;
3
+ ;;;0.2857142857142857;1;0.641178548336029;0.9002624671916011;0.0;0.0;0.0;5.428;70.191;4.422;;;;;
4
+ ;;;0.5714285714285714;2;0.6362672448158264;0.9238845144356955;0.0;0.0;0.0;5.345;71.282;4.49;;;;;
5
+ ;;;0.8571428571428571;3;0.6264415383338928;0.94750656167979;0.0;0.0;0.0;5.2138;73.076;4.603;;;;;
6
+ ;;;1.1428571428571428;4;0.6125248074531555;0.9606299212598425;0.0;0.0;0.0;5.2287;72.867;4.59;;;;;
7
+ ;;;1.4285714285714286;5;0.5944115519523621;0.968503937007874;0.0;0.0;0.0;5.3131;71.709;4.517;;;;;
8
+ ;;;1.7142857142857144;6;0.5722081661224365;0.973753280839895;0.0;0.0;0.0;5.3197;71.62;4.512;;;;;
9
+ ;;;2.0;7;0.5467000603675842;0.979002624671916;0.0;0.0;0.0;5.2338;72.796;4.586;;;;;
10
+ ;;;2.2857142857142856;8;0.5230435729026794;0.9816272965879265;0.0;0.0;0.0;5.3484;71.237;4.487;;;;;
11
+ ;;;2.571428571428571;9;0.5007050633430481;0.9816272965879265;0.0;0.0;0.0;5.4954;69.331;4.367;;;;;
12
+ ;;;2.571428571428571;9;;;;;;;;;92.7268;45.726;0.647;2140139840256.0;0.6428146097395155
training_artifacts/training_history.json CHANGED
@@ -1,125 +1,125 @@
1
  [
2
  {
3
- "loss": 0.7327,
4
- "grad_norm": 2.7726080417633057,
5
  "learning_rate": 3.3333333333333333e-06,
6
  "epoch": 0.2857142857142857,
7
  "step": 1
8
  },
9
  {
10
- "eval_loss": 0.7512667775154114,
11
- "eval_accuracy": 0.03412073490813648,
12
- "eval_f1": 0.03664921465968587,
13
- "eval_precision": 0.018666666666666668,
14
- "eval_recall": 1.0,
15
- "eval_runtime": 5.3956,
16
- "eval_samples_per_second": 70.613,
17
- "eval_steps_per_second": 4.448,
18
  "epoch": 0.2857142857142857,
19
  "step": 1
20
  },
21
  {
22
- "eval_loss": 0.7464109659194946,
23
- "eval_accuracy": 0.05249343832020997,
24
- "eval_f1": 0.037333333333333336,
25
- "eval_precision": 0.019021739130434784,
26
- "eval_recall": 1.0,
27
- "eval_runtime": 5.5933,
28
- "eval_samples_per_second": 68.117,
29
- "eval_steps_per_second": 4.291,
30
  "epoch": 0.5714285714285714,
31
  "step": 2
32
  },
33
  {
34
- "eval_loss": 0.7368069291114807,
35
- "eval_accuracy": 0.08923884514435695,
36
- "eval_f1": 0.038781163434903045,
37
- "eval_precision": 0.01977401129943503,
38
- "eval_recall": 1.0,
39
- "eval_runtime": 5.5621,
40
- "eval_samples_per_second": 68.5,
41
- "eval_steps_per_second": 4.315,
42
  "epoch": 0.8571428571428571,
43
  "step": 3
44
  },
45
  {
46
- "eval_loss": 0.7224730849266052,
47
- "eval_accuracy": 0.19160104986876642,
48
- "eval_f1": 0.03749999999999999,
49
- "eval_precision": 0.019169329073482427,
50
- "eval_recall": 0.8571428571428571,
51
- "eval_runtime": 5.6279,
52
- "eval_samples_per_second": 67.698,
53
- "eval_steps_per_second": 4.264,
54
  "epoch": 1.1428571428571428,
55
  "step": 4
56
  },
57
  {
58
- "eval_loss": 0.7036912441253662,
59
- "eval_accuracy": 0.3884514435695538,
60
- "eval_f1": 0.02510460251046025,
61
- "eval_precision": 0.01293103448275862,
62
- "eval_recall": 0.42857142857142855,
63
- "eval_runtime": 5.703,
64
- "eval_samples_per_second": 66.807,
65
- "eval_steps_per_second": 4.208,
66
  "epoch": 1.4285714285714286,
67
  "step": 5
68
  },
69
  {
70
- "eval_loss": 0.6806334853172302,
71
- "eval_accuracy": 0.6325459317585301,
72
  "eval_f1": 0.0,
73
  "eval_precision": 0.0,
74
  "eval_recall": 0.0,
75
- "eval_runtime": 5.773,
76
- "eval_samples_per_second": 65.997,
77
- "eval_steps_per_second": 4.157,
78
  "epoch": 1.7142857142857144,
79
  "step": 6
80
  },
81
  {
82
- "eval_loss": 0.6538448929786682,
83
- "eval_accuracy": 0.868766404199475,
84
  "eval_f1": 0.0,
85
  "eval_precision": 0.0,
86
  "eval_recall": 0.0,
87
- "eval_runtime": 5.6817,
88
- "eval_samples_per_second": 67.057,
89
- "eval_steps_per_second": 4.224,
90
  "epoch": 2.0,
91
  "step": 7
92
  },
93
  {
94
- "eval_loss": 0.6274169683456421,
95
- "eval_accuracy": 0.973753280839895,
96
  "eval_f1": 0.0,
97
  "eval_precision": 0.0,
98
  "eval_recall": 0.0,
99
- "eval_runtime": 5.8624,
100
- "eval_samples_per_second": 64.991,
101
- "eval_steps_per_second": 4.094,
102
  "epoch": 2.2857142857142856,
103
  "step": 8
104
  },
105
  {
106
- "eval_loss": 0.6018883585929871,
107
  "eval_accuracy": 0.9816272965879265,
108
  "eval_f1": 0.0,
109
  "eval_precision": 0.0,
110
  "eval_recall": 0.0,
111
- "eval_runtime": 5.644,
112
- "eval_samples_per_second": 67.505,
113
- "eval_steps_per_second": 4.252,
114
  "epoch": 2.571428571428571,
115
  "step": 9
116
  },
117
  {
118
- "train_runtime": 101.6832,
119
- "train_samples_per_second": 41.698,
120
- "train_steps_per_second": 0.59,
121
- "total_flos": 2156353020864.0,
122
- "train_loss": 0.6984957986407809,
123
  "epoch": 2.571428571428571,
124
  "step": 9
125
  }
 
1
  [
2
  {
3
+ "loss": 0.6651,
4
+ "grad_norm": 2.0492517948150635,
5
  "learning_rate": 3.3333333333333333e-06,
6
  "epoch": 0.2857142857142857,
7
  "step": 1
8
  },
9
  {
10
+ "eval_loss": 0.641178548336029,
11
+ "eval_accuracy": 0.9002624671916011,
12
+ "eval_f1": 0.0,
13
+ "eval_precision": 0.0,
14
+ "eval_recall": 0.0,
15
+ "eval_runtime": 5.428,
16
+ "eval_samples_per_second": 70.191,
17
+ "eval_steps_per_second": 4.422,
18
  "epoch": 0.2857142857142857,
19
  "step": 1
20
  },
21
  {
22
+ "eval_loss": 0.6362672448158264,
23
+ "eval_accuracy": 0.9238845144356955,
24
+ "eval_f1": 0.0,
25
+ "eval_precision": 0.0,
26
+ "eval_recall": 0.0,
27
+ "eval_runtime": 5.345,
28
+ "eval_samples_per_second": 71.282,
29
+ "eval_steps_per_second": 4.49,
30
  "epoch": 0.5714285714285714,
31
  "step": 2
32
  },
33
  {
34
+ "eval_loss": 0.6264415383338928,
35
+ "eval_accuracy": 0.94750656167979,
36
+ "eval_f1": 0.0,
37
+ "eval_precision": 0.0,
38
+ "eval_recall": 0.0,
39
+ "eval_runtime": 5.2138,
40
+ "eval_samples_per_second": 73.076,
41
+ "eval_steps_per_second": 4.603,
42
  "epoch": 0.8571428571428571,
43
  "step": 3
44
  },
45
  {
46
+ "eval_loss": 0.6125248074531555,
47
+ "eval_accuracy": 0.9606299212598425,
48
+ "eval_f1": 0.0,
49
+ "eval_precision": 0.0,
50
+ "eval_recall": 0.0,
51
+ "eval_runtime": 5.2287,
52
+ "eval_samples_per_second": 72.867,
53
+ "eval_steps_per_second": 4.59,
54
  "epoch": 1.1428571428571428,
55
  "step": 4
56
  },
57
  {
58
+ "eval_loss": 0.5944115519523621,
59
+ "eval_accuracy": 0.968503937007874,
60
+ "eval_f1": 0.0,
61
+ "eval_precision": 0.0,
62
+ "eval_recall": 0.0,
63
+ "eval_runtime": 5.3131,
64
+ "eval_samples_per_second": 71.709,
65
+ "eval_steps_per_second": 4.517,
66
  "epoch": 1.4285714285714286,
67
  "step": 5
68
  },
69
  {
70
+ "eval_loss": 0.5722081661224365,
71
+ "eval_accuracy": 0.973753280839895,
72
  "eval_f1": 0.0,
73
  "eval_precision": 0.0,
74
  "eval_recall": 0.0,
75
+ "eval_runtime": 5.3197,
76
+ "eval_samples_per_second": 71.62,
77
+ "eval_steps_per_second": 4.512,
78
  "epoch": 1.7142857142857144,
79
  "step": 6
80
  },
81
  {
82
+ "eval_loss": 0.5467000603675842,
83
+ "eval_accuracy": 0.979002624671916,
84
  "eval_f1": 0.0,
85
  "eval_precision": 0.0,
86
  "eval_recall": 0.0,
87
+ "eval_runtime": 5.2338,
88
+ "eval_samples_per_second": 72.796,
89
+ "eval_steps_per_second": 4.586,
90
  "epoch": 2.0,
91
  "step": 7
92
  },
93
  {
94
+ "eval_loss": 0.5230435729026794,
95
+ "eval_accuracy": 0.9816272965879265,
96
  "eval_f1": 0.0,
97
  "eval_precision": 0.0,
98
  "eval_recall": 0.0,
99
+ "eval_runtime": 5.3484,
100
+ "eval_samples_per_second": 71.237,
101
+ "eval_steps_per_second": 4.487,
102
  "epoch": 2.2857142857142856,
103
  "step": 8
104
  },
105
  {
106
+ "eval_loss": 0.5007050633430481,
107
  "eval_accuracy": 0.9816272965879265,
108
  "eval_f1": 0.0,
109
  "eval_precision": 0.0,
110
  "eval_recall": 0.0,
111
+ "eval_runtime": 5.4954,
112
+ "eval_samples_per_second": 69.331,
113
+ "eval_steps_per_second": 4.367,
114
  "epoch": 2.571428571428571,
115
  "step": 9
116
  },
117
  {
118
+ "train_runtime": 92.7268,
119
+ "train_samples_per_second": 45.726,
120
+ "train_steps_per_second": 0.647,
121
+ "total_flos": 2140139840256.0,
122
+ "train_loss": 0.6428146097395155,
123
  "epoch": 2.571428571428571,
124
  "step": 9
125
  }
training_artifacts/training_loss.png CHANGED

Git LFS Details

  • SHA256: 2442a6dfb2b6e233c954ed8baa916fc80416d0df3dad826dd04a67185674fd3f
  • Pointer size: 131 Bytes
  • Size of remote file: 181 kB

Git LFS Details

  • SHA256: 8593303465058c9f2b4497414fc44ebadc71f382414f16d681a20cbaf431a31a
  • Pointer size: 131 Bytes
  • Size of remote file: 174 kB
training_artifacts/training_metrics.png CHANGED

Git LFS Details

  • SHA256: 8fdaf612382fde5a85de85e053554c899ec7aad819b7ce3dca3dd632d3964b1c
  • Pointer size: 131 Bytes
  • Size of remote file: 186 kB

Git LFS Details

  • SHA256: 1399d16359f4c292a66873d1f987d40cb5be0c4c414f4be132f021f62d9c2ca7
  • Pointer size: 131 Bytes
  • Size of remote file: 120 kB
training_artifacts/training_summary.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "total_epochs": 2.571428571428571,
3
  "total_steps": "9",
4
- "final_train_loss": 0.7327,
5
- "final_eval_loss": 0.6018883585929871,
6
- "best_eval_loss": 0.6018883585929871
7
  }
 
1
  {
2
  "total_epochs": 2.571428571428571,
3
  "total_steps": "9",
4
+ "final_train_loss": 0.6651,
5
+ "final_eval_loss": 0.5007050633430481,
6
+ "best_eval_loss": 0.5007050633430481
7
  }