edloginovad committed on
Commit
84a2a17
·
verified ·
1 Parent(s): 6b935d3

Model save

Browse files
README.md CHANGED
@@ -21,7 +21,7 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  This model is a fine-tuned version of [DedalusHealthCare/tinybert-mlm-en](https://huggingface.co/DedalusHealthCare/tinybert-mlm-en) on an unspecified dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.5703
25
  - Accuracy: 0.9816
26
  - F1: 0.0
27
  - Precision: 0.0
@@ -60,15 +60,15 @@ The following hyperparameters were used during training:
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
  |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
63
- | 0.703 | 0.2857 | 1 | 0.7166 | 0.2310 | 0.0456 | 0.0233 | 1.0 |
64
- | 0.703 | 0.5714 | 2 | 0.7117 | 0.2677 | 0.0412 | 0.0211 | 0.8571 |
65
- | 0.703 | 0.8571 | 3 | 0.7020 | 0.3701 | 0.04 | 0.0206 | 0.7143 |
66
- | 0.703 | 1.1429 | 4 | 0.6875 | 0.5774 | 0.0359 | 0.0187 | 0.4286 |
67
- | 0.703 | 1.4286 | 5 | 0.6686 | 0.7874 | 0.0 | 0.0 | 0.0 |
68
- | 0.703 | 1.7143 | 6 | 0.6458 | 0.9423 | 0.0 | 0.0 | 0.0 |
69
- | 0.703 | 2.0 | 7 | 0.6198 | 0.9816 | 0.0 | 0.0 | 0.0 |
70
- | 0.703 | 2.2857 | 8 | 0.5946 | 0.9816 | 0.0 | 0.0 | 0.0 |
71
- | 0.703 | 2.5714 | 9 | 0.5703 | 0.9816 | 0.0 | 0.0 | 0.0 |
72
 
73
 
74
  ### Framework versions
 
21
 
22
  This model is a fine-tuned version of [DedalusHealthCare/tinybert-mlm-en](https://huggingface.co/DedalusHealthCare/tinybert-mlm-en) on an unspecified dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 0.5492
25
  - Accuracy: 0.9816
26
  - F1: 0.0
27
  - Precision: 0.0
 
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
  |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
63
+ | 0.689 | 0.2857 | 1 | 0.6890 | 0.5564 | 0.0231 | 0.0120 | 0.2857 |
64
+ | 0.689 | 0.5714 | 2 | 0.6843 | 0.6352 | 0.0 | 0.0 | 0.0 |
65
+ | 0.689 | 0.8571 | 3 | 0.6749 | 0.7507 | 0.0 | 0.0 | 0.0 |
66
+ | 0.689 | 1.1429 | 4 | 0.6614 | 0.8688 | 0.0 | 0.0 | 0.0 |
67
+ | 0.689 | 1.4286 | 5 | 0.6435 | 0.9475 | 0.0 | 0.0 | 0.0 |
68
+ | 0.689 | 1.7143 | 6 | 0.6220 | 0.9816 | 0.0 | 0.0 | 0.0 |
69
+ | 0.689 | 2.0 | 7 | 0.5971 | 0.9816 | 0.0 | 0.0 | 0.0 |
70
+ | 0.689 | 2.2857 | 8 | 0.5728 | 0.9816 | 0.0 | 0.0 | 0.0 |
71
+ | 0.689 | 2.5714 | 9 | 0.5492 | 0.9816 | 0.0 | 0.0 | 0.0 |
72
 
73
 
74
  ### Framework versions
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f163bfefd249064226a7fa02fe22dde624b62e553bf4ba63375bfb00a734a92d
3
  size 46634832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:140cdfee2958f1b237065f5364a532a0d8ad78e4847d35bdf445438cf1e09c69
3
  size 46634832
training_artifacts/training_history.csv CHANGED
@@ -1,12 +1,12 @@
1
  loss;grad_norm;learning_rate;epoch;step;eval_loss;eval_accuracy;eval_f1;eval_precision;eval_recall;eval_runtime;eval_samples_per_second;eval_steps_per_second;train_runtime;train_samples_per_second;train_steps_per_second;total_flos;train_loss
2
- 0.703;2.2138702869415283;3.3333333333333333e-06;0.2857142857142857;1;;;;;;;;;;;;;
3
- ;;;0.2857142857142857;1;0.7165892720222473;0.23097112860892388;0.04560260586319218;0.023333333333333334;1.0;0.7454;511.131;32.197;;;;;
4
- ;;;0.5714285714285714;2;0.7117474675178528;0.2677165354330709;0.041237113402061855;0.02112676056338028;0.8571428571428571;1.5578;244.576;15.406;;;;;
5
- ;;;0.8571428571428571;3;0.7019844055175781;0.3700787401574803;0.04;0.0205761316872428;0.7142857142857143;2.8847;132.075;8.32;;;;;
6
- ;;;1.1428571428571428;4;0.6875269412994385;0.5774278215223098;0.03592814371257485;0.01875;0.42857142857142855;2.1358;178.386;11.237;;;;;
7
- ;;;1.4285714285714286;5;0.6685800552368164;0.7874015748031497;0.0;0.0;0.0;0.7158;532.274;33.529;;;;;
8
- ;;;1.7142857142857144;6;0.6458192467689514;0.9422572178477691;0.0;0.0;0.0;1.2139;313.874;19.772;;;;;
9
- ;;;2.0;7;0.6197686195373535;0.9816272965879265;0.0;0.0;0.0;0.7728;493.017;31.056;;;;;
10
- ;;;2.2857142857142856;8;0.5946394205093384;0.9816272965879265;0.0;0.0;0.0;0.7291;522.579;32.918;;;;;
11
- ;;;2.571428571428571;9;0.5703445672988892;0.9816272965879265;0.0;0.0;0.0;0.9339;407.972;25.699;;;;;
12
- ;;;2.571428571428571;9;;;;;;;;;22.1708;191.242;2.706;2172566201472.0;0.6836379369099935
 
1
  loss;grad_norm;learning_rate;epoch;step;eval_loss;eval_accuracy;eval_f1;eval_precision;eval_recall;eval_runtime;eval_samples_per_second;eval_steps_per_second;train_runtime;train_samples_per_second;train_steps_per_second;total_flos;train_loss
2
+ 0.689;2.7823822498321533;3.3333333333333333e-06;0.2857142857142857;1;;;;;;;;;;;;;
3
+ ;;;0.2857142857142857;1;0.6890391707420349;0.5564304461942258;0.023121387283236997;0.012048192771084338;0.2857142857142857;0.7451;511.33;32.21;;;;;
4
+ ;;;0.5714285714285714;2;0.6843268275260925;0.6351706036745407;0.0;0.0;0.0;0.7197;529.417;33.349;;;;;
5
+ ;;;0.8571428571428571;3;0.6749046444892883;0.7506561679790026;0.0;0.0;0.0;3.3581;113.455;7.147;;;;;
6
+ ;;;1.1428571428571428;4;0.6614006161689758;0.868766404199475;0.0;0.0;0.0;0.7108;535.986;33.763;;;;;
7
+ ;;;1.4285714285714286;5;0.6434675455093384;0.94750656167979;0.0;0.0;0.0;1.1845;321.664;20.262;;;;;
8
+ ;;;1.7142857142857144;6;0.6219972372055054;0.9816272965879265;0.0;0.0;0.0;0.7216;528.026;33.261;;;;;
9
+ ;;;2.0;7;0.5970718264579773;0.9816272965879265;0.0;0.0;0.0;0.7206;528.749;33.307;;;;;
10
+ ;;;2.2857142857142856;8;0.5727897882461548;0.9816272965879265;0.0;0.0;0.0;0.7052;540.284;34.034;;;;;
11
+ ;;;2.571428571428571;9;0.549211323261261;0.9816272965879265;0.0;0.0;0.0;0.6987;545.308;34.35;;;;;
12
+ ;;;2.571428571428571;9;;;;;;;;;18.2317;232.562;3.291;2148246430560.0;0.6723573472764757
training_artifacts/training_history.json CHANGED
@@ -1,125 +1,125 @@
1
  [
2
  {
3
- "loss": 0.703,
4
- "grad_norm": 2.2138702869415283,
5
  "learning_rate": 3.3333333333333333e-06,
6
  "epoch": 0.2857142857142857,
7
  "step": 1
8
  },
9
  {
10
- "eval_loss": 0.7165892720222473,
11
- "eval_accuracy": 0.23097112860892388,
12
- "eval_f1": 0.04560260586319218,
13
- "eval_precision": 0.023333333333333334,
14
- "eval_recall": 1.0,
15
- "eval_runtime": 0.7454,
16
- "eval_samples_per_second": 511.131,
17
- "eval_steps_per_second": 32.197,
18
  "epoch": 0.2857142857142857,
19
  "step": 1
20
  },
21
  {
22
- "eval_loss": 0.7117474675178528,
23
- "eval_accuracy": 0.2677165354330709,
24
- "eval_f1": 0.041237113402061855,
25
- "eval_precision": 0.02112676056338028,
26
- "eval_recall": 0.8571428571428571,
27
- "eval_runtime": 1.5578,
28
- "eval_samples_per_second": 244.576,
29
- "eval_steps_per_second": 15.406,
30
  "epoch": 0.5714285714285714,
31
  "step": 2
32
  },
33
  {
34
- "eval_loss": 0.7019844055175781,
35
- "eval_accuracy": 0.3700787401574803,
36
- "eval_f1": 0.04,
37
- "eval_precision": 0.0205761316872428,
38
- "eval_recall": 0.7142857142857143,
39
- "eval_runtime": 2.8847,
40
- "eval_samples_per_second": 132.075,
41
- "eval_steps_per_second": 8.32,
42
  "epoch": 0.8571428571428571,
43
  "step": 3
44
  },
45
  {
46
- "eval_loss": 0.6875269412994385,
47
- "eval_accuracy": 0.5774278215223098,
48
- "eval_f1": 0.03592814371257485,
49
- "eval_precision": 0.01875,
50
- "eval_recall": 0.42857142857142855,
51
- "eval_runtime": 2.1358,
52
- "eval_samples_per_second": 178.386,
53
- "eval_steps_per_second": 11.237,
54
  "epoch": 1.1428571428571428,
55
  "step": 4
56
  },
57
  {
58
- "eval_loss": 0.6685800552368164,
59
- "eval_accuracy": 0.7874015748031497,
60
  "eval_f1": 0.0,
61
  "eval_precision": 0.0,
62
  "eval_recall": 0.0,
63
- "eval_runtime": 0.7158,
64
- "eval_samples_per_second": 532.274,
65
- "eval_steps_per_second": 33.529,
66
  "epoch": 1.4285714285714286,
67
  "step": 5
68
  },
69
  {
70
- "eval_loss": 0.6458192467689514,
71
- "eval_accuracy": 0.9422572178477691,
72
  "eval_f1": 0.0,
73
  "eval_precision": 0.0,
74
  "eval_recall": 0.0,
75
- "eval_runtime": 1.2139,
76
- "eval_samples_per_second": 313.874,
77
- "eval_steps_per_second": 19.772,
78
  "epoch": 1.7142857142857144,
79
  "step": 6
80
  },
81
  {
82
- "eval_loss": 0.6197686195373535,
83
  "eval_accuracy": 0.9816272965879265,
84
  "eval_f1": 0.0,
85
  "eval_precision": 0.0,
86
  "eval_recall": 0.0,
87
- "eval_runtime": 0.7728,
88
- "eval_samples_per_second": 493.017,
89
- "eval_steps_per_second": 31.056,
90
  "epoch": 2.0,
91
  "step": 7
92
  },
93
  {
94
- "eval_loss": 0.5946394205093384,
95
  "eval_accuracy": 0.9816272965879265,
96
  "eval_f1": 0.0,
97
  "eval_precision": 0.0,
98
  "eval_recall": 0.0,
99
- "eval_runtime": 0.7291,
100
- "eval_samples_per_second": 522.579,
101
- "eval_steps_per_second": 32.918,
102
  "epoch": 2.2857142857142856,
103
  "step": 8
104
  },
105
  {
106
- "eval_loss": 0.5703445672988892,
107
  "eval_accuracy": 0.9816272965879265,
108
  "eval_f1": 0.0,
109
  "eval_precision": 0.0,
110
  "eval_recall": 0.0,
111
- "eval_runtime": 0.9339,
112
- "eval_samples_per_second": 407.972,
113
- "eval_steps_per_second": 25.699,
114
  "epoch": 2.571428571428571,
115
  "step": 9
116
  },
117
  {
118
- "train_runtime": 22.1708,
119
- "train_samples_per_second": 191.242,
120
- "train_steps_per_second": 2.706,
121
- "total_flos": 2172566201472.0,
122
- "train_loss": 0.6836379369099935,
123
  "epoch": 2.571428571428571,
124
  "step": 9
125
  }
 
1
  [
2
  {
3
+ "loss": 0.689,
4
+ "grad_norm": 2.7823822498321533,
5
  "learning_rate": 3.3333333333333333e-06,
6
  "epoch": 0.2857142857142857,
7
  "step": 1
8
  },
9
  {
10
+ "eval_loss": 0.6890391707420349,
11
+ "eval_accuracy": 0.5564304461942258,
12
+ "eval_f1": 0.023121387283236997,
13
+ "eval_precision": 0.012048192771084338,
14
+ "eval_recall": 0.2857142857142857,
15
+ "eval_runtime": 0.7451,
16
+ "eval_samples_per_second": 511.33,
17
+ "eval_steps_per_second": 32.21,
18
  "epoch": 0.2857142857142857,
19
  "step": 1
20
  },
21
  {
22
+ "eval_loss": 0.6843268275260925,
23
+ "eval_accuracy": 0.6351706036745407,
24
+ "eval_f1": 0.0,
25
+ "eval_precision": 0.0,
26
+ "eval_recall": 0.0,
27
+ "eval_runtime": 0.7197,
28
+ "eval_samples_per_second": 529.417,
29
+ "eval_steps_per_second": 33.349,
30
  "epoch": 0.5714285714285714,
31
  "step": 2
32
  },
33
  {
34
+ "eval_loss": 0.6749046444892883,
35
+ "eval_accuracy": 0.7506561679790026,
36
+ "eval_f1": 0.0,
37
+ "eval_precision": 0.0,
38
+ "eval_recall": 0.0,
39
+ "eval_runtime": 3.3581,
40
+ "eval_samples_per_second": 113.455,
41
+ "eval_steps_per_second": 7.147,
42
  "epoch": 0.8571428571428571,
43
  "step": 3
44
  },
45
  {
46
+ "eval_loss": 0.6614006161689758,
47
+ "eval_accuracy": 0.868766404199475,
48
+ "eval_f1": 0.0,
49
+ "eval_precision": 0.0,
50
+ "eval_recall": 0.0,
51
+ "eval_runtime": 0.7108,
52
+ "eval_samples_per_second": 535.986,
53
+ "eval_steps_per_second": 33.763,
54
  "epoch": 1.1428571428571428,
55
  "step": 4
56
  },
57
  {
58
+ "eval_loss": 0.6434675455093384,
59
+ "eval_accuracy": 0.94750656167979,
60
  "eval_f1": 0.0,
61
  "eval_precision": 0.0,
62
  "eval_recall": 0.0,
63
+ "eval_runtime": 1.1845,
64
+ "eval_samples_per_second": 321.664,
65
+ "eval_steps_per_second": 20.262,
66
  "epoch": 1.4285714285714286,
67
  "step": 5
68
  },
69
  {
70
+ "eval_loss": 0.6219972372055054,
71
+ "eval_accuracy": 0.9816272965879265,
72
  "eval_f1": 0.0,
73
  "eval_precision": 0.0,
74
  "eval_recall": 0.0,
75
+ "eval_runtime": 0.7216,
76
+ "eval_samples_per_second": 528.026,
77
+ "eval_steps_per_second": 33.261,
78
  "epoch": 1.7142857142857144,
79
  "step": 6
80
  },
81
  {
82
+ "eval_loss": 0.5970718264579773,
83
  "eval_accuracy": 0.9816272965879265,
84
  "eval_f1": 0.0,
85
  "eval_precision": 0.0,
86
  "eval_recall": 0.0,
87
+ "eval_runtime": 0.7206,
88
+ "eval_samples_per_second": 528.749,
89
+ "eval_steps_per_second": 33.307,
90
  "epoch": 2.0,
91
  "step": 7
92
  },
93
  {
94
+ "eval_loss": 0.5727897882461548,
95
  "eval_accuracy": 0.9816272965879265,
96
  "eval_f1": 0.0,
97
  "eval_precision": 0.0,
98
  "eval_recall": 0.0,
99
+ "eval_runtime": 0.7052,
100
+ "eval_samples_per_second": 540.284,
101
+ "eval_steps_per_second": 34.034,
102
  "epoch": 2.2857142857142856,
103
  "step": 8
104
  },
105
  {
106
+ "eval_loss": 0.549211323261261,
107
  "eval_accuracy": 0.9816272965879265,
108
  "eval_f1": 0.0,
109
  "eval_precision": 0.0,
110
  "eval_recall": 0.0,
111
+ "eval_runtime": 0.6987,
112
+ "eval_samples_per_second": 545.308,
113
+ "eval_steps_per_second": 34.35,
114
  "epoch": 2.571428571428571,
115
  "step": 9
116
  },
117
  {
118
+ "train_runtime": 18.2317,
119
+ "train_samples_per_second": 232.562,
120
+ "train_steps_per_second": 3.291,
121
+ "total_flos": 2148246430560.0,
122
+ "train_loss": 0.6723573472764757,
123
  "epoch": 2.571428571428571,
124
  "step": 9
125
  }
training_artifacts/training_loss.png CHANGED

Git LFS Details

  • SHA256: d9dd3d1b27f37437e53fef5ad7f231f8cabfd2885c48083da27f13ba02bf3572
  • Pointer size: 131 Bytes
  • Size of remote file: 182 kB

Git LFS Details

  • SHA256: 5285168a28fc0e066461b4a01000ddec4357c7d75e07201d573dcbbcaf4ef05a
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB
training_artifacts/training_metrics.png CHANGED

Git LFS Details

  • SHA256: fe165a04f9de27f51cb9fc8eeae61fea79a1abaa594b0d38f5aec389f97090b0
  • Pointer size: 131 Bytes
  • Size of remote file: 154 kB

Git LFS Details

  • SHA256: 3752ceb78f6f8998424e1ab2d56154578de5ac9b04bb6fe9ce8bedc1fc96b3af
  • Pointer size: 131 Bytes
  • Size of remote file: 144 kB
training_artifacts/training_summary.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "total_epochs": 2.571428571428571,
3
  "total_steps": "9",
4
- "final_train_loss": 0.703,
5
- "final_eval_loss": 0.5703445672988892,
6
- "best_eval_loss": 0.5703445672988892
7
  }
 
1
  {
2
  "total_epochs": 2.571428571428571,
3
  "total_steps": "9",
4
+ "final_train_loss": 0.689,
5
+ "final_eval_loss": 0.549211323261261,
6
+ "best_eval_loss": 0.549211323261261
7
  }