edloginovad committed on
Commit
20d1786
·
verified ·
1 Parent(s): 73a5ef7

Model save

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ training_artifacts/training_loss.png filter=lfs diff=lfs merge=lfs -text
37
+ training_artifacts/training_metrics.png filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ base_model: DedalusHealthCare/tinybert-mlm-en
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - accuracy
9
+ - f1
10
+ - precision
11
+ - recall
12
+ model-index:
13
+ - name: tinybert
14
+ results: []
15
+ ---
16
+
17
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
18
+ should probably proofread and complete it, then remove this comment. -->
19
+
20
+ # tinybert
21
+
22
+ This model is a fine-tuned version of [DedalusHealthCare/tinybert-mlm-en](https://huggingface.co/DedalusHealthCare/tinybert-mlm-en) on an unspecified dataset (no dataset name was provided to the Trainer).
23
+ It achieves the following results on the evaluation set:
24
+ - Loss: 0.5198
25
+ - Accuracy: 0.9816
26
+ - F1: 0.0
27
+ - Precision: 0.0
28
+ - Recall: 0.0
29
+
30
+ ## Model description
31
+
32
+ More information needed
33
+
34
+ ## Intended uses & limitations
35
+
36
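+ More information needed. Note: on the evaluation set the final checkpoint reports F1, precision and recall of 0.0 alongside 0.9816 accuracy, which suggests it is not predicting the positive class at all; the accuracy figure should be interpreted with caution.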
37
+
38
+ ## Training and evaluation data
39
+
40
+ More information needed
41
+
42
+ ## Training procedure
43
+
44
+ ### Training hyperparameters
45
+
46
+ The following hyperparameters were used during training:
47
+ - learning_rate: 2e-05
48
+ - train_batch_size: 32
49
+ - eval_batch_size: 16
50
+ - seed: 42
51
+ - gradient_accumulation_steps: 2
52
+ - total_train_batch_size: 64
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: cosine
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 20
57
+ - mixed_precision_training: Native AMP
58
+
59
+ ### Training results
60
+
61
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy | F1 | Precision | Recall |
62
+ |:-------------:|:------:|:----:|:---------------:|:--------:|:------:|:---------:|:------:|
63
+ | 0.6922 | 0.2857 | 1 | 0.6659 | 0.8373 | 0.0606 | 0.0339 | 0.2857 |
64
+ | 0.6922 | 0.5714 | 2 | 0.6609 | 0.8688 | 0.0385 | 0.0222 | 0.1429 |
65
+ | 0.6922 | 0.8571 | 3 | 0.6511 | 0.9186 | 0.0606 | 0.0385 | 0.1429 |
66
+ | 0.6922 | 1.1429 | 4 | 0.6367 | 0.9711 | 0.0 | 0.0 | 0.0 |
67
+ | 0.6922 | 1.4286 | 5 | 0.6178 | 0.9816 | 0.0 | 0.0 | 0.0 |
68
+ | 0.6922 | 1.7143 | 6 | 0.5948 | 0.9816 | 0.0 | 0.0 | 0.0 |
69
+ | 0.6922 | 2.0 | 7 | 0.5687 | 0.9816 | 0.0 | 0.0 | 0.0 |
70
+ | 0.6922 | 2.2857 | 8 | 0.5438 | 0.9816 | 0.0 | 0.0 | 0.0 |
71
+ | 0.6922 | 2.5714 | 9 | 0.5198 | 0.9816 | 0.0 | 0.0 | 0.0 |
72
+
73
+
74
+ ### Framework versions
75
+
76
+ - Transformers 4.45.1
77
+ - Pytorch 2.6.0+cu124
78
+ - Datasets 2.16.0
79
+ - Tokenizers 0.20.3
logs/events.out.tfevents.1758794250.e4c37a3f7732.1802.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cd69ff6ddd9695c410d56dc9f5e9faa9081dcaee18b699373636d7ebf3694bc
3
- size 8549
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b5b9ad2c188e4b9c413d9287969a79c0edabf8b906d5581ee9a7727e53938d5
3
+ size 9823
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc362ab46c9c63e0360628c1d992b5b577fefbfefd6979aa4c1e2d37ae43416
3
  size 46634832
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:420641270a1fa45d21ec8f1270943cc49aea3eab32437400c8aba4c1ee422c02
3
  size 46634832
training_artifacts/training_history.csv ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ loss;grad_norm;learning_rate;epoch;step;eval_loss;eval_accuracy;eval_f1;eval_precision;eval_recall;eval_runtime;eval_samples_per_second;eval_steps_per_second;train_runtime;train_samples_per_second;train_steps_per_second;total_flos;train_loss
2
+ 0.6922;2.2436933517456055;3.3333333333333333e-06;0.2857142857142857;1;;;;;;;;;;;;;
3
+ ;;;0.2857142857142857;1;0.6658656597137451;0.8372703412073491;0.060606060606060594;0.03389830508474576;0.2857142857142857;0.7232;526.847;33.187;;;;;
4
+ ;;;0.5714285714285714;2;0.6608893275260925;0.868766404199475;0.038461538461538464;0.022222222222222223;0.14285714285714285;1.1627;327.685;20.642;;;;;
5
+ ;;;0.8571428571428571;3;0.6511300802230835;0.9186351706036745;0.060606060606060615;0.038461538461538464;0.14285714285714285;0.708;538.127;33.898;;;;;
6
+ ;;;1.1428571428571428;4;0.6366764307022095;0.9711286089238845;0.0;0.0;0.0;0.6896;552.485;34.802;;;;;
7
+ ;;;1.4285714285714286;5;0.6178359985351562;0.9816272965879265;0.0;0.0;0.0;2.3625;161.269;10.159;;;;;
8
+ ;;;1.7142857142857144;6;0.5948342084884644;0.9816272965879265;0.0;0.0;0.0;0.6961;547.358;34.479;;;;;
9
+ ;;;2.0;7;0.56866055727005;0.9816272965879265;0.0;0.0;0.0;0.7148;533.007;33.575;;;;;
10
+ ;;;2.2857142857142856;8;0.5438190698623657;0.9816272965879265;0.0;0.0;0.0;2.841;134.109;8.448;;;;;
11
+ ;;;2.571428571428571;9;0.5197792649269104;0.9816272965879265;0.0;0.0;0.0;0.6932;549.626;34.622;;;;;
12
+ ;;;2.571428571428571;9;;;;;;;;;17.9546;236.152;3.342;1986114624480.0;0.665169874827067
training_artifacts/training_history.json ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "loss": 0.6922,
4
+ "grad_norm": 2.2436933517456055,
5
+ "learning_rate": 3.3333333333333333e-06,
6
+ "epoch": 0.2857142857142857,
7
+ "step": 1
8
+ },
9
+ {
10
+ "eval_loss": 0.6658656597137451,
11
+ "eval_accuracy": 0.8372703412073491,
12
+ "eval_f1": 0.060606060606060594,
13
+ "eval_precision": 0.03389830508474576,
14
+ "eval_recall": 0.2857142857142857,
15
+ "eval_runtime": 0.7232,
16
+ "eval_samples_per_second": 526.847,
17
+ "eval_steps_per_second": 33.187,
18
+ "epoch": 0.2857142857142857,
19
+ "step": 1
20
+ },
21
+ {
22
+ "eval_loss": 0.6608893275260925,
23
+ "eval_accuracy": 0.868766404199475,
24
+ "eval_f1": 0.038461538461538464,
25
+ "eval_precision": 0.022222222222222223,
26
+ "eval_recall": 0.14285714285714285,
27
+ "eval_runtime": 1.1627,
28
+ "eval_samples_per_second": 327.685,
29
+ "eval_steps_per_second": 20.642,
30
+ "epoch": 0.5714285714285714,
31
+ "step": 2
32
+ },
33
+ {
34
+ "eval_loss": 0.6511300802230835,
35
+ "eval_accuracy": 0.9186351706036745,
36
+ "eval_f1": 0.060606060606060615,
37
+ "eval_precision": 0.038461538461538464,
38
+ "eval_recall": 0.14285714285714285,
39
+ "eval_runtime": 0.708,
40
+ "eval_samples_per_second": 538.127,
41
+ "eval_steps_per_second": 33.898,
42
+ "epoch": 0.8571428571428571,
43
+ "step": 3
44
+ },
45
+ {
46
+ "eval_loss": 0.6366764307022095,
47
+ "eval_accuracy": 0.9711286089238845,
48
+ "eval_f1": 0.0,
49
+ "eval_precision": 0.0,
50
+ "eval_recall": 0.0,
51
+ "eval_runtime": 0.6896,
52
+ "eval_samples_per_second": 552.485,
53
+ "eval_steps_per_second": 34.802,
54
+ "epoch": 1.1428571428571428,
55
+ "step": 4
56
+ },
57
+ {
58
+ "eval_loss": 0.6178359985351562,
59
+ "eval_accuracy": 0.9816272965879265,
60
+ "eval_f1": 0.0,
61
+ "eval_precision": 0.0,
62
+ "eval_recall": 0.0,
63
+ "eval_runtime": 2.3625,
64
+ "eval_samples_per_second": 161.269,
65
+ "eval_steps_per_second": 10.159,
66
+ "epoch": 1.4285714285714286,
67
+ "step": 5
68
+ },
69
+ {
70
+ "eval_loss": 0.5948342084884644,
71
+ "eval_accuracy": 0.9816272965879265,
72
+ "eval_f1": 0.0,
73
+ "eval_precision": 0.0,
74
+ "eval_recall": 0.0,
75
+ "eval_runtime": 0.6961,
76
+ "eval_samples_per_second": 547.358,
77
+ "eval_steps_per_second": 34.479,
78
+ "epoch": 1.7142857142857144,
79
+ "step": 6
80
+ },
81
+ {
82
+ "eval_loss": 0.56866055727005,
83
+ "eval_accuracy": 0.9816272965879265,
84
+ "eval_f1": 0.0,
85
+ "eval_precision": 0.0,
86
+ "eval_recall": 0.0,
87
+ "eval_runtime": 0.7148,
88
+ "eval_samples_per_second": 533.007,
89
+ "eval_steps_per_second": 33.575,
90
+ "epoch": 2.0,
91
+ "step": 7
92
+ },
93
+ {
94
+ "eval_loss": 0.5438190698623657,
95
+ "eval_accuracy": 0.9816272965879265,
96
+ "eval_f1": 0.0,
97
+ "eval_precision": 0.0,
98
+ "eval_recall": 0.0,
99
+ "eval_runtime": 2.841,
100
+ "eval_samples_per_second": 134.109,
101
+ "eval_steps_per_second": 8.448,
102
+ "epoch": 2.2857142857142856,
103
+ "step": 8
104
+ },
105
+ {
106
+ "eval_loss": 0.5197792649269104,
107
+ "eval_accuracy": 0.9816272965879265,
108
+ "eval_f1": 0.0,
109
+ "eval_precision": 0.0,
110
+ "eval_recall": 0.0,
111
+ "eval_runtime": 0.6932,
112
+ "eval_samples_per_second": 549.626,
113
+ "eval_steps_per_second": 34.622,
114
+ "epoch": 2.571428571428571,
115
+ "step": 9
116
+ },
117
+ {
118
+ "train_runtime": 17.9546,
119
+ "train_samples_per_second": 236.152,
120
+ "train_steps_per_second": 3.342,
121
+ "total_flos": 1986114624480.0,
122
+ "train_loss": 0.665169874827067,
123
+ "epoch": 2.571428571428571,
124
+ "step": 9
125
+ }
126
+ ]
training_artifacts/training_loss.png ADDED

Git LFS Details

  • SHA256: c82b658885e1b433ac35f6a1b6d40139e7fec254890a4dbb388b6f488817c499
  • Pointer size: 131 Bytes
  • Size of remote file: 182 kB
training_artifacts/training_metrics.png ADDED

Git LFS Details

  • SHA256: 89a6b5bac39ddebf0c1f93d68e7484550ba9c9ea459227e71f6710d7ce808b13
  • Pointer size: 131 Bytes
  • Size of remote file: 189 kB
training_artifacts/training_summary.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_epochs": 2.571428571428571,
3
+ "total_steps": "9",
4
+ "final_train_loss": 0.6922,
5
+ "final_eval_loss": 0.5197792649269104,
6
+ "best_eval_loss": 0.5197792649269104
7
+ }