Sebsa committed on
Commit
8324b6e
·
verified ·
1 Parent(s): dcfe136

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text-regression
6
+ base_model: distilbert/distilbert-base-uncased
7
+ widget:
8
+ - text: "I love AutoTrain"
9
+ ---
10
+
11
+ # Model Trained Using AutoTrain
12
+
13
+ - Problem type: Text Regression
14
+
15
+ ## Validation Metrics
16
+ loss: 0.02739923633635044
17
+
18
+ mse: 0.02739923633635044
19
+
20
+ mae: 0.10979370772838593
21
+
22
+ r2: 0.008571624755859375
23
+
24
+ rmse: 0.1655271468259827
25
+
26
+ explained_variance: 0.020441055297851562
checkpoint-76/config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert/distilbert-base-uncased",
3
+ "_num_labels": 1,
4
+ "activation": "gelu",
5
+ "architectures": [
6
+ "DistilBertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "dim": 768,
10
+ "dropout": 0.1,
11
+ "hidden_dim": 3072,
12
+ "id2label": {
13
+ "0": "target"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "target": 0
18
+ },
19
+ "max_position_embeddings": 512,
20
+ "model_type": "distilbert",
21
+ "n_heads": 12,
22
+ "n_layers": 6,
23
+ "pad_token_id": 0,
24
+ "problem_type": "regression",
25
+ "qa_dropout": 0.1,
26
+ "seq_classif_dropout": 0.2,
27
+ "sinusoidal_pos_embds": false,
28
+ "tie_weights_": true,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.48.0",
31
+ "vocab_size": 30522
32
+ }
checkpoint-76/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d2e4c01f155a5450cc49916c41293c1a12ba56a436b914d0beca432831ce28
3
+ size 267829484
checkpoint-76/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c71fcaae1809658230e417caaf35ad1d4881dd8e8cc474b7329251de9c10a39a
3
+ size 535718266
checkpoint-76/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc17a6051e9b5f0acb0f35b4fb75a05f870d06a0251a12c147ab76af0d27cc02
3
+ size 13990
checkpoint-76/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9193ff89692fbba4cf254195b04cde91912398e77c179f71c638f3dddee3854e
3
+ size 1064
checkpoint-76/trainer_state.json ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.02739923633635044,
3
+ "best_model_checkpoint": "autotrain-bert-arg/checkpoint-76",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 76,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.039473684210526314,
13
+ "grad_norm": 2.780918598175049,
14
+ "learning_rate": 6.521739130434783e-06,
15
+ "loss": 0.0448,
16
+ "step": 3
17
+ },
18
+ {
19
+ "epoch": 0.07894736842105263,
20
+ "grad_norm": 0.544288158416748,
21
+ "learning_rate": 1.3043478260869566e-05,
22
+ "loss": 0.062,
23
+ "step": 6
24
+ },
25
+ {
26
+ "epoch": 0.11842105263157894,
27
+ "grad_norm": 0.5275054574012756,
28
+ "learning_rate": 1.956521739130435e-05,
29
+ "loss": 0.0228,
30
+ "step": 9
31
+ },
32
+ {
33
+ "epoch": 0.15789473684210525,
34
+ "grad_norm": 0.8615014553070068,
35
+ "learning_rate": 2.608695652173913e-05,
36
+ "loss": 0.0381,
37
+ "step": 12
38
+ },
39
+ {
40
+ "epoch": 0.19736842105263158,
41
+ "grad_norm": 1.1464006900787354,
42
+ "learning_rate": 3.260869565217392e-05,
43
+ "loss": 0.0329,
44
+ "step": 15
45
+ },
46
+ {
47
+ "epoch": 0.23684210526315788,
48
+ "grad_norm": 0.22914494574069977,
49
+ "learning_rate": 3.91304347826087e-05,
50
+ "loss": 0.0148,
51
+ "step": 18
52
+ },
53
+ {
54
+ "epoch": 0.27631578947368424,
55
+ "grad_norm": 1.2268567085266113,
56
+ "learning_rate": 4.565217391304348e-05,
57
+ "loss": 0.05,
58
+ "step": 21
59
+ },
60
+ {
61
+ "epoch": 0.3157894736842105,
62
+ "grad_norm": 0.9654967188835144,
63
+ "learning_rate": 4.975609756097561e-05,
64
+ "loss": 0.0551,
65
+ "step": 24
66
+ },
67
+ {
68
+ "epoch": 0.35526315789473684,
69
+ "grad_norm": 0.9826757907867432,
70
+ "learning_rate": 4.902439024390244e-05,
71
+ "loss": 0.0466,
72
+ "step": 27
73
+ },
74
+ {
75
+ "epoch": 0.39473684210526316,
76
+ "grad_norm": 1.1838877201080322,
77
+ "learning_rate": 4.829268292682927e-05,
78
+ "loss": 0.0507,
79
+ "step": 30
80
+ },
81
+ {
82
+ "epoch": 0.4342105263157895,
83
+ "grad_norm": 0.40032073855400085,
84
+ "learning_rate": 4.75609756097561e-05,
85
+ "loss": 0.05,
86
+ "step": 33
87
+ },
88
+ {
89
+ "epoch": 0.47368421052631576,
90
+ "grad_norm": 0.7170906662940979,
91
+ "learning_rate": 4.682926829268293e-05,
92
+ "loss": 0.0338,
93
+ "step": 36
94
+ },
95
+ {
96
+ "epoch": 0.5131578947368421,
97
+ "grad_norm": 0.31745052337646484,
98
+ "learning_rate": 4.609756097560976e-05,
99
+ "loss": 0.0167,
100
+ "step": 39
101
+ },
102
+ {
103
+ "epoch": 0.5526315789473685,
104
+ "grad_norm": 0.7707906365394592,
105
+ "learning_rate": 4.536585365853659e-05,
106
+ "loss": 0.02,
107
+ "step": 42
108
+ },
109
+ {
110
+ "epoch": 0.5921052631578947,
111
+ "grad_norm": 0.7838310599327087,
112
+ "learning_rate": 4.4634146341463416e-05,
113
+ "loss": 0.0212,
114
+ "step": 45
115
+ },
116
+ {
117
+ "epoch": 0.631578947368421,
118
+ "grad_norm": 1.0543047189712524,
119
+ "learning_rate": 4.390243902439025e-05,
120
+ "loss": 0.0372,
121
+ "step": 48
122
+ },
123
+ {
124
+ "epoch": 0.6710526315789473,
125
+ "grad_norm": 0.4342570900917053,
126
+ "learning_rate": 4.317073170731707e-05,
127
+ "loss": 0.0257,
128
+ "step": 51
129
+ },
130
+ {
131
+ "epoch": 0.7105263157894737,
132
+ "grad_norm": 0.37514448165893555,
133
+ "learning_rate": 4.2439024390243905e-05,
134
+ "loss": 0.0198,
135
+ "step": 54
136
+ },
137
+ {
138
+ "epoch": 0.75,
139
+ "grad_norm": 0.3972807824611664,
140
+ "learning_rate": 4.170731707317073e-05,
141
+ "loss": 0.0234,
142
+ "step": 57
143
+ },
144
+ {
145
+ "epoch": 0.7894736842105263,
146
+ "grad_norm": 0.5491408705711365,
147
+ "learning_rate": 4.097560975609756e-05,
148
+ "loss": 0.0671,
149
+ "step": 60
150
+ },
151
+ {
152
+ "epoch": 0.8289473684210527,
153
+ "grad_norm": 0.46997275948524475,
154
+ "learning_rate": 4.0243902439024395e-05,
155
+ "loss": 0.0457,
156
+ "step": 63
157
+ },
158
+ {
159
+ "epoch": 0.868421052631579,
160
+ "grad_norm": 0.7849195599555969,
161
+ "learning_rate": 3.951219512195122e-05,
162
+ "loss": 0.0409,
163
+ "step": 66
164
+ },
165
+ {
166
+ "epoch": 0.9078947368421053,
167
+ "grad_norm": 0.7742244005203247,
168
+ "learning_rate": 3.878048780487805e-05,
169
+ "loss": 0.0606,
170
+ "step": 69
171
+ },
172
+ {
173
+ "epoch": 0.9473684210526315,
174
+ "grad_norm": 0.23496082425117493,
175
+ "learning_rate": 3.804878048780488e-05,
176
+ "loss": 0.0279,
177
+ "step": 72
178
+ },
179
+ {
180
+ "epoch": 0.9868421052631579,
181
+ "grad_norm": 0.45524537563323975,
182
+ "learning_rate": 3.731707317073171e-05,
183
+ "loss": 0.0181,
184
+ "step": 75
185
+ },
186
+ {
187
+ "epoch": 1.0,
188
+ "eval_explained_variance": 0.020441055297851562,
189
+ "eval_loss": 0.02739923633635044,
190
+ "eval_mae": 0.10979370772838593,
191
+ "eval_mse": 0.02739923633635044,
192
+ "eval_r2": 0.008571624755859375,
193
+ "eval_rmse": 0.1655271468259827,
194
+ "eval_runtime": 11.5516,
195
+ "eval_samples_per_second": 13.072,
196
+ "eval_steps_per_second": 0.866,
197
+ "step": 76
198
+ }
199
+ ],
200
+ "logging_steps": 3,
201
+ "max_steps": 228,
202
+ "num_input_tokens_seen": 0,
203
+ "num_train_epochs": 3,
204
+ "save_steps": 500,
205
+ "stateful_callbacks": {
206
+ "EarlyStoppingCallback": {
207
+ "args": {
208
+ "early_stopping_patience": 5,
209
+ "early_stopping_threshold": 0.01
210
+ },
211
+ "attributes": {
212
+ "early_stopping_patience_counter": 0
213
+ }
214
+ },
215
+ "TrainerControl": {
216
+ "args": {
217
+ "should_epoch_stop": false,
218
+ "should_evaluate": false,
219
+ "should_log": false,
220
+ "should_save": true,
221
+ "should_training_stop": false
222
+ },
223
+ "attributes": {}
224
+ }
225
+ },
226
+ "total_flos": 20002220479488.0,
227
+ "train_batch_size": 8,
228
+ "trial_name": null,
229
+ "trial_params": null
230
+ }
checkpoint-76/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d216198309fb6f9912636e9dd495f35f3880a6122867d7ede41d12c540bd4b
3
+ size 5368
config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert/distilbert-base-uncased",
3
+ "_num_labels": 1,
4
+ "activation": "gelu",
5
+ "architectures": [
6
+ "DistilBertForSequenceClassification"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "dim": 768,
10
+ "dropout": 0.1,
11
+ "hidden_dim": 3072,
12
+ "id2label": {
13
+ "0": "target"
14
+ },
15
+ "initializer_range": 0.02,
16
+ "label2id": {
17
+ "target": 0
18
+ },
19
+ "max_position_embeddings": 512,
20
+ "model_type": "distilbert",
21
+ "n_heads": 12,
22
+ "n_layers": 6,
23
+ "pad_token_id": 0,
24
+ "problem_type": "regression",
25
+ "qa_dropout": 0.1,
26
+ "seq_classif_dropout": 0.2,
27
+ "sinusoidal_pos_embds": false,
28
+ "tie_weights_": true,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.48.0",
31
+ "vocab_size": 30522
32
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0d2e4c01f155a5450cc49916c41293c1a12ba56a436b914d0beca432831ce28
3
+ size 267829484
runs/Jul13_15-49-14_r-sebsa-asai-newstextbert-r5u4m245-e6f67-mmh1x/events.out.tfevents.1752421756.r-sebsa-asai-newstextbert-r5u4m245-e6f67-mmh1x.109.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c9ae7967377838e0ea15e0c1839f86b2b86886ead99fe8d7e64967d91770946
3
- size 5064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee19744418d8b178af2594531b7595dbb37cc0f18f00f5c065f2b97abed144e1
3
+ size 22839
runs/Jul13_15-49-14_r-sebsa-asai-newstextbert-r5u4m245-e6f67-mmh1x/events.out.tfevents.1752422343.r-sebsa-asai-newstextbert-r5u4m245-e6f67-mmh1x.109.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb7feb9ef892653907fcae22ef06b577d4e6e56897aafade45368175104dc884
3
+ size 609
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": false,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "extra_special_tokens": {},
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7d216198309fb6f9912636e9dd495f35f3880a6122867d7ede41d12c540bd4b
3
+ size 5368
training_params.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-bert-arg/autotrain-data",
3
+ "model": "distilbert/distilbert-base-uncased",
4
+ "lr": 5e-05,
5
+ "epochs": 3,
6
+ "max_seq_length": 128,
7
+ "batch_size": 8,
8
+ "warmup_ratio": 0.1,
9
+ "gradient_accumulation": 1,
10
+ "optimizer": "adamw_torch",
11
+ "scheduler": "linear",
12
+ "weight_decay": 0.0,
13
+ "max_grad_norm": 1.0,
14
+ "seed": 42,
15
+ "train_split": "train",
16
+ "valid_split": "validation",
17
+ "text_column": "autotrain_text",
18
+ "target_column": "autotrain_label",
19
+ "logging_steps": -1,
20
+ "project_name": "autotrain-bert-arg",
21
+ "auto_find_batch_size": false,
22
+ "mixed_precision": "fp16",
23
+ "save_total_limit": 1,
24
+ "push_to_hub": true,
25
+ "eval_strategy": "epoch",
26
+ "username": "Sebsa",
27
+ "log": "tensorboard",
28
+ "early_stopping_patience": 5,
29
+ "early_stopping_threshold": 0.01
30
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff