yigagilbert committed on
Commit
95ded89
·
verified ·
1 Parent(s): 55d0cbd

yigagilbert/sunflower_language_ID

Browse files
README.md CHANGED
@@ -1,8 +1,9 @@
1
  ---
 
2
  license: apache-2.0
 
3
  tags:
4
  - generated_from_trainer
5
- base_model: google/t5-efficient-tiny
6
  datasets:
7
  - generator
8
  metrics:
@@ -11,8 +12,8 @@ model-index:
11
  - name: salt_language_ID
12
  results:
13
  - task:
14
- type: text2text-generation
15
  name: Sequence-to-sequence Language Modeling
 
16
  dataset:
17
  name: generator
18
  type: generator
@@ -20,9 +21,9 @@ model-index:
20
  split: train
21
  args: default
22
  metrics:
23
- - type: accuracy
24
- value: 0.980510752688172
25
- name: Accuracy
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +33,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [google/t5-efficient-tiny](https://huggingface.co/google/t5-efficient-tiny) on the generator dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 0.0127
36
- - Accuracy: 0.9805
37
 
38
  ## Model description
39
 
@@ -56,7 +57,7 @@ The following hyperparameters were used during training:
56
  - train_batch_size: 64
57
  - eval_batch_size: 64
58
  - seed: 42
59
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
60
  - lr_scheduler_type: linear
61
  - lr_scheduler_warmup_steps: 10
62
  - training_steps: 20000
@@ -65,51 +66,51 @@ The following hyperparameters were used during training:
65
 
66
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
67
  |:-------------:|:-----:|:-----:|:---------------:|:--------:|
68
- | 0.5069 | 0.025 | 500 | 0.1145 | 0.8337 |
69
- | 0.0644 | 0.05 | 1000 | 0.0489 | 0.9170 |
70
- | 0.0511 | 0.075 | 1500 | 0.0605 | 0.9056 |
71
- | 0.0462 | 0.1 | 2000 | 0.0332 | 0.9432 |
72
- | 0.0411 | 0.125 | 2500 | 0.0358 | 0.9385 |
73
- | 0.0409 | 0.15 | 3000 | 0.0267 | 0.9509 |
74
- | 0.0365 | 0.175 | 3500 | 0.0244 | 0.9563 |
75
- | 0.0359 | 0.2 | 4000 | 0.0285 | 0.9536 |
76
- | 0.035 | 0.225 | 4500 | 0.0355 | 0.9388 |
77
- | 0.0321 | 0.25 | 5000 | 0.0264 | 0.9570 |
78
- | 0.0327 | 0.275 | 5500 | 0.0278 | 0.9513 |
79
- | 0.0313 | 0.3 | 6000 | 0.0217 | 0.9630 |
80
- | 0.0305 | 0.325 | 6500 | 0.0255 | 0.9556 |
81
- | 0.0285 | 0.35 | 7000 | 0.0187 | 0.9630 |
82
- | 0.0293 | 0.375 | 7500 | 0.0225 | 0.9620 |
83
- | 0.0264 | 0.4 | 8000 | 0.0228 | 0.9614 |
84
- | 0.0272 | 0.425 | 8500 | 0.0195 | 0.9664 |
85
- | 0.0268 | 0.45 | 9000 | 0.0178 | 0.9688 |
86
- | 0.0259 | 0.475 | 9500 | 0.0164 | 0.9677 |
87
- | 0.0256 | 0.5 | 10000 | 0.0167 | 0.9721 |
88
- | 0.0241 | 0.525 | 10500 | 0.0182 | 0.9647 |
89
- | 0.0235 | 0.55 | 11000 | 0.0212 | 0.9657 |
90
- | 0.0239 | 0.575 | 11500 | 0.0145 | 0.9735 |
91
- | 0.0239 | 0.6 | 12000 | 0.0173 | 0.9704 |
92
- | 0.0234 | 0.625 | 12500 | 0.0152 | 0.9768 |
93
- | 0.0229 | 0.65 | 13000 | 0.0181 | 0.9698 |
94
- | 0.023 | 0.675 | 13500 | 0.0154 | 0.9735 |
95
- | 0.0224 | 0.7 | 14000 | 0.0157 | 0.9708 |
96
- | 0.0221 | 0.725 | 14500 | 0.0155 | 0.9714 |
97
- | 0.0219 | 0.75 | 15000 | 0.0145 | 0.9755 |
98
- | 0.0213 | 0.775 | 15500 | 0.0159 | 0.9735 |
99
- | 0.0197 | 0.8 | 16000 | 0.0129 | 0.9751 |
100
- | 0.0206 | 0.825 | 16500 | 0.0154 | 0.9724 |
101
- | 0.02 | 0.85 | 17000 | 0.0140 | 0.9724 |
102
- | 0.0209 | 0.875 | 17500 | 0.0115 | 0.9772 |
103
- | 0.0191 | 0.9 | 18000 | 0.0129 | 0.9735 |
104
- | 0.0194 | 0.925 | 18500 | 0.0120 | 0.9765 |
105
- | 0.0191 | 0.95 | 19000 | 0.0133 | 0.9741 |
106
- | 0.0183 | 0.975 | 19500 | 0.0166 | 0.9731 |
107
- | 0.0207 | 1.0 | 20000 | 0.0127 | 0.9805 |
108
 
109
 
110
  ### Framework versions
111
 
112
- - Transformers 4.40.2
113
- - Pytorch 2.2.1+cu121
114
- - Datasets 2.19.1
115
- - Tokenizers 0.19.1
 
1
  ---
2
+ library_name: transformers
3
  license: apache-2.0
4
+ base_model: google/t5-efficient-tiny
5
  tags:
6
  - generated_from_trainer
 
7
  datasets:
8
  - generator
9
  metrics:
 
12
  - name: salt_language_ID
13
  results:
14
  - task:
 
15
  name: Sequence-to-sequence Language Modeling
16
+ type: text2text-generation
17
  dataset:
18
  name: generator
19
  type: generator
 
21
  split: train
22
  args: default
23
  metrics:
24
+ - name: Accuracy
25
+ type: accuracy
26
+ value: 0.608582394590625
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
33
 
34
  This model is a fine-tuned version of [google/t5-efficient-tiny](https://huggingface.co/google/t5-efficient-tiny) on the generator dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.4200
37
+ - Accuracy: 0.6086
38
 
39
  ## Model description
40
 
 
57
  - train_batch_size: 64
58
  - eval_batch_size: 64
59
  - seed: 42
60
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
61
  - lr_scheduler_type: linear
62
  - lr_scheduler_warmup_steps: 10
63
  - training_steps: 20000
 
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
68
  |:-------------:|:-----:|:-----:|:---------------:|:--------:|
69
+ | 0.9948 | 0.025 | 500 | 0.7153 | 0.1757 |
70
+ | 0.3269 | 0.05 | 1000 | 0.7217 | 0.2611 |
71
+ | 0.2853 | 0.075 | 1500 | 0.9151 | 0.2412 |
72
+ | 0.1823 | 0.1 | 2000 | 0.5561 | 0.3965 |
73
+ | 0.1953 | 0.125 | 2500 | 0.5975 | 0.3824 |
74
+ | 0.1831 | 0.15 | 3000 | 0.5670 | 0.4264 |
75
+ | 0.141 | 0.175 | 3500 | 0.7885 | 0.3443 |
76
+ | 0.1081 | 0.2 | 4000 | 0.8961 | 0.3111 |
77
+ | 0.154 | 0.225 | 4500 | 0.7975 | 0.3491 |
78
+ | 0.1306 | 0.25 | 5000 | 0.4824 | 0.5092 |
79
+ | 0.1013 | 0.275 | 5500 | 0.4946 | 0.4613 |
80
+ | 0.1083 | 0.3 | 6000 | 0.6959 | 0.4038 |
81
+ | 0.1121 | 0.325 | 6500 | 0.6938 | 0.4004 |
82
+ | 0.1168 | 0.35 | 7000 | 0.7787 | 0.3948 |
83
+ | 0.1202 | 0.375 | 7500 | 0.5420 | 0.4975 |
84
+ | 0.1169 | 0.4 | 8000 | 0.5099 | 0.5128 |
85
+ | 0.1119 | 0.425 | 8500 | 0.5815 | 0.4582 |
86
+ | 0.1258 | 0.45 | 9000 | 0.5103 | 0.5002 |
87
+ | 0.0878 | 0.475 | 9500 | 0.5189 | 0.5089 |
88
+ | 0.1032 | 0.5 | 10000 | 0.4365 | 0.5674 |
89
+ | 0.0854 | 0.525 | 10500 | 0.5854 | 0.5176 |
90
+ | 0.1028 | 0.55 | 11000 | 0.5167 | 0.5253 |
91
+ | 0.0853 | 0.575 | 11500 | 0.4268 | 0.5922 |
92
+ | 0.0716 | 0.6 | 12000 | 0.5486 | 0.5204 |
93
+ | 0.0771 | 0.625 | 12500 | 0.4643 | 0.5532 |
94
+ | 0.0613 | 0.65 | 13000 | 0.5525 | 0.5050 |
95
+ | 0.0819 | 0.675 | 13500 | 0.4500 | 0.5953 |
96
+ | 0.0785 | 0.7 | 14000 | 0.5016 | 0.5245 |
97
+ | 0.079 | 0.725 | 14500 | 0.4453 | 0.5789 |
98
+ | 0.0749 | 0.75 | 15000 | 0.4218 | 0.5866 |
99
+ | 0.0749 | 0.775 | 15500 | 0.4208 | 0.6114 |
100
+ | 0.0655 | 0.8 | 16000 | 0.4203 | 0.6133 |
101
+ | 0.077 | 0.825 | 16500 | 0.4446 | 0.5891 |
102
+ | 0.0516 | 0.85 | 17000 | 0.4239 | 0.5985 |
103
+ | 0.0555 | 0.875 | 17500 | 0.4040 | 0.6237 |
104
+ | 0.0622 | 0.9 | 18000 | 0.4575 | 0.5978 |
105
+ | 0.0752 | 0.925 | 18500 | 0.4257 | 0.5959 |
106
+ | 0.0555 | 0.95 | 19000 | 0.4462 | 0.5997 |
107
+ | 0.0646 | 0.975 | 19500 | 0.4225 | 0.6124 |
108
+ | 0.0676 | 1.0 | 20000 | 0.4200 | 0.6086 |
109
 
110
 
111
  ### Framework versions
112
 
113
+ - Transformers 4.57.1
114
+ - Pytorch 2.8.0+cu126
115
+ - Datasets 4.4.0
116
+ - Tokenizers 0.22.1
config.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "_name_or_path": "google/t5-efficient-tiny",
3
  "architectures": [
4
  "T5ForConditionalGeneration"
5
  ],
@@ -10,6 +9,7 @@
10
  "decoder_start_token_id": 0,
11
  "dense_act_fn": "relu",
12
  "dropout_rate": 0.1,
 
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "relu",
15
  "initializer_factor": 1.0,
@@ -24,8 +24,7 @@
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
- "torch_dtype": "float32",
28
- "transformers_version": "4.40.2",
29
  "use_cache": true,
30
  "vocab_size": 32128
31
  }
 
1
  {
 
2
  "architectures": [
3
  "T5ForConditionalGeneration"
4
  ],
 
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "relu",
11
  "dropout_rate": 0.1,
12
+ "dtype": "float32",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "relu",
15
  "initializer_factor": 1.0,
 
24
  "pad_token_id": 0,
25
  "relative_attention_max_distance": 128,
26
  "relative_attention_num_buckets": 32,
27
+ "transformers_version": "4.57.1",
 
28
  "use_cache": true,
29
  "vocab_size": 32128
30
  }
generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
- "transformers_version": "4.40.2"
6
  }
 
2
  "decoder_start_token_id": 0,
3
  "eos_token_id": 1,
4
  "pad_token_id": 0,
5
+ "transformers_version": "4.57.1"
6
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da8e3f5353a304aaa3427bcc1a832edd023f325f520fd821b629705c47e723f4
3
  size 62293080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7e90d49e4c7e9e993800a6427094c6e3694fed5d41649c66ecec07975899ec3
3
  size 62293080
runs/Nov05_12-55-45_254b22cf8d87/events.out.tfevents.1762347355.254b22cf8d87.856.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efad07ff5857d7df922dce0628e8b2e3b814d9d9328159d3bb608f7a8ab720fe
3
+ size 5316
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -927,9 +928,10 @@
927
  "<extra_id_98>",
928
  "<extra_id_99>"
929
  ],
930
- "clean_up_tokenization_spaces": true,
931
  "eos_token": "</s>",
932
  "extra_ids": 100,
 
933
  "model_max_length": 1000000000000000019884624838656,
934
  "pad_token": "<pad>",
935
  "sp_model_kwargs": {},
 
1
  {
2
+ "add_prefix_space": null,
3
  "added_tokens_decoder": {
4
  "0": {
5
  "content": "<pad>",
 
928
  "<extra_id_98>",
929
  "<extra_id_99>"
930
  ],
931
+ "clean_up_tokenization_spaces": false,
932
  "eos_token": "</s>",
933
  "extra_ids": 100,
934
+ "extra_special_tokens": {},
935
  "model_max_length": 1000000000000000019884624838656,
936
  "pad_token": "<pad>",
937
  "sp_model_kwargs": {},
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbf65a2117c3857ff03b0cfa6555fb01aed1b57452bc916c71367155aa1fd6a5
3
- size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e33217ca5619b1e381fcb22e86ca10b8e90d8327b18bd3187e12c0987a58d91
3
+ size 6033