KitsuVp committed on
Commit
d2e1abb
·
verified ·
1 Parent(s): b8e000c

Model save

Browse files
Files changed (5) hide show
  1. README.md +15 -21
  2. config.json +5 -1
  3. generation_config.json +1 -1
  4. model.safetensors +2 -2
  5. training_args.bin +1 -1
README.md CHANGED
@@ -14,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 3.3235
18
 
19
  ## Model description
20
 
@@ -44,28 +44,22 @@ The following hyperparameters were used during training:
44
 
45
  ### Training results
46
 
47
- | Training Loss | Epoch | Step | Validation Loss |
48
- |:-------------:|:-----:|:-----:|:---------------:|
49
- | 3.901 | 0.064 | 5000 | 3.8367 |
50
- | 3.6772 | 0.128 | 10000 | 3.6281 |
51
- | 3.5953 | 0.192 | 15000 | 3.5480 |
52
- | 3.5516 | 0.256 | 20000 | 3.5038 |
53
- | 3.5219 | 0.32 | 25000 | 3.4746 |
54
- | 3.5077 | 0.384 | 30000 | 3.4526 |
55
- | 3.4891 | 0.448 | 35000 | 3.4360 |
56
- | 3.4705 | 0.512 | 40000 | 3.4208 |
57
- | 3.464 | 0.576 | 45000 | 3.4114 |
58
- | 3.4546 | 0.64 | 50000 | 3.3989 |
59
- | 3.4452 | 0.704 | 55000 | 3.3886 |
60
- | 3.4383 | 0.768 | 60000 | 3.3772 |
61
- | 3.4148 | 0.832 | 65000 | 3.3581 |
62
- | 3.3919 | 0.896 | 70000 | 3.3376 |
63
- | 3.3815 | 0.96 | 75000 | 3.3235 |
64
 
65
 
66
  ### Framework versions
67
 
68
- - Transformers 4.57.3
69
- - Pytorch 2.8.0+cu128
70
- - Datasets 4.4.2
71
  - Tokenizers 0.22.2
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 2.7250
18
 
19
  ## Model description
20
 
 
44
 
45
  ### Training results
46
 
47
+ | Training Loss | Epoch | Step | Validation Loss |
48
+ |:-------------:|:------:|:-----:|:---------------:|
49
+ | 3.7833 | 0.1067 | 5000 | 3.6697 |
50
+ | 3.4271 | 0.2133 | 10000 | 3.2796 |
51
+ | 3.2851 | 0.32 | 15000 | 3.1214 |
52
+ | 3.2115 | 0.4267 | 20000 | 3.0228 |
53
+ | 3.166 | 0.5333 | 25000 | 2.9708 |
54
+ | 3.118 | 0.64 | 30000 | 2.9159 |
55
+ | 3.0656 | 0.7467 | 35000 | 2.8546 |
56
+ | 2.9881 | 0.8533 | 40000 | 2.7819 |
57
+ | 2.9405 | 0.96 | 45000 | 2.7250 |
 
 
 
 
 
 
58
 
59
 
60
  ### Framework versions
61
 
62
+ - Transformers 4.57.6
63
+ - Pytorch 2.10.0+cu130
64
+ - Datasets 4.5.0
65
  - Tokenizers 0.22.2
config.json CHANGED
@@ -24,11 +24,15 @@
24
  "num_attention_heads": 8,
25
  "num_hidden_layers": 12,
26
  "num_key_value_heads": 2,
 
27
  "pad_token_id": 151643,
28
  "partial_rotary_factor": 0.25,
29
  "rms_norm_eps": 1e-06,
30
  "rope_scaling": null,
31
  "rope_theta": 10000.0,
32
- "transformers_version": "4.57.3",
 
 
 
33
  "vocab_size": 151665
34
  }
 
24
  "num_attention_heads": 8,
25
  "num_hidden_layers": 12,
26
  "num_key_value_heads": 2,
27
+ "num_stack_heads": 4,
28
  "pad_token_id": 151643,
29
  "partial_rotary_factor": 0.25,
30
  "rms_norm_eps": 1e-06,
31
  "rope_scaling": null,
32
  "rope_theta": 10000.0,
33
+ "stack_d_model": 16,
34
+ "stack_slots": 24,
35
+ "transformers_version": "4.57.6",
36
+ "use_stack": true,
37
  "vocab_size": 151665
38
  }
generation_config.json CHANGED
@@ -4,5 +4,5 @@
4
  151643
5
  ],
6
  "pad_token_id": 151643,
7
- "transformers_version": "4.57.3"
8
  }
 
4
  151643
5
  ],
6
  "pad_token_id": 151643,
7
+ "transformers_version": "4.57.6"
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a91c397dc45b9aa4aacd762e3a9a769470ded360eccb80d66418d32b01de278
3
- size 251027488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc2cbf5ebfaeea4dbc63143d14ca208f4b5531357f7f31c2a4688e3b762c510b
3
+ size 251434000
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb17f745b950d54af953674a17895dead3a37d42e72e1c2b7c18dbfd8c0a43d1
3
  size 6033
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22cc0c67d1158f3a68631cff288743112f63360559878c1007d8da0095cfb98f
3
  size 6033