anhnct committed on
Commit
8810bbb
·
verified ·
1 Parent(s): 17e6ef7

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: outputs/Simp_22_1_2026
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
14
+ <details><summary>See axolotl config</summary>
15
+
16
+ axolotl version: `0.8.0`
17
+ ```yaml
18
+ base_model: /root/anhnct/Spark-TTS-finetune/extend_vocab_pretrained/LLM
19
+ # Automatically upload checkpoint and final model to HF
20
+ # hub_model_id: username/custom_model_name
21
+
22
+ trust_remote_code: true
23
+
24
+ strict: false
25
+
26
+ datasets:
27
+ - path: .
28
+ data_files: ["/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/elevenlab_dataset_3.jsonl", "/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/elevenlab_dataset_4.jsonl", "/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/elevenlab_dataset_reflex.jsonl", "/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/elevenlab_slow.jsonl", "/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/hf_song_ngu.jsonl", "/root/anhnct/Spark-TTS-finetune/PROMPTS/product_ft_data/LibriTTS.jsonl"]
29
+ type: completion
30
+
31
+ dataset_prepared_path:
32
+ val_set_size: 0.05
33
+ output_dir: ./outputs/Simp_22_1_2026
34
+
35
+
36
+ sequence_len: 2048
37
+ sample_packing: true
38
+ eval_sample_packing: false
39
+ pad_to_sequence_len: true
40
+
41
+ wandb_project:
42
+ wandb_entity:
43
+ wandb_watch:
44
+ wandb_name:
45
+ wandb_log_model:
46
+
47
+ gradient_accumulation_steps: 1
48
+ micro_batch_size: 8
49
+ num_epochs: 10
50
+ optimizer: adamw_torch_fused
51
+ lr_scheduler: cosine
52
+ learning_rate: 0.0002
53
+
54
+ train_on_inputs: false
55
+ group_by_length: false
56
+ bf16: auto
57
+ fp16:
58
+ tf32: true
59
+
60
+ gradient_checkpointing: false
61
+ gradient_checkpointing_kwargs:
62
+ use_reentrant: false
63
+ early_stopping_patience:
64
+ resume_from_checkpoint:
65
+ local_rank:
66
+ logging_steps: 50
67
+ xformers_attention:
68
+ flash_attention: true
69
+
70
+ warmup_steps: 10
71
+ evals_per_epoch: 1
72
+ save_steps: 10000
73
+ save_total_limit: 100
74
+ debug:
75
+ deepspeed:
76
+ weight_decay: 0.0
77
+
78
+ ```
79
+
80
+ </details><br>
81
+
82
+ # outputs/Simp_22_1_2026
83
+
84
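+ This model was fine-tuned from the local base checkpoint `/root/anhnct/Spark-TTS-finetune/extend_vocab_pretrained/LLM` on the JSONL datasets listed under `datasets.data_files` in the axolotl config above.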
85
+ It achieves the following results on the evaluation set:
86
+ - Loss: 5.3568
87
+
88
+ ## Model description
89
+
90
+ More information needed
91
+
92
+ ## Intended uses & limitations
93
+
94
+ More information needed
95
+
96
+ ## Training and evaluation data
97
+
98
+ More information needed
99
+
100
+ ## Training procedure
101
+
102
+ ### Training hyperparameters
103
+
104
+ The following hyperparameters were used during training:
105
+ - learning_rate: 0.0002
106
+ - train_batch_size: 8
107
+ - eval_batch_size: 8
108
+ - seed: 42
109
+ - optimizer: adamw_torch_fused with betas=(0.9, 0.999) and epsilon=1e-08 (no additional optimizer arguments)
110
+ - lr_scheduler_type: cosine
111
+ - lr_scheduler_warmup_steps: 10
112
+ - num_epochs: 10.0
113
+
114
+ ### Training results
115
+
116
+ | Training Loss | Epoch | Step | Validation Loss |
117
+ |:-------------:|:------:|:-----:|:---------------:|
118
+ | No log | 0.0005 | 1 | 5.6361 |
119
+ | 4.5777 | 1.0 | 2216 | 5.3235 |
120
+ | 4.5116 | 2.0 | 4432 | 5.3313 |
121
+ | 4.4611 | 3.0 | 6648 | 5.3390 |
122
+ | 4.4496 | 4.0 | 8864 | 5.3471 |
123
+ | 4.4141 | 5.0 | 11080 | 5.3521 |
124
+ | 4.4031 | 6.0 | 13296 | 5.3541 |
125
+ | 4.4174 | 7.0 | 15512 | 5.3562 |
126
+ | 4.4071 | 8.0 | 17728 | 5.3561 |
127
+ | 4.4179 | 9.0 | 19944 | 5.3567 |
128
+ | 4.3882 | 10.0 | 22160 | 5.3568 |
129
+
130
+
131
+ ### Framework versions
132
+
133
+ - Transformers 4.50.3
134
+ - Pytorch 2.6.0+cu124
135
+ - Datasets 3.5.0
136
+ - Tokenizers 0.21.4
added_tokens.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "eos_token_id": 151645,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 896,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 4864,
11
+ "max_position_embeddings": 32768,
12
+ "max_window_layers": 21,
13
+ "model_type": "qwen2",
14
+ "num_attention_heads": 14,
15
+ "num_hidden_layers": 24,
16
+ "num_key_value_heads": 2,
17
+ "rms_norm_eps": 1e-06,
18
+ "rope_scaling": null,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 32768,
21
+ "tie_word_embeddings": true,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.50.3",
24
+ "use_cache": false,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 165167
27
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 151643,
4
+ "do_sample": true,
5
+ "eos_token_id": 151645,
6
+ "transformers_version": "4.50.3"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d6d41b3696b398d96de5f74b58abddf394dc8ac57446f35ebbfb357691deb45
3
+ size 1011807800
special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e754b7a11590bc1467423e545a4bdc824009e387f81ac6d7bc30b23c06accefd
3
+ size 14130827
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
vocab.json ADDED
The diff for this file is too large to render. See raw diff