Improve language tag

#1
by lbourdois - opened
Files changed (1) hide show
  1. README.md +128 -117
README.md CHANGED
@@ -1,117 +1,128 @@
1
-
2
- ---
3
-
4
- license: apache-2.0
5
- base_model:
6
- - Qwen/Qwen2.5-7B
7
- library_name: transformers
8
-
9
- ---
10
-
11
- [![QuantFactory Banner](https://lh7-rt.googleusercontent.com/docsz/AD_4nXeiuCm7c8lEwEJuRey9kiVZsRn2W-b4pWlu3-X534V3YmVuVc2ZL-NXg2RkzSOOS2JXGHutDuyyNAUtdJI65jGTo8jT9Y99tMi4H4MqL44Uc5QKG77B0d6-JfIkZHFaUA71-RtjyYZWVIhqsNZcx8-OMaA?key=xt3VSDoCbmTY7o-cwwOFwQ)](https://hf.co/QuantFactory)
12
-
13
-
14
- # QuantFactory/Vapor_7B-GGUF
15
- This is quantized version of [FourOhFour/Vapor_7B](https://huggingface.co/FourOhFour/Vapor_7B) created using llama.cpp
16
-
17
- # Original Model Card
18
-
19
-
20
- ```
21
- base_model: Qwen/Qwen2.5-7B
22
- model_type: AutoModelForCausalLM
23
- tokenizer_type: AutoTokenizer
24
-
25
- load_in_8bit: false
26
- load_in_4bit: false
27
- strict: false
28
-
29
- datasets:
30
- - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
31
- type: sharegpt
32
- conversation: chatml
33
- - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
34
- type: sharegpt
35
- conversation: chatml
36
- - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
37
- type: sharegpt
38
- conversation: chatml
39
- - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
40
- type: sharegpt
41
- conversation: chatml
42
- - path: Nitral-AI/Reasoning-1shot_ShareGPT
43
- type: sharegpt
44
- conversation: chatml
45
- - path: Nitral-AI/GU_Instruct-ShareGPT
46
- type: sharegpt
47
- conversation: chatml
48
- - path: Nitral-AI/Medical_Instruct-ShareGPT
49
- type: sharegpt
50
- conversation: chatml
51
-
52
- chat_template: chatml
53
-
54
- val_set_size: 0.01
55
- output_dir: ./outputs/out
56
-
57
- adapter:
58
- lora_r:
59
- lora_alpha:
60
- lora_dropout:
61
- lora_target_linear:
62
-
63
- sequence_len: 8192
64
- # sequence_len: 32768
65
- sample_packing: true
66
- eval_sample_packing: false
67
- pad_to_sequence_len: true
68
-
69
- plugins:
70
- - axolotl.integrations.liger.LigerPlugin
71
- liger_rope: true
72
- liger_rms_norm: true
73
- liger_swiglu: true
74
- liger_fused_linear_cross_entropy: true
75
-
76
- wandb_project: qwen7B
77
- wandb_entity:
78
- wandb_watch:
79
- wandb_name: qwen7B
80
- wandb_log_model:
81
-
82
- gradient_accumulation_steps: 32
83
- micro_batch_size: 1
84
- num_epochs: 2
85
- optimizer: adamw_bnb_8bit
86
- lr_scheduler: cosine
87
- learning_rate: 0.00001
88
- weight_decay: 0.05
89
-
90
- train_on_inputs: false
91
- group_by_length: false
92
- bf16: auto
93
- fp16:
94
- tf32: true
95
-
96
- gradient_checkpointing: true
97
- early_stopping_patience:
98
- resume_from_checkpoint:
99
- local_rank:
100
- logging_steps: 1
101
- xformers_attention:
102
- flash_attention: true
103
-
104
- warmup_ratio: 0.1
105
- evals_per_epoch: 4
106
- eval_table_size:
107
- eval_max_new_tokens: 128
108
- saves_per_epoch: 2
109
-
110
- debug:
111
- deepspeed:
112
- fsdp:
113
- fsdp_config:
114
-
115
- special_tokens:
116
- pad_token: <pad>
117
- ```
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model:
4
+ - Qwen/Qwen2.5-7B
5
+ library_name: transformers
6
+ language:
7
+ - zho
8
+ - eng
9
+ - fra
10
+ - spa
11
+ - por
12
+ - deu
13
+ - ita
14
+ - rus
15
+ - jpn
16
+ - kor
17
+ - vie
18
+ - tha
19
+ - ara
20
+ ---
21
+
22
+ [![QuantFactory Banner](https://lh7-rt.googleusercontent.com/docsz/AD_4nXeiuCm7c8lEwEJuRey9kiVZsRn2W-b4pWlu3-X534V3YmVuVc2ZL-NXg2RkzSOOS2JXGHutDuyyNAUtdJI65jGTo8jT9Y99tMi4H4MqL44Uc5QKG77B0d6-JfIkZHFaUA71-RtjyYZWVIhqsNZcx8-OMaA?key=xt3VSDoCbmTY7o-cwwOFwQ)](https://hf.co/QuantFactory)
23
+
24
+
25
+ # QuantFactory/Vapor_7B-GGUF
26
+ This is a quantized version of [FourOhFour/Vapor_7B](https://huggingface.co/FourOhFour/Vapor_7B) created using llama.cpp.
27
+
28
+ # Original Model Card
29
+
30
+
31
+ ```
32
+ base_model: Qwen/Qwen2.5-7B
33
+ model_type: AutoModelForCausalLM
34
+ tokenizer_type: AutoTokenizer
35
+
36
+ load_in_8bit: false
37
+ load_in_4bit: false
38
+ strict: false
39
+
40
+ datasets:
41
+ - path: PocketDoc/Dans-MemoryCore-CoreCurriculum-Small
42
+ type: sharegpt
43
+ conversation: chatml
44
+ - path: NewEden/Kalo-Opus-Instruct-22k-Refusal-Murdered
45
+ type: sharegpt
46
+ conversation: chatml
47
+ - path: Epiculous/Synthstruct-Gens-v1.1-Filtered-n-Cleaned
48
+ type: sharegpt
49
+ conversation: chatml
50
+ - path: NewEden/Gryphe-Sonnet-3.5-35k-Subset
51
+ type: sharegpt
52
+ conversation: chatml
53
+ - path: Nitral-AI/Reasoning-1shot_ShareGPT
54
+ type: sharegpt
55
+ conversation: chatml
56
+ - path: Nitral-AI/GU_Instruct-ShareGPT
57
+ type: sharegpt
58
+ conversation: chatml
59
+ - path: Nitral-AI/Medical_Instruct-ShareGPT
60
+ type: sharegpt
61
+ conversation: chatml
62
+
63
+ chat_template: chatml
64
+
65
+ val_set_size: 0.01
66
+ output_dir: ./outputs/out
67
+
68
+ adapter:
69
+ lora_r:
70
+ lora_alpha:
71
+ lora_dropout:
72
+ lora_target_linear:
73
+
74
+ sequence_len: 8192
75
+ # sequence_len: 32768
76
+ sample_packing: true
77
+ eval_sample_packing: false
78
+ pad_to_sequence_len: true
79
+
80
+ plugins:
81
+ - axolotl.integrations.liger.LigerPlugin
82
+ liger_rope: true
83
+ liger_rms_norm: true
84
+ liger_swiglu: true
85
+ liger_fused_linear_cross_entropy: true
86
+
87
+ wandb_project: qwen7B
88
+ wandb_entity:
89
+ wandb_watch:
90
+ wandb_name: qwen7B
91
+ wandb_log_model:
92
+
93
+ gradient_accumulation_steps: 32
94
+ micro_batch_size: 1
95
+ num_epochs: 2
96
+ optimizer: adamw_bnb_8bit
97
+ lr_scheduler: cosine
98
+ learning_rate: 0.00001
99
+ weight_decay: 0.05
100
+
101
+ train_on_inputs: false
102
+ group_by_length: false
103
+ bf16: auto
104
+ fp16:
105
+ tf32: true
106
+
107
+ gradient_checkpointing: true
108
+ early_stopping_patience:
109
+ resume_from_checkpoint:
110
+ local_rank:
111
+ logging_steps: 1
112
+ xformers_attention:
113
+ flash_attention: true
114
+
115
+ warmup_ratio: 0.1
116
+ evals_per_epoch: 4
117
+ eval_table_size:
118
+ eval_max_new_tokens: 128
119
+ saves_per_epoch: 2
120
+
121
+ debug:
122
+ deepspeed:
123
+ fsdp:
124
+ fsdp_config:
125
+
126
+ special_tokens:
127
+ pad_token: <pad>
128
+ ```