Update v2.yml
Browse files
v2.yml
CHANGED
|
@@ -10,6 +10,8 @@ strict: false
|
|
| 10 |
|
| 11 |
## data
|
| 12 |
datasets:
|
|
|
|
|
|
|
| 13 |
- path: Nitral-AI/ARES-ShareGPT
|
| 14 |
type: dan-chat-advanced
|
| 15 |
- path: PocketDoc/Dans-Logicmaxx-FI-VeriMed
|
|
@@ -38,16 +40,10 @@ datasets:
|
|
| 38 |
type: dan-chat-advanced
|
| 39 |
- path: PocketDoc/Dans-Prosemaxx-Instructwriter-Long
|
| 40 |
type: dan-chat-advanced
|
| 41 |
-
- path: PocketDoc/Dans-Personamaxx-VN
|
| 42 |
-
type: dan-chat-advanced
|
| 43 |
- path: PocketDoc/Dans-Prosemaxx-Cowriter-3-XS
|
| 44 |
type: dan-chat-advanced
|
| 45 |
- path: PocketDoc/Dans-Prosemaxx-InstructWriter-ZeroShot
|
| 46 |
type: dan-chat-advanced
|
| 47 |
-
- path: Delta-Vector/Orion-BlueSky-10K-Complexity
|
| 48 |
-
type: dan-chat-advanced
|
| 49 |
-
- path: Delta-Vector/Orion-Shoujo-AI-Filtered-ShareGPT
|
| 50 |
-
type: dan-chat-advanced
|
| 51 |
- path: PocketDoc/Dans-Benchmaxx-COT
|
| 52 |
type: dan-chat-advanced
|
| 53 |
- path: PocketDoc/Dans-Benchmaxx
|
|
@@ -76,7 +72,7 @@ eval_sample_packing: false
|
|
| 76 |
pad_to_sequence_len: true
|
| 77 |
|
| 78 |
## max grad norm
|
| 79 |
-
max_grad_norm:
|
| 80 |
|
| 81 |
|
| 82 |
## WandB
|
|
@@ -92,14 +88,15 @@ eval_table_size:
|
|
| 92 |
eval_max_new_tokens: 128
|
| 93 |
|
| 94 |
## hparams
|
| 95 |
-
gradient_accumulation_steps:
|
| 96 |
-
micro_batch_size:
|
| 97 |
-
num_epochs:
|
| 98 |
optimizer: paged_ademamix_8bit
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
|
|
|
| 103 |
|
| 104 |
train_on_inputs: false
|
| 105 |
group_by_length: false
|
|
|
|
| 10 |
|
| 11 |
## data
|
| 12 |
datasets:
|
| 13 |
+
- path: PocketDoc/Dans-Codemaxx-LeetCode
|
| 14 |
+
type: dan-chat-advanced
|
| 15 |
- path: Nitral-AI/ARES-ShareGPT
|
| 16 |
type: dan-chat-advanced
|
| 17 |
- path: PocketDoc/Dans-Logicmaxx-FI-VeriMed
|
|
|
|
| 40 |
type: dan-chat-advanced
|
| 41 |
- path: PocketDoc/Dans-Prosemaxx-Instructwriter-Long
|
| 42 |
type: dan-chat-advanced
|
|
|
|
|
|
|
| 43 |
- path: PocketDoc/Dans-Prosemaxx-Cowriter-3-XS
|
| 44 |
type: dan-chat-advanced
|
| 45 |
- path: PocketDoc/Dans-Prosemaxx-InstructWriter-ZeroShot
|
| 46 |
type: dan-chat-advanced
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
- path: PocketDoc/Dans-Benchmaxx-COT
|
| 48 |
type: dan-chat-advanced
|
| 49 |
- path: PocketDoc/Dans-Benchmaxx
|
|
|
|
| 72 |
pad_to_sequence_len: true
|
| 73 |
|
| 74 |
## max grad norm
|
| 75 |
+
max_grad_norm: 0.001
|
| 76 |
|
| 77 |
|
| 78 |
## WandB
|
|
|
|
| 88 |
eval_max_new_tokens: 128
|
| 89 |
|
| 90 |
## hparams
|
| 91 |
+
gradient_accumulation_steps: 6
|
| 92 |
+
micro_batch_size: 6
|
| 93 |
+
num_epochs: 4
|
| 94 |
optimizer: paged_ademamix_8bit
|
| 95 |
+
optim_args: "beta1=0.9,beta2=0.999,beta3=0.999,alpha=5"
|
| 96 |
+
lr_scheduler: rex
|
| 97 |
+
learning_rate: 1e-6
|
| 98 |
+
warmup_ratio: 0.1
|
| 99 |
+
weight_decay: 0.0
|
| 100 |
|
| 101 |
train_on_inputs: false
|
| 102 |
group_by_length: false
|