Update Clone_Big/PETER - PARAMETERS.txt
Clone_Big/PETER - PARAMETERS.txt

The parameters used to train this model are encoded as follows:

G-B1,DR0.2,ACC1,L2.5e-05,R128,A512,E20,TDFT,BF16T,GCLT,MG0.8,GCHF,D1.4,CE0.04,W0.03,cosine,R1 - THE KING

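For readability, here is that string expanded field by field (a reference sketch only, not part of the training script; the abbreviations mirror the f-string that builds output_dir in the command below):

<CODE>
# Expansion of the encoded run name above into the parameters it stands for (reference only).
encoded_run = {
    "per_device_train_batch_size": 1,    # G-B1
    "voice_prompt_drop_rate": 0.2,       # DR0.2
    "gradient_accumulation_steps": 1,    # ACC1
    "learning_rate": 2.5e-05,            # L2.5e-05
    "lora_r": 128,                       # R128
    "lora_alpha": 512,                   # A512
    "num_train_epochs": 20,              # E20
    "train_diffusion_head": True,        # TDFT
    "bf16": True,                        # BF16T
    "gradient_clipping": True,           # GCLT
    "max_grad_norm": 0.8,                # MG0.8
    "gradient_checkpointing": False,     # GCHF
    "diffusion_loss_weight": 1.4,        # D1.4
    "ce_loss_weight": 0.04,              # CE0.04
    "warmup_ratio": 0.03,                # W0.03
    "lr_scheduler_type": "cosine",       # cosine
    "run_number": 1,                     # R1 (run counter only, not a training flag)
}
</CODE>
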
Here's the actual Python command used to train it; we ran this in a Google Colab notebook.

<CODE>
# Begin the fine-tuning process

%cd /content/drive/MyDrive/VibeVoice-finetuning/

# Define your parameters as Python variables
batch_size = 1
drop_rate = 0.2
grad_accum = 1
lr = 2.5e-5
lora_r = 128
lora_alpha = 512
epochs = 20
train_diff = True
bf16 = True
grad_clip = True
max_grad = 0.8
grad_checkpoint = False
diff_weight = 1.4
ce_weight = 0.04
warmup = 0.03
scheduler = "cosine"
run_num = 2

# Build the output directory dynamically
output_dir = (
    f"Precise/G-B{batch_size},DR{drop_rate},ACC{grad_accum},"
    f"L{lr},R{lora_r},A{lora_alpha},E{epochs},"
    f"TDF{'T' if train_diff else 'F'},BF16{'T' if bf16 else 'F'},"
    f"GCL{'T' if grad_clip else 'F'},MG{max_grad},"
    f"GCH{'T' if grad_checkpoint else 'F'},D{diff_weight},"
    f"CE{ce_weight},W{warmup},{scheduler},R{run_num}"
)
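
# With the values above this evaluates to:
# "Precise/G-B1,DR0.2,ACC1,L2.5e-05,R128,A512,E20,TDFT,BF16T,GCLT,MG0.8,GCHF,D1.4,CE0.04,W0.03,cosine,R2"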

# Now use the variables in your command
!python -m src.finetune_vibevoice_lora \
  --model_name_or_path vibevoice/VibeVoice-7B \
  --processor_name_or_path src/vibevoice/processor \
  --text_column_name text \
  --audio_column_name audio \
  --output_dir {output_dir} \
  \
  --train_jsonl GOLD_cortana_train_data.jsonl \
  --per_device_train_batch_size {batch_size} \
  --voice_prompt_drop_rate {drop_rate} \
  --gradient_accumulation_steps {grad_accum} \
  --learning_rate {lr} \
  --lora_r {lora_r} \
  --lora_alpha {lora_alpha} \
  --num_train_epochs {epochs} \
  --train_diffusion_head {train_diff} \
  --bf16 {bf16} \
  --gradient_clipping \
  --max_grad_norm {max_grad} \
  --gradient_checkpointing {grad_checkpoint} \
  --diffusion_loss_weight {diff_weight} \
  --ce_loss_weight {ce_weight} \
  --warmup_ratio {warmup} \
  --lr_scheduler_type {scheduler} \
  \
  --logging_steps 10 \
  --save_steps 1528 \
  \
  --report_to wandb \
  --remove_unused_columns False \
  --do_train \
  --ddpm_batch_mul 4 \
  --lora_target_modules q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj

</CODE>
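
For checkpoint planning, here's a quick sanity-check sketch (not from the original notebook) relating save_steps to the number of optimizer steps; num_train_examples is a hypothetical placeholder you would replace with the row count of GOLD_cortana_train_data.jsonl:

<CODE>
# Sanity check: how save_steps = 1528 relates to optimizer steps for this run.
num_train_examples = 1528   # hypothetical placeholder -- NOT the actual dataset size
batch_size = 1
grad_accum = 1
epochs = 20
save_steps = 1528

steps_per_epoch = num_train_examples // (batch_size * grad_accum)
total_steps = steps_per_epoch * epochs
checkpoints = total_steps // save_steps
print(f"steps/epoch={steps_per_epoch}, total steps={total_steps}, checkpoints saved={checkpoints}")
</CODE>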