# steg_schemes/punctuation/decoder_model/training_config.yaml
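# Fine-tuning configuration for the punctuation-scheme steganography
# decoder. Field names follow common Hugging Face transformers / PEFT
# conventions; the training script that consumes this file is not shown
# here. Evaluation and checkpointing run every 50 steps, logging every 10.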
eval_steps: 50
eval_strategy: steps
logging_steps: 10
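# LoRA adapter hyperparameters. With alpha=128 and r=64, the adapter
# scaling factor alpha/r is 2. Adapters target every attention projection
# (q/k/v/o) and every MLP projection (gate/up/down) of the Qwen-style base
# model. See the sketch after this block for an assumed PEFT mapping.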
lora:
  alpha: 128
  dropout: 0.1
  r: 64
  target_modules:
    - q_proj
    - k_proj
    - v_proj
    - o_proj
    - gate_proj
    - up_proj
    - down_proj
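# A minimal sketch of how the lora block above could map onto PEFT's
# LoraConfig (assumed consumer code, not part of this repo's files):
#   from peft import LoraConfig
#   peft_config = LoraConfig(
#       r=64, lora_alpha=128, lora_dropout=0.1,
#       target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
#                       "gate_proj", "up_proj", "down_proj"],
#       task_type="CAUSAL_LM",
#   )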
max_length: 1024
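# Base model: DeepSeek's R1 reasoning distillation into a 1.5B-parameter
# Qwen model, loaded from the Hugging Face Hub.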
model_name: deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
save_steps: 50
save_strategy: steps
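# Trainer hyperparameters. The effective train batch size is
# batch_size * gradient_accumulation_steps = 2 * 8 = 16 sequences per
# optimizer step. Training runs in bf16 with a cosine LR schedule and 10%
# warmup, and the best checkpoint is selected by eval_decoder_accuracy
# (higher is better).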
training:
  batch_size: 2
  bf16: true
  eval_batch_size: 2
  fp16: false
  gradient_accumulation_steps: 8
  gradient_checkpointing: false
  greater_is_better: true
  group_by_length: true
  learning_rate: 0.0001
  load_best_model_at_end: true
  lr_scheduler: cosine
  max_grad_norm: 1.0
  metric_for_best_model: eval_decoder_accuracy
  num_epochs: 3
  optim: adamw_torch
  save_total_limit: 2
  warmup_ratio: 0.1
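# Weights & Biases logging; presumably wired to the trainer via
# report_to="wandb" with the project name below.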
wandb:
  enabled: true
  project: steg-finetune
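# For reference, a sketch of a roughly equivalent
# transformers.TrainingArguments call (assumed; recent transformers
# versions accept eval_strategy in place of the older evaluation_strategy):
#   from transformers import TrainingArguments
#   args = TrainingArguments(
#       output_dir="decoder_model",          # hypothetical output path
#       per_device_train_batch_size=2,
#       per_device_eval_batch_size=2,
#       gradient_accumulation_steps=8,
#       num_train_epochs=3,
#       learning_rate=1e-4,
#       lr_scheduler_type="cosine",
#       warmup_ratio=0.1,
#       max_grad_norm=1.0,
#       optim="adamw_torch",
#       bf16=True,
#       group_by_length=True,
#       eval_strategy="steps", eval_steps=50,
#       save_strategy="steps", save_steps=50, save_total_limit=2,
#       logging_steps=10,
#       load_best_model_at_end=True,
#       metric_for_best_model="eval_decoder_accuracy",
#       greater_is_better=True,
#       report_to="wandb",
#   )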