# OrbGen Training Configuration
# Optimized for HuggingFace Spaces with A10G GPU (24GB VRAM)
model:
  base_model: "Qwen/Qwen2.5-Coder-1.5B"
  output_dir: "./orbgen-1.5b"
  max_seq_length: 4096 # Full context for schema generation
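# Assumption about the consuming script: the dataset below is expected to be
# loaded from the Hub, e.g. via datasets.load_dataset("javasop/orbital-schemas").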
data:
  # Load from HuggingFace Hub (upload dataset first)
  dataset: "javasop/orbital-schemas"
  train_split: "train"
  eval_split: "validation"
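# Note on the hyperparameters below (assuming HF Trainer semantics):
# warmup_ratio: 0.1 warms the learning rate up over the first 10% of total
# optimizer steps, and the effective batch size on a single GPU is
# per_device_train_batch_size * gradient_accumulation_steps = 4 * 4 = 16.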
training:
  # SFT Configuration - optimized for A10G (24GB VRAM)
  num_epochs: 3
  per_device_train_batch_size: 4 # Can use larger batches
  per_device_eval_batch_size: 4
  gradient_accumulation_steps: 4 # Effective batch size = 16
  learning_rate: 2.0e-5
  warmup_ratio: 0.1
  weight_decay: 0.01
  max_grad_norm: 1.0
  # Logging
  logging_steps: 10
  eval_steps: 50
  save_steps: 100
  save_total_limit: 3
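# LoRA notes: with the PEFT convention, the adapter scaling factor is
# lora_alpha / r = 128 / 64 = 2, and target_modules below covers every
# attention projection (q/k/v/o_proj) and MLP projection (gate/up/down_proj)
# in the Qwen2-style architecture.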
lora:
  enabled: true
  r: 64 # High LoRA rank (ample VRAM)
  lora_alpha: 128
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  bias: "none"
  task_type: "CAUSAL_LM"
# No quantization needed - enough VRAM for bf16
quantization:
  enabled: false
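# Sampling settings for generation after training; max_new_tokens matches
# max_seq_length above, presumably so complete schemas are never truncated.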
generation:
  max_new_tokens: 4096
  temperature: 0.7
  top_p: 0.95
  do_sample: true
wandb:
  project: "orbgen-training"
  entity: null # null falls back to the default W&B entity for the logged-in account
  run_name: "orbgen-1.5b-sft-hf"
# HuggingFace Hub settings
hub:
  push_to_hub: true
  hub_model_id: "javasop/orbgen-1.5b"
  hub_strategy: "checkpoint"
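# With transformers' Trainer, hub_strategy: "checkpoint" also pushes the latest
# checkpoint to a "last-checkpoint" subfolder of the Hub repo, so an interrupted
# run can be resumed from that checkpoint.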