# Spaces: Runtime error
# (HuggingFace Spaces status text captured during extraction — not part of the config)
---
# OrbGen Training Configuration
# Optimized for HuggingFace Spaces with A10G GPU (24GB VRAM)

model:
  base_model: "Qwen/Qwen2.5-Coder-1.5B"
  output_dir: "./orbgen-1.5b"
  max_seq_length: 4096  # Full context for schema generation

data:
  # Load from HuggingFace Hub (upload dataset first)
  dataset: "javasop/orbital-schemas"
  train_split: "train"
  eval_split: "validation"

training:
  # SFT Configuration - optimized for A10G (24GB VRAM)
  num_epochs: 3
  per_device_train_batch_size: 4  # Can use larger batches
  per_device_eval_batch_size: 4
  gradient_accumulation_steps: 4  # Effective batch size = 16
  learning_rate: 2.0e-5
  warmup_ratio: 0.1
  weight_decay: 0.01
  max_grad_norm: 1.0

  # Logging
  logging_steps: 10
  eval_steps: 50
  save_steps: 100
  save_total_limit: 3

lora:
  enabled: true
  r: 64  # Full LoRA rank
  lora_alpha: 128
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  bias: "none"
  task_type: "CAUSAL_LM"

# No quantization needed - enough VRAM for bf16
quantization:
  enabled: false

generation:
  max_new_tokens: 4096
  temperature: 0.7
  top_p: 0.95
  do_sample: true

wandb:
  project: "orbgen-training"
  entity: null  # Will use default
  run_name: "orbgen-1.5b-sft-hf"

# HuggingFace Hub settings
hub:
  push_to_hub: true
  hub_model_id: "javasop/orbgen-1.5b"
  hub_strategy: "checkpoint"