# OrbGen Training Configuration
# Optimized for HuggingFace Spaces with A10G GPU (24GB VRAM)
model:
  base_model: "Qwen/Qwen2.5-Coder-1.5B"
  output_dir: "./orbgen-1.5b"
  max_seq_length: 4096 # Full context for schema generation
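# Assumption about the consuming script: the dataset below is expected to be
# loaded from the Hub, e.g. via datasets.load_dataset("javasop/orbital-schemas").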
data:
  # Load from HuggingFace Hub (upload dataset first)
  dataset: "javasop/orbital-schemas"
  train_split: "train"
  eval_split: "validation"
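# Note on the hyperparameters below (assuming HF Trainer semantics):
# warmup_ratio: 0.1 warms the learning rate up over the first 10% of total
# optimizer steps, and the effective batch size on a single GPU is
# per_device_train_batch_size * gradient_accumulation_steps = 4 * 4 = 16.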
training:
  # SFT Configuration - optimized for A10G (24GB VRAM)
  num_epochs: 3
  per_device_train_batch_size: 4 # Can use larger batches
  per_device_eval_batch_size: 4
  gradient_accumulation_steps: 4 # Effective batch size = 16
  learning_rate: 2.0e-5
  warmup_ratio: 0.1
  weight_decay: 0.01
  max_grad_norm: 1.0
  # Logging
  logging_steps: 10
  eval_steps: 50
  save_steps: 100
  save_total_limit: 3
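# LoRA notes: with the PEFT convention, the adapter scaling factor is
# lora_alpha / r = 128 / 64 = 2, and target_modules below covers every
# attention projection (q/k/v/o_proj) and MLP projection (gate/up/down_proj)
# in the Qwen2-style architecture.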
lora:
  enabled: true
  r: 64 # High LoRA rank (ample VRAM)
  lora_alpha: 128
  lora_dropout: 0.05
  target_modules:
    - "q_proj"
    - "k_proj"
    - "v_proj"
    - "o_proj"
    - "gate_proj"
    - "up_proj"
    - "down_proj"
  bias: "none"
  task_type: "CAUSAL_LM"
# No quantization needed - enough VRAM for bf16
quantization:
  enabled: false
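# Sampling settings for generation after training; max_new_tokens matches
# max_seq_length above, presumably so complete schemas are never truncated.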
generation:
  max_new_tokens: 4096
  temperature: 0.7
  top_p: 0.95
  do_sample: true
wandb:
  project: "orbgen-training"
  entity: null # null falls back to the default W&B entity for the logged-in account
  run_name: "orbgen-1.5b-sft-hf"
# HuggingFace Hub settings
hub:
  push_to_hub: true
  hub_model_id: "javasop/orbgen-1.5b"
  hub_strategy: "checkpoint"
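# With transformers' Trainer, hub_strategy: "checkpoint" also pushes the latest
# checkpoint to a "last-checkpoint" subfolder of the Hub repo, so an interrupted
# run can be resumed from that checkpoint.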