fsds_cleaning_env / configs /sft_config.json
israaaML's picture
v2: curriculum scheduling, SFT pipeline, reward redesign, agent guide
16038fc
raw
history blame contribute delete
751 Bytes
{
"base_model": "unsloth/Qwen2.5-1.5B-Instruct-bnb-4bit",
"env_url": "https://israaaML-fsds-cleaning-env.hf.space",
"demo_path": "./demos/expert_demos.json",
"sft_output_dir": "./data-cleaning-sft",
"sft_final_dir": "./data-cleaning-sft-final",
"collect_fresh": true,
"n_per_task": 20,
"task_ids": ["ecommerce_mobile", "subscription_churn", "delivery_eta"],
"sft_mode": "step",
"successful_only": true,
"seed_offset": 1000,
"training": {
"num_train_epochs": 2,
"per_device_train_batch_size": 4,
"gradient_accumulation_steps": 2,
"learning_rate": 2e-4,
"lr_scheduler_type": "cosine",
"warmup_ratio": 0.05,
"logging_steps": 5,
"save_steps": 100,
"max_seq_length": 2048,
"fp16": true
}
}