Spaces:
Running
Running
| { | |
| "model_name": "Qwen/Qwen2.5-Math-1.5B-Instruct", | |
| "lora_r": 8, | |
| "lora_alpha": 16, | |
| "lora_dropout": 0.05, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj" | |
| ], | |
| "num_train_epochs": 10, | |
| "per_device_train_batch_size": 2, | |
| "gradient_accumulation_steps": 8, | |
| "learning_rate": 0.0002, | |
| "lr_scheduler_type": "cosine", | |
| "warmup_ratio": 0.05, | |
| "weight_decay": 0.01, | |
| "fp16": true, | |
| "max_seq_length": 512, | |
| "label_smoothing_configs": [ | |
| 0.0, | |
| 0.02, | |
| 0.05, | |
| 0.1, | |
| 0.2 | |
| ], | |
| "perturbation_sigmas": [ | |
| 0.005, | |
| 0.01, | |
| 0.015, | |
| 0.02, | |
| 0.025, | |
| 0.03 | |
| ], | |
| "data_seed": 42, | |
| "data_size": 2000, | |
| "eval_questions": 300, | |
| "created_at": "2026-03-11 17:52:45" | |
| } |