{
  "model_variant": "0.6B",
  "train_file": "data/train_queries.csv",
  "output_dir": "models/qwen-finetuned",
  "run_name": "20251001-014002",
  "epochs": 5,
  "batch_size": 64,
  "learning_rate": 2e-05,
  "warmup_ratio": 0.1,
  "validation_split": 0.2,
  "eval_steps": 0,
  "checkpoint_steps": 0,
  "seed": 2025,
  "mixed_precision": false,
  "push_to_hub": true,
  "hub_model_id": "JacobLinCool/Qwen3-Embedding-0.6B-GIR-2",
  "hub_token": null,
  "hub_private_repo": false,
  "save_total_limit": 2,
  "logging_steps": null,
  "gradient_accumulation_steps": 1,
  "gradient_checkpointing": true,
  "optim": "paged_adamw_8bit"
}