{
  "compute": {
    "accelerator": "A10G",
    "instance": "g5.2xlarge"
  },
  "model": {
    "name": "microsoft/phi-4",
    "tokenizer": "microsoft/phi-4"
  },
  "dataset": {
    "name": "adel67460/straburo-dataset",
    "train_split": "train",
    "eval_split": "train",
    "validation_size": 0.2
  },
  "training": {
    "epochs": 3,
    "batch_size": 1,
    "learning_rate": 2e-5,
    "warmup_ratio": 0.03,
    "evaluation_strategy": "steps",
    "eval_steps": 100,
    "save_strategy": "steps",
    "save_steps": 100,
    "save_total_limit": 1,
    "load_best_model_at_end": true,
    "metric_for_best_model": "loss",
    "greater_is_better": false,
    "gradient_accumulation_steps": 16,
    "logging_steps": 1,
    "fp16": false,
    "bf16": true
  },
  "framework": "gradio",
  "base_model": "microsoft/phi-4",
  "task": "text-generation"
}