{
  "output_dir": "models/braille256_scaled/braille256_scaled_64M",
  "learning_rate": 0.0003,
  "weight_decay": 0.01,
  "warmup_steps": 1000,
  "max_steps": 10000,
  "per_device_train_batch_size": 16,
  "per_device_eval_batch_size": 16,
  "gradient_accumulation_steps": 2,
  "max_seq_length": 512,
  "logging_steps": 100,
  "eval_steps": 500,
  "save_steps": 1000,
  "track_emergent_patterns": true,
  "pattern_analysis_steps": 1000,
  "fp16": true,
  "dataloader_num_workers": 4
}