{
  "model_name": "deepseek-ai/DeepSeek-OCR",
  "output_dir": "/app/trained_model",
  "batch_size": 6,
  "num_epochs": 3,
  "gradient_accumulation_steps": 2,
  "max_length": 512,
  "mixed_precision": "bf16",
  "optimizer": {
    "learning_rate": 2e-05,
    "weight_decay": 0.0,
    "warmup_steps": 0,
    "warmup_ratio": null,
    "scheduler_type": "cosine_with_warmup"
  },
  "log_interval": 10,
  "save_interval": 500,
  "push_to_hub": true,
  "hub_model_id": "baconnier/deepsynth-ocr-finetuned",
  "hub_private": false,
  "evaluation_split": "validation",
  "save_checkpoints_to_hub": true,
  "resume_from_checkpoint": null,
  "metrics_output_path": null,
  "save_metrics_to_hub": true,
  "max_train_samples": null,
  "max_eval_samples": null,
  "expert_dropout_rate": 0.0,
  "expert_dropout_min_keep": 1,
  "bidrop_passes": 1,
  "gate_dropout_rate": 0.0,
  "gate_dropout_keywords": [
    "gate",
    "router"
  ],
  "target_resolution": "base",
  "use_augmentation": true,
  "random_resize_min": null,
  "random_resize_max": null,
  "rotation_degrees": 3.0,
  "perspective_distortion": 0.1,
  "perspective_prob": 0.3,
  "color_jitter_brightness": 0.1,
  "color_jitter_contrast": 0.1,
  "horizontal_flip_prob": 0.3,
  "use_lora": true,
  "lora_rank": 64,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "lora_target_modules": null,
  "lora_bias": "none",
  "use_qlora": false,
  "qlora_bits": 4,
  "qlora_type": "nf4",
  "qlora_double_quant": true,
  "lora_modules_to_save": null,
  "use_text_encoder": false,
  "text_encoder_type": null,
  "text_encoder_model": null,
  "text_encoder_trainable": true,
  "instruction_prompt": "Summarize this text:",
  "use_text_projection": false
}