{
"model_name": "deepseek-ai/DeepSeek-OCR",
"output_dir": "/app/trained_model",
"batch_size": 6,
"num_epochs": 3,
"gradient_accumulation_steps": 2,
"max_length": 512,
"mixed_precision": "bf16",
"optimizer": {
"learning_rate": 2e-05,
"weight_decay": 0.0,
"warmup_steps": 0,
"warmup_ratio": null,
"scheduler_type": "cosine_with_warmup"
},
"log_interval": 10,
"save_interval": 500,
"push_to_hub": true,
"hub_model_id": "baconnier/deepsynth-ocr-finetuned",
"hub_private": false,
"evaluation_split": "validation",
"save_checkpoints_to_hub": true,
"resume_from_checkpoint": null,
"metrics_output_path": null,
"save_metrics_to_hub": true,
"max_train_samples": null,
"max_eval_samples": null,
"expert_dropout_rate": 0.0,
"expert_dropout_min_keep": 1,
"bidrop_passes": 1,
"gate_dropout_rate": 0.0,
"gate_dropout_keywords": [
"gate",
"router"
],
"target_resolution": "base",
"use_augmentation": true,
"random_resize_min": null,
"random_resize_max": null,
"rotation_degrees": 3.0,
"perspective_distortion": 0.1,
"perspective_prob": 0.3,
"color_jitter_brightness": 0.1,
"color_jitter_contrast": 0.1,
"horizontal_flip_prob": 0.3,
"use_lora": true,
"lora_rank": 64,
"lora_alpha": 128,
"lora_dropout": 0.05,
"lora_target_modules": null,
"lora_bias": "none",
"use_qlora": false,
"qlora_bits": 4,
"qlora_type": "nf4",
"qlora_double_quant": true,
"lora_modules_to_save": null,
"use_text_encoder": false,
"text_encoder_type": null,
"text_encoder_model": null,
"text_encoder_trainable": true,
"instruction_prompt": "Summarize this text:",
"use_text_projection": false
}