Gemma-2-9B-PL-DevOps-Instruct / training_metadata.json
Ennon's picture
Gemma 2 9B DevOps - Polish finetuned model
9ca9e8b verified
{
  "model_name": "google/gemma-2-9b-it",
  "display_name": "Gemma 2 9B (BF16, Batch16 MaxSafe)",
  "timestamp": "2026-02-04T13:14:38.929340",
  "training_config": {
    "num_train_epochs": 1,
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 6,
    "learning_rate": 5e-05,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "weight_decay": 0.01,
    "max_seq_length": 2048,
    "logging_steps": 25,
    "eval_steps": 100,
    "save_steps": 200,
    "seed": 42,
    "bf16": true,
    "optim": "adamw_torch_fused",
    "dataloader_num_workers": 8,
    "torch_compile": false
  },
  "lora_config": {
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "train_loss": 0.6173567452229245,
  "train_samples": 170305,
  "val_samples": 8965,
  "train_time_minutes": 666.9830995202065,
  "max_memory_gb": 77.72561597824097,
  "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96"
}