{ "training_metadata": { "timestamp": "20251016_174948", "training_date": "2025-10-17", "training_time": "00:42:52", "final_epoch": 3.0382470119521914, "total_steps": 1431, "status": "completed" }, "model_config": { "base_model": "Qwen/Qwen2.5-Coder-32B-Instruct", "model_type": "causal_lm", "architecture": "Qwen2ForCausalLM" }, "lora_config": { "r": 64, "lora_alpha": 128, "lora_dropout": 0.05, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ] }, "training_config": { "num_epochs": 5, "per_device_train_batch_size": 2, "per_device_eval_batch_size": 1, "gradient_accumulation_steps": 8, "effective_batch_size": 32, "learning_rate": 5e-05, "lr_scheduler_type": "cosine", "warmup_ratio": 0.02, "weight_decay": 0.1, "max_grad_norm": 0.5, "bf16": true, "gradient_checkpointing": true, "optim": "adamw_torch", "logging_steps": 10, "save_steps": 50, "eval_steps": 25 }, "dataset_info": { "train_samples": 15057, "eval_samples": 1674, "max_seq_length": 8192, "sample_packing": false }, "hardware_config": { "num_gpus": 2, "gpu_model": "Unknown", "distributed_strategy": "DeepSpeed ZeRO-2", "flash_attention": "2.8.3" }, "performance_metrics": { "final_train_loss": 0.3949, "final_eval_loss": 0.4636613428592682, "final_train_perplexity": 1.4842357599234954, "final_eval_perplexity": 1.5898844535357601, "final_token_accuracy": 0.8872479304671288, "initial_loss": 1.724, "initial_perplexity": 5.606911313988792, "initial_accuracy": 0.5987553134560585 }, "framework_versions": { "torch": "2.4.1+cu124", "transformers": "4.57.1", "peft": "0.17.1", "trl": "0.23.1", "deepspeed": "0.18.0", "flash_attn": "2.8.3", "python": "3.12.3" }, "special_features": { "flash_attention_2": true, "gradient_checkpointing": true, "bf16_training": true, "sample_packing": false, "deepspeed_zero2": true, "distributed_training": true } }