llama_cot_instruction_R1 / training_metadata.json
gboxo's picture
Upload folder using huggingface_hub
21d71ad verified
{
"model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"dataset_path": "transformed_rollouts",
"dataset_samples": 440,
"lora_config": {
"r": 8,
"alpha": 16,
"dropout": 0.1,
"target_modules": [
"k_proj",
"o_proj",
"gate_proj",
"up_proj",
"v_proj",
"down_proj",
"q_proj"
]
},
"training_config": {
"learning_rate": 0.0001,
"num_epochs": 3,
"batch_size": 1,
"gradient_accumulation_steps": 4,
"max_length": 1024,
"disable_cache": true
},
"dataset_info": {
"source": "transformed_rollouts",
"description": "Various instruction types and reasoning patterns from transformed rollouts"
}
}