assignment2-artifacts / part1_sft_metadata.json
Pritish92's picture
Upload Assignment 2 artifacts
8f75784 verified
raw
history blame contribute delete
746 Bytes
{
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
"dataset_id": "medalpaca/medical_meadow_medqa",
"train_rows": 6106,
"val_rows": 2036,
"max_length": 768,
"train_batch_size": 4,
"eval_batch_size": 16,
"gradient_accumulation_steps": 16,
"effective_batch_size": 64,
"learning_rate": 0.0002,
"weight_decay": 0.0,
"num_epochs": 3,
"warmup_ratio": 0.03,
"early_stopping_patience": 1,
"lora_rank": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
],
"train_runtime_seconds": 999.5917,
"train_samples_per_second": 18.325,
"adapter_dir": "/root/SafeGenAI/work/models/model_sft_lora_adapter"
}