AdityaNarayan's picture
Upload 7 files
0c735e1 verified
{
"model": {
"base_model": "zai-org/GLM-4.6",
"final_model_path": "outputs_fsdp/final_model"
},
"training_config": {
"lora_r": 64,
"lora_alpha": 128,
"lora_dropout": 0.05,
"lora_target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj"
],
"learning_rate": 2.5e-05,
"lr_scheduler_type": "cosine",
"micro_batch_size": 1,
"gradient_accumulation_steps": 2,
"effective_batch_size": 32,
"sequence_length": 16384,
"chunk_overlap": 2048,
"weight_decay": 0.01,
"max_grad_norm": 1.0,
"warmup_ratio": 0.1,
"eval_split": 0.05,
"bf16": true,
"seed": 42
},
"hardware": {
"num_gpus": 16,
"gpu_name": "NVIDIA H200",
"num_nodes": 2,
"gpus_per_node": 8
},
"phases": [
{
"phase": 1,
"name": "phase1_foundation",
"description": "Foundation: Learn codebase structure and file patterns",
"dataset": "dataset/phase1_foundation.jsonl",
"epochs": 2,
"learning_rate": 2.5e-05,
"warmup_ratio": 0.15,
"num_train_samples": 9293,
"num_eval_samples": 512,
"num_chunks": 9805,
"train_metrics": {
"train_runtime": 116260.86471509933,
"train_runtime_minutes": 1937.6810785849889,
"train_steps": 581,
"train_loss": 0.6090635275339993,
"train_perplexity": 1.838708692210649,
"samples_per_second": 0.1598646289578659,
"steps_per_second": 0.004997382407431405
},
"eval_metrics": {
"eval_loss": 0.34885369252151577,
"eval_perplexity": 1.4174417928358451,
"eval_accuracy": 90.55727554179566,
"best_eval_loss": 0.34950478435712284
}
},
{
"phase": 2,
"name": "phase2_evolution",
"description": "Evolution: Learn commit patterns and code changes",
"dataset": "dataset/phase2_evolution.jsonl",
"epochs": 2,
"learning_rate": 2e-05,
"warmup_ratio": 0.1,
"num_train_samples": 16622,
"num_eval_samples": 1545,
"num_chunks": 18167,
"train_metrics": {
"train_runtime": 232094.11823368073,
"train_runtime_minutes": 3868.235303894679,
"train_steps": 1039,
"train_loss": 0.7480631428085002,
"train_perplexity": 2.112903658217297,
"samples_per_second": 0.14323499558282102,
"steps_per_second": 0.004476632186576557
},
"eval_metrics": {
"eval_loss": 2.45627436399119,
"eval_perplexity": 11.661284805363318,
"eval_accuracy": 42.272774071154785,
"best_eval_loss": 2.456274959661988
}
},
{
"phase": 3,
"name": "phase3_pr_mastery",
"description": "PR Mastery: Learn PR review patterns and discussions",
"dataset": "dataset/phase3_pr_mastery.jsonl",
"epochs": 1,
"learning_rate": 1.5e-05,
"warmup_ratio": 0.05,
"num_train_samples": 9797,
"num_eval_samples": 509,
"num_chunks": 10306,
"train_metrics": {
"train_runtime": 63952.77484560013,
"train_runtime_minutes": 1065.8795807600022,
"train_steps": 306,
"train_loss": 0.4651245652436236,
"train_perplexity": 1.592212510874149,
"samples_per_second": 0.15319116369309535,
"steps_per_second": 0.004784780656332263
},
"eval_metrics": {
"eval_loss": 0.47181596884547616,
"eval_perplexity": 1.6029023726075684,
"eval_accuracy": 90.83844610286057,
"best_eval_loss": 0.47182859617532813
}
}
],
"phase_checkpoints": [
"outputs_fsdp/phase1_foundation/final",
"outputs_fsdp/phase2_evolution/final",
"outputs_fsdp/phase3_pr_mastery/final"
],
"summary": {
"initial_train_loss": 0.6090635275339993,
"final_train_loss": 0.4651245652436236,
"initial_eval_loss": 0.34885369252151577,
"final_eval_loss": 0.47181596884547616,
"initial_perplexity": 1.4174417928358451,
"final_perplexity": 1.6029023726075684,
"total_epochs": 5,
"total_phases": 3,
"total_steps": 1926,
"total_training_time_seconds": 419524.9958562851,
"total_training_time_hours": 116.5347210711903
},
"timestamp": "20251214_065303",
"run_name": "glm-curriculum-16gpu",
"output_directory": "outputs_fsdp"
}