AdityaNarayan
/

KAT-Dev-72B-Exp-CPT-LoRA-Adapter-HyperSwitch

Text Generation

Model card · Files and versions

AdityaNarayan committed on Nov 1, 2025

Commit

3b84d4a

·

verified ·

1 Parent(s): f33ce46

deleted file

Files changed (1) hide show

training_info.json +0 -86

training_info.json DELETED Viewed

@@ -1,86 +0,0 @@
-{
-  "training_metadata": {
-    "timestamp": "20251030_133401",
-    "training_date": "2025-10-30",
-    "training_time": "19:48:49",
-    "final_epoch": 2.321353065539112,
-    "total_steps": 550,
-    "status": "completed"
-  },
-  "model_config": {
-    "base_model": "Kwaipilot/KAT-Dev-72B-Exp",
-    "model_type": "causal_lm",
-    "architecture": "Qwen2ForCausalLM"
-  },
-  "lora_config": {
-    "r": 64,
-    "lora_alpha": 128,
-    "lora_dropout": 0.05,
-    "target_modules": [
-      "q_proj",
-      "k_proj",
-      "v_proj",
-      "o_proj",
-      "gate_proj",
-      "up_proj",
-      "down_proj"
-    ]
-  },
-  "training_config": {
-    "num_epochs": 3,
-    "per_device_train_batch_size": 2,
-    "per_device_eval_batch_size": 2,
-    "gradient_accumulation_steps": 8,
-    "effective_batch_size": 64,
-    "learning_rate": 5e-05,
-    "lr_scheduler_type": "cosine",
-    "warmup_ratio": 0.03,
-    "weight_decay": 0.1,
-    "max_grad_norm": 0.5,
-    "bf16": true,
-    "gradient_checkpointing": true,
-    "optim": "adamw_torch",
-    "logging_steps": 10,
-    "save_steps": 50,
-    "eval_steps": 25
-  },
-  "dataset_info": {
-    "train_samples": 15129,
-    "eval_samples": 1682,
-    "max_seq_length": 8192,
-    "sample_packing": false
-  },
-  "hardware_config": {
-    "num_gpus": 4,
-    "gpu_model": "NVIDIA H200",
-    "distributed_strategy": "DeepSpeed ZeRO-2",
-    "flash_attention": "2.8.3"
-  },
-  "performance_metrics": {
-    "final_train_loss": 0.2793,
-    "final_eval_loss": 0.3765236437320709,
-    "final_train_perplexity": 1.322203945559979,
-    "final_eval_perplexity": 1.457209992899547,
-    "final_token_accuracy": 0.9227368004620076,
-    "initial_loss": 1.6654,
-    "initial_perplexity": 5.2877879419709135,
-    "initial_accuracy": 0.6416946474462748
-  },
-  "framework_versions": {
-    "torch": "2.9.0+cu128",
-    "transformers": "4.57.1",
-    "peft": "0.17.1",
-    "trl": "0.24.0",
-    "deepspeed": "0.18.1",
-    "flash_attn": "2.8.3",
-    "python": "3.12.3"
-  },
-  "special_features": {
-    "flash_attention_2": true,
-    "gradient_checkpointing": true,
-    "bf16_training": true,
-    "sample_packing": false,
-    "deepspeed_zero2": true,
-    "distributed_training": true
-  }
-}