AdityaNarayan committed on
Commit
3b84d4a
·
verified ·
1 Parent(s): f33ce46

deleted file

Browse files
Files changed (1) hide show
  1. training_info.json +0 -86
training_info.json DELETED
@@ -1,86 +0,0 @@
1
- {
2
- "training_metadata": {
3
- "timestamp": "20251030_133401",
4
- "training_date": "2025-10-30",
5
- "training_time": "19:48:49",
6
- "final_epoch": 2.321353065539112,
7
- "total_steps": 550,
8
- "status": "completed"
9
- },
10
- "model_config": {
11
- "base_model": "Kwaipilot/KAT-Dev-72B-Exp",
12
- "model_type": "causal_lm",
13
- "architecture": "Qwen2ForCausalLM"
14
- },
15
- "lora_config": {
16
- "r": 64,
17
- "lora_alpha": 128,
18
- "lora_dropout": 0.05,
19
- "target_modules": [
20
- "q_proj",
21
- "k_proj",
22
- "v_proj",
23
- "o_proj",
24
- "gate_proj",
25
- "up_proj",
26
- "down_proj"
27
- ]
28
- },
29
- "training_config": {
30
- "num_epochs": 3,
31
- "per_device_train_batch_size": 2,
32
- "per_device_eval_batch_size": 2,
33
- "gradient_accumulation_steps": 8,
34
- "effective_batch_size": 64,
35
- "learning_rate": 5e-05,
36
- "lr_scheduler_type": "cosine",
37
- "warmup_ratio": 0.03,
38
- "weight_decay": 0.1,
39
- "max_grad_norm": 0.5,
40
- "bf16": true,
41
- "gradient_checkpointing": true,
42
- "optim": "adamw_torch",
43
- "logging_steps": 10,
44
- "save_steps": 50,
45
- "eval_steps": 25
46
- },
47
- "dataset_info": {
48
- "train_samples": 15129,
49
- "eval_samples": 1682,
50
- "max_seq_length": 8192,
51
- "sample_packing": false
52
- },
53
- "hardware_config": {
54
- "num_gpus": 4,
55
- "gpu_model": "NVIDIA H200",
56
- "distributed_strategy": "DeepSpeed ZeRO-2",
57
- "flash_attention": "2.8.3"
58
- },
59
- "performance_metrics": {
60
- "final_train_loss": 0.2793,
61
- "final_eval_loss": 0.3765236437320709,
62
- "final_train_perplexity": 1.322203945559979,
63
- "final_eval_perplexity": 1.457209992899547,
64
- "final_token_accuracy": 0.9227368004620076,
65
- "initial_loss": 1.6654,
66
- "initial_perplexity": 5.2877879419709135,
67
- "initial_accuracy": 0.6416946474462748
68
- },
69
- "framework_versions": {
70
- "torch": "2.9.0+cu128",
71
- "transformers": "4.57.1",
72
- "peft": "0.17.1",
73
- "trl": "0.24.0",
74
- "deepspeed": "0.18.1",
75
- "flash_attn": "2.8.3",
76
- "python": "3.12.3"
77
- },
78
- "special_features": {
79
- "flash_attention_2": true,
80
- "gradient_checkpointing": true,
81
- "bf16_training": true,
82
- "sample_packing": false,
83
- "deepspeed_zero2": true,
84
- "distributed_training": true
85
- }
86
- }