harpreetmann committed on
Commit
f380db9
·
verified ·
1 Parent(s): c5d0e53

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -25,13 +25,13 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
 
28
  "v_proj",
29
- "q_proj",
30
  "down_proj",
 
31
  "up_proj",
32
- "gate_proj",
33
- "o_proj",
34
- "k_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "gate_proj",
29
+ "k_proj",
30
  "v_proj",
 
31
  "down_proj",
32
+ "q_proj",
33
  "up_proj",
34
+ "o_proj"
 
 
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cce46b99801974dee4d50e7762369c85db39e3719f7e13f39c7757ed0201a65c
3
  size 664584480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4fd2f2f24b663f9f3697c74aa5c31dd0b09aa20161cfd75d3472b50e1bbf472
3
  size 664584480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a59b6490efd84469e8acf94b22f2093427cf545527a325d87b0512d1162e4bb2
3
  size 1329377575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cfd1aad66e313c4b2faa3c98a87e667e8808b634d9d98b7f19bc2a73239fdec
3
  size 1329377575
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:447f6d9c3def923b2023bfae8d2c470e245de58e058e98ae4722cc77fe074f8b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:012319d9d7b07efb800bfdc5b30f3b33091204a1f615665fe2368e0bd6978503
3
  size 14645
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 100,
3
- "best_metric": 0.09546195715665817,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
@@ -10,108 +10,108 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 2.4611108183860777,
14
  "epoch": 0.3418803418803419,
15
- "grad_norm": 6.039472579956055,
16
  "learning_rate": 8.389830508474577e-06,
17
- "loss": 0.3829,
18
- "mean_token_accuracy": 0.8768418416380882,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
- "eval_entropy": 2.3045855587364263,
25
- "eval_loss": 0.14144526422023773,
26
- "eval_mean_token_accuracy": 0.9522208352374215,
27
  "eval_num_tokens": 113164.0,
28
- "eval_runtime": 46.5777,
29
- "eval_samples_per_second": 39.933,
30
- "eval_steps_per_second": 2.512,
31
  "step": 20
32
  },
33
  {
34
- "entropy": 2.3063884317874908,
35
  "epoch": 0.6837606837606838,
36
- "grad_norm": 2.049039840698242,
37
  "learning_rate": 6.694915254237288e-06,
38
- "loss": 0.1361,
39
- "mean_token_accuracy": 0.9577587321400642,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
- "eval_entropy": 2.2767278709982195,
46
- "eval_loss": 0.11506718397140503,
47
- "eval_mean_token_accuracy": 0.9626757140852448,
48
  "eval_num_tokens": 225335.0,
49
- "eval_runtime": 45.3833,
50
- "eval_samples_per_second": 40.984,
51
- "eval_steps_per_second": 2.578,
52
  "step": 40
53
  },
54
  {
55
- "entropy": 2.299742362438104,
56
  "epoch": 1.017094017094017,
57
- "grad_norm": 2.2758777141571045,
58
  "learning_rate": 5e-06,
59
- "loss": 0.1132,
60
- "mean_token_accuracy": 0.9659271622315432,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
- "eval_entropy": 2.2947008232785087,
67
- "eval_loss": 0.10887181758880615,
68
- "eval_mean_token_accuracy": 0.9646475982462239,
69
  "eval_num_tokens": 330390.0,
70
- "eval_runtime": 45.9184,
71
- "eval_samples_per_second": 40.507,
72
- "eval_steps_per_second": 2.548,
73
  "step": 60
74
  },
75
  {
76
- "entropy": 2.277792435884476,
77
  "epoch": 1.358974358974359,
78
- "grad_norm": 2.237907886505127,
79
  "learning_rate": 3.305084745762712e-06,
80
- "loss": 0.0848,
81
- "mean_token_accuracy": 0.9723074913024903,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
- "eval_entropy": 2.2612023394331975,
88
- "eval_loss": 0.10450778901576996,
89
- "eval_mean_token_accuracy": 0.9650997463454548,
90
  "eval_num_tokens": 440357.0,
91
- "eval_runtime": 46.2067,
92
- "eval_samples_per_second": 40.254,
93
- "eval_steps_per_second": 2.532,
94
  "step": 80
95
  },
96
  {
97
- "entropy": 2.2707396924495695,
98
  "epoch": 1.7008547008547008,
99
- "grad_norm": 1.7053431272506714,
100
  "learning_rate": 1.6101694915254237e-06,
101
  "loss": 0.0715,
102
- "mean_token_accuracy": 0.9734432741999626,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
- "eval_entropy": 2.2446225402701616,
109
- "eval_loss": 0.09546195715665817,
110
- "eval_mean_token_accuracy": 0.9683777588045496,
111
  "eval_num_tokens": 552807.0,
112
- "eval_runtime": 45.8097,
113
- "eval_samples_per_second": 40.603,
114
- "eval_steps_per_second": 2.554,
115
  "step": 100
116
  }
117
  ],
 
1
  {
2
  "best_global_step": 100,
3
+ "best_metric": 0.09543681889772415,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.4621588349342347,
14
  "epoch": 0.3418803418803419,
15
+ "grad_norm": 6.012325763702393,
16
  "learning_rate": 8.389830508474577e-06,
17
+ "loss": 0.3823,
18
+ "mean_token_accuracy": 0.8764635115861893,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
+ "eval_entropy": 2.307236980169247,
25
+ "eval_loss": 0.1412736475467682,
26
+ "eval_mean_token_accuracy": 0.9523680826537629,
27
  "eval_num_tokens": 113164.0,
28
+ "eval_runtime": 46.9847,
29
+ "eval_samples_per_second": 39.587,
30
+ "eval_steps_per_second": 2.49,
31
  "step": 20
32
  },
33
  {
34
+ "entropy": 2.30367816388607,
35
  "epoch": 0.6837606837606838,
36
+ "grad_norm": 2.036860466003418,
37
  "learning_rate": 6.694915254237288e-06,
38
+ "loss": 0.1364,
39
+ "mean_token_accuracy": 0.9568088531494141,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
+ "eval_entropy": 2.2767869560127583,
46
+ "eval_loss": 0.11478288471698761,
47
+ "eval_mean_token_accuracy": 0.9625477276296697,
48
  "eval_num_tokens": 225335.0,
49
+ "eval_runtime": 45.5897,
50
+ "eval_samples_per_second": 40.799,
51
+ "eval_steps_per_second": 2.566,
52
  "step": 40
53
  },
54
  {
55
+ "entropy": 2.2971509970151462,
56
  "epoch": 1.017094017094017,
57
+ "grad_norm": 2.243170976638794,
58
  "learning_rate": 5e-06,
59
+ "loss": 0.1134,
60
+ "mean_token_accuracy": 0.966560884928092,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
+ "eval_entropy": 2.289694781996246,
67
+ "eval_loss": 0.10902266204357147,
68
+ "eval_mean_token_accuracy": 0.9641746343710483,
69
  "eval_num_tokens": 330390.0,
70
+ "eval_runtime": 46.8001,
71
+ "eval_samples_per_second": 39.744,
72
+ "eval_steps_per_second": 2.5,
73
  "step": 60
74
  },
75
  {
76
+ "entropy": 2.272606986761093,
77
  "epoch": 1.358974358974359,
78
+ "grad_norm": 2.2923057079315186,
79
  "learning_rate": 3.305084745762712e-06,
80
+ "loss": 0.0845,
81
+ "mean_token_accuracy": 0.9724922418594361,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
+ "eval_entropy": 2.2530585782140746,
88
+ "eval_loss": 0.1047038808465004,
89
+ "eval_mean_token_accuracy": 0.9652910543303205,
90
  "eval_num_tokens": 440357.0,
91
+ "eval_runtime": 46.2572,
92
+ "eval_samples_per_second": 40.21,
93
+ "eval_steps_per_second": 2.529,
94
  "step": 80
95
  },
96
  {
97
+ "entropy": 2.26450654566288,
98
  "epoch": 1.7008547008547008,
99
+ "grad_norm": 1.6888355016708374,
100
  "learning_rate": 1.6101694915254237e-06,
101
  "loss": 0.0715,
102
+ "mean_token_accuracy": 0.9734723582863808,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
+ "eval_entropy": 2.240677540118878,
109
+ "eval_loss": 0.09543681889772415,
110
+ "eval_mean_token_accuracy": 0.9683268676456224,
111
  "eval_num_tokens": 552807.0,
112
+ "eval_runtime": 46.1331,
113
+ "eval_samples_per_second": 40.318,
114
+ "eval_steps_per_second": 2.536,
115
  "step": 100
116
  }
117
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:062d76cca895bbae1ddc531b525e8d06e13c9a529c7c97a8a71d7d66d737ee28
3
- size 6225
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9399b718a9dab6f82ca03abb475407342319536237c41ccfc6081473e94f69b
3
+ size 6289