harpreetmann committed on
Commit
4220108
·
verified ·
1 Parent(s): f380db9

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -25,13 +25,13 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "gate_proj",
29
  "k_proj",
30
- "v_proj",
31
  "down_proj",
32
  "q_proj",
33
- "up_proj",
34
- "o_proj"
 
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
28
  "k_proj",
29
+ "up_proj",
30
  "down_proj",
31
  "q_proj",
32
+ "o_proj",
33
+ "v_proj",
34
+ "gate_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4fd2f2f24b663f9f3697c74aa5c31dd0b09aa20161cfd75d3472b50e1bbf472
3
  size 664584480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:942c4e792a20d5d36d62e57ecc20b664777946d0835a9271383afd5e99b85f11
3
  size 664584480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7cfd1aad66e313c4b2faa3c98a87e667e8808b634d9d98b7f19bc2a73239fdec
3
  size 1329377575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2373cf17766c2fbe6c76d2c61a20aec8a4ac34fb5d9556819e6fb72699a31531
3
  size 1329377575
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 100,
3
- "best_metric": 0.09543681889772415,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
@@ -10,108 +10,108 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 2.4621588349342347,
14
  "epoch": 0.3418803418803419,
15
- "grad_norm": 6.012325763702393,
16
  "learning_rate": 8.389830508474577e-06,
17
- "loss": 0.3823,
18
- "mean_token_accuracy": 0.8764635115861893,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
- "eval_entropy": 2.307236980169247,
25
- "eval_loss": 0.1412736475467682,
26
- "eval_mean_token_accuracy": 0.9523680826537629,
27
  "eval_num_tokens": 113164.0,
28
- "eval_runtime": 46.9847,
29
- "eval_samples_per_second": 39.587,
30
- "eval_steps_per_second": 2.49,
31
  "step": 20
32
  },
33
  {
34
- "entropy": 2.30367816388607,
35
  "epoch": 0.6837606837606838,
36
- "grad_norm": 2.036860466003418,
37
  "learning_rate": 6.694915254237288e-06,
38
- "loss": 0.1364,
39
- "mean_token_accuracy": 0.9568088531494141,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
- "eval_entropy": 2.2767869560127583,
46
- "eval_loss": 0.11478288471698761,
47
- "eval_mean_token_accuracy": 0.9625477276296697,
48
  "eval_num_tokens": 225335.0,
49
- "eval_runtime": 45.5897,
50
- "eval_samples_per_second": 40.799,
51
- "eval_steps_per_second": 2.566,
52
  "step": 40
53
  },
54
  {
55
- "entropy": 2.2971509970151462,
56
  "epoch": 1.017094017094017,
57
- "grad_norm": 2.243170976638794,
58
  "learning_rate": 5e-06,
59
- "loss": 0.1134,
60
- "mean_token_accuracy": 0.966560884928092,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
- "eval_entropy": 2.289694781996246,
67
- "eval_loss": 0.10902266204357147,
68
- "eval_mean_token_accuracy": 0.9641746343710483,
69
  "eval_num_tokens": 330390.0,
70
- "eval_runtime": 46.8001,
71
- "eval_samples_per_second": 39.744,
72
- "eval_steps_per_second": 2.5,
73
  "step": 60
74
  },
75
  {
76
- "entropy": 2.272606986761093,
77
  "epoch": 1.358974358974359,
78
- "grad_norm": 2.2923057079315186,
79
  "learning_rate": 3.305084745762712e-06,
80
  "loss": 0.0845,
81
- "mean_token_accuracy": 0.9724922418594361,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
- "eval_entropy": 2.2530585782140746,
88
- "eval_loss": 0.1047038808465004,
89
- "eval_mean_token_accuracy": 0.9652910543303205,
90
  "eval_num_tokens": 440357.0,
91
- "eval_runtime": 46.2572,
92
- "eval_samples_per_second": 40.21,
93
- "eval_steps_per_second": 2.529,
94
  "step": 80
95
  },
96
  {
97
- "entropy": 2.26450654566288,
98
  "epoch": 1.7008547008547008,
99
- "grad_norm": 1.6888355016708374,
100
  "learning_rate": 1.6101694915254237e-06,
101
  "loss": 0.0715,
102
- "mean_token_accuracy": 0.9734723582863808,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
- "eval_entropy": 2.240677540118878,
109
- "eval_loss": 0.09543681889772415,
110
- "eval_mean_token_accuracy": 0.9683268676456224,
111
  "eval_num_tokens": 552807.0,
112
- "eval_runtime": 46.1331,
113
- "eval_samples_per_second": 40.318,
114
- "eval_steps_per_second": 2.536,
115
  "step": 100
116
  }
117
  ],
 
1
  {
2
  "best_global_step": 100,
3
+ "best_metric": 0.09553248435258865,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.4642674922943115,
14
  "epoch": 0.3418803418803419,
15
+ "grad_norm": 6.1703619956970215,
16
  "learning_rate": 8.389830508474577e-06,
17
+ "loss": 0.3828,
18
+ "mean_token_accuracy": 0.875461021065712,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
+ "eval_entropy": 2.313392945843884,
25
+ "eval_loss": 0.1408257782459259,
26
+ "eval_mean_token_accuracy": 0.9526278610922333,
27
  "eval_num_tokens": 113164.0,
28
+ "eval_runtime": 46.6856,
29
+ "eval_samples_per_second": 39.841,
30
+ "eval_steps_per_second": 2.506,
31
  "step": 20
32
  },
33
  {
34
+ "entropy": 2.3076194286346436,
35
  "epoch": 0.6837606837606838,
36
+ "grad_norm": 2.0425662994384766,
37
  "learning_rate": 6.694915254237288e-06,
38
+ "loss": 0.1357,
39
+ "mean_token_accuracy": 0.9569604843854904,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
+ "eval_entropy": 2.276767115307669,
46
+ "eval_loss": 0.1144598051905632,
47
+ "eval_mean_token_accuracy": 0.9625413275172567,
48
  "eval_num_tokens": 225335.0,
49
+ "eval_runtime": 45.4774,
50
+ "eval_samples_per_second": 40.899,
51
+ "eval_steps_per_second": 2.573,
52
  "step": 40
53
  },
54
  {
55
+ "entropy": 2.298072344217545,
56
  "epoch": 1.017094017094017,
57
+ "grad_norm": 2.246678113937378,
58
  "learning_rate": 5e-06,
59
+ "loss": 0.113,
60
+ "mean_token_accuracy": 0.9657873175083063,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
+ "eval_entropy": 2.2912978331247964,
67
+ "eval_loss": 0.10871552675962448,
68
+ "eval_mean_token_accuracy": 0.9649902301975805,
69
  "eval_num_tokens": 330390.0,
70
+ "eval_runtime": 46.0256,
71
+ "eval_samples_per_second": 40.412,
72
+ "eval_steps_per_second": 2.542,
73
  "step": 60
74
  },
75
  {
76
+ "entropy": 2.27278618812561,
77
  "epoch": 1.358974358974359,
78
+ "grad_norm": 2.236058473587036,
79
  "learning_rate": 3.305084745762712e-06,
80
  "loss": 0.0845,
81
+ "mean_token_accuracy": 0.9728620991110801,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
+ "eval_entropy": 2.254611888502398,
88
+ "eval_loss": 0.10490305721759796,
89
+ "eval_mean_token_accuracy": 0.965580604524694,
90
  "eval_num_tokens": 440357.0,
91
+ "eval_runtime": 46.2372,
92
+ "eval_samples_per_second": 40.227,
93
+ "eval_steps_per_second": 2.53,
94
  "step": 80
95
  },
96
  {
97
+ "entropy": 2.2653892546892167,
98
  "epoch": 1.7008547008547008,
99
+ "grad_norm": 1.7268085479736328,
100
  "learning_rate": 1.6101694915254237e-06,
101
  "loss": 0.0715,
102
+ "mean_token_accuracy": 0.9734208568930626,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
+ "eval_entropy": 2.2389834895093217,
109
+ "eval_loss": 0.09553248435258865,
110
+ "eval_mean_token_accuracy": 0.9684329369129279,
111
  "eval_num_tokens": 552807.0,
112
+ "eval_runtime": 46.1644,
113
+ "eval_samples_per_second": 40.291,
114
+ "eval_steps_per_second": 2.534,
115
  "step": 100
116
  }
117
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9399b718a9dab6f82ca03abb475407342319536237c41ccfc6081473e94f69b
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f974810c7f4f0af8e66ac9807b37a99c6690f3fbac636ea7560f6e4b434eb1
3
  size 6289