harpreetmann committed on
Commit
2d15b6f
·
verified ·
1 Parent(s): 4220108

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -206,4 +206,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
206
  [More Information Needed]
207
  ### Framework versions
208
 
209
- - PEFT 0.17.1
 
206
  [More Information Needed]
207
  ### Framework versions
208
 
209
+ - PEFT 0.18.0
adapter_config.json CHANGED
@@ -1,9 +1,12 @@
1
  {
 
2
  "alpha_pattern": {},
 
3
  "auto_mapping": null,
4
  "base_model_name_or_path": "google/gemma-2-2b",
5
  "bias": "none",
6
  "corda_config": null,
 
7
  "eva_config": null,
8
  "exclude_modules": null,
9
  "fan_in_fan_out": false,
@@ -20,18 +23,19 @@
20
  "megatron_core": "megatron.core",
21
  "modules_to_save": null,
22
  "peft_type": "LORA",
 
23
  "qalora_group_size": 16,
24
  "r": 128,
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "k_proj",
29
- "up_proj",
30
  "down_proj",
31
  "q_proj",
 
32
  "o_proj",
33
- "v_proj",
34
- "gate_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
1
  {
2
+ "alora_invocation_tokens": null,
3
  "alpha_pattern": {},
4
+ "arrow_config": null,
5
  "auto_mapping": null,
6
  "base_model_name_or_path": "google/gemma-2-2b",
7
  "bias": "none",
8
  "corda_config": null,
9
+ "ensure_weight_tying": false,
10
  "eva_config": null,
11
  "exclude_modules": null,
12
  "fan_in_fan_out": false,
 
23
  "megatron_core": "megatron.core",
24
  "modules_to_save": null,
25
  "peft_type": "LORA",
26
+ "peft_version": "0.18.0",
27
  "qalora_group_size": 16,
28
  "r": 128,
29
  "rank_pattern": {},
30
  "revision": null,
31
  "target_modules": [
32
+ "gate_proj",
 
33
  "down_proj",
34
  "q_proj",
35
+ "k_proj",
36
  "o_proj",
37
+ "up_proj",
38
+ "v_proj"
39
  ],
40
  "target_parameters": null,
41
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:942c4e792a20d5d36d62e57ecc20b664777946d0835a9271383afd5e99b85f11
3
  size 664584480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b06721a9b3d61c6c0c66e2744028ccd466f233ba8b323a55d8f740451ae2c850
3
  size 664584480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2373cf17766c2fbe6c76d2c61a20aec8a4ac34fb5d9556819e6fb72699a31531
3
  size 1329377575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2181f8034cbfc2dbfadd605470f68b73ba590be0a8c4032f888499a4f6444e54
3
  size 1329377575
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:012319d9d7b07efb800bfdc5b30f3b33091204a1f615665fe2368e0bd6978503
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:447f6d9c3def923b2023bfae8d2c470e245de58e058e98ae4722cc77fe074f8b
3
  size 14645
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 100,
3
- "best_metric": 0.09553248435258865,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
@@ -10,108 +10,108 @@
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 2.4642674922943115,
14
  "epoch": 0.3418803418803419,
15
- "grad_norm": 6.1703619956970215,
16
  "learning_rate": 8.389830508474577e-06,
17
- "loss": 0.3828,
18
- "mean_token_accuracy": 0.875461021065712,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
- "eval_entropy": 2.313392945843884,
25
- "eval_loss": 0.1408257782459259,
26
- "eval_mean_token_accuracy": 0.9526278610922333,
27
  "eval_num_tokens": 113164.0,
28
- "eval_runtime": 46.6856,
29
- "eval_samples_per_second": 39.841,
30
- "eval_steps_per_second": 2.506,
31
  "step": 20
32
  },
33
  {
34
- "entropy": 2.3076194286346436,
35
  "epoch": 0.6837606837606838,
36
- "grad_norm": 2.0425662994384766,
37
  "learning_rate": 6.694915254237288e-06,
38
  "loss": 0.1357,
39
- "mean_token_accuracy": 0.9569604843854904,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
- "eval_entropy": 2.276767115307669,
46
- "eval_loss": 0.1144598051905632,
47
- "eval_mean_token_accuracy": 0.9625413275172567,
48
  "eval_num_tokens": 225335.0,
49
- "eval_runtime": 45.4774,
50
- "eval_samples_per_second": 40.899,
51
- "eval_steps_per_second": 2.573,
52
  "step": 40
53
  },
54
  {
55
- "entropy": 2.298072344217545,
56
  "epoch": 1.017094017094017,
57
- "grad_norm": 2.246678113937378,
58
  "learning_rate": 5e-06,
59
  "loss": 0.113,
60
- "mean_token_accuracy": 0.9657873175083063,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
- "eval_entropy": 2.2912978331247964,
67
- "eval_loss": 0.10871552675962448,
68
- "eval_mean_token_accuracy": 0.9649902301975805,
69
  "eval_num_tokens": 330390.0,
70
- "eval_runtime": 46.0256,
71
- "eval_samples_per_second": 40.412,
72
- "eval_steps_per_second": 2.542,
73
  "step": 60
74
  },
75
  {
76
- "entropy": 2.27278618812561,
77
  "epoch": 1.358974358974359,
78
- "grad_norm": 2.236058473587036,
79
  "learning_rate": 3.305084745762712e-06,
80
- "loss": 0.0845,
81
- "mean_token_accuracy": 0.9728620991110801,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
- "eval_entropy": 2.254611888502398,
88
- "eval_loss": 0.10490305721759796,
89
- "eval_mean_token_accuracy": 0.965580604524694,
90
  "eval_num_tokens": 440357.0,
91
- "eval_runtime": 46.2372,
92
- "eval_samples_per_second": 40.227,
93
- "eval_steps_per_second": 2.53,
94
  "step": 80
95
  },
96
  {
97
- "entropy": 2.2653892546892167,
98
  "epoch": 1.7008547008547008,
99
- "grad_norm": 1.7268085479736328,
100
  "learning_rate": 1.6101694915254237e-06,
101
- "loss": 0.0715,
102
- "mean_token_accuracy": 0.9734208568930626,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
- "eval_entropy": 2.2389834895093217,
109
- "eval_loss": 0.09553248435258865,
110
- "eval_mean_token_accuracy": 0.9684329369129279,
111
  "eval_num_tokens": 552807.0,
112
- "eval_runtime": 46.1644,
113
- "eval_samples_per_second": 40.291,
114
- "eval_steps_per_second": 2.534,
115
  "step": 100
116
  }
117
  ],
 
1
  {
2
  "best_global_step": 100,
3
+ "best_metric": 0.09561321139335632,
4
  "best_model_checkpoint": "/content/models/gemma_qlora_lmh/checkpoint-100",
5
  "epoch": 1.7008547008547008,
6
  "eval_steps": 20,
 
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.458172196149826,
14
  "epoch": 0.3418803418803419,
15
+ "grad_norm": 6.135525226593018,
16
  "learning_rate": 8.389830508474577e-06,
17
+ "loss": 0.3827,
18
+ "mean_token_accuracy": 0.8760345175862312,
19
  "num_tokens": 113164.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.3418803418803419,
24
+ "eval_entropy": 2.299590313536489,
25
+ "eval_loss": 0.14134672284126282,
26
+ "eval_mean_token_accuracy": 0.9523653464439588,
27
  "eval_num_tokens": 113164.0,
28
+ "eval_runtime": 46.8429,
29
+ "eval_samples_per_second": 39.707,
30
+ "eval_steps_per_second": 2.498,
31
  "step": 20
32
  },
33
  {
34
+ "entropy": 2.298478972911835,
35
  "epoch": 0.6837606837606838,
36
+ "grad_norm": 2.0751421451568604,
37
  "learning_rate": 6.694915254237288e-06,
38
  "loss": 0.1357,
39
+ "mean_token_accuracy": 0.9575570523738861,
40
  "num_tokens": 225335.0,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 0.6837606837606838,
45
+ "eval_entropy": 2.2715458065016656,
46
+ "eval_loss": 0.11509539932012558,
47
+ "eval_mean_token_accuracy": 0.9629310033260248,
48
  "eval_num_tokens": 225335.0,
49
+ "eval_runtime": 45.696,
50
+ "eval_samples_per_second": 40.704,
51
+ "eval_steps_per_second": 2.56,
52
  "step": 40
53
  },
54
  {
55
+ "entropy": 2.295832566725902,
56
  "epoch": 1.017094017094017,
57
+ "grad_norm": 2.188286542892456,
58
  "learning_rate": 5e-06,
59
  "loss": 0.113,
60
+ "mean_token_accuracy": 0.9653458717541817,
61
  "num_tokens": 330390.0,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 1.017094017094017,
66
+ "eval_entropy": 2.2908162163873005,
67
+ "eval_loss": 0.10838180035352707,
68
+ "eval_mean_token_accuracy": 0.9647057086993487,
69
  "eval_num_tokens": 330390.0,
70
+ "eval_runtime": 46.2535,
71
+ "eval_samples_per_second": 40.213,
72
+ "eval_steps_per_second": 2.53,
73
  "step": 60
74
  },
75
  {
76
+ "entropy": 2.271016186475754,
77
  "epoch": 1.358974358974359,
78
+ "grad_norm": 2.2554891109466553,
79
  "learning_rate": 3.305084745762712e-06,
80
+ "loss": 0.0848,
81
+ "mean_token_accuracy": 0.9718978926539421,
82
  "num_tokens": 440357.0,
83
  "step": 80
84
  },
85
  {
86
  "epoch": 1.358974358974359,
87
+ "eval_entropy": 2.254208923405052,
88
+ "eval_loss": 0.10406262427568436,
89
+ "eval_mean_token_accuracy": 0.9654262356269054,
90
  "eval_num_tokens": 440357.0,
91
+ "eval_runtime": 46.3191,
92
+ "eval_samples_per_second": 40.156,
93
+ "eval_steps_per_second": 2.526,
94
  "step": 80
95
  },
96
  {
97
+ "entropy": 2.2658998131752015,
98
  "epoch": 1.7008547008547008,
99
+ "grad_norm": 1.6946748495101929,
100
  "learning_rate": 1.6101694915254237e-06,
101
+ "loss": 0.0716,
102
+ "mean_token_accuracy": 0.9734383270144462,
103
  "num_tokens": 552807.0,
104
  "step": 100
105
  },
106
  {
107
  "epoch": 1.7008547008547008,
108
+ "eval_entropy": 2.2408512260159874,
109
+ "eval_loss": 0.09561321139335632,
110
+ "eval_mean_token_accuracy": 0.9683785734013615,
111
  "eval_num_tokens": 552807.0,
112
+ "eval_runtime": 47.0074,
113
+ "eval_samples_per_second": 39.568,
114
+ "eval_steps_per_second": 2.489,
115
  "step": 100
116
  }
117
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8f974810c7f4f0af8e66ac9807b37a99c6690f3fbac636ea7560f6e4b434eb1
3
  size 6289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:145c7bf7d5850bcddd7a14f18529815a5613136bdd82409c2bf849d5a8d3cdd4
3
  size 6289