Hugging-GK committed on
Commit
034c7a9
·
verified ·
1 Parent(s): 940c1b4

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -25,13 +25,13 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "gate_proj",
29
- "down_proj",
30
  "o_proj",
31
- "up_proj",
 
32
  "k_proj",
 
33
  "q_proj",
34
- "v_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
 
 
28
  "o_proj",
29
+ "v_proj",
30
+ "gate_proj",
31
  "k_proj",
32
+ "down_proj",
33
  "q_proj",
34
+ "up_proj"
35
  ],
36
  "target_parameters": null,
37
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03cc0484974038386f70d77c46f5aac3b533c7bced6962979281fe333c94d025
3
  size 664584480
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79def310c55b37cd8259a02e3faace2907e9317b665c755507979147e8030cfd
3
  size 664584480
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875f3b2ab1fcb4f6e9f7c489f0befe3d16113bb96aa2b87fff811d3c44fc13d1
3
  size 1329377575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:428eb21aeeadb3f413515d892ed4fbbf367950def367306a9420106e1bd5f77a
3
  size 1329377575
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:326c0bca4395f2945822b3c5c26887d8851ac679204da1bb6c4d1f291262d7f1
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b9e176b8f2be84b6dc94b0764395d41a6cae49568e6336c00495dfadd4a8a56
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f2a49e35cdb54fb4cb47a212fa58478891c6773b4d4d385b309321fbef45523
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6302d42699f02b238748e313b2253d63e288a72f6ded5bf4047590e79ba04256
3
  size 1465
trainer_state.json CHANGED
@@ -1,34 +1,97 @@
1
  {
2
- "best_global_step": 20,
3
- "best_metric": 0.10567178577184677,
4
- "best_model_checkpoint": "/content/models/gemma_jigsaw_instruction/checkpoint-20",
5
- "epoch": 0.39408866995073893,
6
  "eval_steps": 20,
7
- "global_step": 20,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
- "entropy": 1.9691255465718627,
14
  "epoch": 0.39408866995073893,
15
- "grad_norm": 1.3069469928741455,
16
  "learning_rate": 8.758169934640524e-06,
17
- "loss": 0.1069,
18
- "mean_token_accuracy": 0.9541643948955391,
19
- "num_tokens": 626062.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.39408866995073893,
24
- "eval_entropy": 1.9961118835669298,
25
- "eval_loss": 0.10567178577184677,
26
- "eval_mean_token_accuracy": 0.9499198725590339,
27
- "eval_num_tokens": 626062.0,
28
- "eval_runtime": 4.3895,
29
- "eval_samples_per_second": 46.247,
30
- "eval_steps_per_second": 5.923,
31
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "logging_steps": 20,
@@ -48,7 +111,7 @@
48
  "attributes": {}
49
  }
50
  },
51
- "total_flos": 1404085029875712.0,
52
  "train_batch_size": 8,
53
  "trial_name": null,
54
  "trial_params": null
 
1
  {
2
+ "best_global_step": 80,
3
+ "best_metric": 0.10693139582872391,
4
+ "best_model_checkpoint": "/content/models/gemma_jigsaw_instruction/checkpoint-80",
5
+ "epoch": 1.5714285714285714,
6
  "eval_steps": 20,
7
+ "global_step": 80,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
+ "entropy": 2.0030554473400115,
14
  "epoch": 0.39408866995073893,
15
+ "grad_norm": 2.5103867053985596,
16
  "learning_rate": 8.758169934640524e-06,
17
+ "loss": 0.3751,
18
+ "mean_token_accuracy": 0.859375,
19
+ "num_tokens": 72532.0,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 0.39408866995073893,
24
+ "eval_entropy": 2.080273380646339,
25
+ "eval_loss": 0.14906036853790283,
26
+ "eval_mean_token_accuracy": 0.9230769230769231,
27
+ "eval_num_tokens": 72532.0,
28
+ "eval_runtime": 4.4741,
29
+ "eval_samples_per_second": 45.372,
30
+ "eval_steps_per_second": 5.811,
31
  "step": 20
32
+ },
33
+ {
34
+ "entropy": 2.026494912803173,
35
+ "epoch": 0.7881773399014779,
36
+ "grad_norm": 8.653420448303223,
37
+ "learning_rate": 7.450980392156863e-06,
38
+ "loss": 0.1611,
39
+ "mean_token_accuracy": 0.916015625,
40
+ "num_tokens": 145048.0,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 0.7881773399014779,
45
+ "eval_entropy": 2.0305226903695326,
46
+ "eval_loss": 0.16201180219650269,
47
+ "eval_mean_token_accuracy": 0.9254807692307693,
48
+ "eval_num_tokens": 145048.0,
49
+ "eval_runtime": 4.4358,
50
+ "eval_samples_per_second": 45.764,
51
+ "eval_steps_per_second": 5.861,
52
+ "step": 40
53
+ },
54
+ {
55
+ "entropy": 2.0197064559670945,
56
+ "epoch": 1.1773399014778325,
57
+ "grad_norm": 2.4835453033447266,
58
+ "learning_rate": 6.143790849673204e-06,
59
+ "loss": 0.1224,
60
+ "mean_token_accuracy": 0.9418512658227848,
61
+ "num_tokens": 216334.0,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 1.1773399014778325,
66
+ "eval_entropy": 2.0450107271854696,
67
+ "eval_loss": 0.1152966096997261,
68
+ "eval_mean_token_accuracy": 0.9407051274409661,
69
+ "eval_num_tokens": 216334.0,
70
+ "eval_runtime": 4.4391,
71
+ "eval_samples_per_second": 45.73,
72
+ "eval_steps_per_second": 5.857,
73
+ "step": 60
74
+ },
75
+ {
76
+ "entropy": 1.9920476481318474,
77
+ "epoch": 1.5714285714285714,
78
+ "grad_norm": 5.939174175262451,
79
+ "learning_rate": 4.836601307189543e-06,
80
+ "loss": 0.0992,
81
+ "mean_token_accuracy": 0.95625,
82
+ "num_tokens": 289742.0,
83
+ "step": 80
84
+ },
85
+ {
86
+ "epoch": 1.5714285714285714,
87
+ "eval_entropy": 2.0168408017892103,
88
+ "eval_loss": 0.10693139582872391,
89
+ "eval_mean_token_accuracy": 0.9499198725590339,
90
+ "eval_num_tokens": 289742.0,
91
+ "eval_runtime": 4.4376,
92
+ "eval_samples_per_second": 45.746,
93
+ "eval_steps_per_second": 5.859,
94
+ "step": 80
95
  }
96
  ],
97
  "logging_steps": 20,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 5508744073159680.0,
115
  "train_batch_size": 8,
116
  "trial_name": null,
117
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c61e50d4f9446fcee0ac9cf1d49d4ab9df79d5ea238b486457c023a9ad8b210c
3
  size 6353
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c16b42437cdd8867c8a12eb5f0a4290982935695952cbda1ebb8366981cfa51f
3
  size 6353