baby-dev committed on
Commit
22e19ad
·
verified ·
1 Parent(s): 4540a2f

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "o_proj",
24
  "q_proj",
 
 
25
  "k_proj",
26
- "v_proj",
27
  "up_proj",
28
- "gate_proj",
29
- "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
  "q_proj",
25
+ "o_proj",
26
+ "down_proj",
27
  "k_proj",
 
28
  "up_proj",
29
+ "gate_proj"
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a4fe9a8d061e269b4ecf9497557df4a3b79024e949961c9b3f5107954fcaa6e
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60956d4123c40702e76ca8864d83217c1dc95a2e0d8ec133ff6c3b057383fd24
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cbd18d01c35c65c21e7e47169045c0f5bd5bd616cf14690b113bc8526a377bd2
3
- size 170920532
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7140f8d37aefe0c3e09cbca9e3ccc91afbfba7f1af0069327732abc6b46868c
3
+ size 170920084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9c39ce6b0873a5ec9d657fe833fd9cbd0b01bb8cfae95545a4d03900874ab13
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7948bdba81d8f5f193578cce546ee1f9832c818d1624438f1cea32d257d1417
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b38320f40535085de37905499e1385aa34aad6a41a68078b15be5cc295f0a80
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:820d227375038f02873567923da684b2500a67bbf69535a5b7d5b614defc8040
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,174 +1,42 @@
1
  {
2
- "best_metric": 2.347598075866699,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
- "epoch": 0.09556966345825654,
5
  "eval_steps": 100,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0001365280906546522,
13
- "eval_loss": 3.062295436859131,
14
- "eval_runtime": 175.46,
15
- "eval_samples_per_second": 35.153,
16
- "eval_steps_per_second": 8.788,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.00682640453273261,
21
- "grad_norm": 70.176025390625,
22
  "learning_rate": 0.00025,
23
- "loss": 8.7436,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.01365280906546522,
28
- "grad_norm": 62.00896453857422,
29
  "learning_rate": 0.00025,
30
- "loss": 8.9827,
31
  "step": 100
32
  },
33
  {
34
  "epoch": 0.01365280906546522,
35
- "eval_loss": 2.4358701705932617,
36
- "eval_runtime": 175.0684,
37
- "eval_samples_per_second": 35.232,
38
- "eval_steps_per_second": 8.808,
39
  "step": 100
40
- },
41
- {
42
- "epoch": 0.020479213598197828,
43
- "grad_norm": 71.84336853027344,
44
- "learning_rate": 0.00025,
45
- "loss": 8.512,
46
- "step": 150
47
- },
48
- {
49
- "epoch": 0.02730561813093044,
50
- "grad_norm": 98.38690948486328,
51
- "learning_rate": 0.00025,
52
- "loss": 8.9938,
53
- "step": 200
54
- },
55
- {
56
- "epoch": 0.02730561813093044,
57
- "eval_loss": 2.347598075866699,
58
- "eval_runtime": 175.3596,
59
- "eval_samples_per_second": 35.173,
60
- "eval_steps_per_second": 8.793,
61
- "step": 200
62
- },
63
- {
64
- "epoch": 0.034132022663663046,
65
- "grad_norm": 114.78697204589844,
66
- "learning_rate": 0.00025,
67
- "loss": 8.9744,
68
- "step": 250
69
- },
70
- {
71
- "epoch": 0.040958427196395655,
72
- "grad_norm": 73.92906951904297,
73
- "learning_rate": 0.00025,
74
- "loss": 8.8146,
75
- "step": 300
76
- },
77
- {
78
- "epoch": 0.040958427196395655,
79
- "eval_loss": 2.367624044418335,
80
- "eval_runtime": 174.9282,
81
- "eval_samples_per_second": 35.26,
82
- "eval_steps_per_second": 8.815,
83
- "step": 300
84
- },
85
- {
86
- "epoch": 0.04778483172912827,
87
- "grad_norm": 61.38362503051758,
88
- "learning_rate": 0.00025,
89
- "loss": 8.7201,
90
- "step": 350
91
- },
92
- {
93
- "epoch": 0.05461123626186088,
94
- "grad_norm": 85.3577880859375,
95
- "learning_rate": 0.00025,
96
- "loss": 8.8152,
97
- "step": 400
98
- },
99
- {
100
- "epoch": 0.05461123626186088,
101
- "eval_loss": 2.380793809890747,
102
- "eval_runtime": 174.9714,
103
- "eval_samples_per_second": 35.251,
104
- "eval_steps_per_second": 8.813,
105
- "step": 400
106
- },
107
- {
108
- "epoch": 0.06143764079459349,
109
- "grad_norm": 126.75369262695312,
110
- "learning_rate": 0.00025,
111
- "loss": 8.8341,
112
- "step": 450
113
- },
114
- {
115
- "epoch": 0.06826404532732609,
116
- "grad_norm": 77.14250946044922,
117
- "learning_rate": 0.00025,
118
- "loss": 8.8336,
119
- "step": 500
120
- },
121
- {
122
- "epoch": 0.06826404532732609,
123
- "eval_loss": 2.465590000152588,
124
- "eval_runtime": 175.1619,
125
- "eval_samples_per_second": 35.213,
126
- "eval_steps_per_second": 8.803,
127
- "step": 500
128
- },
129
- {
130
- "epoch": 0.07509044986005871,
131
- "grad_norm": 154.3630828857422,
132
- "learning_rate": 0.00025,
133
- "loss": 8.9199,
134
- "step": 550
135
- },
136
- {
137
- "epoch": 0.08191685439279131,
138
- "grad_norm": 96.36137390136719,
139
- "learning_rate": 0.00025,
140
- "loss": 9.0899,
141
- "step": 600
142
- },
143
- {
144
- "epoch": 0.08191685439279131,
145
- "eval_loss": 2.384577989578247,
146
- "eval_runtime": 175.3324,
147
- "eval_samples_per_second": 35.179,
148
- "eval_steps_per_second": 8.795,
149
- "step": 600
150
- },
151
- {
152
- "epoch": 0.08874325892552393,
153
- "grad_norm": 108.7186508178711,
154
- "learning_rate": 0.00025,
155
- "loss": 9.1215,
156
- "step": 650
157
- },
158
- {
159
- "epoch": 0.09556966345825654,
160
- "grad_norm": 92.59368896484375,
161
- "learning_rate": 0.00025,
162
- "loss": 9.2024,
163
- "step": 700
164
- },
165
- {
166
- "epoch": 0.09556966345825654,
167
- "eval_loss": 2.4196524620056152,
168
- "eval_runtime": 175.6839,
169
- "eval_samples_per_second": 35.109,
170
- "eval_steps_per_second": 8.777,
171
- "step": 700
172
  }
173
  ],
174
  "logging_steps": 50,
@@ -188,7 +56,7 @@
188
  "attributes": {}
189
  }
190
  },
191
- "total_flos": 2.475384335499264e+17,
192
  "train_batch_size": 4,
193
  "trial_name": null,
194
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.436293601989746,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 0.01365280906546522,
5
  "eval_steps": 100,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.0001365280906546522,
13
+ "eval_loss": 3.0683064460754395,
14
+ "eval_runtime": 176.1658,
15
+ "eval_samples_per_second": 35.012,
16
+ "eval_steps_per_second": 8.753,
17
  "step": 1
18
  },
19
  {
20
  "epoch": 0.00682640453273261,
21
+ "grad_norm": 47.15786361694336,
22
  "learning_rate": 0.00025,
23
+ "loss": 8.7525,
24
  "step": 50
25
  },
26
  {
27
  "epoch": 0.01365280906546522,
28
+ "grad_norm": 86.82730102539062,
29
  "learning_rate": 0.00025,
30
+ "loss": 8.94,
31
  "step": 100
32
  },
33
  {
34
  "epoch": 0.01365280906546522,
35
+ "eval_loss": 2.436293601989746,
36
+ "eval_runtime": 176.1858,
37
+ "eval_samples_per_second": 35.009,
38
+ "eval_steps_per_second": 8.752,
39
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  }
41
  ],
42
  "logging_steps": 50,
 
56
  "attributes": {}
57
  }
58
  },
59
+ "total_flos": 3.53626333642752e+16,
60
  "train_batch_size": 4,
61
  "trial_name": null,
62
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e914a4e5888a1c0b239ff8cc1241b42a96888544eea02802cc1a066a8c763b0c
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f4f8aae3baeda401bd6f9758e8af16e75c2a1ef864c98d43053f4089ef95e26
3
  size 6776