Thunderbolts123 committed on
Commit
47ad5fd
·
verified ·
1 Parent(s): 91e4de1

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -34,13 +34,13 @@
34
  "rank_pattern": {},
35
  "revision": null,
36
  "target_modules": [
37
- "q_proj",
38
- "gate_proj",
39
  "v_proj",
40
- "o_proj",
41
  "down_proj",
 
42
  "k_proj",
43
- "up_proj"
 
44
  ],
45
  "target_parameters": null,
46
  "task_type": "CAUSAL_LM",
 
34
  "rank_pattern": {},
35
  "revision": null,
36
  "target_modules": [
37
+ "up_proj",
 
38
  "v_proj",
 
39
  "down_proj",
40
+ "gate_proj",
41
  "k_proj",
42
+ "q_proj",
43
+ "o_proj"
44
  ],
45
  "target_parameters": null,
46
  "task_type": "CAUSAL_LM",
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c072b8006de91fd16554375d116f410546a24756b76486e7ff3f6165d5cd1c01
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b32b5959735aa57b511b7726bfbc2ef5de45ff1ebf9d62c5f60891e02815697b
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37ee5112fd5a35910b9a64c3a2c317dfb07eb43033e8df25ee5003f288101d96
3
  size 243807941
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7bb1fceeac45ca2476246b3d206978cdd4cb987a38fb6a0552e774331666a57
3
  size 243807941
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:25e591b6ccdc9dcb49a29bd97e2c898e0e2dc4799b75694557b3955730633d8b
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:317b5a305b2a9e21e527e7f85fdb3c6126a0ca02234bcb93021996746c86138a
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90daa5cc6fd25d912a2841b492510679aba1d1fd92344153762534657395f260
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8bf6871cccebbd8019e51a8751deebfdc1a27237b371091ed859a0e2e1ce5c9
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,15 +2,156 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0008,
6
  "eval_steps": 500,
7
- "global_step": 1,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
- "log_history": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "logging_steps": 10,
13
- "max_steps": 1,
14
  "num_input_tokens_seen": 0,
15
  "num_train_epochs": 1,
16
  "save_steps": 200,
@@ -21,12 +162,12 @@
21
  "should_evaluate": false,
22
  "should_log": false,
23
  "should_save": true,
24
- "should_training_stop": true
25
  },
26
  "attributes": {}
27
  }
28
  },
29
- "total_flos": 112399535898624.0,
30
  "train_batch_size": 1,
31
  "trial_name": null,
32
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.16,
6
  "eval_steps": 500,
7
+ "global_step": 200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.008,
14
+ "grad_norm": 0.800916850566864,
15
+ "learning_rate": 3.6e-05,
16
+ "loss": 1.431671142578125,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.016,
21
+ "grad_norm": 0.4533734917640686,
22
+ "learning_rate": 7.6e-05,
23
+ "loss": 1.1412681579589843,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 0.024,
28
+ "grad_norm": 0.4183805584907532,
29
+ "learning_rate": 0.000116,
30
+ "loss": 1.0069389343261719,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 0.032,
35
+ "grad_norm": 2.1650190353393555,
36
+ "learning_rate": 0.00015600000000000002,
37
+ "loss": 0.9767860412597656,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 0.04,
42
+ "grad_norm": 3.925295352935791,
43
+ "learning_rate": 0.000188,
44
+ "loss": 0.9355104446411133,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.048,
49
+ "grad_norm": 0.5158158540725708,
50
+ "learning_rate": 0.0001999732083645129,
51
+ "loss": 0.9155762672424317,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 0.056,
56
+ "grad_norm": 0.3576229214668274,
57
+ "learning_rate": 0.00019984201858549693,
58
+ "loss": 0.8876156806945801,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.064,
63
+ "grad_norm": 0.45568719506263733,
64
+ "learning_rate": 0.0001996016530250235,
65
+ "loss": 0.9064264297485352,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 0.072,
70
+ "grad_norm": 0.35059407353401184,
71
+ "learning_rate": 0.0001992523745193039,
72
+ "loss": 0.9449616432189941,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 0.08,
77
+ "grad_norm": 0.3109589219093323,
78
+ "learning_rate": 0.00019879456499925614,
79
+ "loss": 0.9112279891967774,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 0.088,
84
+ "grad_norm": 0.3478052616119385,
85
+ "learning_rate": 0.0001982287250728689,
86
+ "loss": 0.9097712516784668,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 0.096,
91
+ "grad_norm": 0.3412795960903168,
92
+ "learning_rate": 0.00019755547347779403,
93
+ "loss": 0.9231362342834473,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 0.104,
98
+ "grad_norm": 0.75782710313797,
99
+ "learning_rate": 0.00019677554640476624,
100
+ "loss": 0.9049114227294922,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 0.112,
105
+ "grad_norm": 0.35266366600990295,
106
+ "learning_rate": 0.0001958897966925891,
107
+ "loss": 0.8955144882202148,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 0.12,
112
+ "grad_norm": 0.3340380787849426,
113
+ "learning_rate": 0.00019489919289556845,
114
+ "loss": 0.9052764892578125,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.128,
119
+ "grad_norm": 0.3489997982978821,
120
+ "learning_rate": 0.00019380481822441235,
121
+ "loss": 0.918581199645996,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 0.136,
126
+ "grad_norm": 0.3366387188434601,
127
+ "learning_rate": 0.00019260786936175635,
128
+ "loss": 0.8691808700561523,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 0.144,
133
+ "grad_norm": 0.35365116596221924,
134
+ "learning_rate": 0.0001913096551536083,
135
+ "loss": 0.9018807411193848,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 0.152,
140
+ "grad_norm": 0.3102000653743744,
141
+ "learning_rate": 0.0001899115951781446,
142
+ "loss": 0.8774255752563477,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 0.16,
147
+ "grad_norm": 0.41139864921569824,
148
+ "learning_rate": 0.00018841521819342236,
149
+ "loss": 0.8466087341308594,
150
+ "step": 200
151
+ }
152
+ ],
153
  "logging_steps": 10,
154
+ "max_steps": 1000,
155
  "num_input_tokens_seen": 0,
156
  "num_train_epochs": 1,
157
  "save_steps": 200,
 
162
  "should_evaluate": false,
163
  "should_log": false,
164
  "should_save": true,
165
+ "should_training_stop": false
166
  },
167
  "attributes": {}
168
  }
169
  },
170
+ "total_flos": 2.8420601253617664e+16,
171
  "train_batch_size": 1,
172
  "trial_name": null,
173
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f64590a7b7803a555a01a39aba443c022199cbe4883538827236b8875f588e15
3
  size 5649
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6eed1a57e558818ad765c74a1b5250d3193d3f5a9f2c99695dab37df84671b2
3
  size 5649