ghidav commited on
Commit
ff55734
·
verified ·
1 Parent(s): 89d4079

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ optimizer.pt
2
+ scheduler.pt
3
+ emissions.csv
checkpoint-650/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.35.2",
37
+ "use_cache": false,
38
+ "vocab_size": 50257
39
+ }
checkpoint-650/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.35.2"
6
+ }
checkpoint-650/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382198e6cd0cac4f332170e12112eb84deda057ba97f6519ca05753033ccd9ab
3
+ size 497774208
checkpoint-650/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc99124f8c990480e26e94fe84470cb27ca559f3ebee8ca6904cd99d0d9e7bb5
3
+ size 995641861
checkpoint-650/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad6c28002af6bb4669d6d2df8dcb091a4be5a79ee965e45e468cc707112e210
3
+ size 17641
checkpoint-650/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59c9b2dab80c1c8446fc580818f0fd94aa11197aa1aba15285e6efccfa5bd6c9
3
+ size 627
checkpoint-650/trainer_state.json ADDED
@@ -0,0 +1,513 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.6376384496688843,
3
+ "best_model_checkpoint": "models/gpt2-lora-20g2s/checkpoint-650",
4
+ "epoch": 3.7818181818181817,
5
+ "eval_steps": 50,
6
+ "global_step": 650,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.06,
13
+ "learning_rate": 2e-05,
14
+ "loss": 3.2975,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.12,
19
+ "learning_rate": 1.9703264094955493e-05,
20
+ "loss": 2.4638,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.17,
25
+ "learning_rate": 1.940652818991098e-05,
26
+ "loss": 2.0924,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.23,
31
+ "learning_rate": 1.910979228486647e-05,
32
+ "loss": 1.9836,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.29,
37
+ "learning_rate": 1.8813056379821958e-05,
38
+ "loss": 1.9073,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.29,
43
+ "eval_loss": 1.7905287742614746,
44
+ "eval_runtime": 38.1325,
45
+ "eval_samples_per_second": 52.449,
46
+ "eval_steps_per_second": 6.556,
47
+ "step": 50
48
+ },
49
+ {
50
+ "epoch": 0.35,
51
+ "learning_rate": 1.851632047477745e-05,
52
+ "loss": 1.9059,
53
+ "step": 60
54
+ },
55
+ {
56
+ "epoch": 0.41,
57
+ "learning_rate": 1.821958456973294e-05,
58
+ "loss": 1.8664,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 0.47,
63
+ "learning_rate": 1.792284866468843e-05,
64
+ "loss": 1.8451,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.52,
69
+ "learning_rate": 1.7626112759643918e-05,
70
+ "loss": 1.8454,
71
+ "step": 90
72
+ },
73
+ {
74
+ "epoch": 0.58,
75
+ "learning_rate": 1.732937685459941e-05,
76
+ "loss": 1.8305,
77
+ "step": 100
78
+ },
79
+ {
80
+ "epoch": 0.58,
81
+ "eval_loss": 1.7229812145233154,
82
+ "eval_runtime": 37.9618,
83
+ "eval_samples_per_second": 52.685,
84
+ "eval_steps_per_second": 6.586,
85
+ "step": 100
86
+ },
87
+ {
88
+ "epoch": 0.64,
89
+ "learning_rate": 1.7032640949554898e-05,
90
+ "loss": 1.8263,
91
+ "step": 110
92
+ },
93
+ {
94
+ "epoch": 0.7,
95
+ "learning_rate": 1.673590504451039e-05,
96
+ "loss": 1.8206,
97
+ "step": 120
98
+ },
99
+ {
100
+ "epoch": 0.76,
101
+ "learning_rate": 1.6439169139465877e-05,
102
+ "loss": 1.7967,
103
+ "step": 130
104
+ },
105
+ {
106
+ "epoch": 0.81,
107
+ "learning_rate": 1.6142433234421366e-05,
108
+ "loss": 1.7962,
109
+ "step": 140
110
+ },
111
+ {
112
+ "epoch": 0.87,
113
+ "learning_rate": 1.5845697329376857e-05,
114
+ "loss": 1.7786,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 0.87,
119
+ "eval_loss": 1.6951161623001099,
120
+ "eval_runtime": 37.8104,
121
+ "eval_samples_per_second": 52.895,
122
+ "eval_steps_per_second": 6.612,
123
+ "step": 150
124
+ },
125
+ {
126
+ "epoch": 0.93,
127
+ "learning_rate": 1.5548961424332346e-05,
128
+ "loss": 1.7931,
129
+ "step": 160
130
+ },
131
+ {
132
+ "epoch": 0.99,
133
+ "learning_rate": 1.5252225519287836e-05,
134
+ "loss": 1.7973,
135
+ "step": 170
136
+ },
137
+ {
138
+ "epoch": 1.05,
139
+ "learning_rate": 1.4955489614243324e-05,
140
+ "loss": 1.7681,
141
+ "step": 180
142
+ },
143
+ {
144
+ "epoch": 1.11,
145
+ "learning_rate": 1.4658753709198814e-05,
146
+ "loss": 1.7741,
147
+ "step": 190
148
+ },
149
+ {
150
+ "epoch": 1.16,
151
+ "learning_rate": 1.4362017804154305e-05,
152
+ "loss": 1.7542,
153
+ "step": 200
154
+ },
155
+ {
156
+ "epoch": 1.16,
157
+ "eval_loss": 1.6791621446609497,
158
+ "eval_runtime": 37.8129,
159
+ "eval_samples_per_second": 52.892,
160
+ "eval_steps_per_second": 6.612,
161
+ "step": 200
162
+ },
163
+ {
164
+ "epoch": 1.22,
165
+ "learning_rate": 1.4065281899109794e-05,
166
+ "loss": 1.7657,
167
+ "step": 210
168
+ },
169
+ {
170
+ "epoch": 1.28,
171
+ "learning_rate": 1.3768545994065284e-05,
172
+ "loss": 1.7393,
173
+ "step": 220
174
+ },
175
+ {
176
+ "epoch": 1.34,
177
+ "learning_rate": 1.3471810089020773e-05,
178
+ "loss": 1.7611,
179
+ "step": 230
180
+ },
181
+ {
182
+ "epoch": 1.4,
183
+ "learning_rate": 1.3175074183976262e-05,
184
+ "loss": 1.7362,
185
+ "step": 240
186
+ },
187
+ {
188
+ "epoch": 1.45,
189
+ "learning_rate": 1.2878338278931752e-05,
190
+ "loss": 1.7204,
191
+ "step": 250
192
+ },
193
+ {
194
+ "epoch": 1.45,
195
+ "eval_loss": 1.6689482927322388,
196
+ "eval_runtime": 37.8604,
197
+ "eval_samples_per_second": 52.826,
198
+ "eval_steps_per_second": 6.603,
199
+ "step": 250
200
+ },
201
+ {
202
+ "epoch": 1.51,
203
+ "learning_rate": 1.258160237388724e-05,
204
+ "loss": 1.7452,
205
+ "step": 260
206
+ },
207
+ {
208
+ "epoch": 1.57,
209
+ "learning_rate": 1.2284866468842732e-05,
210
+ "loss": 1.7307,
211
+ "step": 270
212
+ },
213
+ {
214
+ "epoch": 1.63,
215
+ "learning_rate": 1.1988130563798221e-05,
216
+ "loss": 1.7232,
217
+ "step": 280
218
+ },
219
+ {
220
+ "epoch": 1.69,
221
+ "learning_rate": 1.169139465875371e-05,
222
+ "loss": 1.7132,
223
+ "step": 290
224
+ },
225
+ {
226
+ "epoch": 1.75,
227
+ "learning_rate": 1.13946587537092e-05,
228
+ "loss": 1.7233,
229
+ "step": 300
230
+ },
231
+ {
232
+ "epoch": 1.75,
233
+ "eval_loss": 1.6592705249786377,
234
+ "eval_runtime": 37.8213,
235
+ "eval_samples_per_second": 52.88,
236
+ "eval_steps_per_second": 6.61,
237
+ "step": 300
238
+ },
239
+ {
240
+ "epoch": 1.8,
241
+ "learning_rate": 1.1097922848664688e-05,
242
+ "loss": 1.7342,
243
+ "step": 310
244
+ },
245
+ {
246
+ "epoch": 1.86,
247
+ "learning_rate": 1.080118694362018e-05,
248
+ "loss": 1.698,
249
+ "step": 320
250
+ },
251
+ {
252
+ "epoch": 1.92,
253
+ "learning_rate": 1.050445103857567e-05,
254
+ "loss": 1.714,
255
+ "step": 330
256
+ },
257
+ {
258
+ "epoch": 1.98,
259
+ "learning_rate": 1.0207715133531158e-05,
260
+ "loss": 1.7059,
261
+ "step": 340
262
+ },
263
+ {
264
+ "epoch": 2.04,
265
+ "learning_rate": 9.910979228486648e-06,
266
+ "loss": 1.7033,
267
+ "step": 350
268
+ },
269
+ {
270
+ "epoch": 2.04,
271
+ "eval_loss": 1.6531213521957397,
272
+ "eval_runtime": 37.9884,
273
+ "eval_samples_per_second": 52.648,
274
+ "eval_steps_per_second": 6.581,
275
+ "step": 350
276
+ },
277
+ {
278
+ "epoch": 2.09,
279
+ "learning_rate": 9.614243323442138e-06,
280
+ "loss": 1.7045,
281
+ "step": 360
282
+ },
283
+ {
284
+ "epoch": 2.15,
285
+ "learning_rate": 9.317507418397626e-06,
286
+ "loss": 1.6734,
287
+ "step": 370
288
+ },
289
+ {
290
+ "epoch": 2.21,
291
+ "learning_rate": 9.020771513353116e-06,
292
+ "loss": 1.6923,
293
+ "step": 380
294
+ },
295
+ {
296
+ "epoch": 2.27,
297
+ "learning_rate": 8.724035608308606e-06,
298
+ "loss": 1.6846,
299
+ "step": 390
300
+ },
301
+ {
302
+ "epoch": 2.33,
303
+ "learning_rate": 8.427299703264096e-06,
304
+ "loss": 1.689,
305
+ "step": 400
306
+ },
307
+ {
308
+ "epoch": 2.33,
309
+ "eval_loss": 1.6485120058059692,
310
+ "eval_runtime": 37.8818,
311
+ "eval_samples_per_second": 52.796,
312
+ "eval_steps_per_second": 6.599,
313
+ "step": 400
314
+ },
315
+ {
316
+ "epoch": 2.39,
317
+ "learning_rate": 8.130563798219586e-06,
318
+ "loss": 1.6893,
319
+ "step": 410
320
+ },
321
+ {
322
+ "epoch": 2.44,
323
+ "learning_rate": 7.833827893175074e-06,
324
+ "loss": 1.6967,
325
+ "step": 420
326
+ },
327
+ {
328
+ "epoch": 2.5,
329
+ "learning_rate": 7.537091988130565e-06,
330
+ "loss": 1.7133,
331
+ "step": 430
332
+ },
333
+ {
334
+ "epoch": 2.56,
335
+ "learning_rate": 7.2403560830860545e-06,
336
+ "loss": 1.6804,
337
+ "step": 440
338
+ },
339
+ {
340
+ "epoch": 2.62,
341
+ "learning_rate": 6.943620178041544e-06,
342
+ "loss": 1.6858,
343
+ "step": 450
344
+ },
345
+ {
346
+ "epoch": 2.62,
347
+ "eval_loss": 1.6442703008651733,
348
+ "eval_runtime": 37.7726,
349
+ "eval_samples_per_second": 52.948,
350
+ "eval_steps_per_second": 6.619,
351
+ "step": 450
352
+ },
353
+ {
354
+ "epoch": 2.68,
355
+ "learning_rate": 6.646884272997033e-06,
356
+ "loss": 1.7165,
357
+ "step": 460
358
+ },
359
+ {
360
+ "epoch": 2.73,
361
+ "learning_rate": 6.3501483679525235e-06,
362
+ "loss": 1.7057,
363
+ "step": 470
364
+ },
365
+ {
366
+ "epoch": 2.79,
367
+ "learning_rate": 6.0534124629080126e-06,
368
+ "loss": 1.7044,
369
+ "step": 480
370
+ },
371
+ {
372
+ "epoch": 2.85,
373
+ "learning_rate": 5.756676557863502e-06,
374
+ "loss": 1.7139,
375
+ "step": 490
376
+ },
377
+ {
378
+ "epoch": 2.91,
379
+ "learning_rate": 5.459940652818992e-06,
380
+ "loss": 1.6919,
381
+ "step": 500
382
+ },
383
+ {
384
+ "epoch": 2.91,
385
+ "eval_loss": 1.6420339345932007,
386
+ "eval_runtime": 37.8869,
387
+ "eval_samples_per_second": 52.789,
388
+ "eval_steps_per_second": 6.599,
389
+ "step": 500
390
+ },
391
+ {
392
+ "epoch": 2.97,
393
+ "learning_rate": 5.163204747774481e-06,
394
+ "loss": 1.6894,
395
+ "step": 510
396
+ },
397
+ {
398
+ "epoch": 3.03,
399
+ "learning_rate": 4.866468842729971e-06,
400
+ "loss": 1.6738,
401
+ "step": 520
402
+ },
403
+ {
404
+ "epoch": 3.08,
405
+ "learning_rate": 4.5697329376854606e-06,
406
+ "loss": 1.6934,
407
+ "step": 530
408
+ },
409
+ {
410
+ "epoch": 3.14,
411
+ "learning_rate": 4.27299703264095e-06,
412
+ "loss": 1.6921,
413
+ "step": 540
414
+ },
415
+ {
416
+ "epoch": 3.2,
417
+ "learning_rate": 3.97626112759644e-06,
418
+ "loss": 1.694,
419
+ "step": 550
420
+ },
421
+ {
422
+ "epoch": 3.2,
423
+ "eval_loss": 1.6401287317276,
424
+ "eval_runtime": 37.7337,
425
+ "eval_samples_per_second": 53.003,
426
+ "eval_steps_per_second": 6.625,
427
+ "step": 550
428
+ },
429
+ {
430
+ "epoch": 3.26,
431
+ "learning_rate": 3.679525222551929e-06,
432
+ "loss": 1.6873,
433
+ "step": 560
434
+ },
435
+ {
436
+ "epoch": 3.32,
437
+ "learning_rate": 3.382789317507419e-06,
438
+ "loss": 1.6865,
439
+ "step": 570
440
+ },
441
+ {
442
+ "epoch": 3.37,
443
+ "learning_rate": 3.086053412462908e-06,
444
+ "loss": 1.6559,
445
+ "step": 580
446
+ },
447
+ {
448
+ "epoch": 3.43,
449
+ "learning_rate": 2.789317507418398e-06,
450
+ "loss": 1.6821,
451
+ "step": 590
452
+ },
453
+ {
454
+ "epoch": 3.49,
455
+ "learning_rate": 2.4925816023738876e-06,
456
+ "loss": 1.6758,
457
+ "step": 600
458
+ },
459
+ {
460
+ "epoch": 3.49,
461
+ "eval_loss": 1.6383044719696045,
462
+ "eval_runtime": 37.9349,
463
+ "eval_samples_per_second": 52.722,
464
+ "eval_steps_per_second": 6.59,
465
+ "step": 600
466
+ },
467
+ {
468
+ "epoch": 3.55,
469
+ "learning_rate": 2.195845697329377e-06,
470
+ "loss": 1.6634,
471
+ "step": 610
472
+ },
473
+ {
474
+ "epoch": 3.61,
475
+ "learning_rate": 1.8991097922848666e-06,
476
+ "loss": 1.6653,
477
+ "step": 620
478
+ },
479
+ {
480
+ "epoch": 3.67,
481
+ "learning_rate": 1.6023738872403563e-06,
482
+ "loss": 1.6663,
483
+ "step": 630
484
+ },
485
+ {
486
+ "epoch": 3.72,
487
+ "learning_rate": 1.3056379821958458e-06,
488
+ "loss": 1.6745,
489
+ "step": 640
490
+ },
491
+ {
492
+ "epoch": 3.78,
493
+ "learning_rate": 1.0089020771513354e-06,
494
+ "loss": 1.6997,
495
+ "step": 650
496
+ },
497
+ {
498
+ "epoch": 3.78,
499
+ "eval_loss": 1.6376384496688843,
500
+ "eval_runtime": 37.8135,
501
+ "eval_samples_per_second": 52.891,
502
+ "eval_steps_per_second": 6.611,
503
+ "step": 650
504
+ }
505
+ ],
506
+ "logging_steps": 10,
507
+ "max_steps": 684,
508
+ "num_train_epochs": 4,
509
+ "save_steps": 50,
510
+ "total_flos": 7247277483622400.0,
511
+ "trial_name": null,
512
+ "trial_params": null
513
+ }
checkpoint-650/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e45d0b6463555987601f0aaee8f8db6dcf6ba1e87e756ba81e518e89348c70a7
3
+ size 4091
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 768,
16
+ "n_head": 12,
17
+ "n_inner": null,
18
+ "n_layer": 12,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.35.2",
37
+ "use_cache": false,
38
+ "vocab_size": 50257
39
+ }
emissions.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2
+ 2024-02-17T22:56:09,codecarbon,376d5494-80d9-4c1f-a01f-3f0425571f06,4937.664623737335,0.09999964238539377,2.025241688239727e-05,42.5,102.62131288069982,38.31930112838745,0.058291479068166674,0.143021615528312,0.05249310435122111,0.2538061989476999,USA,USA,Iowa,gcp,us-central1,Linux-5.10.0-28-cloud-amd64-x86_64-with-glibc2.31,3.9.2,2.3.4,16,Intel(R) Xeon(R) CPU @ 2.30GHz,4,4 x Tesla T4,,,102.1848030090332,machine,Y,1.0
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.35.2"
6
+ }