finalform commited on
Commit
c83dca2
·
verified ·
1 Parent(s): 76b9408

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af95b1c5a220d6a55dfa035d107d9e91bfabf1b2db706471abe4481fe57a2c18
3
  size 645975704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b3ce68718b8a35a3a048606c7227ce62896cef62da3846077b079cf4e65f928f
3
  size 645975704
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a94642b6cbfd74bb89c873dfb0f163ca25cacf3de48ebe5602d93d089f703814
3
  size 1292087499
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9181fa39faa989eb419ddd21e690cc24b9c81c4b9dc4a6788f9876d477a054a
3
  size 1292087499
rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1076c60313859db66ff0de37f0791c974b8c08d8b63ddb5f7cbe475f61adaae8
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a71d6f2e3805dd0fbc1c29e9123bdf79aa32a2021db986be4d7381af5577b720
3
  size 14917
rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b18641396ff6afcf33050e5d126e243d87afe83252f83da2f3ac56c52a96e88
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f871e3d061f929ed6b8e9123a74713f9fefb89526ab0c1dde6ff4d5effb9bb4
3
  size 14917
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa1cb0b33cdae18d9d01e7f84345dc8aa89b0d8db4af3c1d3869e0251d9ef27
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f408f4bf2063c74f79db312cf2d3df67bb058ff318f993c9aede44c2e050c1
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b679dc767877b4670dc9f5034576dafc9e3e774d98ec6626b0317647bc6c0dbb
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b77c6fe0af7c0d4f86eaffcb54d6c452a11391b58cf3a89cac254d6f14013233
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 5.0,
6
  "eval_steps": 500,
7
- "global_step": 1040,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -427,6 +427,88 @@
427
  "eval_samples_per_second": 25.616,
428
  "eval_steps_per_second": 6.456,
429
  "step": 1040
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
430
  }
431
  ],
432
  "logging_steps": 25,
@@ -446,7 +528,7 @@
446
  "attributes": {}
447
  }
448
  },
449
- "total_flos": 5.12159461381505e+17,
450
  "train_batch_size": 2,
451
  "trial_name": null,
452
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 6.0,
6
  "eval_steps": 500,
7
+ "global_step": 1248,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
427
  "eval_samples_per_second": 25.616,
428
  "eval_steps_per_second": 6.456,
429
  "step": 1040
430
+ },
431
+ {
432
+ "epoch": 5.048250904704463,
433
+ "grad_norm": 0.02901943400502205,
434
+ "learning_rate": 9.77906761542642e-05,
435
+ "loss": 0.0196,
436
+ "mean_token_accuracy": 0.9936874963573574,
437
+ "num_tokens": 11906951.0,
438
+ "step": 1050
439
+ },
440
+ {
441
+ "epoch": 5.168878166465621,
442
+ "grad_norm": 0.03153559938073158,
443
+ "learning_rate": 8.685913460440795e-05,
444
+ "loss": 0.0184,
445
+ "mean_token_accuracy": 0.994182522892952,
446
+ "num_tokens": 12191281.0,
447
+ "step": 1075
448
+ },
449
+ {
450
+ "epoch": 5.2895054282267795,
451
+ "grad_norm": 0.025045236572623253,
452
+ "learning_rate": 7.64492214390088e-05,
453
+ "loss": 0.0186,
454
+ "mean_token_accuracy": 0.9937938040494919,
455
+ "num_tokens": 12477131.0,
456
+ "step": 1100
457
+ },
458
+ {
459
+ "epoch": 5.410132689987937,
460
+ "grad_norm": 0.027324741706252098,
461
+ "learning_rate": 6.659313588910162e-05,
462
+ "loss": 0.0177,
463
+ "mean_token_accuracy": 0.994279220700264,
464
+ "num_tokens": 12761273.0,
465
+ "step": 1125
466
+ },
467
+ {
468
+ "epoch": 5.530759951749095,
469
+ "grad_norm": 0.026910969987511635,
470
+ "learning_rate": 5.732136412404048e-05,
471
+ "loss": 0.0186,
472
+ "mean_token_accuracy": 0.9938160961866379,
473
+ "num_tokens": 13045337.0,
474
+ "step": 1150
475
+ },
476
+ {
477
+ "epoch": 5.651387213510254,
478
+ "grad_norm": 0.020254185423254967,
479
+ "learning_rate": 4.8662584953765875e-05,
480
+ "loss": 0.0173,
481
+ "mean_token_accuracy": 0.994459273815155,
482
+ "num_tokens": 13330131.0,
483
+ "step": 1175
484
+ },
485
+ {
486
+ "epoch": 5.772014475271411,
487
+ "grad_norm": 0.024621177464723587,
488
+ "learning_rate": 4.064358112147213e-05,
489
+ "loss": 0.0172,
490
+ "mean_token_accuracy": 0.994288050532341,
491
+ "num_tokens": 13612873.0,
492
+ "step": 1200
493
+ },
494
+ {
495
+ "epoch": 5.892641737032569,
496
+ "grad_norm": 0.026756085455417633,
497
+ "learning_rate": 3.328915646105903e-05,
498
+ "loss": 0.0175,
499
+ "mean_token_accuracy": 0.9942980527877807,
500
+ "num_tokens": 13895752.0,
501
+ "step": 1225
502
+ },
503
+ {
504
+ "epoch": 6.0,
505
+ "eval_loss": 0.04168795421719551,
506
+ "eval_mean_token_accuracy": 0.9895306697455786,
507
+ "eval_num_tokens": 14130881.0,
508
+ "eval_runtime": 14.4268,
509
+ "eval_samples_per_second": 25.577,
510
+ "eval_steps_per_second": 6.446,
511
+ "step": 1248
512
  }
513
  ],
514
  "logging_steps": 25,
 
528
  "attributes": {}
529
  }
530
  },
531
+ "total_flos": 6.145950713412321e+17,
532
  "train_batch_size": 2,
533
  "trial_name": null,
534
  "trial_params": null