besimray committed on
Commit
c2a5f92
·
verified ·
1 Parent(s): 0522ca6

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cbce5af76e1e48d89a397d83170bc57c3eb9658c035203bed506dd64251a17a
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9580f23f6650701b8c5c6baa7b6251793876fc45c8700caf4e48d91b6ad89806
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:138b30553fd9154412683c6e5b96709767ee98389d8f5203e88b0fa44dfbe649
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a855a05342866a4aba16944f07a8978153f6a90eff7647124c560db0db5cc6
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a25e101f39164e92467712a28e8e6cb74e5f3b47c630735ff43fc58e6de6580
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c899beef0e4b648cad8954a66a8d655d219bebd0e6f90f7ad69eea43e18a96c4
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50b80d146b8b37f82a1962ba385b8329fd6c2c35ba5116c53e131bb661ab681b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.11406784504652023,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
- "epoch": 0.7717041800643086,
5
  "eval_steps": 20,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -459,6 +459,154 @@
459
  "eval_samples_per_second": 23.441,
460
  "eval_steps_per_second": 2.43,
461
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  }
463
  ],
464
  "logging_steps": 1,
@@ -487,7 +635,7 @@
487
  "attributes": {}
488
  }
489
  },
490
- "total_flos": 1.446291621543936e+16,
491
  "train_batch_size": 10,
492
  "trial_name": null,
493
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.10349678248167038,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
+ "epoch": 1.0289389067524115,
5
  "eval_steps": 20,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
459
  "eval_samples_per_second": 23.441,
460
  "eval_steps_per_second": 2.43,
461
  "step": 60
462
+ },
463
+ {
464
+ "epoch": 0.7845659163987139,
465
+ "grad_norm": 1.2640259265899658,
466
+ "learning_rate": 0.00014135585049665207,
467
+ "loss": 0.1332,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 0.797427652733119,
472
+ "grad_norm": 2.274766445159912,
473
+ "learning_rate": 0.00013930250316539238,
474
+ "loss": 0.1278,
475
+ "step": 62
476
+ },
477
+ {
478
+ "epoch": 0.8102893890675241,
479
+ "grad_norm": 2.1382172107696533,
480
+ "learning_rate": 0.00013722936584019453,
481
+ "loss": 0.1881,
482
+ "step": 63
483
+ },
484
+ {
485
+ "epoch": 0.8231511254019293,
486
+ "grad_norm": 1.8246463537216187,
487
+ "learning_rate": 0.0001351374824081343,
488
+ "loss": 0.1527,
489
+ "step": 64
490
+ },
491
+ {
492
+ "epoch": 0.8360128617363344,
493
+ "grad_norm": 2.0515213012695312,
494
+ "learning_rate": 0.00013302790619551674,
495
+ "loss": 0.178,
496
+ "step": 65
497
+ },
498
+ {
499
+ "epoch": 0.8488745980707395,
500
+ "grad_norm": 1.2779912948608398,
501
+ "learning_rate": 0.00013090169943749476,
502
+ "loss": 0.1207,
503
+ "step": 66
504
+ },
505
+ {
506
+ "epoch": 0.8617363344051447,
507
+ "grad_norm": 1.3247287273406982,
508
+ "learning_rate": 0.00012875993274320173,
509
+ "loss": 0.1003,
510
+ "step": 67
511
+ },
512
+ {
513
+ "epoch": 0.8745980707395499,
514
+ "grad_norm": 1.5200964212417603,
515
+ "learning_rate": 0.00012660368455666752,
516
+ "loss": 0.1518,
517
+ "step": 68
518
+ },
519
+ {
520
+ "epoch": 0.887459807073955,
521
+ "grad_norm": 1.2721142768859863,
522
+ "learning_rate": 0.0001244340406137894,
523
+ "loss": 0.153,
524
+ "step": 69
525
+ },
526
+ {
527
+ "epoch": 0.9003215434083601,
528
+ "grad_norm": 1.0692765712738037,
529
+ "learning_rate": 0.00012225209339563145,
530
+ "loss": 0.1125,
531
+ "step": 70
532
+ },
533
+ {
534
+ "epoch": 0.9131832797427653,
535
+ "grad_norm": 0.9016242027282715,
536
+ "learning_rate": 0.00012005894157832729,
537
+ "loss": 0.0923,
538
+ "step": 71
539
+ },
540
+ {
541
+ "epoch": 0.9260450160771704,
542
+ "grad_norm": 0.9723653197288513,
543
+ "learning_rate": 0.00011785568947986367,
544
+ "loss": 0.0711,
545
+ "step": 72
546
+ },
547
+ {
548
+ "epoch": 0.9389067524115756,
549
+ "grad_norm": 1.1226719617843628,
550
+ "learning_rate": 0.0001156434465040231,
551
+ "loss": 0.0733,
552
+ "step": 73
553
+ },
554
+ {
555
+ "epoch": 0.9517684887459807,
556
+ "grad_norm": 0.7384305596351624,
557
+ "learning_rate": 0.00011342332658176555,
558
+ "loss": 0.0509,
559
+ "step": 74
560
+ },
561
+ {
562
+ "epoch": 0.9646302250803859,
563
+ "grad_norm": 1.1397889852523804,
564
+ "learning_rate": 0.00011119644761033078,
565
+ "loss": 0.1132,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 0.977491961414791,
570
+ "grad_norm": 0.7340204119682312,
571
+ "learning_rate": 0.00010896393089034336,
572
+ "loss": 0.0548,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 0.9903536977491961,
577
+ "grad_norm": 1.2681351900100708,
578
+ "learning_rate": 0.00010672690056120399,
579
+ "loss": 0.128,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 1.0032154340836013,
584
+ "grad_norm": 1.2592830657958984,
585
+ "learning_rate": 0.00010448648303505151,
586
+ "loss": 0.1188,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 1.0160771704180065,
591
+ "grad_norm": 0.7425731420516968,
592
+ "learning_rate": 0.00010224380642958052,
593
+ "loss": 0.0486,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 1.0289389067524115,
598
+ "grad_norm": 1.5176936388015747,
599
+ "learning_rate": 0.0001,
600
+ "loss": 0.1457,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 1.0289389067524115,
605
+ "eval_loss": 0.10349678248167038,
606
+ "eval_runtime": 6.832,
607
+ "eval_samples_per_second": 24.005,
608
+ "eval_steps_per_second": 2.488,
609
+ "step": 80
610
  }
611
  ],
612
  "logging_steps": 1,
 
635
  "attributes": {}
636
  }
637
  },
638
+ "total_flos": 1.911441083203584e+16,
639
  "train_batch_size": 10,
640
  "trial_name": null,
641
  "trial_params": null