madhuHuggingface commited on
Commit
644f024
·
verified ·
1 Parent(s): 0e80cf4

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:303343475309841b85eed76df986b9e6645cec918e116af2f899f51b3ecf6251
3
  size 60785144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:685409aaa17ee0260333238b8f951d4be16bd186276e4a6c264bea5d6441d5e5
3
  size 60785144
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4fde37edca5adb4005644c406622925ee8e2714e074424bc24af3f6441bbc502
3
  size 31149205
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f5ea3233d57c06aa6d5e9c81457e9e63a891ef3ef967c49d90747bc1a7f4bc
3
  size 31149205
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff4532c1ad6082d83324dc653af69e29d03fa02637d181855b2e21b79b948367
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da6cffd29d107485fd65a3291d66df7b87424f25de973bb529c2fbe605d9c752
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 2.8,
6
  "eval_steps": 500,
7
- "global_step": 700,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -498,6 +498,41 @@
498
  "learning_rate": 2.39894493676317e-06,
499
  "loss": 0.0117,
500
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  }
502
  ],
503
  "logging_steps": 10,
@@ -512,12 +547,12 @@
512
  "should_evaluate": false,
513
  "should_log": false,
514
  "should_save": true,
515
- "should_training_stop": false
516
  },
517
  "attributes": {}
518
  }
519
  },
520
- "total_flos": 2236049469769728.0,
521
  "train_batch_size": 2,
522
  "trial_name": null,
523
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0,
6
  "eval_steps": 500,
7
+ "global_step": 750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
498
  "learning_rate": 2.39894493676317e-06,
499
  "loss": 0.0117,
500
  "step": 700
501
+ },
502
+ {
503
+ "epoch": 2.84,
504
+ "grad_norm": 0.13301163911819458,
505
+ "learning_rate": 1.5526183247907778e-06,
506
+ "loss": 0.0052,
507
+ "step": 710
508
+ },
509
+ {
510
+ "epoch": 2.88,
511
+ "grad_norm": 0.2115318477153778,
512
+ "learning_rate": 8.885936006545303e-07,
513
+ "loss": 0.0094,
514
+ "step": 720
515
+ },
516
+ {
517
+ "epoch": 2.92,
518
+ "grad_norm": 0.30391114950180054,
519
+ "learning_rate": 4.0810038528209525e-07,
520
+ "loss": 0.0256,
521
+ "step": 730
522
+ },
523
+ {
524
+ "epoch": 2.96,
525
+ "grad_norm": 0.09898550063371658,
526
+ "learning_rate": 1.1202844149663661e-07,
527
+ "loss": 0.0083,
528
+ "step": 740
529
+ },
530
+ {
531
+ "epoch": 3.0,
532
+ "grad_norm": 0.2619428038597107,
533
+ "learning_rate": 9.26026380987377e-10,
534
+ "loss": 0.0144,
535
+ "step": 750
536
  }
537
  ],
538
  "logging_steps": 10,
 
547
  "should_evaluate": false,
548
  "should_log": false,
549
  "should_save": true,
550
+ "should_training_stop": true
551
  },
552
  "attributes": {}
553
  }
554
  },
555
+ "total_flos": 2397029295816192.0,
556
  "train_batch_size": 2,
557
  "trial_name": null,
558
  "trial_params": null