moos124 committed on
Commit
f10256e
·
verified ·
1 Parent(s): 9482b4e

Training in progress, step 670, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e61d1ce6eee4db4f03b66ed79bd4af7559b73446813d7f61aded2136c3f4638c
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0531f8ac4ffd361df2a883371a501814336cb53edb4dea0066d7ab4aa93e3f53
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd6baca095808d02e92c43f96fc1a66a23524029cb0e1a8f1c9f1ae7822ef7dc
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2483c8508d4b9ec6d70bd368be4872d1ea2ed1409a7140667d2e3e3ce1b3b9
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b5f88a93444460d1014be9a7f9defe4780737900c6e285a029cebddf392adf3
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf717f19fdf97da383af66ee53d68fb9633f6ad6fa65078ba8a8bc9e9a19c857
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b59c5d619e8d9fe2fb09b701937a06cf585e9d9564fe1d0bf53187d4d0146c7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36844094c1f7cad7a42b80920bb25d88c570bab17b2a4fd3b3b6930500e65ec2
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.1408,
6
  "eval_steps": 500,
7
- "global_step": 660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -678,6 +678,16 @@
678
  "mean_token_accuracy": 0.7648672193288804,
679
  "num_tokens": 3052837.0,
680
  "step": 660
 
 
 
 
 
 
 
 
 
 
681
  }
682
  ],
683
  "logging_steps": 10,
@@ -697,7 +707,7 @@
697
  "attributes": {}
698
  }
699
  },
700
- "total_flos": 1.44496302286848e+16,
701
  "train_batch_size": 4,
702
  "trial_name": null,
703
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.14293333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
678
  "mean_token_accuracy": 0.7648672193288804,
679
  "num_tokens": 3052837.0,
680
  "step": 660
681
+ },
682
+ {
683
+ "entropy": 1.113273823261261,
684
+ "epoch": 0.14293333333333333,
685
+ "grad_norm": 0.2903483510017395,
686
+ "learning_rate": 9.95927013185221e-05,
687
+ "loss": 1.2222958564758302,
688
+ "mean_token_accuracy": 0.7364178076386452,
689
+ "num_tokens": 3101650.0,
690
+ "step": 670
691
  }
692
  ],
693
  "logging_steps": 10,
 
707
  "attributes": {}
708
  }
709
  },
710
+ "total_flos": 1.46852905944576e+16,
711
  "train_batch_size": 4,
712
  "trial_name": null,
713
  "trial_params": null