moos124 committed on
Commit
02a7a6c
·
verified ·
1 Parent(s): 02dba6d

Training in progress, step 3550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6abe6b20e50f0cd882bad9eae9273328985ecef8a256aadab122834b3151431
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:426be9a43c12141fa5790c71f51d784566641c58c8c212414e4ef4bba36e8719
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b4f557f12ca90d94b174420b4779b88d959d7c57a6beb85b9140065d127466f
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5d0a7907b49dce936894818200b0755db855512004b105170a76710c57e2c0b
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2875a4b179039cc15eab719821faa1d49633f813ba1cc64aa5c16a4a837f702
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f12378eadbf68114d99c85b8b3414a0aa5d00fdcd928f61ed245f1a29dab32f9
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe86c621cf5e24510a000969182228a8eda127b5a3e17532385a1501315df4c0
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:672108c6f68a182db61db4c5d50cde93962aa7a8768280cbd14655c187d8e697
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7552,
6
  "eval_steps": 500,
7
- "global_step": 3540,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3558,6 +3558,16 @@
3558
  "mean_token_accuracy": 0.7466149963438511,
3559
  "num_tokens": 16498069.0,
3560
  "step": 3540
 
 
 
 
 
 
 
 
 
 
3561
  }
3562
  ],
3563
  "logging_steps": 10,
@@ -3577,7 +3587,7 @@
3577
  "attributes": {}
3578
  }
3579
  },
3580
- "total_flos": 7.818332279145984e+16,
3581
  "train_batch_size": 4,
3582
  "trial_name": null,
3583
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7573333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 3550,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3558
  "mean_token_accuracy": 0.7466149963438511,
3559
  "num_tokens": 16498069.0,
3560
  "step": 3540
3561
+ },
3562
+ {
3563
+ "entropy": 0.932567299157381,
3564
+ "epoch": 0.7573333333333333,
3565
+ "grad_norm": 0.2745480239391327,
3566
+ "learning_rate": 7.157604980736962e-05,
3567
+ "loss": 1.02783260345459,
3568
+ "mean_token_accuracy": 0.7691405609250068,
3569
+ "num_tokens": 16546746.0,
3570
+ "step": 3550
3571
  }
3572
  ],
3573
  "logging_steps": 10,
 
3587
  "attributes": {}
3588
  }
3589
  },
3590
+ "total_flos": 7.842238964359066e+16,
3591
  "train_batch_size": 4,
3592
  "trial_name": null,
3593
  "trial_params": null