moos124 committed on
Commit
5ad83d8
·
verified ·
1 Parent(s): 4fe42f0

Training in progress, step 2660, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df2af7b278014781a1638c9e0d43f4fd1f22205f9a1d47e1823c55a04920a940
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc2a52f631bb23d43d1dea45d7a7325b4df494a0f95479a20fafd93f64116d3d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50e2a54735eb8cf392f843b4ca345e651749e88420d06314fade1899c18cd608
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:817167f27db28f0cc9ebe01bd7bd6a513f3c7084c9d492ea835b0752589c5ae4
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7284ecfd155617c16a7c438c925d8d666f50626b3863231652b162becace7850
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e41dc6d5212dde965a479f47b1243aedee4c086cdf430d0a16cb1dd6dc04bef
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4413f079b5a916ef56df9b83416e8145e5cbab75e88df3311028064282b6191f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda687afde00bd86952da862edf2407e53c102e389a2fa70497668b0309bd135
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5653333333333334,
6
  "eval_steps": 500,
7
- "global_step": 2650,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2668,6 +2668,16 @@
2668
  "mean_token_accuracy": 0.7484898209571839,
2669
  "num_tokens": 12319765.0,
2670
  "step": 2650
 
 
 
 
 
 
 
 
 
 
2671
  }
2672
  ],
2673
  "logging_steps": 10,
@@ -2687,7 +2697,7 @@
2687
  "attributes": {}
2688
  }
2689
  },
2690
- "total_flos": 5.839613397385421e+16,
2691
  "train_batch_size": 4,
2692
  "trial_name": null,
2693
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5674666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 2660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2668
  "mean_token_accuracy": 0.7484898209571839,
2669
  "num_tokens": 12319765.0,
2670
  "step": 2650
2671
+ },
2672
+ {
2673
+ "entropy": 0.9943435691297055,
2674
+ "epoch": 0.5674666666666667,
2675
+ "grad_norm": 0.22841870784759521,
2676
+ "learning_rate": 8.423694254899283e-05,
2677
+ "loss": 1.0581014633178711,
2678
+ "mean_token_accuracy": 0.7553936064243316,
2679
+ "num_tokens": 12364825.0,
2680
+ "step": 2660
2681
  }
2682
  ],
2683
  "logging_steps": 10,
 
2697
  "attributes": {}
2698
  }
2699
  },
2700
+ "total_flos": 5.860292932427059e+16,
2701
  "train_batch_size": 4,
2702
  "trial_name": null,
2703
  "trial_params": null