moos124 committed on
Commit
ecf1869
·
verified ·
1 Parent(s): 6a62efe

Training in progress, step 2670, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc2a52f631bb23d43d1dea45d7a7325b4df494a0f95479a20fafd93f64116d3d
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fea02dafee6af77fcc05302cdcd3c08925d33a237f4d9c4f318a23d898dd49e
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:817167f27db28f0cc9ebe01bd7bd6a513f3c7084c9d492ea835b0752589c5ae4
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f8b1491e54b56dac6668bb1f9b8008f64661441ec030dec6d7537277a8d134
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e41dc6d5212dde965a479f47b1243aedee4c086cdf430d0a16cb1dd6dc04bef
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93050bb0e4996f96ffd6fe9bb4ee470c055d92b6a4ab88576dc261fd1add6462
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda687afde00bd86952da862edf2407e53c102e389a2fa70497668b0309bd135
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:533c8992a24b25a748216ad595bf3b193ab5865cb5a440e1bcd3c8ac180eddd5
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5674666666666667,
6
  "eval_steps": 500,
7
- "global_step": 2660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2678,6 +2678,16 @@
2678
  "mean_token_accuracy": 0.7553936064243316,
2679
  "num_tokens": 12364825.0,
2680
  "step": 2660
 
 
 
 
 
 
 
 
 
 
2681
  }
2682
  ],
2683
  "logging_steps": 10,
@@ -2697,7 +2707,7 @@
2697
  "attributes": {}
2698
  }
2699
  },
2700
- "total_flos": 5.860292932427059e+16,
2701
  "train_batch_size": 4,
2702
  "trial_name": null,
2703
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.5696,
6
  "eval_steps": 500,
7
+ "global_step": 2670,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2678
  "mean_token_accuracy": 0.7553936064243316,
2679
  "num_tokens": 12364825.0,
2680
  "step": 2660
2681
+ },
2682
+ {
2683
+ "entropy": 0.8084396831691265,
2684
+ "epoch": 0.5696,
2685
+ "grad_norm": 0.2357935756444931,
2686
+ "learning_rate": 8.411060522283685e-05,
2687
+ "loss": 0.8782732963562012,
2688
+ "mean_token_accuracy": 0.7895583346486091,
2689
+ "num_tokens": 12406871.0,
2690
+ "step": 2670
2691
  }
2692
  ],
2693
  "logging_steps": 10,
 
2707
  "attributes": {}
2708
  }
2709
  },
2710
+ "total_flos": 5.880851708428493e+16,
2711
  "train_batch_size": 4,
2712
  "trial_name": null,
2713
  "trial_params": null