moos124 committed on
Commit
7930e1a
·
verified ·
1 Parent(s): 542c469

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:711ba2d185c44fc8e20ab4f888d62244e3c1309e7490154583be93f2b928c3b2
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53346a82ae215f6aa601bad33bbdf44cc81dabd5f3b7ffa0278521604a9d7247
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4a6f5f5c5a09e17c46b8b0ba0b83e713bf257aa5fb227f4925ed8ce8369a498
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4bea2dddc467b755411ea03249fcb45acb1a7d0b1d63bc3bd89bcf576b51cac
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b0aa5778692ef33a083df0d0ba2997f41efb39e2997b2a7fb5e6458610d0f68
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba8424e6ca8dc52c976593578f8900a61445da0a32107b67688c0971f3ca77e2
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adb30875ad280e3402b454dabfc01daec7a7f2c00677260bacf1b4c6b86f5931
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c8ad7d484220c33d58f2ec0797ed03142384c76c415a1aa226b570f1fa24558
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.008533333333333334,
6
  "eval_steps": 500,
7
- "global_step": 40,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -58,6 +58,16 @@
58
  "mean_token_accuracy": 0.6752387754619121,
59
  "num_tokens": 182338.0,
60
  "step": 40
 
 
 
 
 
 
 
 
 
 
61
  }
62
  ],
63
  "logging_steps": 10,
@@ -77,7 +87,7 @@
77
  "attributes": {}
78
  }
79
  },
80
- "total_flos": 899808051293184.0,
81
  "train_batch_size": 4,
82
  "trial_name": null,
83
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.010666666666666666,
6
  "eval_steps": 500,
7
+ "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
58
  "mean_token_accuracy": 0.6752387754619121,
59
  "num_tokens": 182338.0,
60
  "step": 40
61
+ },
62
+ {
63
+ "entropy": 1.236506675183773,
64
+ "epoch": 0.010666666666666666,
65
+ "grad_norm": 0.5518174171447754,
66
+ "learning_rate": 1.6333333333333335e-05,
67
+ "loss": 1.9000749588012695,
68
+ "mean_token_accuracy": 0.6715238064527511,
69
+ "num_tokens": 235345.0,
70
+ "step": 50
71
  }
72
  ],
73
  "logging_steps": 10,
 
87
  "attributes": {}
88
  }
89
  },
90
+ "total_flos": 1164045052624896.0,
91
  "train_batch_size": 4,
92
  "trial_name": null,
93
  "trial_params": null