moos124 commited on
Commit
9ccd066
·
verified ·
1 Parent(s): 0beab72

Training in progress, step 350, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ae5b0c96b948543b4a0cfaaa841cf92df27115ed94d5b57b8d2f1ece4db3116
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf8ee527fa91aa5dbdcdb819ff11185ee6d1e902df96a1650bf4a6d2e009058c
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7116d6d70213e242c51795bf736c96d4fc59175f6720c092180a5bd549bf32c4
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c6e4c4ad130e31bde8c0eaab8e9d95a7fc24d96382dcef585a0fd79ce24228e
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d1209fea45a4e9e4d845d3eacee92565a2e0b89ddb3aa42425c5191b43cfcae7
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7d2e204c8e6c7ce35f9439cf108ff7970920b5f63992b1b4c1109c1020960ac
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f226b4aaba1bef9c3d2d6adc5d1efbb6f5042041fc30343a263920b93ee20d16
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60cc17b17b71f67b56c6a43176ee4ccaf356beb51adcc6b22acb39d21228113a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.07253333333333334,
6
  "eval_steps": 500,
7
- "global_step": 340,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -358,6 +358,16 @@
358
  "mean_token_accuracy": 0.7481713563203811,
359
  "num_tokens": 1570475.0,
360
  "step": 340
 
 
 
 
 
 
 
 
 
 
361
  }
362
  ],
363
  "logging_steps": 10,
@@ -377,7 +387,7 @@
377
  "attributes": {}
378
  }
379
  },
380
- "total_flos": 7467874224294912.0,
381
  "train_batch_size": 4,
382
  "trial_name": null,
383
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07466666666666667,
6
  "eval_steps": 500,
7
+ "global_step": 350,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
358
  "mean_token_accuracy": 0.7481713563203811,
359
  "num_tokens": 1570475.0,
360
  "step": 340
361
+ },
362
+ {
363
+ "entropy": 0.9679042734205723,
364
+ "epoch": 0.07466666666666667,
365
+ "grad_norm": 0.2880384027957916,
366
+ "learning_rate": 9.999280828400803e-05,
367
+ "loss": 1.0482870101928712,
368
+ "mean_token_accuracy": 0.7645679444074631,
369
+ "num_tokens": 1617068.0,
370
+ "step": 350
371
  }
372
  ],
373
  "logging_steps": 10,
 
387
  "attributes": {}
388
  }
389
  },
390
+ "total_flos": 7679995589096448.0,
391
  "train_batch_size": 4,
392
  "trial_name": null,
393
  "trial_params": null