moos124 commited on
Commit
dab7d9c
·
verified ·
1 Parent(s): 51a513d

Training in progress, step 340, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:080e280080559f2791d55f3e9b530866ef137afce957188c20fa52869278185a
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae5b0c96b948543b4a0cfaaa841cf92df27115ed94d5b57b8d2f1ece4db3116
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9817785732336e82575cd690ac86eee871b24cb8b95746ad574d47c0f1ba7156
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7116d6d70213e242c51795bf736c96d4fc59175f6720c092180a5bd549bf32c4
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:872fe76418f47a93b19f7178149bfab3a4c567f9bf8f19fe875e06de31f34354
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1209fea45a4e9e4d845d3eacee92565a2e0b89ddb3aa42425c5191b43cfcae7
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f133a959bfddbf7e52765c340dc6f5b0914229e18f54079bc6ff8b34af89bb5f
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f226b4aaba1bef9c3d2d6adc5d1efbb6f5042041fc30343a263920b93ee20d16
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.0704,
6
  "eval_steps": 500,
7
- "global_step": 330,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -348,6 +348,16 @@
348
  "mean_token_accuracy": 0.7610545977950096,
349
  "num_tokens": 1518725.0,
350
  "step": 330
 
 
 
 
 
 
 
 
 
 
351
  }
352
  ],
353
  "logging_steps": 10,
@@ -367,7 +377,7 @@
367
  "attributes": {}
368
  }
369
  },
370
- "total_flos": 7212522925473792.0,
371
  "train_batch_size": 4,
372
  "trial_name": null,
373
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07253333333333334,
6
  "eval_steps": 500,
7
+ "global_step": 340,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
348
  "mean_token_accuracy": 0.7610545977950096,
349
  "num_tokens": 1518725.0,
350
  "step": 330
351
+ },
352
+ {
353
+ "entropy": 1.014043178409338,
354
+ "epoch": 0.07253333333333334,
355
+ "grad_norm": 0.22224737703800201,
356
+ "learning_rate": 9.999544410823167e-05,
357
+ "loss": 1.0504605293273925,
358
+ "mean_token_accuracy": 0.7481713563203811,
359
+ "num_tokens": 1570475.0,
360
+ "step": 340
361
  }
362
  ],
363
  "logging_steps": 10,
 
377
  "attributes": {}
378
  }
379
  },
380
+ "total_flos": 7467874224294912.0,
381
  "train_batch_size": 4,
382
  "trial_name": null,
383
  "trial_params": null