moos124 committed on
Commit
f6a7b6a
·
verified ·
1 Parent(s): 7b0f091

Training in progress, step 3520, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2a6594af318f768698e46ac33dc99ef63f42a5d03bca8c599f745bb426c122e
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eaabce9a37b2dc21c4ddcfe2cc5789904f2de3abd7f17c2f73a36bc76bc5dc5d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ede8203fd1b0ac93dfee8feeeb24c21d69cfda7d7a2982d535dfb38b2ee91a1
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cd72153623ce8b1d5da24cf3891fd3128eaf61447bf3ad3158a82ee0a361466
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c028e0f5cb02e2b669b527e06f6f7cacda2350ebfba70f1d680f14324a0ea68
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1795a6a4ac0dd6383625543080a09d385e140315b6e92298e0cedcdf486f7f99
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4db738a52663c624104403330bb40ae7d88dddbaac71b85203b9cfdaf2fee888
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b768181766a9a624e33d294d2855e294f5735388c4e8a9a87ae347064fbed161
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.7488,
6
  "eval_steps": 500,
7
- "global_step": 3510,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3528,6 +3528,16 @@
3528
  "mean_token_accuracy": 0.7408729113638401,
3529
  "num_tokens": 16355489.0,
3530
  "step": 3510
 
 
 
 
 
 
 
 
 
 
3531
  }
3532
  ],
3533
  "logging_steps": 10,
@@ -3547,7 +3557,7 @@
3547
  "attributes": {}
3548
  }
3549
  },
3550
- "total_flos": 7.749732132440986e+16,
3551
  "train_batch_size": 4,
3552
  "trial_name": null,
3553
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.7509333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 3520,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3528
  "mean_token_accuracy": 0.7408729113638401,
3529
  "num_tokens": 16355489.0,
3530
  "step": 3510
3531
+ },
3532
+ {
3533
+ "entropy": 0.9718878209590912,
3534
+ "epoch": 0.7509333333333333,
3535
+ "grad_norm": 0.3055277168750763,
3536
+ "learning_rate": 7.20432628121669e-05,
3537
+ "loss": 1.1054911613464355,
3538
+ "mean_token_accuracy": 0.7583063259720803,
3539
+ "num_tokens": 16400826.0,
3540
+ "step": 3520
3541
  }
3542
  ],
3543
  "logging_steps": 10,
 
3557
  "attributes": {}
3558
  }
3559
  },
3560
+ "total_flos": 7.770614434527744e+16,
3561
  "train_batch_size": 4,
3562
  "trial_name": null,
3563
  "trial_params": null