mgh6 committed on
Commit
21d00ad
·
verified ·
1 Parent(s): ab01a1e

Training in progress, step 6900, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24a830ff0ef712063cf263061c7715c7e4ab0d373d9ab416fed9dafed056d279
3
  size 8137792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae50fb16913624ef3c6c3e417ad92f5263de849d4297b7a9b7855b7c1a90aa12
3
  size 8137792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:238448f357f5f9357969c09f556fa44edf59423c99eb066a1c97611e85eb8392
3
  size 16386426
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b529112831d3ef3c0a1c58d46db3375d9c615744e36fc10560f30c02e5a59332
3
  size 16386426
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8815de27fb5e157afb197262352952f1b64bb5354465a8d3442882c2c526a7dd
3
  size 15006
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06051ff2563f6929a52be1f2a6173daac8bdf2ae58052429e1cd974260b79ebf
3
  size 15006
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a3bc73ddfd4eb0f55767d8999842e62d8ee283abef545702b66292a0270b4a2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:344062337e3a13dab9ea0f70913ee0c41826ffaef2b21b1cfec0d360964d9faa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.597875331979377,
5
  "eval_steps": 500,
6
- "global_step": 6800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -587,6 +587,13 @@
587
  "learning_rate": 5.466666666666666e-05,
588
  "loss": 1.2365,
589
  "step": 6800
 
 
 
 
 
 
 
590
  }
591
  ],
592
  "logging_steps": 100,
@@ -606,7 +613,7 @@
606
  "attributes": {}
607
  }
608
  },
609
- "total_flos": 1.0612979001481757e+19,
610
  "train_batch_size": 8,
611
  "trial_name": null,
612
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.797844086861428,
5
  "eval_steps": 500,
6
+ "global_step": 6900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
587
  "learning_rate": 5.466666666666666e-05,
588
  "loss": 1.2365,
589
  "step": 6800
590
+ },
591
+ {
592
+ "epoch": 13.797844086861428,
593
+ "grad_norm": 0.20576031506061554,
594
+ "learning_rate": 5.4000000000000005e-05,
595
+ "loss": 1.236,
596
+ "step": 6900
597
  }
598
  ],
599
  "logging_steps": 100,
 
613
  "attributes": {}
614
  }
615
  },
616
+ "total_flos": 1.0769053972669923e+19,
617
  "train_batch_size": 8,
618
  "trial_name": null,
619
  "trial_params": null