Mels22 committed on
Commit
6797c5d
·
verified ·
1 Parent(s): 18c95d3

Training in progress, epoch 4, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7ba1a686c8a39f794d28715ce50c9040e9efadcb215a6f1f68ceb99c4484a7
3
  size 2526824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1f89678bfcaea704d111e574ffb02381b58f29949b041052259e9fd46388477
3
  size 2526824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72345b01893c27f8914342501492def172f5e4c93c9da8b92a4adb674b44009b
3
- size 1628858
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1105023e70cd979f84058cf88271139d3dad49f2efada53694e464226c755e6
3
+ size 1628986
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afe3dd6761509c3affdccc74c67f5156c1b67b6fe137882be5fc0e5e35d8afb0
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d417edef81aadd1524150f603a1f1f4d9e4feaebc56043e6552c54b598024246
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48c066b8439215624d8c1217bbfd75abe82eda412f0f6f8d22a58b49123756c6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18c56cb174b19403e5cd64826715733b24b1187efc1a85a31ad182bce916247
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 207,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -43,6 +43,13 @@
43
  "learning_rate": 0.00029364619555955005,
44
  "loss": 1.6815,
45
  "step": 205
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "logging_steps": 41,
@@ -62,7 +69,7 @@
62
  "attributes": {}
63
  }
64
  },
65
- "total_flos": 1.7857827154427904e+16,
66
  "train_batch_size": 4,
67
  "trial_name": null,
68
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 4.0,
6
  "eval_steps": 500,
7
+ "global_step": 276,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
43
  "learning_rate": 0.00029364619555955005,
44
  "loss": 1.6815,
45
  "step": 205
46
+ },
47
+ {
48
+ "epoch": 3.5693430656934306,
49
+ "grad_norm": 0.4026730954647064,
50
+ "learning_rate": 0.00020635380444044998,
51
+ "loss": 1.5832,
52
+ "step": 246
53
  }
54
  ],
55
  "logging_steps": 41,
 
69
  "attributes": {}
70
  }
71
  },
72
+ "total_flos": 2.381043620590387e+16,
73
  "train_batch_size": 4,
74
  "trial_name": null,
75
  "trial_params": null