brian-todd committed on
Commit
8afd5dd
·
1 Parent(s): 32702df

Training in progress, step 180

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b731b33a663df9b5d325ea24811ab15b363e54445aff4ec4bd6fe507e6467d7
3
  size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94f6600b9fcffce4258035ce042291e23c4579968b61a1277bb0e5f15c47b97
3
  size 522284877
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b731b33a663df9b5d325ea24811ab15b363e54445aff4ec4bd6fe507e6467d7
3
  size 522284877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f94f6600b9fcffce4258035ce042291e23c4579968b61a1277bb0e5f15c47b97
3
  size 522284877
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51b0f686c1f2bee0e68037a4cca40921cdb05f7528cabced5713e8ae0cb43297
3
  size 1044539653
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc45693ba4970551b76864817602c11f4ca8ba9235888c175fdf6a9bf4e31f61
3
  size 1044539653
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeb77c5432001f75ca5c8c8d2df598ea67c727a71f0a1359228917568a1d0916
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e14d8247c9ae610692787b8915c107f0bf91789be26bc0740710193dd513ac6b
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3841b7d3094f97af299f97dddaa8fe1e5f1142ed0601dee132ea7f776f6ffff
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000fa190e62d83a5e8bdaab8d33f6975c549b9c8705916359f713129b2584290
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 42.5,
5
- "global_step": 170,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -108,11 +108,17 @@
108
  "learning_rate": 0.0002,
109
  "loss": 0.0088,
110
  "step": 170
 
 
 
 
 
 
111
  }
112
  ],
113
  "max_steps": 200,
114
  "num_train_epochs": 50,
115
- "total_flos": 5.558694508363776e+16,
116
  "trial_name": null,
117
  "trial_params": null
118
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 45.0,
5
+ "global_step": 180,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
108
  "learning_rate": 0.0002,
109
  "loss": 0.0088,
110
  "step": 170
111
+ },
112
+ {
113
+ "epoch": 45.0,
114
+ "learning_rate": 0.0002,
115
+ "loss": 0.0089,
116
+ "step": 180
117
  }
118
  ],
119
  "max_steps": 200,
120
  "num_train_epochs": 50,
121
+ "total_flos": 5.88457764421632e+16,
122
  "trial_name": null,
123
  "trial_params": null
124
  }