moos124 committed on
Commit
c3381e6
·
verified ·
1 Parent(s): d199bd4

Training in progress, step 3130, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6707292d7f654e5124c3e926150bc642c498945f51878660f225650de5246c50
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bbfe00c1ba1c09c4d04368deaf63404868c1f4870af7814d7f420fa3e7adff7
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:086d9369ed7b9b1b0db7b13e9ce72ff9f192de08d450f900a44752b156fb06a4
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07bbeb4c05c5dba10d049492552ad0b3686827c29d53626a81aabe093340f41b
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:671dc5d364c5724905180db7a8f088b1689fd04a21018fd65eb0b930b5fd8447
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d70ee58499f6c87ec78098e4c8bebf10e921da4016edd9194284849f517bd96
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5be2b28db77843da54a5469ae9097a28157a8cf17202b01284ef63e0481acf8e
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e7450676e9219efb4cdb0d6b5ddd5da561820d0ad1621a7a5c5769cbb9c7dc6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6656,
6
  "eval_steps": 500,
7
- "global_step": 3120,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3138,6 +3138,16 @@
3138
  "mean_token_accuracy": 0.7467011958360672,
3139
  "num_tokens": 14526227.0,
3140
  "step": 3120
 
 
 
 
 
 
 
 
 
 
3141
  }
3142
  ],
3143
  "logging_steps": 10,
@@ -3157,7 +3167,7 @@
3157
  "attributes": {}
3158
  }
3159
  },
3160
- "total_flos": 6.879296464710451e+16,
3161
  "train_batch_size": 4,
3162
  "trial_name": null,
3163
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6677333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 3130,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3138
  "mean_token_accuracy": 0.7467011958360672,
3139
  "num_tokens": 14526227.0,
3140
  "step": 3120
3141
+ },
3142
+ {
3143
+ "entropy": 0.850496319681406,
3144
+ "epoch": 0.6677333333333333,
3145
+ "grad_norm": 0.37918779253959656,
3146
+ "learning_rate": 7.788275420728123e-05,
3147
+ "loss": 0.914525032043457,
3148
+ "mean_token_accuracy": 0.7852855637669564,
3149
+ "num_tokens": 14566458.0,
3150
+ "step": 3130
3151
  }
3152
  ],
3153
  "logging_steps": 10,
 
3167
  "attributes": {}
3168
  }
3169
  },
3170
+ "total_flos": 6.897869926043443e+16,
3171
  "train_batch_size": 4,
3172
  "trial_name": null,
3173
  "trial_params": null