moos124 committed on
Commit
728d1a7
·
verified ·
1 Parent(s): d3e2b5e

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1f6c4fba22cc3ffa68c73da8a65e6ac0292a8494736ebd9cf01fa275521ba15
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca3edb767af2db5c9696c9950f7374913f7090936ec2cdbfde431ec3a86ad325
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8985561580e2a3a135e83bff5598977e85d195297db16c1cf76c514be2eeb2b0
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af1f4ddf789315e96cbf17dcdf74b128fd008ab5ca9c45b9b7439d99b8819c1f
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6396f47afe3aa623c294a866435dbc7a9bb028716704d2f2f48e0b2b87a290f
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f8ed9bded014bd5f67ef92b18c77d5888fc7641dd1c54e595106bee829b861f
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4392e26fd215c89319acad5af1671eb7abce7b3a173715912a2f15034a0d89b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:129b3f59411be48363861c37db911af3f82058403d8c8148d50126fb936d15af
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.5098666666666667,
6
  "eval_steps": 500,
7
- "global_step": 2390,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2408,6 +2408,16 @@
2408
  "mean_token_accuracy": 0.7495349571108818,
2409
  "num_tokens": 11117249.0,
2410
  "step": 2390
 
 
 
 
 
 
 
 
 
 
2411
  }
2412
  ],
2413
  "logging_steps": 10,
@@ -2427,7 +2437,7 @@
2427
  "attributes": {}
2428
  }
2429
  },
2430
- "total_flos": 5.271640374332621e+16,
2431
  "train_batch_size": 4,
2432
  "trial_name": null,
2433
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.512,
6
  "eval_steps": 500,
7
+ "global_step": 2400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2408
  "mean_token_accuracy": 0.7495349571108818,
2409
  "num_tokens": 11117249.0,
2410
  "step": 2390
2411
+ },
2412
+ {
2413
+ "entropy": 1.0916487082839013,
2414
+ "epoch": 0.512,
2415
+ "grad_norm": 0.31891921162605286,
2416
+ "learning_rate": 8.737340441993575e-05,
2417
+ "loss": 1.1685538291931152,
2418
+ "mean_token_accuracy": 0.7355501770973205,
2419
+ "num_tokens": 11167106.0,
2420
+ "step": 2400
2421
  }
2422
  ],
2423
  "logging_steps": 10,
 
2437
  "attributes": {}
2438
  }
2439
  },
2440
+ "total_flos": 5.297771188733952e+16,
2441
  "train_batch_size": 4,
2442
  "trial_name": null,
2443
  "trial_params": null