moos124 committed on
Commit
4d54e84
·
verified ·
1 Parent(s): 2453255

Training in progress, step 1960, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fe39fff458d6b62846442523528cce378f798e09f1e67c0b4568c2d3f592d4b
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebbae0f0867475b8a7f9335bb2f9f044d3aa5699f0d45c4f102be3f38251975d
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:acbd7b4dd42466221bba71411da9f586fd88a167e9e8a901f7cd5049747da468
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6d3afcbf9f263042330c77d242154f5c3d73cc2e38e06131d797d8bad84fad0
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5a6c67f9781c4e91ddafe3e118cf0facdb785f4abb89cd52f5fc13e9252a6b
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d524ad371b2644db9c062ec3cdcd458f039156f9cddc00cfb9d4b3e22962fe
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e9566af3e319364a3808e69e353e8c1787eec78598708372beecd198ce7557b
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef00cdc61ae10b88377c2844fd6923057cb361d7ff885b050e7e9f4d7086e26a
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.416,
6
  "eval_steps": 500,
7
- "global_step": 1950,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1968,6 +1968,16 @@
1968
  "mean_token_accuracy": 0.7525161564350128,
1969
  "num_tokens": 9037859.0,
1970
  "step": 1950
 
 
 
 
 
 
 
 
 
 
1971
  }
1972
  ],
1973
  "logging_steps": 10,
@@ -1987,7 +1997,7 @@
1987
  "attributes": {}
1988
  }
1989
  },
1990
- "total_flos": 4.290455148931277e+16,
1991
  "train_batch_size": 4,
1992
  "trial_name": null,
1993
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.41813333333333336,
6
  "eval_steps": 500,
7
+ "global_step": 1960,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1968
  "mean_token_accuracy": 0.7525161564350128,
1969
  "num_tokens": 9037859.0,
1970
  "step": 1950
1971
+ },
1972
+ {
1973
+ "entropy": 1.0254975706338882,
1974
+ "epoch": 0.41813333333333336,
1975
+ "grad_norm": 0.2604680061340332,
1976
+ "learning_rate": 9.197996955444732e-05,
1977
+ "loss": 1.0939658164978028,
1978
+ "mean_token_accuracy": 0.7426734983921051,
1979
+ "num_tokens": 9088894.0,
1980
+ "step": 1960
1981
  }
1982
  ],
1983
  "logging_steps": 10,
 
1997
  "attributes": {}
1998
  }
1999
  },
2000
+ "total_flos": 4.315311685102387e+16,
2001
  "train_batch_size": 4,
2002
  "trial_name": null,
2003
  "trial_params": null