Ahil1991 committed on
Commit
c67da12
·
verified ·
1 Parent(s): d248df4

Training in progress, step 2850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96b2d10a844b11cf5b2544f35d93e70b315fececa5c2d3ca624e6dfead07a827
3
  size 5991064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0abce668b3eb4d44dc784d9560c9e8eaf1d78e218c11316de74bb009145f23e
3
  size 5991064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e1836d73c8a5897489d644f69854e2f687fca0e79b5b6c08dd2c8dbd2938ca9
3
  size 3875258
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af9af34cee614f2f3b9fcd4f30272cd7939d050f865cdf0b26c2e35e2ffb59c
3
  size 3875258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20654594740d79ce8368c8a1d84e3daa1b6c26318ed3b339b4307a9b85892639
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0823cffba25b0205e4aaa4829d080da287f93b4b12f54638f5eb7ec2af7f34c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a0475b68e927911bcde368a8d1060fd9a7ea6fc58f735f74af0c4bfd6634af6
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f1cdaac6e6d07ba74169a3057ad987f49c993cbd8847dc15c52a36f581d092
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.184617413378169,
6
  "eval_steps": 500,
7
- "global_step": 2800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1968,6 +1968,41 @@
1968
  "learning_rate": 0.0002,
1969
  "loss": 0.7594,
1970
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1971
  }
1972
  ],
1973
  "logging_steps": 10,
@@ -1987,7 +2022,7 @@
1987
  "attributes": {}
1988
  }
1989
  },
1990
- "total_flos": 4845174148349952.0,
1991
  "train_batch_size": 2,
1992
  "trial_name": null,
1993
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.18791415290277916,
6
  "eval_steps": 500,
7
+ "global_step": 2850,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1968
  "learning_rate": 0.0002,
1969
  "loss": 0.7594,
1970
  "step": 2800
1971
+ },
1972
+ {
1973
+ "epoch": 0.18527676128309103,
1974
+ "grad_norm": 1.489174246788025,
1975
+ "learning_rate": 0.0002,
1976
+ "loss": 0.696,
1977
+ "step": 2810
1978
+ },
1979
+ {
1980
+ "epoch": 0.18593610918801307,
1981
+ "grad_norm": 2.3775784969329834,
1982
+ "learning_rate": 0.0002,
1983
+ "loss": 0.7494,
1984
+ "step": 2820
1985
+ },
1986
+ {
1987
+ "epoch": 0.1865954570929351,
1988
+ "grad_norm": 1.5026384592056274,
1989
+ "learning_rate": 0.0002,
1990
+ "loss": 0.7631,
1991
+ "step": 2830
1992
+ },
1993
+ {
1994
+ "epoch": 0.18725480499785713,
1995
+ "grad_norm": 2.0681509971618652,
1996
+ "learning_rate": 0.0002,
1997
+ "loss": 0.7628,
1998
+ "step": 2840
1999
+ },
2000
+ {
2001
+ "epoch": 0.18791415290277916,
2002
+ "grad_norm": 1.44792640209198,
2003
+ "learning_rate": 0.0002,
2004
+ "loss": 0.7436,
2005
+ "step": 2850
2006
  }
2007
  ],
2008
  "logging_steps": 10,
 
2022
  "attributes": {}
2023
  }
2024
  },
2025
+ "total_flos": 4931223928266240.0,
2026
  "train_batch_size": 2,
2027
  "trial_name": null,
2028
  "trial_params": null