rootxhacker commited on
Commit
c89156c
·
verified ·
1 Parent(s): a13719b

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:951de22caec4595c4ffeef6e17bf618ff4c5009026d3bf61fbbf6c21e394a753
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04d930c5c4a881dfd26ea978e652c702b6adb1b5b7ed1eed9ca7135e21510049
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94352e8877cdd376b4e0992fbcd1a31c79a5cbb8edc941287bffbb6813ad2fd9
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ca03346afd36214198fb9410564c8a86b40b37890c4e424de07767c10a30ec
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ce35713c6cfde8b254092a62a162b21058e7d89b73dff276ea2ee9e65249418
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:205a7550a4c055924e0126c3501b4b656ca55ae3243c46218f86d29fc2fb9758
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7733da2de8ba0d811f711ea7fdb6cdb69bbfeb6cd9a30bcfdad50f83ead3040c
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699adc1847435a16f1605601a8df2b43b7960dcbb43148ad8a4600f5fe2cb9d4
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d47850787d7113906407c13287aaa8fd754cf26d87c84c8d6940554fabd6e6fa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:638497dfdf5eb5c99bf0b6f11b605f213450ed644d6ebdee360506f0a10a4ae0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 11000,
3
- "best_metric": 4.307990074157715,
4
- "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-11000",
5
- "epoch": 0.8845473425121144,
6
  "eval_steps": 250,
7
- "global_step": 11500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1986,6 +1986,92 @@
1986
  "eval_samples_per_second": 53.176,
1987
  "eval_steps_per_second": 13.294,
1988
  "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1989
  }
1990
  ],
1991
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 12000,
3
+ "best_metric": 1.5159597396850586,
4
+ "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-12000",
5
+ "epoch": 0.9230059226213368,
6
  "eval_steps": 250,
7
+ "global_step": 12000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1986
  "eval_samples_per_second": 53.176,
1987
  "eval_steps_per_second": 13.294,
1988
  "step": 11500
1989
+ },
1990
+ {
1991
+ "epoch": 0.8883932005230367,
1992
+ "grad_norm": 5.072123050689697,
1993
+ "learning_rate": 8.112495864122642e-05,
1994
+ "loss": 4.5564,
1995
+ "step": 11550
1996
+ },
1997
+ {
1998
+ "epoch": 0.8922390585339589,
1999
+ "grad_norm": 3.130788564682007,
2000
+ "learning_rate": 8.08676151612073e-05,
2001
+ "loss": 4.427,
2002
+ "step": 11600
2003
+ },
2004
+ {
2005
+ "epoch": 0.8960849165448812,
2006
+ "grad_norm": 2.615147352218628,
2007
+ "learning_rate": 8.06102716811882e-05,
2008
+ "loss": 4.3831,
2009
+ "step": 11650
2010
+ },
2011
+ {
2012
+ "epoch": 0.8999307745558034,
2013
+ "grad_norm": 8.039403915405273,
2014
+ "learning_rate": 8.035292820116906e-05,
2015
+ "loss": 4.3388,
2016
+ "step": 11700
2017
+ },
2018
+ {
2019
+ "epoch": 0.9037766325667257,
2020
+ "grad_norm": 2.6177854537963867,
2021
+ "learning_rate": 8.009558472114994e-05,
2022
+ "loss": 1.4931,
2023
+ "step": 11750
2024
+ },
2025
+ {
2026
+ "epoch": 0.9037766325667257,
2027
+ "eval_loss": 1.534182071685791,
2028
+ "eval_runtime": 18.0719,
2029
+ "eval_samples_per_second": 55.335,
2030
+ "eval_steps_per_second": 13.834,
2031
+ "step": 11750
2032
+ },
2033
+ {
2034
+ "epoch": 0.9076224905776479,
2035
+ "grad_norm": 1.4090014696121216,
2036
+ "learning_rate": 7.983824124113084e-05,
2037
+ "loss": 1.5524,
2038
+ "step": 11800
2039
+ },
2040
+ {
2041
+ "epoch": 0.9114683485885701,
2042
+ "grad_norm": 1.4773452281951904,
2043
+ "learning_rate": 7.958089776111171e-05,
2044
+ "loss": 1.4703,
2045
+ "step": 11850
2046
+ },
2047
+ {
2048
+ "epoch": 0.9153142065994924,
2049
+ "grad_norm": 1.7350648641586304,
2050
+ "learning_rate": 7.932355428109261e-05,
2051
+ "loss": 1.4752,
2052
+ "step": 11900
2053
+ },
2054
+ {
2055
+ "epoch": 0.9191600646104146,
2056
+ "grad_norm": 1.9704972505569458,
2057
+ "learning_rate": 7.906621080107349e-05,
2058
+ "loss": 1.5257,
2059
+ "step": 11950
2060
+ },
2061
+ {
2062
+ "epoch": 0.9230059226213368,
2063
+ "grad_norm": 1.6183151006698608,
2064
+ "learning_rate": 7.880886732105437e-05,
2065
+ "loss": 1.4704,
2066
+ "step": 12000
2067
+ },
2068
+ {
2069
+ "epoch": 0.9230059226213368,
2070
+ "eval_loss": 1.5159597396850586,
2071
+ "eval_runtime": 17.891,
2072
+ "eval_samples_per_second": 55.894,
2073
+ "eval_steps_per_second": 13.974,
2074
+ "step": 12000
2075
  }
2076
  ],
2077
  "logging_steps": 50,