rootxhacker commited on
Commit
107bd75
·
verified ·
1 Parent(s): b518f3e

Training in progress, step 17000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47aa7b6dff74362f082b035925e2a5f7cb54a6d412fcc30408f989b971b037c6
3
  size 36730224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441effd53b237a6d55d25ad93a62aed9e9a9b704129f5cc8576e5b658221b597
3
  size 36730224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac0c8a0f8ae72f5f1c6ba696fc1b1a47bee4f015c1aa8caebecb7d165365a472
3
  size 73588346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d99045a76b65192b2e41aefcf5abebda18e67ecc70d381cfe9c7b19f1066bfd
3
  size 73588346
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d28d5f3a49ada942ec355f276b33d6e4bff345075b872b6e2b651f5666a06f0a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9d5359cc143881ecdb88768e105d086fd2336ed10ecf8c85dd03dc0e505da1
3
  size 14244
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ac02e2eb311b0a2b3525f61fc41905e20d307e2a487c095f2b84052384c6ffa
3
  size 988
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5daa13f981d2fdcf2543f987afffb899e74fcea4fd93baa2e74fd8a3169ecb99
3
  size 988
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb9a109782b96ee7b703894b485d97bae6299b8517060ebdc76bb5c7072fef6b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fd0711c5430aa1d8a873bb88a5b6bcc960ba760770a1807e688afccd4facc3e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 16250,
3
- "best_metric": 1.4850120544433594,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
- "epoch": 1.2691331436043383,
6
  "eval_steps": 250,
7
- "global_step": 16500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -2846,6 +2846,92 @@
2846
  "eval_samples_per_second": 55.479,
2847
  "eval_steps_per_second": 13.87,
2848
  "step": 16500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2849
  }
2850
  ],
2851
  "logging_steps": 50,
 
1
  {
2
+ "best_global_step": 16750,
3
+ "best_metric": 1.4708431959152222,
4
  "best_model_checkpoint": "./ar-diffusion-checkpoints/checkpoint-15500",
5
+ "epoch": 1.3075917237135606,
6
  "eval_steps": 250,
7
+ "global_step": 17000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
2846
  "eval_samples_per_second": 55.479,
2847
  "eval_steps_per_second": 13.87,
2848
  "step": 16500
2849
+ },
2850
+ {
2851
+ "epoch": 1.2729790016152602,
2852
+ "grad_norm": 1.4080452919006348,
2853
+ "learning_rate": 5.5390610639314724e-05,
2854
+ "loss": 1.4667,
2855
+ "step": 16550
2856
+ },
2857
+ {
2858
+ "epoch": 1.2768248596261826,
2859
+ "grad_norm": 1.6634443998336792,
2860
+ "learning_rate": 5.513326715929561e-05,
2861
+ "loss": 1.4619,
2862
+ "step": 16600
2863
+ },
2864
+ {
2865
+ "epoch": 1.2806707176371048,
2866
+ "grad_norm": 2.0469400882720947,
2867
+ "learning_rate": 5.487592367927649e-05,
2868
+ "loss": 1.4105,
2869
+ "step": 16650
2870
+ },
2871
+ {
2872
+ "epoch": 1.284516575648027,
2873
+ "grad_norm": 1.5735753774642944,
2874
+ "learning_rate": 5.461858019925738e-05,
2875
+ "loss": 1.4002,
2876
+ "step": 16700
2877
+ },
2878
+ {
2879
+ "epoch": 1.2883624336589494,
2880
+ "grad_norm": 1.43183434009552,
2881
+ "learning_rate": 5.436123671923826e-05,
2882
+ "loss": 1.4586,
2883
+ "step": 16750
2884
+ },
2885
+ {
2886
+ "epoch": 1.2883624336589494,
2887
+ "eval_loss": 1.4708431959152222,
2888
+ "eval_runtime": 18.2152,
2889
+ "eval_samples_per_second": 54.899,
2890
+ "eval_steps_per_second": 13.725,
2891
+ "step": 16750
2892
+ },
2893
+ {
2894
+ "epoch": 1.2922082916698716,
2895
+ "grad_norm": 1.6342015266418457,
2896
+ "learning_rate": 5.4103893239219136e-05,
2897
+ "loss": 1.4113,
2898
+ "step": 16800
2899
+ },
2900
+ {
2901
+ "epoch": 1.2960541496807938,
2902
+ "grad_norm": 3.80155873298645,
2903
+ "learning_rate": 5.384654975920002e-05,
2904
+ "loss": 1.4793,
2905
+ "step": 16850
2906
+ },
2907
+ {
2908
+ "epoch": 1.299900007691716,
2909
+ "grad_norm": 1.4240097999572754,
2910
+ "learning_rate": 5.358920627918091e-05,
2911
+ "loss": 1.4072,
2912
+ "step": 16900
2913
+ },
2914
+ {
2915
+ "epoch": 1.3037458657026382,
2916
+ "grad_norm": 1.4548074007034302,
2917
+ "learning_rate": 5.333186279916179e-05,
2918
+ "loss": 1.4275,
2919
+ "step": 16950
2920
+ },
2921
+ {
2922
+ "epoch": 1.3075917237135606,
2923
+ "grad_norm": 1.7287901639938354,
2924
+ "learning_rate": 5.3074519319142675e-05,
2925
+ "loss": 1.4741,
2926
+ "step": 17000
2927
+ },
2928
+ {
2929
+ "epoch": 1.3075917237135606,
2930
+ "eval_loss": 1.4836150407791138,
2931
+ "eval_runtime": 18.0219,
2932
+ "eval_samples_per_second": 55.488,
2933
+ "eval_steps_per_second": 13.872,
2934
+ "step": 17000
2935
  }
2936
  ],
2937
  "logging_steps": 50,