irishprancer commited on
Commit
6587bb3
·
verified ·
1 Parent(s): 1cc1913

Training in progress, step 3150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5294f50c0baae12880a019af7aff602de0021ca7f152917dde18bdf22ba93937
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d63441ec3a969a74407420396fbf80d70a54603fc26523cf80a059be318bdc6
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b7e62ea9f776c4c1909b99bd4b2be719e11c652940cf5f69aa1e349ee14b1fc
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8341a25de57f92cfd4f595bd362441a1aa28e0e40a28c5233a174e404e93cb3
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84ddf8abb9ec9ea656e462efff79374386d297b7f7fbb9fe2e12d28f7e1de152
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dc11cf7bbf295ee9c52e4bc96c7945f90dee5f465d4b3d8a5908a292cedccce
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8b1ff2305e39132563133ecd5a3bc22eb8aea6f062c680ce8b5a9a3f47c0580
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3561b109706461e913d6181bf1abc2a9b68bea4d15e3fa953484e4d068be280b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 130.43478260869566,
5
  "eval_steps": 150,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2907,6 +2907,151 @@
2907
  "EMA_steps_per_second": 24.244,
2908
  "epoch": 130.43478260869566,
2909
  "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2910
  }
2911
  ],
2912
  "logging_steps": 10,
@@ -2926,7 +3071,7 @@
2926
  "attributes": {}
2927
  }
2928
  },
2929
- "total_flos": 7.725240940312166e+16,
2930
  "train_batch_size": 4,
2931
  "trial_name": null,
2932
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 136.95652173913044,
5
  "eval_steps": 150,
6
+ "global_step": 3150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2907
  "EMA_steps_per_second": 24.244,
2908
  "epoch": 130.43478260869566,
2909
  "step": 3000
2910
+ },
2911
+ {
2912
+ "epoch": 130.8695652173913,
2913
+ "grad_norm": 2.050870418548584,
2914
+ "learning_rate": 3.9099160349471675e-06,
2915
+ "loss": 0.23,
2916
+ "step": 3010
2917
+ },
2918
+ {
2919
+ "epoch": 131.30434782608697,
2920
+ "grad_norm": 1.7972759008407593,
2921
+ "learning_rate": 3.90989937474532e-06,
2922
+ "loss": 0.2704,
2923
+ "step": 3020
2924
+ },
2925
+ {
2926
+ "epoch": 131.7391304347826,
2927
+ "grad_norm": 1.958837628364563,
2928
+ "learning_rate": 3.909881939701041e-06,
2929
+ "loss": 0.2614,
2930
+ "step": 3030
2931
+ },
2932
+ {
2933
+ "epoch": 132.17391304347825,
2934
+ "grad_norm": 1.825850486755371,
2935
+ "learning_rate": 3.909863729821243e-06,
2936
+ "loss": 0.2269,
2937
+ "step": 3040
2938
+ },
2939
+ {
2940
+ "epoch": 132.6086956521739,
2941
+ "grad_norm": 2.1669623851776123,
2942
+ "learning_rate": 3.9098447451131435e-06,
2943
+ "loss": 0.2528,
2944
+ "step": 3050
2945
+ },
2946
+ {
2947
+ "epoch": 133.04347826086956,
2948
+ "grad_norm": 2.685922622680664,
2949
+ "learning_rate": 3.909824985584268e-06,
2950
+ "loss": 0.215,
2951
+ "step": 3060
2952
+ },
2953
+ {
2954
+ "epoch": 133.47826086956522,
2955
+ "grad_norm": 1.285071611404419,
2956
+ "learning_rate": 3.9098044512424475e-06,
2957
+ "loss": 0.2484,
2958
+ "step": 3070
2959
+ },
2960
+ {
2961
+ "epoch": 133.91304347826087,
2962
+ "grad_norm": 2.4123470783233643,
2963
+ "learning_rate": 3.909783142095821e-06,
2964
+ "loss": 0.2733,
2965
+ "step": 3080
2966
+ },
2967
+ {
2968
+ "epoch": 134.34782608695653,
2969
+ "grad_norm": 1.9801201820373535,
2970
+ "learning_rate": 3.909761058152836e-06,
2971
+ "loss": 0.2539,
2972
+ "step": 3090
2973
+ },
2974
+ {
2975
+ "epoch": 134.7826086956522,
2976
+ "grad_norm": 1.934043049812317,
2977
+ "learning_rate": 3.9097381994222444e-06,
2978
+ "loss": 0.206,
2979
+ "step": 3100
2980
+ },
2981
+ {
2982
+ "epoch": 135.2173913043478,
2983
+ "grad_norm": 2.4174482822418213,
2984
+ "learning_rate": 3.9097145659131085e-06,
2985
+ "loss": 0.244,
2986
+ "step": 3110
2987
+ },
2988
+ {
2989
+ "epoch": 135.65217391304347,
2990
+ "grad_norm": 1.85491943359375,
2991
+ "learning_rate": 3.909690157634794e-06,
2992
+ "loss": 0.2852,
2993
+ "step": 3120
2994
+ },
2995
+ {
2996
+ "epoch": 136.08695652173913,
2997
+ "grad_norm": 2.3516900539398193,
2998
+ "learning_rate": 3.909664974596977e-06,
2999
+ "loss": 0.2128,
3000
+ "step": 3130
3001
+ },
3002
+ {
3003
+ "epoch": 136.52173913043478,
3004
+ "grad_norm": 2.355637788772583,
3005
+ "learning_rate": 3.909639016809639e-06,
3006
+ "loss": 0.2381,
3007
+ "step": 3140
3008
+ },
3009
+ {
3010
+ "epoch": 136.95652173913044,
3011
+ "grad_norm": 2.8338263034820557,
3012
+ "learning_rate": 3.909612284283068e-06,
3013
+ "loss": 0.2338,
3014
+ "step": 3150
3015
+ },
3016
+ {
3017
+ "epoch": 136.95652173913044,
3018
+ "eval_loss": 0.9423562288284302,
3019
+ "eval_runtime": 0.4463,
3020
+ "eval_samples_per_second": 22.407,
3021
+ "eval_steps_per_second": 22.407,
3022
+ "step": 3150
3023
+ },
3024
+ {
3025
+ "Start_State_loss": 0.861186683177948,
3026
+ "Start_State_runtime": 0.394,
3027
+ "Start_State_samples_per_second": 25.38,
3028
+ "Start_State_steps_per_second": 25.38,
3029
+ "epoch": 136.95652173913044,
3030
+ "step": 3150
3031
+ },
3032
+ {
3033
+ "Raw_Model_loss": 0.9423562288284302,
3034
+ "Raw_Model_runtime": 0.4021,
3035
+ "Raw_Model_samples_per_second": 24.868,
3036
+ "Raw_Model_steps_per_second": 24.868,
3037
+ "epoch": 136.95652173913044,
3038
+ "step": 3150
3039
+ },
3040
+ {
3041
+ "SWA_loss": 0.7764584422111511,
3042
+ "SWA_runtime": 0.4066,
3043
+ "SWA_samples_per_second": 24.596,
3044
+ "SWA_steps_per_second": 24.596,
3045
+ "epoch": 136.95652173913044,
3046
+ "step": 3150
3047
+ },
3048
+ {
3049
+ "EMA_loss": 0.861250102519989,
3050
+ "EMA_runtime": 0.391,
3051
+ "EMA_samples_per_second": 25.577,
3052
+ "EMA_steps_per_second": 25.577,
3053
+ "epoch": 136.95652173913044,
3054
+ "step": 3150
3055
  }
3056
  ],
3057
  "logging_steps": 10,
 
3071
  "attributes": {}
3072
  }
3073
  },
3074
+ "total_flos": 8.121206262826598e+16,
3075
  "train_batch_size": 4,
3076
  "trial_name": null,
3077
  "trial_params": null