Training in progress, step 3150, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1d63441ec3a969a74407420396fbf80d70a54603fc26523cf80a059be318bdc6
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8341a25de57f92cfd4f595bd362441a1aa28e0e40a28c5233a174e404e93cb3
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1dc11cf7bbf295ee9c52e4bc96c7945f90dee5f465d4b3d8a5908a292cedccce
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3561b109706461e913d6181bf1abc2a9b68bea4d15e3fa953484e4d068be280b
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2907,6 +2907,151 @@
|
|
| 2907 |
"EMA_steps_per_second": 24.244,
|
| 2908 |
"epoch": 130.43478260869566,
|
| 2909 |
"step": 3000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2910 |
}
|
| 2911 |
],
|
| 2912 |
"logging_steps": 10,
|
|
@@ -2926,7 +3071,7 @@
|
|
| 2926 |
"attributes": {}
|
| 2927 |
}
|
| 2928 |
},
|
| 2929 |
-
"total_flos":
|
| 2930 |
"train_batch_size": 4,
|
| 2931 |
"trial_name": null,
|
| 2932 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 136.95652173913044,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 3150,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2907 |
"EMA_steps_per_second": 24.244,
|
| 2908 |
"epoch": 130.43478260869566,
|
| 2909 |
"step": 3000
|
| 2910 |
+
},
|
| 2911 |
+
{
|
| 2912 |
+
"epoch": 130.8695652173913,
|
| 2913 |
+
"grad_norm": 2.050870418548584,
|
| 2914 |
+
"learning_rate": 3.9099160349471675e-06,
|
| 2915 |
+
"loss": 0.23,
|
| 2916 |
+
"step": 3010
|
| 2917 |
+
},
|
| 2918 |
+
{
|
| 2919 |
+
"epoch": 131.30434782608697,
|
| 2920 |
+
"grad_norm": 1.7972759008407593,
|
| 2921 |
+
"learning_rate": 3.90989937474532e-06,
|
| 2922 |
+
"loss": 0.2704,
|
| 2923 |
+
"step": 3020
|
| 2924 |
+
},
|
| 2925 |
+
{
|
| 2926 |
+
"epoch": 131.7391304347826,
|
| 2927 |
+
"grad_norm": 1.958837628364563,
|
| 2928 |
+
"learning_rate": 3.909881939701041e-06,
|
| 2929 |
+
"loss": 0.2614,
|
| 2930 |
+
"step": 3030
|
| 2931 |
+
},
|
| 2932 |
+
{
|
| 2933 |
+
"epoch": 132.17391304347825,
|
| 2934 |
+
"grad_norm": 1.825850486755371,
|
| 2935 |
+
"learning_rate": 3.909863729821243e-06,
|
| 2936 |
+
"loss": 0.2269,
|
| 2937 |
+
"step": 3040
|
| 2938 |
+
},
|
| 2939 |
+
{
|
| 2940 |
+
"epoch": 132.6086956521739,
|
| 2941 |
+
"grad_norm": 2.1669623851776123,
|
| 2942 |
+
"learning_rate": 3.9098447451131435e-06,
|
| 2943 |
+
"loss": 0.2528,
|
| 2944 |
+
"step": 3050
|
| 2945 |
+
},
|
| 2946 |
+
{
|
| 2947 |
+
"epoch": 133.04347826086956,
|
| 2948 |
+
"grad_norm": 2.685922622680664,
|
| 2949 |
+
"learning_rate": 3.909824985584268e-06,
|
| 2950 |
+
"loss": 0.215,
|
| 2951 |
+
"step": 3060
|
| 2952 |
+
},
|
| 2953 |
+
{
|
| 2954 |
+
"epoch": 133.47826086956522,
|
| 2955 |
+
"grad_norm": 1.285071611404419,
|
| 2956 |
+
"learning_rate": 3.9098044512424475e-06,
|
| 2957 |
+
"loss": 0.2484,
|
| 2958 |
+
"step": 3070
|
| 2959 |
+
},
|
| 2960 |
+
{
|
| 2961 |
+
"epoch": 133.91304347826087,
|
| 2962 |
+
"grad_norm": 2.4123470783233643,
|
| 2963 |
+
"learning_rate": 3.909783142095821e-06,
|
| 2964 |
+
"loss": 0.2733,
|
| 2965 |
+
"step": 3080
|
| 2966 |
+
},
|
| 2967 |
+
{
|
| 2968 |
+
"epoch": 134.34782608695653,
|
| 2969 |
+
"grad_norm": 1.9801201820373535,
|
| 2970 |
+
"learning_rate": 3.909761058152836e-06,
|
| 2971 |
+
"loss": 0.2539,
|
| 2972 |
+
"step": 3090
|
| 2973 |
+
},
|
| 2974 |
+
{
|
| 2975 |
+
"epoch": 134.7826086956522,
|
| 2976 |
+
"grad_norm": 1.934043049812317,
|
| 2977 |
+
"learning_rate": 3.9097381994222444e-06,
|
| 2978 |
+
"loss": 0.206,
|
| 2979 |
+
"step": 3100
|
| 2980 |
+
},
|
| 2981 |
+
{
|
| 2982 |
+
"epoch": 135.2173913043478,
|
| 2983 |
+
"grad_norm": 2.4174482822418213,
|
| 2984 |
+
"learning_rate": 3.9097145659131085e-06,
|
| 2985 |
+
"loss": 0.244,
|
| 2986 |
+
"step": 3110
|
| 2987 |
+
},
|
| 2988 |
+
{
|
| 2989 |
+
"epoch": 135.65217391304347,
|
| 2990 |
+
"grad_norm": 1.85491943359375,
|
| 2991 |
+
"learning_rate": 3.909690157634794e-06,
|
| 2992 |
+
"loss": 0.2852,
|
| 2993 |
+
"step": 3120
|
| 2994 |
+
},
|
| 2995 |
+
{
|
| 2996 |
+
"epoch": 136.08695652173913,
|
| 2997 |
+
"grad_norm": 2.3516900539398193,
|
| 2998 |
+
"learning_rate": 3.909664974596977e-06,
|
| 2999 |
+
"loss": 0.2128,
|
| 3000 |
+
"step": 3130
|
| 3001 |
+
},
|
| 3002 |
+
{
|
| 3003 |
+
"epoch": 136.52173913043478,
|
| 3004 |
+
"grad_norm": 2.355637788772583,
|
| 3005 |
+
"learning_rate": 3.909639016809639e-06,
|
| 3006 |
+
"loss": 0.2381,
|
| 3007 |
+
"step": 3140
|
| 3008 |
+
},
|
| 3009 |
+
{
|
| 3010 |
+
"epoch": 136.95652173913044,
|
| 3011 |
+
"grad_norm": 2.8338263034820557,
|
| 3012 |
+
"learning_rate": 3.909612284283068e-06,
|
| 3013 |
+
"loss": 0.2338,
|
| 3014 |
+
"step": 3150
|
| 3015 |
+
},
|
| 3016 |
+
{
|
| 3017 |
+
"epoch": 136.95652173913044,
|
| 3018 |
+
"eval_loss": 0.9423562288284302,
|
| 3019 |
+
"eval_runtime": 0.4463,
|
| 3020 |
+
"eval_samples_per_second": 22.407,
|
| 3021 |
+
"eval_steps_per_second": 22.407,
|
| 3022 |
+
"step": 3150
|
| 3023 |
+
},
|
| 3024 |
+
{
|
| 3025 |
+
"Start_State_loss": 0.861186683177948,
|
| 3026 |
+
"Start_State_runtime": 0.394,
|
| 3027 |
+
"Start_State_samples_per_second": 25.38,
|
| 3028 |
+
"Start_State_steps_per_second": 25.38,
|
| 3029 |
+
"epoch": 136.95652173913044,
|
| 3030 |
+
"step": 3150
|
| 3031 |
+
},
|
| 3032 |
+
{
|
| 3033 |
+
"Raw_Model_loss": 0.9423562288284302,
|
| 3034 |
+
"Raw_Model_runtime": 0.4021,
|
| 3035 |
+
"Raw_Model_samples_per_second": 24.868,
|
| 3036 |
+
"Raw_Model_steps_per_second": 24.868,
|
| 3037 |
+
"epoch": 136.95652173913044,
|
| 3038 |
+
"step": 3150
|
| 3039 |
+
},
|
| 3040 |
+
{
|
| 3041 |
+
"SWA_loss": 0.7764584422111511,
|
| 3042 |
+
"SWA_runtime": 0.4066,
|
| 3043 |
+
"SWA_samples_per_second": 24.596,
|
| 3044 |
+
"SWA_steps_per_second": 24.596,
|
| 3045 |
+
"epoch": 136.95652173913044,
|
| 3046 |
+
"step": 3150
|
| 3047 |
+
},
|
| 3048 |
+
{
|
| 3049 |
+
"EMA_loss": 0.861250102519989,
|
| 3050 |
+
"EMA_runtime": 0.391,
|
| 3051 |
+
"EMA_samples_per_second": 25.577,
|
| 3052 |
+
"EMA_steps_per_second": 25.577,
|
| 3053 |
+
"epoch": 136.95652173913044,
|
| 3054 |
+
"step": 3150
|
| 3055 |
}
|
| 3056 |
],
|
| 3057 |
"logging_steps": 10,
|
|
|
|
| 3071 |
"attributes": {}
|
| 3072 |
}
|
| 3073 |
},
|
| 3074 |
+
"total_flos": 8.121206262826598e+16,
|
| 3075 |
"train_batch_size": 4,
|
| 3076 |
"trial_name": null,
|
| 3077 |
"trial_params": null
|