Training in progress, step 4050, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1502116544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f2cb27e04d92bb595af7bcd531079cb1a7260601f737a8d637a4c37175b81770
|
| 3 |
size 1502116544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2924673466
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b40cce3a5cd81047edcec8097e717345eaf07d2d73f6cd4162aba9976f43dc2c
|
| 3 |
size 2924673466
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4ef1bf71bf6833b710c538ae8c380dfe197fac7f08964a7d0acdc6e98e34ee2
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:45f92a97ffdbc7f88199b20e87167f2ab2e0d78a2ac0becd89030b1e9e2faac0
|
| 3 |
size 1256
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9355312585830688,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-3150",
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2945,6 +2945,119 @@
|
|
| 2945 |
"eval_samples_per_second": 9.266,
|
| 2946 |
"eval_steps_per_second": 9.266,
|
| 2947 |
"step": 3900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2948 |
}
|
| 2949 |
],
|
| 2950 |
"logging_steps": 10,
|
|
@@ -2964,7 +3077,7 @@
|
|
| 2964 |
"attributes": {}
|
| 2965 |
}
|
| 2966 |
},
|
| 2967 |
-
"total_flos": 6.
|
| 2968 |
"train_batch_size": 4,
|
| 2969 |
"trial_name": null,
|
| 2970 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.9355312585830688,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-3150",
|
| 4 |
+
"epoch": 2.5139664804469275,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 4050,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2945 |
"eval_samples_per_second": 9.266,
|
| 2946 |
"eval_steps_per_second": 9.266,
|
| 2947 |
"step": 3900
|
| 2948 |
+
},
|
| 2949 |
+
{
|
| 2950 |
+
"epoch": 2.4270639354438237,
|
| 2951 |
+
"grad_norm": 1.6727492809295654,
|
| 2952 |
+
"learning_rate": 9.376546391952211e-06,
|
| 2953 |
+
"loss": 0.4695,
|
| 2954 |
+
"step": 3910
|
| 2955 |
+
},
|
| 2956 |
+
{
|
| 2957 |
+
"epoch": 2.4332712600869026,
|
| 2958 |
+
"grad_norm": 1.9848363399505615,
|
| 2959 |
+
"learning_rate": 9.212189928903758e-06,
|
| 2960 |
+
"loss": 0.5046,
|
| 2961 |
+
"step": 3920
|
| 2962 |
+
},
|
| 2963 |
+
{
|
| 2964 |
+
"epoch": 2.439478584729981,
|
| 2965 |
+
"grad_norm": 1.9910500049591064,
|
| 2966 |
+
"learning_rate": 9.049099032139725e-06,
|
| 2967 |
+
"loss": 0.4243,
|
| 2968 |
+
"step": 3930
|
| 2969 |
+
},
|
| 2970 |
+
{
|
| 2971 |
+
"epoch": 2.44568590937306,
|
| 2972 |
+
"grad_norm": 1.6215895414352417,
|
| 2973 |
+
"learning_rate": 8.887280405688106e-06,
|
| 2974 |
+
"loss": 0.4843,
|
| 2975 |
+
"step": 3940
|
| 2976 |
+
},
|
| 2977 |
+
{
|
| 2978 |
+
"epoch": 2.451893234016139,
|
| 2979 |
+
"grad_norm": 1.9749666452407837,
|
| 2980 |
+
"learning_rate": 8.72674070127881e-06,
|
| 2981 |
+
"loss": 0.4632,
|
| 2982 |
+
"step": 3950
|
| 2983 |
+
},
|
| 2984 |
+
{
|
| 2985 |
+
"epoch": 2.458100558659218,
|
| 2986 |
+
"grad_norm": 2.1119041442871094,
|
| 2987 |
+
"learning_rate": 8.567486518070306e-06,
|
| 2988 |
+
"loss": 0.4471,
|
| 2989 |
+
"step": 3960
|
| 2990 |
+
},
|
| 2991 |
+
{
|
| 2992 |
+
"epoch": 2.464307883302297,
|
| 2993 |
+
"grad_norm": 1.5868020057678223,
|
| 2994 |
+
"learning_rate": 8.409524402378308e-06,
|
| 2995 |
+
"loss": 0.4282,
|
| 2996 |
+
"step": 3970
|
| 2997 |
+
},
|
| 2998 |
+
{
|
| 2999 |
+
"epoch": 2.4705152079453754,
|
| 3000 |
+
"grad_norm": 2.0005483627319336,
|
| 3001 |
+
"learning_rate": 8.252860847406712e-06,
|
| 3002 |
+
"loss": 0.3916,
|
| 3003 |
+
"step": 3980
|
| 3004 |
+
},
|
| 3005 |
+
{
|
| 3006 |
+
"epoch": 2.4767225325884543,
|
| 3007 |
+
"grad_norm": 1.7088433504104614,
|
| 3008 |
+
"learning_rate": 8.097502292980626e-06,
|
| 3009 |
+
"loss": 0.4363,
|
| 3010 |
+
"step": 3990
|
| 3011 |
+
},
|
| 3012 |
+
{
|
| 3013 |
+
"epoch": 2.4829298572315333,
|
| 3014 |
+
"grad_norm": 1.8316535949707031,
|
| 3015 |
+
"learning_rate": 7.943455125281741e-06,
|
| 3016 |
+
"loss": 0.4325,
|
| 3017 |
+
"step": 4000
|
| 3018 |
+
},
|
| 3019 |
+
{
|
| 3020 |
+
"epoch": 2.489137181874612,
|
| 3021 |
+
"grad_norm": 1.8140100240707397,
|
| 3022 |
+
"learning_rate": 7.790725676585756e-06,
|
| 3023 |
+
"loss": 0.4846,
|
| 3024 |
+
"step": 4010
|
| 3025 |
+
},
|
| 3026 |
+
{
|
| 3027 |
+
"epoch": 2.4953445065176907,
|
| 3028 |
+
"grad_norm": 2.005836248397827,
|
| 3029 |
+
"learning_rate": 7.639320225002106e-06,
|
| 3030 |
+
"loss": 0.4892,
|
| 3031 |
+
"step": 4020
|
| 3032 |
+
},
|
| 3033 |
+
{
|
| 3034 |
+
"epoch": 2.5015518311607696,
|
| 3035 |
+
"grad_norm": 2.0285496711730957,
|
| 3036 |
+
"learning_rate": 7.489244994215897e-06,
|
| 3037 |
+
"loss": 0.4536,
|
| 3038 |
+
"step": 4030
|
| 3039 |
+
},
|
| 3040 |
+
{
|
| 3041 |
+
"epoch": 2.5077591558038486,
|
| 3042 |
+
"grad_norm": 1.8983845710754395,
|
| 3043 |
+
"learning_rate": 7.340506153232052e-06,
|
| 3044 |
+
"loss": 0.4346,
|
| 3045 |
+
"step": 4040
|
| 3046 |
+
},
|
| 3047 |
+
{
|
| 3048 |
+
"epoch": 2.5139664804469275,
|
| 3049 |
+
"grad_norm": 1.8659793138504028,
|
| 3050 |
+
"learning_rate": 7.193109816121762e-06,
|
| 3051 |
+
"loss": 0.4594,
|
| 3052 |
+
"step": 4050
|
| 3053 |
+
},
|
| 3054 |
+
{
|
| 3055 |
+
"epoch": 2.5139664804469275,
|
| 3056 |
+
"eval_loss": 0.9785549640655518,
|
| 3057 |
+
"eval_runtime": 54.6526,
|
| 3058 |
+
"eval_samples_per_second": 9.167,
|
| 3059 |
+
"eval_steps_per_second": 9.167,
|
| 3060 |
+
"step": 4050
|
| 3061 |
}
|
| 3062 |
],
|
| 3063 |
"logging_steps": 10,
|
|
|
|
| 3077 |
"attributes": {}
|
| 3078 |
}
|
| 3079 |
},
|
| 3080 |
+
"total_flos": 6.741170098839982e+17,
|
| 3081 |
"train_batch_size": 4,
|
| 3082 |
"trial_name": null,
|
| 3083 |
"trial_params": null
|