TweedleDeepLearnings committed on
Commit
a9fd63c
·
verified ·
1 Parent(s): 48853db

Training in progress, step 4050, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f8fa8f720af10124343d16ee209390207e597ea5c09bd0bc6be42bf045fc2a8
3
  size 1502116544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2cb27e04d92bb595af7bcd531079cb1a7260601f737a8d637a4c37175b81770
3
  size 1502116544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec25ee5915e8a4add2599f15d550cbf7ca81fdfa42a3b57acd6c3bd3832debd9
3
  size 2924673466
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b40cce3a5cd81047edcec8097e717345eaf07d2d73f6cd4162aba9976f43dc2c
3
  size 2924673466
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a82af09a987f26290b9f3f00d4e9552d816c6951e034630adf85af19b034a40f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4ef1bf71bf6833b710c538ae8c380dfe197fac7f08964a7d0acdc6e98e34ee2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fcddfb1f106a755911246d49690b13d594cc29b7e565480a222840e2793c588
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45f92a97ffdbc7f88199b20e87167f2ab2e0d78a2ac0becd89030b1e9e2faac0
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.9355312585830688,
3
  "best_model_checkpoint": "./output/checkpoint-3150",
4
- "epoch": 2.4208566108007448,
5
  "eval_steps": 150,
6
- "global_step": 3900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2945,6 +2945,119 @@
2945
  "eval_samples_per_second": 9.266,
2946
  "eval_steps_per_second": 9.266,
2947
  "step": 3900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2948
  }
2949
  ],
2950
  "logging_steps": 10,
@@ -2964,7 +3077,7 @@
2964
  "attributes": {}
2965
  }
2966
  },
2967
- "total_flos": 6.49117140564609e+17,
2968
  "train_batch_size": 4,
2969
  "trial_name": null,
2970
  "trial_params": null
 
1
  {
2
  "best_metric": 0.9355312585830688,
3
  "best_model_checkpoint": "./output/checkpoint-3150",
4
+ "epoch": 2.5139664804469275,
5
  "eval_steps": 150,
6
+ "global_step": 4050,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2945
  "eval_samples_per_second": 9.266,
2946
  "eval_steps_per_second": 9.266,
2947
  "step": 3900
2948
+ },
2949
+ {
2950
+ "epoch": 2.4270639354438237,
2951
+ "grad_norm": 1.6727492809295654,
2952
+ "learning_rate": 9.376546391952211e-06,
2953
+ "loss": 0.4695,
2954
+ "step": 3910
2955
+ },
2956
+ {
2957
+ "epoch": 2.4332712600869026,
2958
+ "grad_norm": 1.9848363399505615,
2959
+ "learning_rate": 9.212189928903758e-06,
2960
+ "loss": 0.5046,
2961
+ "step": 3920
2962
+ },
2963
+ {
2964
+ "epoch": 2.439478584729981,
2965
+ "grad_norm": 1.9910500049591064,
2966
+ "learning_rate": 9.049099032139725e-06,
2967
+ "loss": 0.4243,
2968
+ "step": 3930
2969
+ },
2970
+ {
2971
+ "epoch": 2.44568590937306,
2972
+ "grad_norm": 1.6215895414352417,
2973
+ "learning_rate": 8.887280405688106e-06,
2974
+ "loss": 0.4843,
2975
+ "step": 3940
2976
+ },
2977
+ {
2978
+ "epoch": 2.451893234016139,
2979
+ "grad_norm": 1.9749666452407837,
2980
+ "learning_rate": 8.72674070127881e-06,
2981
+ "loss": 0.4632,
2982
+ "step": 3950
2983
+ },
2984
+ {
2985
+ "epoch": 2.458100558659218,
2986
+ "grad_norm": 2.1119041442871094,
2987
+ "learning_rate": 8.567486518070306e-06,
2988
+ "loss": 0.4471,
2989
+ "step": 3960
2990
+ },
2991
+ {
2992
+ "epoch": 2.464307883302297,
2993
+ "grad_norm": 1.5868020057678223,
2994
+ "learning_rate": 8.409524402378308e-06,
2995
+ "loss": 0.4282,
2996
+ "step": 3970
2997
+ },
2998
+ {
2999
+ "epoch": 2.4705152079453754,
3000
+ "grad_norm": 2.0005483627319336,
3001
+ "learning_rate": 8.252860847406712e-06,
3002
+ "loss": 0.3916,
3003
+ "step": 3980
3004
+ },
3005
+ {
3006
+ "epoch": 2.4767225325884543,
3007
+ "grad_norm": 1.7088433504104614,
3008
+ "learning_rate": 8.097502292980626e-06,
3009
+ "loss": 0.4363,
3010
+ "step": 3990
3011
+ },
3012
+ {
3013
+ "epoch": 2.4829298572315333,
3014
+ "grad_norm": 1.8316535949707031,
3015
+ "learning_rate": 7.943455125281741e-06,
3016
+ "loss": 0.4325,
3017
+ "step": 4000
3018
+ },
3019
+ {
3020
+ "epoch": 2.489137181874612,
3021
+ "grad_norm": 1.8140100240707397,
3022
+ "learning_rate": 7.790725676585756e-06,
3023
+ "loss": 0.4846,
3024
+ "step": 4010
3025
+ },
3026
+ {
3027
+ "epoch": 2.4953445065176907,
3028
+ "grad_norm": 2.005836248397827,
3029
+ "learning_rate": 7.639320225002106e-06,
3030
+ "loss": 0.4892,
3031
+ "step": 4020
3032
+ },
3033
+ {
3034
+ "epoch": 2.5015518311607696,
3035
+ "grad_norm": 2.0285496711730957,
3036
+ "learning_rate": 7.489244994215897e-06,
3037
+ "loss": 0.4536,
3038
+ "step": 4030
3039
+ },
3040
+ {
3041
+ "epoch": 2.5077591558038486,
3042
+ "grad_norm": 1.8983845710754395,
3043
+ "learning_rate": 7.340506153232052e-06,
3044
+ "loss": 0.4346,
3045
+ "step": 4040
3046
+ },
3047
+ {
3048
+ "epoch": 2.5139664804469275,
3049
+ "grad_norm": 1.8659793138504028,
3050
+ "learning_rate": 7.193109816121762e-06,
3051
+ "loss": 0.4594,
3052
+ "step": 4050
3053
+ },
3054
+ {
3055
+ "epoch": 2.5139664804469275,
3056
+ "eval_loss": 0.9785549640655518,
3057
+ "eval_runtime": 54.6526,
3058
+ "eval_samples_per_second": 9.167,
3059
+ "eval_steps_per_second": 9.167,
3060
+ "step": 4050
3061
  }
3062
  ],
3063
  "logging_steps": 10,
 
3077
  "attributes": {}
3078
  }
3079
  },
3080
+ "total_flos": 6.741170098839982e+17,
3081
  "train_batch_size": 4,
3082
  "trial_name": null,
3083
  "trial_params": null