FormlessAI committed on
Commit
98f5533
·
verified ·
1 Parent(s): 0e9e06b

Training in progress, epoch 1, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d377acbba120fee452cb1d03550d9665582a6be0583ccf01beca6d60f3068954
3
  size 98088784
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28f960cc3fc3041049728628ca0f3995042d6d6b849410ccc76854759f1de38e
3
  size 98088784
last-checkpoint/global_step2000/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc4ab868d13d7e25e7c50daae00bf5536887bcf2c1d19ba949bc264e7dfc4117
3
+ size 73939813
last-checkpoint/global_step2000/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c0093cece272c92cef31aaad91a148bae7541a7cb779fe0bdf9242d1b424091
3
+ size 73939813
last-checkpoint/global_step2000/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95252e914f5d5433a20f25169ef73e387bb7472971595d4f55445c445c3c5a4e
3
+ size 73939877
last-checkpoint/global_step2000/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:951ff763ca5b28ce437168afed25c674ab36435f89a9d54cb252a215250b8104
3
+ size 73939877
last-checkpoint/global_step2000/mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:013abcfc6d5041aee8df7400c9f24dae1570abbaa9e21064ff32f5fc208b40ad
3
+ size 564993061
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1950
 
1
+ global_step2000
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2747623e531b52c955116a1758c4c3b7702bc046434dcac538cbc3384623204e
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a14f6a698f92007bbc2dcb792b9b6ca2830509bab22cc9bd34d4e6a2c1d3b8a
3
  size 15429
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba8e2d73257c90bb7fccef88f8b355c2b6047cf464eab08a043eb68c59a585f
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fdbb189d7242681559cf6c379ca732f316bca39a80295113d8476efdbf2845a
3
  size 15429
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a8f10702d3ccb9b37c2be68aaec1a7be7d307b01ae96b4338b884b09aeab75b
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db64115c44140dcc749fb50ed04400a835cc578facdcfdf170d2c657b74fd53
3
  size 15429
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cf8f798773612226d5bcd3505626cca08d37d8aefa69b231e6a2870c09d7106
3
  size 15429
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622aa7be447c67092898a74b40ee6e6a8c1fee0783bf1f9c83af0dde5fce5c73
3
  size 15429
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7867b13d1149754c953736bbad37f67bb9cd0c04cec7aac6bcf4539459e754ab
3
  size 1401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8544c996407cdf577fc237cd843a7fc6fa9441fad75c673215c6414edef0e8c3
3
  size 1401
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "best_global_step": null,
3
- "best_metric": 0.6521090865135193,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9112063686466625,
6
  "eval_steps": 50,
7
- "global_step": 1950,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -3050,6 +3050,84 @@
3050
  "eval_samples_per_second": 126.427,
3051
  "eval_steps_per_second": 15.811,
3052
  "step": 1950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3053
  }
3054
  ],
3055
  "logging_steps": 5,
@@ -3078,7 +3156,7 @@
3078
  "attributes": {}
3079
  }
3080
  },
3081
- "total_flos": 1.0062774197886648e+18,
3082
  "train_batch_size": 2,
3083
  "trial_name": null,
3084
  "trial_params": null
 
1
  {
2
  "best_global_step": null,
3
+ "best_metric": 0.6503395438194275,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.9601959583588489,
6
  "eval_steps": 50,
7
+ "global_step": 2000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
3050
  "eval_samples_per_second": 126.427,
3051
  "eval_steps_per_second": 15.811,
3052
  "step": 1950
3053
+ },
3054
+ {
3055
+ "epoch": 1.9161053276178812,
3056
+ "grad_norm": 0.22474640607833862,
3057
+ "learning_rate": 7.605616150846442e-05,
3058
+ "loss": 0.6932,
3059
+ "step": 1955
3060
+ },
3061
+ {
3062
+ "epoch": 1.9210042865891,
3063
+ "grad_norm": 0.2242085039615631,
3064
+ "learning_rate": 7.58912724661567e-05,
3065
+ "loss": 0.708,
3066
+ "step": 1960
3067
+ },
3068
+ {
3069
+ "epoch": 1.9259032455603184,
3070
+ "grad_norm": 0.21359029412269592,
3071
+ "learning_rate": 7.572617283033086e-05,
3072
+ "loss": 0.6908,
3073
+ "step": 1965
3074
+ },
3075
+ {
3076
+ "epoch": 1.930802204531537,
3077
+ "grad_norm": 0.1957738995552063,
3078
+ "learning_rate": 7.556086429764114e-05,
3079
+ "loss": 0.6746,
3080
+ "step": 1970
3081
+ },
3082
+ {
3083
+ "epoch": 1.9357011635027557,
3084
+ "grad_norm": 0.19697311520576477,
3085
+ "learning_rate": 7.539534856688843e-05,
3086
+ "loss": 0.6868,
3087
+ "step": 1975
3088
+ },
3089
+ {
3090
+ "epoch": 1.9406001224739744,
3091
+ "grad_norm": 0.23822586238384247,
3092
+ "learning_rate": 7.522962733900299e-05,
3093
+ "loss": 0.6672,
3094
+ "step": 1980
3095
+ },
3096
+ {
3097
+ "epoch": 1.945499081445193,
3098
+ "grad_norm": 0.21542146801948547,
3099
+ "learning_rate": 7.506370231702681e-05,
3100
+ "loss": 0.6835,
3101
+ "step": 1985
3102
+ },
3103
+ {
3104
+ "epoch": 1.9503980404164114,
3105
+ "grad_norm": 0.1972765177488327,
3106
+ "learning_rate": 7.489757520609624e-05,
3107
+ "loss": 0.6701,
3108
+ "step": 1990
3109
+ },
3110
+ {
3111
+ "epoch": 1.9552969993876301,
3112
+ "grad_norm": 0.208944171667099,
3113
+ "learning_rate": 7.473124771342437e-05,
3114
+ "loss": 0.68,
3115
+ "step": 1995
3116
+ },
3117
+ {
3118
+ "epoch": 1.9601959583588489,
3119
+ "grad_norm": 0.19232727587223053,
3120
+ "learning_rate": 7.456472154828355e-05,
3121
+ "loss": 0.6733,
3122
+ "step": 2000
3123
+ },
3124
+ {
3125
+ "epoch": 1.9601959583588489,
3126
+ "eval_loss": 0.6503395438194275,
3127
+ "eval_runtime": 15.4202,
3128
+ "eval_samples_per_second": 127.041,
3129
+ "eval_steps_per_second": 15.888,
3130
+ "step": 2000
3131
  }
3132
  ],
3133
  "logging_steps": 5,
 
3156
  "attributes": {}
3157
  }
3158
  },
3159
+ "total_flos": 1.0314720434234327e+18,
3160
  "train_batch_size": 2,
3161
  "trial_name": null,
3162
  "trial_params": null