{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.32362459546925565,
"eval_steps": 500,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.006472491909385114,
"grad_norm": 2.0135042667388916,
"learning_rate": 1.8e-05,
"loss": 1.5311,
"step": 10
},
{
"epoch": 0.012944983818770227,
"grad_norm": 1.0551478862762451,
"learning_rate": 3.8e-05,
"loss": 1.2229,
"step": 20
},
{
"epoch": 0.019417475728155338,
"grad_norm": 1.101570725440979,
"learning_rate": 5.8e-05,
"loss": 0.9845,
"step": 30
},
{
"epoch": 0.025889967637540454,
"grad_norm": 0.9952124357223511,
"learning_rate": 7.800000000000001e-05,
"loss": 0.8599,
"step": 40
},
{
"epoch": 0.032362459546925564,
"grad_norm": 0.8940704464912415,
"learning_rate": 9.8e-05,
"loss": 0.8401,
"step": 50
},
{
"epoch": 0.038834951456310676,
"grad_norm": 0.8487522602081299,
"learning_rate": 9.93979933110368e-05,
"loss": 0.776,
"step": 60
},
{
"epoch": 0.045307443365695796,
"grad_norm": 0.7864758968353271,
"learning_rate": 9.872909698996656e-05,
"loss": 0.8153,
"step": 70
},
{
"epoch": 0.05177993527508091,
"grad_norm": 0.7295967936515808,
"learning_rate": 9.806020066889633e-05,
"loss": 0.8251,
"step": 80
},
{
"epoch": 0.05825242718446602,
"grad_norm": 0.7075174450874329,
"learning_rate": 9.739130434782609e-05,
"loss": 0.749,
"step": 90
},
{
"epoch": 0.06472491909385113,
"grad_norm": 0.6422586441040039,
"learning_rate": 9.672240802675586e-05,
"loss": 0.7428,
"step": 100
},
{
"epoch": 0.07119741100323625,
"grad_norm": 0.6745761632919312,
"learning_rate": 9.605351170568563e-05,
"loss": 0.7806,
"step": 110
},
{
"epoch": 0.07766990291262135,
"grad_norm": 0.6769530177116394,
"learning_rate": 9.53846153846154e-05,
"loss": 0.796,
"step": 120
},
{
"epoch": 0.08414239482200647,
"grad_norm": 0.5150454044342041,
"learning_rate": 9.471571906354515e-05,
"loss": 0.7486,
"step": 130
},
{
"epoch": 0.09061488673139159,
"grad_norm": 0.6074182391166687,
"learning_rate": 9.404682274247491e-05,
"loss": 0.7783,
"step": 140
},
{
"epoch": 0.0970873786407767,
"grad_norm": 0.6745702624320984,
"learning_rate": 9.337792642140469e-05,
"loss": 0.7503,
"step": 150
},
{
"epoch": 0.10355987055016182,
"grad_norm": 0.6088436841964722,
"learning_rate": 9.270903010033445e-05,
"loss": 0.7771,
"step": 160
},
{
"epoch": 0.11003236245954692,
"grad_norm": 0.6351744532585144,
"learning_rate": 9.204013377926422e-05,
"loss": 0.7554,
"step": 170
},
{
"epoch": 0.11650485436893204,
"grad_norm": 0.5281522274017334,
"learning_rate": 9.137123745819398e-05,
"loss": 0.7635,
"step": 180
},
{
"epoch": 0.12297734627831715,
"grad_norm": 0.6833154559135437,
"learning_rate": 9.070234113712374e-05,
"loss": 0.753,
"step": 190
},
{
"epoch": 0.12944983818770225,
"grad_norm": 0.5272049307823181,
"learning_rate": 9.003344481605351e-05,
"loss": 0.7384,
"step": 200
},
{
"epoch": 0.13592233009708737,
"grad_norm": 0.6461851596832275,
"learning_rate": 8.936454849498329e-05,
"loss": 0.7548,
"step": 210
},
{
"epoch": 0.1423948220064725,
"grad_norm": 0.5979950428009033,
"learning_rate": 8.869565217391305e-05,
"loss": 0.7534,
"step": 220
},
{
"epoch": 0.1488673139158576,
"grad_norm": 0.6396514773368835,
"learning_rate": 8.802675585284281e-05,
"loss": 0.7436,
"step": 230
},
{
"epoch": 0.1553398058252427,
"grad_norm": 0.6104384064674377,
"learning_rate": 8.735785953177258e-05,
"loss": 0.7345,
"step": 240
},
{
"epoch": 0.16181229773462782,
"grad_norm": 0.5535375475883484,
"learning_rate": 8.668896321070234e-05,
"loss": 0.7293,
"step": 250
},
{
"epoch": 0.16828478964401294,
"grad_norm": 0.5984886884689331,
"learning_rate": 8.602006688963212e-05,
"loss": 0.7173,
"step": 260
},
{
"epoch": 0.17475728155339806,
"grad_norm": 0.517716646194458,
"learning_rate": 8.535117056856188e-05,
"loss": 0.7694,
"step": 270
},
{
"epoch": 0.18122977346278318,
"grad_norm": 0.5802426934242249,
"learning_rate": 8.468227424749165e-05,
"loss": 0.7505,
"step": 280
},
{
"epoch": 0.18770226537216828,
"grad_norm": 0.5289272665977478,
"learning_rate": 8.401337792642141e-05,
"loss": 0.7678,
"step": 290
},
{
"epoch": 0.1941747572815534,
"grad_norm": 0.5293733477592468,
"learning_rate": 8.334448160535117e-05,
"loss": 0.7242,
"step": 300
},
{
"epoch": 0.20064724919093851,
"grad_norm": 0.566146969795227,
"learning_rate": 8.267558528428094e-05,
"loss": 0.7248,
"step": 310
},
{
"epoch": 0.20711974110032363,
"grad_norm": 0.5496853590011597,
"learning_rate": 8.200668896321071e-05,
"loss": 0.7588,
"step": 320
},
{
"epoch": 0.21359223300970873,
"grad_norm": 0.5420507192611694,
"learning_rate": 8.133779264214048e-05,
"loss": 0.7673,
"step": 330
},
{
"epoch": 0.22006472491909385,
"grad_norm": 0.582582950592041,
"learning_rate": 8.066889632107024e-05,
"loss": 0.7341,
"step": 340
},
{
"epoch": 0.22653721682847897,
"grad_norm": 0.5332103371620178,
"learning_rate": 8e-05,
"loss": 0.7518,
"step": 350
},
{
"epoch": 0.23300970873786409,
"grad_norm": 0.5132858157157898,
"learning_rate": 7.933110367892977e-05,
"loss": 0.752,
"step": 360
},
{
"epoch": 0.23948220064724918,
"grad_norm": 0.5356752276420593,
"learning_rate": 7.866220735785953e-05,
"loss": 0.7432,
"step": 370
},
{
"epoch": 0.2459546925566343,
"grad_norm": 0.5592623353004456,
"learning_rate": 7.79933110367893e-05,
"loss": 0.7396,
"step": 380
},
{
"epoch": 0.2524271844660194,
"grad_norm": 0.5103404521942139,
"learning_rate": 7.732441471571906e-05,
"loss": 0.7253,
"step": 390
},
{
"epoch": 0.2588996763754045,
"grad_norm": 0.5380160212516785,
"learning_rate": 7.665551839464883e-05,
"loss": 0.7464,
"step": 400
},
{
"epoch": 0.26537216828478966,
"grad_norm": 0.5373779535293579,
"learning_rate": 7.598662207357859e-05,
"loss": 0.7377,
"step": 410
},
{
"epoch": 0.27184466019417475,
"grad_norm": 0.523170530796051,
"learning_rate": 7.531772575250837e-05,
"loss": 0.7413,
"step": 420
},
{
"epoch": 0.2783171521035599,
"grad_norm": 0.5368985533714294,
"learning_rate": 7.464882943143813e-05,
"loss": 0.7227,
"step": 430
},
{
"epoch": 0.284789644012945,
"grad_norm": 0.587778627872467,
"learning_rate": 7.39799331103679e-05,
"loss": 0.7289,
"step": 440
},
{
"epoch": 0.2912621359223301,
"grad_norm": 0.5471896529197693,
"learning_rate": 7.331103678929766e-05,
"loss": 0.7301,
"step": 450
},
{
"epoch": 0.2977346278317152,
"grad_norm": 0.5446394085884094,
"learning_rate": 7.264214046822742e-05,
"loss": 0.7325,
"step": 460
},
{
"epoch": 0.3042071197411003,
"grad_norm": 0.5761464238166809,
"learning_rate": 7.197324414715719e-05,
"loss": 0.7583,
"step": 470
},
{
"epoch": 0.3106796116504854,
"grad_norm": 0.5082632899284363,
"learning_rate": 7.130434782608696e-05,
"loss": 0.7382,
"step": 480
},
{
"epoch": 0.31715210355987056,
"grad_norm": 0.5441333651542664,
"learning_rate": 7.063545150501673e-05,
"loss": 0.7411,
"step": 490
},
{
"epoch": 0.32362459546925565,
"grad_norm": 0.5039440393447876,
"learning_rate": 6.996655518394649e-05,
"loss": 0.7332,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 1545,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.803063236717445e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}