{
  "cycle": 6,
  "layer_merged": 21,
  "num_layers_before": 27,
  "num_layers_after": 26,
  "fused_layer_state": "fused_layer.pt",
  "dwce_score": 0.060528135887943944,
  "dwce_scores": [
    null,
    null,
    0.10885138210341537,
    0.09962797171315212,
    0.09704690759872063,
    0.10032807292174113,
    0.07477073061165104,
    0.06816870958942334,
    0.08890422275158102,
    0.1163330007130214,
    0.14856900172908535,
    0.15104770110436222,
    0.15976559604817797,
    0.16747390251227062,
    0.17331518344021776,
    0.14107504898742165,
    0.12868654620236872,
    0.13613157978454246,
    0.1218191001657377,
    0.10902217379118222,
    0.08936219242296228,
    0.060528135887943944,
    0.07539605212866492,
    0.09116655794274726,
    0.11185780392379165,
    null
  ],
  "dwce_meta": {
    "per_pair": [
      {
        "excluded": true
      },
      {
        "excluded": true
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.45602849053786343,
          "self_attn.k_proj.weight": 0.393914402513139,
          "self_attn.v_proj.weight": 0.540396339894555,
          "self_attn.o_proj.weight": 0.4469669933138352,
          "mlp.gate_proj.weight": 0.3287149601078028,
          "mlp.up_proj.weight": 0.35017769317939573,
          "mlp.down_proj.weight": 0.396783937778306,
          "input_layernorm.weight": 0.17714660120924552,
          "post_attention_layernorm.weight": 0.5706843420160465
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.2956975268685422,
          "self_attn.k_proj.weight": 0.3772577526025228,
          "self_attn.v_proj.weight": 0.5824193735130013,
          "self_attn.o_proj.weight": 0.44336875810997856,
          "mlp.gate_proj.weight": 0.30769135298804196,
          "mlp.up_proj.weight": 0.35382330315519916,
          "mlp.down_proj.weight": 0.37185043779817395,
          "input_layernorm.weight": 0.7024250477029108,
          "post_attention_layernorm.weight": 0.5121384967983699
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.30319082595336366,
          "self_attn.k_proj.weight": 0.3492474196377044,
          "self_attn.v_proj.weight": 0.569890311972998,
          "self_attn.o_proj.weight": 0.40464328058154025,
          "mlp.gate_proj.weight": 0.28532597693591105,
          "mlp.up_proj.weight": 0.37282024323447943,
          "mlp.down_proj.weight": 0.3841916217498055,
          "input_layernorm.weight": 0.4863317106902496,
          "post_attention_layernorm.weight": 0.57501372396618
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.30239571422605777,
          "self_attn.k_proj.weight": 0.28777982633775234,
          "self_attn.v_proj.weight": 0.5214361921442343,
          "self_attn.o_proj.weight": 0.43105923012955555,
          "mlp.gate_proj.weight": 0.2730870548532783,
          "mlp.up_proj.weight": 0.3623753277725939,
          "mlp.down_proj.weight": 0.38708954857683764,
          "input_layernorm.weight": 0.4643158587476409,
          "post_attention_layernorm.weight": 0.4712457550840951
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.34829974358469984,
          "self_attn.k_proj.weight": 0.3124058890492968,
          "self_attn.v_proj.weight": 0.44195795428146895,
          "self_attn.o_proj.weight": 0.4143407830271052,
          "mlp.gate_proj.weight": 0.5,
          "mlp.up_proj.weight": 0.5509473359135564,
          "mlp.down_proj.weight": 0.5172651880216964,
          "input_layernorm.weight": 0.6178273084347614,
          "post_attention_layernorm.weight": 0.34427816773879094
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.2730859710153346,
          "self_attn.k_proj.weight": 0.3061184297688129,
          "self_attn.v_proj.weight": 0.533020418936679,
          "self_attn.o_proj.weight": 0.4119345004856529,
          "mlp.gate_proj.weight": 0.5,
          "mlp.up_proj.weight": 0.13066309879562818,
          "mlp.down_proj.weight": 0.2330601154961996,
          "input_layernorm.weight": 0.8059542094784746,
          "post_attention_layernorm.weight": 0.6625755866545766
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.48768939039675996,
          "self_attn.k_proj.weight": 0.4777739962444542,
          "self_attn.v_proj.weight": 0.5103851366152496,
          "self_attn.o_proj.weight": 0.43250945492739656,
          "mlp.gate_proj.weight": 0.3257707165607638,
          "mlp.up_proj.weight": 0.4096843334038478,
          "mlp.down_proj.weight": 0.34885861400447404,
          "input_layernorm.weight": 0.4200429536840167,
          "post_attention_layernorm.weight": 0.5367871634336786
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.29755787633442426,
          "self_attn.k_proj.weight": 0.37623755403743336,
          "self_attn.v_proj.weight": 0.6136683116996197,
          "self_attn.o_proj.weight": 0.5741847831544276,
          "mlp.gate_proj.weight": 0.27139084405979624,
          "mlp.up_proj.weight": 0.352502726534821,
          "mlp.down_proj.weight": 0.3594937163598074,
          "input_layernorm.weight": 0.5244406580066927,
          "post_attention_layernorm.weight": 0.5383591174280284
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.48784027196921265,
          "self_attn.k_proj.weight": 0.39846873275053135,
          "self_attn.v_proj.weight": 0.4618083281504365,
          "self_attn.o_proj.weight": 0.5396497221340433,
          "mlp.gate_proj.weight": 0.28731097517799203,
          "mlp.up_proj.weight": 0.37777335079811836,
          "mlp.down_proj.weight": 0.38422114872976587,
          "input_layernorm.weight": 0.5041621307381597,
          "post_attention_layernorm.weight": 0.5670639954113798
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.23827875940660836,
          "self_attn.k_proj.weight": 0.3238894105610408,
          "self_attn.v_proj.weight": 0.5690090563700821,
          "self_attn.o_proj.weight": 0.5450013202392769,
          "mlp.gate_proj.weight": 0.27271401584431,
          "mlp.up_proj.weight": 0.3708806850281214,
          "mlp.down_proj.weight": 0.39500743275208433,
          "input_layernorm.weight": 0.6987078362557021,
          "post_attention_layernorm.weight": 0.5374434799165528
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.3946032688367209,
          "self_attn.k_proj.weight": 0.40690493743203565,
          "self_attn.v_proj.weight": 0.5164469910119862,
          "self_attn.o_proj.weight": 0.4597598293804492,
          "mlp.gate_proj.weight": 0.269573396995867,
          "mlp.up_proj.weight": 0.35366279000533957,
          "mlp.down_proj.weight": 0.3877383926763193,
          "input_layernorm.weight": 0.5460628958641058,
          "post_attention_layernorm.weight": 0.5071179303686918
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.3008175631152415,
          "self_attn.v_proj.weight": 0.5493743908332103,
          "self_attn.o_proj.weight": 0.4661606538187534,
          "mlp.gate_proj.weight": 0.2783245620057637,
          "mlp.up_proj.weight": 0.36798475292532096,
          "mlp.down_proj.weight": 0.41990780468185046,
          "input_layernorm.weight": 0.3526699737302493,
          "post_attention_layernorm.weight": 0.5026090703676459
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.5046790280321507,
          "self_attn.o_proj.weight": 0.47862372917098306,
          "mlp.gate_proj.weight": 0.28429170701639556,
          "mlp.up_proj.weight": 0.3568216404807901,
          "mlp.down_proj.weight": 0.4024126479370246,
          "input_layernorm.weight": 0.6964693773965374,
          "post_attention_layernorm.weight": 0.5389671586153284
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.5711961200240154,
          "self_attn.o_proj.weight": 0.4091136757892634,
          "mlp.gate_proj.weight": 0.28165197173200957,
          "mlp.up_proj.weight": 0.3558940378823637,
          "mlp.down_proj.weight": 0.3944025471751477,
          "input_layernorm.weight": 0.34554249375842755,
          "post_attention_layernorm.weight": 0.5083749806126059
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.4798904930258487,
          "self_attn.o_proj.weight": 0.3796993330110259,
          "mlp.gate_proj.weight": 0.2834371362960443,
          "mlp.up_proj.weight": 0.34598115404819363,
          "mlp.down_proj.weight": 0.3810985843525749,
          "input_layernorm.weight": 0.40754399908659844,
          "post_attention_layernorm.weight": 0.497314728281144
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.5081409141199547,
          "self_attn.o_proj.weight": 0.3629102582375656,
          "mlp.gate_proj.weight": 0.2855739089361027,
          "mlp.up_proj.weight": 0.35880372082662226,
          "mlp.down_proj.weight": 0.38944742416779293,
          "input_layernorm.weight": 0.6608006181001739,
          "post_attention_layernorm.weight": 0.5296959070502865
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.49572269146465,
          "self_attn.o_proj.weight": 0.32994298892262397,
          "mlp.gate_proj.weight": 0.27769206189679957,
          "mlp.up_proj.weight": 0.3455361882658386,
          "mlp.down_proj.weight": 0.3735752249423331,
          "input_layernorm.weight": 0.4170270576790789,
          "post_attention_layernorm.weight": 0.5209363007605031
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.5,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.4867919706742694,
          "self_attn.o_proj.weight": 0.3538593097308571,
          "mlp.gate_proj.weight": 0.27117298636599874,
          "mlp.up_proj.weight": 0.33462896686497223,
          "mlp.down_proj.weight": 0.3612770373233193,
          "input_layernorm.weight": 0.6936974577548146,
          "post_attention_layernorm.weight": 0.5001178185439411
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.09349704918366467,
          "self_attn.k_proj.weight": 0.5,
          "self_attn.v_proj.weight": 0.47712484653665943,
          "self_attn.o_proj.weight": 0.29886608310554436,
          "mlp.gate_proj.weight": 0.2693949444903617,
          "mlp.up_proj.weight": 0.3291873676707558,
          "mlp.down_proj.weight": 0.35004469298030644,
          "input_layernorm.weight": 0.33369843125590304,
          "post_attention_layernorm.weight": 0.5011505559153778
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.40387717352354646,
          "self_attn.k_proj.weight": 0.32231430163408453,
          "self_attn.v_proj.weight": 0.42154930635885873,
          "self_attn.o_proj.weight": 0.2721077912952483,
          "mlp.gate_proj.weight": 0.5,
          "mlp.up_proj.weight": 0.40582628271960014,
          "mlp.down_proj.weight": 0.3570913409001458,
          "input_layernorm.weight": 0.5122671690645244,
          "post_attention_layernorm.weight": 0.49147814328550515
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.1520219921182753,
          "self_attn.k_proj.weight": 0.13721084571035752,
          "self_attn.v_proj.weight": 0.40551897093604916,
          "self_attn.o_proj.weight": 0.280649542041515,
          "mlp.gate_proj.weight": 0.5,
          "mlp.up_proj.weight": 0.14720318423611445,
          "mlp.down_proj.weight": 0.17429299506277318,
          "input_layernorm.weight": 0.09607062192796464,
          "post_attention_layernorm.weight": 0.44690356138952886
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.007074450011806872,
          "self_attn.k_proj.weight": 0.1615631246455786,
          "self_attn.v_proj.weight": 0.5877874751289588,
          "self_attn.o_proj.weight": 0.3278764246216598,
          "mlp.gate_proj.weight": 0.313893134211228,
          "mlp.up_proj.weight": 0.3528312375210034,
          "mlp.down_proj.weight": 0.35640966437698607,
          "input_layernorm.weight": 0.6657340014175253,
          "post_attention_layernorm.weight": 0.39247642201648
        },
        "metric": "dwce"
      },
      {
        "num_batches": 64,
        "token_count": 131072.0,
        "norm": "relative",
        "supports_kwargs": true,
        "fuse_priors": {
          "self_attn.q_proj.weight": 0.9892781643373502,
          "self_attn.k_proj.weight": 0.657748170777267,
          "self_attn.v_proj.weight": 0.4684578019872222,
          "self_attn.o_proj.weight": 0.4193140384970065,
          "mlp.gate_proj.weight": 0.5,
          "mlp.up_proj.weight": 0.3405968802707512,
          "mlp.down_proj.weight": 0.3304433709433969,
          "input_layernorm.weight": 0.6733460076353963,
          "post_attention_layernorm.weight": 0.15409983400561383
        },
        "metric": "dwce"
      },
      {
        "excluded": true
      }
    ],
    "supports_kwargs": true,
    "max_batches": 0,
    "norm": "relative",
    "metric": "dwce",
    "cosine_topk": 3,
    "start_index": 0,
    "excluded_pairs": [
      0,
      1,
      25
    ]
  },
  "fisher_num_batches": 64,
  "merge_method": "reparam",
  "merged_params": 3,
  "num_sequences": 128,
  "teacher_source": "previous_cycle_memory",
  "teacher_cycle": 5,
  "eval": {
    "datasets": [
      "wikitext"
    ],
    "configs": [
      "wikitext-2-raw-v1"
    ],
    "split": "test",
    "num_samples": 200,
    "seq_len": 1024,
    "post_ppl": {
      "wikitext:wikitext-2-raw-v1": 19.929857094583117
    }
  },
  "comm": {
    "enabled": true,
    "train_mode": "lora",
    "opt_steps": 6,
    "grad_accum_steps": 1,
    "lr": 1e-05,
    "temp": 2.0,
    "steps_ratio": 0.1,
    "lr_scale": 0.1,
    "interaction_mode": "relative",
    "interaction_eps": 1e-08,
    "mu": 0.5,
    "mu_auto": true,
    "mu_auto_rho": 0.1,
    "mu_auto_eps": 1e-08,
    "sample_eta": 0.5,
    "sample_dwce_scale": 1.0,
    "topk": 1,
    "candidate_pairs": [
      21
    ],
    "trainable_params": 17694720,
    "pair_selection": {
      "num_pairs": 26,
      "excluded_pairs": [
        0,
        1,
        25
      ],
      "candidate_pairs": [
        21
      ],
      "total_samples": 6,
      "unique_pairs": 1,
      "counts": [
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        0,
        6,
        0,
        0,
        0,
        0
      ],
      "freqs": [
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        1.0,
        0.0,
        0.0,
        0.0,
        0.0
      ],
      "probs": [
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        1.0,
        0.0,
        0.0,
        0.0,
        0.0
      ],
      "top_pairs": [
        {
          "pair": 21,
          "count": 6,
          "freq": 1.0,
          "prob": 1.0
        }
      ]
    },
    "avg_loss": 0.0004060502259892,
    "avg_anchor": 0.0002513490423249702,
    "avg_interaction": 0.09605961292982101,
    "avg_mu": 0.0015758845697112923,
    "teacher_source": "previous_cycle_memory",
    "teacher_cycle": 5,
    "dwce_scores_pre": [
      null,
      null,
      0.10885138210341537,
      0.09962797171315212,
      0.09704690759872063,
      0.10032807292174113,
      0.07477073061165104,
      0.06816870958942334,
      0.08890422275158102,
      0.1163330007130214,
      0.14856900172908535,
      0.15104770110436222,
      0.15976559604817797,
      0.16747390251227062,
      0.17331518344021776,
      0.14107504898742165,
      0.12868654620236872,
      0.13613157978454246,
      0.1218191001657377,
      0.10902217379118222,
      0.08936219242296228,
      0.060528135887943944,
      0.07539605212866492,
      0.09116655794274726,
      0.11185780392379165,
      null
    ],
    "post_ppl": {
      "wikitext:wikitext-2-raw-v1": 18.54056249689346
    },
    "post_selection_recomputed": false,
    "selected_layer_post": 21
  },
  "distill": {
    "enabled": true,
    "method": "reparam",
    "calib_samples": 256,
    "inst_samples": 0,
    "seq_len": 1024,
    "batch_size": 1,
    "epochs": 1.0,
    "lr": 0.0001,
    "kl_weight": 0.02,
    "kl_temp": 4.0,
    "hidden_mse_weight": 1.0,
    "attn_mse_weight": 0.25,
    "mlp_mse_weight": 1.0,
    "reparam_eta": 0.0,
    "reparam_gamma": 0.0,
    "reparam_attn_reg_scale": 1.0,
    "reparam_mlp_reg_scale": 1.0,
    "reparam_param_subset": "mlp",
    "reparam_stats": {
      "enabled": true,
      "epochs": 1.0,
      "lr": 0.0001,
      "hidden_mse_weight": 1.0,
      "attn_mse_weight": 0.25,
      "mlp_mse_weight": 1.0,
      "eta": 0.0,
      "gamma": 0.0,
      "attn_reg_scale": 1.0,
      "mlp_reg_scale": 1.0,
      "param_subset": "mlp",
      "num_gates": 3,
      "num_attn_gates": 0,
      "num_mlp_gates": 3,
      "num_other_gates": 0,
      "lambda_init": "fisher_prior"
    },
    "reparam_gate_summary": {
      "num_tensors": 3,
      "num_elements": 176160768,
      "global_mean": 0.48417738505772184,
      "per_tensor_mean": {
        "mlp.gate_proj.weight": 0.4960911273956299,
        "mlp.up_proj.weight": 0.4809410572052002,
        "mlp.down_proj.weight": 0.47550004720687866
      }
    },
    "post_ppl": {
      "wikitext:wikitext-2-raw-v1": 19.929857094583117
    },
    "weight_decay": 0.0,
    "max_grad_norm": 1.0,
    "grad_accum_steps": 1,
    "instruction_dataset": null,
    "instruction_config": null,
    "instruction_split": "train"
  },
  "lora": {
    "enabled": false,
    "seq_len": 1024,
    "batch_size": 1,
    "epochs": 0.0,
    "rank": 8,
    "alpha": 16.0,
    "dropout": 0.0,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "down_proj",
      "up_proj"
    ],
    "respect_exclude_pairs": false,
    "kl_enabled": false,
    "kl_weight": 0.1,
    "kl_temp": 4.0,
    "post_ppl": null,
    "lr": 0.0001,
    "weight_decay": 0.0,
    "max_grad_norm": 1.0,
    "grad_accum_steps": 1,
    "log_steps": 100,
    "eval_every": 2000,
    "eval_max_batches": null
  },
  "norm_policy": "hybrid",
  "full_model_saved": true,
  "full_model": {
    "stage": "cycle_6_full_model",
    "path": "llama3_mlp_skipdwce_nopermute/cycle_6/full_model",
    "weight_bytes": 13460487442,
    "post_ppl": {
      "wikitext:wikitext-2-raw-v1": 19.929857094583117
    },
    "resume_info": "resume_info.json"
  }
}