File size: 2,622 Bytes
c740ca3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | {
"ppl_full": 9.958177526791891,
"by_K": {
"16": {
"recall_avg": 0.2488617408769063,
"recall_per_layer": {
"4": 0.06929636437908497,
"8": 0.209304789624183,
"12": 0.2291079452614379,
"16": 0.31758450776143793,
"20": 0.3559002246732026,
"24": 0.3119766135620915
},
"ppl_ann": 10.705916802088419,
"ppl_gap_relative": 0.0750879639657738
},
"32": {
"recall_avg": 0.22758947123797024,
"recall_per_layer": {
"4": 0.08549663713910761,
"8": 0.1827639281906168,
"12": 0.20015622436843833,
"16": 0.29272973568733596,
"20": 0.31894826012959315,
"24": 0.28544204191272965
},
"ppl_ann": 10.40695869922638,
"ppl_gap_relative": 0.04506659689758181
},
"64": {
"recall_avg": 0.2313686687059083,
"recall_per_layer": {
"4": 0.109822914083168,
"8": 0.18851337735615079,
"12": 0.2026925869088955,
"16": 0.29454920531580686,
"20": 0.3052448898396164,
"24": 0.28738903873181215
},
"ppl_ann": 10.19960351785024,
"ppl_gap_relative": 0.02424399348262342
},
"128": {
"recall_avg": 0.2596596885325661,
"recall_per_layer": {
"4": 0.15761951733660953,
"8": 0.22230808709257394,
"12": 0.23406030798471103,
"16": 0.3191429876512097,
"20": 0.31382029543640794,
"24": 0.31100693569388443
},
"ppl_ann": 10.039695183436075,
"ppl_gap_relative": 0.008186001547458431
},
"256": {
"recall_avg": 0.3158585866292318,
"recall_per_layer": {
"4": 0.23482767740885416,
"8": 0.28606397840711806,
"12": 0.2944536844889323,
"16": 0.36897023518880206,
"20": 0.35041291978624134,
"24": 0.3604230244954427
},
"ppl_ann": 9.879923025767008,
"ppl_gap_relative": -0.0078583155215243
},
"512": {
"recall_avg": 0.4077308518545968,
"recall_per_layer": {
"4": 0.34663236708868117,
"8": 0.3869971320742652,
"12": 0.3905042466663179,
"16": 0.45224675678071524,
"20": 0.42589560009184335,
"24": 0.444109008425758
},
"ppl_ann": 9.670466581980387,
"ppl_gap_relative": -0.028891927668233962
}
},
"model": "Qwen/Qwen3-4B-Instruct-2507",
"checkpoint": "search_step_2000.pt",
"trained_layers": [4, 8, 12, 16, 20, 24],
"d_search": 64,
"seq_len": 4096,
"num_eval_batches": 12,
"eval_dataset": "Salesforce/wikitext (wikitext-103-raw-v1, validation split)"
}
|