| { |
| "ppl_full": 9.958177526791891, |
| "by_K": { |
| "16": { |
| "recall_avg": 0.2488617408769063, |
| "recall_per_layer": { |
| "4": 0.06929636437908497, |
| "8": 0.209304789624183, |
| "12": 0.2291079452614379, |
| "16": 0.31758450776143793, |
| "20": 0.3559002246732026, |
| "24": 0.3119766135620915 |
| }, |
| "ppl_ann": 10.705916802088419, |
| "ppl_gap_relative": 0.0750879639657738 |
| }, |
| "32": { |
| "recall_avg": 0.22758947123797024, |
| "recall_per_layer": { |
| "4": 0.08549663713910761, |
| "8": 0.1827639281906168, |
| "12": 0.20015622436843833, |
| "16": 0.29272973568733596, |
| "20": 0.31894826012959315, |
| "24": 0.28544204191272965 |
| }, |
| "ppl_ann": 10.40695869922638, |
| "ppl_gap_relative": 0.04506659689758181 |
| }, |
| "64": { |
| "recall_avg": 0.2313686687059083, |
| "recall_per_layer": { |
| "4": 0.109822914083168, |
| "8": 0.18851337735615079, |
| "12": 0.2026925869088955, |
| "16": 0.29454920531580686, |
| "20": 0.3052448898396164, |
| "24": 0.28738903873181215 |
| }, |
| "ppl_ann": 10.19960351785024, |
| "ppl_gap_relative": 0.02424399348262342 |
| }, |
| "128": { |
| "recall_avg": 0.2596596885325661, |
| "recall_per_layer": { |
| "4": 0.15761951733660953, |
| "8": 0.22230808709257394, |
| "12": 0.23406030798471103, |
| "16": 0.3191429876512097, |
| "20": 0.31382029543640794, |
| "24": 0.31100693569388443 |
| }, |
| "ppl_ann": 10.039695183436075, |
| "ppl_gap_relative": 0.008186001547458431 |
| }, |
| "256": { |
| "recall_avg": 0.3158585866292318, |
| "recall_per_layer": { |
| "4": 0.23482767740885416, |
| "8": 0.28606397840711806, |
| "12": 0.2944536844889323, |
| "16": 0.36897023518880206, |
| "20": 0.35041291978624134, |
| "24": 0.3604230244954427 |
| }, |
| "ppl_ann": 9.879923025767008, |
| "ppl_gap_relative": -0.0078583155215243 |
| }, |
| "512": { |
| "recall_avg": 0.4077308518545968, |
| "recall_per_layer": { |
| "4": 0.34663236708868117, |
| "8": 0.3869971320742652, |
| "12": 0.3905042466663179, |
| "16": 0.45224675678071524, |
| "20": 0.42589560009184335, |
| "24": 0.444109008425758 |
| }, |
| "ppl_ann": 9.670466581980387, |
| "ppl_gap_relative": -0.028891927668233962 |
| } |
| }, |
| "model": "Qwen/Qwen3-4B-Instruct-2507", |
| "checkpoint": "search_step_2000.pt", |
| "trained_layers": [4, 8, 12, 16, 20, 24], |
| "d_search": 64, |
| "seq_len": 4096, |
| "num_eval_batches": 12, |
| "eval_dataset": "Salesforce/wikitext (wikitext-103-raw-v1, validation split)" |
| } |
|
|