qwen3.5-2b-shellcommand-linux-lora / eval /verifier_summary.json
louisguthmann's picture
Update 2B adapter with repair_v3b full run
432b303 verified
{
"avg_gen_seconds_per_example": 0.6461,
"base_model": "Qwen/Qwen3.5-2B",
"category_breakdown": {
"ambiguous_delete": {
"ok": 4,
"ok_rate": 0.5,
"rows": 8
},
"ambiguous_secret": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"cannot_cli": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"count_extension": {
"ok": 2,
"ok_rate": 0.25,
"rows": 8
},
"create_archive": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"delete_specific_logs": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"enabled_services": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"extract_archive": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"find_jpgs": {
"ok": 2,
"ok_rate": 0.25,
"rows": 8
},
"git_branch": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"grep_literal": {
"ok": 1,
"ok_rate": 0.125,
"rows": 8
},
"json_query": {
"ok": 5,
"ok_rate": 0.625,
"rows": 8
},
"replace_literal": {
"ok": 7,
"ok_rate": 0.875,
"rows": 8
},
"show_env": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"top_ips": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
}
},
"enable_thinking": false,
"image": "local",
"mode_breakdown": {
"ask": {
"ok": 12,
"ok_rate": 0.75,
"rows": 16
},
"cannot": {
"ok": 8,
"ok_rate": 1.0,
"rows": 8
},
"command": {
"ok": 73,
"ok_rate": 0.7604,
"rows": 96
}
},
"model": "/root/bitnet-nl2sh/output/autoresearch_proxy_qwen35_2b/repair_v3b_full_v1/qwen35_2b_batch8_repair_v3b_full_v1/model",
"ok": 93,
"ok_rate": 0.775,
"prompt_file": "/root/bitnet-nl2sh/prompts/student_linux_shell_v2g.txt",
"rows": 120
}