File size: 1,169 Bytes
e897220
 
432b303
 
e897220
432b303
 
 
 
e897220
432b303
 
 
 
e897220
 
 
 
 
 
 
 
 
 
432b303
e897220
 
283e3dd
432b303
e897220
432b303
e897220
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
{
  "components": {
    "any_exact_rate": 0.25,
    "ask_rate": 0.75,
    "cannot_rate": 1.0,
    "command_rate": 0.7604,
    "ok_rate": 0.775,
    "parse_ok_rate": 0.98,
    "weak_category_rate": 0.7678571428571429
  },
  "exact_summary": "/root/bitnet-nl2sh/output/autoresearch_proxy_qwen35_2b/repair_v3b_full_v1/qwen35_2b_batch8_repair_v3b_full_v1/eval_exact/summary.json",
  "score": 276.5033,
  "score_0_to_1": 2.765033,
  "verifier_summary": "/root/bitnet-nl2sh/output/autoresearch_proxy_qwen35_2b/repair_v3b_full_v1/qwen35_2b_batch8_repair_v3b_full_v1/eval_verifier/summary.json",
  "weak_categories": [
    "ambiguous_delete",
    "ambiguous_secret",
    "create_archive",
    "enabled_services",
    "find_jpgs",
    "json_query",
    "top_ips"
  ],
  "weak_category_rates": {
    "ambiguous_delete": 0.5,
    "ambiguous_secret": 1.0,
    "create_archive": 1.0,
    "enabled_services": 1.0,
    "find_jpgs": 0.25,
    "json_query": 0.625,
    "top_ips": 1.0
  },
  "weights": {
    "any_exact_rate": 0.2,
    "ask_rate": 1.0,
    "cannot_rate": 0.25,
    "command_rate": 0.35,
    "ok_rate": 1.0,
    "parse_ok_rate": 0.1,
    "weak_category_rate": 0.75
  }
}