File size: 1,799 Bytes
0584798
53d5d9f
 
 
0584798
1519226
53d5d9f
0584798
53d5d9f
1519226
0584798
 
53d5d9f
0584798
53d5d9f
 
 
 
 
0584798
53d5d9f
 
 
 
 
 
0584798
 
1519226
53d5d9f
1519226
53d5d9f
 
 
 
 
1519226
53d5d9f
 
 
 
 
 
1519226
0584798
 
 
 
 
 
 
 
 
 
 
 
1519226
 
 
 
0584798
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
  "accepted_accuracy": 0.9278,
  "accepted_coverage": 0.996,
  "accuracy": 0.9247,
  "count": 3282,
  "dataset_path": "/content/agentic-intent-classifier/data/iab/test.jsonl",
  "fallback_rate": 0.004,
  "head": "iab_content",
  "macro_f1": 0.8814,
  "primary_source": "supervised_classifier",
  "suite": "test",
  "tier_metrics": {
    "average_prediction_depth": 2.1706,
    "error_buckets": {
      "exact_match": 3035,
      "parent_safe_stop": 87,
      "right_tier1_wrong_tier2": 56,
      "wrong_deep_leaf": 69,
      "wrong_tier1": 35
    },
    "exact_path_accuracy": 0.9247,
    "parent_safe_accuracy": 0.961,
    "tier1_accuracy": 0.9893,
    "tier2_accuracy": 0.9707,
    "tier3_accuracy": 0.8487,
    "tier4_accuracy": 0.5714
  },
  "view_metrics": {
    "classifier": {
      "average_prediction_depth": 2.1706,
      "error_buckets": {
        "exact_match": 3004,
        "parent_safe_stop": 84,
        "right_tier1_wrong_tier2": 68,
        "wrong_deep_leaf": 91,
        "wrong_tier1": 35
      },
      "exact_path_accuracy": 0.9153,
      "parent_safe_accuracy": 0.9506,
      "tier1_accuracy": 0.9893,
      "tier2_accuracy": 0.9665,
      "tier3_accuracy": 0.8259,
      "tier4_accuracy": 0.4429
    },
    "combined_path": {
      "count": 3282,
      "max_combined_rows": 500,
      "reason": "dataset_too_large_for_combined_view",
      "skipped": true
    },
    "disagreements": {
      "count": 3282,
      "max_combined_rows": 500,
      "reason": "dataset_too_large_for_combined_view",
      "skipped": true
    },
    "shadow_embedding_retrieval": {
      "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
      "reason": "disabled_by_default",
      "skipped": true
    }
  }
}