File size: 1,816 Bytes
0584798
53d5d9f
 
 
0584798
1519226
53d5d9f
0584798
53d5d9f
1519226
0584798
 
53d5d9f
0584798
53d5d9f
 
 
 
 
0584798
53d5d9f
 
 
 
 
 
0584798
 
1519226
53d5d9f
0584798
53d5d9f
 
 
 
 
0584798
53d5d9f
 
 
 
 
 
1519226
 
 
 
 
 
0584798
 
1519226
 
 
 
0584798
1519226
 
 
 
0584798
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
{
  "accepted_accuracy": 0.9314,
  "accepted_coverage": 0.9972,
  "accuracy": 0.9295,
  "count": 13211,
  "dataset_path": "/content/agentic-intent-classifier/data/iab/train.jsonl",
  "fallback_rate": 0.0028,
  "head": "iab_content",
  "macro_f1": 0.8927,
  "primary_source": "supervised_classifier",
  "suite": "train",
  "tier_metrics": {
    "average_prediction_depth": 2.1683,
    "error_buckets": {
      "exact_match": 12280,
      "parent_safe_stop": 312,
      "right_tier1_wrong_tier2": 215,
      "wrong_deep_leaf": 288,
      "wrong_tier1": 116
    },
    "exact_path_accuracy": 0.9295,
    "parent_safe_accuracy": 0.9618,
    "tier1_accuracy": 0.9912,
    "tier2_accuracy": 0.9737,
    "tier3_accuracy": 0.8557,
    "tier4_accuracy": 0.6107
  },
  "view_metrics": {
    "classifier": {
      "average_prediction_depth": 2.1683,
      "error_buckets": {
        "exact_match": 12145,
        "parent_safe_stop": 300,
        "right_tier1_wrong_tier2": 263,
        "wrong_deep_leaf": 387,
        "wrong_tier1": 116
      },
      "exact_path_accuracy": 0.9193,
      "parent_safe_accuracy": 0.9507,
      "tier1_accuracy": 0.9912,
      "tier2_accuracy": 0.9695,
      "tier3_accuracy": 0.8301,
      "tier4_accuracy": 0.475
    },
    "combined_path": {
      "count": 13211,
      "max_combined_rows": 500,
      "reason": "dataset_too_large_for_combined_view",
      "skipped": true
    },
    "disagreements": {
      "count": 13211,
      "max_combined_rows": 500,
      "reason": "dataset_too_large_for_combined_view",
      "skipped": true
    },
    "shadow_embedding_retrieval": {
      "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).",
      "reason": "disabled_by_default",
      "skipped": true
    }
  }
}