| { |
| "accepted_accuracy": 0.6, |
| "accepted_coverage": 0.625, |
| "accuracy": 0.375, |
| "count": 8, |
| "dataset_path": "/content/agentic-intent-classifier/data/iab/hard_cases.jsonl", |
| "fallback_rate": 0.375, |
| "head": "iab_content", |
| "macro_f1": 0.2308, |
| "primary_source": "supervised_classifier", |
| "suite": "hard_cases", |
| "tier_metrics": { |
| "average_prediction_depth": 1.75, |
| "error_buckets": { |
| "exact_match": 3, |
| "right_tier1_wrong_tier2": 1, |
| "wrong_tier1": 4 |
| }, |
| "exact_path_accuracy": 0.375, |
| "parent_safe_accuracy": 0.5, |
| "tier1_accuracy": 0.5, |
| "tier2_accuracy": 0.375, |
| "tier3_accuracy": 0.4, |
| "tier4_accuracy": 0.0 |
| }, |
| "view_metrics": { |
| "classifier": { |
| "average_prediction_depth": 1.75, |
| "error_buckets": { |
| "exact_match": 3, |
| "right_tier1_wrong_tier2": 1, |
| "wrong_tier1": 4 |
| }, |
| "exact_path_accuracy": 0.375, |
| "parent_safe_accuracy": 0.5, |
| "tier1_accuracy": 0.5, |
| "tier2_accuracy": 0.375, |
| "tier3_accuracy": 0.4, |
| "tier4_accuracy": 0.0 |
| }, |
| "combined_path": { |
| "average_prediction_depth": 1.75, |
| "error_buckets": { |
| "exact_match": 3, |
| "right_tier1_wrong_tier2": 1, |
| "wrong_tier1": 4 |
| }, |
| "exact_path_accuracy": 0.375, |
| "fallback_overuse_count": 1, |
| "fallback_rate": 0.125, |
| "parent_safe_accuracy": 0.5, |
| "tier1_accuracy": 0.5, |
| "tier2_accuracy": 0.375, |
| "tier3_accuracy": 0.4, |
| "tier4_accuracy": 0.0 |
| }, |
| "disagreements": { |
| "classifier_vs_combined": 0 |
| }, |
| "shadow_embedding_retrieval": { |
| "hint": "Set IAB_EVAL_INCLUDE_SHADOW_RETRIEVAL=1 to run shadow embedding retrieval (downloads/loads gte-Qwen2 when index is present).", |
| "reason": "disabled_by_default", |
| "skipped": true |
| } |
| } |
| } |
|
|