Upload iab_taxonomy.py with huggingface_hub
Browse files- iab_taxonomy.py +30 -1
iab_taxonomy.py
CHANGED
|
@@ -2,6 +2,7 @@ from __future__ import annotations
|
|
| 2 |
|
| 3 |
import csv
|
| 4 |
import json
|
|
|
|
| 5 |
from dataclasses import dataclass
|
| 6 |
from functools import lru_cache
|
| 7 |
from pathlib import Path
|
|
@@ -11,6 +12,8 @@ try:
|
|
| 11 |
except ImportError:
|
| 12 |
from config import IAB_TAXONOMY_GRAPH_PATH, IAB_TAXONOMY_PATH, IAB_TAXONOMY_VERSION
|
| 13 |
|
|
|
|
|
|
|
| 14 |
|
| 15 |
@dataclass(frozen=True)
|
| 16 |
class IabNode:
|
|
@@ -154,10 +157,36 @@ def _load_rows(path: Path) -> list[dict]:
|
|
| 154 |
return parsed
|
| 155 |
|
| 156 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
@lru_cache(maxsize=1)
|
| 158 |
def get_iab_taxonomy() -> IabTaxonomy:
|
| 159 |
nodes = []
|
| 160 |
-
for row in _load_rows(
|
| 161 |
path = tuple(
|
| 162 |
value.strip()
|
| 163 |
for key in ("Tier 1", "Tier 2", "Tier 3", "Tier 4")
|
|
|
|
| 2 |
|
| 3 |
import csv
|
| 4 |
import json
|
| 5 |
+
import os
|
| 6 |
from dataclasses import dataclass
|
| 7 |
from functools import lru_cache
|
| 8 |
from pathlib import Path
|
|
|
|
| 12 |
except ImportError:
|
| 13 |
from config import IAB_TAXONOMY_GRAPH_PATH, IAB_TAXONOMY_PATH, IAB_TAXONOMY_VERSION
|
| 14 |
|
| 15 |
+
_DEFAULT_MODEL_REPO_ID = "admesh/agentic-intent-classifier"
|
| 16 |
+
|
| 17 |
|
| 18 |
@dataclass(frozen=True)
|
| 19 |
class IabNode:
|
|
|
|
| 157 |
return parsed
|
| 158 |
|
| 159 |
|
| 160 |
+
def _resolve_taxonomy_path() -> Path:
    """Resolve taxonomy TSV path for local and HF trust_remote_code environments.

    The locally configured ``IAB_TAXONOMY_PATH`` is used when it exists.
    Otherwise the TSV is downloaded from a Hugging Face model repo through
    ``huggingface_hub.hf_hub_download``.

    Environment variables read for the fallback:
        ADMESH_MODEL_REPO_ID: repo to download from; a missing or blank value
            falls back to ``_DEFAULT_MODEL_REPO_ID``.
        ADMESH_MODEL_REVISION: revision passed to the download call; a missing
            or blank value is passed as ``None``.

    Returns:
        Path to the taxonomy TSV: either ``IAB_TAXONOMY_PATH`` or the path
        returned by ``hf_hub_download``.

    Raises:
        FileNotFoundError: the local file is missing and ``hf_hub_download``
            cannot be imported from ``huggingface_hub``.

    Errors raised by ``hf_hub_download`` itself are not caught here.
    """
    if IAB_TAXONOMY_PATH.exists():
        return IAB_TAXONOMY_PATH

    # HF dynamic modules often do not contain non-Python data files.
    # Fetch the taxonomy TSV directly from the model repo as a fallback.
    repo_id = (
        os.environ.get("ADMESH_MODEL_REPO_ID", _DEFAULT_MODEL_REPO_ID).strip()
        or _DEFAULT_MODEL_REPO_ID
    )
    revision = os.environ.get("ADMESH_MODEL_REVISION", "").strip() or None
    filename = f"data/iab-content/Content Taxonomy {IAB_TAXONOMY_VERSION}.tsv"

    try:
        from huggingface_hub import hf_hub_download
    except ImportError as exc:
        # ImportError (the parent of ModuleNotFoundError) also covers an
        # installed package that fails to import or lacks hf_hub_download,
        # so every "cannot import" case surfaces as the same error type.
        raise FileNotFoundError(
            f"Taxonomy TSV missing at {IAB_TAXONOMY_PATH}; install huggingface_hub or provide local taxonomy file."
        ) from exc

    downloaded = hf_hub_download(
        repo_id=repo_id,
        repo_type="model",
        filename=filename,
        revision=revision,
    )
    return Path(downloaded)
|
| 184 |
+
|
| 185 |
+
|
| 186 |
@lru_cache(maxsize=1)
|
| 187 |
def get_iab_taxonomy() -> IabTaxonomy:
|
| 188 |
nodes = []
|
| 189 |
+
for row in _load_rows(_resolve_taxonomy_path()):
|
| 190 |
path = tuple(
|
| 191 |
value.strip()
|
| 192 |
for key in ("Tier 1", "Tier 2", "Tier 3", "Tier 4")
|