# agentic-intent-classifier/training/pipeline_verify.py
# manikumargouni's picture
# Upload folder using huggingface_hub
# 0584798 verified
from __future__ import annotations
import argparse
import sys
from pathlib import Path
# Directory two levels above this file; it is placed on sys.path so the
# `config` import that follows can be resolved when this file is run directly.
BASE_DIR = Path(__file__).resolve().parent.parent
_base_dir_entry = str(BASE_DIR)
if _base_dir_entry not in sys.path:
    # Prepend (rather than append) so this directory is searched first.
    sys.path.insert(0, _base_dir_entry)
from config import ( # noqa: E402
HEAD_CONFIGS,
IAB_CLASSIFIER_MODEL_DIR,
MULTITASK_INTENT_MODEL_DIR,
_looks_like_local_hf_model_dir,
)
def verify_production_artifacts() -> tuple[bool, list[tuple[str, bool, str]]]:
    """Check that the artifacts needed for production inference are present.

    Four groups are inspected, in this order:

    1. the multitask weights and metadata files,
    2. the multitask tokenizer files (at least one known file must exist),
    3. the IAB classifier directory, judged by
       ``_looks_like_local_hf_model_dir``,
    4. the calibration file of every head in ``HEAD_CONFIGS``.

    Returns:
        ``(all_ok, rows)`` where ``rows`` holds one ``(label, ok, path_str)``
        tuple per check and ``all_ok`` is the conjunction of every ``ok``.
    """
    report: list[tuple[str, bool, str]] = []
    all_present = True

    def record(label, present, location) -> None:
        # Append one report row and fold its status into the overall verdict.
        nonlocal all_present
        report.append((label, present, str(location)))
        all_present = all_present and present

    multitask_dir = MULTITASK_INTENT_MODEL_DIR

    # 1. Files that must each exist individually.
    required_files = {
        "multitask weights": multitask_dir / "multitask_model.pt",
        "multitask metadata": multitask_dir / "metadata.json",
    }
    for label, artifact in required_files.items():
        record(label, artifact.exists(), artifact)

    # 2. AutoTokenizer(...) typically saves tokenizer.json/vocab.txt/
    #    tokenizer_config.json, but not necessarily a top-level `config.json`
    #    (that would be model config), so any one of these files is accepted.
    tokenizer_candidates = (
        "tokenizer.json",
        "vocab.txt",
        "tokenizer_config.json",
        "special_tokens_map.json",
    )
    has_tokenizer = False
    for candidate in tokenizer_candidates:
        if (multitask_dir / candidate).exists():
            has_tokenizer = True
            break
    record("multitask tokenizer files", has_tokenizer, multitask_dir)

    # 3. The IAB classifier is validated as a directory by the config helper.
    record(
        "IAB classifier (HF layout)",
        _looks_like_local_hf_model_dir(IAB_CLASSIFIER_MODEL_DIR),
        IAB_CLASSIFIER_MODEL_DIR,
    )

    # 4. One calibration artifact per configured head.
    for slug, head_cfg in HEAD_CONFIGS.items():
        calibration_file = head_cfg.calibration_path
        record(f"calibration {slug}", calibration_file.exists(), calibration_file)

    return all_present, report
def main(argv: list[str] | None = None) -> int:
    """Print a presence report for production artifacts and return an exit code.

    Args:
        argv: Command-line arguments to parse, excluding the program name.
            ``None`` (the default) makes argparse read ``sys.argv[1:]``, which
            is what the script entry point relies on.

    Returns:
        ``0`` when every artifact check passed, ``1`` otherwise.
    """
    parser = argparse.ArgumentParser(
        description="Verify multitask, IAB, and calibration artifacts exist for production inference."
    )
    # No options are defined; parsing is still performed so that --help works
    # and unknown arguments are rejected. The resulting namespace is unused.
    parser.parse_args(argv)

    all_ok, rows = verify_production_artifacts()
    for label, row_ok, path in rows:
        # "OK " carries a trailing space so both tags are four characters wide.
        status = "OK " if row_ok else "MISS"
        print(f"[{status}] {label}: {path}")

    if not all_ok:
        print(
            "\nFix: run training/run_full_training_pipeline.py (or train_multitask_intent, train_iab, "
            "calibrate_confidence for each head).",
            flush=True,
        )
        return 1

    print("\nAll production artifacts present.")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    sys.exit(main())