#!/usr/bin/env python3 from __future__ import annotations import argparse import json import os import sys from pathlib import Path from typing import Any, Dict, List from urllib.parse import urlparse REPO_ROOT = Path(__file__).resolve().parents[2] BACKEND_DIR = REPO_ROOT / "backend" if str(BACKEND_DIR) not in sys.path: sys.path.insert(0, str(BACKEND_DIR)) from app.auth import load_auth_settings # noqa: E402 EXPECTED_DEFAULT_MODEL = "gpt-5-mini" FORBIDDEN_AUDIENCE_VALUES = { "https://masters-toolkit-api", "https://masters-toolkit-api/", } PARSEC_RELATIVE_CANDIDATES = ( Path("01_documents") / "antennas" / "parsec" / "Parsec-Portfolio-Catalog-2024-09.pdf", Path("01_documents") / "routers" / "parsec" / "Parsec-Antenna-Data Sheet.pdf", Path("01_documents") / "antennas" / "parsec" / "Parsec-Enterprise Portfolio-Router Matching Guide.pdf", ) def _is_placeholder_key(value: str) -> bool: v = str(value or "").strip() if not v: return True vu = v.upper() return vu.startswith("YOUR_KEY") or vu in {"", "YOUR_OPENAI_API_KEY", "REPLACE_ME"} def _clean_env_value(value: str) -> str: return str(value or "").strip().strip("\"'").strip() def _resolve_router_rag_paths() -> Dict[str, Path]: data_dir = Path(str(os.getenv("ROUTER_RAG_DATA_DIR", "") or "").strip() or (REPO_ROOT / "_RAG_Ready_KB_Organized")) chunks = Path(str(os.getenv("ROUTER_RAG_CHUNKS_PATH", "") or "").strip() or (data_dir / "04_ingestion" / "rag_ingestion_chunks.jsonl")) manifest = Path(str(os.getenv("ROUTER_RAG_MANIFEST_PATH", "") or "").strip() or (data_dir / "03_manifests" / "rag_manifest_organized.csv")) return {"data_dir": data_dir, "chunks": chunks, "manifest": manifest} def _resolve_parsec_catalog_path(rag_paths: Dict[str, Path]) -> Path | None: configured = _clean_env_value(os.getenv("PARSEC_CATALOG_PATH", "")) if configured: configured_path = Path(configured) if configured_path.exists(): return configured_path repo_asset = REPO_ROOT / "ParsecCatalog.pdf" if repo_asset.exists(): return repo_asset data_dir = Path(rag_paths.get("data_dir") or "") for rel in PARSEC_RELATIVE_CANDIDATES: candidate = data_dir / rel if candidate.exists(): return candidate return None def _check_path(path: Path, *, file_required: bool = True) -> Dict[str, Any]: exists = path.exists() ok = exists and (path.is_file() if file_required else path.is_dir()) return {"path": str(path), "exists": exists, "ok": ok} def _normalize_origin(value: str) -> str: raw = _clean_env_value(value) if not raw: return "" parsed = urlparse(raw) if parsed.scheme not in {"http", "https"} or not parsed.netloc: return "" host = parsed.hostname or "" if not host: return "" port = f":{parsed.port}" if parsed.port else "" return f"{parsed.scheme.lower()}://{host.lower()}{port}" def _build_report(args: argparse.Namespace) -> Dict[str, Any]: checks: Dict[str, Any] = {} failures: List[str] = [] warnings: List[str] = [] openai_key = str(os.getenv("OPENAI_API_KEY", "") or "").strip() openai_ok = bool(openai_key) and not _is_placeholder_key(openai_key) checks["openai"] = { "required": bool(args.require_openai), "present": bool(openai_key), "placeholder": _is_placeholder_key(openai_key) if openai_key else True, "ok": openai_ok or (not args.require_openai), } if args.require_openai and not openai_ok: failures.append("OPENAI_API_KEY is missing or placeholder.") auth = load_auth_settings() checks["auth"] = { "required": bool(auth.required), "enabled": bool(auth.enabled), "config_error": auth.config_error, "config_details": list(auth.config_details), "config_warnings": list(auth.config_warnings), "ok": (not auth.required) or bool(auth.enabled), } if auth.required and not auth.enabled: failures.append(auth.config_error or "Auth is required but Auth0 config is invalid.") if auth.config_warnings: warnings.extend(list(auth.config_warnings)) model_env_names = ( "OPENAI_MODEL", "UNIFIED_KB_OPENAI_MODEL", "ROUTER_RAG_OPENAI_MODEL", ) checks["model_env"] = {} for name in model_env_names: raw = _clean_env_value(os.getenv(name, "")) model_ok = (not raw) or raw == EXPECTED_DEFAULT_MODEL or (not args.enforce_gpt5_mini) checks["model_env"][name] = { "set": bool(raw), "value": raw, "expected": EXPECTED_DEFAULT_MODEL, "ok": model_ok, } if args.enforce_gpt5_mini and raw and raw != EXPECTED_DEFAULT_MODEL: failures.append(f"{name} must be unset or '{EXPECTED_DEFAULT_MODEL}', found '{raw}'.") audience_env_names = ("AUTH0_AUDIENCE", "VITE_AUTH0_AUDIENCE") checks["forbidden_audience"] = {} for name in audience_env_names: raw = _clean_env_value(os.getenv(name, "")) normalized = raw.rstrip("/") if raw.startswith("https://") else raw forbidden = raw in FORBIDDEN_AUDIENCE_VALUES or normalized in {v.rstrip("/") for v in FORBIDDEN_AUDIENCE_VALUES} checks["forbidden_audience"][name] = { "set": bool(raw), "value": raw, "ok": not forbidden, } if forbidden: failures.append(f"{name} must remain unset unless a real API Identifier exists; found removed placeholder '{raw}'.") app_base_url = _clean_env_value(os.getenv("APP_BASE_URL", "")) vite_app_base_url = _clean_env_value(os.getenv("VITE_APP_BASE_URL", "")) app_base_origin = _normalize_origin(app_base_url) vite_app_base_origin = _normalize_origin(vite_app_base_url) base_urls_ok = True if app_base_url and not app_base_origin: base_urls_ok = False failures.append(f"APP_BASE_URL must be an absolute http(s) URL, found '{app_base_url}'.") if vite_app_base_url and not vite_app_base_origin: base_urls_ok = False failures.append(f"VITE_APP_BASE_URL must be an absolute http(s) URL, found '{vite_app_base_url}'.") if app_base_origin and vite_app_base_origin and app_base_origin != vite_app_base_origin: base_urls_ok = False failures.append( "APP_BASE_URL and VITE_APP_BASE_URL must point at the same origin; " f"found '{app_base_origin}' vs '{vite_app_base_origin}'." ) checks["base_urls"] = { "APP_BASE_URL": {"set": bool(app_base_url), "value": app_base_url, "origin": app_base_origin}, "VITE_APP_BASE_URL": {"set": bool(vite_app_base_url), "value": vite_app_base_url, "origin": vite_app_base_origin}, "ok": base_urls_ok, } rag = _resolve_router_rag_paths() repo_assets = { "routers_eos_eol_by_sku.csv": REPO_ROOT / "routers_eos_eol_by_sku.csv", "feb2026routers.csv": REPO_ROOT / "feb2026routers.csv", "FAQ_master_updated.csv": REPO_ROOT / "docs" / "faq" / "FAQ_master_updated.csv", } checks["required_assets"] = {name: _check_path(path, file_required=True) for name, path in repo_assets.items()} for name, data in checks["required_assets"].items(): if not bool(data.get("ok")): failures.append(f"Missing required asset: {name}") checks["router_rag"] = { "data_dir": _check_path(rag["data_dir"], file_required=False), "chunks": _check_path(rag["chunks"], file_required=True), "manifest": _check_path(rag["manifest"], file_required=True), "strict": bool(args.fail_on_missing_router_rag), } if args.fail_on_missing_router_rag: if not checks["router_rag"]["chunks"]["ok"]: failures.append("Router RAG chunks file is missing.") if not checks["router_rag"]["manifest"]["ok"]: failures.append("Router RAG manifest file is missing.") else: if not checks["router_rag"]["chunks"]["ok"] or not checks["router_rag"]["manifest"]["ok"]: warnings.append("Router RAG ingestion artifacts are missing; router-doc retrieval may be degraded.") parsec_catalog_path = _resolve_parsec_catalog_path(rag) parsec_default = Path(_clean_env_value(os.getenv("PARSEC_CATALOG_PATH", "")) or (REPO_ROOT / "ParsecCatalog.pdf")) parsec_check = _check_path(parsec_catalog_path or parsec_default, file_required=True) parsec_check["required"] = True checks["parsec_catalog"] = parsec_check if not bool(parsec_check.get("ok")): failures.append("Parsec catalog asset is missing.") return { "ok": not failures, "failures": failures, "warnings": warnings, "checks": checks, } def main() -> int: parser = argparse.ArgumentParser(description="Preflight environment/configuration checks for CI and release gates.") parser.add_argument("--require-openai", action="store_true", help="Fail if OPENAI_API_KEY is missing/placeholder.") parser.add_argument( "--enforce-gpt5-mini", action="store_true", help=f"Fail if active model env vars are set to anything other than {EXPECTED_DEFAULT_MODEL}.", ) parser.add_argument( "--fail-on-missing-router-rag", action="store_true", help="Fail if router RAG manifest/chunks files are missing.", ) parser.add_argument("--out", default="", help="Optional output JSON path.") args = parser.parse_args() report = _build_report(args) payload = json.dumps(report, indent=2) if args.out: out_path = Path(args.out) out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(payload + "\n", encoding="utf-8") print(f"Wrote preflight report: {out_path}") print(payload) return 0 if report.get("ok") else 2 if __name__ == "__main__": raise SystemExit(main())