""" Health check for ECE Compliance RAG system. Checks Ollama connectivity, model availability, corpus files, and index freshness. Usage: uv run python scripts/check.py # full check uv run python scripts/check.py --ollama # just Ollama check """ import os import sys import json import argparse import urllib.request import urllib.error PROJECT_ROOT = os.path.join(os.path.dirname(__file__), "..") CORPUS_DIR = os.path.join(PROJECT_ROOT, "corpus") INDEX_DIR = os.path.join(PROJECT_ROOT, "indexes") OLLAMA_URL = "http://localhost:11434" REQUIRED_MODEL = "qwen2.5:14b" DOMAINS = { "legislation": "legislation", "licensing-criteria": "licensing-criteria", "ero": "ero", "cross-regulator": "cross-regulator", "reform-context": "reform-context", } OK = "\033[32m✓\033[0m" FAIL = "\033[31m✗\033[0m" WARN = "\033[33m!\033[0m" def check_ollama(): """Check Ollama is running and model is loaded.""" # 1. Is Ollama reachable? try: req = urllib.request.Request(f"{OLLAMA_URL}/api/tags") with urllib.request.urlopen(req, timeout=5) as resp: data = json.loads(resp.read()) except (urllib.error.URLError, ConnectionRefusedError, OSError): print(f" {FAIL} Ollama not reachable at {OLLAMA_URL}") print(f" Run: ollama serve") return False print(f" {OK} Ollama running at {OLLAMA_URL}") # 2. Is the model available? models = [m.get("name", "") for m in data.get("models", [])] model_found = any(REQUIRED_MODEL in m for m in models) if model_found: print(f" {OK} Model {REQUIRED_MODEL} available") else: print(f" {FAIL} Model {REQUIRED_MODEL} not found") print(f" Available: {', '.join(models) or 'none'}") print(f" Run: ollama pull {REQUIRED_MODEL}") return False return True def check_corpus(): """Check corpus files exist and show sizes.""" all_ok = True for name, slug in DOMAINS.items(): path = os.path.join(CORPUS_DIR, f"{slug}.md") if os.path.exists(path): size_kb = os.path.getsize(path) / 1024 with open(path, "r") as f: lines = sum(1 for _ in f) marker = OK if size_kb > 1 else WARN print(f" {marker} {name:20s} {size_kb:6.0f} KB {lines:5d} lines") else: print(f" {FAIL} {name:20s} NOT FOUND") all_ok = False return all_ok def check_indexes(): """Check index files exist, show sizes, and flag stale ones.""" all_ok = True for name, slug in DOMAINS.items(): idx_path = os.path.join(INDEX_DIR, f"{slug}.json") corpus_path = os.path.join(CORPUS_DIR, f"{slug}.md") if not os.path.exists(idx_path): print(f" {FAIL} {name:20s} NOT FOUND") all_ok = False continue size_kb = os.path.getsize(idx_path) / 1024 # Check if index is older than corpus (stale) stale = False if os.path.exists(corpus_path): idx_mtime = os.path.getmtime(idx_path) corpus_mtime = os.path.getmtime(corpus_path) if idx_mtime < corpus_mtime: stale = True # Check if index has doc_description try: with open(idx_path) as f: tree = json.load(f) has_desc = bool(tree.get("doc_description")) node_count = len(_count_nodes(tree.get("structure", []))) except Exception: has_desc = False node_count = 0 if stale: marker = WARN suffix = " STALE (corpus is newer)" elif not has_desc: marker = WARN suffix = " (no doc_description)" else: marker = OK suffix = "" print(f" {marker} {name:20s} {size_kb:6.0f} KB {node_count:3d} nodes{suffix}") if stale: all_ok = False return all_ok def _count_nodes(structure): """Count all nodes in a PageIndex tree structure.""" nodes = [] for item in structure: nodes.append(item) if "nodes" in item and item["nodes"]: nodes.extend(_count_nodes(item["nodes"])) return nodes def check_pageindex(): """Check PageIndex repo is cloned.""" pi_dir = os.path.join(PROJECT_ROOT, "PageIndex") pi_module = os.path.join(pi_dir, "pageindex", "page_index_md.py") if os.path.exists(pi_module): print(f" {OK} PageIndex repo found") return True else: print(f" {FAIL} PageIndex repo not found at {pi_dir}") print(f" Run: git clone https://github.com/VectifyAI/PageIndex.git") return False def main(): parser = argparse.ArgumentParser(description="ECE Compliance RAG health check") parser.add_argument("--ollama", action="store_true", help="Only check Ollama") args = parser.parse_args() if args.ollama: print("\nOllama") ok = check_ollama() sys.exit(0 if ok else 1) print("\n" + "=" * 50) print("ECE Compliance RAG — System Check") print("=" * 50) results = {} print("\nOllama") results["ollama"] = check_ollama() print("\nPageIndex") results["pageindex"] = check_pageindex() print("\nCorpus files") results["corpus"] = check_corpus() print("\nIndexes") results["indexes"] = check_indexes() # Summary print("\n" + "-" * 50) all_ok = all(results.values()) if all_ok: print(f"{OK} All checks passed") else: failed = [k for k, v in results.items() if not v] print(f"{FAIL} Issues found: {', '.join(failed)}") print() sys.exit(0 if all_ok else 1) if __name__ == "__main__": main()