hmc-rag / scripts /check.py
webmuppet
Initial commit — health marketing compliance RAG
bad8b6c
"""
Health check for ECE Compliance RAG system.
Checks Ollama connectivity, model availability, corpus files, and index freshness.
Usage:
uv run python scripts/check.py # full check
uv run python scripts/check.py --ollama # just Ollama check
"""
import os
import sys
import json
import argparse
import urllib.request
import urllib.error
# Filesystem layout, resolved relative to the directory holding this script.
PROJECT_ROOT = os.path.join(os.path.dirname(__file__), "..")
CORPUS_DIR = os.path.join(PROJECT_ROOT, "corpus")
INDEX_DIR = os.path.join(PROJECT_ROOT, "indexes")

# Ollama endpoint queried by the health check and the model it looks for.
OLLAMA_URL = "http://localhost:11434"
REQUIRED_MODEL = "qwen2.5:14b"

# Display name -> file slug for each domain. The two currently coincide, so
# the mapping is built from a single ordered tuple of slugs.
DOMAINS = {
    slug: slug
    for slug in (
        "legislation",
        "licensing-criteria",
        "ero",
        "cross-regulator",
        "reform-context",
    )
}

# ANSI-coloured status markers used as line prefixes in the report.
OK = "\033[32m✓\033[0m"
FAIL = "\033[31m✗\033[0m"
WARN = "\033[33m!\033[0m"
def check_ollama():
    """Check that Ollama answers and that the required model is listed.

    Requests ``{OLLAMA_URL}/api/tags`` and looks for ``REQUIRED_MODEL`` among
    the returned model names. The match is a substring test, so any listed
    name containing ``REQUIRED_MODEL`` counts. One status line is printed per
    step, with a hint on how to fix a failure.

    Returns:
        bool: True when the endpoint answered with a JSON object and the
        model was found; False when the endpoint is unreachable, the
        response is not a JSON object, or the model is missing.
    """
    tags_url = f"{OLLAMA_URL}/api/tags"

    # 1. Is Ollama reachable?
    try:
        req = urllib.request.Request(tags_url)
        with urllib.request.urlopen(req, timeout=5) as resp:
            payload = resp.read()
    except OSError:
        # URLError, ConnectionRefusedError and socket timeouts are all
        # OSError subclasses, so one clause covers them.
        print(f" {FAIL} Ollama not reachable at {OLLAMA_URL}")
        print(" Run: ollama serve")
        return False

    # Something answered on the port; make sure it is the expected JSON
    # object rather than, say, an HTML page from another service.
    try:
        data = json.loads(payload)
    except ValueError:
        data = None
    if not isinstance(data, dict):
        print(f" {FAIL} Unexpected response from {tags_url} (not a JSON object)")
        return False
    print(f" {OK} Ollama running at {OLLAMA_URL}")

    # 2. Is the model available?
    entries = data.get("models")
    if not isinstance(entries, list):
        # Missing or null "models" is treated as "no models listed".
        entries = []
    models = [str(m.get("name") or "") for m in entries if isinstance(m, dict)]

    if not any(REQUIRED_MODEL in m for m in models):
        print(f" {FAIL} Model {REQUIRED_MODEL} not found")
        print(f" Available: {', '.join(models) or 'none'}")
        print(f" Run: ollama pull {REQUIRED_MODEL}")
        return False

    print(f" {OK} Model {REQUIRED_MODEL} available")
    return True
def check_corpus():
    """Check that every domain's corpus file exists and report its size.

    For each entry in ``DOMAINS`` this looks for ``<CORPUS_DIR>/<slug>.md``
    and prints its size in KB and its line count. Files of 1 KB or less get
    the warning marker but do not fail the check.

    Returns:
        bool: False if any corpus file is missing, True otherwise.
    """
    all_ok = True
    for name, slug in DOMAINS.items():
        path = os.path.join(CORPUS_DIR, f"{slug}.md")
        if not os.path.exists(path):
            print(f" {FAIL} {name:20s} NOT FOUND")
            all_ok = False
            continue

        size_kb = os.path.getsize(path) / 1024
        # Decode explicitly as UTF-8 and tolerate stray bytes: the line count
        # is informational only, so the platform's default encoding must not
        # be able to crash the health check with a UnicodeDecodeError.
        with open(path, "r", encoding="utf-8", errors="replace") as f:
            lines = sum(1 for _ in f)

        marker = OK if size_kb > 1 else WARN
        print(f" {marker} {name:20s} {size_kb:6.0f} KB {lines:5d} lines")
    return all_ok
def check_indexes():
    """Check that every domain's index exists, is readable, and is fresh.

    For each entry in ``DOMAINS`` this looks for ``<INDEX_DIR>/<slug>.json``
    and prints its size in KB and node count. An index is:

    * missing     -> failure;
    * unreadable  -> failure (cannot be opened, is not valid JSON, or does
      not have the expected object shape);
    * stale       -> failure (its mtime is older than the matching corpus
      file's mtime);
    * lacking a truthy ``doc_description`` -> warning only.

    Returns:
        bool: True when no index is missing, unreadable, or stale.
    """
    all_ok = True
    for name, slug in DOMAINS.items():
        idx_path = os.path.join(INDEX_DIR, f"{slug}.json")
        corpus_path = os.path.join(CORPUS_DIR, f"{slug}.md")

        if not os.path.exists(idx_path):
            print(f" {FAIL} {name:20s} NOT FOUND")
            all_ok = False
            continue

        size_kb = os.path.getsize(idx_path) / 1024

        # Stale means the corpus was modified after the index was written.
        stale = os.path.exists(corpus_path) and (
            os.path.getmtime(idx_path) < os.path.getmtime(corpus_path)
        )

        # Parse the index to report its node count and description flag.
        readable = True
        try:
            with open(idx_path, encoding="utf-8") as f:
                tree = json.load(f)
            has_desc = bool(tree.get("doc_description"))
            # A null "structure" is treated as an empty tree.
            node_count = len(_count_nodes(tree.get("structure") or []))
        except (OSError, ValueError, AttributeError, TypeError, RecursionError):
            # ValueError covers invalid JSON and bad UTF-8; AttributeError and
            # TypeError cover JSON that parses but is not the expected shape.
            readable = False
            has_desc = False
            node_count = 0

        if not readable:
            marker = FAIL
            suffix = " UNREADABLE (invalid or unexpected JSON)"
        elif stale:
            marker = WARN
            suffix = " STALE (corpus is newer)"
        elif not has_desc:
            marker = WARN
            suffix = " (no doc_description)"
        else:
            marker = OK
            suffix = ""
        print(f" {marker} {name:20s} {size_kb:6.0f} KB {node_count:3d} nodes{suffix}")

        if stale or not readable:
            all_ok = False
    return all_ok
def _count_nodes(structure):
"""Count all nodes in a PageIndex tree structure."""
nodes = []
for item in structure:
nodes.append(item)
if "nodes" in item and item["nodes"]:
nodes.extend(_count_nodes(item["nodes"]))
return nodes
def check_pageindex():
    """Report whether the PageIndex checkout exists under the project root.

    Presence is decided by the file
    ``<PROJECT_ROOT>/PageIndex/pageindex/page_index_md.py``.

    Returns:
        bool: True if that file exists, False otherwise.
    """
    repo_dir = os.path.join(PROJECT_ROOT, "PageIndex")
    module_file = os.path.join(repo_dir, "pageindex", "page_index_md.py")

    if not os.path.exists(module_file):
        print(f" {FAIL} PageIndex repo not found at {repo_dir}")
        print(" Run: git clone https://github.com/VectifyAI/PageIndex.git")
        return False

    print(f" {OK} PageIndex repo found")
    return True
def main():
    """Run the health checks from the command line and exit with a status.

    With ``--ollama`` only the Ollama check runs. Otherwise every check runs
    in order (Ollama, PageIndex, corpus files, indexes) and a summary line is
    printed. The process exits with 0 when everything passed and 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="ECE Compliance RAG health check")
    parser.add_argument("--ollama", action="store_true", help="Only check Ollama")
    opts = parser.parse_args()

    # Short path: just the Ollama check.
    if opts.ollama:
        print("\nOllama")
        sys.exit(0 if check_ollama() else 1)

    banner = "=" * 50
    print("\n" + banner)
    print("ECE Compliance RAG — System Check")
    print(banner)

    # (section heading, result key, check function), run in this order.
    sections = (
        ("Ollama", "ollama", check_ollama),
        ("PageIndex", "pageindex", check_pageindex),
        ("Corpus files", "corpus", check_corpus),
        ("Indexes", "indexes", check_indexes),
    )
    results = {}
    for heading, key, run_check in sections:
        print("\n" + heading)
        results[key] = run_check()

    # Summary
    print("\n" + "-" * 50)
    failed = [key for key, passed in results.items() if not passed]
    if failed:
        print(f"{FAIL} Issues found: {', '.join(failed)}")
    else:
        print(f"{OK} All checks passed")
    print()
    sys.exit(1 if failed else 0)


if __name__ == "__main__":
    main()