Spaces:
Running
Running
| """ | |
| Health check for ECE Compliance RAG system. | |
| Checks Ollama connectivity, model availability, corpus files, and index freshness. | |
| Usage: | |
| uv run python scripts/check.py # full check | |
| uv run python scripts/check.py --ollama # just Ollama check | |
| """ | |
| import os | |
| import sys | |
| import json | |
| import argparse | |
| import urllib.request | |
| import urllib.error | |
# Resolve project layout relative to this script (scripts/ -> repo root).
PROJECT_ROOT = os.path.join(os.path.dirname(__file__), "..")
CORPUS_DIR = os.path.join(PROJECT_ROOT, "corpus")
INDEX_DIR = os.path.join(PROJECT_ROOT, "indexes")

# Local Ollama server endpoint and the model this pipeline requires.
OLLAMA_URL = "http://localhost:11434"
REQUIRED_MODEL = "qwen2.5:14b"

# Domain display name -> file slug (corpus/<slug>.md, indexes/<slug>.json).
# Names and slugs currently coincide but are kept separate on purpose.
DOMAINS = {
    "legislation": "legislation",
    "licensing-criteria": "licensing-criteria",
    "ero": "ero",
    "cross-regulator": "cross-regulator",
    "reform-context": "reform-context",
}

# ANSI-colored status markers: green check, red cross, yellow warning.
OK = "\033[32m✓\033[0m"
FAIL = "\033[31m✗\033[0m"
WARN = "\033[33m!\033[0m"
def check_ollama():
    """Check that Ollama is reachable and the required model is installed.

    Returns:
        True when GET /api/tags succeeds and REQUIRED_MODEL matches one of
        the installed model names; False otherwise (with hints printed).
    """
    # 1. Is Ollama reachable? URLError and ConnectionRefusedError are both
    #    OSError subclasses, so OSError covers all connection failures;
    #    ValueError covers a reachable server returning a non-JSON body,
    #    which previously crashed the health check.
    try:
        req = urllib.request.Request(f"{OLLAMA_URL}/api/tags")
        with urllib.request.urlopen(req, timeout=5) as resp:
            data = json.loads(resp.read())
    except (OSError, ValueError):
        print(f" {FAIL} Ollama not reachable at {OLLAMA_URL}")
        print(f" Run: ollama serve")
        return False
    print(f" {OK} Ollama running at {OLLAMA_URL}")
    # 2. Is the model available? Substring match so tag variants of the
    #    required model still count.
    models = [m.get("name", "") for m in data.get("models", [])]
    if any(REQUIRED_MODEL in m for m in models):
        print(f" {OK} Model {REQUIRED_MODEL} available")
        return True
    print(f" {FAIL} Model {REQUIRED_MODEL} not found")
    print(f" Available: {', '.join(models) or 'none'}")
    print(f" Run: ollama pull {REQUIRED_MODEL}")
    return False
def check_corpus():
    """Check that every domain corpus file exists; print size and line count.

    Returns:
        True only when all corpus files are present. Files smaller than
        1 KB are flagged with a warning marker but do not fail the check.
    """
    all_ok = True
    for name, slug in DOMAINS.items():
        path = os.path.join(CORPUS_DIR, f"{slug}.md")
        if os.path.exists(path):
            size_kb = os.path.getsize(path) / 1024
            # Explicit UTF-8: corpus files are markdown; the platform
            # default encoding (e.g. cp1252 on Windows) could raise
            # UnicodeDecodeError while counting lines.
            with open(path, "r", encoding="utf-8") as f:
                lines = sum(1 for _ in f)
            # A near-empty corpus file is suspicious but not a hard failure.
            marker = OK if size_kb > 1 else WARN
            print(f" {marker} {name:20s} {size_kb:6.0f} KB {lines:5d} lines")
        else:
            print(f" {FAIL} {name:20s} NOT FOUND")
            all_ok = False
    return all_ok
def check_indexes():
    """Check index files: existence, size, node count, staleness, description.

    An index is "stale" when its corpus file has a newer mtime; staleness
    fails the check. A missing doc_description only warns.

    Returns:
        True when every index exists and none are stale.
    """
    all_ok = True
    for name, slug in DOMAINS.items():
        idx_path = os.path.join(INDEX_DIR, f"{slug}.json")
        corpus_path = os.path.join(CORPUS_DIR, f"{slug}.md")
        if not os.path.exists(idx_path):
            print(f" {FAIL} {name:20s} NOT FOUND")
            all_ok = False
            continue
        size_kb = os.path.getsize(idx_path) / 1024
        # Stale when the corpus was modified after the index was built.
        stale = (
            os.path.exists(corpus_path)
            and os.path.getmtime(idx_path) < os.path.getmtime(corpus_path)
        )
        # Inspect the index payload. A broken index degrades to the
        # "no doc_description" warning rather than crashing the check;
        # the bare `except Exception` is narrowed to what can occur here:
        # OSError (unreadable file), ValueError (bad JSON / bad encoding),
        # AttributeError/TypeError (top-level JSON not the expected shape).
        try:
            with open(idx_path, encoding="utf-8") as f:
                tree = json.load(f)
            has_desc = bool(tree.get("doc_description"))
            node_count = len(_count_nodes(tree.get("structure", [])))
        except (OSError, ValueError, AttributeError, TypeError):
            has_desc = False
            node_count = 0
        if stale:
            marker, suffix = WARN, " STALE (corpus is newer)"
            all_ok = False
        elif not has_desc:
            marker, suffix = WARN, " (no doc_description)"
        else:
            marker, suffix = OK, ""
        print(f" {marker} {name:20s} {size_kb:6.0f} KB {node_count:3d} nodes{suffix}")
    return all_ok
| def _count_nodes(structure): | |
| """Count all nodes in a PageIndex tree structure.""" | |
| nodes = [] | |
| for item in structure: | |
| nodes.append(item) | |
| if "nodes" in item and item["nodes"]: | |
| nodes.extend(_count_nodes(item["nodes"])) | |
| return nodes | |
def check_pageindex():
    """Check that the PageIndex repo is cloned under the project root."""
    pi_dir = os.path.join(PROJECT_ROOT, "PageIndex")
    pi_module = os.path.join(pi_dir, "pageindex", "page_index_md.py")
    # Guard clause: missing module file means the repo is not cloned.
    if not os.path.exists(pi_module):
        print(f" {FAIL} PageIndex repo not found at {pi_dir}")
        print(f" Run: git clone https://github.com/VectifyAI/PageIndex.git")
        return False
    print(f" {OK} PageIndex repo found")
    return True
def main():
    """Run the health-check suite; exit 0 on success, 1 on any failure."""
    parser = argparse.ArgumentParser(description="ECE Compliance RAG health check")
    parser.add_argument("--ollama", action="store_true", help="Only check Ollama")
    args = parser.parse_args()

    # Fast path: only probe Ollama, skip the rest.
    if args.ollama:
        print("\nOllama")
        sys.exit(0 if check_ollama() else 1)

    banner = "=" * 50
    print("\n" + banner)
    print("ECE Compliance RAG — System Check")
    print(banner)

    # Run every check in order, recording pass/fail per section.
    sections = (
        ("ollama", "Ollama", check_ollama),
        ("pageindex", "PageIndex", check_pageindex),
        ("corpus", "Corpus files", check_corpus),
        ("indexes", "Indexes", check_indexes),
    )
    results = {}
    for key, heading, check in sections:
        print(f"\n{heading}")
        results[key] = check()

    # Summary
    print("\n" + "-" * 50)
    failed = [key for key, passed in results.items() if not passed]
    if failed:
        print(f"{FAIL} Issues found: {', '.join(failed)}")
    else:
        print(f"{OK} All checks passed")
    print()
    sys.exit(1 if failed else 0)


if __name__ == "__main__":
    main()