"""Scan a repository for mentions of selected technology areas and report them.

The script walks the repository rooted one directory above this file, counts
keyword mentions per technology area, checks whether a few related Python
packages are importable, counts Lean/Coq source files, and prints a summary.
"""
import importlib.util
import os
from collections import defaultdict
from typing import Dict, Iterator, List, Tuple

ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# Technology area -> keywords whose (case-insensitive, substring) occurrences
# are counted as evidence for that area.
KEYWORDS: Dict[str, List[str]] = {
    'GraphDB (Neo4j)': ['neo4j', 'py2neo'],
    'Vector DB (FAISS/Milvus)': ['faiss', 'milvus', 'weaviate', 'vector_store'],
    'Neuro-symbolic (PyTorch models)': ['import torch', 'torch.nn', 'torch_geometric'],
    'Quantum (Qiskit/Cirq/cuQuantum)': ['qiskit', 'cirq', 'cuquantum', 'tensornetwork'],
    'Formal provers (Lean/Coq)': ['.lean', '.v', 'lean-toolchain', 'coq'],
    'Orchestration (Kubernetes/Ray/Dask)': ['kubernetes', 'ray', 'dask'],
    'gRPC/GraphQL': ['grpc', 'graphql', 'graphene'],
}

# Label shown in the report -> module name probed for importability.
IMPORT_CHECKS = {
    'neo4j': 'neo4j',
    'py2neo': 'py2neo',
    'faiss': 'faiss',
    'torch': 'torch',
    'qiskit': 'qiskit',
    'cirq': 'cirq',
    'tensornetwork': 'tensornetwork',
}

# File extensions whose contents are scanned for keywords.
EXTS = {'.py', '.md', '.yaml', '.yml', '.toml', '.json', '.lean', '.v', '.ipynb'}

# Directory names that are never descended into (matched on the exact name).
SKIP_DIRS = frozenset({'.git', '__pycache__', 'venv', 'env', 'node_modules', '.venv'})


def _walk_repo(root: str) -> Iterator[Tuple[str, List[str]]]:
    """Yield ``(dirpath, filenames)`` under *root*, pruning ``SKIP_DIRS``."""
    for dirpath, dirnames, filenames in os.walk(root):
        # Prune in place so os.walk does not descend into skipped directories.
        # Matching on the exact directory name avoids dropping unrelated paths
        # such as "environment/" merely because they contain "env".
        dirnames[:] = [d for d in dirnames if d not in SKIP_DIRS]
        yield dirpath, filenames


def scan_repo(root: str) -> Dict[str, int]:
    """Count keyword mentions per technology area in the files under *root*.

    Only files whose extension is in ``EXTS`` (or that are named
    ``Dockerfile``) are read. Matching is a case-insensitive substring count,
    so a keyword embedded in a longer word is counted as well. Directories
    named in ``SKIP_DIRS`` are not visited; unreadable files are skipped.

    Args:
        root: Directory to scan recursively.

    Returns:
        A mapping from area name (a key of ``KEYWORDS``) to the total number
        of keyword occurrences. Areas with no occurrences are absent.
    """
    counts: Dict[str, int] = defaultdict(int)
    for dirpath, filenames in _walk_repo(root):
        for fn in filenames:
            ext = os.path.splitext(fn)[1].lower()
            if ext not in EXTS and fn != 'Dockerfile':
                continue
            path = os.path.join(dirpath, fn)
            try:
                # errors='ignore' keeps the scan best-effort on non-UTF-8 files.
                with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                    txt = f.read().lower()
            except OSError:
                continue
            for area, kws in KEYWORDS.items():
                for kw in kws:
                    hits = txt.count(kw.lower())
                    # Only touch the defaultdict on a real hit so that areas
                    # without mentions do not appear with a zero count.
                    if hits:
                        counts[area] += hits
    return counts


def try_imports(mapping: Dict[str, str]) -> Dict[str, bool]:
    """Report which modules in *mapping* can be located by the import system.

    Uses ``importlib.util.find_spec`` to discover packages without executing
    their top-level import code (safer and faster for heavy packages).

    Args:
        mapping: Label -> module name to probe.

    Returns:
        Label -> ``True`` if a spec was found, ``False`` otherwise (including
        when the lookup itself raised).
    """
    res: Dict[str, bool] = {}
    for label, mod in mapping.items():
        try:
            res[label] = importlib.util.find_spec(mod) is not None
        except Exception:
            # Deliberately broad: any failure while probing means "not usable".
            res[label] = False
    return res


def find_formal_files(root: str) -> Dict[str, int]:
    """Count Lean (``.lean``) and Coq (``.v``) files under *root*.

    Directories named in ``SKIP_DIRS`` are not visited, consistent with
    ``scan_repo``.

    Args:
        root: Directory to scan recursively.

    Returns:
        A dict with the keys ``'lean'`` and ``'coq'`` holding the file counts.
    """
    c = {'lean': 0, 'coq': 0}
    for _dirpath, filenames in _walk_repo(root):
        for fn in filenames:
            if fn.endswith('.lean'):
                c['lean'] += 1
            if fn.endswith('.v'):
                c['coq'] += 1
    return c


def print_summary(counts: Dict[str, int], imports: Dict[str, bool],
                  formal: Dict[str, int]) -> None:
    """Print the per-area status, formal-prover file counts and import checks.

    An area is 'Missing' by default, 'Partial' when it has keyword mentions,
    and 'Present' when a related package is importable (this takes precedence
    over the mention-based status for the GraphDB, Vector DB and
    Neuro-symbolic areas).

    Args:
        counts: Area -> number of keyword mentions (see ``scan_repo``).
        imports: Label -> importability flag (see ``try_imports``).
        formal: Counts under the keys ``'lean'`` and ``'coq'``.
    """
    print('Repo root:', ROOT)
    print('\nSummary:')
    for area in KEYWORDS:
        cnt = counts.get(area, 0)
        status = 'Missing'
        evidence = []
        if cnt > 0:
            status = 'Partial (code references)'
            evidence.append(f'{cnt} mentions')
        if area.startswith('GraphDB') and (imports.get('neo4j') or imports.get('py2neo')):
            status = 'Present (neo4j driver importable)'
        if area.startswith('Vector DB') and imports.get('faiss'):
            status = 'Present (faiss importable)'
        if area.startswith('Neuro-symbolic') and imports.get('torch'):
            status = 'Present (torch importable)'
        print(f'- {area}: {status}')
        for e in evidence:
            print(' -', e)
    print('\nFormal prover artifacts:')
    print('- Lean files:', formal.get('lean', 0))
    print('- Coq files (.v):', formal.get('coq', 0))
    print('\nImport checks:')
    for k, v in sorted(imports.items()):
        print('-', k + ':', 'importable' if v else 'not importable')


def main():
    """Scan the repository at ``ROOT`` and print the summary report."""
    counts = scan_repo(ROOT)
    imports = try_imports(IMPORT_CHECKS)
    formal = find_formal_files(ROOT)
    print_summary(counts, imports, formal)


if __name__ == '__main__':
    main()