|
|
import os
|
|
|
from collections import defaultdict
|
|
|
from typing import Dict, List
|
|
|
|
|
|
# Absolute path of the repository root: the parent of this script's directory.
ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# Capability area -> substrings searched for (case-insensitively) in scanned
# repo files; every occurrence of a matching keyword counts toward the area.
KEYWORDS: Dict[str, List[str]] = {
    'GraphDB (Neo4j)': ['neo4j', 'py2neo'],
    'Vector DB (FAISS/Milvus)': ['faiss', 'milvus', 'weaviate', 'vector_store'],
    'Neuro-symbolic (PyTorch models)': ['import torch', 'torch.nn', 'torch_geometric'],
    'Quantum (Qiskit/Cirq/cuQuantum)': ['qiskit', 'cirq', 'cuquantum', 'tensornetwork'],
    'Formal provers (Lean/Coq)': ['.lean', '.v', 'lean-toolchain', 'coq'],
    'Orchestration (Kubernetes/Ray/Dask)': ['kubernetes', 'ray', 'dask'],
    'gRPC/GraphQL': ['grpc', 'graphql', 'graphene'],
}

# Display key -> module name probed with importlib.util.find_spec to check
# whether the dependency is installed in the current environment.
IMPORT_CHECKS: Dict[str, str] = {
    'neo4j': 'neo4j',
    'py2neo': 'py2neo',
    'faiss': 'faiss',
    'torch': 'torch',
    'qiskit': 'qiskit',
    'cirq': 'cirq',
    'tensornetwork': 'tensornetwork',
}

# File extensions included in the keyword scan (files named 'Dockerfile' are
# also scanned regardless of extension — see scan_repo).
EXTS = {'.py', '.md', '.yaml', '.yml', '.toml', '.json', '.lean', '.v', '.ipynb'}
|
|
|
|
|
|
|
|
|
def scan_repo(root: str) -> Dict[str, int]:
    """Count keyword mentions per capability area across the repo tree.

    Walks *root*, reads every file whose extension is in EXTS (plus files
    named 'Dockerfile'), lowercases the text, and for each area in KEYWORDS
    adds the number of occurrences of each of that area's keywords.

    Args:
        root: directory to scan recursively.

    Returns:
        Mapping of area name -> total mention count; areas with no
        mentions are absent from the mapping.
    """
    skip_tokens = ('.git', '__pycache__', 'venv', 'env', 'node_modules', '.venv')
    # Lowercase every keyword once, not once per scanned file.
    lowered = {area: [kw.lower() for kw in kws] for area, kws in KEYWORDS.items()}
    counts: Dict[str, int] = defaultdict(int)
    for dirpath, dirnames, filenames in os.walk(root):
        if any(tok in dirpath for tok in skip_tokens):
            # Every descendant path contains this dirpath (and hence the
            # matching token), so prune instead of descending only to skip
            # each child again.
            dirnames[:] = []
            continue
        for fn in filenames:
            _, ext = os.path.splitext(fn)
            if ext.lower() not in EXTS and fn not in ('Dockerfile',):
                continue
            path = os.path.join(dirpath, fn)
            try:
                with open(path, 'r', encoding='utf-8', errors='ignore') as f:
                    txt = f.read().lower()
            except OSError:
                # Unreadable file (permissions, races, special files): skip.
                continue
            for area, kws in lowered.items():
                for kw in kws:
                    if kw in txt:
                        counts[area] += txt.count(kw)
    return counts
|
|
|
|
|
|
|
|
|
def try_imports(mapping: Dict[str, str]) -> Dict[str, bool]:
    """Report, for each entry of *mapping*, whether its module is locatable.

    Args:
        mapping: display key -> importable module name.

    Returns:
        display key -> True when importlib can find a spec for the module,
        False when it cannot (or the lookup itself raises).
    """
    import importlib.util

    def _locatable(module_name: str) -> bool:
        # find_spec can itself raise (e.g. for submodules of absent
        # packages); treat any failure as "not importable".
        try:
            return importlib.util.find_spec(module_name) is not None
        except Exception:
            return False

    return {label: _locatable(module) for label, module in mapping.items()}
|
|
|
|
|
|
|
|
|
def find_formal_files(root: str) -> Dict[str, int]:
    """Count formal-prover source files under *root*.

    Lean files are those ending in '.lean'; Coq files are those ending
    in '.v'. The whole tree is walked, with no directory filtering.

    Returns:
        {'lean': <count>, 'coq': <count>}
    """
    suffix_to_key = {'.lean': 'lean', '.v': 'coq'}
    tallies = {'lean': 0, 'coq': 0}
    for _dirpath, _dirnames, filenames in os.walk(root):
        for name in filenames:
            for suffix, key in suffix_to_key.items():
                if name.endswith(suffix):
                    tallies[key] += 1
    return tallies
|
|
|
|
|
|
|
|
|
def print_summary(counts, imports, formal):
    """Print the audit report to stdout.

    Args:
        counts:  area name -> mention count (from scan_repo).
        imports: module key -> importable flag (from try_imports).
        formal:  {'lean': n, 'coq': n} (from find_formal_files).
    """
    print('Repo root:', ROOT)
    print('\nSummary:')
    for area in KEYWORDS:
        mentions = counts.get(area, 0)
        notes = [f'{mentions} mentions'] if mentions > 0 else []
        # An importable driver outranks mere textual mentions; the three
        # driver checks target disjoint area-name prefixes.
        if area.startswith('GraphDB') and (imports.get('neo4j') or imports.get('py2neo')):
            status = 'Present (neo4j driver importable)'
        elif area.startswith('Vector DB') and imports.get('faiss'):
            status = 'Present (faiss importable)'
        elif area.startswith('Neuro-symbolic') and imports.get('torch'):
            status = 'Present (torch importable)'
        elif mentions > 0:
            status = 'Partial (code references)'
        else:
            status = 'Missing'
        print(f'- {area}: {status}')
        for note in notes:
            print(' -', note)

    print('\nFormal prover artifacts:')
    print('- Lean files:', formal.get('lean', 0))
    print('- Coq files (.v):', formal.get('coq', 0))

    print('\nImport checks:')
    for name, ok in sorted(imports.items()):
        print(f'- {name}:', 'importable' if ok else 'not importable')
|
|
|
|
|
|
|
|
|
def main():
    """Run the full audit: scan sources, probe imports, count prover files,
    then print the combined report."""
    mention_counts = scan_repo(ROOT)
    availability = try_imports(IMPORT_CHECKS)
    prover_files = find_formal_files(ROOT)
    print_summary(mention_counts, availability, prover_files)
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run the audit only when executed as a script, not when imported.
    main()
|
|
|
|
|
|
|