Lean4-helper / scripts /build_index.py
p4r5kpftnp-cmd
Add CI/CD: GitHub Actions workflow + ruff config
6cf7e7c
Raw
History Blame Contribute Delete
1.43 kB
#!/usr/bin/env python3
"""
One-time script to build the FAISS + BM25 index from Mathlib4 source files.
Run this before using the LangGraph agent for the first time:
python scripts/build_index.py
Optional flags:
--mathlib-root PATH Path to Mathlib4 source (auto-detected if omitted)
--max-files N Limit to first N .lean files (useful for quick testing)
--index-dir PATH Where to save the index (default: data/mathlib_index)
"""
import argparse
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src')))
from retriever import MathLibRetriever
def main():
parser = argparse.ArgumentParser(description="Build Mathlib FAISS index.")
parser.add_argument("--mathlib-root", default=None, help="Path to Mathlib4 source root")
parser.add_argument("--max-files", type=int, default=None, help="Limit number of .lean files processed")
parser.add_argument("--index-dir", default=None, help="Directory to save the index")
args = parser.parse_args()
retriever = MathLibRetriever(index_dir=args.index_dir)
if retriever.is_index_built() and not args.max_files:
print(f"Index already exists at {retriever.index_dir}. Delete it to rebuild.")
return
retriever.build(mathlib_root=args.mathlib_root, max_files=args.max_files)
print("Done. Index is ready for use.")
if __name__ == "__main__":
main()