Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| One-time script to build the FAISS + BM25 index from Mathlib4 source files. | |
| Run this before using the LangGraph agent for the first time: | |
| python scripts/build_index.py | |
| Optional flags: | |
| --mathlib-root PATH Path to Mathlib4 source (auto-detected if omitted) | |
| --max-files N Limit to first N .lean files (useful for quick testing) | |
| --index-dir PATH Where to save the index (default: data/mathlib_index) | |
| """ | |
| import argparse | |
| import os | |
| import sys | |
| sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'src'))) | |
| from retriever import MathLibRetriever | |
| def main(): | |
| parser = argparse.ArgumentParser(description="Build Mathlib FAISS index.") | |
| parser.add_argument("--mathlib-root", default=None, help="Path to Mathlib4 source root") | |
| parser.add_argument("--max-files", type=int, default=None, help="Limit number of .lean files processed") | |
| parser.add_argument("--index-dir", default=None, help="Directory to save the index") | |
| args = parser.parse_args() | |
| retriever = MathLibRetriever(index_dir=args.index_dir) | |
| if retriever.is_index_built() and not args.max_files: | |
| print(f"Index already exists at {retriever.index_dir}. Delete it to rebuild.") | |
| return | |
| retriever.build(mathlib_root=args.mathlib_root, max_files=args.max_files) | |
| print("Done. Index is ready for use.") | |
| if __name__ == "__main__": | |
| main() | |