""" Build FAISS Index Script. Chunks persona corpus, embeds with nomic-embed-text-v1.5, clusters into K buckets via K-Means, and saves the FAISS index + metadata to data/faiss_indices/{persona}/. Usage: python scripts/build_faiss.py --persona alex_rivera python scripts/build_faiss.py --persona alex_rivera --corpus-dir data/personas/alex_rivera """ from __future__ import annotations import argparse def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser( description="Build FAISS index with K-Means bucket clustering for a persona." ) parser.add_argument( "--persona", required=True, help="Persona ID (e.g., alex_rivera)." ) parser.add_argument( "--corpus-dir", default=None, help="Path to persona corpus directory. Defaults to data/personas/{persona}." ) parser.add_argument( "--config", default="memorybridge/config/settings.yaml", help="Path to settings.yaml." ) return parser.parse_args() def main() -> None: args = parse_args() corpus_dir = args.corpus_dir or f"data/personas/{args.persona}" from memorybridge.core.models import ModelRegistry from memorybridge.memory.faiss_builder import FAISSBuilder registry = ModelRegistry(args.config) builder = FAISSBuilder(registry) print(f"Building FAISS index for: {args.persona}") print(f"Corpus directory: {corpus_dir}") print(f"Output: data/faiss_indices/{args.persona}/") print(f"K buckets: {registry.get_config('retrieval', 'faiss', 'k_buckets')}") builder.build(corpus_dir, args.persona) print("Done.") if __name__ == "__main__": main()