Spaces:
Build error
Build error
File size: 1,676 Bytes
"""
Build FAISS Index Script.
Chunks persona corpus, embeds with nomic-embed-text-v1.5, clusters into K buckets
via K-Means, and saves the FAISS index + metadata to data/faiss_indices/{persona}/.

Usage:
    python scripts/build_faiss.py --persona alex_rivera
    python scripts/build_faiss.py --persona alex_rivera --corpus-dir data/personas/alex_rivera
"""
from __future__ import annotations
import argparse
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the FAISS index build script.

    Args:
        argv: Argument strings to parse, without the program name. When
            ``None`` (the default) argparse falls back to ``sys.argv[1:]``,
            so existing zero-argument callers behave exactly as before.

    Returns:
        Namespace with ``persona`` (required), ``corpus_dir`` (``None`` when
        the option is omitted) and ``config`` (defaults to
        ``memorybridge/config/settings.yaml``).

    Raises:
        SystemExit: Raised by argparse on invalid or missing arguments.
    """
    parser = argparse.ArgumentParser(
        description="Build FAISS index with K-Means bucket clustering for a persona."
    )
    parser.add_argument(
        "--persona",
        required=True,
        help="Persona ID (e.g., alex_rivera).",
    )
    parser.add_argument(
        "--corpus-dir",
        default=None,
        help="Path to persona corpus directory. Defaults to data/personas/{persona}.",
    )
    parser.add_argument(
        "--config",
        default="memorybridge/config/settings.yaml",
        help="Path to settings.yaml.",
    )
    # Forwarding argv lets callers and tests supply arguments explicitly
    # instead of mutating sys.argv.
    return parser.parse_args(argv)
def main() -> None:
    """Build the FAISS index for the persona named on the command line.

    Resolves the corpus directory (the ``--corpus-dir`` value, or
    ``data/personas/{persona}`` when none is given), constructs the model
    registry and the builder, prints a short summary of the run, and then
    calls ``builder.build``.
    """
    cli = parse_args()
    persona = cli.persona

    # A missing or empty --corpus-dir falls back to the per-persona default.
    corpus_dir = cli.corpus_dir
    if not corpus_dir:
        corpus_dir = f"data/personas/{persona}"

    # Project imports are kept function-local, so they are only loaded once
    # main() actually runs (after argument parsing has succeeded).
    from memorybridge.core.models import ModelRegistry
    from memorybridge.memory.faiss_builder import FAISSBuilder

    registry = ModelRegistry(cli.config)
    builder = FAISSBuilder(registry)

    print(f"Building FAISS index for: {persona}")
    print(f"Corpus directory: {corpus_dir}")
    print(f"Output: data/faiss_indices/{persona}/")
    k_buckets = registry.get_config('retrieval', 'faiss', 'k_buckets')
    print(f"K buckets: {k_buckets}")

    builder.build(corpus_dir, persona)
    print("Done.")
# Run the build only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|