Spaces: Running on CPU Upgrade
| #!/usr/bin/env python3 | |
| import argparse, os | |
| from pathlib import Path | |
| from dotenv import load_dotenv | |
| load_dotenv(dotenv_path=Path.cwd() / ".env") | |
| import sys | |
| from pathlib import Path | |
| sys.path.append(str(Path(__file__).resolve().parents[1])) | |
def main():
    """Parse CLI options and upsert bills into the selected vector store.

    The backend is taken from ``--backend`` or, when the flag is omitted,
    from the ``VECTOR_BACKEND`` environment variable (falling back to
    ``chroma`` when the variable is unset or blank).  The matching
    ``upsert_from_bills_json`` implementation is imported only after the
    backend is known, called with the parsed options, and every entry of
    the stats mapping it returns is printed.

    Exits with status 2 (through ``argparse``) when ``VECTOR_BACKEND``
    names a backend other than ``chroma`` or ``pinecone``.
    """
    backends = ("chroma", "pinecone")

    # Normalise the environment value so "Pinecone" or " pinecone " select
    # the intended backend; an unset or blank variable means "chroma".
    env_backend = os.getenv("VECTOR_BACKEND", "").strip().lower() or "chroma"

    p = argparse.ArgumentParser()
    p.add_argument("--source", default="data/known_bills_visualize.json")
    p.add_argument("--backend", choices=backends, default=env_backend)
    p.add_argument("--persist", default="data/bills_vectorstore")
    p.add_argument("--collection", default="bills")
    p.add_argument("--manifest", default="data/bills_vectorstore_manifest.json")
    p.add_argument("--model", default=None)
    p.add_argument("--batch", type=int, default=128)
    args = p.parse_args()

    # argparse checks ``choices`` only for values supplied on the command
    # line, not for ``default``.  Without this guard an unsupported
    # VECTOR_BACKEND would silently fall through to the Chroma branch.
    if args.backend not in backends:
        p.error(
            f"invalid VECTOR_BACKEND {args.backend!r} "
            f"(choose from {', '.join(backends)})"
        )

    if args.backend == "pinecone":
        # Imported here so only the selected backend's module is loaded.
        from vectorstore.pinecone_bills_vectorstore import upsert_from_bills_json

        # --persist and --collection are not passed to the Pinecone backend.
        stats = upsert_from_bills_json(
            source_json_path=args.source,
            manifest_path=args.manifest,
            embed_model=args.model,
            batch_size=args.batch,
        )
    else:
        from vectorstore.bills_vectorstore import upsert_from_bills_json

        stats = upsert_from_bills_json(
            source_json_path=args.source,
            persist_dir=args.persist,
            collection=args.collection,
            manifest_path=args.manifest,
            embed_model=args.model,
            batch_size=args.batch,
        )

    print("✅ Vectorstore updated")
    for k, v in stats.items():
        print(f" {k}: {v}")


if __name__ == "__main__":
    main()