legislation-tracker / data_updating_scripts /build_bills_vectorstore.py
ramanna's picture
Upload 30 files
b5a9373 verified
#!/usr/bin/env python3
# Command-line script that updates the bills vector store (see main()).
import argparse, os
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a .env file in the current working directory at import
# time; main() later reads VECTOR_BACKEND through os.getenv for its --backend
# default, so this has to run before main() is called.
load_dotenv(dotenv_path=Path.cwd() / ".env")
import sys
from pathlib import Path
# Append the directory one level above this script's folder to sys.path.
# NOTE(review): presumably this is the project root, needed so the
# `vectorstore` package imported lazily inside main() resolves — confirm.
sys.path.append(str(Path(__file__).resolve().parents[1]))
def _positive_int(text):
    """Convert a command-line string to an integer that is at least 1.

    Used as an argparse ``type=`` callable so that a zero or negative batch
    size is rejected with a usage error instead of reaching the backend.

    Raises:
        argparse.ArgumentTypeError: if *text* is not an integer or is < 1.
    """
    try:
        value = int(text)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
    if value < 1:
        raise argparse.ArgumentTypeError(f"must be a positive integer, got {value}")
    return value


def main(argv=None):
    """Parse command-line options and upsert bills into the chosen vector store.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the script entry point relies on.

    Side effects:
        Imports the selected backend module, calls its
        ``upsert_from_bills_json`` and prints the returned statistics
        (one ``key: value`` line per item) to stdout.

    Raises:
        SystemExit: raised by argparse on invalid arguments (exit status 2)
            or when ``--help`` is requested.
    """
    parser = argparse.ArgumentParser(
        description="Update the bills vector store from a bills JSON file.",
    )
    parser.add_argument(
        "--source",
        default="data/known_bills_visualize.json",
        help="path of the bills JSON file to read",
    )
    # argparse does not check a default against `choices`, so an unrecognised
    # VECTOR_BACKEND value is accepted here and ends up in the Chroma branch
    # below (anything that is not exactly "pinecone").
    parser.add_argument(
        "--backend",
        choices=["chroma", "pinecone"],
        default=os.getenv("VECTOR_BACKEND", "chroma"),
        help="vector store backend (default: $VECTOR_BACKEND, else chroma)",
    )
    parser.add_argument(
        "--persist",
        default="data/bills_vectorstore",
        help="persist directory; only passed to the chroma backend",
    )
    parser.add_argument(
        "--collection",
        default="bills",
        help="collection name; only passed to the chroma backend",
    )
    parser.add_argument(
        "--manifest",
        default="data/bills_vectorstore_manifest.json",
        help="path of the manifest JSON file passed to the backend",
    )
    parser.add_argument(
        "--model",
        default=None,
        help="embedding model name; passed through unchanged when omitted",
    )
    parser.add_argument(
        "--batch",
        type=_positive_int,
        default=128,
        help="batch size passed to the backend; must be a positive integer",
    )
    args = parser.parse_args(argv)

    # Backend modules are imported lazily so only the selected backend's
    # module (and whatever it depends on) has to be importable.
    if args.backend == "pinecone":
        from vectorstore.pinecone_bills_vectorstore import upsert_from_bills_json

        # The Pinecone entry point is not given --persist / --collection.
        stats = upsert_from_bills_json(
            source_json_path=args.source,
            manifest_path=args.manifest,
            embed_model=args.model,
            batch_size=args.batch,
        )
    else:
        from vectorstore.bills_vectorstore import upsert_from_bills_json

        stats = upsert_from_bills_json(
            source_json_path=args.source,
            persist_dir=args.persist,
            collection=args.collection,
            manifest_path=args.manifest,
            embed_model=args.model,
            batch_size=args.batch,
        )

    print("✅ Vectorstore updated")
    for k, v in stats.items():
        print(f" {k}: {v}")
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()