"""
Upload ChromaDB vector database to a Hugging Face Dataset repo.

Usage:
    python scripts/upload_vectordb.py --repo-id YOUR_USERNAME/chemical-vectordb

Prerequisites:
    pip install huggingface_hub
    huggingface-cli login
"""

import argparse
from pathlib import Path

from huggingface_hub import HfApi


def main(argv=None):
    """Create the target dataset repo (if needed) and upload the local folder.

    Args:
        argv: Optional list of command-line arguments. When ``None``
            (the default), argparse reads ``sys.argv[1:]``.

    Exits with a usage error (via ``parser.error``) when ``--local-dir``
    does not point to an existing directory.
    """
    parser = argparse.ArgumentParser(description="Upload ChromaDB to HF Dataset")
    parser.add_argument(
        "--repo-id",
        required=True,
        help="HF dataset repo id, e.g. your-username/chemical-vectordb",
    )
    parser.add_argument(
        "--local-dir",
        default="./vector_db/content/vector_db/chroma_db",
        help="Local path to the chroma_db folder",
    )
    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the dataset private",
    )
    args = parser.parse_args(argv)

    # Validate the local folder BEFORE touching the Hub, so a mistyped path
    # does not leave an empty repo behind.
    folder = Path(args.local_dir).expanduser()
    if not folder.is_dir():
        parser.error(f"--local-dir is not an existing directory: {folder}")

    api = HfApi()

    # exist_ok=True lets the script be re-run against a repo that already exists.
    api.create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=args.private,
        exist_ok=True,
    )
    print(f"Repo ready: https://huggingface.co/datasets/{args.repo_id}")

    api.upload_folder(
        folder_path=str(folder),
        repo_id=args.repo_id,
        repo_type="dataset",
    )
    print(f"Upload complete! Files pushed to datasets/{args.repo_id}")


if __name__ == "__main__":
    main()