"""
Upload a ChromaDB vector database to a Hugging Face Dataset repo.

Usage:
    python scripts/upload_vectordb.py --repo-id YOUR_USERNAME/chemical-vectordb

Prerequisites:
    pip install huggingface_hub
    huggingface-cli login
"""
import argparse
from pathlib import Path

from huggingface_hub import HfApi
def main(argv=None) -> None:
    """Create the target dataset repo if needed and upload the ChromaDB folder.

    Args:
        argv: Optional list of command-line arguments, without the program
            name. When ``None`` (the default), argparse reads them from
            ``sys.argv``.

    Raises:
        SystemExit: With status 2 (via ``parser.error``) when the arguments
            are invalid or ``--local-dir`` is not an existing directory.
    """
    parser = argparse.ArgumentParser(description="Upload ChromaDB to HF Dataset")
    parser.add_argument(
        "--repo-id",
        required=True,
        help="HF dataset repo id, e.g. your-username/chemical-vectordb",
    )
    parser.add_argument(
        "--local-dir",
        default="./vector_db/content/vector_db/chroma_db",
        help="Local path to the chroma_db folder",
    )
    parser.add_argument("--private", action="store_true", help="Make the dataset private")
    args = parser.parse_args(argv)

    # Validate the local folder before any Hub call, so a mistyped path is
    # reported up front instead of after a repo has already been created.
    if not Path(args.local_dir).is_dir():
        parser.error(f"--local-dir is not an existing directory: {args.local_dir}")

    api = HfApi()

    # exist_ok=True is passed so that re-running against an existing repo is allowed.
    api.create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=args.private,
        exist_ok=True,
    )
    print(f"Repo ready: https://huggingface.co/datasets/{args.repo_id}")

    api.upload_folder(
        folder_path=args.local_dir,
        repo_id=args.repo_id,
        repo_type="dataset",
    )
    print(f"Upload complete! Files pushed to datasets/{args.repo_id}")
# Script entry point: run the upload only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()