# Source commit 1f1a99e by Kimty:
# "Add Gradio app with RAG pipeline for chemical regulation lookup"
"""
Upload ChromaDB vector database to a Hugging Face Dataset repo.
Usage:
python scripts/upload_vectordb.py --repo-id YOUR_USERNAME/chemical-vectordb
Prerequisites:
pip install huggingface_hub
huggingface-cli login
"""
import argparse
from huggingface_hub import HfApi
def main(argv=None):
    """Upload a local ChromaDB folder to a Hugging Face dataset repo.

    Parses the command line, verifies that the local folder exists, ensures
    the target dataset repo exists (``exist_ok=True``), and then uploads the
    folder contents to it.

    Args:
        argv: Optional list of command-line arguments. When ``None`` (the
            default), argparse reads them from ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 2 when the arguments are invalid or when
            ``--local-dir`` does not point to an existing directory.
    """
    # Function-scope import keeps this block self-contained; the file's
    # top-level imports do not include ``os``.
    import os

    parser = argparse.ArgumentParser(description="Upload ChromaDB to HF Dataset")
    parser.add_argument(
        "--repo-id",
        required=True,
        help="HF dataset repo id, e.g. your-username/chemical-vectordb",
    )
    parser.add_argument(
        "--local-dir",
        default="./vector_db/content/vector_db/chroma_db",
        help="Local path to the chroma_db folder",
    )
    parser.add_argument(
        "--private", action="store_true", help="Make the dataset private"
    )
    args = parser.parse_args(argv)

    # Check the folder before contacting the Hub: otherwise create_repo would
    # run (and possibly create a new repo) before a bad path is noticed.
    if not os.path.isdir(args.local_dir):
        parser.error(
            f"--local-dir is not an existing directory: {args.local_dir}"
        )

    api = HfApi()
    api.create_repo(
        repo_id=args.repo_id,
        repo_type="dataset",
        private=args.private,
        exist_ok=True,
    )
    print(f"Repo ready: https://huggingface.co/datasets/{args.repo_id}")

    api.upload_folder(
        folder_path=args.local_dir,
        repo_id=args.repo_id,
        repo_type="dataset",
    )
    print(f"Upload complete! Files pushed to datasets/{args.repo_id}")
# Run the uploader only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()