Spaces:
Build error
Build error
| import os | |
| import argparse | |
| from dotenv import load_dotenv | |
| from huggingface_hub import hf_hub_download, snapshot_download | |
| # Load environment variables from .env.local, overriding system envs if present | |
| load_dotenv(".env.local", override=True) | |
def download_data(repo_id, token=None, output_dir="."):
    """Pull ``rag-kb.db`` and the ``vector_store/`` files from a dataset repo.

    Each of the two downloads is attempted independently. Any ``Exception``
    raised during an attempt is printed and swallowed, so a failed download
    does not stop the second attempt or propagate to the caller.

    Args:
        repo_id: Repository identifier forwarded to both download calls.
        token: Optional access token forwarded to both download calls.
        output_dir: Forwarded as ``local_dir`` to both download calls.
    """

    def say(message):
        # Every progress line is flushed immediately.
        print(message, flush=True)

    # Keyword arguments common to the single-file and the snapshot download.
    shared = dict(
        repo_id=repo_id,
        repo_type="dataset",
        local_dir=output_dir,
        token=token,
    )

    say(f"Checking for data in {repo_id}...")

    # Step 1: the database file.
    try:
        say("Downloading rag-kb.db...")
        hf_hub_download(filename="rag-kb.db", **shared)
        say("rag-kb.db downloaded.")
    except Exception as db_error:
        say(f"Could not download rag-kb.db: {db_error}")
        say("Starting with empty/new database if not present.")

    # Step 2: only the files matching vector_store/* from the same repo.
    try:
        say("Downloading vector_store...")
        snapshot_download(allow_patterns="vector_store/*", **shared)
        say("vector_store downloaded.")
    except Exception as store_error:
        say(f"Could not download vector_store: {store_error}")
if __name__ == "__main__":
    # Script entry point: configuration comes from environment variables.
    environment = os.environ
    repo_id = environment.get("HF_DATASET_REPO", "duqing2026/rag-kb-data")
    token = environment.get("HF_TOKEN")

    # A placeholder token value is treated as "no token".
    if token == "hf_XXXXXXXXXXXXXXXX":
        print("Warning: Detected dummy HF_TOKEN 'hf_XXXXXXXXXXXXXXXX'. Ignoring it.")
        token = None

    # A falsy repo id (e.g. the variable set to an empty string) skips the download.
    if repo_id:
        download_data(repo_id, token)
    else:
        print("No HF_DATASET_REPO environment variable set. Skipping download.")