Spaces:
Sleeping
Sleeping
| import os | |
| import shutil | |
| import pickle | |
| import tarfile | |
| from pathlib import Path | |
# Inputs to the packaging step, all resolved relative to the current
# working directory.
PROCESSED_DATA_DIR = Path("processed_data")
CHUNKS_FILE = PROCESSED_DATA_DIR.joinpath("document_chunks.pkl")
QDRANT_DIR = PROCESSED_DATA_DIR.joinpath("qdrant_vectorstore")

# File name of the compressed archive produced by package_data().
TARGET_PACKAGE = "processed_data.tar.gz"
def package_data(chunks_file=None, qdrant_dir=None, target_package=None):
    """
    Package the processed data into a single compressed file for deployment.

    This creates a tar.gz file that contains the document chunks and
    Qdrant vector database, which can be uploaded to Hugging Face.

    The archive is first written to ``<target_package>.partial`` and only
    moved onto ``target_package`` once it is complete, so a failure never
    leaves a truncated package behind or clobbers a previous good one.

    Args:
        chunks_file: Path of the pickled document chunks. Defaults to the
            module-level ``CHUNKS_FILE``.
        qdrant_dir: Directory holding the Qdrant vector store. Defaults to
            the module-level ``QDRANT_DIR``.
        target_package: Path of the tar.gz archive to create. Defaults to
            the module-level ``TARGET_PACKAGE``.

    Raises:
        FileNotFoundError: If the chunks file or the Qdrant directory does
            not exist.
        NotADirectoryError: If the Qdrant path exists but is not a directory.
        ValueError: If the chunks file cannot be unpickled or its content
            has no length; the underlying error is attached as the cause.
        OSError: If the archive cannot be written.
    """
    if chunks_file is None:
        chunks_file = CHUNKS_FILE
    if qdrant_dir is None:
        qdrant_dir = QDRANT_DIR
    if target_package is None:
        target_package = TARGET_PACKAGE

    print("Starting data packaging process...")

    # Verify source files exist
    if not os.path.exists(chunks_file):
        raise FileNotFoundError(f"Document chunks file not found: {chunks_file}")
    if not os.path.exists(qdrant_dir):
        raise FileNotFoundError(f"Qdrant directory not found: {qdrant_dir}")
    if not os.path.isdir(qdrant_dir):
        # os.walk() on a regular file yields nothing, which would silently
        # produce a package without any vector store.
        raise NotADirectoryError(f"Qdrant path is not a directory: {qdrant_dir}")

    # Verify chunks file is valid.
    # NOTE(security): pickle.load can execute arbitrary code. Only run this
    # on a chunks file generated locally by a trusted pipeline.
    try:
        with open(chunks_file, "rb") as f:
            chunks = pickle.load(f)
        chunk_count = len(chunks)
    except Exception as e:
        raise ValueError(f"Invalid document chunks file: {str(e)}") from e
    print(f"Verified document chunks file. Contains {chunk_count} chunks.")

    # Create tar.gz file
    print(f"Creating package file: {target_package}")
    partial_package = f"{target_package}.partial"
    try:
        with tarfile.open(partial_package, "w:gz") as tar:
            # Add chunks file
            tar.add(chunks_file, arcname=os.path.basename(chunks_file))

            # Add Qdrant directory
            for root, dirs, files in os.walk(qdrant_dir):
                dirs.sort()  # in-place sort makes the traversal order stable
                arc_root = os.path.normpath(
                    os.path.join(
                        "qdrant_vectorstore",
                        os.path.relpath(root, qdrant_dir),
                    )
                )
                # Record the directory itself so that empty directories
                # survive the round trip through the archive.
                tar.add(root, arcname=arc_root, recursive=False)
                for file in sorted(files):
                    file_path = os.path.join(root, file)
                    arcname = os.path.join(arc_root, file)
                    print(f"Adding: {file_path} -> {arcname}")
                    tar.add(file_path, arcname=arcname)

        # Publish the finished archive in one step.
        os.replace(partial_package, target_package)
    except BaseException:
        # Do not leave a half-written archive behind, even on Ctrl-C.
        if os.path.exists(partial_package):
            os.remove(partial_package)
        raise

    size_mb = os.path.getsize(target_package) / (1024 * 1024)
    print(f"Package created successfully: {target_package} ({size_mb:.2f} MB)")
    print("\nInstructions:")
    print(f"1. Upload {target_package} to your Hugging Face Space")
    print("2. The app will automatically extract it on startup")
# Build the deployment archive only when this file is executed as a script;
# importing the module has no side effects beyond defining names.
if __name__ == "__main__":
    package_data()