Spaces:
Sleeping
Sleeping
# Content for upload_datasets.py
"""Upload the pickled embeddings, the FAISS index and the document chunks to the Hugging Face Hub.

Three artifacts are uploaded, in this order:

1. ``embeddings/embeddings.pkl``   -> dataset repo ``EMBEDDINGS_REPO`` (as a one-row dataset)
2. ``embeddings/faiss_index.index`` -> same repo, as a raw file named ``faiss_index.index``
3. ``data/doc_chunks.pkl``          -> dataset repo ``CHUNKS_REPO`` (as a one-row dataset)

Each step is best-effort: a failure is reported on stdout and the script
continues with the next artifact. The uploads run at import time, exactly as
in the original script.
"""
import pickle
import os
from datasets import Dataset
from huggingface_hub import HfApi

# Target dataset repositories on the Hub.
EMBEDDINGS_REPO = "vichudo/agentic-defensor-embeddings"
CHUNKS_REPO = "vichudo/agentic-defensor-chunks"

# Initialize Hugging Face API
# NOTE(review): no token is passed here, so credentials presumably come from
# the environment or a cached login - confirm for the deployment target.
api = HfApi()


def _pickle_file_to_dataset(path):
    """Load the pickle file at *path* and wrap its content in a one-row ``Dataset``.

    The loaded object is re-serialized with ``pickle.dumps`` and stored in the
    ``"data"`` column; the ``"format"`` column records ``"pickle"`` so that a
    consumer knows how to decode the payload.

    Raises whatever ``open`` / ``pickle.load`` raise (e.g. ``FileNotFoundError``,
    ``pickle.UnpicklingError``); callers below catch and report these.
    """
    # The pickle files are produced locally by this project. Consumers of the
    # uploaded dataset must only unpickle "data" if they trust the repository.
    with open(path, 'rb') as f:
        payload = pickle.load(f)
    return Dataset.from_dict({
        "data": [pickle.dumps(payload)],
        "format": ["pickle"],
    })


# Upload embeddings
print("Preparing embeddings dataset...")
try:
    # Create dataset with metadata to preserve the format
    embeddings_ds = _pickle_file_to_dataset('embeddings/embeddings.pkl')
    # Push to hub
    print("Uploading embeddings dataset...")
    embeddings_ds.push_to_hub(EMBEDDINGS_REPO)
    print("Embeddings dataset uploaded successfully!")
except Exception as e:
    print(f"Error uploading embeddings: {e}")

# Upload FAISS index separately
print("Uploading FAISS index file...")
try:
    # The step above may have failed before it ever reached the Hub (e.g. the
    # pickle file is missing), in which case the repository might not exist
    # yet. Make sure it does so the index upload does not depend on that step.
    api.create_repo(
        repo_id=EMBEDDINGS_REPO,
        repo_type="dataset",
        exist_ok=True,
    )
    api.upload_file(
        path_or_fileobj="embeddings/faiss_index.index",
        path_in_repo="faiss_index.index",
        repo_id=EMBEDDINGS_REPO,
        repo_type="dataset",
    )
    print("FAISS index uploaded successfully!")
except Exception as e:
    print(f"Error uploading FAISS index: {e}")

# Upload document chunks
print("Preparing document chunks dataset...")
try:
    # Create dataset
    chunks_ds = _pickle_file_to_dataset('data/doc_chunks.pkl')
    # Push to hub
    print("Uploading document chunks dataset...")
    chunks_ds.push_to_hub(CHUNKS_REPO)
    print("Document chunks dataset uploaded successfully!")
except Exception as e:
    print(f"Error uploading document chunks: {e}")

print("Dataset upload process complete!")