# Content for upload_datasets.py
"""Upload embeddings, the FAISS index, and document chunks to the Hugging Face Hub.

The script runs three independent, best-effort steps:

1. ``embeddings/embeddings.pkl`` is wrapped in a one-row dataset and pushed
   to ``vichudo/agentic-defensor-embeddings``.
2. ``embeddings/faiss_index.index`` is uploaded as a raw file to that same
   dataset repository.
3. ``data/doc_chunks.pkl`` is wrapped in a one-row dataset and pushed to
   ``vichudo/agentic-defensor-chunks``.

A failure in one step is printed and does not prevent the following steps
from running.
"""
import pickle
import os

from datasets import Dataset
from huggingface_hub import HfApi

EMBEDDINGS_REPO_ID = "vichudo/agentic-defensor-embeddings"
CHUNKS_REPO_ID = "vichudo/agentic-defensor-chunks"

EMBEDDINGS_PICKLE_PATH = "embeddings/embeddings.pkl"
FAISS_INDEX_PATH = "embeddings/faiss_index.index"
CHUNKS_PICKLE_PATH = "data/doc_chunks.pkl"

# Initialize Hugging Face API
api = HfApi()


def _upload_pickle_as_dataset(pickle_path: str, repo_id: str, label: str) -> None:
    """Push the object stored in a pickle file to the Hub as a one-row dataset.

    The unpickled object is re-serialised with ``pickle.dumps`` and stored in
    a ``data`` column, next to a ``format`` column holding ``"pickle"`` so
    that consumers know how to decode the payload.

    Args:
        pickle_path: Path of the pickle file to read.
        repo_id: Hub repository id passed to ``Dataset.push_to_hub``.
        label: Lower-case human-readable name used in progress messages
            (e.g. ``"embeddings"``).

    Any exception is caught and printed; nothing is raised to the caller so
    the remaining upload steps still run.
    """
    print(f"Preparing {label} dataset...")
    try:
        # NOTE(security): pickle.load can execute arbitrary code. This is only
        # acceptable because the file is a locally produced artifact; never
        # point this at a file from an untrusted source.
        with open(pickle_path, "rb") as f:
            payload = pickle.load(f)

        # Create dataset with metadata to preserve the format
        dataset = Dataset.from_dict({
            "data": [pickle.dumps(payload)],
            "format": ["pickle"],
        })

        # Push to hub
        print(f"Uploading {label} dataset...")
        dataset.push_to_hub(repo_id)
        print(f"{label.capitalize()} dataset uploaded successfully!")
    except Exception as e:
        # Deliberately broad: this is the script's top-level boundary and each
        # step is best-effort.
        print(f"Error uploading {label}: {e}")


def _upload_faiss_index(index_path: str, repo_id: str) -> None:
    """Upload the FAISS index file to the dataset repository ``repo_id``.

    Args:
        index_path: Local path of the FAISS index file.
        repo_id: Hub dataset repository that receives the file as
            ``faiss_index.index``.

    Any exception is caught and printed; nothing is raised to the caller.
    """
    print("Uploading FAISS index file...")
    try:
        # upload_file needs an existing repository. Creating it here (a no-op
        # when it already exists) keeps this step from depending on the
        # embeddings dataset push having succeeded earlier.
        api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True)
        api.upload_file(
            path_or_fileobj=index_path,
            path_in_repo="faiss_index.index",
            repo_id=repo_id,
            repo_type="dataset",
        )
        print("FAISS index uploaded successfully!")
    except Exception as e:
        print(f"Error uploading FAISS index: {e}")


# Upload embeddings
_upload_pickle_as_dataset(EMBEDDINGS_PICKLE_PATH, EMBEDDINGS_REPO_ID, "embeddings")

# Upload FAISS index separately
_upload_faiss_index(FAISS_INDEX_PATH, EMBEDDINGS_REPO_ID)

# Upload document chunks
_upload_pickle_as_dataset(CHUNKS_PICKLE_PATH, CHUNKS_REPO_ID, "document chunks")

print("Dataset upload process complete!")