Spaces:
Sleeping
Sleeping
File size: 1,796 Bytes
# Content for upload_datasets.py (f41e5db)
import pickle
import os
from datasets import Dataset
from huggingface_hub import HfApi
# Initialize Hugging Face API
# Module-level Hub client; the FAISS index upload further down calls
# `api.upload_file` on it.
# NOTE(review): constructed with no arguments, so it presumably relies on
# credentials already configured on this machine — confirm before running.
api = HfApi()
# Upload embeddings
# Load the pickled embeddings from disk, wrap them in a one-row dataset and
# push that dataset to the Hub. Any failure is reported and the script
# carries on with the next upload.
print("Preparing embeddings dataset...")
try:
    # NOTE: unpickling can execute code embedded in the file, so this path
    # must only ever point at a trusted, locally produced pickle.
    with open('embeddings/embeddings.pkl', 'rb') as pkl_file:
        embeddings_data = pickle.load(pkl_file)

    # Single row: the re-serialized object plus a tag naming its format,
    # so a consumer knows the "data" cell has to be unpickled.
    embeddings_columns = {
        "data": [pickle.dumps(embeddings_data)],
        "format": ["pickle"],
    }
    embeddings_ds = Dataset.from_dict(embeddings_columns)

    print("Uploading embeddings dataset...")
    embeddings_ds.push_to_hub("vichudo/agentic-defensor-embeddings")
    print("Embeddings dataset uploaded successfully!")
except Exception as err:
    print(f"Error uploading embeddings: {err}")
# Upload FAISS index separately
# The index file is sent as a plain file into the same dataset repo that
# receives the embeddings dataset. A failure is printed, not raised, so the
# remaining uploads still run.
print("Uploading FAISS index file...")
try:
    api.upload_file(
        # Destination repository.
        repo_id="vichudo/agentic-defensor-embeddings",
        repo_type="dataset",
        # Local source file and the name it gets inside the repo.
        path_or_fileobj="embeddings/faiss_index.index",
        path_in_repo="faiss_index.index",
    )
    print("FAISS index uploaded successfully!")
except Exception as err:
    print(f"Error uploading FAISS index: {err}")
# Upload document chunks
# Read the pickled document chunks, store them as a one-row dataset and push
# it to its own Hub repo. Errors are printed instead of propagated.
print("Preparing document chunks dataset...")
try:
    # NOTE: unpickling can execute code embedded in the file, so this path
    # must only ever point at a trusted, locally produced pickle.
    with open('data/doc_chunks.pkl', 'rb') as chunks_file:
        chunks_data = pickle.load(chunks_file)

    # The whole object is re-pickled into one bytes cell; "format" records
    # how that cell was serialized.
    chunks_payload = pickle.dumps(chunks_data)
    chunks_ds = Dataset.from_dict({"data": [chunks_payload], "format": ["pickle"]})

    print("Uploading document chunks dataset...")
    chunks_repo = "vichudo/agentic-defensor-chunks"
    chunks_ds.push_to_hub(chunks_repo)
    print("Document chunks dataset uploaded successfully!")
except Exception as err:
    print(f"Error uploading document chunks: {err}")
# Final status line. It is printed unconditionally: each upload step above
# handles its own errors, so reaching this point does not by itself mean
# every upload succeeded.
print("Dataset upload process complete!")