Spaces:
Paused
Paused
| from datasets import Dataset | |
| from huggingface_hub import HfApi | |
| from config import DATASET_NAME | |
| import logging | |
# Configure logging once at import time: INFO threshold, with each record
# rendered as "<timestamp> - <level> - <message>".
logging.basicConfig(
    format="%(asctime)s - %(levelname)s - %(message)s",
    level=logging.INFO,
)
def initialize_dataset():
    """Create an empty dataset with the expected columns and push it to the Hub.

    The dataset is published under ``DATASET_NAME`` as the ``train`` split.

    Raises:
        Exception: Any exception raised by the push (or by the success log
            call that follows it) is logged at error level and re-raised
            unchanged.
    """
    # Column names of the expected structure, in the order they are created.
    column_names = (
        "entry_id",
        "title",
        "authors",
        "published",
        "updated",
        "pdf_url",
        "summary",
        "categories",
        "primary_category",
        "html_url",
    )

    # Every column starts as its own empty list, so the dataset has zero rows.
    # NOTE(review): no column types are declared here; confirm that whatever
    # later appends rows agrees with the schema inferred from empty lists.
    empty_columns = {name: [] for name in column_names}
    empty_dataset = Dataset.from_dict(empty_columns)

    try:
        # Publish the empty dataset so the repository and split exist.
        empty_dataset.push_to_hub(DATASET_NAME, split="train")
        logging.info(f"Dataset {DATASET_NAME} initialized successfully with 'train' split.")
    except Exception as e:
        # Record the failure, then let the caller see the original exception.
        logging.error(f"Failed to initialize dataset: {str(e)}")
        raise
if __name__ == "__main__":
    # Only initialize the dataset when this file is run as a script,
    # not when it is imported.
    initialize_dataset()