Spaces:
Sleeping
Sleeping
| """Upload generated dataset to Hugging Face Hub. | |
| Usage: | |
| python -m src.data.upload_dataset --repo-id Mkaru/structural-mechanics-analytical-100k | |
| """ | |
| import argparse | |
| import logging | |
| from pathlib import Path | |
| from huggingface_hub import HfApi, create_repo | |
| logger = logging.getLogger(__name__) | |
def upload_dataset(
    data_dir: str = "data/generated",
    repo_id: str = "Mkaru/structural-mechanics-analytical-100k",
    dataset_card_path: str = "docs/DATASET_CARD.md",
) -> None:
    """Upload Parquet files and the dataset card to the Hugging Face Hub.

    Every ``*.parquet`` file found directly inside ``data_dir`` is uploaded
    to ``data/<file name>`` in the dataset repository. If the dataset card
    file exists, it is uploaded as the repository's ``README.md``.

    A data directory without Parquet files and a missing dataset card are
    reported as warnings rather than raised, so a partial upload still
    proceeds but never passes silently.

    Args:
        data_dir: Local directory searched (non-recursively) for Parquet files.
        repo_id: Identifier of the target dataset repository on the Hub.
        dataset_card_path: Local path of the Markdown dataset card.
    """
    api = HfApi()

    # Create the repo if it doesn't exist; exist_ok avoids failing when it does.
    create_repo(repo_id, repo_type="dataset", exist_ok=True)
    logger.info("Repository: %s", repo_id)

    # Sort so the upload order is deterministic; glob order depends on the
    # filesystem.
    parquet_files = sorted(Path(data_dir).glob("*.parquet"))
    if not parquet_files:
        logger.warning(
            "No Parquet files found in %s; no data files were uploaded",
            data_dir,
        )

    for split_file in parquet_files:
        api.upload_file(
            path_or_fileobj=str(split_file),
            path_in_repo=f"data/{split_file.name}",
            repo_id=repo_id,
            repo_type="dataset",
        )
        logger.info("Uploaded %s", split_file.name)

    # Upload the dataset card as the repository README.
    card_path = Path(dataset_card_path)
    if card_path.is_file():
        api.upload_file(
            path_or_fileobj=str(card_path),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="dataset",
        )
        logger.info("Uploaded dataset card")
    else:
        logger.warning(
            "Dataset card not found at %s; README.md was not uploaded",
            card_path,
        )

    logger.info(
        "Dataset available at: https://huggingface.co/datasets/%s", repo_id
    )
def main() -> None:
    """Command-line entry point: parse the options and run the upload."""
    cli = argparse.ArgumentParser(description="Upload dataset to HF Hub")

    # (flag, default) pairs; each option is a plain string with a default.
    option_defaults = (
        ("--data-dir", "data/generated"),
        ("--repo-id", "Mkaru/structural-mechanics-analytical-100k"),
        ("--card", "docs/DATASET_CARD.md"),
    )
    for flag, default in option_defaults:
        cli.add_argument(flag, default=default)

    options = cli.parse_args()

    # Logging is configured only after argument parsing has succeeded.
    logging.basicConfig(level=logging.INFO)

    upload_dataset(
        data_dir=options.data_dir,
        repo_id=options.repo_id,
        dataset_card_path=options.card,
    )
# Run the CLI only when this file is executed as a script (for example via
# ``python -m``), not when it is imported as a module.
if __name__ == "__main__":
    main()