# (HF Space page header "Spaces: Running" — scrape artifact, not part of the module)
import io
import logging
import os
from datetime import datetime

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# Module-level logger, named after the module per the stdlib logging convention.
logger = logging.getLogger(__name__)

# Required environment variables on HF Space:
#   HF_TOKEN: Access token with write permissions
#   HF_DATASET_ID: Format "username/dataset-name"
class HFDatasetStorage:
    """Persist tables as parquet files in a Hugging Face Dataset repo.

    Sync is enabled only when an HF_TOKEN (with write access) and a dataset
    repo id are available; otherwise every operation is a silent no-op so the
    app keeps running without remote persistence.
    """

    # Fallback repo id kept for backward compatibility with earlier deploys
    # that hard-coded it instead of reading HF_DATASET_ID.
    _DEFAULT_DATASET_ID = "Sachin5112/cachegah67hs627bhih6262772"

    def __init__(self):
        self.token = os.environ.get("HF_TOKEN")
        # Honor HF_DATASET_ID (format "username/dataset-name") as the file's
        # header documents; fall back to the historical default repo so
        # existing deployments keep working unchanged.
        self.repo_id = os.environ.get("HF_DATASET_ID", self._DEFAULT_DATASET_ID)
        self.api = HfApi()
        if not self.token or not self.repo_id:
            logger.warning("HF_TOKEN or HF_DATASET_ID not set. HF sync disabled.")
            self.enabled = False
        else:
            self.enabled = True

    def save_data(self, table_name, data_list):
        """Upload ``data_list`` (list of row dicts) as ``<table_name>.parquet``.

        No-op when sync is disabled or ``data_list`` is empty. Upload errors
        are logged and swallowed (best-effort persistence), never raised.
        """
        if not self.enabled:
            return
        try:
            if not data_list:
                return
            df = pd.DataFrame(data_list)
            filename = f"{table_name}.parquet"
            # Serialize in memory rather than to a local file: the previous
            # implementation left a stray <table>.parquet in the working
            # directory after every sync.
            buffer = io.BytesIO()
            df.to_parquet(buffer)
            buffer.seek(0)
            self.api.upload_file(
                path_or_fileobj=buffer,
                path_in_repo=filename,
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
            )
            logger.info("Successfully uploaded %s to HF Dataset", table_name)
        except Exception as e:
            logger.error("Error saving to HF Dataset: %s", e)

    def load_data(self, table_name):
        """Return the rows of ``<table_name>.parquet`` as a list of dicts.

        Returns None when sync is disabled or the file cannot be fetched
        (e.g. it does not exist yet in a freshly created dataset).
        """
        if not self.enabled:
            return None
        try:
            path = hf_hub_download(
                repo_id=self.repo_id,
                filename=f"{table_name}.parquet",
                repo_type="dataset",
                token=self.token,
            )
            return pd.read_parquet(path).to_dict('records')
        except Exception as e:
            # File might not exist yet if it's a new dataset
            logger.debug("File %s.parquet not found in HF Dataset: %s", table_name, e)
            return None
| storage = HFDatasetStorage() | |