# Captured from a Hugging Face Space page (Space status at capture time: Sleeping).
import json
import logging
import os
from typing import Optional

from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import RepositoryNotFoundError, EntryNotFoundError
logger = logging.getLogger(__name__)


class HFStorageManager:
    """
    Manages persistent storage using Hugging Face Datasets.

    Stores JSON files in a HF dataset for persistence across Space restarts.
    When ``HF_TOKEN`` or ``HF_DATASET_REPO`` is missing from the environment,
    every operation falls back to plain files on the local filesystem.

    In HF mode, successfully saved/loaded payloads are kept in an in-memory
    cache keyed by filename. The cache stores the very same dict object that
    was passed in / returned, so mutating it also mutates the cached entry.
    """

    def __init__(self):
        """Initialize HF Storage Manager with credentials from environment variables."""
        self.token = os.getenv("HF_TOKEN")
        self.repo_id = os.getenv("HF_DATASET_REPO")
        self._cache = {}  # In-memory cache to reduce API calls
        # Always define these so the attributes exist in fallback mode too.
        self.api = None
        self.use_hf = False

        if not self.token:
            logger.warning("HF_TOKEN not found. Using local storage fallback.")
            return
        if not self.repo_id:
            logger.warning("HF_DATASET_REPO not found. Using local storage fallback.")
            return

        self.use_hf = True
        self.api = HfApi()
        logger.info("HF Storage initialized for dataset: %s", self.repo_id)

    def save_file(self, filename: str, data: dict) -> bool:
        """
        Save a JSON file to HF dataset.

        If the upload fails for any reason, the data is written to the local
        filesystem instead.

        Args:
            filename: Name of the file (e.g., 'chat_history.json')
            data: Dictionary to save as JSON

        Returns:
            bool: True if the data was stored (remotely or via the local
            fallback), False otherwise
        """
        if not self.use_hf:
            return self._save_local(filename, data)

        try:
            # Upload the serialized bytes directly: no scratch file under /tmp
            # to collide with, leave behind, or break on nested repo paths.
            payload = json.dumps(data, indent=2).encode("utf-8")
            self.api.upload_file(
                path_or_fileobj=payload,
                path_in_repo=filename,
                repo_id=self.repo_id,
                repo_type="dataset",
                token=self.token,
            )
        except Exception as e:
            logger.error("Failed to save %s to HF dataset: %s", filename, e)
            # Fallback to local storage
            saved = self._save_local(filename, data)
            if saved:
                # Keep reads consistent with the write that just succeeded;
                # otherwise load_file() would keep serving a stale cache entry.
                self._cache[filename] = data
            return saved

        # Cache the data
        self._cache[filename] = data
        logger.info("Successfully saved %s to HF dataset", filename)
        return True

    def load_file(self, filename: str) -> Optional[dict]:
        """
        Load a JSON file from HF dataset.

        The in-memory cache is consulted first. If the download fails with
        anything other than "not found", the local filesystem is tried.

        Args:
            filename: Name of the file to load

        Returns:
            dict or None: Loaded data or None if not found
        """
        if not self.use_hf:
            return self._load_local(filename)

        # Check cache first
        if filename in self._cache:
            logger.info("Loaded %s from cache", filename)
            return self._cache[filename]

        try:
            # Download from HF dataset
            file_path = hf_hub_download(
                repo_id=self.repo_id,
                filename=filename,
                repo_type="dataset",
                token=self.token,
            )
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (RepositoryNotFoundError, EntryNotFoundError):
            logger.info("%s not found in HF dataset, returning None", filename)
            return None
        except Exception as e:
            logger.error("Failed to load %s from HF dataset: %s", filename, e)
            # Fallback to local storage
            return self._load_local(filename)

        # Cache the data
        self._cache[filename] = data
        logger.info("Successfully loaded %s from HF dataset", filename)
        return data

    def file_exists(self, filename: str) -> bool:
        """
        Check if a file exists in HF dataset.

        In fallback mode (or when the check itself errors out) the local
        filesystem is checked instead.

        Args:
            filename: Name of the file

        Returns:
            bool: True if file exists, False otherwise
        """
        if not self.use_hf:
            return os.path.exists(filename)

        try:
            # NOTE(review): this downloads the file just to test existence;
            # a metadata-only check may be cheaper if the installed
            # huggingface_hub version offers one - confirm before switching.
            hf_hub_download(
                repo_id=self.repo_id,
                filename=filename,
                repo_type="dataset",
                token=self.token,
            )
            return True
        except (RepositoryNotFoundError, EntryNotFoundError):
            return False
        except Exception as e:
            logger.error("Error checking if %s exists: %s", filename, e)
            return os.path.exists(filename)

    def _save_local(self, filename: str, data: dict) -> bool:
        """
        Fallback: Save to local filesystem.

        Args:
            filename: Target path; missing parent directories are created
            data: Dictionary to save as JSON

        Returns:
            bool: True if the file was written, False on any error
        """
        try:
            # Serialize before opening the file so that unserializable data
            # cannot truncate a previously saved, valid file.
            payload = json.dumps(data, indent=2)
            parent = os.path.dirname(filename)
            if parent:
                os.makedirs(parent, exist_ok=True)
            with open(filename, "w", encoding="utf-8") as f:
                f.write(payload)
            logger.info("Saved %s locally (fallback)", filename)
            return True
        except Exception as e:
            logger.error("Failed to save %s locally: %s", filename, e)
            return False

    def _load_local(self, filename: str) -> Optional[dict]:
        """
        Fallback: Load from local filesystem.

        Args:
            filename: Path of the JSON file to read

        Returns:
            dict or None: Parsed data, or None if the file is missing or
            cannot be read/parsed
        """
        try:
            with open(filename, "r", encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            # A missing file is the normal "nothing stored yet" case.
            return None
        except Exception as e:
            logger.error("Failed to load %s locally: %s", filename, e)
            return None

        logger.info("Loaded %s locally (fallback)", filename)
        return data