# Mobile-emulator / hf_storage.py
# Author: tmgdlof
# Commit 214a88e: Migrate storage and database architecture to Hugging Face
# datasets and SQLite
import logging
import os
import tempfile
from datetime import datetime

import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Environment variables read on the HF Space:
# HF_TOKEN: Access token with write permissions (required; sync is disabled without it)
# HF_DATASET_ID: Optional override of the target dataset, format "username/dataset-name"
class HFDatasetStorage:
    """Store and fetch tables as Parquet files in a Hugging Face dataset repo.

    Each table is kept as ``<table_name>.parquet`` at the root of the dataset
    repository. When no token is configured, the instance is disabled:
    ``save_data`` becomes a no-op and ``load_data`` returns ``None``.
    """

    # Dataset used when the HF_DATASET_ID environment variable is unset or empty.
    DEFAULT_DATASET_ID = "Sachin5112/cachegah67hs627bhih6262772"

    def __init__(self):
        """Read configuration from the environment and create the API client."""
        self.token = os.environ.get("HF_TOKEN")
        self.repo_id = self._resolve_repo_id()
        self.api = HfApi()
        self.enabled = bool(self.token and self.repo_id)
        if not self.enabled:
            logger.warning("HF_TOKEN or HF_DATASET_ID not set. HF sync disabled.")

    @staticmethod
    def _resolve_repo_id():
        """Return the dataset id from HF_DATASET_ID, or the built-in default."""
        # `or` (rather than a .get default) so an empty variable also falls back.
        return os.environ.get("HF_DATASET_ID") or HFDatasetStorage.DEFAULT_DATASET_ID

    def save_data(self, table_name: str, data_list) -> None:
        """Upload ``data_list`` to the dataset as ``<table_name>.parquet``.

        Args:
            table_name: Base name of the Parquet file in the dataset repo.
            data_list: Records accepted by ``pandas.DataFrame`` (for example a
                list of dicts). Nothing is uploaded when it is empty.

        Errors are logged and swallowed, so a failed sync never propagates to
        the caller.
        """
        if not self.enabled:
            return
        try:
            if not data_list:
                return
            frame = pd.DataFrame(data_list)
            remote_name = f"{table_name}.parquet"
            # Stage the file in a throwaway directory instead of the current
            # working directory: nothing is left behind after the upload, and
            # it works when the working directory is read-only or when
            # table_name contains path separators.
            with tempfile.TemporaryDirectory() as staging_dir:
                local_path = os.path.join(staging_dir, "table.parquet")
                frame.to_parquet(local_path)
                self.api.upload_file(
                    path_or_fileobj=local_path,
                    path_in_repo=remote_name,
                    repo_id=self.repo_id,
                    repo_type="dataset",
                    token=self.token,
                )
            logger.info(f"Successfully uploaded {table_name} to HF Dataset")
        except Exception as e:
            # Best-effort sync: keep the traceback in the log but do not raise.
            logger.exception("Error saving to HF Dataset: %s", e)

    def load_data(self, table_name: str):
        """Download ``<table_name>.parquet`` and return its rows.

        Args:
            table_name: Base name of the Parquet file in the dataset repo.

        Returns:
            A list of row dicts (``DataFrame.to_dict('records')``), or ``None``
            when the storage is disabled or the download/read fails.
        """
        if not self.enabled:
            return None
        try:
            path = hf_hub_download(
                repo_id=self.repo_id,
                filename=f"{table_name}.parquet",
                repo_type="dataset",
                token=self.token,
            )
            return pd.read_parquet(path).to_dict('records')
        except Exception as e:
            # File might not exist yet if it's a new dataset. Any other
            # failure takes the same path and is only visible at debug level.
            logger.debug(f"File {table_name}.parquet not found in HF Dataset: {e}")
            return None
# Instantiate the storage helper once at import time; constructing it reads the
# environment and creates an HfApi client as a side effect of importing this module.
storage = HFDatasetStorage()