# social-ai-flask / hub_sync.py
# broadfield-dev's picture
# Update hub_sync.py
# aade055 verified
import os
import json
import datetime
from typing import Optional
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import HfFolder, RepositoryNotFoundError
# Hub credentials and target repository, read once from the environment at
# import time. HF_TOKEN is None when the variable is unset; every helper in
# this module treats a missing token or repo id as "sync disabled".
HF_TOKEN = os.getenv("HF_TOKEN")
HF_REPO_ID = os.getenv("HF_REPO_ID", "broadfield-dev/chat-ai-db")

# Local file names; the same names are used as paths inside the Hub repo.
DB_FILENAME = "social_media_platform.db"
DATASET_FILENAME = "interactions.jsonl"
def _ensure_repo_exists():
    """Create the configured Hub dataset repository when it is missing.

    Does nothing when either ``HF_TOKEN`` or ``HF_REPO_ID`` is unset. Any
    failure is printed and swallowed, so callers continue regardless.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    repo_settings = {
        "repo_id": HF_REPO_ID,
        "repo_type": "dataset",
        "token": HF_TOKEN,
        # exist_ok turns the call into a no-op for an existing repository.
        "exist_ok": True,
    }
    try:
        HfApi().create_repo(**repo_settings)
    except Exception as err:
        print(f"SYNC ERROR: Could not create or verify repository '{HF_REPO_ID}'. Please check your token and repo ID. Error: {err}")
def log_interaction_to_dataset(event_data: dict):
    """Append one event to the local interactions log as a JSON line.

    The record is written to ``DATASET_FILENAME`` in append mode. Logging is
    skipped entirely when ``HF_TOKEN`` or ``HF_REPO_ID`` is unset, and any
    error (serialisation or file I/O) is printed rather than raised.

    Args:
        event_data: JSON-serialisable mapping describing the interaction.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    try:
        with open(DATASET_FILENAME, "a") as log_file:
            record = json.dumps(event_data)
            log_file.write(f"{record}\n")
    except Exception as err:
        print(f"DATASET LOG ERROR: {err}")
def sync_files_to_hub():
    """Upload the local database and interactions log to the Hub repository.

    Does nothing when ``HF_TOKEN`` or ``HF_REPO_ID`` is unset. The repository
    is created first if needed. Each file is uploaded only if it exists
    locally, so a missing database no longer prevents the interactions log
    from being synced. Upload errors are printed, not raised.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return

    _ensure_repo_exists()  # Make sure the repo exists before uploading
    print("SYNC: Uploading database and dataset to Hugging Face Hub...")

    # (local file / path in repo, commit message) for everything we sync.
    pending_uploads = (
        (DB_FILENAME, "Sync latest simulation state"),
        (DATASET_FILENAME, "Sync latest interactions"),
    )

    try:
        api = HfApi()
        uploaded_count = 0
        for file_name, commit_message in pending_uploads:
            # Skip files that have not been created yet instead of letting
            # upload_file raise and abort the remaining uploads.
            if not os.path.exists(file_name):
                print(f"SYNC INFO: '{file_name}' not found locally. Skipping upload.")
                continue
            api.upload_file(
                path_or_fileobj=file_name,
                path_in_repo=file_name,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=commit_message,
            )
            uploaded_count += 1

        if uploaded_count:
            print("SYNC: Upload successful.")
        else:
            print("SYNC INFO: No local files to upload.")
    except Exception as e:
        print(f"SYNC ERROR: Failed to upload files: {e}")
def download_files_from_hub():
    """Download the database and interactions log from the Hub repository.

    Files are written into the current working directory. Each file is
    fetched independently, so a failure on the database (for example, it has
    not been uploaded yet) does not prevent the interactions log from being
    downloaded. If the repository itself is missing, the function stops
    after reporting it. All errors are printed, not raised.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        print("SYNC: Hub environment variables not set. Skipping download.")
        return

    print(f"SYNC: Attempting to download latest state from '{HF_REPO_ID}'...")

    # (file name in repo, label used in the success message)
    targets = (
        (DB_FILENAME, "Database"),
        (DATASET_FILENAME, "Dataset log"),
    )

    for file_name, label in targets:
        try:
            hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=file_name,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
            )
            print(f"SYNC: {label} download successful.")
        except RepositoryNotFoundError:
            print(f"SYNC INFO: Repository '{HF_REPO_ID}' not found on the Hub. A new one will be created on the first action.")
            # No repository means no other file can be fetched either.
            return
        except Exception as e:
            print(f"SYNC INFO: Could not download files (may not exist yet): {e}")
def upload_image_to_hub(image_file, agent_id: int) -> Optional[str]:
    """Upload an image to the Hub repository and return its public URL.

    The image is stored under ``images/{agent_id}_{UTC timestamp}.{ext}``.

    Args:
        image_file: Uploaded file object exposing ``.filename`` and
            ``.stream`` (presumably a werkzeug ``FileStorage`` - confirm
            against the caller).
        agent_id: Identifier of the agent the image belongs to; used as the
            file name prefix.

    Returns:
        The ``resolve/main`` URL of the uploaded file, or ``None`` when the
        Hub is not configured or the upload fails (the error is printed).
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return None

    _ensure_repo_exists()  # Make sure the repo exists before uploading
    try:
        api = HfApi()
        # Timezone-aware replacement for the deprecated utcnow(); the
        # formatted string is identical.
        # NOTE: one-second resolution - two uploads for the same agent within
        # the same second resolve to the same path in the repo.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")

        # The file name comes from the client, so only accept a plain
        # alphanumeric extension; anything else (missing name, no dot, empty
        # suffix, path separators, ...) falls back to 'jpg'.
        original_name = image_file.filename or ""
        _, dot, suffix = original_name.rpartition(".")
        suffix_is_safe = bool(dot) and suffix.isascii() and suffix.isalnum()
        filename_ext = suffix if suffix_is_safe else "jpg"

        filename = f"images/{agent_id}_{timestamp}.{filename_ext}"
        base_url = f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{filename}"
        api.upload_file(
            path_or_fileobj=image_file.stream.read(),
            path_in_repo=filename,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        return base_url
    except Exception as e:
        print(f"IMAGE UPLOAD ERROR: {e}")
        return None