|
|
import os |
|
|
import json |
|
|
import datetime |
|
|
from typing import Optional |
|
|
from huggingface_hub import HfApi, hf_hub_download |
|
|
from huggingface_hub.utils import HfFolder, RepositoryNotFoundError |
|
|
|
|
|
# Hub credentials and target repository are read from the environment; the
# repository id falls back to a built-in default when HF_REPO_ID is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_REPO_ID = os.getenv("HF_REPO_ID", "broadfield-dev/chat-ai-db")

# Local file names for the database and the JSONL interaction log.
DB_FILENAME = "social_media_platform.db"
DATASET_FILENAME = "interactions.jsonl"
|
|
|
|
|
def _ensure_repo_exists():
    """Make sure the target dataset repository is present on the Hub.

    Returns immediately when ``HF_TOKEN`` or ``HF_REPO_ID`` is unset. The
    repository is requested with ``exist_ok=True``, and any exception raised
    while doing so is printed instead of being propagated.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    repo_options = {
        "repo_id": HF_REPO_ID,
        "repo_type": "dataset",
        "token": HF_TOKEN,
        "exist_ok": True,
    }
    try:
        HfApi().create_repo(**repo_options)
    except Exception as err:
        print(f"SYNC ERROR: Could not create or verify repository '{HF_REPO_ID}'. Please check your token and repo ID. Error: {err}")
|
|
|
|
|
def log_interaction_to_dataset(event_data: dict):
    """Append one interaction event to the local JSONL dataset file.

    The event is serialized with ``json.dumps`` and written as a single line
    to ``DATASET_FILENAME``. Nothing is written when ``HF_TOKEN`` or
    ``HF_REPO_ID`` is unset. Failures are printed, never raised.

    Args:
        event_data: The event to record.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    try:
        with open(DATASET_FILENAME, mode="a") as log_file:
            record = json.dumps(event_data)
            log_file.write(f"{record}\n")
    except Exception as err:
        print(f"DATASET LOG ERROR: {err}")
|
|
|
|
|
def sync_files_to_hub():
    """Upload the local database and interaction log to the Hub repository.

    Does nothing when ``HF_TOKEN`` or ``HF_REPO_ID`` is unset. Each file is
    uploaded only if it exists locally, so a missing database does not raise
    and thereby prevent the interaction log from being synced. Upload errors
    are printed, never raised.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return

    _ensure_repo_exists()

    print("SYNC: Uploading database and dataset to Hugging Face Hub...")
    # (local file, commit message) pairs; the local name doubles as the
    # path inside the repository.
    uploads = (
        (DB_FILENAME, "Sync latest simulation state"),
        (DATASET_FILENAME, "Sync latest interactions"),
    )
    try:
        api = HfApi()
        uploaded_count = 0
        for filename, commit_message in uploads:
            # Guard both files the same way: uploading a non-existent path
            # would raise and abort the remaining uploads.
            if not os.path.exists(filename):
                print(f"SYNC: '{filename}' not found locally. Skipping.")
                continue
            api.upload_file(
                path_or_fileobj=filename,
                path_in_repo=filename,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_TOKEN,
                commit_message=commit_message,
            )
            uploaded_count += 1
        # Only claim success when something was actually sent.
        if uploaded_count:
            print("SYNC: Upload successful.")
    except Exception as e:
        print(f"SYNC ERROR: Failed to upload files: {e}")
|
|
|
|
|
def download_files_from_hub():
    """Download the database and interaction log from the Hub repository.

    Skips the download (with a message) when ``HF_TOKEN`` or ``HF_REPO_ID``
    is unset. The two files are fetched independently into the current
    directory, so one file missing on the Hub does not prevent the other from
    being downloaded. If the repository itself is not found, the function
    stops after reporting it. Errors are printed, never raised.
    """
    if not HF_TOKEN or not HF_REPO_ID:
        print("SYNC: Hub environment variables not set. Skipping download.")
        return

    print(f"SYNC: Attempting to download latest state from '{HF_REPO_ID}'...")
    # (file to fetch, message printed once it has been downloaded)
    targets = (
        (DB_FILENAME, "SYNC: Database download successful."),
        (DATASET_FILENAME, "SYNC: Dataset log download successful."),
    )
    for filename, success_message in targets:
        try:
            hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=filename,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
            )
        except RepositoryNotFoundError:
            print(f"SYNC INFO: Repository '{HF_REPO_ID}' not found on the Hub. A new one will be created on the first action.")
            # No repository means no file can be fetched; stop here.
            return
        except Exception as e:
            # Best effort: report this file and still try the next one.
            print(f"SYNC INFO: Could not download '{filename}' (may not exist yet): {e}")
        else:
            print(success_message)
|
|
|
|
|
def _safe_image_extension(original_name) -> str:
    """Return a path-safe extension taken from *original_name*, else ``"jpg"``."""
    _, dot, extension = (original_name or "").rpartition(".")
    # The name is client-supplied: accept only a non-empty ASCII alphanumeric
    # suffix so it cannot inject path separators into the repository path.
    if dot and extension.isascii() and extension.isalnum():
        return extension
    return "jpg"


def upload_image_to_hub(image_file, agent_id: int) -> Optional[str]:
    """Upload an image to the Hub repository and return its URL.

    The image is stored under ``images/{agent_id}_{UTC timestamp}.{ext}``,
    where ``ext`` is taken from the uploaded file's name and falls back to
    ``jpg`` when the name is missing, has no extension, or the extension is
    not purely ASCII alphanumeric.

    Args:
        image_file: Upload object exposing ``filename`` and a readable
            ``stream``.
        agent_id: Identifier used as the prefix of the stored file name.

    Returns:
        The ``resolve/main`` URL of the uploaded file, or ``None`` when
        ``HF_TOKEN``/``HF_REPO_ID`` is unset or the upload fails (the error
        is printed).
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return None

    _ensure_repo_exists()

    try:
        api = HfApi()
        # Timezone-aware replacement for the deprecated utcnow(); the
        # formatted string is the same.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename_ext = _safe_image_extension(image_file.filename)
        filename = f"images/{agent_id}_{timestamp}.{filename_ext}"

        base_url = f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{filename}"

        api.upload_file(
            path_or_fileobj=image_file.stream.read(),
            path_in_repo=filename,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        return base_url
    except Exception as e:
        print(f"IMAGE UPLOAD ERROR: {e}")
        return None