File size: 4,008 Bytes
294476c 1db491c 294476c aade055 294476c 67618a4 294476c 1db491c 294476c aade055 1db491c 294476c 1db491c 294476c 1db491c aade055 1db491c 294476c 1db491c 294476c 1db491c 294476c 1db491c 294476c 1db491c 294476c 1db491c 294476c 1db491c aade055 1db491c aade055 1db491c aade055 1db491c 294476c 1db491c 294476c 1db491c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import os
import json
import datetime
from typing import Optional
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import HfFolder, RepositoryNotFoundError
# Hub credentials and target repository, both read from the environment.
# HF_TOKEN is None when the variable is unset; the functions in this module
# return early whenever either value is falsy.
HF_TOKEN = os.getenv("HF_TOKEN")
HF_REPO_ID = os.getenv("HF_REPO_ID", "broadfield-dev/chat-ai-db")

# Local file names; the same names are used as paths inside the Hub repo.
DB_FILENAME = "social_media_platform.db"
DATASET_FILENAME = "interactions.jsonl"
def _ensure_repo_exists():
    """Create the configured Hub dataset repository if it is missing.

    Returns immediately when HF_TOKEN or HF_REPO_ID is not set. Any error
    raised while contacting the Hub is printed and swallowed, so callers
    never see an exception from this helper.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    repo_options = {
        "repo_id": HF_REPO_ID,
        "repo_type": "dataset",
        "token": HF_TOKEN,
        # With exist_ok the call creates the repo when absent and is a
        # no-op when it is already there.
        "exist_ok": True,
    }
    try:
        HfApi().create_repo(**repo_options)
    except Exception as e:
        print(f"SYNC ERROR: Could not create or verify repository '{HF_REPO_ID}'. Please check your token and repo ID. Error: {e}")
def log_interaction_to_dataset(event_data: dict):
    """Append *event_data* as one JSON line to the local interactions log.

    Nothing is written when HF_TOKEN or HF_REPO_ID is unset. Serialization
    and file errors are printed rather than raised.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    try:
        with open(DATASET_FILENAME, "a") as log_file:
            serialized = json.dumps(event_data)
            log_file.write(f"{serialized}\n")
    except Exception as e:
        print(f"DATASET LOG ERROR: {e}")
def sync_files_to_hub():
    """Upload the local database and interactions log to the Hub repo.

    Skipped entirely when HF_TOKEN or HF_REPO_ID is unset. The database is
    always uploaded; the interactions log only when the file exists locally.
    Upload failures are printed, not raised.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        return

    # The repository has to exist before anything can be uploaded to it.
    _ensure_repo_exists()
    print("SYNC: Uploading database and dataset to Hugging Face Hub...")

    # Arguments shared by both uploads.
    target = {
        "repo_id": HF_REPO_ID,
        "repo_type": "dataset",
        "token": HF_TOKEN,
    }
    try:
        hub = HfApi()
        hub.upload_file(
            path_or_fileobj=DB_FILENAME,
            path_in_repo=DB_FILENAME,
            commit_message="Sync latest simulation state",
            **target,
        )
        if os.path.exists(DATASET_FILENAME):
            hub.upload_file(
                path_or_fileobj=DATASET_FILENAME,
                path_in_repo=DATASET_FILENAME,
                commit_message="Sync latest interactions",
                **target,
            )
        print("SYNC: Upload successful.")
    except Exception as e:
        print(f"SYNC ERROR: Failed to upload files: {e}")
def download_files_from_hub():
    """Fetch the database and interactions log from the Hub into the CWD.

    Skipped (with a message) when HF_TOKEN or HF_REPO_ID is unset. The
    database is downloaded first, then the interactions log; the first
    failure stops the sequence and is reported on stdout instead of raised.
    """
    if not (HF_TOKEN and HF_REPO_ID):
        print("SYNC: Hub environment variables not set. Skipping download.")
        return

    print(f"SYNC: Attempting to download latest state from '{HF_REPO_ID}'...")

    # (file in the repo, message printed once it has been downloaded)
    wanted = (
        (DB_FILENAME, "SYNC: Database download successful."),
        (DATASET_FILENAME, "SYNC: Dataset log download successful."),
    )
    try:
        for remote_name, done_message in wanted:
            hf_hub_download(
                repo_id=HF_REPO_ID,
                filename=remote_name,
                repo_type="dataset",
                token=HF_TOKEN,
                local_dir=".",
            )
            print(done_message)
    except RepositoryNotFoundError:
        print(f"SYNC INFO: Repository '{HF_REPO_ID}' not found on the Hub. A new one will be created on the first action.")
    except Exception as e:
        print(f"SYNC INFO: Could not download files (may not exist yet): {e}")
def _image_extension(original_name, default="jpg"):
    """Return a repo-safe file extension taken from *original_name*.

    Falls back to *default* when the name is empty/None, has no dot, or the
    text after the last dot is anything other than ASCII letters and digits.
    """
    if not original_name or "." not in original_name:
        return default
    candidate = original_name.rsplit(".", 1)[-1]
    # The name comes from the uploader, so reject anything that could smuggle
    # path separators or other surprises into the repository path
    # (e.g. "a.png/../../x"), as well as an empty extension ("photo.").
    if candidate.isascii() and candidate.isalnum():
        return candidate
    return default


def upload_image_to_hub(image_file, agent_id: int) -> Optional[str]:
    """Upload an image to the Hub dataset repo and return its public URL.

    Args:
        image_file: Upload object exposing ``.filename`` and a readable
            ``.stream`` (presumably a Werkzeug ``FileStorage`` -- confirm
            against the caller).
        agent_id: Identifier used as the prefix of the stored file name.

    Returns:
        The ``resolve/main`` URL of the uploaded file, or ``None`` when
        HF_TOKEN/HF_REPO_ID is unset or the upload fails (the error is
        printed, not raised).
    """
    if not HF_TOKEN or not HF_REPO_ID:
        return None

    _ensure_repo_exists()  # Make sure the repo exists before uploading
    try:
        api = HfApi()
        # Timezone-aware replacement for the deprecated datetime.utcnow();
        # the formatted UTC timestamp is unchanged.
        timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d_%H%M%S")
        filename_ext = _image_extension(image_file.filename)
        filename = f"images/{agent_id}_{timestamp}.{filename_ext}"
        base_url = f"https://huggingface.co/datasets/{HF_REPO_ID}/resolve/main/{filename}"
        api.upload_file(
            path_or_fileobj=image_file.stream.read(),
            path_in_repo=filename,
            repo_id=HF_REPO_ID,
            repo_type="dataset",
            token=HF_TOKEN,
        )
        return base_url
    except Exception as e:
        print(f"IMAGE UPLOAD ERROR: {e}")
        return None