traj-eval / hf_sync.py
KaushikSid
autocommit csv
6991151
"""Auto-sync evaluations CSV to HF Space git (self or external)."""
import os
from pathlib import Path
from huggingface_hub import CommitScheduler, HfApi
from datetime import datetime
# --- Configuration -------------------------------------------------------
# File name of the evaluations CSV this module is responsible for.
CSV_FILE = "evaluations.csv"
# Repo id of the hosting Space; populated by HF Spaces, absent on local runs.
SPACE_ID = os.environ.get("SPACE_ID")
# Access token; only required when pushing to an external Space.
HF_TOKEN = os.environ.get("HF_TOKEN")

# --- Module state (assigned by init_hf_sync) -----------------------------
_scheduler = None  # active CommitScheduler, or None while sync is disabled
_sync_mode = None  # 'self', 'external', or None
def _start_scheduler(repo_id, token=None):
    """Create a CommitScheduler that pushes only CSV_FILE into ``data/`` of *repo_id*."""
    return CommitScheduler(
        repo_id=repo_id,
        repo_type="space",
        folder_path=".",
        path_in_repo="data",
        every=1,  # minutes between scheduled commits (not seconds)
        # Without a filter the whole working directory (source files, local
        # secrets, caches) would be uploaded under data/; restrict to the CSV.
        allow_patterns=[CSV_FILE],
        token=token,  # None lets huggingface_hub fall back to its default credentials
    )


def init_hf_sync():
    """Initialize HF sync, auto-detecting whether we run inside a Space.

    Modes:
        - 'self': SPACE_ID is set, so commits go to that same Space.
        - 'external': local run; commits go to the Space named by the
          HF_SPACE_REPO environment variable (also requires HF_TOKEN).
        - None: no sync configured.

    Only ``CSV_FILE`` from the current working directory is synced, and it
    lands under ``data/`` in the target repo.

    Calling this again while a scheduler is already active is a no-op that
    returns True, so a second background scheduler is never started.

    Returns:
        bool: True if sync is enabled, False otherwise (including when the
        scheduler could not be created; the error is printed, not raised).
    """
    global _scheduler, _sync_mode

    # Already initialized: keep the existing scheduler.
    if _scheduler is not None:
        return True

    # Mode 1: running inside an HF Space - commit to self.
    if SPACE_ID:
        try:
            _scheduler = _start_scheduler(SPACE_ID)
        except Exception as e:  # best-effort: sync must never crash the app
            print(f"⚠️ Space auto-commit failed: {e}")
            return False
        _sync_mode = 'self'
        print(f"βœ… HF Space auto-commit enabled: {SPACE_ID}")
        print("πŸ“ CSV saves to Space git automatically")
        return True

    # Mode 2: running locally - sync to an external Space.
    external_repo = os.getenv("HF_SPACE_REPO")
    if external_repo and HF_TOKEN:
        try:
            _scheduler = _start_scheduler(external_repo, token=HF_TOKEN)
        except Exception as e:  # best-effort: sync must never crash the app
            print(f"⚠️ External sync failed: {e}")
            return False
        _sync_mode = 'external'
        print(f"βœ… External sync enabled: {external_repo}")
        print("πŸ“ CSV syncs to external Space")
        return True

    # Mode 3: nothing configured.
    print("πŸ“ Local-only mode (no HF sync)")
    return False
def sync_to_hf():
    """Report whether HF syncing is active; kept for API compatibility.

    This function performs no upload itself. Any uploading is left to the
    scheduler stored in ``_scheduler`` by ``init_hf_sync``.

    Returns:
        bool: True if a scheduler has been created, False otherwise.
    """
    if _scheduler is None:
        # init_hf_sync was never called, or it did not enable sync.
        return False
    return True
if __name__ == "__main__":
    # Manual smoke test: initialize sync and report where the CSV will land.
    if init_hf_sync():
        print("\nπŸ§ͺ Testing event-based sync...")
        success = sync_to_hf()
        if success:
            # The original referenced an undefined HF_SPACE_REPO name here.
            # Resolve the target the same way init_hf_sync does: the hosting
            # Space first, otherwise the externally configured repo.
            target_repo = SPACE_ID or os.getenv("HF_SPACE_REPO")
            print(f"βœ… Sync successful: https://huggingface.co/spaces/{target_repo}/blob/main/data/{CSV_FILE}")
        else:
            print("❌ Sync failed")
    else:
        print("❌ Sync not configured")