"""Helpers for moving Trackio project data to and from Hugging Face buckets."""

import shutil
import tempfile
from pathlib import Path

import huggingface_hub
from huggingface_hub import sync_bucket

from trackio.sqlite_storage import SQLiteStorage
from trackio.utils import MEDIA_DIR, TRACKIO_DIR


def create_bucket_if_not_exists(bucket_id: str, private: bool | None = None) -> None:
    """Create the bucket ``bucket_id``, tolerating one that already exists.

    Args:
        bucket_id: Identifier of the bucket to create.
        private: Forwarded unchanged to ``huggingface_hub.create_bucket``.
    """
    huggingface_hub.create_bucket(bucket_id, private=private, exist_ok=True)


def _list_bucket_file_paths(bucket_id: str, prefix: str | None = None) -> list[str]:
    """Return the paths of all file entries in a bucket, listed recursively.

    Entries whose ``type`` attribute is not ``"file"`` or whose ``path`` is
    missing/empty are skipped.

    Args:
        bucket_id: Bucket to list.
        prefix: Optional prefix forwarded to ``list_bucket_tree``.
    """
    items = huggingface_hub.list_bucket_tree(bucket_id, prefix=prefix, recursive=True)
    return [
        item.path
        for item in items
        if getattr(item, "type", None) == "file" and getattr(item, "path", None)
    ]


def download_bucket_to_trackio_dir(bucket_id: str) -> None:
    """Sync the contents of ``bucket_id`` onto the local disk.

    ``TRACKIO_DIR`` is created if needed and the sync destination is its
    parent directory.
    """
    TRACKIO_DIR.mkdir(parents=True, exist_ok=True)
    # NOTE(review): the destination is the *parent* of TRACKIO_DIR, presumably
    # because remote paths already start with "trackio/" -- confirm that
    # TRACKIO_DIR's last component is "trackio".
    sync_bucket(
        source=f"hf://buckets/{bucket_id}",
        dest=str(TRACKIO_DIR.parent),
        quiet=True,
    )


def upload_project_to_bucket(project: str, bucket_id: str) -> None:
    """Upload a project's SQLite database and media files to a bucket.

    The database is uploaded as ``trackio/<db filename>``; each file under
    ``MEDIA_DIR / project`` is uploaded as ``trackio/<path relative to
    TRACKIO_DIR>``.

    Args:
        project: Project name.
        bucket_id: Destination bucket.

    Raises:
        FileNotFoundError: If the project's database file does not exist.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists():
        raise FileNotFoundError(f"No database found for project '{project}'")

    # Checkpoint (and truncate) the WAL before the database file is uploaded.
    with SQLiteStorage._get_connection(
        db_path, configure_pragmas=False, row_factory=None
    ) as conn:
        conn.execute("PRAGMA wal_checkpoint(TRUNCATE)")

    files_to_add = [(str(db_path), f"trackio/{db_path.name}")]

    media_dir = MEDIA_DIR / project
    if media_dir.exists():
        for media_file in media_dir.rglob("*"):
            if media_file.is_file():
                rel = media_file.relative_to(TRACKIO_DIR)
                # Remote paths in this module are "/"-separated; as_posix()
                # keeps them that way on Windows, where str(rel) would
                # contain backslashes.
                files_to_add.append((str(media_file), f"trackio/{rel.as_posix()}"))

    huggingface_hub.batch_bucket_files(bucket_id, add=files_to_add)


def _download_db_from_bucket(
    project: str, bucket_id: str, dest_path: Path | None = None
) -> bool:
    """Download a project's database file from a bucket.

    Args:
        project: Project name.
        bucket_id: Bucket to download from.
        dest_path: Local destination; defaults to the project's standard
            database path.

    Returns:
        ``True`` if the local file exists after the download call, ``False``
        if it does not or if the download raised any exception.
    """
    db_filename = SQLiteStorage.get_project_db_filename(project)
    remote_path = f"trackio/{db_filename}"
    local_path = dest_path or SQLiteStorage.get_project_db_path(project)
    local_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        huggingface_hub.download_bucket_files(
            bucket_id,
            files=[(remote_path, str(local_path))],
        )
        return local_path.exists()
    except Exception:
        # Best-effort: any failure is reported to the caller as "not downloaded".
        return False


def _local_db_has_data(project: str) -> bool:
    """Return whether the local project database has at least one metrics row.

    Returns ``False`` when the database file is missing or empty, or when
    opening/querying it raises.
    """
    db_path = SQLiteStorage.get_project_db_path(project)
    if not db_path.exists() or db_path.stat().st_size == 0:
        return False
    try:
        with SQLiteStorage._get_connection(
            db_path, configure_pragmas=False, row_factory=None
        ) as conn:
            count = conn.execute("SELECT COUNT(*) FROM metrics").fetchone()[0]
            return count > 0
    except Exception:
        # An unreadable database or one without a metrics table counts as "no data".
        return False


def _export_and_upload_static(
    project: str,
    dest_bucket_id: str,
    db_path: Path,
    media_dir: Path | None = None,
) -> None:
    """Export a project for a static space and upload the result to a bucket.

    The export is written to a temporary directory, ``media_dir`` (when given
    and present) is copied to ``media/`` inside it, and every file in the
    directory is uploaded under its path relative to the directory.

    Args:
        project: Project name.
        dest_bucket_id: Bucket receiving the exported files.
        db_path: Database passed to the export as ``db_path_override``.
        media_dir: Optional directory of media files to include.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        output_dir = Path(tmp_dir)
        SQLiteStorage.export_for_static_space(
            project, output_dir, db_path_override=db_path
        )
        if media_dir and media_dir.exists():
            shutil.copytree(media_dir, output_dir / "media")

        # as_posix() keeps remote paths "/"-separated on every platform;
        # str() on a relative Path would yield backslashes on Windows.
        files_to_add = [
            (str(f), f.relative_to(output_dir).as_posix())
            for f in output_dir.rglob("*")
            if f.is_file()
        ]
        # The upload must happen while the temporary directory still exists.
        huggingface_hub.batch_bucket_files(dest_bucket_id, add=files_to_add)


def upload_project_to_bucket_for_static(project: str, bucket_id: str) -> None:
    """Export the local project for a static space and upload it to ``bucket_id``.

    If the local database has no metrics data, a download of the database
    from the same bucket is attempted first; the outcome of that attempt is
    not checked.
    """
    if not _local_db_has_data(project):
        _download_db_from_bucket(project, bucket_id)
    db_path = SQLiteStorage.get_project_db_path(project)
    _export_and_upload_static(project, bucket_id, db_path, MEDIA_DIR / project)


def export_from_bucket_for_static(
    source_bucket_id: str,
    dest_bucket_id: str,
    project: str,
) -> None:
    """Build a static export from one bucket's data and upload it to another.

    The project's database and any files under ``trackio/media/<project>/``
    are downloaded from ``source_bucket_id`` into a temporary directory, then
    exported and uploaded to ``dest_bucket_id``.

    Raises:
        FileNotFoundError: If the project's database could not be downloaded
            from the source bucket.
    """
    with tempfile.TemporaryDirectory() as work_dir:
        work_path = Path(work_dir)
        db_path = work_path / SQLiteStorage.get_project_db_filename(project)
        if not _download_db_from_bucket(project, source_bucket_id, dest_path=db_path):
            raise FileNotFoundError(
                f"Could not download database for project '{project}' "
                f"from bucket '{source_bucket_id}'."
            )

        media_dest = work_path / "media"
        source_media_prefix = f"trackio/media/{project}/"
        media_to_download = _list_bucket_file_paths(
            source_bucket_id, prefix=source_media_prefix
        )
        if media_to_download:
            media_dest.mkdir(parents=True, exist_ok=True)
            dl_pairs = []
            for remote_path in media_to_download:
                # Strip the project prefix so files land directly under media/.
                rel = remote_path[len(source_media_prefix) :]
                local_file = media_dest / rel
                local_file.parent.mkdir(parents=True, exist_ok=True)
                dl_pairs.append((remote_path, str(local_file)))
            huggingface_hub.download_bucket_files(source_bucket_id, files=dl_pairs)

        # media_dest only exists when at least one media file was listed.
        _export_and_upload_static(
            project,
            dest_bucket_id,
            db_path,
            media_dest if media_dest.exists() else None,
        )