maris-ai-master / core-python /scripts /sync_hf_datasets.py
MarisUK's picture
Maris AI model sync
f440f03 verified
"""Sinhronizē Maris atmiņas datus uz lokālo disku."""
from __future__ import annotations
import argparse
import logging
import os
# Module-level logger named after this module; used by sync_datasets below.
logger = logging.getLogger(__name__)
def sync_datasets(
    repo_id: str | None = None,
    local_dir: str = "./data/hf_cache",
) -> None:
    """Download the Maris AI memory dataset repository into a local directory.

    The repository ID is resolved from the first non-empty value of:
    the ``repo_id`` argument, the ``MARIS_MEMORY_REPO`` environment variable,
    the ``HF_DATASET_REPO`` environment variable, and finally the built-in
    default ``"MarisUK/maris-ai-memory"``.

    The access token is the first non-empty value of the ``MARIS_REPO_TOKEN``,
    ``MARIS_TOKEN`` and ``HF_TOKEN`` environment variables; ``None`` is passed
    when none of them is set.

    Args:
        repo_id: Dataset repository ID, or ``None`` to resolve it from the
            environment.
        local_dir: Directory handed to ``snapshot_download`` as ``local_dir``.

    Raises:
        Exception: Any error raised while importing ``huggingface_hub`` or
            while downloading is logged and then re-raised unchanged.
    """
    # The default is applied at the END of the chain (not as the getenv
    # default) so that an HF_DATASET_REPO that is set but empty still falls
    # back to the built-in repository instead of yielding "".
    repo_id = (
        repo_id
        or os.getenv("MARIS_MEMORY_REPO")
        or os.getenv("HF_DATASET_REPO")
        or "MarisUK/maris-ai-memory"
    )
    # Normalize "all variables empty" to None so an empty string is never
    # forwarded as a token.
    token = (
        os.getenv("MARIS_REPO_TOKEN")
        or os.getenv("MARIS_TOKEN")
        or os.getenv("HF_TOKEN")
        or None
    )
    try:
        # Imported lazily so the module can be imported without the
        # optional huggingface_hub dependency installed.
        from huggingface_hub import snapshot_download  # type: ignore

        logger.info("Lejupielādē dataset: %s -> %s", repo_id, local_dir)
        snapshot_download(
            repo_id=repo_id,
            repo_type="dataset",
            local_dir=local_dir,
            token=token,
        )
        logger.info("Sinhronizācija pabeigta: %s", local_dir)
    except Exception as exc:  # noqa: BLE001
        # Log for operators, then propagate so callers / the CLI still fail.
        logger.error("Sinhronizācijas kļūda: %s", exc)
        raise
if __name__ == "__main__":
    # Command-line entry point: parse the options, enable INFO-level logging,
    # then run the synchronization with the parsed values.
    cli = argparse.ArgumentParser(
        description="Sinhronizē Maris AI atmiņas repozitoriju",
    )
    cli.add_argument("--repo-id", help="Atmiņas repo ID")
    cli.add_argument(
        "--local-dir",
        default="./data/hf_cache",
        help="Lokālā direktorija",
    )
    options = cli.parse_args()

    logging.basicConfig(level=logging.INFO)
    sync_datasets(repo_id=options.repo_id, local_dir=options.local_dir)