| | |
| | |
| | import json |
| | import os |
| | import time |
| | from typing import Callable, Dict, Any, Optional |
| |
|
| | from huggingface_hub import ( |
| | HfApi, |
| | HfFileSystem, |
| | create_repo, |
| | CommitOperationAdd, |
| | HfHubHTTPError, |
| | ) |
| |
|
# --- Module configuration (read once at import time) ---

# The Hub token must be injected via the Space's secrets; fail fast if absent.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN não configurado. Defina em Settings → Variables & secrets do Space.")

# Dataset repo ("user/name") that stores the JSON database file.
DATASET_PATH = os.getenv("HF_DATASET_PATH", "SEUUSER/academia-arm-db")

# Path of the JSON database inside the dataset repo.
FILE_PATH = os.getenv("HF_DB_FILE", "database.json")
# Branch that commits are written to and reads come from.
REVISION = "main"

# Shared Hub clients: commit API (writes) and fsspec-style filesystem (reads).
api = HfApi(token=HF_TOKEN)
fs = HfFileSystem(token=HF_TOKEN)
| |
|
def _ensure_dataset_exists():
    """Create the private dataset repo, tolerating its prior existence.

    ``exist_ok=True`` already makes an existing repo a no-op; any 409
    ("already exists" / conflict) error raised anyway is swallowed as a
    belt-and-braces measure, while every other failure propagates.
    """
    try:
        create_repo(
            repo_id=DATASET_PATH,
            repo_type="dataset",
            token=HF_TOKEN,
            private=True,
            exist_ok=True,
        )
    except Exception as err:
        # A 409 just means the repo is already there — safe to ignore.
        if "409" in str(err):
            return
        raise
| |
|
def load_data() -> Dict[str, Any]:
    """Read the JSON database straight from the Hub, avoiding any cache
    so the current remote state is always returned.

    Returns an empty skeleton structure when the file does not exist yet.
    """
    _ensure_dataset_exists()
    remote_path = f"datasets/{DATASET_PATH}/{FILE_PATH}"

    if not fs.exists(remote_path):
        # First run: the database file has never been written.
        return {"alunos": [], "cursos": [], "cronogramas": [], "reposicoes": [], "certificados": [], "notas": []}

    with fs.open(remote_path, "r") as fh:
        return json.load(fh)
| |
|
def save_data(
    data: Dict[str, Any],
    *,
    commit_message: Optional[str] = None,
    max_retries: int = 3,
    backoff_seconds: float = 1.0,
    on_merge_conflict: Optional[Callable[[Dict[str, Any], Dict[str, Any]], Dict[str, Any]]] = None,
) -> None:
    """Persist the JSON database to the dataset using an atomic commit.

    Retries the whole read-merge-write cycle on failure (up to
    ``max_retries`` attempts, linear backoff).

    Args:
        data: Local payload to write.
        commit_message: Optional commit message; a default is generated.
        max_retries: Attempts before the last exception propagates.
        backoff_seconds: Base of the linear backoff between attempts.
        on_merge_conflict: Optional merger, called positionally as
            ``on_merge_conflict(local, remote)`` and returning the final
            payload to write. When provided it runs on every attempt so
            concurrent edits can be reconciled.
    """
    _ensure_dataset_exists()
    attempt = 0
    while True:
        attempt += 1
        try:
            # Re-read the remote state each attempt so a merger sees the
            # latest contents.
            remote = load_data()

            payload = data
            if on_merge_conflict:
                # BUG FIX: call positionally, matching the declared
                # Callable[[local, remote], ...] signature. The previous
                # keyword call (local=..., remoto=...) raised TypeError for
                # any callback whose parameters were not named exactly so.
                payload = on_merge_conflict(data, remote)

            content = json.dumps(payload, indent=2, ensure_ascii=False).encode("utf-8")

            api.create_commit(
                repo_id=DATASET_PATH,
                repo_type="dataset",
                operations=[
                    CommitOperationAdd(
                        path_in_repo=FILE_PATH,
                        path_or_fileobj=content,
                    ),
                ],
                commit_message=commit_message or f"update {FILE_PATH} via Space",
                revision=REVISION,
            )
            return
        except Exception:
            # Single handler: the previous separate HfHubHTTPError arm was a
            # byte-identical duplicate of this one. Retry with linear
            # backoff; re-raise once the retry budget is exhausted.
            if attempt >= max_retries:
                raise
            time.sleep(backoff_seconds * attempt)