| import os |
| import json |
| from pathlib import Path |
| from huggingface_hub import HfApi, hf_hub_download |
| from huggingface_hub.utils import EntryNotFoundError |
|
|
# Hugging Face credentials, read once from the environment at import time.
# Each value is None when its environment variable is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_USERNAME = os.environ.get("HF_USERNAME")
|
|
|
|
def get_repo_id() -> str:
    """Return the dataset repo id, formatted as ``<HF_USERNAME>/chatpaper-data``."""
    return "{}/chatpaper-data".format(HF_USERNAME)
|
|
|
|
def is_hf_configured() -> bool:
    """Report whether both HF_TOKEN and HF_USERNAME hold non-empty values."""
    if not HF_TOKEN:
        return False
    return bool(HF_USERNAME)
|
|
|
|
def ensure_dataset_repo():
    """Make sure the dataset repo exists, creating it as private if needed.

    Does nothing when HF credentials are not configured. Every outcome is
    reported with ``print``; no exception is propagated to the caller.
    """
    if not is_hf_configured():
        return

    client = HfApi(token=HF_TOKEN)
    dataset_id = get_repo_id()

    try:
        client.repo_info(repo_id=dataset_id, repo_type="dataset")
        print(f"HF dataset repo exists: {dataset_id}")
        return
    except Exception:
        # Any lookup failure is treated as "repo missing": fall through
        # and attempt to create it.
        pass

    try:
        client.create_repo(repo_id=dataset_id, repo_type="dataset", private=True)
        print(f"Created HF dataset repo: {dataset_id}")
    except Exception as err:
        reason = str(err)
        # A conflict response means the repo is already there.
        if "already exists" in reason or "409" in reason:
            print(f"HF dataset repo already exists: {dataset_id}")
        else:
            print(f"Warning: Could not create HF repo: {err}")
|
|
|
|
def upload_file(local_path: str, path_in_repo: str) -> bool:
    """Upload a local file to the dataset repo.

    Args:
        local_path: Path of the file on disk to upload.
        path_in_repo: Destination path inside the dataset repo.

    Returns:
        True when the upload call completes, False when it raises (the
        error is printed).
    """
    try:
        HfApi(token=HF_TOKEN).upload_file(
            path_or_fileobj=local_path,
            path_in_repo=path_in_repo,
            repo_id=get_repo_id(),
            repo_type="dataset",
        )
    except Exception as err:
        print(f"HF upload error: {err}")
        return False
    return True
|
|
|
|
def download_file(path_in_repo: str, local_path: str) -> bool:
    """Fetch a file from the dataset repo and copy it to ``local_path``.

    The file is first downloaded under ``/tmp/hf_downloads`` and then copied
    to the requested destination, whose parent directories are created.

    Args:
        path_in_repo: Path of the file inside the dataset repo.
        local_path: Destination path on disk.

    Returns:
        True on success. False when the entry is not found in the repo
        (silently) or when any other error occurs (the error is printed).
    """
    import shutil

    try:
        fetched = hf_hub_download(
            repo_id=get_repo_id(),
            filename=path_in_repo,
            repo_type="dataset",
            token=HF_TOKEN,
            local_dir="/tmp/hf_downloads",
        )
        destination_dir = Path(local_path).parent
        destination_dir.mkdir(parents=True, exist_ok=True)
        shutil.copy(fetched, local_path)
    except EntryNotFoundError:
        # A missing file is an expected outcome, so nothing is logged.
        return False
    except Exception as err:
        print(f"HF download error: {err}")
        return False
    return True
|
|
|
|
def save_json_to_hf(data, path_in_repo: str) -> bool:
    """Serialize ``data`` as JSON and upload it to the dataset repo.

    The JSON is written to a temporary file under ``/tmp`` (named from
    ``path_in_repo`` with ``/`` replaced by ``_``), uploaded via
    ``upload_file``, and the temporary file is always removed afterwards.

    Args:
        data: Any JSON-serializable object.
        path_in_repo: Destination path inside the dataset repo.

    Returns:
        The result of ``upload_file``: True on success, False otherwise.

    Raises:
        TypeError: If ``data`` is not JSON-serializable (propagated from
            ``json.dump``).
        OSError: If the temporary file cannot be written.
    """
    tmp_path = Path("/tmp") / ("hf_" + path_in_repo.replace("/", "_"))
    tmp_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        return upload_file(str(tmp_path), path_in_repo)
    finally:
        # Clean up even when serialization or upload raises, so a failed
        # save never leaves a partial file behind in /tmp.
        tmp_path.unlink(missing_ok=True)
|
|
|
|
def load_json_from_hf(path_in_repo: str, default=None):
    """Download a JSON file from the dataset repo and return its parsed content.

    Args:
        path_in_repo: Path of the JSON file inside the dataset repo.
        default: Value returned when the download fails or the file cannot
            be read or parsed.

    Returns:
        The decoded JSON value, or ``default`` on any failure.
    """
    scratch_path = "/tmp/hf_" + path_in_repo.replace("/", "_")
    if not download_file(path_in_repo, scratch_path):
        return default

    try:
        with open(scratch_path, "r", encoding="utf-8") as handle:
            parsed = json.load(handle)
    except Exception:
        parsed = default
    finally:
        # Best-effort removal of the scratch copy; failures are ignored.
        try:
            Path(scratch_path).unlink(missing_ok=True)
        except Exception:
            pass
    return parsed
|
|
|
|
def save_chat(chat_data: dict) -> bool:
    """Store a chat under ``chats/<session_id>.json`` in the dataset repo.

    Args:
        chat_data: Chat payload; its ``"session_id"`` entry names the file.

    Returns:
        False when HF is not configured, otherwise the result of
        ``save_json_to_hf``.
    """
    if is_hf_configured():
        session = chat_data["session_id"]
        return save_json_to_hf(chat_data, f"chats/{session}.json")
    return False
|
|
|
|
def load_all_chats() -> list:
    """Load every chat stored under ``chats/*.json`` in the dataset repo.

    Returns:
        A list of chat dicts sorted by their ``"timestamp"`` entry, newest
        first. Chats without a timestamp sort as the empty string. Returns
        an empty list when HF is not configured or the file listing fails
        (the listing error is printed).
    """
    if not is_hf_configured():
        return []
    try:
        api = HfApi(token=HF_TOKEN)
        repo_files = api.list_repo_files(repo_id=get_repo_id(), repo_type="dataset")
        chat_files = [
            name
            for name in repo_files
            if name.startswith("chats/") and name.endswith(".json")
        ]
    except Exception as e:
        # Report the failure like the other helpers do instead of hiding it.
        print(f"HF list files error: {e}")
        return []

    chats = []
    for file_path in chat_files:
        chat = load_json_from_hf(file_path, default=None)
        # Skip failed/empty loads and payloads that are not JSON objects;
        # a non-dict would otherwise break the sort below.
        if chat and isinstance(chat, dict):
            chats.append(chat)

    def _timestamp_key(chat):
        # An explicit null timestamp is treated like a missing one so it
        # stays comparable with string timestamps.
        stamp = chat.get("timestamp", "")
        return "" if stamp is None else stamp

    chats.sort(key=_timestamp_key, reverse=True)
    return chats
|
|
|
|
def delete_chat(session_id: str) -> bool:
    """Delete ``chats/<session_id>.json`` from the dataset repo.

    Args:
        session_id: Identifier of the chat to remove.

    Returns:
        True when the delete call completes; False when HF is not
        configured or the call raises (the error is printed).
    """
    if not is_hf_configured():
        return False
    try:
        client = HfApi(token=HF_TOKEN)
        target = f"chats/{session_id}.json"
        client.delete_file(
            path_in_repo=target,
            repo_id=get_repo_id(),
            repo_type="dataset",
        )
    except Exception as err:
        print(f"Delete chat error: {err}")
        return False
    return True
|
|
|
|
def save_related_papers(data: dict) -> bool:
    """Upload ``data`` as ``related_papers.json``; return False when HF is not configured."""
    return save_json_to_hf(data, "related_papers.json") if is_hf_configured() else False
|
|
|
|
def load_related_papers() -> dict:
    """Load ``related_papers.json`` from the dataset repo.

    Returns an empty dict when HF is not configured or the file cannot be
    loaded.
    """
    if is_hf_configured():
        return load_json_from_hf("related_papers.json", default={})
    return {}