# ./storage.py

"""
Persistence Layer
- Handles the "Save/Load" functionality using Hugging Face Dataset as a database
"""

import json
import os
from datetime import datetime

from huggingface_hub import HfApi, hf_hub_download

# --- CONFIGURATION ---
REPO_ID = "prashantmatlani/chathistorycoderg"
HISTORY_DIR = "./chathistory"

# Layout of chat files inside the dataset repo: chats/<chat_id>.json
_REMOTE_PREFIX = "chats/"
_CHAT_SUFFIX = ".json"

# Initialize the API with your token (read from the HF_TOKEN environment variable)
api = HfApi(token=os.getenv("HF_TOKEN"))


def save_chat(chat_id, history):
    """Save a chat to the local history directory and sync it to the Hub.

    Args:
        chat_id: Identifier used as the file stem. When falsy (e.g. for a
            brand new chat) a timestamp ID of the form ``YYYYMMDD_HHMMSS``
            is generated from the local clock.
        history: JSON-serializable chat content to persist.

    Returns:
        The chat ID that was actually used for the file name.

    Raises:
        OSError: If the local file cannot be created or written.
        TypeError: If ``history`` is not JSON-serializable.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(HISTORY_DIR, exist_ok=True)

    # Generate an ID if none exists. NOTE(review): the timestamp has
    # one-second resolution, so two new chats in the same second would share
    # an ID.
    if not chat_id:
        chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")

    filename = f"{chat_id}{_CHAT_SUFFIX}"
    local_path = os.path.join(HISTORY_DIR, filename)

    # 1. Save locally. Local write errors propagate to the caller.
    with open(local_path, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=4)

    # 2. Sync to the Hugging Face dataset. This is best effort: a failed
    # upload is reported but does not fail the save, since the local copy
    # already exists.
    try:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"{_REMOTE_PREFIX}{filename}",
            repo_id=REPO_ID,
            repo_type="dataset",
        )
    except Exception as e:
        print(f"Cloud Sync Warning: {e}")

    return chat_id


def load_history():
    """Return the chat IDs stored on the Hub, for populating the sidebar.

    Only ``.json`` files under ``chats/`` in the dataset repo are considered.

    Returns:
        A list of single-element lists (``[[chat_id], ...]``), the shape the
        original code documents as required by the Gradio Dataset component.
        IDs are sorted in reverse lexicographic order, which puts the newest
        first for the timestamp IDs generated by ``save_chat``. Returns an
        empty list when the Hub cannot be queried.
    """
    try:
        # Pull the list from the Hub so the sidebar reflects all saved sessions.
        files = api.list_repo_files(repo_id=REPO_ID, repo_type="dataset")
    except Exception as e:
        # Best effort: an unreachable Hub yields an empty sidebar, but the
        # reason is reported instead of being silently discarded.
        print(f"Cloud Sync Warning: {e}")
        return []

    # Strip only the trailing suffix so every listed ID maps back to
    # chats/<chat_id>.json when passed to get_chat_content().
    chat_ids = [
        path.rsplit("/", 1)[-1][: -len(_CHAT_SUFFIX)]
        for path in files
        if path.startswith(_REMOTE_PREFIX) and path.endswith(_CHAT_SUFFIX)
    ]
    return [[chat_id] for chat_id in sorted(chat_ids, reverse=True)]


def get_chat_content(chat_id):
    """Load one chat's content from the Hub, falling back to the local copy.

    Args:
        chat_id: Identifier of the chat, i.e. the file stem of
            ``chats/<chat_id>.json`` in the dataset repo.

    Returns:
        The parsed JSON content of the chat, or an empty list when neither
        the Hub nor the local history directory can provide it.
    """
    filename = f"{_REMOTE_PREFIX}{chat_id}{_CHAT_SUFFIX}"
    local_path = os.path.join(HISTORY_DIR, f"{chat_id}{_CHAT_SUFFIX}")

    try:
        # Ensure the local history directory exists.
        os.makedirs(HISTORY_DIR, exist_ok=True)

        # Prefer the Hub copy. The file is read from the path returned by
        # hf_hub_download; the copy under HISTORY_DIR is not rewritten here.
        downloaded_path = hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=filename,
            token=os.getenv("HF_TOKEN"),
        )
        with open(downloaded_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception as e:
        # Fall through to the local copy if the Hub is unreachable or the
        # downloaded file cannot be parsed.
        print(f"Cloud Sync Warning: {e}")

    try:
        with open(local_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # No local copy either: treat as an empty chat.
        return []
    except (OSError, ValueError) as e:
        # Unreadable or corrupt local file (JSON and Unicode decode errors
        # are ValueError subclasses): report it and treat as an empty chat.
        print(f"Local History Warning: {e}")
        return []