File size: 2,918 Bytes
0ea40d5
 
 
 
 
 
 
 
 
b5a0b96
0ea40d5
 
b5a0b96
 
 
 
 
0ea40d5
 
 
b5a0b96
 
 
 
 
 
 
 
 
 
0ea40d5
b5a0b96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ea40d5
 
b5a0b96
0ea40d5
b5a0b96
0ea40d5
642547d
 
b5a0b96
642547d
0ea40d5
b5a0b96
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

# ./storage.py

"""

Persistence Layer - Handles the "Save/Load" functionality using Hugging Face Dataset as a database

"""

import json
import os
from datetime import datetime
from huggingface_hub import HfApi, hf_hub_download

# --- CONFIGURATION ---
# Hugging Face Dataset repo used as the persistence backend for chat history.
REPO_ID = "prashantmatlani/chathistorycoderg"
# Local working directory where chat JSON files are cached before upload.
HISTORY_DIR = "./chathistory"

# Initialize the API with your token.
# NOTE(review): if HF_TOKEN is unset this becomes HfApi(token=None) — uploads
# will fail at call time (caught in save_chat), not here.
api = HfApi(token=os.getenv("HF_TOKEN"))

def save_chat(chat_id, history):
    """Save a chat to the local history directory and sync it to the Hub.

    Args:
        chat_id: Identifier for the chat session. When falsy (e.g. a brand
            new chat), a timestamp-based ID is generated instead.
        history: JSON-serializable chat history (e.g. a list of message dicts).

    Returns:
        The chat_id actually used, so the caller can keep referencing
        the session.
    """
    # exist_ok avoids the check-then-create race of the previous
    # os.path.exists/makedirs pair.
    os.makedirs(HISTORY_DIR, exist_ok=True)

    # Generate a unique ID if none exists (e.g., for a brand new chat)
    if not chat_id:
        chat_id = datetime.now().strftime("%Y%m%d_%H%M%S")

    filename = f"{chat_id}.json"
    local_path = os.path.join(HISTORY_DIR, filename)

    # 1. Save locally first so the data survives even if the upload fails.
    with open(local_path, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=4)

    # 2. Sync to Hugging Face Dataset.
    # BUG FIX: path_in_repo previously used the literal string
    # "chats/(unknown)" instead of the real filename, so every chat
    # overwrote the same remote file and could never be listed by ID.
    try:
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"chats/{filename}",
            repo_id=REPO_ID,
            repo_type="dataset",
        )
    except Exception as e:
        # Best-effort sync: keep the local copy and warn instead of crashing.
        print(f"Cloud Sync Warning: {e}")

    return chat_id

def load_history():
    """Retrieve the list of saved chat IDs from the Hub for the sidebar.

    Returns:
        A list of single-element lists (Gradio Dataset row format), newest
        chat ID first. Returns an empty list when the Hub is unreachable.
    """
    try:
        # We pull the list from the Hub so the sidebar reflects all saved
        # sessions, not just those cached locally.
        files = api.list_repo_files(repo_id=REPO_ID, repo_type="dataset")
        chat_files = [
            f.split("/")[-1].replace(".json", "")
            for f in files
            if f.startswith("chats/")
        ]
        # Timestamp-named IDs sort lexicographically, so reverse=True is
        # newest-first. Wrapped as [[id]] for the Gradio Dataset component.
        return [[f] for f in sorted(chat_files, reverse=True)]
    except Exception:
        # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
        # and SystemExit. Network/API failures degrade to an empty sidebar.
        return []

def get_chat_content(chat_id):
    """Load a specific chat's content from the Hub, falling back to local cache.

    Args:
        chat_id: The chat session identifier (filename stem, no extension).

    Returns:
        The deserialized chat history, or [] if it cannot be found anywhere.
    """
    filename = f"chats/{chat_id}.json"
    local_path = os.path.join(HISTORY_DIR, f"{chat_id}.json")

    try:
        # exist_ok avoids the check-then-create race; the dir must exist
        # for the local fallback path to make sense.
        os.makedirs(HISTORY_DIR, exist_ok=True)

        # Download from the Hub to keep local state fresh.
        # FIX: removed the redundant function-local
        # `from huggingface_hub import hf_hub_download` — it is already
        # imported at module level.
        downloaded_path = hf_hub_download(
            repo_id=REPO_ID,
            repo_type="dataset",
            filename=filename,
            token=os.getenv("HF_TOKEN"),
        )
        with open(downloaded_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except Exception:
        # Fallback to the local cache if the Hub is unreachable or the
        # file was never uploaded.
        if os.path.exists(local_path):
            with open(local_path, "r", encoding="utf-8") as f:
                return json.load(f)
    return []