Spaces:
abc1181
/
Runtime error

File size: 5,744 Bytes
b6724be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ec62f6a
b6724be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
"""
VS Code Space — HF Dataset Persistence
Restores the data directory (/root/app/data) from the dataset repo on boot,
then auto-saves it every SYNC_INTERVAL seconds (default 300 = 5 minutes)
"""

import os, sys, time, threading, shutil, traceback
from pathlib import Path
from datetime import datetime
from huggingface_hub import HfApi, snapshot_download

# ── Config ──────────────────────────────────────────────────────────────────

HF_TOKEN       = os.environ.get("HF_TOKEN", "")
DATASET_REPO   = os.environ.get("REPO", "")          # e.g. abc1181/vscode-storage
DATA_DIR       = Path("/root/app/data")
PATH_IN_REPO   = "workspace"                          # folder name inside dataset repo


def _read_sync_interval(default=300):
    """Return the auto-save period in seconds taken from SYNC_INTERVAL.

    An unset or blank variable yields ``default``. A value that is not an
    integer, or is not strictly positive, also yields ``default`` (with a
    warning) instead of crashing at import time or producing a zero-length
    wait that would make the sync loop upload back-to-back.
    """
    raw = os.environ.get("SYNC_INTERVAL", "").strip()
    if not raw:
        return default
    try:
        seconds = int(raw)
    except ValueError:
        seconds = 0
    if seconds <= 0:
        print(f"[SYNC] WARNING: invalid SYNC_INTERVAL {raw!r} - using {default}s")
        return default
    return seconds


SYNC_INTERVAL  = _read_sync_interval()                # 5 mins default

# Passed to upload_folder(ignore_patterns=...). The patterns are matched
# fnmatch-style against each file's path relative to DATA_DIR, so a pattern
# such as "node_modules/**" only covers a top-level directory. The "*/..."
# variants extend the same exclusions to directories nested inside projects.
IGNORE = [
    "*.log", "*.lock", "*.tmp", "*.pid",
    "__pycache__", "node_modules/**",
    ".git/**", "*.pyc",
    "__pycache__/*", "*/__pycache__/*",
    "*/node_modules/*",
    "*/.git/*",
]

# ── Setup ───────────────────────────────────────────────────────────────────

# Make sure the data directory exists before anything tries to read or fill it.
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Without credentials or a target repo there is nothing to sync with: exit
# with status 0 so a missing setting is treated as "persistence off", not as
# a failure.
if not HF_TOKEN:
    print("[SYNC] WARNING: HF_TOKEN not set β€” persistence disabled")
    sys.exit(0)

if not DATASET_REPO:
    print("[SYNC] WARNING: REPO not set β€” persistence disabled")
    sys.exit(0)

# Shared, authenticated Hub client used by every function below.
api = HfApi(token=HF_TOKEN)

# ── Ensure dataset repo exists ──────────────────────────────────────────────

def ensure_repo():
    """Make sure the dataset repo is usable, creating it when the lookup fails.

    Returns:
        True when the repo was found or was just created (as a private
        dataset); False when both the lookup and the creation raised.
    """
    target = {"repo_id": DATASET_REPO, "repo_type": "dataset"}

    try:
        api.repo_info(**target)
        print(f"[SYNC] Dataset found: {DATASET_REPO}")
        return True
    except Exception:
        # Any lookup failure (not only "repo missing") falls through to the
        # creation attempt below.
        pass

    try:
        api.create_repo(**target, private=True)
        print(f"[SYNC] Created dataset: {DATASET_REPO}")
        return True
    except Exception as err:
        print(f"[SYNC] Failed to find/create dataset: {err}")
        return False

# ── Restore /data from dataset on boot ──────────────────────────────────────

def restore():
    """Copy the dataset's PATH_IN_REPO folder into DATA_DIR.

    Lists the repo files first and returns early when nothing is stored under
    PATH_IN_REPO. Otherwise the folder is downloaded into a temporary
    directory and every regular file is copied (with metadata, via
    ``shutil.copy2``) to the same relative location below DATA_DIR,
    overwriting files of the same name. Any exception is printed together
    with its traceback and swallowed; nothing is returned.
    """
    print(f"[SYNC] Restoring /data from {DATASET_REPO}...")
    try:
        prefix = f"{PATH_IN_REPO}/"
        stored = [
            name
            for name in api.list_repo_files(repo_id=DATASET_REPO, repo_type="dataset")
            if name.startswith(prefix)
        ]

        if not stored:
            print("[SYNC] No files in dataset yet β€” starting fresh")
            return

        print(f"[SYNC] Found {len(stored)} files β€” downloading...")
        # tempfile is not imported at module level, so it is imported here.
        import tempfile
        with tempfile.TemporaryDirectory() as scratch:
            snapshot_download(
                repo_id=DATASET_REPO,
                repo_type="dataset",
                allow_patterns=f"{PATH_IN_REPO}/**",
                local_dir=scratch,
                token=HF_TOKEN,
            )
            staged_root = Path(scratch) / PATH_IN_REPO
            if staged_root.exists():
                for staged in staged_root.rglob("*"):
                    if not staged.is_file():
                        continue
                    # Mirror the staged file's relative path under DATA_DIR.
                    target = DATA_DIR / staged.relative_to(staged_root)
                    target.parent.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(str(staged), str(target))
        print("[SYNC] βœ… Restore complete!")

    except Exception as err:
        print(f"[SYNC] Restore failed: {err}")
        traceback.print_exc()

# ── Save /data to dataset ───────────────────────────────────────────────────

def save():
    """Upload the contents of DATA_DIR to PATH_IN_REPO in the dataset repo.

    Does nothing but log when DATA_DIR holds no files. Otherwise a single
    ``upload_folder`` call is made with the IGNORE patterns and an
    ``autosave <timestamp>`` commit message. Any exception is printed together
    with its traceback and swallowed; nothing is returned.
    """
    try:
        # Counts every file on disk, including ones IGNORE will later skip.
        total = sum(len(names) for _root, _dirs, names in os.walk(DATA_DIR))
        if not total:
            print("[SYNC] Nothing to save β€” /data is empty")
            return

        print(f"[SYNC] Uploading {total} files β†’ {DATASET_REPO}...")
        started = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        api.upload_folder(
            folder_path=str(DATA_DIR),
            path_in_repo=PATH_IN_REPO,
            repo_id=DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN,
            commit_message=f"autosave {started}",
            ignore_patterns=IGNORE,
        )
        finished = datetime.now().strftime("%H:%M:%S")
        print(f"[SYNC] πŸ’Ύ Saved at {finished}")

    except Exception as err:
        print(f"[SYNC] Save failed: {err}")
        traceback.print_exc()

# ── Background sync loop ────────────────────────────────────────────────────

def sync_loop(stop_event):
    """Call save() every SYNC_INTERVAL seconds until ``stop_event`` is set.

    Args:
        stop_event: object with ``is_set()`` and ``wait(timeout=...)`` (main()
            passes a ``threading.Event``); setting it ends the loop.
    """
    print(f"[SYNC] Auto-save loop started (every {SYNC_INTERVAL}s)")
    # `or` short-circuits: an already-set event leaves at once, otherwise the
    # wait doubles as the sleep between saves and returns True when the event
    # is set during the pause.
    while not (stop_event.is_set() or stop_event.wait(timeout=SYNC_INTERVAL)):
        print(f"[SYNC] Periodic save at {datetime.now().isoformat()}")
        save()

# ── Main ────────────────────────────────────────────────────────────────────

def main():
    """Run the persistence service: check the repo, restore, then auto-save.

    Exits with status 1 when the dataset repo can neither be found nor
    created. Otherwise DATA_DIR is restored once, a daemon thread running
    sync_loop() is started, and the main thread idles until it is interrupted.
    On interruption the loop is told to stop and one final save() is made.

    Both Ctrl-C (SIGINT) and SIGTERM trigger that final save. SIGTERM is what
    a container runtime normally sends on stop (e.g. ``docker stop``), and
    without a handler it would end the process without saving.
    """
    # Function-scope import: signal is only needed here.
    import signal

    if not ensure_repo():
        sys.exit(1)

    restore()

    stop_event = threading.Event()
    t = threading.Thread(target=sync_loop, args=(stop_event,), daemon=True)
    t.start()

    # Keep alive — VS Code server runs separately
    try:
        # Route SIGTERM through Python's stock SIGINT handler so it raises
        # KeyboardInterrupt and takes the same shutdown path as Ctrl-C.
        # It is installed inside the try so the interrupt is always caught,
        # and only after restore() so an early stop never uploads a
        # half-restored directory. signal.signal() is only legal in the main
        # thread, hence the guard.
        if threading.current_thread() is threading.main_thread():
            signal.signal(signal.SIGTERM, signal.default_int_handler)
        while True:
            time.sleep(60)
    except KeyboardInterrupt:
        print("[SYNC] Shutting down β€” final save...")
        stop_event.set()
        save()
        print("[SYNC] Done.")

if __name__ == "__main__":
    main()