Spaces:
Sleeping
Sleeping
| /** | |
| * HF Dataset ↔ SQLite DB sync. | |
| * On startup: restore raqim.db from a private HF Dataset, but only when no local DB exists yet. | |
| * On interval / SIGTERM: push raqim.db back to the dataset. | |
| * | |
| * Uses git (already installed in the Docker image) to push/pull, | |
| * which is the most reliable method for HF Hub repos. | |
| */ | |
| import { execSync, spawnSync } from "child_process"; | |
| import fs from "fs"; | |
| import path from "path"; | |
| import { fileURLToPath } from "url"; | |
| import { DatabaseSync } from "node:sqlite"; | |
| const __dirname = path.dirname(fileURLToPath(import.meta.url)); | |
| const HF_TOKEN = process.env.HF_TOKEN; | |
| const HF_USERNAME = process.env.HF_USERNAME; | |
| const DATASET_NAME = "raqim-db"; | |
| const DB_PATH = process.env.DB_PATH || "/data/raqim.db"; | |
| const CLONE_DIR = "/tmp/raqim-db-repo"; | |
/**
 * Execute a shell command synchronously and report whether it succeeded.
 *
 * @param {string} cmd - Command line handed to `execSync`.
 * @param {object} [opts] - Extra `execSync` options; they override the
 *   default `stdio: "inherit"`.
 * @returns {boolean} `true` when `execSync` returned normally, `false` when
 *   it threw (the error itself is discarded).
 */
function run(cmd, opts = {}) {
  let succeeded = true;
  try {
    const execOptions = { stdio: "inherit", ...opts };
    execSync(cmd, execOptions);
  } catch {
    succeeded = false;
  }
  return succeeded;
}
/**
 * Build the authenticated HTTPS remote URL of the HF Dataset repo.
 *
 * The username and token are percent-encoded before being placed in the
 * URL so that characters such as `@`, `:` or `/` cannot break the
 * `user:password@host` structure. Values made only of unreserved characters
 * are unchanged by the encoding.
 *
 * NOTE(review): the returned string contains the access token — avoid
 * logging it.
 *
 * @returns {string} Remote URL built from HF_USERNAME, HF_TOKEN and DATASET_NAME.
 */
function gitUrl() {
  const user = encodeURIComponent(HF_USERNAME);
  const token = encodeURIComponent(HF_TOKEN);
  return `https://${user}:${token}@huggingface.co/datasets/${user}/${DATASET_NAME}`;
}
/**
 * Best-effort creation of the private HF Dataset repo used as DB backup.
 *
 * Does nothing when HF_TOKEN or HF_USERNAME is unset. Never throws: network
 * errors and unexpected HTTP statuses are logged as warnings so that a bad
 * token or similar problem is visible, while the caller carries on.
 *
 * @returns {Promise<void>}
 */
async function ensureDataset() {
  if (!HF_TOKEN || !HF_USERNAME) return;
  try {
    const response = await fetch("https://huggingface.co/api/repos/create", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${HF_TOKEN}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ type: "dataset", name: DATASET_NAME, private: true }),
    });
    // 409 Conflict is treated as "repo already exists", which is the normal
    // case on every run after the first — stay quiet for it.
    if (!response.ok && response.status !== 409) {
      console.warn(
        `[db-sync] Dataset create request failed: HTTP ${response.status} ${response.statusText}`,
      );
    }
  } catch (err) {
    console.warn("[db-sync] Dataset create request failed:", err.message);
  }
}
/**
 * Restore the SQLite database from the HF Dataset backup on first boot.
 *
 * Skips entirely when HF credentials are missing or when a local DB already
 * exists at DB_PATH. Otherwise it clones (or updates) the dataset repo into
 * CLONE_DIR and, if the repo contains `raqim.db`, installs it at DB_PATH.
 *
 * @returns {Promise<void>}
 */
export async function pullDb() {
  if (!HF_TOKEN || !HF_USERNAME) {
    console.log("[db-sync] No HF credentials — skipping pull.");
    return;
  }

  // /data is HF Spaces' persistent volume — it survives container restarts.
  // Only restore from the HF Dataset backup when there is no local DB at all
  // (i.e. first boot on a freshly created or fully reset Space).
  // Overwriting an existing /data/raqim.db would roll back any data that was
  // written after the last backup push, causing files to disappear.
  if (fs.existsSync(DB_PATH)) {
    console.log("[db-sync] Local DB already exists — skipping restore to preserve recent data.");
    return;
  }

  await ensureDataset();

  // Ensure DB dir exists
  fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });

  // Clone or update the dataset repo, and remember whether git succeeded so
  // that a failed download is not silently reported as "no backup exists".
  const fetched = fs.existsSync(CLONE_DIR)
    ? run(`git -C "${CLONE_DIR}" pull --depth=1 origin main`)
    : run(`git clone --depth=1 "${gitUrl()}" "${CLONE_DIR}"`);
  if (!fetched) {
    console.warn("[db-sync] ✗ Could not fetch the dataset repo — backup may be missing or stale.");
  }

  const srcDb = path.join(CLONE_DIR, "raqim.db");
  if (!fs.existsSync(srcDb)) {
    console.log(
      fetched
        ? "[db-sync] No existing DB in dataset — starting fresh."
        : "[db-sync] No backup available — starting fresh.",
    );
    return;
  }

  // Remove any stale WAL/SHM files — they belong to a different DB instance.
  for (const suffix of ["-wal", "-shm"]) {
    const stale = DB_PATH + suffix;
    if (fs.existsSync(stale)) {
      fs.rmSync(stale);
      console.log(`[db-sync] Removed stale ${suffix} file.`);
    }
  }

  // Copy to a sibling temp file and rename it into place. A copy interrupted
  // half-way must never leave a truncated file at DB_PATH: the existence
  // check above would treat it as a valid local DB on the next boot and the
  // restore would never be retried.
  const partial = `${DB_PATH}.restore-tmp`;
  fs.copyFileSync(srcDb, partial);
  fs.renameSync(partial, DB_PATH);
  console.log("[db-sync] ✓ DB restored from HF Dataset (first boot).");
}
/**
 * Back up the local SQLite database to the HF Dataset repo via git.
 *
 * No-op when HF credentials are missing or when DB_PATH does not exist.
 * Otherwise: make sure the repo is cloned into CLONE_DIR, checkpoint the
 * WAL, copy the DB file into the clone, commit it and push to `main`.
 * Git failures are logged and left for the next invocation to retry.
 *
 * @returns {Promise<void>}
 */
export async function pushDb() {
  if (!HF_TOKEN || !HF_USERNAME) return;
  if (!fs.existsSync(DB_PATH)) return;

  // Ensure repo is cloned. If the clone fails there is no directory to copy
  // into, so bail out with a warning instead of letting copyFileSync throw.
  if (!fs.existsSync(CLONE_DIR)) {
    await ensureDataset();
    const cloned = run(`git clone --depth=1 "${gitUrl()}" "${CLONE_DIR}"`);
    if (!cloned || !fs.existsSync(CLONE_DIR)) {
      console.warn("[db-sync] ✗ Could not clone dataset repo — will retry next interval.");
      return;
    }
  }

  // Checkpoint WAL into the main DB file before copying so the backup
  // is a self-contained, consistent snapshot without a dangling WAL.
  try {
    const tmpDb = new DatabaseSync(DB_PATH);
    try {
      tmpDb.exec("PRAGMA wal_checkpoint(TRUNCATE)");
    } finally {
      // Always release the extra connection, even when the checkpoint throws.
      tmpDb.close();
    }
  } catch (e) {
    console.warn("[db-sync] WAL checkpoint failed (non-fatal):", e.message);
  }

  fs.copyFileSync(DB_PATH, path.join(CLONE_DIR, "raqim.db"));
  run(`git -C "${CLONE_DIR}" config user.email "sync@raqim.app"`);
  run(`git -C "${CLONE_DIR}" config user.name "RAQIM Sync"`);
  run(`git -C "${CLONE_DIR}" add raqim.db`);
  const committed = run(`git -C "${CLONE_DIR}" commit -m "DB sync $(date '+%Y-%m-%d %H:%M')"`);

  // `git commit` fails when nothing changed. That alone does not mean there
  // is nothing to push: a commit from an earlier run whose push failed may
  // still be local-only. `git diff --quiet` exits non-zero when HEAD differs
  // from origin/main (or when either ref cannot be resolved), in which case
  // the push is attempted again.
  const inSync =
    !committed &&
    run(`git -C "${CLONE_DIR}" diff --quiet origin/main HEAD`, { stdio: "ignore" });
  if (inSync) return;

  const pushed = run(`git -C "${CLONE_DIR}" push origin HEAD:main`);
  if (pushed) console.log("[db-sync] ✓ DB saved to HF Dataset.");
  else console.warn("[db-sync] ✗ Push failed — will retry next interval.");
}
// ── CLI entry point: node scripts/db-hf-sync.mjs pull|push ──────────────
// The first command-line argument selects the operation; any other value
// (or none) leaves the module as a plain library with no side effects.
const action = process.argv[2];
switch (action) {
  case "pull":
    await pullDb();
    break;
  case "push":
    await pushDb();
    break;
  default:
    break;
}