raqim / scripts /db-hf-sync.mjs
RAQIM Deploy
Deploy RAQIM 2026-05-02 20:42
c2e719f
/**
* HF Dataset ↔ SQLite DB sync.
* On startup: restore raqim.db from a private HF Dataset, but only when no local DB exists yet.
* On interval / SIGTERM: push raqim.db back to the dataset.
*
* Uses git (already installed in the Docker image) to push/pull,
* which is the most reliable method for HF Hub repos.
*/
import { execSync, spawnSync } from "child_process";
import fs from "fs";
import path from "path";
import { fileURLToPath } from "url";
import { DatabaseSync } from "node:sqlite";
// Directory containing this script (ES modules have no built-in __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Hugging Face credentials, read from the environment. When either one is
// missing, the sync functions below return without doing anything.
const HF_TOKEN = process.env.HF_TOKEN;
const HF_USERNAME = process.env.HF_USERNAME;
// Name of the HF dataset repo (under HF_USERNAME) that stores the DB backup.
const DATASET_NAME = "raqim-db";
// Location of the live SQLite database; overridable through the DB_PATH env var.
const DB_PATH = process.env.DB_PATH || "/data/raqim.db";
// Local git working copy of the dataset repo used for pull/push.
const CLONE_DIR = "/tmp/raqim-db-repo";
/**
 * Runs a shell command synchronously and reports whether it succeeded.
 *
 * The child's stdio is inherited by default so its output shows up in this
 * process's logs; any option passed in `opts` takes precedence.
 *
 * @param {string} cmd - Shell command line to execute.
 * @param {object} [opts] - Extra `execSync` options, merged over the defaults.
 * @returns {boolean} `true` when the command completed without throwing,
 *   `false` on any failure (errors are swallowed, never rethrown).
 */
function run(cmd, opts = {}) {
  let succeeded = false;
  try {
    const execOptions = { stdio: "inherit", ...opts };
    execSync(cmd, execOptions);
    succeeded = true;
  } catch {
    // Failure is reported through the return value only.
  }
  return succeeded;
}
/**
 * Builds the authenticated HTTPS remote URL of the HF dataset repo.
 *
 * The credentials are assigned through the URL API so that reserved
 * characters in the username or token (such as "@", ":" or "/") are
 * percent-encoded instead of corrupting the URL. For plain alphanumeric
 * values the result is identical to simple string interpolation.
 *
 * @returns {string} Remote URL with embedded credentials. It contains the
 *   token, so it must not be logged.
 */
function gitUrl() {
  const remote = new URL(
    `https://huggingface.co/datasets/${encodeURIComponent(HF_USERNAME)}/${DATASET_NAME}`,
  );
  remote.username = HF_USERNAME;
  remote.password = HF_TOKEN;
  return remote.href;
}
/**
 * Best-effort creation of the private HF dataset repo that holds the backup.
 *
 * Does nothing when credentials are missing. Failures never throw: they are
 * logged as warnings so that a misconfigured token or a network outage is
 * visible in the logs instead of being silently ignored.
 *
 * @returns {Promise<void>}
 */
async function ensureDataset() {
  if (!HF_TOKEN || !HF_USERNAME) return;
  try {
    const response = await fetch("https://huggingface.co/api/repos/create", {
      method: "POST",
      headers: {
        Authorization: `Bearer ${HF_TOKEN}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({ type: "dataset", name: DATASET_NAME, private: true }),
    });
    // NOTE(review): 409 is assumed to mean "repo already exists", which is the
    // normal case on every run after the first — confirm against the Hub API.
    if (!response.ok && response.status !== 409) {
      console.warn(`[db-sync] Dataset create request failed (non-fatal): HTTP ${response.status}`);
    }
  } catch (err) {
    console.warn("[db-sync] Dataset create request failed (non-fatal):", err.message);
  }
}
/**
 * Restores the SQLite DB from the HF dataset backup on first boot.
 *
 * Skips the restore when credentials are missing or when a local DB already
 * exists. Otherwise it clones (or updates) the dataset repo into CLONE_DIR
 * and, if the repo contains `raqim.db`, installs it at DB_PATH.
 *
 * @returns {Promise<void>}
 */
export async function pullDb() {
  if (!HF_TOKEN || !HF_USERNAME) {
    console.log("[db-sync] No HF credentials — skipping pull.");
    return;
  }
  // /data is HF Spaces' persistent volume — it survives container restarts.
  // Only restore from the HF Dataset backup when there is no local DB at all
  // (i.e. first boot on a freshly created or fully reset Space).
  // Overwriting an existing /data/raqim.db would roll back any data that was
  // written after the last backup push, causing files to disappear.
  if (fs.existsSync(DB_PATH)) {
    console.log("[db-sync] Local DB already exists — skipping restore to preserve recent data.");
    return;
  }
  await ensureDataset();
  // Ensure DB dir exists
  fs.mkdirSync(path.dirname(DB_PATH), { recursive: true });
  // Clone or update the dataset repo, remembering whether git succeeded so a
  // network/auth failure is not mistaken for "the dataset has no backup".
  const fetched = fs.existsSync(CLONE_DIR)
    ? run(`git -C "${CLONE_DIR}" pull --depth=1 origin main`)
    : run(`git clone --depth=1 "${gitUrl()}" "${CLONE_DIR}"`);
  if (!fetched) {
    console.warn("[db-sync] ✗ Could not fetch the dataset repo — a restored DB may be stale or missing.");
  }
  const srcDb = path.join(CLONE_DIR, "raqim.db");
  if (!fs.existsSync(srcDb)) {
    if (fetched) {
      console.log("[db-sync] No existing DB in dataset — starting fresh.");
    } else {
      console.warn("[db-sync] No backup could be retrieved — starting fresh.");
    }
    return;
  }
  // Copy to a temp file in the same directory first. If the process dies
  // mid-copy, DB_PATH does not exist yet, so the next boot retries the restore
  // instead of treating a truncated file as the real DB.
  const partialDb = `${DB_PATH}.restore-tmp`;
  fs.copyFileSync(srcDb, partialDb);
  // Remove any stale WAL/SHM files — they belong to a different DB instance.
  for (const suffix of ["-wal", "-shm"]) {
    const stale = DB_PATH + suffix;
    if (fs.existsSync(stale)) {
      fs.rmSync(stale);
      console.log(`[db-sync] Removed stale ${suffix} file.`);
    }
  }
  fs.renameSync(partialDb, DB_PATH);
  console.log("[db-sync] ✓ DB restored from HF Dataset (first boot).");
}
/**
 * Backs up the local SQLite DB to the HF dataset repo.
 *
 * Does nothing when credentials or the local DB are missing. Otherwise it
 * makes sure the dataset repo is cloned, checkpoints the WAL, copies the DB
 * into the clone, commits, and pushes to `main`. Git failures are logged,
 * never thrown.
 *
 * @returns {Promise<void>}
 */
export async function pushDb() {
  if (!HF_TOKEN || !HF_USERNAME) return;
  if (!fs.existsSync(DB_PATH)) return;
  // Ensure repo is cloned. Bail out if the clone fails: copying into a
  // missing CLONE_DIR would otherwise throw ENOENT.
  if (!fs.existsSync(CLONE_DIR)) {
    await ensureDataset();
    const cloned = run(`git clone --depth=1 "${gitUrl()}" "${CLONE_DIR}"`);
    if (!cloned || !fs.existsSync(CLONE_DIR)) {
      console.warn("[db-sync] ✗ Clone failed — will retry next interval.");
      return;
    }
  }
  // Checkpoint WAL into the main DB file before copying so the backup
  // is a self-contained, consistent snapshot without a dangling WAL.
  let tmpDb;
  try {
    tmpDb = new DatabaseSync(DB_PATH);
    tmpDb.exec("PRAGMA wal_checkpoint(TRUNCATE)");
  } catch (e) {
    console.warn("[db-sync] WAL checkpoint failed (non-fatal):", e.message);
  } finally {
    // Always release the handle, even when the checkpoint itself threw.
    try {
      tmpDb?.close();
    } catch {
      // Nothing further to release.
    }
  }
  fs.copyFileSync(DB_PATH, path.join(CLONE_DIR, "raqim.db"));
  run(`git -C "${CLONE_DIR}" config user.email "sync@raqim.app"`);
  run(`git -C "${CLONE_DIR}" config user.name "RAQIM Sync"`);
  run(`git -C "${CLONE_DIR}" add raqim.db`);
  const committed = run(`git -C "${CLONE_DIR}" commit -m "DB sync $(date '+%Y-%m-%d %H:%M')"`);
  if (!committed) {
    // The commit fails when nothing changed. An earlier commit may still be
    // unpushed (a previous push failed), so compare HEAD with the
    // remote-tracking branch and stop only when the two already match.
    const inSync = run(`git -C "${CLONE_DIR}" diff --quiet origin/main HEAD`, { stdio: "ignore" });
    if (inSync) return;
  }
  const pushed = run(`git -C "${CLONE_DIR}" push origin HEAD:main`);
  if (pushed) console.log("[db-sync] ✓ DB saved to HF Dataset.");
  else console.warn("[db-sync] ✗ Push failed — will retry next interval.");
}
// ── CLI entry: node scripts/db-hf-sync.mjs pull|push ────────────────────
// The first CLI argument selects the sync direction; any other value
// (or no argument at all) is a no-op.
const action = process.argv[2];
switch (action) {
  case "pull":
    await pullDb();
    break;
  case "push":
    await pushDb();
    break;
  default:
    break;
}