/**
 * Webhook-driven mirror: on an authenticated POST, copies files from a
 * Hugging Face repository (`main` revision) into the `CONTENT_VAULT` R2 bucket.
 *
 * When the webhook payload names a new commit on `refs/heads/main`, only the
 * files touched by that commit's diff are synced/deleted; otherwise (or when
 * the diff is unavailable or empty) every file in the repository is re-synced.
 */

/** Bindings and variables this Worker reads from its environment. */
interface Env {
  /** R2 bucket that receives (and loses) objects keyed by repository path. */
  CONTENT_VAULT: R2Bucket;
  /** Value the `X-Webhook-Secret` request header must equal. */
  HF_WEBHOOK_SECRET: string;
  /** Bearer token sent to the Hub; an empty value means "send no Authorization header". */
  HF_TOKEN: string;
  /** Repository identifier interpolated into Hub URLs. */
  HF_REPO: string;
  /** Origin (no trailing slash) prepended to every Hub URL. */
  HF_BASE_URL: string;
}

/** One entry of a Hub tree listing; only `"file"` and `"directory"` types are acted on. */
interface HFTreeItem {
  type: string;
  path: string;
  size?: number;
}

/** Webhook body. Only `event` (logged) and `updatedRefs` are read by this Worker. */
interface HFWebhookPayload {
  event: { action: string; scope: string };
  repo: { name: string; type: string };
  updatedRefs?: Array<{ ref: string; oldSha: string; newSha: string }>;
}

/** Lower-cased file extension (including the dot) -> Content-Type stored with the object. */
const CONTENT_TYPES: Record<string, string> = {
  ".zip": "application/zip",
  ".yaml": "text/yaml",
  ".yml": "text/yaml",
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".json": "application/json",
  ".md": "text/markdown",
  ".txt": "text/plain",
};

/**
 * Picks a Content-Type from the path's extension (case-insensitive).
 *
 * @returns The mapped type, or `application/octet-stream` when the path has no
 *          dot or the extension is not in `CONTENT_TYPES`.
 */
function getContentType(path: string): string {
  const dot = path.lastIndexOf(".");
  if (dot === -1) {
    return "application/octet-stream";
  }
  const ext = path.slice(dot).toLowerCase();
  return CONTENT_TYPES[ext] ?? "application/octet-stream";
}

/** Percent-encodes each `/`-separated segment of a path, keeping the slashes. */
function encodeFilePath(filePath: string): string {
  return filePath.split("/").map(encodeURIComponent).join("/");
}

/** Request headers for the Hub: a bearer token when one is configured, nothing otherwise. */
function authHeaders(token: string): Record<string, string> {
  return token ? { Authorization: `Bearer ${token}` } : {};
}

/**
 * Extracts the `rel="next"` target from a `Link` response header.
 *
 * @param linkHeader Raw header value, or `null` when the header is absent.
 * @param currentUrl URL of the page just fetched; used to resolve a relative target.
 * @returns Absolute URL of the next page, or `undefined` when there is none.
 */
function nextPageUrl(linkHeader: string | null, currentUrl: string): string | undefined {
  if (!linkHeader) {
    return undefined;
  }
  const target = /<([^>]+)>\s*;\s*rel="?next"?/i.exec(linkHeader)?.[1];
  if (!target) {
    return undefined;
  }
  try {
    return new URL(target, currentUrl).toString();
  } catch {
    return undefined;
  }
}

/**
 * Recursively lists every file path under the repository's `main` revision.
 *
 * Directories are descended depth-first in listing order. Each listing follows
 * `Link: <...>; rel="next"` headers when the server sends them, so entries
 * beyond the first page are not dropped.
 *
 * @throws Error when any listing request returns a non-2xx status.
 */
async function listRepoFiles(baseUrl: string, repo: string, token: string): Promise<string[]> {
  const files: string[] = [];
  const headers = authHeaders(token);

  async function fetchTree(path: string): Promise<void> {
    // Directory names are encoded the same way syncFile encodes file paths,
    // so names containing spaces, '#', '?' etc. produce a valid URL.
    let url: string | undefined =
      `${baseUrl}/api/models/${repo}/tree/main${path ? "/" + encodeFilePath(path) : ""}`;

    while (url !== undefined) {
      const resp: Response = await fetch(url, { headers });
      if (!resp.ok) {
        throw new Error(`Failed to list ${url}: ${resp.status} ${resp.statusText}`);
      }
      const items: HFTreeItem[] = await resp.json();
      for (const item of items) {
        if (item.type === "file") {
          files.push(item.path);
        } else if (item.type === "directory") {
          await fetchTree(item.path);
        }
      }
      const next = nextPageUrl(resp.headers.get("Link"), url);
      // A self-referencing "next" link would loop forever; treat it as the end.
      url = next !== url ? next : undefined;
    }
  }

  await fetchTree("");
  return files;
}

const DIFF_HEADER = /^diff --git a\/(.+) b\/(.+)$/;
const RENAME_FROM = "rename from ";
const RENAME_TO = "rename to ";

/**
 * Reduces git patch text to the set of paths to upload and the set to delete.
 *
 * Only column-0 header lines are inspected; hunk content is always prefixed
 * with ' ', '+', '-' or '\', so it cannot be mistaken for a header.
 * - `diff --git a/X b/Y` starts an entry for `Y`.
 * - `deleted file mode` marks the current entry as a deletion.
 * - `rename from OLD` schedules `OLD` for deletion; `rename to NEW` supplies
 *   the exact new path.
 * A path that is both removed and (re)written in the same diff is reported
 * only as added, since the upload replaces it anyway.
 *
 * NOTE(review): paths that git emits in C-style quotes, and non-rename paths
 * that themselves contain " b/", are not decoded/disambiguated here.
 */
function parseDiff(diff: string): { added: string[]; deleted: string[] } {
  const kept = new Set<string>();
  const removed = new Set<string>();
  let current: string | undefined;
  let currentDeleted = false;

  const flush = (): void => {
    if (current !== undefined) {
      (currentDeleted ? removed : kept).add(current);
    }
  };

  for (const rawLine of diff.split("\n")) {
    // Tolerate CRLF patches.
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
    const header = DIFF_HEADER.exec(line);
    if (header) {
      flush();
      current = header[2];
      currentDeleted = false;
    } else if (current === undefined) {
      continue; // Preamble before the first file entry.
    } else if (line.startsWith("deleted file mode")) {
      currentDeleted = true;
    } else if (line.startsWith(RENAME_FROM)) {
      removed.add(line.slice(RENAME_FROM.length));
    } else if (line.startsWith(RENAME_TO)) {
      current = line.slice(RENAME_TO.length);
    }
  }
  flush();

  return {
    added: [...kept],
    deleted: [...removed].filter((path) => !kept.has(path)),
  };
}

/**
 * Fetches the patch for commit `newSha` and classifies the paths it touches.
 *
 * @returns `added`: paths to (re)upload; `deleted`: paths to remove (deleted
 *          files and the old names of renamed files). Both are empty when the
 *          diff request fails, which callers treat as "fall back to full sync".
 *
 * NOTE(review): only the diff of `newSha` itself is requested; if a push can
 * carry several commits, files changed only in earlier ones would be missed —
 * confirm against the webhook's `oldSha`/`newSha` semantics.
 */
async function getChangedFiles(
  baseUrl: string,
  repo: string,
  token: string,
  newSha: string,
): Promise<{ added: string[]; deleted: string[] }> {
  const url = `${baseUrl}/${repo}/commit/${newSha}.diff`;
  const resp = await fetch(url, { headers: authHeaders(token) });
  if (!resp.ok) {
    console.log(`Failed to get diff for ${newSha}: ${resp.status}, falling back to full sync`);
    return { added: [], deleted: [] };
  }
  return parseDiff(await resp.text());
}

/**
 * Downloads one file from the repository's `main` revision and stores it in
 * the bucket under the same path, with a Content-Type derived from the
 * extension and a 30-day public cache lifetime.
 *
 * @returns `status: "ok"` on success, `status: "error: <code>"` when the
 *          download returns a non-2xx status. Network and bucket errors throw.
 */
async function syncFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  const url = `${env.HF_BASE_URL}/${env.HF_REPO}/resolve/main/${encodeFilePath(filePath)}`;
  const resp = await fetch(url, { headers: authHeaders(env.HF_TOKEN) });
  if (!resp.ok) {
    return { path: filePath, status: `error: ${resp.status}` };
  }

  // Stream the body straight through when there is one; otherwise buffer it.
  const body = resp.body ?? (await resp.arrayBuffer());
  await env.CONTENT_VAULT.put(filePath, body, {
    httpMetadata: {
      contentType: getContentType(filePath),
      cacheControl: "public, max-age=2592000",
    },
  });
  return { path: filePath, status: "ok" };
}

/** Removes one object from the bucket and reports it as `"deleted"`. */
async function deleteFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  await env.CONTENT_VAULT.delete(filePath);
  return { path: filePath, status: "deleted" };
}

/** True for paths that must never be mirrored: anything starting with `.git`, `.env`, and `worker/`. */
const SKIP = (f: string) => f.startsWith(".git") || f === ".env" || f.startsWith("worker/");

/** Maximum number of `fn` calls `runBatch` keeps in flight at once. */
const CONCURRENCY = 5;

/**
 * Runs `fn` over `items` with at most `CONCURRENCY` calls in flight.
 *
 * Items are started in order. The returned promise rejects as soon as any
 * call rejects; it resolves once every item has been processed.
 */
async function runBatch<T>(items: T[], fn: (item: T) => Promise<void>): Promise<void> {
  // Private copy so the caller mutating `items` mid-run cannot affect the batch.
  const queue = [...items];
  let cursor = 0;

  const worker = async (): Promise<void> => {
    while (cursor < queue.length) {
      // In range by the loop guard; the cast only covers index-access typing.
      const item = queue[cursor++] as T;
      await fn(item);
    }
  };

  await Promise.all(Array.from({ length: Math.min(CONCURRENCY, queue.length) }, worker));
}

/**
 * Compares two strings byte-by-byte without stopping at the first mismatch,
 * so the time taken does not reveal how long a matching prefix is.
 */
function timingSafeEqual(a: string, b: string): boolean {
  const encoder = new TextEncoder();
  const left = encoder.encode(a);
  const right = encoder.encode(b);
  let diff = left.length ^ right.length;
  const span = Math.max(left.length, right.length);
  for (let i = 0; i < span; i++) {
    diff |= (left[i] ?? 0) ^ (right[i] ?? 0);
  }
  return diff === 0;
}

/** Human-readable message for a caught value. */
function errorMessage(err: unknown): string {
  return err instanceof Error ? err.message : "Unknown error";
}

/**
 * Performs one sync pass; never rejects (failures are logged).
 *
 * @param newSha Commit on `main` to diff for an incremental sync, or
 *               `undefined` to re-sync every file in the repository.
 */
async function runSync(env: Env, newSha: string | undefined): Promise<void> {
  try {
    let filesToSync: string[] = [];
    let filesToDelete: string[] = [];

    if (newSha !== undefined) {
      const { added, deleted } = await getChangedFiles(
        env.HF_BASE_URL,
        env.HF_REPO,
        env.HF_TOKEN,
        newSha,
      );
      if (added.length > 0 || deleted.length > 0) {
        filesToSync = added.filter((f) => !SKIP(f));
        filesToDelete = deleted.filter((f) => !SKIP(f));
        console.log(`Incremental sync: ${filesToSync.length} to sync, ${filesToDelete.length} to delete`);
      } else {
        const allFiles = await listRepoFiles(env.HF_BASE_URL, env.HF_REPO, env.HF_TOKEN);
        filesToSync = allFiles.filter((f) => !SKIP(f));
        console.log(`Full sync fallback: ${filesToSync.length} files`);
      }
    } else {
      const allFiles = await listRepoFiles(env.HF_BASE_URL, env.HF_REPO, env.HF_TOKEN);
      filesToSync = allFiles.filter((f) => !SKIP(f));
      console.log(`Full sync: ${filesToSync.length} files`);
    }

    let synced = 0;
    let removed = 0;
    let failed = 0;

    // Each file is guarded individually: one failing transfer must not reject
    // the batch and abandon the files other workers have not reached yet.
    await runBatch(filesToDelete, async (file) => {
      try {
        const result = await deleteFile(env, file);
        removed++;
        console.log(`${result.path}: ${result.status}`);
      } catch (err: unknown) {
        failed++;
        console.error(`${file}: error: ${errorMessage(err)}`);
      }
    });

    await runBatch(filesToSync, async (file) => {
      try {
        const result = await syncFile(env, file);
        if (result.status === "ok") {
          synced++;
        } else {
          failed++;
        }
        console.log(`${result.path}: ${result.status}`);
      } catch (err: unknown) {
        failed++;
        console.error(`${file}: error: ${errorMessage(err)}`);
      }
    });

    console.log(`Sync complete: ${synced} synced, ${removed} deleted, ${failed} failed`);
  } catch (err: unknown) {
    console.error("Sync failed:", errorMessage(err));
  }
}

const worker = {
  /**
   * Webhook entry point.
   *
   * Responds 405 to non-POST requests, 401 when `X-Webhook-Secret` is missing
   * or does not equal `HF_WEBHOOK_SECRET`, and 400 when the body is not a JSON
   * object. Otherwise schedules the sync with `ctx.waitUntil` and immediately
   * answers `{"status":"sync started"}`.
   */
  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
    if (request.method !== "POST") {
      return new Response("Method not allowed", { status: 405 });
    }

    const secret = request.headers.get("X-Webhook-Secret");
    if (!secret || !env.HF_WEBHOOK_SECRET || !timingSafeEqual(secret, env.HF_WEBHOOK_SECRET)) {
      return new Response("Unauthorized", { status: 401 });
    }

    let payload: HFWebhookPayload;
    try {
      payload = await request.json();
    } catch {
      return new Response("Invalid JSON", { status: 400 });
    }
    if (typeof payload !== "object" || payload === null) {
      return new Response("Invalid payload", { status: 400 });
    }
    console.log("Webhook event:", JSON.stringify(payload.event));

    const updatedRefs = Array.isArray(payload.updatedRefs) ? payload.updatedRefs : [];
    const mainRef = updatedRefs.find((r) => r.ref === "refs/heads/main");

    // The sync outlives the response; waitUntil keeps the Worker alive for it.
    ctx.waitUntil(runSync(env, mainRef?.newSha));

    return new Response(JSON.stringify({ status: "sync started" }), {
      headers: { "Content-Type": "application/json" },
    });
  },
};

export default worker;