/**
 * Bindings and configuration values the Worker runtime hands to the handler.
 */
interface Env {
  /** R2 bucket that mirrored repository files are written to and deleted from. */
  CONTENT_VAULT: R2Bucket;

  /** Shared secret that the `X-Webhook-Secret` request header must equal. */
  HF_WEBHOOK_SECRET: string;

  /** Token sent upstream as `Authorization: Bearer …`; when empty, no auth header is sent. */
  HF_TOKEN: string;

  /** Repository identifier interpolated into the upstream listing, diff and download URLs. */
  HF_REPO: string;

  /**
   * Prefix of every upstream URL. URLs are built as `${HF_BASE_URL}/…`, so this
   * is presumably an origin without a trailing slash — confirm against deployment config.
   */
  HF_BASE_URL: string;
}
|
|
/**
 * One entry of a repository tree listing as consumed by `listRepoFiles`.
 */
interface HFTreeItem {
  /** Entry kind; the listing code acts only on `"file"` and `"directory"`. */
  type: string;

  /** Path of the entry; collected for files, and used to request the subtree for directories. */
  path: string;

  /** Optional size of the entry; not read anywhere in this file. */
  size?: number;
}
|
|
/**
 * JSON body of an incoming webhook request, as far as this Worker reads it.
 */
interface HFWebhookPayload {
  /** Event descriptor; only logged by the handler. */
  event: {
    action: string;
    scope: string;
  };

  /** Repository the event refers to; not read by the handler in this file. */
  repo: {
    name: string;
    type: string;
  };

  /**
   * Refs moved by the event. The handler looks for `refs/heads/main` and uses
   * its `newSha` to request a commit diff; when absent, a full sync is run.
   */
  updatedRefs?: {
    ref: string;
    oldSha: string;
    newSha: string;
  }[];
}
|
|
/** Maps a lower-cased file extension (including the leading dot) to its MIME type. */
const CONTENT_TYPES: Record<string, string> = {
  // Text-like formats
  ".json": "application/json",
  ".md": "text/markdown",
  ".txt": "text/plain",
  ".yaml": "text/yaml",
  ".yml": "text/yaml",
  // Images
  ".jpeg": "image/jpeg",
  ".jpg": "image/jpeg",
  ".png": "image/png",
  // Archives
  ".zip": "application/zip",
};

/**
 * Picks the MIME type to store alongside an object, based on its file extension.
 *
 * @param path Object key / repository path of the file.
 * @returns The mapped MIME type, or `application/octet-stream` when the
 *          extension is unknown or the path has none.
 */
function getContentType(path: string): string {
  const lastDot = path.lastIndexOf(".");
  // With no dot, lastDot is -1 and slice(-1) yields the final character, which
  // never matches a table key because every key starts with ".".
  const suffix = path.slice(lastDot).toLowerCase();
  const mapped = CONTENT_TYPES[suffix];
  if (mapped) {
    return mapped;
  }
  return "application/octet-stream";
}
|
|
/**
 * Percent-encodes a repository path for use inside a URL while keeping the
 * `/` separators between segments intact.
 *
 * @param filePath Slash-separated path.
 * @returns The path with each segment passed through `encodeURIComponent`.
 */
function encodeFilePath(filePath: string): string {
  const encodedSegments: string[] = [];
  for (const segment of filePath.split("/")) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  return encodedSegments.join("/");
}
|
|
/**
 * Recursively lists every file path on the `main` revision of a model repository.
 *
 * Each directory is requested from `${baseUrl}/api/models/${repo}/tree/main/<dir>`
 * and sub-directories are descended into in listing order, so the result is a
 * depth-first ordering of file paths.
 *
 * @param baseUrl Prefix of the upstream API (no trailing slash expected).
 * @param repo    Repository identifier interpolated into the URL as-is.
 * @param token   Bearer token; when empty, requests are sent without an auth header.
 * @returns All file paths found in the tree.
 * @throws Error when a listing request fails, returns something other than an
 *         array, or pagination links loop back on themselves.
 */
async function listRepoFiles(baseUrl: string, repo: string, token: string): Promise<string[]> {
  const files: string[] = [];
  const headers: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};

  // Extracts the rel="next" target from an RFC 8288 `Link` header, if present.
  function nextPageUrl(linkHeader: string | null): string | null {
    if (!linkHeader) return null;
    const match = /<([^>]+)>\s*;\s*rel="?next"?/i.exec(linkHeader);
    return match?.[1] ?? null;
  }

  async function fetchTree(path: string): Promise<void> {
    // Directory names may contain spaces, '#', '?' etc.; encode each segment so
    // the request targets the intended directory.
    const encodedPath = path
      .split("/")
      .map((segment) => encodeURIComponent(segment))
      .join("/");
    let pageUrl: string | null = `${baseUrl}/api/models/${repo}/tree/main${path ? "/" + encodedPath : ""}`;
    const visitedPages = new Set<string>();

    // Large directories are returned in pages; keep following rel="next" links.
    while (pageUrl !== null) {
      if (visitedPages.has(pageUrl)) {
        throw new Error(`Pagination loop while listing ${pageUrl}`);
      }
      visitedPages.add(pageUrl);

      const resp: Response = await fetch(pageUrl, { headers });
      if (!resp.ok) {
        throw new Error(`Failed to list ${pageUrl}: ${resp.status} ${resp.statusText}`);
      }

      const items: HFTreeItem[] = await resp.json();
      if (!Array.isArray(items)) {
        throw new Error(`Unexpected tree listing response from ${pageUrl}`);
      }

      for (const item of items) {
        if (item.type === "file") {
          files.push(item.path);
        } else if (item.type === "directory") {
          await fetchTree(item.path);
        }
      }

      const next = nextPageUrl(resp.headers.get("Link"));
      // Resolve against the current page in case the server sends a relative link.
      pageUrl = next !== null ? new URL(next, pageUrl).toString() : null;
    }
  }

  await fetchTree("");
  return files;
}
|
|
/**
 * Derives the files touched by a single commit from its unified diff.
 *
 * The diff is fetched from `${baseUrl}/${repo}/commit/${newSha}.diff` and its
 * `diff --git` headers are parsed:
 *  - the `b/` path of every entry is reported in `added` (new or modified),
 *  - entries marked `deleted file mode` are reported in `deleted` instead,
 *  - the old path of a rename (`rename from …`) is reported in `deleted`
 *    unless the same path is also written by another entry, so a rename does
 *    not leave a stale copy behind.
 *
 * NOTE(review): only the commit identified by `newSha` is inspected; if a push
 * contains several commits, changes from the earlier ones are presumably not
 * covered here — confirm whether that matters for callers.
 *
 * @param baseUrl Prefix of the upstream site (no trailing slash expected).
 * @param repo    Repository identifier interpolated into the URL as-is.
 * @param token   Bearer token; when empty, no auth header is sent.
 * @param newSha  Commit whose diff is requested.
 * @returns De-duplicated `added` and `deleted` path lists in order of
 *          appearance. Both are empty when the diff could not be fetched,
 *          which callers treat as "fall back to a full sync".
 */
async function getChangedFiles(
  baseUrl: string,
  repo: string,
  token: string,
  newSha: string,
): Promise<{ added: string[]; deleted: string[] }> {
  const url = `${baseUrl}/${repo}/commit/${newSha}.diff`;
  const resp = await fetch(url, {
    headers: token ? { Authorization: `Bearer ${token}` } : {},
  });

  if (!resp.ok) {
    console.log(`Failed to get diff for ${newSha}: ${resp.status}, falling back to full sync`);
    return { added: [], deleted: [] };
  }

  const diff = await resp.text();
  const headerPattern = /^diff --git a\/(.+) b\/(.+)$/;
  const renamePrefix = "rename from ";

  const added = new Set<string>();
  const deleted = new Set<string>();
  const renamedFrom = new Set<string>();
  // Target path of the entry whose extended header lines are being read.
  let current: string | null = null;

  for (const rawLine of diff.split("\n")) {
    // Tolerate CRLF line endings.
    const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;

    const target = headerPattern.exec(line)?.[2];
    if (target !== undefined) {
      current = target;
      added.add(target);
      continue;
    }
    if (current === null) continue;

    if (line.startsWith("deleted file mode")) {
      added.delete(current);
      deleted.add(current);
    } else if (line.startsWith(renamePrefix)) {
      renamedFrom.add(line.slice(renamePrefix.length));
    } else if (line.startsWith("@@")) {
      // Hunk content starts here; header metadata for this entry is over.
      current = null;
    }
  }

  // A renamed file's old path must be removed, unless another entry in the
  // same diff writes that path again.
  for (const oldPath of renamedFrom) {
    if (!added.has(oldPath)) deleted.add(oldPath);
  }

  return { added: [...added], deleted: [...deleted] };
}
|
|
/**
 * Downloads one repository file from `…/resolve/main/<path>` and stores it in
 * the `CONTENT_VAULT` bucket under the same path.
 *
 * Failures are reported through the returned `status` rather than thrown, so a
 * single bad file does not abort a batch of uploads.
 *
 * @param env      Worker bindings (bucket, repo coordinates, token).
 * @param filePath Repository path; also used as the object key.
 * @returns `{ path, status }` where `status` is `"ok"`, `"error: <http status>"`
 *          for a non-2xx download, or `"error: <message>"` when the download or
 *          the bucket write threw.
 */
async function syncFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  try {
    const url = `${env.HF_BASE_URL}/${env.HF_REPO}/resolve/main/${encodeFilePath(filePath)}`;
    const resp = await fetch(url, {
      headers: env.HF_TOKEN ? { Authorization: `Bearer ${env.HF_TOKEN}` } : {},
    });

    if (!resp.ok) {
      return { path: filePath, status: `error: ${resp.status}` };
    }

    // R2 only accepts a stream whose length is known up front. Stream the body
    // when the response advertises a Content-Length; otherwise buffer it so the
    // write does not fail on an unknown-length stream.
    const hasKnownLength = resp.headers.get("content-length") !== null;
    const body =
      resp.body !== null && hasKnownLength ? resp.body : await resp.arrayBuffer();

    await env.CONTENT_VAULT.put(filePath, body, {
      httpMetadata: {
        contentType: getContentType(filePath),
        cacheControl: "public, max-age=2592000",
      },
    });

    return { path: filePath, status: "ok" };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { path: filePath, status: `error: ${message}` };
  }
}
|
|
/**
 * Removes one object from the `CONTENT_VAULT` bucket.
 *
 * @param env      Worker bindings providing the bucket.
 * @param filePath Object key to delete.
 * @returns `{ path, status: "deleted" }` once the bucket call has resolved;
 *          a rejection from the bucket propagates to the caller.
 */
async function deleteFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  const bucket = env.CONTENT_VAULT;
  await bucket.delete(filePath);

  const outcome = { path: filePath, status: "deleted" };
  return outcome;
}
|
|
/**
 * Tells whether a repository path must be left out of the mirror.
 *
 * Excluded: anything whose path starts with `.git` (this also covers names such
 * as `.gitattributes` or `.github/…`), the root-level `.env` file, and
 * everything under `worker/`.
 */
const SKIP = (f: string) => {
  if (f === ".env") {
    return true;
  }
  const excludedPrefixes = [".git", "worker/"];
  return excludedPrefixes.some((prefix) => f.startsWith(prefix));
};
|
|
/** Maximum number of items processed at the same time by `runBatch`. */
const CONCURRENCY = 5;

/**
 * Runs `fn` over every item with at most `CONCURRENCY` calls in flight.
 *
 * A failing item does not stop the batch: the remaining items are still
 * processed, and the returned promise settles only after every worker has
 * finished. This keeps the caller's promise alive for as long as work is
 * actually running.
 *
 * @param items Items to process; the array is snapshotted and not mutated.
 * @param fn    Async action applied to each item.
 * @throws The first error raised by `fn`, rethrown after all items were attempted.
 */
async function runBatch<T>(items: T[], fn: (item: T) => Promise<void>): Promise<void> {
  const pending = [...items];
  const failures: unknown[] = [];
  // Shared cursor instead of shift(): claiming the next item is O(1).
  let cursor = 0;

  const worker = async (): Promise<void> => {
    while (cursor < pending.length) {
      // The bounds check above guarantees the slot exists.
      const item = pending[cursor++] as T;
      try {
        await fn(item);
      } catch (err: unknown) {
        failures.push(err);
      }
    }
  };

  const workerCount = Math.min(CONCURRENCY, pending.length);
  await Promise.all(Array.from({ length: workerCount }, () => worker()));

  if (failures.length > 0) {
    throw failures[0];
  }
}
|
|
/**
 * Compares the presented webhook secret with the configured one without
 * returning early at the first differing byte.
 * An unset or empty configured secret never matches.
 */
function secretsMatch(provided: string, expected: string): boolean {
  if (!expected) return false;
  const encoder = new TextEncoder();
  const a = encoder.encode(provided);
  const b = encoder.encode(expected);
  let diff = a.length ^ b.length;
  for (let i = 0; i < a.length; i++) {
    diff |= (a[i] ?? 0) ^ (b[i % b.length] ?? 0);
  }
  return diff === 0;
}

/** Parses the request body; returns `null` when it is not a JSON object. */
async function readPayload(request: Request): Promise<HFWebhookPayload | null> {
  try {
    const parsed: HFWebhookPayload | null = await request.json();
    return typeof parsed === "object" ? parsed : null;
  } catch {
    return null;
  }
}

/** Lists the whole repository and drops the paths excluded by `SKIP`. */
async function listSyncableFiles(env: Env): Promise<string[]> {
  const allFiles = await listRepoFiles(env.HF_BASE_URL, env.HF_REPO, env.HF_TOKEN);
  return allFiles.filter((f) => !SKIP(f));
}

/**
 * Background job: decides between incremental and full sync, then applies
 * deletions followed by uploads. Never rejects; failures are logged.
 */
async function runSync(env: Env, mainRef: { newSha: string } | undefined): Promise<void> {
  try {
    let filesToSync: string[];
    let filesToDelete: string[] = [];

    const changes = mainRef
      ? await getChangedFiles(env.HF_BASE_URL, env.HF_REPO, env.HF_TOKEN, mainRef.newSha)
      : null;

    if (changes !== null && (changes.added.length > 0 || changes.deleted.length > 0)) {
      filesToSync = changes.added.filter((f) => !SKIP(f));
      filesToDelete = changes.deleted.filter((f) => !SKIP(f));
      console.log(`Incremental sync: ${filesToSync.length} to sync, ${filesToDelete.length} to delete`);
    } else {
      // Either no main-branch update was reported or the diff was unusable.
      filesToSync = await listSyncableFiles(env);
      console.log(
        mainRef
          ? `Full sync fallback: ${filesToSync.length} files`
          : `Full sync: ${filesToSync.length} files`,
      );
    }

    let deletedCount = 0;
    let syncedCount = 0;
    let failedCount = 0;

    // Deletions run first so a path that is both removed and re-written ends up present.
    await runBatch(filesToDelete, async (file) => {
      const result = await deleteFile(env, file);
      deletedCount++;
      console.log(`${result.path}: ${result.status}`);
    });

    await runBatch(filesToSync, async (file) => {
      const result = await syncFile(env, file);
      if (result.status === "ok") {
        syncedCount++;
      } else {
        failedCount++;
      }
      console.log(`${result.path}: ${result.status}`);
    });

    console.log(`Sync complete: ${syncedCount} synced, ${deletedCount} deleted`);
    if (failedCount > 0) {
      console.error(`Sync finished with ${failedCount} failed file(s)`);
    }
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : "Unknown error";
    console.error("Sync failed:", message);
  }
}

/**
 * Worker entry point: an authenticated webhook receiver that mirrors the
 * repository into the `CONTENT_VAULT` bucket.
 *
 * Responses:
 *  - 405 for anything other than POST,
 *  - 401 when `X-Webhook-Secret` is missing or does not match `HF_WEBHOOK_SECRET`,
 *  - 400 when the body is not a JSON object,
 *  - 200 `{ "status": "sync started" }` otherwise; the sync itself continues in
 *    the background via `ctx.waitUntil`.
 *
 * When the payload reports an update of `refs/heads/main`, only the files from
 * that commit's diff are synced; in every other case the full repository is
 * listed and uploaded.
 * NOTE(review): this means any authenticated event without a main-branch ref
 * triggers a full sync — confirm that is intended.
 */
export default {
  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
    if (request.method !== "POST") {
      return new Response("Method not allowed", { status: 405 });
    }

    const secret = request.headers.get("X-Webhook-Secret");
    if (!secret || !secretsMatch(secret, env.HF_WEBHOOK_SECRET)) {
      return new Response("Unauthorized", { status: 401 });
    }

    const payload = await readPayload(request);
    if (payload === null) {
      return new Response("Invalid JSON payload", { status: 400 });
    }
    console.log("Webhook event:", JSON.stringify(payload.event));

    const updatedRefs = Array.isArray(payload.updatedRefs) ? payload.updatedRefs : [];
    const mainRef = updatedRefs.find((r) => r?.ref === "refs/heads/main");

    ctx.waitUntil(runSync(env, mainRef));

    return new Response(JSON.stringify({ status: "sync started" }), {
      headers: { "Content-Type": "application/json" },
    });
  },
};
|
|