ContentVault / worker /src /index.ts
devrim99's picture
Add worker source, .gitignore, and clean up .gitattributes
9840c5d
/**
 * Bindings and configuration values the worker reads from its environment.
 */
interface Env {
  /** Base URL prepended to every upstream request (tree listing, commit diff, file download). */
  HF_BASE_URL: string;
  /** Repository identifier interpolated into the listing, diff and download URLs. */
  HF_REPO: string;
  /** Sent upstream as a `Bearer` token; when empty, no Authorization header is added. */
  HF_TOKEN: string;
  /** Value the incoming `X-Webhook-Secret` header has to equal for a request to be accepted. */
  HF_WEBHOOK_SECRET: string;
  /** R2 bucket that mirrored files are written to and deleted from. */
  CONTENT_VAULT: R2Bucket;
}
/**
 * One entry of a repository tree listing response.
 */
interface HFTreeItem {
  /** Path of the entry; reused as-is to recurse into directories and as the synced file key. */
  path: string;
  /** Entry kind. The listing code only acts on `"file"` and `"directory"`. */
  type: string;
  /** Optional size (presumably in bytes — not read anywhere in this file). */
  size?: number;
}
/**
 * Shape of the JSON body the webhook handler expects.
 */
interface HFWebhookPayload {
  /** Event descriptor; the handler only logs it. */
  event: {
    action: string;
    scope: string;
  };
  /** Repository the event refers to (not read by the handler in this file). */
  repo: {
    name: string;
    type: string;
  };
  /** Refs moved by the event; the handler looks for `refs/heads/main` and uses its `newSha`. */
  updatedRefs?: { ref: string; oldSha: string; newSha: string }[];
}
/**
 * MIME type per lower-case file suffix (leading dot included).
 */
const CONTENT_TYPES: Record<string, string> = {
  // Archives and structured data
  ".zip": "application/zip",
  ".json": "application/json",
  ".yaml": "text/yaml",
  ".yml": "text/yaml",
  // Images
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  // Plain text documents
  ".md": "text/markdown",
  ".txt": "text/plain",
};

/**
 * Picks the Content-Type to store alongside a file.
 *
 * @param path File path; everything from its last `.` onward is used as the suffix.
 * @returns The mapped MIME type, or `application/octet-stream` when the suffix is not in the table.
 */
function getContentType(path: string): string {
  const lastDot = path.lastIndexOf(".");
  // When there is no dot, lastDot is -1 and the slice yields the final
  // character, which never matches a table key, so the default applies.
  const suffix = path.slice(lastDot).toLowerCase();
  const known = CONTENT_TYPES[suffix];
  if (known) {
    return known;
  }
  return "application/octet-stream";
}
/**
 * Percent-encodes a slash-separated path for use inside a URL.
 *
 * Each segment is encoded on its own so the `/` separators survive.
 *
 * @param filePath Path using `/` as the separator.
 * @returns The same path with every segment passed through `encodeURIComponent`.
 */
function encodeFilePath(filePath: string): string {
  const encodedSegments: string[] = [];
  for (const segment of filePath.split("/")) {
    encodedSegments.push(encodeURIComponent(segment));
  }
  return encodedSegments.join("/");
}
/**
 * Lists every file on the `main` revision of a repository by walking its tree.
 *
 * Each directory is requested from `${baseUrl}/api/models/${repo}/tree/main/<dir>`.
 * Directories are descended into as they are encountered, so the result is in
 * depth-first listing order.
 *
 * @param baseUrl Base URL of the upstream API.
 * @param repo Repository identifier interpolated into the URL.
 * @param token Bearer token; when empty no Authorization header is sent.
 * @returns The `path` of every entry whose type is `"file"`.
 * @throws Error when a listing request is not OK or does not return a JSON array.
 */
async function listRepoFiles(baseUrl: string, repo: string, token: string): Promise<string[]> {
  const headers: Record<string, string> = token ? { Authorization: `Bearer ${token}` } : {};
  const files: string[] = [];

  // Extracts the rel="next" target from a Link header, resolved against the
  // page it came from in case the server sends a relative URL.
  const nextPageUrl = (linkHeader: string | null, currentUrl: string): string | undefined => {
    if (!linkHeader) return undefined;
    const match = /<([^>]*)>\s*;[^,]*\brel\s*=\s*"?next"?/i.exec(linkHeader);
    return match ? new URL(match[1], currentUrl).href : undefined;
  };

  async function fetchTree(path: string): Promise<void> {
    // Encode each segment so spaces, '#', '?' and similar characters in
    // directory names do not corrupt the request URL.
    const encodedPath = path.split("/").map(encodeURIComponent).join("/");
    let url: string | undefined = `${baseUrl}/api/models/${repo}/tree/main${path ? "/" + encodedPath : ""}`;

    // A listing may be split across pages; keep following the Link header
    // until the server stops advertising a next page.
    while (url !== undefined) {
      const resp = await fetch(url, { headers });
      if (!resp.ok) {
        throw new Error(`Failed to list ${url}: ${resp.status} ${resp.statusText}`);
      }
      const items: HFTreeItem[] = await resp.json();
      if (!Array.isArray(items)) {
        throw new Error(`Failed to list ${url}: response is not an array`);
      }
      for (const item of items) {
        if (item.type === "file") {
          files.push(item.path);
        } else if (item.type === "directory") {
          await fetchTree(item.path);
        }
      }
      url = nextPageUrl(resp.headers.get("link"), url);
    }
  }

  await fetchTree("");
  return files;
}
/**
 * Works out which files a commit touched by parsing its unified diff.
 *
 * The diff is fetched from `${baseUrl}/${repo}/commit/${newSha}.diff` and only
 * its per-file headers are inspected:
 *  - every `diff --git a/<old> b/<new>` header marks `<new>` as changed,
 *  - a following `deleted file mode` line turns that file into a deletion,
 *  - a `rename from <old>` line marks the old path as deleted, so the renamed
 *    file's previous location is removed rather than left behind.
 *
 * NOTE(review): git quotes paths containing unusual characters; such headers
 * are presumably not matched by the plain pattern used here — confirm whether
 * the upstream diff endpoint can emit them.
 *
 * @param baseUrl Base URL of the upstream site.
 * @param repo Repository identifier interpolated into the URL.
 * @param token Bearer token; when empty no Authorization header is sent.
 * @param newSha Commit whose diff is fetched.
 * @returns Paths to (re)sync in `added` and paths to remove in `deleted`, each
 *   without duplicates. Both are empty when the diff cannot be fetched, which
 *   the caller treats as a signal to fall back to a full sync.
 */
async function getChangedFiles(
  baseUrl: string,
  repo: string,
  token: string,
  newSha: string,
): Promise<{ added: string[]; deleted: string[] }> {
  const url = `${baseUrl}/${repo}/commit/${newSha}.diff`;
  const resp = await fetch(url, {
    headers: token ? { Authorization: `Bearer ${token}` } : {},
  });
  if (!resp.ok) {
    console.log(`Failed to get diff for ${newSha}: ${resp.status}, falling back to full sync`);
    return { added: [], deleted: [] };
  }

  const diff = await resp.text();
  const fileHeader = /^diff --git a\/(.+) b\/(.+)$/;
  const renamePrefix = "rename from ";
  const added = new Set<string>();
  const deleted = new Set<string>();
  const renamedFrom = new Set<string>();

  // Hunk content lines always start with '+', '-', ' ' or '\', so a line that
  // begins with one of the header keywords at column 0 is a real header.
  let current: string | undefined;
  for (const line of diff.split(/\r?\n/)) {
    const header = fileHeader.exec(line);
    if (header) {
      current = header[2];
      added.add(current);
    } else if (current !== undefined && line.startsWith("deleted file mode")) {
      added.delete(current);
      deleted.add(current);
    } else if (current !== undefined && line.startsWith(renamePrefix)) {
      renamedFrom.add(line.slice(renamePrefix.length));
    }
  }

  // A rename source only counts as deleted if the same commit did not also
  // write a file at that path.
  for (const oldPath of renamedFrom) {
    if (!added.has(oldPath)) deleted.add(oldPath);
  }

  return { added: [...added], deleted: [...deleted] };
}
/**
 * Downloads one file from the repository's `main` revision and stores it in
 * the R2 bucket under the same path.
 *
 * Failures never escape as exceptions: the function's contract is a per-file
 * status report, so network errors and bucket errors are folded into the
 * returned `status` and a single bad file cannot abort the surrounding batch.
 *
 * @param env Worker environment providing the bucket and upstream settings.
 * @param filePath Repository path of the file; also used as the bucket key.
 * @returns `status: "ok"` on success, otherwise `status: "error: <detail>"`
 *   where the detail is the HTTP status code or the thrown error's message.
 */
async function syncFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  try {
    const url = `${env.HF_BASE_URL}/${env.HF_REPO}/resolve/main/${encodeFilePath(filePath)}`;
    const resp = await fetch(url, {
      headers: env.HF_TOKEN ? { Authorization: `Bearer ${env.HF_TOKEN}` } : {},
    });
    if (!resp.ok) {
      return { path: filePath, status: `error: ${resp.status}` };
    }

    // Stream straight into the bucket only when the response declares its
    // length; otherwise buffer it first.
    // NOTE(review): R2 is understood to reject streams of unknown length —
    // confirm against the R2 put() documentation.
    const stream = resp.headers.has("content-length") ? resp.body : null;
    const body = stream ?? (await resp.arrayBuffer());

    await env.CONTENT_VAULT.put(filePath, body, {
      httpMetadata: {
        contentType: getContentType(filePath),
        cacheControl: "public, max-age=2592000",
      },
    });
    return { path: filePath, status: "ok" };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { path: filePath, status: `error: ${message}` };
  }
}
/**
 * Removes one object from the R2 bucket.
 *
 * A bucket error is reported through the returned `status` instead of being
 * thrown, so one failed delete does not abort the batch it runs in.
 *
 * @param env Worker environment providing the bucket.
 * @param filePath Bucket key to delete.
 * @returns `status: "deleted"` on success, otherwise `status: "error: <message>"`.
 */
async function deleteFile(
  env: Env,
  filePath: string,
): Promise<{ path: string; status: string }> {
  try {
    await env.CONTENT_VAULT.delete(filePath);
    return { path: filePath, status: "deleted" };
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    return { path: filePath, status: `error: ${message}` };
  }
}
/**
 * Tells whether a repository path is excluded from syncing.
 *
 * Excluded are: anything whose path begins with `.git` (which also covers
 * names such as `.gitignore` and `.gitattributes`), the top-level `.env`
 * file, and everything under `worker/`.
 */
const SKIP = (path: string): boolean => {
  if (path === ".env") {
    return true;
  }
  const excludedPrefixes = [".git", "worker/"];
  return excludedPrefixes.some((prefix) => path.startsWith(prefix));
};
/** Upper bound on how many items `runBatch` processes at the same time. */
const CONCURRENCY = 5;

/**
 * Applies an async function to every item, running at most `CONCURRENCY`
 * calls at once.
 *
 * Items are handed out in input order from a private copy of the list. The
 * returned promise resolves once every lane has drained and rejects as soon
 * as any call rejects.
 *
 * @param items Work items; the caller's array is not modified.
 * @param fn Async operation to run for each item.
 */
async function runBatch<T>(items: T[], fn: (item: T) => Promise<void>): Promise<void> {
  const pending = items.slice();
  const laneCount = Math.min(CONCURRENCY, pending.length);
  let cursor = 0;

  // Each lane repeatedly claims the next unclaimed item until none remain.
  const drain = async (): Promise<void> => {
    while (cursor < pending.length) {
      const item = pending[cursor] as T;
      cursor += 1;
      await fn(item);
    }
  };

  const lanes: Promise<void>[] = [];
  for (let lane = 0; lane < laneCount; lane += 1) {
    lanes.push(drain());
  }
  await Promise.all(lanes);
}
/**
 * Compares two secrets without stopping at the first differing byte, so the
 * time taken does not reveal how much of the provided value was correct.
 */
function secretsMatch(provided: string, expected: string): boolean {
  const encoder = new TextEncoder();
  const a = encoder.encode(provided);
  const b = encoder.encode(expected);
  let mismatch = a.length ^ b.length;
  for (let i = 0; i < a.length; i++) {
    mismatch |= (a[i] ?? 0) ^ (b[i] ?? 0);
  }
  return mismatch === 0;
}

/**
 * Brings the bucket in line with the repository.
 *
 * With a commit SHA, the commit's diff decides what to upload and delete. When
 * that yields nothing (including when the diff cannot be fetched), or when no
 * SHA is given, every non-skipped file in the repository is uploaded instead.
 * Deletes run before uploads. Any error is logged rather than thrown, because
 * this runs in the background after the response has been sent.
 *
 * NOTE(review): only the diff of `newSha` itself is inspected; if a single
 * push carries several commits, changes made by the earlier ones are
 * presumably not picked up — confirm against the webhook's semantics.
 */
async function syncRepository(env: Env, newSha: string | undefined): Promise<void> {
  try {
    let filesToSync: string[] | undefined;
    let filesToDelete: string[] = [];

    if (newSha !== undefined) {
      const { added, deleted } = await getChangedFiles(
        env.HF_BASE_URL,
        env.HF_REPO,
        env.HF_TOKEN,
        newSha,
      );
      if (added.length > 0 || deleted.length > 0) {
        filesToSync = added.filter((f) => !SKIP(f));
        filesToDelete = deleted.filter((f) => !SKIP(f));
        console.log(`Incremental sync: ${filesToSync.length} to sync, ${filesToDelete.length} to delete`);
      }
    }

    if (filesToSync === undefined) {
      const allFiles = await listRepoFiles(env.HF_BASE_URL, env.HF_REPO, env.HF_TOKEN);
      filesToSync = allFiles.filter((f) => !SKIP(f));
      const label = newSha !== undefined ? "Full sync fallback" : "Full sync";
      console.log(`${label}: ${filesToSync.length} files`);
    }

    await runBatch(filesToDelete, async (file) => {
      const result = await deleteFile(env, file);
      console.log(`${result.path}: ${result.status}`);
    });
    await runBatch(filesToSync, async (file) => {
      const result = await syncFile(env, file);
      console.log(`${result.path}: ${result.status}`);
    });
    console.log(`Sync complete: ${filesToSync.length} synced, ${filesToDelete.length} deleted`);
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : "Unknown error";
    console.error("Sync failed:", message);
  }
}

/**
 * Worker entry point: an authenticated webhook endpoint that starts a sync.
 *
 * Responses:
 *  - 405 for anything other than POST,
 *  - 401 when `X-Webhook-Secret` is missing or does not match the configured secret,
 *  - 400 when the body is not a JSON object,
 *  - 200 `{"status":"sync started"}` otherwise; the sync itself continues in
 *    the background via `ctx.waitUntil`.
 */
const handler = {
  async fetch(request: Request, env: Env, ctx: ExecutionContext): Promise<Response> {
    if (request.method !== "POST") {
      return new Response("Method not allowed", { status: 405 });
    }

    const secret = request.headers.get("X-Webhook-Secret");
    // An unset or empty configured secret must never authenticate anyone.
    if (!secret || !env.HF_WEBHOOK_SECRET || !secretsMatch(secret, env.HF_WEBHOOK_SECRET)) {
      return new Response("Unauthorized", { status: 401 });
    }

    // A malformed body is the caller's mistake: answer 400 instead of letting
    // the parse error surface as an unhandled exception.
    let parsed: unknown;
    try {
      parsed = await request.json();
    } catch {
      return new Response("Invalid JSON payload", { status: 400 });
    }
    if (typeof parsed !== "object" || parsed === null) {
      return new Response("Invalid JSON payload", { status: 400 });
    }
    const payload = parsed as HFWebhookPayload;
    console.log("Webhook event:", JSON.stringify(payload.event));

    const updatedRefs = Array.isArray(payload.updatedRefs) ? payload.updatedRefs : [];
    const mainRef = updatedRefs.find((r) => r?.ref === "refs/heads/main");

    ctx.waitUntil(syncRepository(env, mainRef?.newSha));

    return new Response(JSON.stringify({ status: "sync started" }), {
      headers: { "Content-Type": "application/json" },
    });
  },
};

export default handler;