File size: 8,503 Bytes
f221926
 
 
149fe2b
f221926
 
 
 
 
149fe2b
f221926
149fe2b
43a358a
f221926
 
 
 
43a358a
149fe2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55229e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f221926
149fe2b
 
 
 
 
 
 
 
 
 
 
299e359
f221926
 
 
 
43a358a
f221926
da0c2f2
55229e0
 
 
 
 
 
 
 
 
 
299e359
55229e0
 
299e359
 
 
da0c2f2
 
299e359
da0c2f2
299e359
 
 
 
 
f221926
 
 
299e359
 
55229e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299e359
55229e0
299e359
 
2ee9bac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149fe2b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f221926
149fe2b
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
// GGUF source. Single implementation now — every surface fetches directly
// from HF and caches in OPFS in the browser. The Express disk cache is
// gone, so localhost and HF Space share the same loader.
//
// Exposes:
//   isCached(repo, file)        → { cachedBytes, totalBytes? }
//   opfsHandleForModel(repo, file, onProgress, signal)
//                               → { handle, size, wasDownloaded }
//   evictModel(repo, file)      → { ok, bytesFreed, reason? }
//
// Helpers: inventoryOpfs(), purgeOpfs().

// Exported so bench-worker.js can re-resolve the OPFS file handle inside
// the worker. We can't transfer FileSystemFileHandle across postMessage on
// every browser (iOS Safari's structured-clone is missing the
// implementation), so instead we send the layout key (rootDir + repo
// segments + filename) and let the worker open the handle itself.
// This is the single top-level directory under the OPFS root that holds
// every cached model file (layout: models/<org>/<name>/<file>).
export const OPFS_ROOT_NAME = 'models';

// Open the `models/` directory under the OPFS root, creating it on first
// use. Throws when the browser has no OPFS support at all.
async function getOpfsRoot() {
  const storage = navigator.storage;
  if (!storage?.getDirectory) {
    throw new Error('OPFS is not available in this browser.');
  }
  const storageRoot = await storage.getDirectory();
  return storageRoot.getDirectoryHandle(OPFS_ROOT_NAME, { create: true });
}

// Split an "org/name" repo id into its path segments, dropping empties
// (leading, trailing, or doubled slashes) so the OPFS layout stays clean.
function repoSegments(repo) {
  const segments = [];
  for (const part of String(repo).split('/')) {
    if (part) segments.push(part);
  }
  return segments;
}

// Walk (and optionally create) the OPFS directory chain for a repo —
// models/<org>/<name> — returning the deepest directory handle. With
// create:false a missing segment rejects with the browser's NotFoundError.
async function getOpfsDirFor(repo, { create }) {
  let current = await getOpfsRoot();
  for (const segment of repoSegments(repo)) {
    current = await current.getDirectoryHandle(segment, { create });
  }
  return current;
}

// Resolve the OPFS file handle for repo/file. `create` applies to both the
// directory chain and the file itself; with create:false a missing entry
// rejects with the browser's NotFoundError.
async function getOpfsFileHandle(repo, file, { create }) {
  const parentDir = await getOpfsDirFor(repo, { create });
  return parentDir.getFileHandle(file, { create });
}

// WebKit (iOS Safari) reports these error names/messages when an OPFS
// operation fails because another holder — typically a stuck
// FileSystemSyncAccessHandle from a worker that was Jetsam-killed before
// it could close cleanly — still owns the file. That lock normally clears
// within a few seconds, so retrying with backoff is the documented
// mitigation. Anything else (NotFoundError, QuotaExceeded, …) is a real
// failure and must not be retried.
function isOpfsTransientError(err) {
  if (!err) return false;
  if (err.name === 'InvalidStateError' || err.name === 'NoModificationAllowedError') {
    return true;
  }
  const text = String(err.message || err);
  return /unknown transient/i.test(text) || /no modification allowed/i.test(text);
}

// Run `fn(attempt)` and retry it — up to three extra attempts, backing off
// 0.5s / 2s / 5s — while it keeps failing with a WebKit-transient OPFS
// error. Non-transient errors propagate immediately; the final transient
// error is rethrown once the backoff schedule is exhausted.
async function withOpfsRetry(fn) {
  const backoffMs = [500, 2_000, 5_000];
  for (let attempt = 0; ; attempt++) {
    try {
      return await fn(attempt);
    } catch (err) {
      if (!isOpfsTransientError(err) || attempt >= backoffMs.length) throw err;
      await new Promise((resolve) => setTimeout(resolve, backoffMs[attempt]));
    }
  }
}

/**
 * Build the GGUF model source object. Stateless — every method re-resolves
 * OPFS handles on each call, so creating multiple instances is harmless.
 * See the file header for the method contracts.
 */
export function ggufSource() {
  return {
    // Report how many bytes of repo/file sit in OPFS. Any failure (file
    // absent, repo directory absent, OPFS unsupported) is folded into
    // "not cached" rather than an error so callers can always render a
    // cache state.
    async isCached(repo, file) {
      try {
        const handle = await getOpfsFileHandle(repo, file, { create: false });
        const f = await handle.getFile();
        // totalBytes mirrors cachedBytes: a file present in OPFS is
        // complete, because failed downloads abort their writable stream
        // (see opfsHandleForModel) rather than committing partial bytes.
        return { cachedBytes: f.size, totalBytes: f.size };
      } catch {
        return { cachedBytes: 0 };
      }
    },

    // Ensure the model is fully downloaded to OPFS, then return its
    // FileSystemFileHandle. The worker (bench-worker.js) opens a sync
    // access handle on this file and routes MEMFS reads through it, so
    // model bytes never enter the WASM heap. onProgress fires during
    // download with (fraction, downloaded, total). `wasDownloaded`
    // distinguishes a fresh download from a cache hit so the caller can
    // decide whether to evict the variant after the run.
    async opfsHandleForModel(repo, file, onProgress, signal) {
      // Cache lookup — wrapped in retry because getFile() can also hit
      // the WebKit transient (a sync access handle from a previous
      // worker that was Jetsam-killed mid-run blocks this for a few
      // seconds until WebKit's GC reaps it).
      const cached = await withOpfsRetry(async () => {
        // A zero-byte file is treated as a miss and re-downloaded.
        const handle = await getOpfsFileHandle(repo, file, { create: false }).catch(() => null);
        if (!handle) return null;
        const f = await handle.getFile();
        return f.size > 0 ? { handle, size: f.size } : null;
      });
      if (cached) {
        // Cache hit still fires onProgress once at 100% so the UI settles.
        onProgress?.(1, cached.size, cached.size);
        return { handle: cached.handle, size: cached.size, wasDownloaded: false };
      }

      // Cache miss — download from HF straight into a writable OPFS stream.
      // signal lets the caller cancel: fetch + reader.read both reject with
      // AbortError when it fires, and the catch below propagates that up.
      const url = `https://huggingface.co/${repo}/resolve/main/${file}`;
      const resp = await fetch(url, { signal });
      if (!resp.ok) {
        throw new Error(`Download failed: ${resp.status} ${resp.statusText}`);
      }
      // 0 when the header is absent — progress callbacks are skipped then.
      const contentLength = parseInt(resp.headers.get('content-length') || '0', 10);

      // Opportunistically request persistent storage so eviction is less
      // likely once we commit to pulling large files. Best-effort — ignore
      // rejection (some browsers only grant on user gesture).
      navigator.storage?.persist?.().catch(() => {});

      // Retry the createWritable + drain loop on the WebKit transient.
      // Each retry restarts the download from byte 0; for streamed writes
      // we can't resume mid-file without re-issuing the fetch, and the
      // transient typically only fires on createWritable so retrying is
      // usually a no-op past attempt 0. Fresh fetch per attempt is the
      // simplest correct thing.
      return await withOpfsRetry(async (attempt) => {
        const handle = await getOpfsFileHandle(repo, file, { create: true });
        const writable = await handle.createWritable({ keepExistingData: false });

        // On retry we need a fresh response body — the original reader
        // was consumed (or aborted) by the previous attempt. Use the
        // already-fetched response on attempt 0; re-fetch on retries.
        const body = attempt === 0 ? resp.body : (await fetch(url, { signal })).body;

        try {
          const reader = body.getReader();
          let downloaded = 0;
          while (true) {
            const { done, value } = await reader.read();
            if (done) break;
            await writable.write(value);
            downloaded += value.byteLength;
            if (contentLength > 0) onProgress?.(downloaded / contentLength, downloaded, contentLength);
          }
          await writable.close();
          return { handle, size: downloaded, wasDownloaded: true };
        } catch (err) {
          // Abort discards everything written this attempt, so OPFS never
          // holds a committed partial file. AbortError (cancel) is not
          // transient and propagates out of withOpfsRetry unchanged.
          try { await writable.abort(err); } catch { /* ignore */ }
          throw err;
        }
      });
    },

    // Remove repo/file from OPFS. bytesFreed is measured just before
    // removal (best-effort — 0 when the size lookup fails); a failed
    // removeEntry (file or repo directory absent) yields ok: false with
    // the error message in `reason`.
    async evictModel(repo, file) {
      try {
        const dir = await getOpfsDirFor(repo, { create: false });
        let bytesFreed = 0;
        try {
          const handle = await dir.getFileHandle(file, { create: false });
          const f = await handle.getFile();
          bytesFreed = f.size;
        } catch { /* not present */ }
        await dir.removeEntry(file);
        return { ok: true, bytesFreed };
      } catch (err) {
        return { ok: false, bytesFreed: 0, reason: err.message };
      }
    },
  };
}

// Walk OPFS and report every cached file as `{ 'repo/file': { cachedBytes } }`.
// Returns {} when OPFS is unsupported or nothing has been cached yet.
export async function inventoryOpfs() {
  if (!navigator.storage?.getDirectory) return {};
  const storageRoot = await navigator.storage.getDirectory();
  let modelsDir;
  try {
    modelsDir = await storageRoot.getDirectoryHandle(OPFS_ROOT_NAME, { create: false });
  } catch {
    return {}; // models/ never created — nothing cached
  }

  const inventory = {};
  // Depth-first walk; `prefix` is the slash-joined path so far ('' at root).
  const collect = async (dir, prefix) => {
    for await (const entry of dir.values()) {
      const path = prefix ? `${prefix}/${entry.name}` : entry.name;
      if (entry.kind === 'directory') {
        await collect(entry, path);
      } else if (entry.kind === 'file') {
        const f = await entry.getFile();
        inventory[path] = { cachedBytes: f.size };
      }
    }
  };
  await collect(modelsDir, '');
  return inventory;
}

// Delete every cached file under OPFS `models/`. Used by the [Purge] button.
// Best-effort: silently succeeds when OPFS is unsupported or nothing exists.
export async function purgeOpfs() {
  if (!navigator.storage?.getDirectory) return;
  const storageRoot = await navigator.storage.getDirectory();
  try {
    await storageRoot.removeEntry(OPFS_ROOT_NAME, { recursive: true });
  } catch {
    // models/ never existed — nothing to purge.
  }
}