File size: 2,574 Bytes
5505540
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/**
 * Hugging Face Hub fetcher with browser caching.
 *
 * We don't pull in @huggingface/hub: it's a heavy dep with a lot of
 * upload-side machinery we don't need for a read-only inference
 * client. The hub URL pattern is stable enough to inline.
 *
 * URL pattern:
 *   https://huggingface.co/<repo>/resolve/<revision>/<filename>
 *
 * The ``main`` revision is fine for everyone except library authors;
 * pin to a commit SHA for reproducibility once the model lands.
 */

/** Origin of the Hugging Face Hub; all file URLs are built under it. */
const HF_BASE = 'https://huggingface.co';

/** Location of a single file inside a Hugging Face Hub repository. */
export interface HubFile {
  /** Repo identifier, e.g. ``cp500/infon-coref-pointer``. */
  repo: string;
  /** Path inside the repo, e.g. ``onnx/backbone_bio_fp16.onnx``. */
  path: string;
  /** Branch, tag, or commit SHA. ``hubUrl`` falls back to ``main`` when omitted. */
  revision?: string;
}

/**
 * Build a downloadable URL for a file in an HF repo.
 *
 * The revision is percent-encoded as a whole, so a ref that contains
 * slashes (e.g. ``refs/pr/1``) stays a single URL segment instead of
 * leaking extra path components after ``/resolve/``. The file path is
 * encoded segment by segment: its ``/`` separators are kept, while
 * characters such as ``#``, ``?`` or spaces can no longer truncate or
 * corrupt the URL.
 *
 * @param file - Repo, path and optional revision; pass them un-encoded.
 * @returns ``<base>/<repo>/resolve/<revision>/<path>``.
 */
export function hubUrl({ repo, path, revision = 'main' }: HubFile): string {
  const encodedRevision = encodeURIComponent(revision);
  const encodedPath = path
    .split('/')
    .map((segment) => encodeURIComponent(segment))
    .join('/');
  return `${HF_BASE}/${repo}/resolve/${encodedRevision}/${encodedPath}`;
}

/**
 * Open the named Cache API store, resolving to ``undefined`` when the
 * Cache API is not defined in this runtime or the store cannot be opened.
 */
async function openHubCache(cacheName: string) {
  if (typeof caches === 'undefined') return undefined;
  try {
    return await caches.open(cacheName);
  } catch {
    // Any failure to open the store just disables caching for this call.
    return undefined;
  }
}

/**
 * Fetch a file from the Hub as an ``ArrayBuffer``.
 *
 * When the Cache API (``caches``) is available, the response is looked
 * up in, and stored into, the cache named by ``opts.cacheName``, keyed
 * by the file URL. A cached entry is returned as-is without contacting
 * the network, so entries stored under a mutable revision are never
 * refreshed by this function.
 *
 * Caching is strictly best-effort: a failure to open, read or write the
 * cache never fails the download. The network request itself is issued
 * exactly once per call.
 *
 * @param file - The Hub file to download.
 * @param opts - ``cacheName`` selects the Cache API store; defaults to
 *   ``infon-coref-v1``.
 * @returns The file contents.
 * @throws Error when the server answers with a non-2xx status; whatever
 *   ``fetch`` rejects with when the request itself fails.
 */
export async function fetchHubFile(
  file: HubFile,
  opts?: { cacheName?: string },
): Promise<ArrayBuffer> {
  const url = hubUrl(file);
  const cache = await openHubCache(opts?.cacheName ?? 'infon-coref-v1');

  if (cache) {
    try {
      const cached = await cache.match(url);
      if (cached) return await cached.arrayBuffer();
    } catch {
      // An unreadable cache entry is treated as a miss; go to the network.
    }
  }

  // Plain fetch (Node 18+ has it; older Node needs polyfill).
  const r = await fetch(url);
  if (!r.ok) throw new Error(`hub fetch ${url}: ${r.status}`);

  if (cache) {
    // Clone before consuming so the body can be both cached and returned.
    // Fire-and-forget: the caller should not wait on, or fail because of,
    // the cache write.
    void cache.put(url, r.clone()).catch(() => {
      /* cache failures are non-fatal */
    });
  }
  return await r.arrayBuffer();
}

/**
 * Fetch a file from the Hub and parse it as JSON.
 *
 * The bytes are obtained through ``fetchHubFile`` and decoded with a
 * default ``TextDecoder`` (UTF-8) before parsing.
 *
 * @typeParam T - Shape the caller expects. This is an unchecked cast:
 *   the parsed value is not validated against ``T``.
 * @param file - The Hub file to download.
 * @param opts - Forwarded unchanged to ``fetchHubFile`` so JSON files can
 *   share a custom ``cacheName`` with other downloads.
 * @returns The parsed JSON value.
 * @throws SyntaxError when the file is not valid JSON, plus anything
 *   ``fetchHubFile`` throws.
 */
export async function fetchHubJson<T = unknown>(
  file: HubFile,
  opts?: { cacheName?: string },
): Promise<T> {
  const buf = await fetchHubFile(file, opts);
  const text = new TextDecoder().decode(buf);
  return JSON.parse(text) as T;
}