Spaces:

mihailik
/

localm

Configuration error

App Files Community

mihailik committed on Aug 21, 2025

Commit

2a0250a

1 Parent(s): 5df2a9b

Faster fetching of model list (with caching).

Browse files

Files changed (5) hide show

package.json +1 -1
src/app/boot-app.js +2 -2
src/app/init-milkdown.js +7 -68
src/app/model-list.js +0 -395
src/worker/list-chat-models.js +14 -14

package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "localm",
-  "version": "1.1.27",
   "description": "Chat application",
   "scripts": {
     "build": "esbuild src/index.js --target=es6 --bundle --sourcemap --outfile=./index.js --format=iife --external:fs --external:path --external:child_process --external:ws --external:katex/dist/katex.min.css",

 {
   "name": "localm",
+  "version": "1.1.28",
   "description": "Chat application",
   "scripts": {
     "build": "esbuild src/index.js --target=es6 --bundle --sourcemap --outfile=./index.js --format=iife --external:fs --external:path --external:child_process --external:ws --external:katex/dist/katex.min.css",

src/app/boot-app.js CHANGED Viewed

@@ -22,7 +22,7 @@ export async function bootApp() {
   worker.loaded.then(async ({ env }) => {
     document.title = name + ' v' + version + ' t/' + env.version;
     outputMessage(
-      'transformers.js v' + env.version);
   });
   const {
@@ -54,5 +54,5 @@ export async function bootApp() {
   // Setup Enter key handling for the Crepe input editor
   setupCrepeEnterKey(crepeInput, worker);
   document.title = name + ' v' + version;
-  outputMessage(description + ' v' + version);
 }

   worker.loaded.then(async ({ env }) => {
     document.title = name + ' v' + version + ' t/' + env.version;
     outputMessage(
+      'transformers.js **v' + env.version + '**');
   });
   const {
   // Setup Enter key handling for the Crepe input editor
   setupCrepeEnterKey(crepeInput, worker);
   document.title = name + ' v' + version;
+  outputMessage(description + ' **v' + version + '**');
 }

src/app/init-milkdown.js CHANGED Viewed

@@ -11,9 +11,10 @@ import { Crepe } from '@milkdown/crepe';
 import { blockEdit } from '@milkdown/crepe/feature/block-edit';
 import { commonmark } from '@milkdown/kit/preset/commonmark';
 import "@milkdown/crepe/theme/common/style.css";
 import "@milkdown/crepe/theme/frame.css";
-import { outputMessage } from './output-message';
 /**
  * @typedef {{
@@ -45,7 +46,6 @@ export async function initMilkdown({
   const chatLogEditor = await Editor.make()
     .config((ctx) => {
       ctx.set(rootCtx, chatLog);
-      ctx.set(defaultValueCtx, 'Loaded.');
       ctx.set(editorViewOptionsCtx, { editable: () => false });
     })
     .use(commonmark)
@@ -56,7 +56,6 @@ export async function initMilkdown({
     root: chatInput,
     defaultValue: '',
     features: {
-      // Do NOT enable BlockEdit here; we'll add it later after models load
       [Crepe.Feature.BlockEdit]: false,
       [Crepe.Feature.Placeholder]: true,
       [Crepe.Feature.Cursor]: true,
@@ -70,7 +69,7 @@ export async function initMilkdown({
     },
     featureConfigs: {
       [Crepe.Feature.Placeholder]: {
-        text: 'Start typing...',
         mode: 'block'
       }
     }
@@ -81,14 +80,8 @@ export async function initMilkdown({
   // Fetch models in background and add BlockEdit when ready
   (async () => {
     try {
-      if (!worker || typeof worker.listChatModels !== 'function') {
-        console.warn('[initMilkdown] worker.listChatModels not available; skipping BlockEdit setup');
-        return;
-      }
-  console.log('[initMilkdown] requesting models from worker');
       const { id, promise, cancel } = await worker.listChatModels({}, undefined);
       const out = await promise;
-  console.log('[initMilkdown] worker.listChatModels resolved', out && out.meta ? out.meta : out);
       // Normalize possible response shapes
       let entries = [];
@@ -106,76 +99,22 @@ export async function initMilkdown({
         requiresAuth: e.classification === 'auth-protected'
       }));
-      console.log('[initMilkdown] extracted models', { count: availableModels.length });
       outputMessage('Models discovered: **' + availableModels.length + '**');
-      // Add BlockEdit feature now that models are available
-      const _addFeatureResult = crepeInput.addFeature(blockEdit, {
         buildMenu: (groupBuilder) => {
           const modelsGroup = groupBuilder.addGroup('models', 'Models');
           (availableModels || []).forEach((model) => modelsGroup.addItem(model.slashCommand, {
             label: `${model.name} ${model.size ? `(${model.size})` : ''}`,
             icon: '🤖',
-            onRun: () => { if (onSlashCommand) onSlashCommand(model.id); }
           }));
         }
       });
-      // await in case addFeature returns a promise (some implementations do async init)
-      try {
-        await Promise.resolve(_addFeatureResult);
-      } catch (e) {
-        console.warn('[initMilkdown] addFeature promise rejected', e);
-      }
-      console.log('[initMilkdown] BlockEdit feature added');
-      // Non-destructive smoke-test: insert a '/' then remove it to trigger the slash provider
-      // This helps verify the menu actually shows when the feature is registered.
-      try {
-        crepeInput.editor.action((ctx) => {
-          const view = ctx.get(editorViewCtx);
-          if (!view) return;
-          const pos = view.state.selection.from;
-          try {
-            view.dispatch(view.state.tr.insertText('/', pos));
-            console.log('[initMilkdown] probe: inserted slash at', pos);
-          } catch (e) {
-            console.warn('[initMilkdown] probe insert failed', e);
-          }
-          // Remove the inserted slash shortly after to avoid mutating user content
-          setTimeout(() => {
-            try {
-              crepeInput.editor.action((ctx2) => {
-                const view2 = ctx2.get(editorViewCtx);
-                if (!view2) return;
-                const selFrom = view2.state.selection.from;
-                // delete the single character if still present at the original position
-                const delTr = view2.state.tr.delete(pos, pos + 1);
-                view2.dispatch(delTr);
-                console.log('[initMilkdown] probe: removed slash at', pos);
-              });
-            } catch (e) {
-              console.warn('[initMilkdown] probe cleanup failed', e);
-            }
-          }, 300);
-        });
-      } catch (e) {
-        console.warn('[initMilkdown] probe failed', e);
-      }
-      // Trigger a small editor action to ensure the UI acknowledges the new feature
-      try {
-        crepeInput.editor.action((ctx) => {
-          const view = ctx.get(editorViewCtx);
-          if (view && typeof view.update === 'function') try { view.update(view.state); } catch (e) {}
-        });
-      } catch (e) {
-        // if action fails, ignore
-      }
     } catch (e) {
       console.warn('Failed to load models for BlockEdit via worker:', e);
-      try {
-        const marker = document.getElementById('models-loaded-indicator');
-        if (marker && marker.parentNode) marker.parentNode.removeChild(marker);
-      } catch (ee) {}
     }
   })();

 import { blockEdit } from '@milkdown/crepe/feature/block-edit';
 import { commonmark } from '@milkdown/kit/preset/commonmark';
+import { outputMessage } from './output-message';
 import "@milkdown/crepe/theme/common/style.css";
 import "@milkdown/crepe/theme/frame.css";
 /**
  * @typedef {{
   const chatLogEditor = await Editor.make()
     .config((ctx) => {
       ctx.set(rootCtx, chatLog);
       ctx.set(editorViewOptionsCtx, { editable: () => false });
     })
     .use(commonmark)
     root: chatInput,
     defaultValue: '',
     features: {
       [Crepe.Feature.BlockEdit]: false,
       [Crepe.Feature.Placeholder]: true,
       [Crepe.Feature.Cursor]: true,
     },
     featureConfigs: {
       [Crepe.Feature.Placeholder]: {
+        text: 'Prompt (or /slash for model list)...',
         mode: 'block'
       }
     }
   // Fetch models in background and add BlockEdit when ready
   (async () => {
     try {
       const { id, promise, cancel } = await worker.listChatModels({}, undefined);
       const out = await promise;
       // Normalize possible response shapes
       let entries = [];
         requiresAuth: e.classification === 'auth-protected'
       }));
       outputMessage('Models discovered: **' + availableModels.length + '**');
+      crepeInput.addFeature(blockEdit, {
         buildMenu: (groupBuilder) => {
           const modelsGroup = groupBuilder.addGroup('models', 'Models');
           (availableModels || []).forEach((model) => modelsGroup.addItem(model.slashCommand, {
             label: `${model.name} ${model.size ? `(${model.size})` : ''}`,
             icon: '🤖',
+            onRun: () => {
+              if (onSlashCommand) onSlashCommand(model.id);
+            }
           }));
         }
       });
     } catch (e) {
       console.warn('Failed to load models for BlockEdit via worker:', e);
     }
   })();

src/app/model-list.js DELETED Viewed

@@ -1,395 +0,0 @@
-// @ts-check
-import { workerConnection } from './worker-connection.js';
-/**
- * @typedef {{
- *   id: string,
- *   name: string,
- *   vendor: string,
- *   size: string,
- *   slashCommand: string,
- *   description: string,
- *   downloads?: number,
- *   pipeline_tag?: string,
- *   requiresAuth?: boolean,
- *   hasOnnx?: boolean,
- *   hasTokenizer?: boolean,
- *   missingFiles?: boolean,
- *   missingReason?: string
- * }} ModelInfo
- */
-/**
- * Cache for fetched models to avoid repeated API calls
- */
-let modelCache = null;
-let cacheTimestamp = 0;
-const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
-const STORAGE_KEY = 'localm_models_cache_v1';
-const STORAGE_TTL = 24 * 60 * 60 * 1000; // 24 hours for persisted cache
-/**
- * Size thresholds for mobile capability (in billions of parameters)
- */
-const MOBILE_SIZE_THRESHOLD = 15; // Models under 15B are considered mobile-capable
-/**
- * Fetch models from Hugging Face Hub with transformers.js compatibility
- * @returns {Promise<ModelInfo[]>}
- */
-export async function fetchBrowserModels(params = {}) {
-  // Worker-backed implementation: call worker.listChatModels and return final models.
-  try {
-    const wc = workerConnection();
-  const { id, promise, cancel } = await wc.listChatModels(params, /* onProgress */ undefined);
-    // wait for final result (no caching, no localStorage)
-    const res = await promise;
-    // Map worker ModelEntry -> UI ModelInfo minimal shape
-    const mapped = Array.isArray(res.models ? res.models : res)
-      ? (res.models || res).map(e => ({
-        id: e.id,
-        name: e.name || (e.id || '').split('/').pop(),
-        vendor: extractVendor(e.id || ''),
-        size: '',
-        slashCommand: generateSlashCommand(e.id || ''),
-        description: '',
-        pipeline_tag: e.pipeline_tag || null,
-        requiresAuth: e.classification === 'auth-protected'
-      }))
-      : [];
-    return mapped.length ? mapped : FALLBACK_MODELS;
-  } catch (err) {
-    // on error, return small fallback list
-    console.warn('fetchBrowserModels: worker error, returning fallback', err && err.message ? err.message : err);
-    return FALLBACK_MODELS;
-  }
-}
-// Small fallback list used when worker fails or times out
-const FALLBACK_MODELS = [
-  { id: 'microsoft/Phi-3-mini-4k-instruct', name: 'Phi-3 Mini', vendor: 'Microsoft', size: '3.8B', slashCommand: 'phi3', description: 'Fallback Phi-3 Mini' },
-  { id: 'mistralai/Mistral-7B-v0.1', name: 'Mistral 7B', vendor: 'Mistral AI', size: '7.3B', slashCommand: 'mistral', description: 'Fallback Mistral' },
-  { id: 'Xenova/distilgpt2', name: 'DistilGPT-2', vendor: 'Xenova', size: '82M', slashCommand: 'distilgpt2', description: 'Fallback DistilGPT2' }
-];
-/**
- * Check if a model is suitable for mobile/browser use
- * @param {any} model - Raw model data from HF API
- * @returns {boolean}
- */
-function isModelMobileCapable(model) {
-  // Skip if no model ID
-  if (!model.id) return false;
-  // Estimate model size from various indicators
-  const sizeEstimate = estimateModelSize(model);
-  // Skip models that are too large
-  if (sizeEstimate > MOBILE_SIZE_THRESHOLD) {
-    return false;
-  }
-  // Prefer models with certain pipeline tags that work well in browsers
-  const preferredTags = [
-    'text-generation',
-    'text2text-generation',
-    'feature-extraction',
-    'sentence-similarity',
-    'fill-mask'
-  ];
-  const hasPreferredTag = !model.pipeline_tag || preferredTags.includes(model.pipeline_tag);
-  // Skip certain model types that are less suitable for general text generation
-  const excludePatterns = [
-    /whisper/i,
-    /vision/i,
-    /image/i,
-    /audio/i,
-    /translation/i,
-    /classification/i,
-    /embedding/i
-  ];
-  const isExcluded = excludePatterns.some(pattern => pattern.test(model.id));
-  return hasPreferredTag && !isExcluded;
-}
-/**
- * Estimate model size in billions of parameters from various indicators
- * @param {any} model - Raw model data from HF API
- * @returns {number}
- */
-function estimateModelSize(model) {
-  const modelId = model.id.toLowerCase();
-  // Extract size from model name patterns
-  const sizePatterns = [
-    /(\d+\.?\d*)b\b/i,    // "7b", "3.8b", etc.
-    /(\d+)m\b/i,          // "125m" -> convert to billions
-    /(\d+)k\b/i           // "125k" -> very small
-  ];
-  for (const pattern of sizePatterns) {
-    const match = modelId.match(pattern);
-    if (match) {
-      const size = parseFloat(match[1]);
-      if (pattern.source.includes('m\\b')) {
-        return size / 1000; // Convert millions to billions
-      } else if (pattern.source.includes('k\\b')) {
-        return size / 1000000; // Convert thousands to billions
-      } else {
-        return size; // Already in billions
-      }
-    }
-  }
-  // If no size found in name, make conservative estimates based on model family
-  if (modelId.includes('gpt2') || modelId.includes('distil')) return 0.2;
-  if (modelId.includes('phi-1') || modelId.includes('phi1')) return 1.3;
-  if (modelId.includes('phi-3') || modelId.includes('phi3')) return 3.8;
-  if (modelId.includes('mistral')) return 7;
-  if (modelId.includes('qwen') && modelId.includes('3b')) return 3;
-  if (modelId.includes('qwen') && modelId.includes('7b')) return 7;
-  if (modelId.includes('llama') && modelId.includes('7b')) return 7;
-  if (modelId.includes('llama') && modelId.includes('13b')) return 13;
-  // Default conservative estimate for unknown models
-  return 5;
-}
-/**
- * Process raw model data into our ModelInfo format
- * @param {any} model - Raw model data from HF API
- * @returns {ModelInfo | null}
- */
-function processModelData(model) {
-  try {
-    const size = estimateModelSize(model);
-    const vendor = extractVendor(model.id);
-    const name = extractModelName(model.id);
-    const slashCommand = generateSlashCommand(model.id);
-    return {
-      id: model.id,
-      name,
-      vendor,
-      size: formatSize(size),
-      slashCommand,
-      description: `${formatSize(size)} parameter model from ${vendor}`,
-      downloads: model.downloads || 0,
-      pipeline_tag: model.pipeline_tag
-    };
-  } catch (error) {
-    console.warn(`Failed to process model ${model.id}:`, error);
-    return null;
-  }
-}
-/**
- * Extract vendor/organization from model ID
- * @param {string} modelId
- * @returns {string}
- */
-function extractVendor(modelId) {
-  const parts = modelId.split('/');
-  if (parts.length > 1) {
-    const org = parts[0];
-    // Map known organizations to friendly names
-    const orgMap = {
-      'microsoft': 'Microsoft',
-      'mistralai': 'Mistral AI',
-      'Qwen': 'Alibaba',
-      'google': 'Google',
-      'openai-community': 'OpenAI',
-      'Xenova': 'Xenova',
-      'meta-llama': 'Meta',
-      'onnx-community': 'ONNX Community'
-    };
-    return orgMap[org] || org;
-  }
-  return 'Unknown';
-}
-/**
- * Extract clean model name from full ID
- * @param {string} modelId
- * @returns {string}
- */
-function extractModelName(modelId) {
-  const parts = modelId.split('/');
-  const name = parts[parts.length - 1];
-  // Clean up common patterns
-  return name
-    .replace(/-ONNX$/, '')
-    .replace(/-onnx$/, '')
-    .replace(/-instruct$/, '')
-    .replace(/-chat$/, '')
-    .replace(/^Xenova-/, '')
-    .replace(/-/g, ' ')
-    .replace(/\b\w/g, l => l.toUpperCase()); // Title case
-}
-/**
- * Generate a slash command from model ID
- * @param {string} modelId
- * @returns {string}
- */
-function generateSlashCommand(modelId) {
-  const name = (modelId.split('/').pop() || modelId).toLowerCase();
-  // Create short, memorable commands
-  if (name.includes('phi-3') || name.includes('phi3')) return 'phi3';
-  if (name.includes('phi-1') || name.includes('phi1')) return 'phi1';
-  if (name.includes('mistral')) return 'mistral';
-  if (name.includes('qwen') && name.includes('3b')) return 'qwen3b';
-  if (name.includes('qwen') && name.includes('7b')) return 'qwen7b';
-  if (name.includes('qwen')) return 'qwen';
-  if (name.includes('gpt2')) return 'gpt2';
-  if (name.includes('distilgpt2')) return 'distilgpt2';
-  if (name.includes('llama')) return 'llama';
-  if (name.includes('gemma')) return 'gemma';
-  if (name.includes('flan')) return 'flant5';
-  // Generate from first few characters of model name
-  const clean = name.replace(/[^a-z0-9]/g, '');
-  return clean.substring(0, 8);
-}
-/**
- * Format size number for display
- * @param {number} size
- * @returns {string}
- */
-function formatSize(size) {
-  if (size < 1) {
-    return `${Math.round(size * 1000)}M`;
-  } else {
-    return `${size.toFixed(1)}B`;
-  }
-}
-/**
- * Detect if the model repository includes necessary runtime files.
- * Uses 'siblings' list available when calling Hugging Face API with full=true.
- * @param {any} model
- * @returns {{hasOnnx:boolean, hasTokenizer:boolean, missingFiles:boolean, missingReason:string}}
- */
-function detectRequiredFiles(model) {
-  const siblings = Array.isArray(model.siblings) ? model.siblings : [];
-  const names = siblings.map(s => s.rfilename || s.filename || '');
-  const hasOnnx = names.some(n => /\.onnx$/i.test(n));
-  const hasTokenizer = names.some(n => /tokenizer\.json$/i.test(n) || /tokenizer_config\.json$/i.test(n));
-  const missing = !(hasOnnx && hasTokenizer);
-  let reason = '';
-  if (missing) {
-    if (!hasOnnx && !hasTokenizer) reason = 'Missing ONNX and tokenizer files';
-    else if (!hasOnnx) reason = 'Missing ONNX files';
-    else if (!hasTokenizer) reason = 'Missing tokenizer files';
-  }
-  return { hasOnnx, hasTokenizer, missingFiles: missing, missingReason: reason };
-}
-/**
- * Determine if a model supports chat-style inputs/outputs.
- * Uses pipeline_tag, tags, and name heuristics as fallback.
- * @param {any} model
- */
-function isModelChatCapable(model) {
-  if (!model) return false;
-  const allowedPipelines = new Set([
-    'text-generation', 'conversational', 'text2text-generation', 'chat',
-    'sentence'
-  ]);
-  if (model.pipeline_tag && allowedPipelines.has(model.pipeline_tag)) return true;
-  // tags array may contain 'conversational' or 'chat'
-  if (Array.isArray(model.tags)) {
-    for (const t of model.tags) {
-      if (typeof t === 'string' && allowedPipelines.has(t)) return true;
-    }
-  }
-  // fallback heuristics in id/name: look for chat, conversational, dialog, instruct
-  const id = (model.id || '').toLowerCase();
-  const name = (model.name || '').toLowerCase();
-  const heuristics = ['chat', 'conversational', 'dialog', 'instruct', 'instruction', 'sentence'];
-  for (const h of heuristics) {
-    if (id.includes(h) || name.includes(h)) return true;
-  }
-  return false;
-}
-/**
- * Get fallback models if API fetch fails
- * @returns {ModelInfo[]}
- */
-function getFallbackModels() {
-  return [
-    {
-      id: 'microsoft/Phi-3-mini-4k-instruct',
-      name: 'Phi-3 Mini',
-      vendor: 'Microsoft',
-      size: '3.8B',
-      slashCommand: 'phi3',
-      description: 'Exceptional performance-to-size ratio, strong in reasoning and math'
-    },
-    {
-      id: 'mistralai/Mistral-7B-v0.1',
-      name: 'Mistral 7B',
-      vendor: 'Mistral AI',
-      size: '7.3B',
-      slashCommand: 'mistral',
-      description: 'Highly efficient, outperforms larger models with innovative architecture'
-    },
-    {
-      id: 'Xenova/distilgpt2',
-      name: 'DistilGPT-2',
-      vendor: 'Xenova',
-      size: '82M',
-      slashCommand: 'distilgpt2',
-      description: 'Extremely fast and lightweight for quick prototyping'
-    },
-    {
-      id: 'openai-community/gpt2',
-      name: 'GPT-2',
-      vendor: 'OpenAI',
-      size: '124M',
-      slashCommand: 'gpt2',
-      description: 'Foundational model for reliable lightweight text generation'
-    }
-  ];
-}
-/**
- * Get model info by slash command
- * @param {string} command - The slash command (e.g., 'phi3')
- * @param {ModelInfo[]} [models] - Optional pre-fetched models list
- * @returns {Promise<ModelInfo | undefined>}
- */
-export async function getModelByCommand(command, models) {
-  const modelList = models || await fetchBrowserModels();
-  return modelList.find(model => model.slashCommand === command);
-}
-/**
- * Get model info by ID
- * @param {string} id - The model ID
- * @param {ModelInfo[]} [models] - Optional pre-fetched models list
- * @returns {Promise<ModelInfo | undefined>}
- */
-export async function getModelById(id, models) {
-  const modelList = models || await fetchBrowserModels();
-  return modelList.find(model => model.id === id);
-}
-/**
- * Get all available slash commands
- * @param {ModelInfo[]} [models] - Optional pre-fetched models list
- * @returns {Promise<string[]>}
- */
-export async function getAllSlashCommands(models) {
-  const modelList = models || await fetchBrowserModels();
-  return modelList.map(model => model.slashCommand);
-}

src/worker/list-chat-models.js CHANGED Viewed

@@ -81,18 +81,6 @@ export async function* listChatModelsIterator(params = {}) {
     }
   }
-  async function fetchWithController(url, init = {}) {
-    const c = new AbortController();
-    inFlight.add(c);
-    try {
-      const merged = Object.assign({}, init, { signal: c.signal });
-      const resp = await fetch(url, merged);
-      return resp;
-    } finally {
-      inFlight.delete(c);
-    }
-  }
   // helper: fetchConfigForModel (tries multiple paths, per-request timeouts & retries)
   async function fetchConfigForModel(modelId) {
     const urls = [
@@ -106,7 +94,13 @@ export async function* listChatModelsIterator(params = {}) {
         const controller = new AbortController();
         inFlight.add(controller);
         try {
-          const resp = await fetch(url, { signal: controller.signal, headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {} });
           if (resp.status === 200) {
             const json = await resp.json();
             counters.configFetch200++;
@@ -192,7 +186,13 @@ export async function* listChatModelsIterator(params = {}) {
       let ok = false;
       for (let attempt = 0; attempt <= RETRIES && !ok; attempt++) {
         try {
-          const resp = await fetch(url, { headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {} });
           if (resp.status === 429) {
             const backoff = BACKOFF_BASE_MS * Math.pow(2, attempt);
             await new Promise(r => setTimeout(r, backoff));

     }
   }
   // helper: fetchConfigForModel (tries multiple paths, per-request timeouts & retries)
   async function fetchConfigForModel(modelId) {
     const urls = [
         const controller = new AbortController();
         inFlight.add(controller);
         try {
+          const resp = await fetch(
+            url,
+            {
+              signal: controller.signal,
+              headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {},
+              cache: 'force-cache'
+            });
           if (resp.status === 200) {
             const json = await resp.json();
             counters.configFetch200++;
       let ok = false;
       for (let attempt = 0; attempt <= RETRIES && !ok; attempt++) {
         try {
+          const resp = await fetch(
+            url,
+            {
+              headers: hfToken ? { Authorization: `Bearer ${hfToken}` } : {},
+              cache: 'force-cache'
+            }
+          );
           if (resp.status === 429) {
             const backoff = BACKOFF_BASE_MS * Math.pow(2, attempt);
             await new Promise(r => setTimeout(r, backoff));