#!/usr/bin/env node
// Fetches top GGUF models from HF, parses quants + file sizes, writes static/models.json.
import { mkdirSync, writeFileSync } from 'node:fs';
import { fileURLToPath } from 'node:url';
import { dirname, resolve } from 'node:path';

// Directory containing this script (ES modules have no built-in __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));
// Output file: ../static/models.json relative to this script's directory.
const OUT = resolve(__dirname, '..', 'static', 'models.json');

// Number of models requested from the listing endpoint.
const LIMIT = 100;
// Optional access token read from the environment.
const HF_TOKEN = process.env.HF_TOKEN;
// Request headers passed to fetch(): a Bearer Authorization header only when a token is set.
const headers = HF_TOKEN ? { Authorization: `Bearer ${HF_TOKEN}` } : {};

// Quantization label inside a file name, e.g. Q4_K_M, IQ3_XXS, Q8, F16, BF16, F32, FP8,
// MXFP4 or MXFP4_MOE. Case-insensitive and unanchored, so the leftmost occurrence wins.
const QUANT_RE = /(IQ\d_[A-Z0-9_]+|Q\d+_[A-Z0-9_]+|Q\d+|F16|BF16|F32|FP8|MXFP4(?:_MOE)?)/i;
// Multi-part shard suffix such as "-00001-of-00003" (two five-digit numbers, both captured).
const SHARD_RE = /-(\d{5})-of-(\d{5})/;
// A number (optionally decimal) followed by b/B, e.g. the "8" in "8B" or "1.5" in "1.5b".
// The lookahead rejects a lowercase letter right after the b, so "4bit" does not match.
const PARAM_RE = /(\d+(?:\.\d+)?)\s*[bB](?![a-z])/;
// pipeline_tag values that main() keeps; entries without a pipeline_tag are kept as well.
const TEXT_GEN_TAGS = new Set(['text-generation', 'conversational', 'image-text-to-text']);

/**
 * GETs `url` with the module-level `headers` and decodes the response body as JSON.
 *
 * @param {string} url - Absolute URL to request.
 * @returns {Promise<any>} The parsed JSON body.
 * @throws {Error} With message "<status> <url>" when the response is not ok.
 */
async function fetchJson(url) {
  const response = await fetch(url, { headers });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`${response.status} ${url}`);
}

/**
 * Extracts the quantization label from a file path.
 *
 * Only the last path segment is inspected, so a label that appears solely in a
 * directory name is ignored.
 *
 * @param {string} filename - File path, possibly containing "/" separators.
 * @returns {string|null} The upper-cased label (e.g. "Q4_K_M"), or null if none is found.
 */
function parseQuant(filename) {
  const segments = filename.split('/');
  const lastSegment = segments[segments.length - 1];
  const hit = lastSegment.match(QUANT_RE);
  if (!hit) {
    return null;
  }
  return hit[1].toUpperCase();
}

/**
 * Estimates a model's parameter count in billions.
 *
 * The GGUF metadata total is preferred when it yields a value strictly between
 * 0.1 and 2000 (rounded to one decimal). Otherwise the model id and then each
 * file name are scanned, in order, for a "<number>B" token.
 *
 * @param {string} modelId - Repository id, e.g. "org/Llama-3.1-8B".
 * @param {string[]} fileNames - File paths to scan after the model id.
 * @param {{total?: number}|null|undefined} ggufMeta - GGUF metadata, if available.
 * @returns {number|null} Parameter count in billions, or null when nothing matches.
 */
function parseParams(modelId, fileNames, ggufMeta) {
  const totalParams = ggufMeta?.total;
  if (totalParams) {
    const billions = totalParams / 1e9;
    const plausible = billions > 0.1 && billions < 2000;
    if (plausible) {
      return Math.round(billions * 10) / 10;
    }
  }

  // Fall back to names such as "Llama-3.1-8B"; the first name that matches wins.
  const names = [modelId, ...fileNames];
  for (let idx = 0; idx < names.length; idx += 1) {
    const hit = names[idx].match(PARAM_RE);
    if (hit) {
      return parseFloat(hit[1]);
    }
  }
  return null;
}

/**
 * Collapses multi-part shards (e.g. "-00001-of-00003.gguf") into one logical file.
 *
 * Files are keyed by their path with the shard suffix removed; unsharded files
 * keep their own path. Sizes are summed and the number of contributing files is
 * counted. Output order follows the first appearance of each key.
 *
 * @param {{path: string, size?: number}[]} ggufFiles - Files to merge.
 * @returns {{path: string, size: number, parts: number}[]} One entry per logical file.
 */
function groupShards(ggufFiles) {
  const byLogicalPath = new Map();
  for (const { path, size } of ggufFiles) {
    // replace() returns the path unchanged when there is no shard suffix.
    const logicalPath = path.replace(SHARD_RE, '');
    let entry = byLogicalPath.get(logicalPath);
    if (entry === undefined) {
      entry = { path: logicalPath, size: 0, parts: 0 };
      byLogicalPath.set(logicalPath, entry);
    }
    entry.size += size || 0;
    entry.parts += 1;
  }
  return Array.from(byLogicalPath.values());
}

/**
 * Builds the output entry for one model from the listing.
 *
 * Fetches the repository file tree and the model detail record, keeps the
 * non-empty `.gguf` files (dropping projector and imatrix files), merges
 * multi-part shards, and labels each resulting file with its quantization.
 *
 * @param {object} m - Listing entry. `id` is required; `downloads`, `likes`,
 *   `pipeline_tag` and `tags` are optional.
 * @returns {Promise<object|null>} The entry with its quants sorted by ascending
 *   size, or null when the repo has no usable GGUF file, no file with a
 *   recognizable quant, or any step throws. Failures are logged with
 *   console.warn and swallowed so one bad repo does not abort the run.
 */
async function processModel(m) {
  try {
    // The two requests do not depend on each other, so issue them together
    // instead of waiting for the tree before asking for the detail record.
    const [tree, detail] = await Promise.all([
      fetchJson(`https://huggingface.co/api/models/${m.id}/tree/main?recursive=true`),
      fetchJson(`https://huggingface.co/api/models/${m.id}`)
    ]);

    const ggufFiles = tree
      .filter((t) => {
        if (t.type !== 'file' || !t.size) return false;
        const p = t.path.toLowerCase();
        if (!p.endsWith('.gguf')) return false;
        // Skip auxiliary files: multimodal projectors and imatrix calibration data.
        if (p.includes('mmproj') || p.includes('projector')) return false;
        if (p.includes('imatrix')) return false;
        return true;
      })
      .map((t) => ({ path: t.path, size: t.size }));
    if (ggufFiles.length === 0) return null;

    const quants = [];
    for (const g of groupShards(ggufFiles)) {
      const quant = parseQuant(g.path);
      // Files whose name carries no recognizable quant label are left out.
      if (!quant) continue;
      quants.push({
        path: g.path,
        size: g.size,
        sizeGB: +(g.size / 1024 ** 3).toFixed(2),
        quant,
        sharded: g.parts > 1
      });
    }
    if (quants.length === 0) return null;

    const gguf = detail.gguf;
    const params = parseParams(m.id, ggufFiles.map((f) => f.path), gguf);
    const arch = gguf?.architecture || detail.config?.model_type || null;
    // The id has the form "author/name"; anything after the first "/" is the name.
    const [author, ...nameParts] = m.id.split('/');

    return {
      id: m.id,
      author,
      name: nameParts.join('/'),
      downloads: m.downloads || 0,
      likes: m.likes || 0,
      pipeline_tag: m.pipeline_tag || null,
      params_b: params,
      arch,
      n_layers: gguf?.n_layers || null,
      n_kv_heads: gguf?.n_kv_heads || gguf?.n_heads || null,
      n_embd: gguf?.n_embd || null,
      context_length: gguf?.context_length || null,
      tags: m.tags || [],
      quants: quants.sort((a, b) => a.size - b.size)
    };
  } catch (err) {
    console.warn(`  skip ${m.id}: ${err.message}`);
    return null;
  }
}

/**
 * Script entry point.
 *
 * Requests the top LIMIT GGUF models sorted by downloads, keeps those whose
 * pipeline_tag is missing or listed in TEXT_GEN_TAGS, processes them one at a
 * time, and writes the surviving entries (sorted by downloads, descending) to
 * OUT as pretty-printed JSON.
 *
 * @returns {Promise<void>} Resolves once the file has been written. Rejects if
 *   the listing request or the write fails.
 */
async function main() {
  console.log(`Fetching top ${LIMIT} GGUF models...`);
  const list = await fetchJson(
    `https://huggingface.co/api/models?filter=gguf&sort=downloads&direction=-1&limit=${LIMIT}`
  );
  console.log(`Got ${list.length} models. Filtering to text-generation...`);

  // Entries without a pipeline_tag are kept as candidates too.
  const candidates = list.filter(
    (m) => !m.pipeline_tag || TEXT_GEN_TAGS.has(m.pipeline_tag)
  );
  console.log(`${candidates.length} candidates after filter.`);

  // Models are processed sequentially, in listing order.
  const results = [];
  for (const [index, m] of candidates.entries()) {
    process.stdout.write(`[${index + 1}/${candidates.length}] ${m.id}... `);
    const out = await processModel(m);
    if (out) {
      results.push(out);
      console.log(`OK (${out.quants.length} quants)`);
    } else {
      console.log('skip');
    }
  }

  results.sort((a, b) => b.downloads - a.downloads);
  // Make sure the target directory exists; otherwise a missing directory would
  // only surface as ENOENT here, after every request has already been made.
  mkdirSync(dirname(OUT), { recursive: true });
  writeFileSync(
    OUT,
    JSON.stringify(
      { generated_at: new Date().toISOString(), count: results.length, models: results },
      null,
      2
    )
  );
  console.log(`\nWrote ${results.length} models to ${OUT}`);
}

// Run the script; any error that escapes main() is printed and turns into exit code 1.
main().catch((fatal) => {
  console.error(fatal);
  process.exit(1);
});