#!/usr/bin/env node
/**
 * hf-launch command-line entry point.
 *
 * Two sub-commands are implemented in this file:
 *   - `inspect <model>`: fetch model metadata and print a short summary.
 *   - `scaffold <agent> <model>`: build runtime + agent artifacts and write them
 *     to an output directory together with a manifest.
 */
import path from "node:path";
import { fetchModelData, modelDisplayName, normalizeBaseUrl, probeServedModelId } from "./hf.js";
import { buildAgentArtifacts } from "./agents.js";
import { writeArtifactBundle } from "./fs.js";
import {
  buildRuntimeArtifacts,
  inferServedModelId,
  listSupportedRuntimes,
  resolveRuntime
} from "./runtimes.js";

/**
 * Options that are pure on/off switches. They never consume the following
 * token, so `scaffold --probe-model pi some/model` keeps both positionals.
 */
const BOOLEAN_OPTIONS = new Set(["probe-model", "help"]);

/**
 * Print the command-line help text to stdout.
 */
function usage() {
  console.log(`hf-launch

Usage:
  hf-launch inspect <model>
  hf-launch scaffold <agent> <model> [options]

Agents:
  pi | openclaw | opencode | codex | claude

Options for scaffold:
  --runtime <name>      Force a runtime (llama.cpp, vllm, sglang, tgi, mlx-lm,
                        docker-model-runner, responses-gateway, anthropic-gateway)
  --base-url <url>      Override the runtime base URL
  --served-model <id>   Override the model id the agent should send to the backend
  --probe-model         Probe /v1/models and use the first returned model id
  --out-dir <dir>       Output directory (default: ./out/<agent>)

Options for inspect and scaffold:
  --hf-token <token>    Hugging Face token for gated/private models (default: $HF_TOKEN)
  -h, --help            Show this help
`);
}

/**
 * Split raw CLI tokens into positional arguments and `--option` values.
 *
 * Supported forms:
 *   - `--key value`  -> options.key = "value"
 *   - `--key=value`  -> options.key = "value" (the only way to pass a value
 *                       that itself starts with `--`)
 *   - `--flag`       -> options.flag = true
 *
 * Names listed in BOOLEAN_OPTIONS never consume the next token. Any other
 * option that is last on the command line, or is followed by an empty string
 * or by another `--option`, is recorded as `true`; callers that need a value
 * must reject that case (see readValueOption).
 *
 * @param {string[]} argv - Arguments after the node binary and script path.
 * @returns {{ positional: string[], options: Record<string, string | boolean> }}
 */
function parseArgs(argv) {
  const positional = [];
  const options = {};

  for (let index = 0; index < argv.length; index += 1) {
    const token = argv[index];
    if (!token.startsWith("--")) {
      positional.push(token);
      continue;
    }

    const body = token.slice(2);
    const equalsAt = body.indexOf("=");
    if (equalsAt > 0) {
      const key = body.slice(0, equalsAt);
      const value = body.slice(equalsAt + 1);
      // For switches, only an explicit "false" turns the flag off.
      options[key] = BOOLEAN_OPTIONS.has(key) ? value !== "false" : value;
      continue;
    }

    if (BOOLEAN_OPTIONS.has(body)) {
      options[body] = true;
      continue;
    }

    const next = argv[index + 1];
    if (!next || next.startsWith("--")) {
      options[body] = true;
      continue;
    }

    options[body] = next;
    index += 1; // the value token has been consumed
  }

  return { positional, options };
}

/**
 * Read an option that must carry a string value.
 *
 * @param {Record<string, string | boolean>} options - Parsed options from parseArgs.
 * @param {string} name - Option name without the leading `--`.
 * @returns {string | undefined} The value, or undefined when the option was not given.
 * @throws {Error} When the option was given without a (non-empty) value.
 */
function readValueOption(options, name) {
  const value = options[name];
  if (value === undefined) {
    return undefined;
  }
  if (typeof value !== "string" || value === "") {
    throw new Error(`--${name} requires a value.`);
  }
  return value;
}

/**
 * Return the first meaningful line of a text snippet.
 *
 * Lines are trimmed; empty lines and lines starting with `#` are skipped.
 *
 * @param {unknown} content - Text to scan; null/undefined is treated as "".
 * @returns {string | null} The first remaining line, or null if there is none.
 */
function extractFirstCommand(content) {
  return (
    String(content ?? "")
      .split(/\r?\n/)
      .map((line) => line.trim())
      .find((line) => line && !line.startsWith("#")) ?? null
  );
}

/**
 * Print a summary of a model and the runtimes that support it.
 *
 * When "llama.cpp" is among the runtimes, its runtime artifacts are built so
 * the selected GGUF quant/file and the first command line of the first CLI
 * command can be shown as well.
 *
 * @param {object} modelData - Model metadata as returned by fetchModelData.
 * @param {string[]} runtimes - Runtime keys as returned by listSupportedRuntimes.
 */
function printInspect(modelData, runtimes) {
  const lines = [
    `model: ${modelDisplayName(modelData)}`,
    `pipeline: ${modelData.pipeline_tag ?? "unknown"}`,
    `library: ${modelData.library_name ?? "unknown"}`,
    `tags: ${(modelData.tags ?? []).join(", ") || "none"}`,
    `gguf context: ${modelData.gguf?.context_length ?? "n/a"}`,
    `supported runtimes: ${runtimes.join(", ") || "none"}`
  ];

  if (runtimes.includes("llama.cpp")) {
    const runtimeArtifacts = buildRuntimeArtifacts(modelData, "llama.cpp");
    if (runtimeArtifacts.selectedGgufQuant) {
      lines.push(`llama.cpp quant: ${runtimeArtifacts.selectedGgufQuant}`);
    }
    if (runtimeArtifacts.selectedGgufPath) {
      lines.push(`llama.cpp file: ${runtimeArtifacts.selectedGgufPath}`);
    }
    const serverCommand = extractFirstCommand(runtimeArtifacts.cliCommands[0]?.content);
    if (serverCommand) {
      lines.push(`llama.cpp server: ${serverCommand}`);
    }
  }

  console.log(lines.join("\n"));
}

/**
 * Implement `hf-launch inspect <model>`.
 *
 * @param {string} modelRef - Model reference passed on the command line.
 * @param {Record<string, string | boolean>} options - Parsed CLI options.
 * @throws {Error} When `--hf-token` is given without a value.
 */
async function runInspect(modelRef, options) {
  const modelData = await fetchModelData(modelRef, {
    token: readValueOption(options, "hf-token") ?? process.env.HF_TOKEN
  });
  printInspect(modelData, listSupportedRuntimes(modelData));
}

/**
 * Implement `hf-launch scaffold <agent> <model> [options]`.
 *
 * Fetches the model metadata, resolves the runtime, determines the base URL
 * and served model id, builds the agent artifacts, writes everything (plus
 * `runtime.md` and `manifest.json`) to the output directory, and prints a
 * summary including any CLI commands and warnings.
 *
 * @param {string} agent - Agent name passed on the command line.
 * @param {string} modelRef - Model reference passed on the command line.
 * @param {Record<string, string | boolean>} options - Parsed CLI options.
 * @throws {Error} When a value option is given without a value, or when
 *   `--probe-model` is requested but no base URL is available.
 */
async function runScaffold(agent, modelRef, options) {
  // Validate every value option up front so a missing value fails fast with
  // a clear message instead of a TypeError after the model has been fetched.
  const outDirOption = readValueOption(options, "out-dir");
  const tokenOption = readValueOption(options, "hf-token");
  const runtimeOption = readValueOption(options, "runtime");
  const baseUrlOption = readValueOption(options, "base-url");
  const servedModelOption = readValueOption(options, "served-model");

  const outDir = path.resolve(outDirOption ?? path.join("out", agent));
  const modelData = await fetchModelData(modelRef, {
    token: tokenOption ?? process.env.HF_TOKEN
  });
  const runtime = resolveRuntime(modelData, runtimeOption);
  const runtimeArtifacts = buildRuntimeArtifacts(modelData, runtime.key);
  const baseUrl = normalizeBaseUrl(baseUrlOption ?? runtime.defaultBaseUrl);

  // Precedence: explicit --served-model, then --probe-model, then inference.
  let servedModelId = servedModelOption;
  if (!servedModelId && options["probe-model"]) {
    if (!baseUrl) {
      throw new Error("--probe-model requires a base URL.");
    }
    servedModelId = await probeServedModelId(baseUrl);
  }
  servedModelId = servedModelId ?? inferServedModelId(runtime, modelData, null);

  const agentArtifacts = buildAgentArtifacts({ agent, runtime, baseUrl, servedModelId, modelData });

  const warnings = [...runtimeArtifacts.warnings];
  // NOTE(review): an id starting with "__" is presumably a placeholder emitted
  // by inferServedModelId — confirm against runtimes.js.
  if (servedModelId.startsWith("__")) {
    warnings.push(
      `The served model id is still "${servedModelId}". Start the runtime and rerun with --probe-model (or pass --served-model) before copying the generated config into place.`
    );
  }

  const manifest = {
    model: modelData.repoId,
    variant: modelData.variant,
    agent,
    runtime: runtime.key,
    apiFormat: runtime.apiFormat,
    baseUrl,
    servedModelId,
    targetFiles: agentArtifacts.files.map((artifact) => ({
      name: artifact.name,
      targetPath: artifact.targetPath
    })),
    warnings
  };

  const cliCommands = [...runtimeArtifacts.cliCommands, ...agentArtifacts.cliCommands];
  const artifacts = [
    {
      name: "runtime.md",
      targetPath: "n/a (generated reference)",
      content: runtimeArtifacts.notesMarkdown
    },
    ...agentArtifacts.files,
    {
      name: "manifest.json",
      targetPath: "n/a (generated metadata)",
      content: `${JSON.stringify(manifest, null, 2)}\n`
    }
  ];

  await writeArtifactBundle(outDir, artifacts);

  console.log(`wrote ${artifacts.length} files to ${outDir}`);
  console.log(`agent: ${agent}`);
  console.log(`model: ${modelData.repoId}${modelData.variant ? `:${modelData.variant}` : ""}`);
  console.log(`runtime: ${runtime.key}`);
  console.log(`api format: ${runtime.apiFormat}`);
  console.log(`base url: ${baseUrl ?? "not set"}`);
  console.log(`served model id: ${servedModelId}`);

  if (cliCommands.length > 0) {
    console.log("\ncommands:");
    for (const command of cliCommands) {
      console.log(`\n# ${command.title}`);
      console.log(command.content);
    }
  }

  if (warnings.length > 0) {
    console.log("\nwarnings:");
    for (const warning of warnings) {
      console.log(`- ${warning}`);
    }
  }
}

/**
 * Parse the process arguments and dispatch to the requested sub-command.
 *
 * Prints usage and exits successfully when no command is given or when
 * `--help` / `-h` appears anywhere. Prints usage and sets exit code 1 for an
 * unknown command or missing positional arguments.
 */
async function main() {
  const { positional, options } = parseArgs(process.argv.slice(2));
  const [command, arg1, arg2] = positional;

  // `--help` always lands in `options`, and `-h` may appear after the command,
  // so neither can be detected reliably by looking at `command` alone.
  if (!command || options.help || positional.includes("-h")) {
    usage();
    return;
  }

  if (command === "inspect") {
    if (!arg1) {
      usage();
      process.exitCode = 1;
      return;
    }
    await runInspect(arg1, options);
    return;
  }

  if (command === "scaffold") {
    if (!arg1 || !arg2) {
      usage();
      process.exitCode = 1;
      return;
    }
    await runScaffold(arg1, arg2, options);
    return;
  }

  usage();
  process.exitCode = 1;
}

main().catch((error) => {
  // Rejections are not guaranteed to be Error instances.
  console.error(error instanceof Error ? error.message : String(error));
  process.exitCode = 1;
});