carbon-tokenization

Running

File size: 2,301 Bytes

561e6f0
 
f6678ab
561e6f0
 
6b6afea
 
 
 
 
3afbbdf
 
 
 
 
 
 
 
 
 
 
 
6b6afea
 
 
 
 
 
 
f6678ab
6b6afea
 
3afbbdf
6b6afea
 
f6678ab
561e6f0
 
f6678ab
 
 
 
 
 
 
 
 
 
 
561e6f0

import { editorTools } from "./tools.js";
import { SYSTEM_PROMPT, buildMessages } from "./system-prompt.js";
import { streamChatResponse } from "./stream-handler.js";
import type { Request, Response } from "express";

/**
 * Model choices offered by the UI picker.
 *
 * Every id has to be available through Hugging Face Inference Providers
 * (`https://router.huggingface.co/v1`) and must support function/tool
 * calling, because the agent loop cannot run without it.
 *
 * Provider suffixes (`:provider`):
 * An id without a suffix falls back to HF Router's default `:fastest`
 * provider policy. That is normally fine, but some providers are a poor
 * match for the editor's workload:
 *  - Groq validates tool calls strictly and tends to reject our 18-tool
 *    registry with `Failed to call a function`.
 *  - Nscale and a few others reject the `tools` parameter outright.
 *  - Fireworks has deprecated several Llama 3.x checkpoints.
 * For that reason `Llama-3.3-70B` is pinned to Together, which serves
 * the model with full tool-calling support.
 *
 * More conversational models can be found at:
 *   https://huggingface.co/models?inference_provider=all&other=conversational
 *
 * Fields: `context` is the advertised context window and `cost` is a
 * rough relative price tag ($, $$, $$$). Inference providers charge
 * their own rates, so treat their docs as the source of truth.
 */
export const AVAILABLE_MODELS = [
  {
    id: "openai/gpt-oss-120b",
    label: "GPT-OSS 120B",
    context: "131K",
    cost: "$$",
  },
  {
    id: "openai/gpt-oss-20b",
    label: "GPT-OSS 20B",
    context: "131K",
    cost: "$",
  },
  {
    // Pinned to the Together provider (see the suffix note above).
    id: "meta-llama/Llama-3.3-70B-Instruct:together",
    label: "Llama 3.3 70B",
    context: "128K",
    cost: "$",
  },
  {
    id: "Qwen/Qwen3-Coder-480B-A35B-Instruct",
    label: "Qwen3 Coder 480B",
    context: "262K",
    cost: "$$",
  },
  {
    id: "deepseek-ai/DeepSeek-V3.1",
    label: "DeepSeek V3.1",
    context: "128K",
    cost: "$$",
  },
];

/**
 * Express handler for the chat endpoint.
 *
 * Reads the optional `context` object from the request body, renders it
 * with `buildMessages`, and appends the result to `SYSTEM_PROMPT` under a
 * "Current context" heading. When `buildMessages` yields a falsy value
 * the base prompt is used unchanged. The request is then handed to
 * `streamChatResponse` together with the editor tool registry.
 *
 * @param req - Incoming request. `req.body.context` may carry `document`,
 *   `selection` and `frontmatter`; the body, the context object and each
 *   of its fields are all treated as optional.
 * @param res - Response object, passed through to `streamChatResponse`.
 * @returns Whatever `streamChatResponse` returns.
 */
export async function handleChat(req: Request, res: Response) {
  // `req.body` is undefined when no body-parsing middleware populated it.
  // Destructuring it directly would throw inside this async handler, so
  // read `context` defensively instead.
  const context = req.body?.context;
  const contextBlock = buildMessages(context?.document, context?.selection, context?.frontmatter);

  // Only append the context section when there is something to show.
  const systemPrompt = contextBlock
    ? `${SYSTEM_PROMPT}\n\n## Current context\n\n${contextBlock}`
    : SYSTEM_PROMPT;

  return streamChatResponse(req, res, {
    systemPrompt,
    tools: editorTools,
    logPrefix: "chat",
  });
}