// Hosting-page header captured along with this file (not source code): Spaces: aukaru / 344 · Paused · File size: 11,980 Bytes · 5c5b371

// Don't import any other project files here as this is one of the first modules
// loaded and it will cause circular imports.

import type { Request } from "express";

/**
 * The service that a model is hosted on. Distinct from `APIFormat` because some
 * services have interoperable APIs (eg Anthropic/AWS/GCP, OpenAI/Azure).
 *
 * Every member listed here is also enumerated at runtime in `LLM_SERVICES`,
 * and each `ModelFamily` is mapped to exactly one of these values by
 * `MODEL_FAMILY_SERVICE`.
 */
export type LLMService =
  | "openai"
  | "anthropic"
  | "google-ai"
  | "mistral-ai"
  | "aws"
  | "gcp"
  | "azure"
  | "deepseek"
  | "xai"
  | "cohere"
  | "qwen";

/**
 * Model families for models served by the "openai" service.
 *
 * `OPENAI_MODEL_FAMILY_MAP` maps model-name patterns onto these values, and
 * `AzureOpenAIModelFamily` is derived from this union by prefixing each member
 * with `azure-`, so adding a member here also adds an Azure family.
 */
export type OpenAIModelFamily =
  | "turbo"
  | "gpt4"
  | "gpt4-32k"
  | "gpt4-turbo"
  | "gpt4o"
  | "gpt41"
  | "gpt41-mini"
  | "gpt41-nano"
  | "gpt45"
  | "o1"
  | "o1-mini"
  | "o1-pro"
  | "o3-pro"
  | "o3-mini"
  | "o3"
  | "o4-mini"
  | "codex-mini"
  | "dall-e"
  | "gpt-image";
/** Model families for the "anthropic" service; Opus models get their own family. */
export type AnthropicModelFamily = "claude" | "claude-opus";
/** Model families for the "google-ai" service. */
export type GoogleAIModelFamily =
  | "gemini-flash"
  | "gemini-pro"
  | "gemini-ultra";
/** Model families for the "mistral-ai" service. */
export type MistralAIModelFamily =
  // mistral changes their model classes frequently so these no longer
  // correspond to specific models. consider them rough pricing tiers.
  "mistral-tiny" | "mistral-small" | "mistral-medium" | "mistral-large";
/**
 * Model families for the "aws" service: every Anthropic and Mistral family,
 * prefixed with `aws-`.
 */
export type AwsBedrockModelFamily = `aws-${
  | AnthropicModelFamily
  | MistralAIModelFamily}`;
/** Model families for the "gcp" service (Claude models only). */
export type GcpModelFamily = "gcp-claude" | "gcp-claude-opus";
/** Model families for the "azure" service: every OpenAI family, prefixed with `azure-`. */
export type AzureOpenAIModelFamily = `azure-${OpenAIModelFamily}`;
/** The single model family for the "deepseek" service. */
export type DeepseekModelFamily = "deepseek";
/** The single model family for the "xai" service. */
export type XaiModelFamily = "xai";
/** The single model family for the "cohere" service. */
export type CohereModelFamily = "cohere";
/** The single model family for the "qwen" service. */
export type QwenModelFamily = "qwen";

/**
 * Union of every per-service model family. `MODEL_FAMILIES` lists the same
 * values at runtime and `MODEL_FAMILY_SERVICE` maps each one to its service.
 */
export type ModelFamily =
  | OpenAIModelFamily
  | AnthropicModelFamily
  | GoogleAIModelFamily
  | MistralAIModelFamily
  | AwsBedrockModelFamily
  | GcpModelFamily
  | AzureOpenAIModelFamily
  | DeepseekModelFamily
  | XaiModelFamily
  | CohereModelFamily
  | QwenModelFamily;

/**
 * Runtime list of every `ModelFamily` value, kept as a readonly tuple of
 * string literals.
 *
 * The array is passed through an immediately-invoked generic identity function
 * purely for a compile-time exhaustiveness check: the parameter type is
 * intersected with `never` unless every member of `ModelFamily` appears in the
 * tuple's element type, so omitting a family makes the call fail to type-check.
 * At runtime the function just returns the array it was given.
 */
export const MODEL_FAMILIES = (<A extends readonly ModelFamily[]>(
  arr: A & ([ModelFamily] extends [A[number]] ? unknown : never)
) => arr)([
  "qwen",
  "cohere",
  "xai",
  "deepseek",
  "turbo",
  "gpt4",
  "gpt4-32k",
  "gpt4-turbo",
  "gpt4o",
  "gpt45",
  "gpt41",
  "gpt41-mini",
  "gpt41-nano",
  "o1",
  "o1-mini",
  "o1-pro",
  "o3-pro",
  "o3-mini",
  "o3",
  "o4-mini",
  "codex-mini",
  "dall-e",
  "gpt-image",
  "claude",
  "claude-opus",
  "gemini-flash",
  "gemini-pro",
  "gemini-ultra",
  "mistral-tiny",
  "mistral-small",
  "mistral-medium",
  "mistral-large",
  "aws-claude",
  "aws-claude-opus",
  "aws-mistral-tiny",
  "aws-mistral-small",
  "aws-mistral-medium",
  "aws-mistral-large",
  "gcp-claude",
  "gcp-claude-opus",
  "azure-turbo",
  "azure-gpt4",
  "azure-gpt4-32k",
  "azure-gpt4-turbo",
  "azure-gpt4o",
  "azure-gpt45",
  "azure-gpt41",
  "azure-gpt41-mini",
  "azure-gpt41-nano",
  "azure-dall-e",
  "azure-o1",
  "azure-o1-mini",
  "azure-o1-pro",
  "azure-o3-pro",
  "azure-o3-mini",
  "azure-o3",
  "azure-o4-mini",
  "azure-codex-mini",
  "azure-gpt-image",
] as const);

/**
 * Runtime list of every `LLMService` value, kept as a readonly tuple of string
 * literals.
 *
 * Uses the same immediately-invoked identity function as `MODEL_FAMILIES`: the
 * parameter type collapses to `never` unless every member of `LLMService` is
 * present in the tuple, so a missing service is a compile-time error.
 */
export const LLM_SERVICES = (<A extends readonly LLMService[]>(
  arr: A & ([LLMService] extends [A[number]] ? unknown : never)
) => arr)([
  "openai",
  "anthropic",
  "google-ai",
  "mistral-ai",
  "aws",
  "gcp",
  "azure",
  "deepseek",
  "xai",
  "cohere",
  "qwen"
] as const);

/**
 * Maps each model family to the service that hosts it.
 *
 * The mapped type requires a key for every member of `ModelFamily`, so adding
 * a family to the union without adding an entry here is a compile-time error.
 */
export const MODEL_FAMILY_SERVICE: {
  [f in ModelFamily]: LLMService;
} = {
  qwen: "qwen",
  cohere: "cohere",
  xai: "xai",
  deepseek: "deepseek",
  turbo: "openai",
  gpt4: "openai",
  "gpt4-turbo": "openai",
  "gpt4-32k": "openai",
  gpt4o: "openai",
  gpt45: "openai",
  gpt41: "openai",
  "gpt41-mini": "openai",
  "gpt41-nano": "openai",
  "o1": "openai",
  "o1-mini": "openai",
  "o1-pro": "openai",
  "o3-pro": "openai",
  "o3-mini": "openai",
  "o3": "openai",
  "o4-mini": "openai",
  "codex-mini": "openai",
  "dall-e": "openai",
  "gpt-image": "openai",
  claude: "anthropic",
  "claude-opus": "anthropic",
  "aws-claude": "aws",
  "aws-claude-opus": "aws",
  "aws-mistral-tiny": "aws",
  "aws-mistral-small": "aws",
  "aws-mistral-medium": "aws",
  "aws-mistral-large": "aws",
  "gcp-claude": "gcp",
  "gcp-claude-opus": "gcp",
  "azure-turbo": "azure",
  "azure-gpt4": "azure",
  "azure-gpt4-32k": "azure",
  "azure-gpt4-turbo": "azure",
  "azure-gpt4o": "azure",
  "azure-gpt45": "azure",
  "azure-gpt41": "azure",
  "azure-gpt41-mini": "azure",
  "azure-gpt41-nano": "azure",
  "azure-dall-e": "azure",
  "azure-o1": "azure",
  "azure-o1-mini": "azure",
  "azure-o1-pro": "azure",
  "azure-o3-pro": "azure",
  "azure-o3-mini": "azure",
  "azure-o3": "azure",
  "azure-o4-mini": "azure",
  "azure-codex-mini": "azure",
  "azure-gpt-image": "azure",
  "gemini-flash": "google-ai",
  "gemini-pro": "google-ai",
  "gemini-ultra": "google-ai",
  "mistral-tiny": "mistral-ai",
  "mistral-small": "mistral-ai",
  "mistral-medium": "mistral-ai",
  "mistral-large": "mistral-ai",
};

/**
 * Model families that denote image generation models: the DALL-E and
 * gpt-image families on both the OpenAI and Azure services. Note that despite
 * the name, the entries are model families, not individual model ids.
 */
export const IMAGE_GEN_MODELS: ModelFamily[] = ["dall-e", "azure-dall-e", "gpt-image", "azure-gpt-image"];

/**
 * Maps regular-expression sources to the OpenAI model family that a matching
 * model name belongs to.
 *
 * Keys are regex *source strings* (they are handed to `String.prototype.match`
 * by the lookup functions), so every regex backslash must be written as `\\`
 * inside the string literal; a single `\d` silently degrades to a literal `d`.
 *
 * Lookups iterate the entries in insertion order and stop at the first match,
 * so more specific patterns must stay ahead of unanchored prefixes.
 */
export const OPENAI_MODEL_FAMILY_MAP: { [regex: string]: OpenAIModelFamily } = {
  "^gpt-image(-\\d+)?(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt-image",
  "^gpt-4\\.5(-preview)?(-\\d{4}-\\d{2}-\\d{2})?$": "gpt45",
  "^gpt-4\\.1(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41",
  "^gpt-4\\.1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-mini",
  "^gpt-4\\.1-nano(-\\d{4}-\\d{2}-\\d{2})?$": "gpt41-nano",
  "^gpt-4o(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4o",
  "^chatgpt-4o": "gpt4o",
  "^gpt-4o-mini(-\\d{4}-\\d{2}-\\d{2})?$": "turbo", // closest match
  "^gpt-4-turbo(-\\d{4}-\\d{2}-\\d{2})?$": "gpt4-turbo",
  "^gpt-4-turbo(-preview)?$": "gpt4-turbo",
  "^gpt-4-(0125|1106)(-preview)?$": "gpt4-turbo",
  "^gpt-4(-\\d{4})?-vision(-preview)?$": "gpt4-turbo",
  "^gpt-4-32k-\\d{4}$": "gpt4-32k",
  "^gpt-4-32k$": "gpt4-32k",
  "^gpt-4-\\d{4}$": "gpt4",
  "^gpt-4$": "gpt4",
  // Dot escaped so it only matches a literal "." like the gpt-4.x patterns.
  "^gpt-3\\.5-turbo": "turbo",
  "^text-embedding-ada-002$": "turbo",
  "^dall-e-\\d{1}$": "dall-e",
  "^o1-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o1-mini",
  "^o1-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o1-pro",
  "^o3-pro(-\\d{4}-\\d{2}-\\d{2})?$": "o3-pro",
  "^o1(-\\d{4}-\\d{2}-\\d{2})?$": "o1",
  "^o3-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o3-mini",
  "^o3(-\\d{4}-\\d{2}-\\d{2})?$": "o3",
  "^o4-mini(-\\d{4}-\\d{2}-\\d{2})?$": "o4-mini",
  // Backslashes doubled: the previous single `\d` matched the letter "d", so
  // dated ids such as "codex-mini-2025-05-16" were never recognised.
  "^codex-mini(-latest|-\\d{4}-\\d{2}-\\d{2})?$": "codex-mini",
};

/**
 * Resolves an OpenAI model name to its model family.
 *
 * The patterns in `OPENAI_MODEL_FAMILY_MAP` are tried in insertion order and
 * the family of the first one that matches wins.
 *
 * @param model Model name as sent by the client.
 * @param defaultFamily Family to return when no pattern matches.
 * @returns The matched family, or `defaultFamily`.
 */
export function getOpenAIModelFamily(
  model: string,
  defaultFamily: OpenAIModelFamily = "gpt4"
): OpenAIModelFamily {
  const firstHit = Object.entries(OPENAI_MODEL_FAMILY_MAP).find(
    ([pattern]) => model.match(pattern) !== null
  );
  return firstHit ? firstHit[1] : defaultFamily;
}

/**
 * Resolves an Anthropic model name to its model family.
 *
 * @param model Model name as sent by the client.
 * @returns "claude-opus" when the name contains "opus", otherwise "claude".
 */
export function getClaudeModelFamily(model: string): AnthropicModelFamily {
  return model.includes("opus") ? "claude-opus" : "claude";
}

/**
 * Resolves a Google AI model name to its model family.
 *
 * "ultra" takes precedence over "flash"; anything containing neither is
 * treated as "gemini-pro".
 *
 * @param model Model name as sent by the client.
 * @returns The Gemini family for the model.
 */
export function getGoogleAIModelFamily(model: string): GoogleAIModelFamily {
  if (model.includes("ultra")) {
    return "gemini-ultra";
  }
  if (model.includes("flash")) {
    return "gemini-flash";
  }
  return "gemini-pro";
}

/**
 * Resolves a Mistral model name to one of the rough pricing-tier families.
 *
 * A trailing `-latest` or numeric version/date suffix (`-2505`, `-2024-07`,
 * `-2024-07-18`) is stripped first, so every `case` label below must be the
 * suffix-free base name; labels that still carry such a suffix can never
 * match.
 *
 * @param model Model name as sent by the client.
 * @returns The tier family for the model; unknown models are treated as
 *   "mistral-small".
 */
export function getMistralAIModelFamily(model: string): MistralAIModelFamily {
  const prunedModel = model.replace(/-(latest|\d{4}(-\d{2}){0,2})$/, "");

  switch (prunedModel) {
    // Names that already are a family name map to themselves.
    case "mistral-tiny":
    case "mistral-small":
    case "mistral-medium":
    case "mistral-large":
      return prunedModel as MistralAIModelFamily;

    // Large tier
    case "pixtral-large":
      return "mistral-large";

    // Medium tier. "magistral-medium" was previously listed only as
    // "magistral-medium-latest", which is unreachable after pruning, so it
    // fell through to the small-tier default.
    case "magistral-medium":
    case "open-mixtral-8x22b":
      return "mistral-medium";

    // Small tier
    case "codestral":
    case "ministral-8b":
    case "mistral-embed":
    case "magistral-small":
    case "pixtral":
    case "pixtral-12b":
    case "open-mistral-nemo":
    case "open-mixtral-8x7b":
    case "open-codestral-mamba":
    case "mathstral":
      return "mistral-small";

    // Tiny tier
    case "ministral-3b":
    case "open-mistral-7b":
      return "mistral-tiny";

    // Default to small if unknown
    default:
      return "mistral-small";
  }
}

/**
 * Resolves an AWS Bedrock model id to its `aws-` prefixed model family.
 *
 * Ids mentioning "claude" or "anthropic" are classified with the Claude
 * rules, ids containing "tral" with the Mistral rules, and anything else is
 * treated as "aws-claude".
 *
 * @param model Bedrock model id as sent by the client.
 * @returns The AWS model family for the id.
 */
export function getAwsBedrockModelFamily(model: string): AwsBedrockModelFamily {
  // Drop the vendor prefix and the version suffix from the AWS id, e.g.
  // 'anthropic.claude-3-5-sonnet-20240620-v1:0' -> 'claude-3-5-sonnet-20240620'
  const bareId = model.replace(/^(\w+)\.(.+?)(-v\d+)?(:\d+)*$/, "$2");

  const looksLikeClaude =
    model.includes("claude") || model.includes("anthropic");
  if (looksLikeClaude) {
    return `aws-${getClaudeModelFamily(bareId)}`;
  }

  if (model.includes("tral")) {
    return `aws-${getMistralAIModelFamily(bareId)}`;
  }

  return "aws-claude";
}

/**
 * Resolves a GCP-hosted Claude model name to its model family.
 *
 * @param model Model name as sent by the client.
 * @returns "gcp-claude-opus" when the name contains "opus", otherwise
 *   "gcp-claude".
 */
export function getGcpModelFamily(model: string): GcpModelFamily {
  return model.includes("opus") ? "gcp-claude-opus" : "gcp-claude";
}

/**
 * Resolves an Azure OpenAI model name to its `azure-` prefixed model family.
 *
 * The name is first converted to its OpenAI spelling and then matched against
 * `OPENAI_MODEL_FAMILY_MAP` in insertion order; the first match wins.
 *
 * @param model Model name, possibly carrying an "azure-" routing prefix.
 * @param defaultFamily Family to return when no pattern matches.
 * @returns The matched family with an `azure-` prefix, or `defaultFamily`.
 */
export function getAzureOpenAIModelFamily(
  model: string,
  defaultFamily: AzureOpenAIModelFamily = "azure-gpt4"
): AzureOpenAIModelFamily {
  // Azure model names omit periods, and addAzureKey prepends "azure-" to the
  // model name so the request reaches the right keyprovider. Undo both before
  // matching against the OpenAI patterns.
  const withPeriod = model.replace("gpt-35-turbo", "gpt-3.5-turbo");
  const openAIName = withPeriod.replace("azure-", "");

  const firstHit = Object.entries(OPENAI_MODEL_FAMILY_MAP).find(
    ([pattern]) => openAIName.match(pattern) !== null
  );
  if (!firstHit) {
    return defaultFamily;
  }
  return `azure-${firstHit[1]}` as AzureOpenAIModelFamily;
}

/**
 * Asserts that a string is one of the values listed in `MODEL_FAMILIES`,
 * narrowing its type to `ModelFamily` for the caller.
 *
 * @param modelFamily Candidate family name.
 * @throws Error when the value is not a known model family.
 */
export function assertIsKnownModelFamily(
  modelFamily: string
): asserts modelFamily is ModelFamily {
  const knownFamilies: readonly string[] = MODEL_FAMILIES;
  if (knownFamilies.includes(modelFamily)) {
    return;
  }
  throw new Error(`Unknown model family: ${modelFamily}`);
}

/**
 * Determines the model family for a request and caches it on the request.
 *
 * If `req.modelFamily` is already set it is returned unchanged. Otherwise the
 * family is derived from `req.service`, `req.outboundApi` and the requested
 * model name (`req.body.model`, defaulting to "gpt-3.5-turbo"), stored in
 * `req.modelFamily`, and returned.
 *
 * @param req The incoming request.
 * @returns The model family the request belongs to.
 * @throws Error via `assertNever` when `req.outboundApi` is not a handled
 *   value.
 */
export function getModelFamilyForRequest(req: Request): ModelFamily {
  if (req.modelFamily) return req.modelFamily;
  // There is a single request queue, but it is partitioned by model family.
  // Model families are typically separated on cost/rate limit boundaries so
  // they should be treated as separate queues.
  const model = req.body.model ?? "gpt-3.5-turbo";
  let modelFamily: ModelFamily;

  // Weird special case for AWS/GCP/Azure because they serve models with
  // different API formats, so the outbound API alone is not sufficient to
  // determine the partition.
  if (req.service === "aws") {
    modelFamily = getAwsBedrockModelFamily(model);
  } else if (req.service === "gcp") {
    modelFamily = getGcpModelFamily(model);
  } else if (req.service === "azure") {
    modelFamily = getAzureOpenAIModelFamily(model);
  } else if (req.service === "qwen") {
    modelFamily = "qwen";
  } else {
    switch (req.outboundApi) {
      case "anthropic-chat":
      case "anthropic-text":
        modelFamily = getClaudeModelFamily(model);
        break;
      case "openai":
      case "openai-text":
      case "openai-image":
        // Some services speak the OpenAI API format but have a single family
        // of their own. The xai check used to sit after a `break`, so it never
        // ran and xai requests were classified as OpenAI families.
        // NOTE(review): the "cohere" service is not special-cased anywhere in
        // this function; confirm whether it should map to "cohere" here too.
        if (req.service === "deepseek") {
          modelFamily = "deepseek";
        } else if (req.service === "xai") {
          modelFamily = "xai";
        } else {
          modelFamily = getOpenAIModelFamily(model);
        }
        break;
      case "google-ai":
        modelFamily = getGoogleAIModelFamily(model);
        break;
      case "mistral-ai":
      case "mistral-text":
        modelFamily = getMistralAIModelFamily(model);
        break;
      case "openai-responses":
        modelFamily = getOpenAIModelFamily(model);
        break;
      default:
        // Compile-time exhaustiveness check; throws if reached at runtime.
        assertNever(req.outboundApi);
    }
  }

  // Cache on the request so later calls take the early return above.
  return (req.modelFamily = modelFamily);
}

/**
 * Exhaustiveness helper: accepts only `never`, so a call fails to type-check
 * while any union member is left unhandled. Always throws if reached.
 *
 * @param x The value that should have been impossible.
 * @throws Error always, with the unexpected value in the message.
 */
function assertNever(x: never): never {
  const message = `Called assertNever with argument ${x}.`;
  throw new Error(message);
}