File size: 19,731 Bytes
5374a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
from pydantic import BaseModel, Field
from typing import Optional, Union, List
# import torch
from ..core.base_config import BaseConfig
#### LLM Configs
class LLMConfig(BaseConfig):
    """Common base for the LLM configuration classes declared in this module.

    Declares the two required identifiers (``llm_type`` and ``model``) and
    one optional flag. Subclasses give ``llm_type`` a backend-specific
    default and add their own credential / generation fields.
    """

    # Backend identifier; required here, defaulted by each subclass.
    llm_type: str
    # Model identifier; required, no default is provided.
    model: str
    output_response: bool = Field(
        default=False,
        description="Whether to output LLM response.",
    )
class OpenAILLMConfig(LLMConfig):
    """Configuration for the OpenAI backend (``llm_type`` defaults to ``"OpenAILLM"``).

    Every field besides the inherited ``model`` is optional and defaults to
    ``None``. ``tool_choice`` accepts either a mode string or a dict that
    names a specific function, matching the forms listed in its description.
    """

    llm_type: str = "OpenAILLM"
    openai_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenAI requests",
    )

    # ---- generation parameters ----
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens. This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # ---- tools ----
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Widened from Optional[str]: the description documents a dict form for
    # forcing a particular function, which a str-only annotation cannot hold.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # ---- reasoning parameters ----
    reasoning_effort: Optional[str] = Field(
        default=None,
        description="Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
    )

    # ---- token probabilities ----
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # ---- predicted outputs ----
    prediction: Optional[dict] = Field(
        default=None,
        description="Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content.",
    )

    # ---- output format ----
    modalities: Optional[List] = Field(
        default=None,
        description="Output types that you would like the model to generate for this request. Most models are capable of generating text, which is the default: [\"text\"]",
    )
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )
# ==== Azure OpenAI Configuration ====
class AzureOpenAIConfig(LLMConfig):
    """Configuration for the Azure OpenAI backend (``llm_type`` defaults to ``"AzureOpenAILLM"``).

    ``azure_endpoint`` and ``azure_key`` are required; ``api_version``
    falls back to ``"2024-12-01-preview"`` when not supplied.
    """

    llm_type: str = "AzureOpenAILLM"
    azure_endpoint: str = Field(
        ...,
        description="Azure OpenAI service endpoint URL",
    )
    azure_key: str = Field(
        ...,
        description="Azure OpenAI API key for authentication",
    )
    api_version: Optional[str] = Field(
        default="2024-12-01-preview",
        description="Azure OpenAI API version to use",
    )
    # The 'model' field inherited from LLMConfig is used to specify the deployment name.
    # NOTE(review): this class derives from LLMConfig, not OpenAILLMConfig, so the
    # generation parameters (temperature, max_tokens, etc.) declared on
    # OpenAILLMConfig are NOT fields of this class — confirm whether the base
    # class or the earlier "inherited from OpenAILLMConfig" remark was intended.
class LiteLLMConfig(LLMConfig):
    """Configuration for the LiteLLM backend (``llm_type`` defaults to ``"LiteLLM"``).

    Holds a generic ``api_base`` / ``api_key`` pair, one optional key per
    listed provider, optional Azure OpenAI settings, and generation / tool
    parameters. ``tool_choice`` accepts either a mode string or a dict that
    names a specific function, matching the forms listed in its description.
    """

    llm_type: str = "LiteLLM"
    api_base: Optional[str] = Field(
        default=None,
        description="Base URL for the LLM API (e.g., http://localhost:11434/v1 for Ollama)",
    )
    is_local: Optional[bool] = Field(
        default=False,
        description="Whether the model is running locally (e.g., Ollama)",
    )
    api_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate generic OpenAI-compatible requests (e.g., LM Studio, FastChat, LocalAI)",
    )

    # ---- provider-specific keys ----
    openai_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenAI requests",
    )
    anthropic_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Anthropic requests",
    )
    deepseek_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Deepseek requests",
    )
    gemini_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Gemini requests",
    )
    meta_llama_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Meta Llama requests",
    )
    openrouter_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenRouter requests",
    )
    openrouter_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1",
        description="the base URL used to authenticate OpenRouter requests",
    )
    perplexity_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Perplexity requests",
    )
    groq_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Groq requests",
    )

    # ---- Azure OpenAI settings (all optional here) ----
    azure_endpoint: Optional[str] = Field(
        default=None,
        description="Azure OpenAI service endpoint URL",
    )
    azure_key: Optional[str] = Field(
        default=None,
        description="Azure OpenAI API key for authentication",
    )
    api_version: Optional[str] = Field(
        default=None,
        description="Azure OpenAI API version to use",
    )

    # ---- generation parameters ----
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # ---- tools ----
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Widened from Optional[str]: the description documents a dict form for
    # forcing a particular function, which a str-only annotation cannot hold.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # ---- token probabilities ----
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # ---- output format ----
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )

    def __str__(self) -> str:
        """Return the value of the ``model`` field as this config's string form."""
        return self.model
class SiliconFlowConfig(LLMConfig):
    """Configuration for the SiliconFlow backend (``llm_type`` defaults to ``"SiliconFlowLLM"``).

    All fields besides the inherited ``model`` are optional and default to
    ``None``. ``tool_choice`` accepts either a mode string or a dict that
    names a specific function, matching the forms listed in its description.
    """

    llm_type: str = "SiliconFlowLLM"

    # ---- credentials ----
    siliconflow_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate SiliconFlow requests",
    )

    # ---- generation parameters ----
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # ---- tools ----
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Widened from Optional[str]: the description documents a dict form for
    # forcing a particular function, which a str-only annotation cannot hold.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # ---- token probabilities ----
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # ---- output format ----
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )

    def __str__(self) -> str:
        """Return the value of the ``model`` field as this config's string form."""
        return self.model
# def get_default_device():
# return "cuda" if torch.cuda.is_available() else "cpu"
class OpenRouterConfig(LLMConfig):
    """Configuration for the OpenRouter backend (``llm_type`` defaults to ``"OpenRouterLLM"``).

    Carries the OpenRouter credential, two endpoint URLs with built-in
    defaults, and a set of optional sampling / output / tool parameters
    that all default to ``None``.
    """

    llm_type: str = "OpenRouterLLM"

    # ---- credentials and endpoints ----
    openrouter_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenRouter requests",
    )
    openrouter_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1",
        description="the base URL used to authenticate OpenRouter requests",
    )
    openrouter_model_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1/models",
        description="the model url to access model details",
    )

    # ---- generation parameters ----
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="This setting limits the model's choices to a percentage of likely tokens: only the top tokens whose probabilities add up to P. A lower value makes the model's responses more predictable, while the default setting allows for a full range of token choices.",
    )
    top_k: Optional[int] = Field(
        default=None,
        description="This limits the model's choice of tokens at each step, making it choose from a smaller set. A value of 1 means the model will always pick the most likely next token, leading to predictable results.",
    )
    frequency_penalty: Optional[float] = Field(
        default=None,
        description="Controls repetition of tokens based on frequency in input. Range: -2.0 to 2.0. Higher values reduce repetition of frequent tokens.",
    )
    presence_penalty: Optional[float] = Field(
        default=None,
        description="Adjusts repetition of specific tokens from input. Range: -2.0 to 2.0. Higher values reduce repetition.",
    )
    repetition_penalty: Optional[float] = Field(
        default=None,
        description="Reduces repetition of tokens from input. Range: 0.0 to 2.0. Higher values make repetition less likely.",
    )
    min_p: Optional[float] = Field(
        default=None,
        description="Minimum probability for a token relative to most likely token. Range: 0.0 to 1.0.",
    )
    top_a: Optional[float] = Field(
        default=None,
        description="Consider only tokens with 'sufficiently high' probabilities based on most likely token. Range: 0.0 to 1.0.",
    )
    seed: Optional[int] = Field(
        default=None,
        description="For deterministic sampling. Repeated requests with same seed and parameters should return same result.",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Upper limit for number of tokens model can generate. Must be 1 or above.",
    )
    logit_bias: Optional[dict] = Field(
        default=None,
        description="Map of token IDs to bias values (-100 to 100) to adjust token selection probabilities.",
    )
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of output tokens.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="Number of most likely tokens to return at each position (0-20) with log probabilities.",
    )
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description="Forces model to produce specific output format (e.g. JSON mode).",
    )
    structured_outputs: Optional[bool] = Field(
        default=None,
        description="Whether model can return structured outputs using response_format json_schema.",
    )
    stop: Optional[List[str]] = Field(
        default=None,
        description="Stop generation if model encounters any token in this array.",
    )
    tools: Optional[List] = Field(
        default=None,
        description="Tool calling parameter following OpenAI's tool calling request shape.",
    )
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which tool is called by model. Can be 'none', 'auto', 'required', or specific tool configuration.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description="If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )

    def __str__(self) -> str:
        """Return the value of the ``model`` field as this config's string form."""
        return self.model
class AliyunLLMConfig(LLMConfig):
    """Configuration for the Aliyun backend (``llm_type`` defaults to ``"AliyunLLM"``).

    Provides three optional credential fields plus optional generation,
    tool, model-specific, output-format and log-probability settings, all
    defaulting to ``None``.
    """

    llm_type: str = "AliyunLLM"

    # ---- credentials ----
    aliyun_api_key: Optional[str] = Field(
        default=None,
        description="The API key used to authenticate Aliyun requests",
    )
    aliyun_access_key_id: Optional[str] = Field(
        default=None,
        description="The Access Key ID for Aliyun authentication",
    )
    aliyun_access_key_secret: Optional[str] = Field(
        default=None,
        description="The Access Key Secret for Aliyun authentication",
    )

    # ---- generation parameters ----
    temperature: Optional[float] = Field(
        default=None,
        description="The temperature used to control randomness in generation. Higher values increase diversity.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Nucleus sampling parameter. Only sample from tokens with cumulative probability greater than top_p.",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens to generate in the response.",
    )
    top_k: Optional[int] = Field(
        default=None,
        description="Top-k sampling parameter. Only sample from the top k tokens at each step.",
    )
    repetition_penalty: Optional[float] = Field(
        default=None,
        description="Penalty for repeated tokens. Higher values discourage repetition.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description="If set to true, enables streaming response where partial results are sent as they become available.",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (defaults to 600 seconds).",
    )

    # ---- tools ----
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools or functions the model may call. Aliyun supports function calling for specific models.",
    )
    tool_choice: Optional[str] = Field(
        default=None,
        description="Controls whether the model should call a tool. Options include 'none' (no tool call), 'auto' (model decides), or a specific tool name.",
    )

    # ---- model-specific parameters ----
    # NOTE(review): `model_name` sits alongside the inherited required `model`
    # field; how the two relate is not visible here. Pydantic v2 may also warn
    # about field names starting with `model_` (protected namespace) unless
    # BaseConfig disables that check — TODO confirm.
    model_name: Optional[str] = Field(
        default=None,
        description="The name of the Aliyun model to use, e.g., 'qwen-max', 'qwen-turbo'.",
    )
    enable_search: Optional[bool] = Field(
        default=None,
        description="Whether to enable web search augmentation for the model, if supported.",
    )

    # ---- output format ----
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description="Specifies the format of the model output, e.g., JSON schema for structured responses.",
    )
    output_modalities: Optional[List] = Field(
        default=None,
        description="Output types the model should generate, e.g., ['text', 'image'] for multimodal models.",
    )

    # ---- token probabilities ----
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of output tokens. Supported by some Aliyun models.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="Number of most likely tokens to return with log probabilities at each position. Requires logprobs to be true.",
    )

    def __str__(self) -> str:
        """Return the value of the ``model`` field as this config's string form."""
        return self.model
|