File size: 19,731 Bytes
5374a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211

from pydantic import BaseModel, Field
from typing import Optional, Union, List

# import torch 

from ..core.base_config import BaseConfig


#### LLM Configs
class LLMConfig(BaseConfig):
    """Common settings shared by the LLM configuration classes in this module.

    ``llm_type`` and ``model`` are declared without defaults here; the
    subclasses defined in this module each give ``llm_type`` a default string.
    """

    llm_type: str
    model: str
    output_response: bool = Field(
        default=False,
        description="Whether to output LLM response.",
    )


class OpenAILLMConfig(LLMConfig):
    """Configuration for the ``"OpenAILLM"`` backend.

    Apart from ``llm_type``, every field declared here is optional and
    defaults to ``None``. The fields are grouped into: authentication,
    generation parameters, tool calling, reasoning, token probabilities,
    predicted outputs and output format.
    """

    llm_type: str = "OpenAILLM"
    openai_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenAI requests",
    )

    # generation parameters
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens. This value is now deprecated in favor of max_completion_tokens, and is not compatible with o1 series models.",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # tools
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Accepts either a plain string (e.g. "none" / "auto") or a dict that names
    # a specific function, since the description below documents both forms.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # reasoning parameters
    reasoning_effort: Optional[str] = Field(
        default=None,
        description="Constrains effort on reasoning for reasoning models. Currently supported values are low, medium, and high. Reducing reasoning effort can result in faster responses and fewer tokens used on reasoning in a response.",
    )

    # token probabilities
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # predicted outputs
    prediction: Optional[dict] = Field(
        default=None,
        description="Configuration for a Predicted Output, which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content.",
    )

    # output format
    modalities: Optional[List] = Field(
        default=None,
        description="Output types that you would like the model to generate for this request. Most models are capable of generating text, which is the default: [\"text\"]",
    )
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )

# ==== Azure OpenAI Configuration ====
class AzureOpenAIConfig(LLMConfig):
    """Connection settings for the ``"AzureOpenAILLM"`` backend.

    ``azure_endpoint`` and ``azure_key`` are required (declared with ``...``);
    ``api_version`` defaults to ``"2024-12-01-preview"``.
    """

    llm_type: str = "AzureOpenAILLM"
    azure_endpoint: str = Field(..., description="Azure OpenAI service endpoint URL")
    azure_key: str = Field(..., description="Azure OpenAI API key for authentication")
    api_version: Optional[str] = Field(default="2024-12-01-preview", description="Azure OpenAI API version to use")
    # The 'model' field inherited from LLMConfig is used to specify the deployment name.
    # NOTE(review): this class derives from LLMConfig, not OpenAILLMConfig, so the
    # generation parameters (temperature, max_tokens, etc.) are NOT declared on it.
    # Confirm whether the base class was meant to be OpenAILLMConfig.


class LiteLLMConfig(LLMConfig):
    """Configuration for the ``"LiteLLM"`` backend.

    Holds an optional API base / key, per-provider API keys, Azure OpenAI
    connection fields and the generation, tool-calling, log-probability and
    output-format options. ``is_local`` defaults to ``False`` and
    ``openrouter_base`` to ``"https://openrouter.ai/api/v1"``; every other
    field declared here defaults to ``None``.

    ``str(config)`` returns the ``model`` value.
    """

    llm_type: str = "LiteLLM"
    api_base: Optional[str] = Field(
        default=None,
        description="Base URL for the LLM API (e.g., http://localhost:11434/v1 for Ollama)",
    )
    is_local: Optional[bool] = Field(
        default=False,
        description="Whether the model is running locally (e.g., Ollama)",
    )
    api_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate generic OpenAI-compatible requests (e.g., LM Studio, FastChat, LocalAI)",
    )

    # LLM keys
    openai_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenAI requests",
    )
    anthropic_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Anthropic requests",
    )
    deepseek_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Deepseek requests",
    )
    gemini_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Gemini requests",
    )
    meta_llama_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Meta Llama requests",
    )
    openrouter_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenRouter requests",
    )
    openrouter_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1",
        description="the base URL used to authenticate OpenRouter requests",
    )
    perplexity_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Perplexity requests",
    )
    groq_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate Groq requests",
    )

    # Azure OpenAI keys
    azure_endpoint: Optional[str] = Field(
        default=None,
        description="Azure OpenAI service endpoint URL",
    )
    azure_key: Optional[str] = Field(
        default=None,
        description="Azure OpenAI API key for authentication",
    )
    api_version: Optional[str] = Field(
        default=None,
        description="Azure OpenAI API version to use",
    )

    # generation parameters
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # tools
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Accepts either a plain string (e.g. "none" / "auto") or a dict that names
    # a specific function, since the description below documents both forms.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # token probabilities
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # output format
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )

    def __str__(self):
        """Return the configured ``model`` value as the string form."""
        return self.model


class SiliconFlowConfig(LLMConfig):
    """Configuration for the ``"SiliconFlowLLM"`` backend.

    Apart from ``llm_type``, every field declared here is optional and
    defaults to ``None``. ``str(config)`` returns the ``model`` value.
    """

    # LLM keys
    llm_type: str = "SiliconFlowLLM"
    siliconflow_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate SiliconFlow requests",
    )

    # generation parameters
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="maximum number of generated tokens",
    )
    max_completion_tokens: Optional[int] = Field(
        default=None,
        description="An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens. Commonly used in OpenAI's o1 series models.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Only sample from tokens with cumulative probability greater than top_p when generating text.",
    )
    n: Optional[int] = Field(
        default=None,
        description="How many chat completion choices to generate for each input message.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description=" If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )
    stream_options: Optional[dict] = Field(
        default=None,
        description="Options for streaming response. Only set this when you set stream: true",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (Defaults to 600 seconds)",
    )

    # tools
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools the model may call. Currently, only functions are supported as a tool. Use this to provide a list of functions the model may generate JSON inputs for.",
    )
    # Accepts either a plain string (e.g. "none" / "auto") or a dict that names
    # a specific function, since the description below documents both forms.
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which (if any) function is called by the model. none means the model will not call a function and instead generates a message. auto means the model can pick between generating a message or calling a function. Specifying a particular function via {\"type\": \"function\", \"function\": {\"name\": \"my_function\"}} forces the model to call that function.",
    )
    parallel_tool_calls: Optional[bool] = Field(
        default=None,
        description="Whether to enable parallel function calling during tool use. OpenAI default is true.",
    )

    # token probabilities
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the content of message.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="An integer between 0 and 5 specifying the number of most likely tokens to return at each token position, each with an associated log probability. logprobs must be set to true if this parameter is used.",
    )

    # output format
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description=" An object specifying the format that the model must output.",
    )

    def __str__(self):
        """Return the configured ``model`` value as the string form."""
        return self.model


# def get_default_device():
#     return "cuda" if torch.cuda.is_available() else "cpu"

class OpenRouterConfig(LLMConfig):
    """Configuration for the ``"OpenRouterLLM"`` backend.

    ``openrouter_base`` and ``openrouter_model_base`` carry default URLs;
    every other field declared here (apart from ``llm_type``) defaults to
    ``None``. ``str(config)`` returns the ``model`` value.
    """

    llm_type: str = "OpenRouterLLM"

    # LLM keys
    openrouter_key: Optional[str] = Field(
        default=None,
        description="the API key used to authenticate OpenRouter requests",
    )
    openrouter_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1",
        description="the base URL used to authenticate OpenRouter requests",
    )
    openrouter_model_base: Optional[str] = Field(
        default="https://openrouter.ai/api/v1/models",
        description="the model url to access model details",
    )

    # generation parameters
    temperature: Optional[float] = Field(
        default=None,
        description="the temperature used to scaling logits",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="This setting limits the model's choices to a percentage of likely tokens: only the top tokens whose probabilities add up to P. A lower value makes the model's responses more predictable, while the default setting allows for a full range of token choices.",
    )
    top_k: Optional[int] = Field(
        default=None,
        description="This limits the model's choice of tokens at each step, making it choose from a smaller set. A value of 1 means the model will always pick the most likely next token, leading to predictable results.",
    )
    frequency_penalty: Optional[float] = Field(
        default=None,
        description="Controls repetition of tokens based on frequency in input. Range: -2.0 to 2.0. Higher values reduce repetition of frequent tokens.",
    )
    presence_penalty: Optional[float] = Field(
        default=None,
        description="Adjusts repetition of specific tokens from input. Range: -2.0 to 2.0. Higher values reduce repetition.",
    )
    repetition_penalty: Optional[float] = Field(
        default=None,
        description="Reduces repetition of tokens from input. Range: 0.0 to 2.0. Higher values make repetition less likely.",
    )
    min_p: Optional[float] = Field(
        default=None,
        description="Minimum probability for a token relative to most likely token. Range: 0.0 to 1.0.",
    )
    top_a: Optional[float] = Field(
        default=None,
        description="Consider only tokens with 'sufficiently high' probabilities based on most likely token. Range: 0.0 to 1.0.",
    )
    seed: Optional[int] = Field(
        default=None,
        description="For deterministic sampling. Repeated requests with same seed and parameters should return same result.",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Upper limit for number of tokens model can generate. Must be 1 or above.",
    )
    logit_bias: Optional[dict] = Field(
        default=None,
        description="Map of token IDs to bias values (-100 to 100) to adjust token selection probabilities.",
    )
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of output tokens.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="Number of most likely tokens to return at each position (0-20) with log probabilities.",
    )
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description="Forces model to produce specific output format (e.g. JSON mode).",
    )
    structured_outputs: Optional[bool] = Field(
        default=None,
        description="Whether model can return structured outputs using response_format json_schema.",
    )
    stop: Optional[List[str]] = Field(
        default=None,
        description="Stop generation if model encounters any token in this array.",
    )
    tools: Optional[List] = Field(
        default=None,
        description="Tool calling parameter following OpenAI's tool calling request shape.",
    )
    tool_choice: Optional[Union[str, dict]] = Field(
        default=None,
        description="Controls which tool is called by model. Can be 'none', 'auto', 'required', or specific tool configuration.",
    )

    stream: Optional[bool] = Field(
        default=None,
        description="If set to true, it sends partial message deltas. Tokens will be sent as they become available, with the stream terminated by a [DONE] message.",
    )

    def __str__(self):
        """Return the configured ``model`` value as the string form."""
        return self.model


class AliyunLLMConfig(LLMConfig):
    """Configuration for the ``"AliyunLLM"`` backend.

    Apart from ``llm_type``, every field declared here is optional and
    defaults to ``None``. ``str(config)`` returns the ``model`` value.
    """

    llm_type: str = "AliyunLLM"
    aliyun_api_key: Optional[str] = Field(
        default=None,
        description="The API key used to authenticate Aliyun requests",
    )
    aliyun_access_key_id: Optional[str] = Field(
        default=None,
        description="The Access Key ID for Aliyun authentication",
    )
    aliyun_access_key_secret: Optional[str] = Field(
        default=None,
        description="The Access Key Secret for Aliyun authentication",
    )

    # generation parameters
    temperature: Optional[float] = Field(
        default=None,
        description="The temperature used to control randomness in generation. Higher values increase diversity.",
    )
    top_p: Optional[float] = Field(
        default=None,
        description="Nucleus sampling parameter. Only sample from tokens with cumulative probability greater than top_p.",
    )
    max_tokens: Optional[int] = Field(
        default=None,
        description="Maximum number of tokens to generate in the response.",
    )
    top_k: Optional[int] = Field(
        default=None,
        description="Top-k sampling parameter. Only sample from the top k tokens at each step.",
    )
    repetition_penalty: Optional[float] = Field(
        default=None,
        description="Penalty for repeated tokens. Higher values discourage repetition.",
    )
    stream: Optional[bool] = Field(
        default=None,
        description="If set to true, enables streaming response where partial results are sent as they become available.",
    )
    timeout: Optional[Union[float, int]] = Field(
        default=None,
        description="Timeout in seconds for completion requests (defaults to 600 seconds).",
    )

    # tools
    tools: Optional[List] = Field(
        default=None,
        description="A list of tools or functions the model may call. Aliyun supports function calling for specific models.",
    )
    tool_choice: Optional[str] = Field(
        default=None,
        description="Controls whether the model should call a tool. Options include 'none' (no tool call), 'auto' (model decides), or a specific tool name.",
    )

    # model-specific parameters
    # NOTE(review): `model_name` sits alongside the inherited `model` field;
    # confirm which of the two the Aliyun client actually reads.
    model_name: Optional[str] = Field(
        default=None,
        description="The name of the Aliyun model to use, e.g., 'qwen-max', 'qwen-turbo'.",
    )
    enable_search: Optional[bool] = Field(
        default=None,
        description="Whether to enable web search augmentation for the model, if supported.",
    )

    # output format
    response_format: Optional[Union[BaseModel, dict]] = Field(
        default=None,
        description="Specifies the format of the model output, e.g., JSON schema for structured responses.",
    )
    output_modalities: Optional[List] = Field(
        default=None,
        description="Output types the model should generate, e.g., ['text', 'image'] for multimodal models.",
    )

    # token probabilities
    logprobs: Optional[bool] = Field(
        default=None,
        description="Whether to return log probabilities of output tokens. Supported by some Aliyun models.",
    )
    top_logprobs: Optional[int] = Field(
        default=None,
        description="Number of most likely tokens to return with log probabilities at each position. Requires logprobs to be true.",
    )

    def __str__(self):
        """Return the configured ``model`` value as the string form."""
        return self.model