# NOTE: This implementation is not based on textgrad; it calls the LiteLLM API directly.
# Detached from textgrad: https://github.com/zou-group/textgrad/blob/main/textgrad/engine_experimental/litellm.py

try:
    import litellm
    from litellm import supports_reasoning
except ImportError:
    raise ImportError("If you'd like to use LiteLLM, please install the litellm package by running `pip install litellm`, and set appropriate API keys for the models you want to use.")

import os
import json
import base64
import platformdirs
import logging
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)
from typing import List, Union, Optional, Any, Dict

from .base import EngineLM, CachedEngine
from .engine_utils import get_image_type_from_bytes

def validate_structured_output_model(model_string: str) -> bool:
    """
    Check if the model supports structured outputs.
    
    Args:
        model_string: The name of the model to check
        
    Returns:
        True if the model supports structured outputs, False otherwise
    """
    # Models (matched by substring) that support structured outputs
    structured_output_models = [
        "gpt-4",
        "claude-opus-4", "claude-sonnet-4", "claude-3.7-sonnet", "claude-3.5-sonnet", "claude-3-opus",
        "gemini-",
    ]
    return any(x in model_string.lower() for x in structured_output_models)

def validate_chat_model(model_string: str) -> bool:
    """
    Check if the model is a chat model.
    Virtually all models exposed through LiteLLM use the chat-completion interface,
    so this always returns True.
    """
    return True


def validate_reasoning_model(model_string: str) -> bool:
    """
    Check if the model is a reasoning model.
    Includes OpenAI o1/o3/o4 variants (non-pro), Claude models, and other LLMs known for reasoning.
    """
    m = model_string.lower()
    if supports_reasoning(model_string):
        return True

    # Fallback heuristics based on the model name
    if any(x in m for x in ["o1", "o3", "o4"]) and not validate_pro_reasoning_model(model_string):
        return True

    if "claude" in m and not validate_pro_reasoning_model(model_string):
        return True

    # Other models commonly used for reasoning
    extra = ["qwen-72b", "llama-3-70b", "mistral-large", "deepseek-reasoner", "xai/grok-3", "gemini-2.5-pro"]
    if any(e in m for e in extra):
        return True

    return False

def validate_pro_reasoning_model(model_string: str) -> bool:
    """
    Check if the model is a pro reasoning model:
    OpenAI o1-pro, o3-pro, o4-pro, and the Claude 4 Opus/Sonnet and Claude 3.7 Sonnet variants.
    """
    m = model_string.lower()
    if any(x in m for x in ["o1-pro", "o3-pro", "o4-pro"]):
        return True
    if any(x in m for x in ["claude-opus-4", "claude-sonnet-4", "claude-3.7-sonnet"]):
        return True
    return False

def validate_multimodal_model(model_string: str) -> bool:
    """
    Check if the model supports multimodal inputs.

    Args:
        model_string: The name of the model to check

    Returns:
        True if the model supports multimodal inputs, False otherwise
    """
    m = model_string.lower()

    # Core multimodal models (matched by substring). "gemini" also covers
    # audio/TTS preview variants such as gemini-2.5-flash-preview-tts.
    multimodal_models = [
        "gpt-4-vision", "gpt-4o", "gpt-4.1",   # OpenAI multimodal
        "gpt-4v",                              # alias for vision-capable GPT-4
        "claude-sonnet", "claude-opus",        # Claude multimodal variants
        "gemini",                              # Gemini models are multimodal
        "llama-4",                             # reported as multimodal
        "qwen-vl", "qwen2-vl",                 # Qwen vision-language models
    ]
    if any(g in m for g in multimodal_models):
        return True

    # Catch remaining vision/vision-language names, e.g. "gpt-4 vision" or "*-vl"
    if "vision" in m or "vl" in m:
        return True

    return False
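
# Illustrative expectations for the validators above (a sketch, not a test
# suite; exact results depend on the substring heuristics and, for reasoning
# models, on litellm.supports_reasoning):
#   validate_multimodal_model("gpt-4o")        -> True
#   validate_multimodal_model("gpt-3.5-turbo") -> False
#   validate_pro_reasoning_model("o1-pro")     -> True
#   validate_reasoning_model("o3-mini")        -> True  (name-based heuristic)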

class ChatLiteLLM(EngineLM, CachedEngine):
    """
    LiteLLM implementation of the EngineLM interface.
    This allows using any model supported by LiteLLM.
    """
    DEFAULT_SYSTEM_PROMPT = "You are a helpful, creative, and smart assistant."

    def __init__(
        self,
        model_string: str = "gpt-3.5-turbo",
        use_cache: bool = False,
        system_prompt: str = DEFAULT_SYSTEM_PROMPT,
        is_multimodal: bool = False,
        **kwargs
    ):
        """
        Initialize the LiteLLM engine.
        
        Args:
            model_string: The name of the model to use
            use_cache: Whether to use caching
            system_prompt: The system prompt to use
            is_multimodal: Whether to enable multimodal capabilities
            **kwargs: Additional arguments to pass to the LiteLLM client
        """
        self.model_string = model_string
        self.use_cache = use_cache
        self.system_prompt = system_prompt
        self.is_multimodal = is_multimodal or validate_multimodal_model(model_string)
        self.kwargs = kwargs
        
        # Set up caching if enabled
        if self.use_cache:
            root = platformdirs.user_cache_dir("agentflow")
            cache_path = os.path.join(root, f"cache_litellm_{model_string}.db")
            self.image_cache_dir = os.path.join(root, "image_cache")
            os.makedirs(self.image_cache_dir, exist_ok=True)
            super().__init__(cache_path=cache_path)
        
        # Disable telemetry
        litellm.telemetry = False
        
        # Set model capabilities based on model name
        self.support_structured_output = validate_structured_output_model(self.model_string)
        self.is_chat_model = validate_chat_model(self.model_string)
        self.is_reasoning_model = validate_reasoning_model(self.model_string)
        self.is_pro_reasoning_model = validate_pro_reasoning_model(self.model_string)
        
        # Suppress LiteLLM debug logs
        litellm.suppress_debug_info = True
        for key in logging.Logger.manager.loggerDict.keys():
            if "litellm" in key.lower():
                logging.getLogger(key).setLevel(logging.WARNING)

    def __call__(self, prompt, **kwargs):
        """
        Handle direct calls to the instance (e.g., model(prompt)).
        Forwards the call to the generate method.
        """
        return self.generate(prompt, **kwargs)

    def _format_content(self, content: List[Union[str, bytes]]) -> List[Dict[str, Any]]:
        """
        Format content for the LiteLLM API.
        
        Args:
            content: List of content items (strings and/or image bytes)
            
        Returns:
            Formatted content for the LiteLLM API
        """
        formatted_content = []
        for item in content:
            if isinstance(item, str):
                formatted_content.append({"type": "text", "text": item})
            elif isinstance(item, bytes):
                # For images, encode as base64
                image_type = get_image_type_from_bytes(item)
                if image_type:
                    base64_image = base64.b64encode(item).decode('utf-8')
                    formatted_content.append({
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/{image_type};base64,{base64_image}",
                            "detail": "auto"
                        }
                    })
            elif isinstance(item, dict) and "type" in item:
                # Already formatted content
                formatted_content.append(item)
        return formatted_content
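
    # Example of the structure produced by _format_content for
    # ["Describe this image.", <png bytes>] (illustrative; base64 truncated):
    #   [
    #     {"type": "text", "text": "Describe this image."},
    #     {"type": "image_url",
    #      "image_url": {"url": "data:image/png;base64,iVBORw0...", "detail": "auto"}},
    #   ]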

    @retry(wait=wait_random_exponential(min=1, max=5), stop=stop_after_attempt(5))
    def generate(self, content: Union[str, List[Union[str, bytes]]], system_prompt=None, **kwargs):
        """
        Generate text from a prompt.
        
        Args:
            content: A string prompt or a list of strings and image bytes
            system_prompt: Optional system prompt to override the default
            **kwargs: Additional arguments to pass to the LiteLLM API
            
        Returns:
            Generated text response
        """
        try:
            if isinstance(content, str):
                return self._generate_text(content, system_prompt=system_prompt, **kwargs)
            
            elif isinstance(content, list):
                has_multimodal_input = any(isinstance(item, bytes) for item in content)
                if has_multimodal_input and not self.is_multimodal:
                    raise NotImplementedError(f"Multimodal generation is only supported for multimodal models. Current model: {self.model_string}")
                
                return self._generate_multimodal(content, system_prompt=system_prompt, **kwargs)
        except litellm.exceptions.BadRequestError as e:
            print(f"Bad request error: {str(e)}")
            return {
                "error": "bad_request",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except litellm.exceptions.RateLimitError as e:
            print(f"Rate limit error encountered: {str(e)}")
            return {
                "error": "rate_limit",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except litellm.exceptions.ContextWindowExceededError as e:
            print(f"Context window exceeded: {str(e)}")
            return {
                "error": "context_window_exceeded",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except litellm.exceptions.APIError as e:
            print(f"API error: {str(e)}")
            return {
                "error": "api_error",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except litellm.exceptions.APIConnectionError as e:
            print(f"API connection error: {str(e)}")
            return {
                "error": "api_connection_error",
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
        except Exception as e:
            print(f"Error in generate method: {str(e)}")
            print(f"Error type: {type(e).__name__}")
            print(f"Error details: {e.args}")
            return {
                "error": type(e).__name__,
                "message": str(e),
                "details": getattr(e, 'args', None)
            }
    
    def _generate_text(
        self, prompt, system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, response_format=None, **kwargs
    ):
        """
        Generate text from a text prompt.
        
        Args:
            prompt: The text prompt
            system_prompt: Optional system prompt to override the default
            temperature: Controls randomness (higher = more random)
            max_tokens: Maximum number of tokens to generate
            top_p: Controls diversity via nucleus sampling
            response_format: Optional response format for structured outputs
            **kwargs: Additional arguments to pass to the LiteLLM API
            
        Returns:
            Generated text response
        """
        sys_prompt_arg = system_prompt if system_prompt else self.system_prompt

        if self.use_cache:
            cache_key = sys_prompt_arg + prompt
            cache_or_none = self._check_cache(cache_key)
            if cache_or_none is not None:
                return cache_or_none

        messages = [
            {"role": "system", "content": sys_prompt_arg},
            {"role": "user", "content": prompt},
        ]
        
        # Prepare additional parameters
        params = {
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
        }
        
        # Add response_format if supported and provided
        if self.support_structured_output and response_format:
            params["response_format"] = response_format
            
        # Add any additional kwargs
        params.update(self.kwargs)
        params.update(kwargs)
        
        # Make the API call
        response = litellm.completion(
            model=self.model_string,
            messages=messages,
            **params
        )
        
        response_text = response.choices[0].message.content
        
        if self.use_cache:
            self._save_cache(cache_key, response_text)
        
        return response_text
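
    # Structured-output sketch (assumption: the underlying provider honors
    # OpenAI-style JSON mode through LiteLLM). For models where
    # support_structured_output is True, a call such as
    #   engine.generate(prompt, response_format={"type": "json_object"})
    # forwards response_format to litellm.completion via _generate_text.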
    
    def _generate_multimodal(
        self, content_list, system_prompt=None, temperature=0, max_tokens=4000, top_p=0.99, **kwargs
    ):
        """
        Generate text from a multimodal prompt (text and images).
        
        Args:
            content_list: List of content items (strings and/or image bytes)
            system_prompt: Optional system prompt to override the default
            temperature: Controls randomness (higher = more random)
            max_tokens: Maximum number of tokens to generate
            top_p: Controls diversity via nucleus sampling
            **kwargs: Additional arguments to pass to the LiteLLM API
            
        Returns:
            Generated text response
        """
        sys_prompt_arg = system_prompt if system_prompt else self.system_prompt
        formatted_content = self._format_content(content_list)
        
        if self.use_cache:
            cache_key = sys_prompt_arg + json.dumps(str(formatted_content))
            cache_or_none = self._check_cache(cache_key)
            if cache_or_none is not None:
                return cache_or_none
        
        messages = [
            {"role": "system", "content": sys_prompt_arg},
            {"role": "user", "content": formatted_content},
        ]
        
        # Prepare additional parameters
        params = {
            "temperature": temperature,
            "max_tokens": max_tokens,
            "top_p": top_p,
        }
        
        # Add any additional kwargs
        params.update(self.kwargs)
        params.update(kwargs)
        
        # Make the API call
        response = litellm.completion(
            model=self.model_string,
            messages=messages,
            **params
        )
        
        response_text = response.choices[0].message.content
        
        if self.use_cache:
            self._save_cache(cache_key, response_text)
        
        return response_text
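

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). Assumes the relevant provider API
# key (e.g. OPENAI_API_KEY) is set in the environment and that the chosen
# model name is available through LiteLLM.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    engine = ChatLiteLLM(model_string="gpt-4o-mini", use_cache=False)

    # Plain text generation (returns a string, or an error dict on failure).
    print(engine("Summarize what LiteLLM does in one sentence."))

    # Multimodal generation works only for multimodal models; the image path
    # below is a placeholder, not a file shipped with this module.
    # with open("example.png", "rb") as f:
    #     print(engine.generate(["Describe this image.", f.read()]))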