from litellm import completion, _turn_on_debug
from dotenv import load_dotenv
import random
import logging

load_dotenv()

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Candidate model pools per capability tier (used when shuffle=True)
models_low = ["gemini/gemini-2.0-flash", "openai/gpt-4.1-nano"]
models_mid = ["gemini/gemini-2.5-flash-preview-05-20", "openai/gpt-4.1-mini"]
models_high = ["gemini/gemini-2.5-pro-preview-05-06", "openai/gpt-4.1"]

# Default model per tier (used when shuffle=False)
model_low = "openai/gpt-4.1-nano"
model_mid = "openai/gpt-4.1-mini"
model_high = "openai/gpt-4.1"


def call_llm(prompt: str, temperature: float, model_type: str, response_format=None,
             tools=None, shuffle=False, return_tokens=False, images=None):
    # Pick a model: either randomly from the tier's pool, or the tier's default
    if shuffle:
        if model_type == "low":
            model = random.choice(models_low)
        elif model_type == "mid":
            model = random.choice(models_mid)
        elif model_type == "high":
            model = random.choice(models_high)
        else:
            raise ValueError(f"Unknown model_type: {model_type}")
        logger.info(f"SHUFFLE. Using model: {model}")
    else:
        if model_type == "low":
            model = model_low
        elif model_type == "mid":
            model = model_mid
        elif model_type == "high":
            model = model_high
        else:
            raise ValueError(f"Unknown model_type: {model_type}")

    # Create message content - support both text-only and multimodal prompts
    if images:
        # Multimodal message: prompt text followed by one image_url part per image
        content = [{"type": "text", "text": prompt}]
        for image_url in images:
            content.append({
                "type": "image_url",
                "image_url": {"url": image_url}
            })
        messages = [{"role": "user", "content": content}]
    else:
        # Text-only message
        messages = [{"role": "user", "content": prompt}]

    completion_args = {
        "model": model,
        "messages": messages,
        "temperature": temperature
    }
    if response_format:
        completion_args["response_format"] = response_format
    if tools:
        completion_args["tools"] = tools

    response = completion(**completion_args)
    response_str = response.choices[0].message.content

    if return_tokens:
        output_tokens = response.usage.completion_tokens
        input_tokens = response.usage.prompt_tokens
        return response_str, input_tokens, output_tokens
    else:
        return response_str
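
# --- Usage sketch (illustrative only) ---
# A minimal example of calling call_llm. It assumes the .env file loaded above
# provides valid OPENAI_API_KEY / GEMINI_API_KEY credentials; the prompts and
# image URL below are placeholders, not part of the module.
if __name__ == "__main__":
    # Text-only call against the default mid-tier model
    answer = call_llm("Summarize the benefits of unit tests in one sentence.",
                      temperature=0.2, model_type="mid")
    logger.info(answer)

    # Multimodal call with token accounting, shuffling across the high-tier pool
    text, in_tok, out_tok = call_llm(
        "Describe this image.",
        temperature=0.0,
        model_type="high",
        shuffle=True,
        return_tokens=True,
        images=["https://example.com/sample.jpg"],  # placeholder URL
    )
    logger.info(f"{text} (input tokens: {in_tok}, output tokens: {out_tok})")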