# llm_clients/lmstudio.py
from typing import Generator, Any, Dict

import requests
import json

from .base import LlmClient


class LmstudioClient(LlmClient):
    """LLM client for LM Studio models (OpenAI-compatible API)."""

    def __init__(self, config_dict: Dict[str, Any], system_prompt: str):
        super().__init__(config_dict, system_prompt)

        # LM Studio serves an OpenAI-compatible API on this endpoint.
        self.base_url = self.config.get('host', 'http://localhost:1234')

        # Verify the server is reachable before accepting any requests.
        self._test_connection()
        print(f"✅ LM Studio Client initialized for model '{self.config['model']}' at host '{self.base_url}'.")
        print("   Note: LM Studio uses just-in-time loading - the model will load on the first request.")

    def _test_connection(self):
        """Test the connection to the LM Studio server."""
        try:
            # Query the models endpoint (more reliable than a health check).
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            response.raise_for_status()

            # Check whether the configured model is already available.
            try:
                models_data = response.json()
                available_models = [model.get('id', '') for model in models_data.get('data', [])]
                if available_models:
                    print(f"   📋 Available models in LM Studio: {', '.join(available_models)}")
                    if self.config['model'] not in available_models:
                        print(f"   ⚠️ Warning: Model '{self.config['model']}' not found in available models.")
                        print("      This is normal with just-in-time loading - the model will load on first use.")
                else:
                    print("   📋 LM Studio is running with just-in-time model loading.")
            except (json.JSONDecodeError, KeyError):
                print("   📋 LM Studio is running (could not parse the models list).")

        except requests.exceptions.RequestException as e:
            raise ConnectionError(
                f"Could not connect to LM Studio at {self.base_url}. "
                f"Error: {e}\n"
                f"Please ensure:\n"
                f"1. LM Studio is running\n"
                f"2. A model is loaded or just-in-time loading is enabled\n"
                f"3. The server is started (look for 'Server started' in the LM Studio console)\n"
                f"4. The correct host/port is configured (default: http://localhost:1234)"
            )

    def generate_content(self, prompt: str) -> str:
        """
        Generates a non-streaming response from LM Studio
        via the OpenAI-compatible chat completions API.
        """
        url = f"{self.base_url}/v1/chat/completions"

        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]

        payload = {
            "model": self.config['model'],
            "messages": messages,
            "stream": False,
            "temperature": self.config.get('temperature', 0.1),  # Low temperature for security scanning
            "max_tokens": self.config.get('max_tokens', 500)
        }

        try:
            response = requests.post(url, json=payload, timeout=30)
            response.raise_for_status()

            result = response.json()
            if 'choices' in result and len(result['choices']) > 0:
                return result['choices'][0]['message']['content']
            else:
                raise ValueError(f"Unexpected response format from LM Studio: {result}")

        except requests.exceptions.HTTPError as e:
            # Inspect the status code directly rather than substring-matching the message.
            if e.response is not None and e.response.status_code == 404:
                raise ConnectionError(
                    f"LM Studio endpoint not found. Please ensure:\n"
                    f"1. The LM Studio server is running\n"
                    f"2. A model is loaded (or just-in-time loading is enabled)\n"
                    f"3. The model name '{self.config['model']}' is correct"
                )
            raise ConnectionError(f"Error communicating with LM Studio: {e}")
        except requests.exceptions.RequestException as e:
            raise ConnectionError(f"Error communicating with LM Studio: {e}")
        except (json.JSONDecodeError, KeyError, ValueError) as e:
            raise ValueError(f"Error parsing LM Studio response: {e}")

    def generate_content_stream(self, prompt: str) -> Generator[str, None, None]:
        """
        Generates a streaming response from LM Studio
        via the OpenAI-compatible chat completions API.
""" url = f"{self.base_url}/v1/chat/completions" messages = [ {"role": "system", "content": self.system_prompt}, {"role": "user", "content": prompt} ] payload = { "model": self.config['model'], "messages": messages, "stream": True, "temperature": self.config.get('temperature', 0.7), "max_tokens": self.config.get('max_tokens', 2000) } try: with requests.post(url, json=payload, stream=True, timeout=30) as response: response.raise_for_status() for line in response.iter_lines(): if line: line_str = line.decode('utf-8') if line_str.startswith('data: '): line_str = line_str[6:] # Remove 'data: ' prefix if line_str.strip() == '[DONE]': break try: chunk = json.loads(line_str) if 'choices' in chunk and len(chunk['choices']) > 0: delta = chunk['choices'][0].get('delta', {}) if 'content' in delta: yield delta['content'] except json.JSONDecodeError: continue # Skip malformed JSON lines except requests.exceptions.RequestException as e: raise ConnectionError(f"Error during LM Studio streaming: {e}") def _generate_content_impl(self, prompt: str) -> str: """Implementation for base class compatibility.""" return self.generate_content(prompt) def _generate_content_stream_impl(self, prompt: str) -> Generator[str, None, None]: """Implementation for base class compatibility.""" return self.generate_content_stream(prompt)