# llm_clients/lmstudio.py
from typing import Generator, Any, Dict
import requests
import json

from .base import LlmClient


class LmstudioClient(LlmClient):
    """LLM client for LM Studio models (OpenAI-compatible API)."""

    def __init__(self, config_dict: Dict[str, Any], system_prompt: str):
        super().__init__(config_dict, system_prompt)
        # LM Studio exposes an OpenAI-compatible endpoint
        self.base_url = self.config.get('host', 'http://localhost:1234')
        # Test the connection to LM Studio
        self._test_connection()
        print(f"✓ LM Studio Client initialized for model '{self.config['model']}' at host '{self.base_url}'.")
        print("  Note: LM Studio uses just-in-time loading - the model will load on the first request.")

    def _test_connection(self):
        """Test the connection to the LM Studio server."""
        try:
            # Try the models endpoint first (more reliable than a health check)
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            response.raise_for_status()
            # Check whether the configured model is available
            try:
                models_data = response.json()
                available_models = [model.get('id', '') for model in models_data.get('data', [])]
                if available_models:
                    print(f"  📋 Available models in LM Studio: {', '.join(available_models)}")
                    if self.config['model'] not in available_models:
                        print(f"  ⚠️ Warning: Model '{self.config['model']}' not found in available models.")
                        print("     This is normal with just-in-time loading - the model will load on first use.")
                else:
                    print("  📋 LM Studio is running with just-in-time model loading.")
            except (json.JSONDecodeError, KeyError):
                print("  📋 LM Studio is running (could not parse the models list).")
        except requests.exceptions.RequestException as e:
            raise ConnectionError(
                f"Could not connect to LM Studio at {self.base_url}. "
                f"Error: {e}\n"
                f"Please ensure:\n"
                f"1. LM Studio is running\n"
                f"2. A model is loaded or just-in-time loading is enabled\n"
                f"3. The server is started (look for 'Server started' in the LM Studio console)\n"
                f"4. The correct host/port is configured (default: http://localhost:1234)"
            )
| def generate_content(self, prompt: str) -> str: | |
| """ | |
| Generates a non-streaming response from LM Studio. | |
| Uses OpenAI-compatible API format. | |
| """ | |
| url = f"{self.base_url}/v1/chat/completions" | |
| messages = [ | |
| {"role": "system", "content": self.system_prompt}, | |
| {"role": "user", "content": prompt} | |
| ] | |
| payload = { | |
| "model": self.config['model'], | |
| "messages": messages, | |
| "stream": False, | |
| "temperature": self.config.get('temperature', 0.1), # Low temperature for security scanning | |
| "max_tokens": self.config.get('max_tokens', 500) | |
| } | |
| try: | |
| response = requests.post(url, json=payload, timeout=30) | |
| response.raise_for_status() | |
| result = response.json() | |
| if 'choices' in result and len(result['choices']) > 0: | |
| return result['choices'][0]['message']['content'] | |
| else: | |
| raise ValueError(f"Unexpected response format from LM Studio: {result}") | |
| except requests.exceptions.RequestException as e: | |
| if "404" in str(e): | |
| raise ConnectionError( | |
| f"LM Studio endpoint not found. Please ensure:\n" | |
| f"1. LM Studio server is running\n" | |
| f"2. A model is loaded (or just-in-time loading is enabled)\n" | |
| f"3. The model name '{self.config['model']}' is correct" | |
| ) | |
| else: | |
| raise ConnectionError(f"Error communicating with LM Studio: {e}") | |
| except (json.JSONDecodeError, KeyError, ValueError) as e: | |
| raise ValueError(f"Error parsing LM Studio response: {e}") | |

    def generate_content_stream(self, prompt: str) -> Generator[str, None, None]:
        """
        Generates a streaming response from LM Studio.
        Uses the OpenAI-compatible chat completions API with server-sent events (SSE).
        """
        url = f"{self.base_url}/v1/chat/completions"
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        payload = {
            "model": self.config['model'],
            "messages": messages,
            "stream": True,
            "temperature": self.config.get('temperature', 0.7),
            "max_tokens": self.config.get('max_tokens', 2000)
        }
        try:
            with requests.post(url, json=payload, stream=True, timeout=30) as response:
                response.raise_for_status()
                # Each SSE line looks like: data: {"choices": [{"delta": {"content": "..."}}]}
                # and the stream terminates with: data: [DONE]
                for line in response.iter_lines():
                    if line:
                        line_str = line.decode('utf-8')
                        if line_str.startswith('data: '):
                            line_str = line_str[6:]  # Remove the 'data: ' prefix
                        if line_str.strip() == '[DONE]':
                            break
                        try:
                            chunk = json.loads(line_str)
                            if 'choices' in chunk and len(chunk['choices']) > 0:
                                delta = chunk['choices'][0].get('delta', {})
                                if 'content' in delta:
                                    yield delta['content']
                        except json.JSONDecodeError:
                            continue  # Skip malformed JSON lines
        except requests.exceptions.RequestException as e:
            raise ConnectionError(f"Error during LM Studio streaming: {e}")

    def _generate_content_impl(self, prompt: str) -> str:
        """Implementation for base class compatibility."""
        return self.generate_content(prompt)

    def _generate_content_stream_impl(self, prompt: str) -> Generator[str, None, None]:
        """Implementation for base class compatibility."""
        return self.generate_content_stream(prompt)