# llm_clients/lmstudio.py
import json
from typing import Any, Dict, Generator

import requests

from .base import LlmClient


class LmstudioClient(LlmClient):
    """LLM client for LM Studio models (OpenAI-compatible API)."""

    def __init__(self, config_dict: Dict[str, Any], system_prompt: str):
        super().__init__(config_dict, system_prompt)
        # LM Studio runs on an OpenAI-compatible endpoint
        self.base_url = self.config.get('host', 'http://localhost:1234')
        # Test connection to LM Studio
        self._test_connection()
        print(f"✅ LM Studio Client initialized for model '{self.config['model']}' at host '{self.base_url}'.")
        print("   Note: LM Studio uses just-in-time loading - model will load on first request.")

    def _test_connection(self):
        """Test connection to the LM Studio server."""
        try:
            # Try the models endpoint first (more reliable than a health check)
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            response.raise_for_status()
            # Check if our specific model is available
            try:
                models_data = response.json()
                available_models = [model.get('id', '') for model in models_data.get('data', [])]
                if available_models:
                    print(f"   📋 Available models in LM Studio: {', '.join(available_models)}")
                    if self.config['model'] not in available_models:
                        print(f"   ⚠️ Warning: Model '{self.config['model']}' not found in available models.")
                        print("      This is normal with just-in-time loading - model will load on first use.")
                else:
                    print("   📋 LM Studio is running with just-in-time model loading.")
            except (json.JSONDecodeError, KeyError):
                print("   📋 LM Studio is running (could not parse models list).")
        except requests.exceptions.RequestException as e:
            raise ConnectionError(
                f"Could not connect to LM Studio at {self.base_url}. "
                f"Error: {e}\n"
                f"Please ensure:\n"
                f"1. LM Studio is running\n"
                f"2. A model is loaded or just-in-time loading is enabled\n"
                f"3. The server is started (look for 'Server started' in the LM Studio console)\n"
                f"4. The correct host/port is configured (default: http://localhost:1234)"
            )

    def generate_content(self, prompt: str) -> str:
        """
        Generates a non-streaming response from LM Studio.
        Uses the OpenAI-compatible API format.
        """
url = f"{self.base_url}/v1/chat/completions"
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt}
]
payload = {
"model": self.config['model'],
"messages": messages,
"stream": False,
"temperature": self.config.get('temperature', 0.1), # Low temperature for security scanning
"max_tokens": self.config.get('max_tokens', 500)
}
try:
response = requests.post(url, json=payload, timeout=30)
response.raise_for_status()
result = response.json()
if 'choices' in result and len(result['choices']) > 0:
return result['choices'][0]['message']['content']
else:
raise ValueError(f"Unexpected response format from LM Studio: {result}")
except requests.exceptions.RequestException as e:
if "404" in str(e):
raise ConnectionError(
f"LM Studio endpoint not found. Please ensure:\n"
f"1. LM Studio server is running\n"
f"2. A model is loaded (or just-in-time loading is enabled)\n"
f"3. The model name '{self.config['model']}' is correct"
)
else:
raise ConnectionError(f"Error communicating with LM Studio: {e}")
except (json.JSONDecodeError, KeyError, ValueError) as e:
raise ValueError(f"Error parsing LM Studio response: {e}")

    def generate_content_stream(self, prompt: str) -> Generator[str, None, None]:
        """
        Generates a streaming response from LM Studio.
        Uses the OpenAI-compatible API format.
        """
url = f"{self.base_url}/v1/chat/completions"
messages = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": prompt}
]
payload = {
"model": self.config['model'],
"messages": messages,
"stream": True,
"temperature": self.config.get('temperature', 0.7),
"max_tokens": self.config.get('max_tokens', 2000)
}
try:
with requests.post(url, json=payload, stream=True, timeout=30) as response:
response.raise_for_status()
for line in response.iter_lines():
if line:
line_str = line.decode('utf-8')
if line_str.startswith('data: '):
line_str = line_str[6:] # Remove 'data: ' prefix
if line_str.strip() == '[DONE]':
break
try:
chunk = json.loads(line_str)
if 'choices' in chunk and len(chunk['choices']) > 0:
delta = chunk['choices'][0].get('delta', {})
if 'content' in delta:
yield delta['content']
except json.JSONDecodeError:
continue # Skip malformed JSON lines
except requests.exceptions.RequestException as e:
raise ConnectionError(f"Error during LM Studio streaming: {e}")

    def _generate_content_impl(self, prompt: str) -> str:
        """Implementation for base class compatibility."""
        return self.generate_content(prompt)

    def _generate_content_stream_impl(self, prompt: str) -> Generator[str, None, None]:
        """Implementation for base class compatibility."""
        return self.generate_content_stream(prompt)
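

# Usage sketch: a minimal example of driving this client. It assumes the LlmClient
# base class accepts (config_dict, system_prompt) as used above; the host, model id,
# and prompts below are illustrative and must match your local LM Studio setup.
if __name__ == "__main__":
    config = {
        "host": "http://localhost:1234",
        "model": "some-local-model",  # illustrative model id
        "temperature": 0.1,
        "max_tokens": 500
    }
    client = LmstudioClient(config, system_prompt="You are a concise assistant.")

    # Non-streaming call
    print(client.generate_content("Say hello in one sentence."))

    # Streaming call: chunks are printed as they arrive
    for chunk in client.generate_content_stream("List three colors."):
        print(chunk, end="", flush=True)
    print()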