# llm_clients/lmstudio.py
import json
from typing import Any, Dict, Generator

import requests

from .base import LlmClient


class LmstudioClient(LlmClient):
    """LLM client for LM Studio models (OpenAI-compatible API)."""

    def __init__(self, config_dict: Dict[str, Any], system_prompt: str):
        super().__init__(config_dict, system_prompt)
        # LM Studio serves an OpenAI-compatible endpoint.
        self.base_url = self.config.get('host', 'http://localhost:1234')
        # Test the connection to LM Studio before first use.
        self._test_connection()
        print(f"βœ… LM Studio Client initialized for model '{self.config['model']}' at host '{self.base_url}'.")
        print("   Note: LM Studio uses just-in-time loading - the model will load on the first request.")

    def _test_connection(self):
        """Test the connection to the LM Studio server."""
        try:
            # Try the models endpoint first (more reliable than a health check).
            response = requests.get(f"{self.base_url}/v1/models", timeout=5)
            response.raise_for_status()
            # Check whether the configured model is available.
            try:
                models_data = response.json()
                available_models = [model.get('id', '') for model in models_data.get('data', [])]
                if available_models:
                    print(f"   πŸ“‹ Available models in LM Studio: {', '.join(available_models)}")
                    if self.config['model'] not in available_models:
                        print(f"   ⚠️ Warning: Model '{self.config['model']}' not found in available models.")
                        print("      This is normal with just-in-time loading - the model will load on first use.")
                else:
                    print("   πŸ“‹ LM Studio is running with just-in-time model loading.")
            except (json.JSONDecodeError, KeyError):
                print("   πŸ“‹ LM Studio is running (could not parse the models list).")
        except requests.exceptions.RequestException as e:
            raise ConnectionError(
                f"Could not connect to LM Studio at {self.base_url}. "
                f"Error: {e}\n"
                "Please ensure:\n"
                "1. LM Studio is running\n"
                "2. A model is loaded or just-in-time loading is enabled\n"
                "3. The server is started (look for 'Server started' in the LM Studio console)\n"
                "4. The correct host/port is configured (default: http://localhost:1234)"
            )
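
    # Example of the (assumed) /v1/models response shape parsed above; LM Studio
    # mirrors the OpenAI model-listing format:
    #
    #   {"object": "list", "data": [{"id": "qwen2.5-7b-instruct", "object": "model"}]}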

    def generate_content(self, prompt: str) -> str:
        """
        Generates a non-streaming response from LM Studio.

        Uses the OpenAI-compatible chat-completions API.
        """
        url = f"{self.base_url}/v1/chat/completions"
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        payload = {
            "model": self.config['model'],
            "messages": messages,
            "stream": False,
            "temperature": self.config.get('temperature', 0.1),  # Low temperature for security scanning
            "max_tokens": self.config.get('max_tokens', 500)
        }
        try:
            response = requests.post(url, json=payload, timeout=30)
            response.raise_for_status()
            result = response.json()
            if 'choices' in result and len(result['choices']) > 0:
                return result['choices'][0]['message']['content']
            raise ValueError(f"Unexpected response format from LM Studio: {result}")
        except requests.exceptions.HTTPError as e:
            # Check the actual status code instead of matching '404' in the
            # exception text, which is fragile.
            if e.response is not None and e.response.status_code == 404:
                raise ConnectionError(
                    "LM Studio endpoint not found. Please ensure:\n"
                    "1. The LM Studio server is running\n"
                    "2. A model is loaded (or just-in-time loading is enabled)\n"
                    f"3. The model name '{self.config['model']}' is correct"
                ) from e
            raise ConnectionError(f"Error communicating with LM Studio: {e}") from e
        except requests.exceptions.RequestException as e:
            raise ConnectionError(f"Error communicating with LM Studio: {e}") from e
        except (json.JSONDecodeError, KeyError, ValueError) as e:
            raise ValueError(f"Error parsing LM Studio response: {e}") from e

    def generate_content_stream(self, prompt: str) -> Generator[str, None, None]:
        """
        Generates a streaming response from LM Studio.

        Uses the OpenAI-compatible chat-completions API with server-sent events.
        """
        url = f"{self.base_url}/v1/chat/completions"
        messages = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": prompt}
        ]
        payload = {
            "model": self.config['model'],
            "messages": messages,
            "stream": True,
            "temperature": self.config.get('temperature', 0.7),
            "max_tokens": self.config.get('max_tokens', 2000)
        }
        try:
            with requests.post(url, json=payload, stream=True, timeout=30) as response:
                response.raise_for_status()
                # Each SSE line has the form 'data: {json chunk}'; the stream
                # ends with the sentinel 'data: [DONE]'.
                for line in response.iter_lines():
                    if line:
                        line_str = line.decode('utf-8')
                        if line_str.startswith('data: '):
                            line_str = line_str[6:]  # Remove the 'data: ' prefix
                            if line_str.strip() == '[DONE]':
                                break
                            try:
                                chunk = json.loads(line_str)
                                if 'choices' in chunk and len(chunk['choices']) > 0:
                                    delta = chunk['choices'][0].get('delta', {})
                                    if 'content' in delta:
                                        yield delta['content']
                            except json.JSONDecodeError:
                                continue  # Skip malformed JSON lines
        except requests.exceptions.RequestException as e:
            raise ConnectionError(f"Error during LM Studio streaming: {e}") from e

    def _generate_content_impl(self, prompt: str) -> str:
        """Implementation hook for base-class compatibility."""
        return self.generate_content(prompt)

    def _generate_content_stream_impl(self, prompt: str) -> Generator[str, None, None]:
        """Implementation hook for base-class compatibility."""
        return self.generate_content_stream(prompt)
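

# Minimal usage sketch. It assumes the base LlmClient stores the config dict
# as `self.config`, as the accesses above imply; the model id and prompts are
# illustrative placeholders, not values required by this module. Because of
# the relative import, run it as:  python -m llm_clients.lmstudio
if __name__ == "__main__":
    config = {
        "model": "qwen2.5-7b-instruct",  # hypothetical id; use a model loaded in LM Studio
        "host": "http://localhost:1234",
        "temperature": 0.1,
        "max_tokens": 500,
    }
    client = LmstudioClient(config, "You are a helpful assistant.")

    # Non-streaming call: returns the full completion at once.
    print(client.generate_content("Say hello in one sentence."))

    # Streaming call: yields content deltas as they arrive.
    for token in client.generate_content_stream("Count from 1 to 5."):
        print(token, end="", flush=True)
    print()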