Spaces:
Sleeping
Sleeping
charSLee013
feat: complete Hugging Face Spaces deployment with production-ready CognitiveKernel-Launchpad
1ea26af
| #!/usr/bin/env python3 | |
| """ | |
| Pure HTTP LLM Client - Linus style: simple, direct, fail fast | |
| No provider abstraction, no defensive programming, no technical debt | |
| """ | |
| import requests | |
| from .utils import wrapped_trying, KwargsInitializable | |
class RateLimitError(Exception):
    """Raised on HTTP 429 responses so retry logic can back off and retry."""
# Optional dependency: accurate token counting needs tiktoken.
try:
    import tiktoken
except ImportError:
    # Sentinel so the module can select the char-based fallback truncator.
    tiktoken = None
class TikTokenMessageTruncator:
    """Token-budget message truncator backed by tiktoken encodings."""

    def __init__(self, model_name="gpt-4"):
        # Fail fast when the optional dependency is absent; the module-level
        # MessageTruncator alias picks the char-based fallback in that case.
        if tiktoken is None:
            raise ImportError("tiktoken is required but not installed")
        self.encoding = tiktoken.encoding_for_model(model_name)

    def _count_text_tokens(self, content):
        """Return the token count of the text found in *content*.

        Strings are encoded directly; lists count only their "text" parts;
        anything else contributes zero tokens.
        """
        if isinstance(content, str):
            return len(self.encoding.encode(content))
        if isinstance(content, list):
            return sum(
                len(self.encoding.encode(part.get("text", "")))
                for part in content
                if part.get("type") == "text"
            )
        return 0

    def _truncate_text_content(self, content, max_tokens):
        """Trim the text in *content* so it fits within *max_tokens* tokens."""
        if isinstance(content, str):
            return self.encoding.decode(self.encoding.encode(content)[:max_tokens])
        if not isinstance(content, list):
            # Unknown content shape: pass through untouched.
            return content
        kept = []
        budget_used = 0
        for part in content:
            if part.get("type") != "text":
                # Non-text parts pass through and do not consume the budget.
                kept.append(part)
                continue
            piece = self.encoding.encode(part.get("text", ""))
            if budget_used + len(piece) > max_tokens:
                room = max_tokens - budget_used
                if room > 0:
                    clipped = self.encoding.decode(piece[:room])
                    if clipped:
                        kept.append({"type": "text", "text": clipped})
                break
            kept.append(part)
            budget_used += len(piece)
        return kept

    def truncate_message_list(self, messages, max_length):
        """Keep the newest messages whose combined text tokens fit *max_length*.

        Walks the history from newest to oldest; when even the newest message
        alone exceeds the budget, that single message is clipped instead.
        """
        kept = []
        used = 0
        for msg in reversed(messages):
            body = msg.get("content", "")
            cost = self._count_text_tokens(body)
            if used + cost > max_length:
                if not kept:
                    # Newest message alone is too big: clip it in place.
                    clipped_msg = msg.copy()
                    clipped_msg["content"] = self._truncate_text_content(body, max_length)
                    kept.insert(0, clipped_msg)
                break
            kept.insert(0, msg)
            used += cost
        return kept
| # Lightweight fallback truncator | |
| class _LightweightMessageTruncator: | |
| def truncate_message_list(self, messages, max_length): | |
| # Very simple char-based truncation as a fallback | |
| total = 0 | |
| out = [] | |
| for msg in reversed(messages): | |
| content = msg.get("content", "") | |
| size = len(str(content)) | |
| if total + size > max_length: | |
| if not out: | |
| # truncate this one | |
| truncated_msg = msg.copy() | |
| text = str(content) | |
| truncated_msg["content"] = text[: max(0, max_length - total)] | |
| out.insert(0, truncated_msg) | |
| break | |
| out.insert(0, msg) | |
| total += size | |
| return out | |
# Single, deterministic MessageTruncator alias - fail fast, no confusion
MessageTruncator = (
    TikTokenMessageTruncator if tiktoken is not None else _LightweightMessageTruncator
)
class LLM(KwargsInitializable):
    """
    Pure HTTP LLM Client - Linus style: simple, direct, fail fast

    Design principles:
    1. HTTP-only endpoints - no provider abstraction
    2. Fail fast validation - no defensive programming
    3. extract_body for request parameters
    4. Auto base64 for images

    Required fields: call_target (HTTP URL), api_key, model
    """

    def __init__(self, **kwargs):
        """Configure the client; validates call_target/api_key/model unless
        kwargs carries a truthy '_default_init' (bare-object mode)."""
        # Pure HTTP config - no provider abstraction
        self.call_target = None  # Must be full HTTP URL
        self.api_key = None
        self.api_base_url = None  # Optional for provider-style targets
        self.model = None  # Model ID - separate from extract_body
        self.extract_body = {}  # Pure request parameters (no model!)
        self.max_retry_times = 3
        self.request_timeout = 600  # seconds, forwarded to requests.post
        self.max_token_num = 20000  # token budget for message truncation
        # Backward compatibility attributes (ignored in pure HTTP mode)
        self.thinking = False
        self.seed = 1377
        self.print_call_in = None
        self.print_call_out = None
        self.call_kwargs = {}  # Legacy attribute
        # Initialize attributes from kwargs
        super().__init__(**kwargs)
        # Handle _default_init case (skip validation)
        if kwargs.get('_default_init'):
            self.headers = None
            self.call_stat = {}
            # BUGFIX: use the MessageTruncator alias so the char-based
            # fallback is selected when tiktoken is not installed;
            # instantiating TikTokenMessageTruncator directly raised
            # ImportError and defeated the module's fallback machinery.
            self.message_truncator = MessageTruncator()
            return
        # HTTP-only validation - fail fast, no provider abstraction
        if not self.call_target:
            raise ValueError("call_target (HTTP URL) is required")
        if not isinstance(self.call_target, str) or not self.call_target.startswith("http"):
            raise ValueError(f"call_target must be HTTP URL starting with 'http', got: {self.call_target}")
        if not self.api_key:
            raise ValueError("api_key is required")
        if not self.model:
            raise ValueError("model is required")
        # Setup HTTP headers - simple and direct
        self.headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {self.api_key}"
        }
        # Stats and truncator (same fallback-aware alias as above)
        self.call_stat = {}
        self.message_truncator = MessageTruncator()

    def __repr__(self):
        return f"LLM(target={self.call_target})"

    def __call__(self, messages, extract_body=None, **kwargs):
        """Pure HTTP call interface; RateLimitError (HTTP 429) is retried
        with waits by wrapped_trying, up to self.max_retry_times attempts."""
        func = lambda: self._call_with_messages(messages, extract_body, **kwargs)
        return wrapped_trying(func, max_times=self.max_retry_times, wait_error_names=('RateLimitError',))

    def _call_with_messages(self, messages, extract_body=None, **kwargs):
        """Execute one HTTP LLM call: preprocess, POST, parse - fail fast.

        Raises RateLimitError on HTTP 429 (so __call__ can retry) and
        RuntimeError on any other HTTP error, malformed body, or empty reply.
        """
        # Handle uninitialized case (e.g. constructed with _default_init)
        if not self.headers or not self.call_target:
            raise RuntimeError("LLM not properly initialized - use proper call_target and api_key")
        # Process images to base64, then fit history into the token budget
        messages = self._process_images(messages)
        messages = self.message_truncator.truncate_message_list(messages, self.max_token_num)
        payload = self._build_payload(messages, extract_body, kwargs)
        # Execute HTTP call - direct to call_target
        response = requests.post(
            self.call_target,
            headers=self.headers,
            json=payload,
            timeout=self.request_timeout
        )
        # Handle different HTTP status codes appropriately
        if response.status_code == 429:
            # Rate limit exceeded - special exception for retry logic
            raise RateLimitError(f"HTTP {response.status_code}: {response.text}")
        elif response.status_code != 200:
            # Other HTTP errors - fail fast
            raise RuntimeError(f"HTTP {response.status_code}: {response.text}")
        # Parse response body - a non-JSON 200 is a hard error too
        try:
            result = response.json()
        except ValueError as exc:
            raise RuntimeError(f"Invalid response format: {response.text}") from exc
        content = self._extract_content(result)
        # Fail fast - empty response
        if not content or content.strip() == "":
            raise RuntimeError(f"Empty response: {result}")
        # Update stats
        self._update_stats(result)
        return content

    def _build_payload(self, messages, extract_body, kwargs):
        """Assemble the JSON payload: model + messages first, then defaults
        from self.extract_body, then per-call extract_body, then extra kwargs
        (later sources override earlier ones)."""
        payload = {
            "model": self.model,  # Model is separate, not in extract_body
            "messages": messages
        }
        if self.extract_body:
            payload.update(self.extract_body)
        if extract_body:
            payload.update(extract_body)
        payload.update(kwargs)
        return payload

    def _extract_content(self, result):
        """Pull the reply text (or function-call arguments) from a parsed
        chat-completions response; RuntimeError on unexpected shape."""
        try:
            message = result["choices"][0]["message"]
        except (KeyError, IndexError) as exc:
            # Chain the original error for easier debugging
            raise RuntimeError(f"Invalid response format: {result}") from exc
        tool_calls = message.get("tool_calls")
        if tool_calls:
            tool_call = tool_calls[0]
            if tool_call.get("type") == "function":
                # Surface the function-call arguments (a JSON string) as content
                return tool_call.get("function", {}).get("arguments", "{}")
        # Regular text response (also used when the tool_call type is unknown)
        return message.get("content", "")

    def _process_images(self, messages):
        """Process images in messages - auto convert to base64 if needed.

        Currently a structural pass-through: multimodal parts are copied and
        non-data: URLs are assumed already acceptable downstream.
        """
        processed_messages = []
        for message in messages:
            content = message.get("content", "")
            if isinstance(content, list):
                # Multi-modal content - process each part
                processed_content = []
                for part in content:
                    if part.get("type") == "image_url":
                        image_url = part["image_url"]["url"]
                        if image_url.startswith("data:image/"):
                            # Already base64 - keep as is
                            processed_content.append(part)
                        else:
                            # TODO: convert local files/URLs to base64; for now
                            # assume it is already properly formatted
                            processed_content.append(part)
                    else:
                        # Text or other content
                        processed_content.append(part)
                processed_message = message.copy()
                processed_message["content"] = processed_content
                processed_messages.append(processed_message)
            else:
                # Simple text content passes through untouched
                processed_messages.append(message)
        return processed_messages

    def _update_stats(self, result):
        """Accumulate call count and token usage into self.call_stat."""
        usage = result.get("usage", {})
        if usage:
            self.call_stat["llm_call"] = self.call_stat.get("llm_call", 0) + 1
            for key in ["prompt_tokens", "completion_tokens", "total_tokens"]:
                self.call_stat[key] = self.call_stat.get(key, 0) + usage.get(key, 0)