# NOTE(review): removed scraped page residue ("Spaces: / Sleeping / Sleeping")
# that was not part of the module and is not valid Python.
| """Base agent with ContextForge and vLLM integration.""" | |
| from abc import ABC, abstractmethod | |
| from typing import Any | |
| import logging | |
| import time | |
| import httpx | |
| from apohara_context_forge.config import settings | |
| logger = logging.getLogger(__name__) | |
| class BaseAgent(ABC): | |
| """Abstract agent with ContextForge integration.""" | |
| def __init__(self, agent_id: str, role: str, thinking: bool = False): | |
| self.agent_id = agent_id | |
| self.role = role | |
| self.thinking = thinking | |
| async def process(self, input_data: Any) -> dict[str, Any]: | |
| """Process input and return result with metrics.""" | |
| pass | |
| async def call_contextforge_register(self, context: str) -> dict[str, Any]: | |
| """Register context with ContextForge MCP server.""" | |
| async with httpx.AsyncClient(timeout=30.0) as client: | |
| response = await client.post( | |
| f"http://localhost:{settings.contextforge_port}/tools/register_context", | |
| json={"agent_id": self.agent_id, "context": context}, | |
| ) | |
| return response.json() | |
| async def call_contextforge_optimize(self, context: str) -> dict[str, Any]: | |
| """Get optimized context from ContextForge.""" | |
| async with httpx.AsyncClient(timeout=30.0) as client: | |
| response = await client.post( | |
| f"http://localhost:{settings.contextforge_port}/tools/get_optimized_context", | |
| json={"agent_id": self.agent_id, "context": context}, | |
| ) | |
| return response.json() | |
| async def call_vllm( | |
| self, | |
| prompt: str, | |
| thinking: bool | None = None, | |
| ) -> tuple[str, float]: | |
| """ | |
| Call vLLM for completion with optional thinking mode. | |
| Args: | |
| prompt: The input prompt | |
| thinking: Override thinking mode (default: self.thinking) | |
| Returns: | |
| tuple of (response_text, ttft_ms) | |
| """ | |
| use_thinking = thinking if thinking is not None else self.thinking | |
| start = time.perf_counter() | |
| payload = { | |
| "model": settings.vllm_model, | |
| "messages": [{"role": "user", "content": prompt}], | |
| "max_tokens": 512, | |
| "temperature": 0 if not use_thinking else 0.6, | |
| "top_p": 0.95 if use_thinking else 1.0, | |
| "extra_body": { | |
| "thinking": use_thinking, | |
| }, | |
| } | |
| async with httpx.AsyncClient(timeout=60.0) as client: | |
| r = await client.post( | |
| f"{settings.vllm_base_url}/v1/chat/completions", | |
| json=payload, | |
| ) | |
| r.raise_for_status() | |
| ttft_ms = (time.perf_counter() - start) * 1000 | |
| content = r.json()["choices"][0]["message"]["content"] | |
| return content, ttft_ms |