from openai import AsyncOpenAI

from app.core.config import settings


class LLMService:
    def __init__(self):
        self.client = AsyncOpenAI(
            base_url=settings.LLM_BASE_URL,
            api_key=settings.LLM_API_KEY,
        )
        self.is_offline = False  # Cache offline status to avoid repeated timeouts

    async def generate_response(
        self,
        message: str,
        system_prompt: str = settings.SYSTEM_PROMPT,
        tools: list | None = None,
    ) -> dict:
        if self.is_offline:
            return self._get_mock_swarm_response(message, system_prompt, tools)
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            kwargs = {
                "model": settings.MODEL_NAME,
                "messages": messages,
                "temperature": 0.7,
            }
            if tools:
                kwargs["tools"] = tools
                kwargs["tool_choice"] = "auto"

            completion = await self.client.chat.completions.create(**kwargs)
            choice = completion.choices[0].message
            return {
                "content": choice.content or "",
                "tool_calls": getattr(choice, "tool_calls", None),
            }
        except Exception as e:
            self.is_offline = True
            print(f"LLM connectivity failed: {e}. Switching to MOCK mode.")
            return self._get_mock_swarm_response(message, system_prompt, tools)

    def _get_mock_swarm_response(
        self, message: str, system_prompt: str, tools: list | None
    ) -> dict:
        """
        Simulates agent handoffs and tool calling based on keywords.
        Used for verification when Ollama is offline.
        """
        msg_lower = message.lower()

        # Check for episodic memory injected into the system prompt
        memory_insight = ""
        if "Relevant past insights:" in system_prompt:
            # Extract the first insight for the mock response; guard against
            # a marker with no insight line after it.
            parts = system_prompt.split("Relevant past insights:")
            if len(parts) > 1:
                insight_lines = parts[1].split("\n")
                if len(insight_lines) > 1:
                    memory_insight = insight_lines[1].strip("- ").strip()

        # 1. RLM / REPL mocking: return a canned reasoning-code snippet
        if "Python script" in system_prompt or "calculate" in msg_lower:
            return {
                "content": "```python\n# Simulated reasoning code\ndate1 = -586\ndate2 = 70\nprint(f'Total span: {date2 - date1} years')\n```",
                "tool_calls": None,
            }

        # 2. Check for handoff keywords
        if tools:
            tool_names = [t["function"]["name"] for t in tools]
            if "transfer_to_theologian" in tool_names and (
                "bible" in msg_lower or "genesis" in msg_lower or "study" in msg_lower
            ):
                return {
                    "content": (
                        f"I see we previously talked about {memory_insight}. "
                        "I will hand you over to our Theologian for a deeper Bible study."
                    ),
                    # Lightweight stand-ins that mimic the SDK's tool-call shape;
                    # attribute lookup resolves via class attributes, so the bare
                    # classes behave like objects with .id and .function.name.
                    "tool_calls": [
                        type("ToolCall", (), {
                            "id": "mock_handoff_1",
                            "function": type("Func", (), {
                                "name": "transfer_to_theologian",
                                "arguments": "{}",
                            }),
                        })
                    ],
                }
            if "transfer_to_healer" in tool_names and (
                "sad" in msg_lower or "prayer" in msg_lower or "help" in msg_lower
            ):
                return {
                    "content": (
                        f"I remember you were feeling {memory_insight} earlier. "
                        "I will connect you with our Healer for prayer."
                    ),
                    "tool_calls": [
                        type("ToolCall", (), {
                            "id": "mock_handoff_2",
                            "function": type("Func", (), {
                                "name": "transfer_to_healer",
                                "arguments": "{}",
                            }),
                        })
                    ],
                }

        response_content = f"[MOCK MODE] I am processing your message: '{message}'."
        if memory_insight:
            response_content += f" I remember you mentioned: '{memory_insight}'."
        return {"content": response_content, "tool_calls": None}

    async def get_embedding(self, text: str) -> list[float]:
        if self.is_offline:
            return self._get_mock_embedding(text)
        try:
            response = await self.client.embeddings.create(
                model=settings.MODEL_NAME,
                input=text,
            )
            return response.data[0].embedding
        except Exception as e:
            # First failure sets the flag for this session/instance
            self.is_offline = True
            print(f"Embedding connectivity failed: {e}. Switching to MOCK mode for this session.")
            return self._get_mock_embedding(text)

    def _get_mock_embedding(self, text: str, dim: int = 1536) -> list[float]:
        """
        Creates a deterministic sparse embedding based on word hashing.
        Allows basic keyword matching to work even without a real LLM.
        """
        vec = [0.0] * dim
        for word in text.lower().split():
            # Simple hash to map each word to an index
            idx = sum(ord(c) for c in word) % dim
            vec[idx] = 1.0
        return vec


llm_service = LLMService()
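

# ---------------------------------------------------------------------------
# Usage sketch (assumption: not part of the module's public surface). This
# demo forces the mock path so it runs without a live Ollama/OpenAI endpoint.
# The tool schema below is a hypothetical example for illustration only, not
# the application's real tool definition.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        service = LLMService()
        service.is_offline = True  # skip the real client; exercise the mock

        example_tools = [
            {
                "type": "function",
                "function": {"name": "transfer_to_theologian", "parameters": {}},
            }
        ]
        result = await service.generate_response(
            "Can we do a Bible study on Genesis?",
            system_prompt="You are a helpful assistant.",
            tools=example_tools,
        )
        print(result["content"])
        if result["tool_calls"]:
            # The mock stand-ins expose .function.name like real SDK tool calls
            print("handoff:", result["tool_calls"][0].function.name)

        # The mock embedding is deterministic: identical text yields an
        # identical sparse vector, so cosine/keyword matching stays stable.
        assert service._get_mock_embedding("hello world") == service._get_mock_embedding("hello world")

    asyncio.run(_demo())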