# NOTE(review): page-scrape residue (Hugging Face Spaces status text), commented out so the module parses:
# Spaces:
# Sleeping
# Sleeping
from openai import AsyncOpenAI

from app.core.config import settings
class LLMService:
    """Async chat + embedding client with an offline mock fallback.

    Wraps an OpenAI-compatible endpoint (configured via ``settings``). On the
    first connectivity failure the instance flips ``is_offline`` to True and
    serves deterministic mock responses/embeddings for the rest of the
    session, so callers do not pay a timeout penalty on every request.
    """

    def __init__(self):
        # OpenAI-compatible async client; base URL / key come from app config
        # (e.g. a local Ollama server).
        self.client = AsyncOpenAI(
            base_url=settings.LLM_BASE_URL,
            api_key=settings.LLM_API_KEY,
        )
        self.is_offline = False  # Cache offline status to avoid repeated timeouts

    async def generate_response(self, message: str, system_prompt: str = None, tools: list = None) -> dict:
        """Send one user message to the chat model.

        Args:
            message: The user's message.
            system_prompt: System prompt; defaults to ``settings.SYSTEM_PROMPT``
                (resolved at call time, see note below).
            tools: Optional OpenAI-style tool definitions; when given,
                ``tool_choice`` is set to ``"auto"``.

        Returns:
            dict with ``"content"`` (str, possibly empty) and ``"tool_calls"``
            (list or None). Falls back to a mock response on any client error.
        """
        # Resolve the default lazily: putting settings.SYSTEM_PROMPT in the
        # signature would freeze it at import time and ignore runtime config
        # changes. Passing None explicitly gets the configured prompt.
        if system_prompt is None:
            system_prompt = settings.SYSTEM_PROMPT
        if self.is_offline:
            return self._get_mock_swarm_response(message, system_prompt, tools)
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            kwargs = {
                "model": settings.MODEL_NAME,
                "messages": messages,
                "temperature": 0.7,
            }
            if tools:
                kwargs["tools"] = tools
                kwargs["tool_choice"] = "auto"
            completion = await self.client.chat.completions.create(**kwargs)
            choice = completion.choices[0].message
            return {
                "content": choice.content or "",
                "tool_calls": getattr(choice, "tool_calls", None),
            }
        except Exception as e:
            # Any failure (connection refused, timeout, bad response) switches
            # this instance to mock mode for the remainder of the session.
            self.is_offline = True
            print(f"LLM Connectivity failed: {str(e)}. Switching to MOCK mode.")
            return self._get_mock_swarm_response(message, system_prompt, tools)

    def _get_mock_swarm_response(self, message: str, system_prompt: str, tools: list) -> dict:
        """
        Simulates agent handoffs and tool calling based on keywords.
        Used for verification when Ollama is offline.
        """
        msg_lower = message.lower()
        # Check for Episodic Memory in system prompt
        memory_insight = ""
        if "Relevant past insights:" in system_prompt:
            # Extract the first insight (the "- ..." line right after the
            # marker). Guard against a truncated prompt with no line after the
            # marker — a bare [1] index would raise IndexError here.
            trailing = system_prompt.split("Relevant past insights:", 1)[1]
            insight_lines = trailing.split("\n")
            if len(insight_lines) > 1:
                memory_insight = insight_lines[1].strip("- ").strip()
        # 1. RLM / REPL Mocking
        if "Python script" in system_prompt or "calculate" in msg_lower:
            return {
                "content": "```python\n# Simulated reasoning code\ndate1 = -586\ndate2 = 70\nprint(f'Total span: {date2 - date1} years')\n```",
                "tool_calls": None,
            }
        # 2. Check for Handoff Keywords
        if tools:
            tool_names = [t["function"]["name"] for t in tools]
            if "transfer_to_theologian" in tool_names and ("bible" in msg_lower or "genesis" in msg_lower or "study" in msg_lower):
                return {
                    "content": f"I see we previously talked about {memory_insight}. I will hand you over to our Theologian for a deeper Bible study.",
                    # type(...) builds ad-hoc classes whose class attributes
                    # mimic the SDK's ToolCall shape (.id, .function.name,
                    # .function.arguments) without importing openai types.
                    "tool_calls": [type('ToolCall', (), {
                        "id": "mock_handoff_1",
                        "function": type('Func', (), {"name": "transfer_to_theologian", "arguments": "{}"})
                    })],
                }
            if "transfer_to_healer" in tool_names and ("sad" in msg_lower or "prayer" in msg_lower or "help" in msg_lower):
                return {
                    "content": f"I remember you were feeling {memory_insight} earlier. I will connect you with our Healer for prayer.",
                    "tool_calls": [type('ToolCall', (), {
                        "id": "mock_handoff_2",
                        "function": type('Func', (), {"name": "transfer_to_healer", "arguments": "{}"})
                    })],
                }
        # Default: echo-style acknowledgement, optionally citing memory.
        response_content = f"[MOCK MODE] I am processing your message: '{message}'."
        if memory_insight:
            response_content += f" I remember you mentioned: '{memory_insight}'."
        return {
            "content": response_content,
            "tool_calls": None,
        }

    async def get_embedding(self, text: str) -> list[float]:
        """Embed ``text`` via the configured endpoint, or mock when offline.

        NOTE(review): uses settings.MODEL_NAME for embeddings too — confirm
        the deployment serves embeddings under the chat model name.
        """
        if self.is_offline:
            return self._get_mock_embedding(text)
        try:
            response = await self.client.embeddings.create(
                model=settings.MODEL_NAME,
                input=text,
            )
            return response.data[0].embedding
        except Exception as e:
            # First failure sets the flag for this session/instance
            self.is_offline = True
            print(f"Embedding connectivity failed: {str(e)}. Switching to MOCK mode for this session.")
            return self._get_mock_embedding(text)

    def _get_mock_embedding(self, text: str, dim: int = 1536) -> list[float]:
        """
        Creates a deterministic sparse embedding based on word hashing.
        Allows basic keyword matching to work even without a real LLM.
        """
        vec = [0.0] * dim
        words = text.lower().split()
        for word in words:
            # Simple hash to map word to index (collisions are acceptable for
            # the coarse keyword matching this supports).
            idx = sum(ord(c) for c in word) % dim
            vec[idx] = 1.0
        return vec
| llm_service = LLMService() | |