from types import SimpleNamespace

from openai import AsyncOpenAI
from app.core.config import settings

class LLMService:
    def __init__(self):
        self.client = AsyncOpenAI(
            base_url=settings.LLM_BASE_URL,
            api_key=settings.LLM_API_KEY
        )
        self.is_offline = False  # Cache offline status to avoid repeated timeouts

    async def generate_response(self, message: str, system_prompt: str = settings.SYSTEM_PROMPT, tools: list | None = None) -> dict:
        if self.is_offline:
            return self._get_mock_swarm_response(message, system_prompt, tools)
            
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message}
            ]
            
            kwargs = {
                "model": settings.MODEL_NAME,
                "messages": messages,
                "temperature": 0.7
            }
            
            if tools:
                kwargs["tools"] = tools
                kwargs["tool_choice"] = "auto"

            completion = await self.client.chat.completions.create(**kwargs)
            
            choice = completion.choices[0].message
            return {
                "content": choice.content or "",
                "tool_calls": getattr(choice, "tool_calls", None)
            }
            
        except Exception as e:
            # First failure flips the flag so later calls skip straight to the mock
            self.is_offline = True
            print(f"LLM connectivity failed: {e}. Switching to MOCK mode.")
            return self._get_mock_swarm_response(message, system_prompt, tools)
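
    # Example tool schema accepted by `tools` (hypothetical handoff tool, shown
    # in the standard OpenAI function-tool shape that the mock below inspects
    # via t["function"]["name"]):
    #
    #   swarm_tools = [{
    #       "type": "function",
    #       "function": {
    #           "name": "transfer_to_theologian",
    #           "description": "Hand the conversation to the Theologian agent.",
    #           "parameters": {"type": "object", "properties": {}},
    #       },
    #   }]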

    def _get_mock_swarm_response(self, message: str, system_prompt: str, tools: list | None) -> dict:
        """
        Simulates agent handoffs and tool calling based on keywords.
        Used for verification when Ollama is offline.
        """
        msg_lower = message.lower()
        
        # Check for episodic memory injected into the system prompt
        memory_insight = ""
        if "Relevant past insights:" in system_prompt:
            # Extract the first insight line for the mock response, guarding
            # against a marker with no insight lines after it
            insight_lines = system_prompt.split("Relevant past insights:")[1].split("\n")
            if len(insight_lines) > 1:
                memory_insight = insight_lines[1].strip("- ")

        # 1. RLM / REPL Mocking
        if "Python script" in system_prompt or "calculate" in msg_lower:
            return {
                "content": "```python\n# Simulated reasoning code\ndate1 = -586\ndate2 = 70\nprint(f'Total span: {date2 - date1} years')\n```",
                "tool_calls": None
            }

        # 2. Check for Handoff Keywords
        if tools:
            tool_names = [t["function"]["name"] for t in tools]
            
            if "transfer_to_theologian" in tool_names and ("bible" in msg_lower or "genesis" in msg_lower or "study" in msg_lower):
                prefix = f"I see we previously talked about {memory_insight}. " if memory_insight else ""
                return {
                    "content": prefix + "I will hand you over to our Theologian for a deeper Bible study.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_1",
                        function=SimpleNamespace(name="transfer_to_theologian", arguments="{}")
                    )]
                }
            
            if "transfer_to_healer" in tool_names and ("sad" in msg_lower or "prayer" in msg_lower or "help" in msg_lower):
                prefix = f"I remember you were feeling {memory_insight} earlier. " if memory_insight else ""
                return {
                    "content": prefix + "I will connect you with our Healer for prayer.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_2",
                        function=SimpleNamespace(name="transfer_to_healer", arguments="{}")
                    )]
                }

        response_content = f"[MOCK MODE] I am processing your message: '{message}'."
        if memory_insight:
            response_content += f" I remember you mentioned: '{memory_insight}'."
        
        return {
            "content": response_content,
            "tool_calls": None
        }
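
    # Illustrative caller-side sketch (hypothetical; `swarm_tools` is assumed to
    # be defined as in the schema example above). The SimpleNamespace objects
    # mimic the OpenAI SDK's tool-call objects, so callers can route handoffs
    # the same way in mock and live modes:
    #
    #   reply = await llm_service.generate_response("I need prayer", tools=swarm_tools)
    #   if reply["tool_calls"]:
    #       target = reply["tool_calls"][0].function.name  # e.g. "transfer_to_healer"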

    async def get_embedding(self, text: str) -> list[float]:
        if self.is_offline:
            return self._get_mock_embedding(text)
            
        try:
            # Assumes the chat model also serves embeddings (common with Ollama);
            # a dedicated embedding model could be configured here instead.
            response = await self.client.embeddings.create(
                model=settings.MODEL_NAME,
                input=text
            )
            return response.data[0].embedding
        except Exception as e:
            # First failure sets the flag for this session/instance
            self.is_offline = True
            print(f"Embedding connectivity failed: {e}. Switching to MOCK mode for this session.")
            return self._get_mock_embedding(text)

    def _get_mock_embedding(self, text: str, dim: int = 1536) -> list[float]:
        """
        Creates a deterministic sparse embedding based on word hashing.
        Allows basic keyword matching to work even without a real LLM.
        """
        vec = [0.0] * dim
        words = text.lower().split()
        for word in words:
            # Simple hash to map word to index
            idx = sum(ord(c) for c in word) % dim
            vec[idx] = 1.0
        return vec
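
    # Quick similarity sketch (hypothetical usage, not part of the service):
    # identical words always hash to the same index, so texts sharing keywords
    # overlap, and a plain dot product serves as a crude relevance score:
    #
    #   a = llm_service._get_mock_embedding("bible study genesis")
    #   b = llm_service._get_mock_embedding("genesis study")
    #   score = sum(x * y for x, y in zip(a, b))  # > 0 when words overlap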

llm_service = LLMService()
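
if __name__ == "__main__":
    # Minimal smoke test (illustrative only): with no reachable backend the first
    # call fails, flips is_offline, and every later call takes the mock path.
    import asyncio

    async def _demo():
        reply = await llm_service.generate_response("Please calculate the date span")
        print(reply["content"])
        vec = await llm_service.get_embedding("bible study")
        print(f"Embedding dimensions: {len(vec)}")

    asyncio.run(_demo())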