from types import SimpleNamespace

from openai import AsyncOpenAI
from app.core.config import settings

class LLMService:
    def __init__(self):
        self.client = AsyncOpenAI(
            base_url=settings.LLM_BASE_URL,
            api_key=settings.LLM_API_KEY
        )
        self.is_offline = False  # Cache offline status to avoid repeated timeouts

    async def generate_response(self, message: str, system_prompt: str = settings.SYSTEM_PROMPT, tools: list | None = None) -> dict:
        if self.is_offline:
            return self._get_mock_swarm_response(message, system_prompt, tools)
            
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message}
            ]
            
            kwargs = {
                "model": settings.MODEL_NAME,
                "messages": messages,
                "temperature": 0.7
            }
            
            if tools:
                kwargs["tools"] = tools
                kwargs["tool_choice"] = "auto"

            completion = await self.client.chat.completions.create(**kwargs)
            
            choice = completion.choices[0].message
            return {
                "content": choice.content or "",
                "tool_calls": getattr(choice, "tool_calls", None)
            }
            
        except Exception as e:
            # First failure flips the flag so later calls skip straight to the mock
            self.is_offline = True
            print(f"LLM connectivity failed: {e}. Switching to MOCK mode.")
            return self._get_mock_swarm_response(message, system_prompt, tools)
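
    # Example tool schema accepted by `tools` (hypothetical handoff tool, shown
    # in the standard OpenAI function-tool shape that the mock below inspects
    # via t["function"]["name"]):
    #
    #   swarm_tools = [{
    #       "type": "function",
    #       "function": {
    #           "name": "transfer_to_theologian",
    #           "description": "Hand the conversation to the Theologian agent.",
    #           "parameters": {"type": "object", "properties": {}},
    #       },
    #   }]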

    def _get_mock_swarm_response(self, message: str, system_prompt: str, tools: list | None) -> dict:
        """
        Simulates agent handoffs and tool calling based on keywords.
        Used for verification when Ollama is offline.
        """
        msg_lower = message.lower()
        
        # Check for episodic memory injected into the system prompt
        memory_insight = ""
        if "Relevant past insights:" in system_prompt:
            # Extract the first insight line for the mock response, guarding
            # against a marker with no insight lines after it
            insight_lines = system_prompt.split("Relevant past insights:")[1].split("\n")
            if len(insight_lines) > 1:
                memory_insight = insight_lines[1].strip("- ")

        # 1. RLM / REPL Mocking
        if "Python script" in system_prompt or "calculate" in msg_lower:
            return {
                "content": "```python\n# Simulated reasoning code\ndate1 = -586\ndate2 = 70\nprint(f'Total span: {date2 - date1} years')\n```",
                "tool_calls": None
            }

        # 2. Check for Handoff Keywords
        if tools:
            tool_names = [t["function"]["name"] for t in tools]
            
            if "transfer_to_theologian" in tool_names and ("bible" in msg_lower or "genesis" in msg_lower or "study" in msg_lower):
                prefix = f"I see we previously talked about {memory_insight}. " if memory_insight else ""
                return {
                    "content": prefix + "I will hand you over to our Theologian for a deeper Bible study.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_1",
                        function=SimpleNamespace(name="transfer_to_theologian", arguments="{}")
                    )]
                }
            
            if "transfer_to_healer" in tool_names and ("sad" in msg_lower or "prayer" in msg_lower or "help" in msg_lower):
                prefix = f"I remember you were feeling {memory_insight} earlier. " if memory_insight else ""
                return {
                    "content": prefix + "I will connect you with our Healer for prayer.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_2",
                        function=SimpleNamespace(name="transfer_to_healer", arguments="{}")
                    )]
                }

        response_content = f"[MOCK MODE] I am processing your message: '{message}'."
        if memory_insight:
            response_content += f" I remember you mentioned: '{memory_insight}'."
        
        return {
            "content": response_content,
            "tool_calls": None
        }
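
    # Illustrative caller-side sketch (hypothetical; `swarm_tools` is assumed to
    # be defined as in the schema example above). The SimpleNamespace objects
    # mimic the OpenAI SDK's tool-call objects, so callers can route handoffs
    # the same way in mock and live modes:
    #
    #   reply = await llm_service.generate_response("I need prayer", tools=swarm_tools)
    #   if reply["tool_calls"]:
    #       target = reply["tool_calls"][0].function.name  # e.g. "transfer_to_healer"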

    async def get_embedding(self, text: str) -> list[float]:
        if self.is_offline:
            return self._get_mock_embedding(text)
            
        try:
            # Assumes the chat model also serves embeddings (common with Ollama);
            # a dedicated embedding model could be configured here instead.
            response = await self.client.embeddings.create(
                model=settings.MODEL_NAME,
                input=text
            )
            return response.data[0].embedding
        except Exception as e:
            # First failure sets the flag for this session/instance
            self.is_offline = True
            print(f"Embedding connectivity failed: {e}. Switching to MOCK mode for this session.")
            return self._get_mock_embedding(text)

    def _get_mock_embedding(self, text: str, dim: int = 1536) -> list[float]:
        """
        Creates a deterministic sparse embedding based on word hashing.
        Allows basic keyword matching to work even without a real LLM.
        """
        vec = [0.0] * dim
        words = text.lower().split()
        for word in words:
            # Simple hash to map word to index
            idx = sum(ord(c) for c in word) % dim
            vec[idx] = 1.0
        return vec
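
    # Quick similarity sketch (hypothetical usage, not part of the service):
    # identical words always hash to the same index, so texts sharing keywords
    # overlap, and a plain dot product serves as a crude relevance score:
    #
    #   a = llm_service._get_mock_embedding("bible study genesis")
    #   b = llm_service._get_mock_embedding("genesis study")
    #   score = sum(x * y for x, y in zip(a, b))  # > 0 when words overlap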

llm_service = LLMService()
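
if __name__ == "__main__":
    # Minimal smoke test (illustrative only): with no reachable backend the first
    # call fails, flips is_offline, and every later call takes the mock path.
    import asyncio

    async def _demo():
        reply = await llm_service.generate_response("Please calculate the date span")
        print(reply["content"])
        vec = await llm_service.get_embedding("bible study")
        print(f"Embedding dimensions: {len(vec)}")

    asyncio.run(_demo())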