# ORA — app/services/llm.py
# Initial ORA deployment (commit 5e0532d, author: Abdalkaderdev)
from types import SimpleNamespace
from typing import Optional

from openai import AsyncOpenAI

from app.core.config import settings
class LLMService:
    """Async client for an OpenAI-compatible chat/embedding backend.

    On the first connectivity failure the service flips ``is_offline`` and
    serves deterministic mock responses for the remainder of the process,
    so a dead backend does not cost a fresh timeout on every call.
    """

    def __init__(self):
        self.client = AsyncOpenAI(
            base_url=settings.LLM_BASE_URL,
            api_key=settings.LLM_API_KEY
        )
        # Cached offline status; set once on first failure to avoid
        # repeated timeouts for the rest of the session.
        self.is_offline = False

    async def generate_response(
        self,
        message: str,
        system_prompt: Optional[str] = None,
        tools: Optional[list] = None,
    ) -> dict:
        """Run one chat completion for a single user message.

        Args:
            message: The user's message.
            system_prompt: System prompt; defaults to ``settings.SYSTEM_PROMPT``.
                Resolved lazily here rather than in the signature — a signature
                default is evaluated once at import time and would silently
                ignore any later change to the settings object.
            tools: Optional OpenAI-style tool specs; when given, tool choice
                is left to the model (``tool_choice="auto"``).

        Returns:
            dict with ``content`` (str, possibly empty) and ``tool_calls``
            (list of tool-call objects, or ``None``).
        """
        if system_prompt is None:
            system_prompt = settings.SYSTEM_PROMPT
        if self.is_offline:
            return self._get_mock_swarm_response(message, system_prompt, tools)
        try:
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message}
            ]
            kwargs = {
                "model": settings.MODEL_NAME,
                "messages": messages,
                "temperature": 0.7
            }
            if tools:
                kwargs["tools"] = tools
                kwargs["tool_choice"] = "auto"
            completion = await self.client.chat.completions.create(**kwargs)
            choice = completion.choices[0].message
            return {
                "content": choice.content or "",
                "tool_calls": getattr(choice, "tool_calls", None)
            }
        except Exception as e:
            # Broad catch is deliberate: any transport/auth/model error means
            # the backend is unusable, so degrade to mock mode for the session.
            self.is_offline = True
            print(f"LLM Connectivity failed: {str(e)}. Switching to MOCK mode.")
            return self._get_mock_swarm_response(message, system_prompt, tools)

    def _get_mock_swarm_response(self, message: str, system_prompt: str, tools: Optional[list]) -> dict:
        """
        Simulates agent handoffs and tool calling based on keywords.
        Used for verification when Ollama is offline.
        """
        msg_lower = message.lower()

        # Pull the first "episodic memory" insight out of the system prompt
        # (format: a "Relevant past insights:" marker followed by "- ..." lines)
        # so the mock replies can reference it.
        memory_insight = ""
        if "Relevant past insights:" in system_prompt:
            after = system_prompt.split("Relevant past insights:", 1)[1]
            lines = after.split("\n")
            # Guard: the marker may be the very last line of the prompt,
            # in which case there is no insight line to extract.
            if len(lines) > 1:
                memory_insight = lines[1].strip("- ").strip()

        # 1. RLM / REPL mocking: pretend to reason in code.
        if "Python script" in system_prompt or "calculate" in msg_lower:
            return {
                "content": "```python\n# Simulated reasoning code\ndate1 = -586\ndate2 = 70\nprint(f'Total span: {date2 - date1} years')\n```",
                "tool_calls": None
            }

        # 2. Keyword-triggered handoffs. SimpleNamespace instances mimic the
        # attribute shape of real tool-call objects (.id, .function.name,
        # .function.arguments).
        if tools:
            tool_names = [t["function"]["name"] for t in tools]
            if "transfer_to_theologian" in tool_names and ("bible" in msg_lower or "genesis" in msg_lower or "study" in msg_lower):
                return {
                    "content": f"I see we previously talked about {memory_insight}. I will hand you over to our Theologian for a deeper Bible study.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_1",
                        function=SimpleNamespace(name="transfer_to_theologian", arguments="{}"),
                    )]
                }
            if "transfer_to_healer" in tool_names and ("sad" in msg_lower or "prayer" in msg_lower or "help" in msg_lower):
                return {
                    "content": f"I remember you were feeling {memory_insight} earlier. I will connect you with our Healer for prayer.",
                    "tool_calls": [SimpleNamespace(
                        id="mock_handoff_2",
                        function=SimpleNamespace(name="transfer_to_healer", arguments="{}"),
                    )]
                }

        # 3. Generic fallback echo.
        response_content = f"[MOCK MODE] I am processing your message: '{message}'."
        if memory_insight:
            response_content += f" I remember you mentioned: '{memory_insight}'."
        return {
            "content": response_content,
            "tool_calls": None
        }

    async def get_embedding(self, text: str) -> list[float]:
        """Embed ``text`` via the backend, falling back to a mock vector offline.

        NOTE(review): this reuses the chat ``MODEL_NAME`` for embeddings —
        confirm the backend actually serves embeddings under that model id.
        """
        if self.is_offline:
            return self._get_mock_embedding(text)
        try:
            response = await self.client.embeddings.create(
                model=settings.MODEL_NAME,
                input=text
            )
            return response.data[0].embedding
        except Exception as e:
            # First failure sets the flag for this session/instance.
            self.is_offline = True
            print(f"Embedding connectivity failed: {str(e)}. Switching to MOCK mode for this session.")
            return self._get_mock_embedding(text)

    def _get_mock_embedding(self, text: str, dim: int = 1536) -> list[float]:
        """
        Creates a deterministic sparse embedding based on word hashing.
        Allows basic keyword matching to work even without a real LLM.
        """
        vec = [0.0] * dim
        words = text.lower().split()
        for word in words:
            # Sum of code points modulo dim maps each word to one index.
            idx = sum(ord(c) for c in word) % dim
            vec[idx] = 1.0
        return vec
# Module-level singleton shared by importers; note the offline flag lives on
# this instance, so one connectivity failure switches the whole process to
# mock mode for its lifetime.
llm_service = LLMService()