Spaces:
Sleeping
Sleeping
| import logging | |
| import re | |
| import time | |
| from typing import List, Optional, Tuple, Literal | |
| from config import GROQ_API_KEYS, GROQ_BRAIN_MODEL | |
# Module-wide logger, registered under the assistant's name.
logger = logging.getLogger("J.A.R.V.I.S")

# The only two labels a classification may carry.
QueryType = Literal["general", "realtime"]

# Number of trailing (user, assistant) turns sent to the classifier as context.
MAX_CONTEXT_TURNS = 6
# Maximum number of characters of any single message placed in the prompt.
MAX_MESSAGE_PREVIEW = 500

# Human-readable reasons returned alongside each classification result.
REASONING_GENERAL = "Answerable from knowledge and context"
REASONING_REALTIME = "Needs live web search"
REASONING_DEFAULT = "Brain unavailable; defaulting to realtime"
REASONING_UNCLEAR = "Unclear; defaulting to realtime"

# System prompt for the classifier model. The dash separators were previously
# stored as a mis-decoded "β" character; they are restored to em dashes so the
# model is not shown a stray Greek letter in its instructions.
_BRAIN_SYSTEM_PROMPT = """You are a query classifier for an AI assistant. Your ONLY job is to decide whether a user's message needs LIVE WEB SEARCH or not.
Output EXACTLY one word: either "general" or "realtime".
- general: ONLY questions that are purely from static knowledge, learning data, or conversation. Examples: "Tell me a joke", "What did I ask you before?", "Open YouTube", "Write a poem about cats", "How do I improve my coding?", "What is the capital of France?", casual chit-chat. NO questions about people, current events, or things that could change.
- realtime: ALWAYS use realtime for:
* ANY question about a person (famous or not): "Who is Elon Musk?", "Tell me about [person]", "What is [name] known for?", "Who is that actor?" — the LLM has no real-time data; web search finds current info and may find info on lesser-known people.
* Anything that could have changed: news, weather, stock prices, sports scores, elections, "latest", "current", "today", "recent", "now".
* Factual lookups where real-time data would be better: events, companies, products, releases, versions.
STRONG RULE: If the question is about a person (who, what, tell me about, etc.) — ALWAYS "realtime". The LLM cannot know current facts; web search can.
When in doubt, prefer "realtime" — it's better to search when not needed than to miss current information.
Output ONLY the word. No explanation, no punctuation, no other text."""
class BrainService:
    """Classify user messages as "general" or "realtime" via a Groq-hosted LLM.

    One ChatGroq client is built per entry in GROQ_API_KEYS. When no client
    could be built, every classification falls back to "realtime".
    """

    # Whole-word matchers applied to the lower-cased model output.
    _REALTIME_RE = re.compile(r"\brealtime\b")
    _GENERAL_RE = re.compile(r"\bgeneral\b")

    def __init__(self):
        """Build one classifier client per configured Groq API key.

        Construction is best-effort: any failure (including a missing
        ``langchain_groq`` package) is logged and leaves the service with no
        clients instead of raising.
        """
        self._llms = []
        if GROQ_API_KEYS:
            try:
                # Imported inside the try so an import failure is logged
                # rather than propagated to whoever constructs the service.
                from langchain_groq import ChatGroq

                self._llms = [
                    ChatGroq(
                        groq_api_key=key,
                        model_name=GROQ_BRAIN_MODEL,
                        temperature=0.0,
                        max_tokens=20,
                        request_timeout=10,
                    )
                    for key in GROQ_API_KEYS
                ]
                logger.info(
                    "[BRAIN] Groq brain initialized (model: %s) with %d key(s)",
                    GROQ_BRAIN_MODEL,
                    len(self._llms),
                )
            except Exception as e:
                logger.warning("[BRAIN] Failed to create Groq brain: %s", e)
        if not self._llms:
            logger.warning("[BRAIN] No API keys. Classification will default to realtime.")

    @staticmethod
    def _preview(text: Optional[str]) -> str:
        """Return ``text`` cut to MAX_MESSAGE_PREVIEW chars, with "..." if it was cut."""
        text = text or ""
        if len(text) > MAX_MESSAGE_PREVIEW:
            return text[:MAX_MESSAGE_PREVIEW] + "..."
        return text

    def _build_user_content(
        self,
        user_message: str,
        chat_history: Optional[List[Tuple[str, str]]],
    ) -> str:
        """Assemble the human-message text: recent turns plus the current message."""
        context_lines = []
        # Only the most recent turns are sent, each one truncated.
        for user_turn, assistant_turn in (chat_history or [])[-MAX_CONTEXT_TURNS:]:
            context_lines.append(f"User: {self._preview(user_turn)}")
            context_lines.append(f"Assistant: {self._preview(assistant_turn)}")
        context_block = "\n".join(context_lines) if context_lines else "(No prior conversation)"
        # The current message goes through the same helper as history turns,
        # so a truncated message is marked with "..." as well.
        return (
            "Conversation so far:\n"
            f"{context_block}\n"
            f"Current user message: {self._preview(user_message)}\n"
            "Classify the current message. Output ONLY: general or realtime"
        )

    def classify(
        self,
        user_message: str,
        chat_history: Optional[List[Tuple[str, str]]] = None,
        key_index: int = 0,
    ) -> Tuple[QueryType, str, int]:
        """Decide whether ``user_message`` needs a live web search.

        Args:
            user_message: The message to classify.
            chat_history: Prior (user, assistant) turns; only the last
                MAX_CONTEXT_TURNS are included in the prompt.
            key_index: Selects which client to use; wrapped modulo the number
                of available clients.

        Returns:
            A ``(label, reasoning, elapsed_ms)`` tuple. ``label`` is
            "realtime" whenever no client exists, the call fails, or the
            model output contains neither expected word.
        """
        if not self._llms:
            return ("realtime", REASONING_DEFAULT, 0)

        user_content = self._build_user_content(user_message, chat_history)

        t0 = time.perf_counter()
        try:
            from langchain_core.messages import SystemMessage, HumanMessage

            idx = key_index % len(self._llms)
            response = self._llms[idx].invoke([
                SystemMessage(content=_BRAIN_SYSTEM_PROMPT),
                HumanMessage(content=user_content),
            ])
            text = (response.content or "").strip().lower()
        except Exception as e:
            # Best-effort boundary: any failure routes the query to realtime.
            elapsed_ms = int((time.perf_counter() - t0) * 1000)
            logger.warning("[BRAIN] Groq error after %d ms: %s. Defaulting to realtime.", elapsed_ms, e)
            return ("realtime", f"API error: {str(e)[:60]}", elapsed_ms)

        elapsed_ms = int((time.perf_counter() - t0) * 1000)
        # "realtime" is checked first, so output containing both words is
        # treated as realtime.
        if self._REALTIME_RE.search(text):
            # Log the client actually used (idx), not the raw key_index.
            logger.info("[BRAIN] Groq (key #%d) returned realtime in %d ms", idx + 1, elapsed_ms)
            return ("realtime", REASONING_REALTIME, elapsed_ms)
        if self._GENERAL_RE.search(text):
            logger.info("[BRAIN] Groq (key #%d) returned general in %d ms", idx + 1, elapsed_ms)
            return ("general", REASONING_GENERAL, elapsed_ms)
        logger.warning("[BRAIN] Unexpected output: %r in %d ms. Defaulting to realtime.", text[:100], elapsed_ms)
        return ("realtime", REASONING_UNCLEAR, elapsed_ms)