File size: 5,350 Bytes
5d7e1ed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import logging
import re
import time
from typing import List, Optional, Tuple, Literal

from config import GROQ_API_KEYS, GROQ_BRAIN_MODEL

# Module logger, registered under the name "J.A.R.V.I.S".
logger = logging.getLogger("J.A.R.V.I.S")

# The two labels a classification can produce.
QueryType = Literal["general", "realtime"]
# Number of most recent (user, assistant) history pairs placed in the classifier prompt.
MAX_CONTEXT_TURNS = 6
# Maximum number of characters kept from any single message placed in the prompt.
MAX_MESSAGE_PREVIEW = 500
# Human-readable reasons returned as the second element of BrainService.classify()'s result.
REASONING_GENERAL = "Answerable from knowledge and context"
REASONING_REALTIME = "Needs live web search"
# Returned when no LLM client is available at all.
REASONING_DEFAULT = "Brain unavailable; defaulting to realtime"
# Returned when the model's reply contains neither expected label.
REASONING_UNCLEAR = "Unclear; defaulting to realtime"

# System prompt sent with every classification request. It tells the model to
# answer with exactly one word ("general" or "realtime") and to prefer
# "realtime" when unsure. The text is sent verbatim, so edits here change
# classifier behavior.
_BRAIN_SYSTEM_PROMPT = """You are a query classifier for an AI assistant. Your ONLY job is to decide whether a user's message needs LIVE WEB SEARCH or not.



Output EXACTLY one word: either "general" or "realtime".



- general: ONLY questions that are purely from static knowledge, learning data, or conversation. Examples: "Tell me a joke", "What did I ask you before?", "Open YouTube", "Write a poem about cats", "How do I improve my coding?", "What is the capital of France?", casual chit-chat. NO questions about people, current events, or things that could change.



- realtime: ALWAYS use realtime for:

  * ANY question about a person (famous or not): "Who is Elon Musk?", "Tell me about [person]", "What is [name] known for?", "Who is that actor?" — the LLM has no real-time data; web search finds current info and may find info on lesser-known people.

  * Anything that could have changed: news, weather, stock prices, sports scores, elections, "latest", "current", "today", "recent", "now".

  * Factual lookups where real-time data would be better: events, companies, products, releases, versions.



STRONG RULE: If the question is about a person (who, what, tell me about, etc.) → ALWAYS "realtime". The LLM cannot know current facts; web search can.



When in doubt, prefer "realtime" — it's better to search when not needed than to miss current information.



Output ONLY the word. No explanation, no punctuation, no other text."""

class BrainService:
    """Classify user messages as "general" or "realtime" with a Groq-hosted LLM.

    One ChatGroq client is created per entry in GROQ_API_KEYS. When no client
    could be created, classify() answers "realtime" without calling any model.
    Every failure path also resolves to "realtime", so callers never have to
    handle an exception from this class.
    """

    def __init__(self):
        """Create one ChatGroq client per configured API key.

        Never raises: a missing ``langchain_groq`` package or a client
        construction error is logged and leaves the service without clients.
        """
        self._llms = []
        if not GROQ_API_KEYS:
            logger.warning("[BRAIN] No API keys. Classification will default to realtime.")
            return
        try:
            # Imported lazily so the module still loads when langchain_groq
            # is not installed.
            from langchain_groq import ChatGroq
            self._llms = [
                ChatGroq(
                    groq_api_key=key,
                    model_name=GROQ_BRAIN_MODEL,
                    temperature=0.0,
                    max_tokens=20,
                    request_timeout=10,
                )
                for key in GROQ_API_KEYS
            ]
        except Exception as e:
            # Deliberate best-effort: the service degrades to "always
            # realtime" instead of failing at construction time. Keys were
            # configured here, so report the real cause rather than
            # "No API keys".
            logger.warning(
                "[BRAIN] Failed to create Groq brain: %s. Classification will default to realtime.",
                e,
            )
            return
        logger.info(
            "[BRAIN] Groq brain initialized (model: %s) with %d key(s)",
            GROQ_BRAIN_MODEL,
            len(self._llms),
        )

    @staticmethod
    def _preview(text: Optional[str]) -> str:
        """Return *text* cut to MAX_MESSAGE_PREVIEW characters, adding "..." when it was cut."""
        text = text or ""
        if len(text) > MAX_MESSAGE_PREVIEW:
            return text[:MAX_MESSAGE_PREVIEW] + "..."
        return text

    def classify(
        self,
        user_message: str,
        chat_history: Optional[List[Tuple[str, str]]] = None,
        key_index: int = 0,
    ) -> Tuple[QueryType, str, int]:
        """Decide whether *user_message* needs a live web search.

        Args:
            user_message: The message to classify. Only its first
                MAX_MESSAGE_PREVIEW characters are sent to the model.
            chat_history: Optional (user, assistant) pairs, oldest first. Only
                the last MAX_CONTEXT_TURNS pairs are included as context.
            key_index: Selects which client to use; wrapped modulo the number
                of available clients.

        Returns:
            A ``(query_type, reasoning, elapsed_ms)`` tuple. ``query_type`` is
            "realtime" whenever no client exists, the call fails, or the
            model's reply contains neither label. ``elapsed_ms`` is 0 when no
            call was attempted.
        """
        if not self._llms:
            return ("realtime", REASONING_DEFAULT, 0)

        context_lines = []
        for user_turn, assistant_turn in (chat_history or [])[-MAX_CONTEXT_TURNS:]:
            context_lines.append(f"User: {self._preview(user_turn)}")
            context_lines.append(f"Assistant: {self._preview(assistant_turn)}")
        context_block = "\n".join(context_lines) if context_lines else "(No prior conversation)"
        # Use the same helper as the history turns so a truncated current
        # message is marked with "..." too.
        msg_preview = self._preview(user_message)
        user_content = f"""Conversation so far:

{context_block}



Current user message: {msg_preview}



Classify the current message. Output ONLY: general or realtime"""

        t0 = time.perf_counter()
        try:
            # Kept inside the try so a missing langchain_core, a bad
            # key_index, or any API failure all degrade to "realtime".
            from langchain_core.messages import SystemMessage, HumanMessage
            idx = key_index % len(self._llms)
            response = self._llms[idx].invoke([
                SystemMessage(content=_BRAIN_SYSTEM_PROMPT),
                HumanMessage(content=user_content),
            ])
            text = (response.content or "").strip().lower()
        except Exception as e:
            elapsed_ms = int((time.perf_counter() - t0) * 1000)
            logger.warning("[BRAIN] Groq error after %d ms: %s. Defaulting to realtime.", elapsed_ms, e)
            return ("realtime", f"API error: {str(e)[:60]}", elapsed_ms)

        elapsed_ms = int((time.perf_counter() - t0) * 1000)
        # "realtime" is checked first so a reply mentioning both labels
        # resolves to the safer choice (search).
        if re.search(r"\brealtime\b", text):
            # Log the slot actually used (idx), not the raw key_index, which
            # may exceed the number of clients.
            logger.info("[BRAIN] Groq (key #%d) returned realtime in %d ms", idx + 1, elapsed_ms)
            return ("realtime", REASONING_REALTIME, elapsed_ms)
        if re.search(r"\bgeneral\b", text):
            logger.info("[BRAIN] Groq (key #%d) returned general in %d ms", idx + 1, elapsed_ms)
            return ("general", REASONING_GENERAL, elapsed_ms)
        logger.warning("[BRAIN] Unexpected output: %r in %d ms. Defaulting to realtime.", text[:100], elapsed_ms)
        return ("realtime", REASONING_UNCLEAR, elapsed_ms)