File size: 7,298 Bytes
b7f3dcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
058ae1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7f3dcc
 
 
 
 
 
 
058ae1e
b7f3dcc
 
 
 
 
058ae1e
b7f3dcc
058ae1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7f3dcc
058ae1e
 
b7f3dcc
058ae1e
 
b7f3dcc
058ae1e
 
 
 
 
 
 
 
 
 
 
 
 
 
5713c03
058ae1e
 
5713c03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b7f3dcc
5713c03
b7f3dcc
5713c03
 
 
 
 
 
 
 
 
 
 
 
b7f3dcc
 
5713c03
 
 
 
 
 
 
 
 
 
 
 
 
058ae1e
 
5713c03
058ae1e
b7f3dcc
 
5713c03
 
 
b7f3dcc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
import json
import re
import time
import logging
from transformers import TextIteratorStreamer
from llm_module import get_llm, detect_language
from oracle_module import get_oracle_data

logger = logging.getLogger("app.agent")

def build_agent_prompt(query, language="English", short_answers=False):
    """Build the system prompt for the Sage oracle agent.

    Parameters
    ----------
    query : str
        The user's latest message. Currently not interpolated into the
        prompt body; kept for interface compatibility with existing callers.
    language : str
        Language the model is instructed to respond in.
    short_answers : bool
        When True, an extra "Be concise." stricture is appended.

    Returns
    -------
    str
        The full system prompt including tool schema, examples, and the
        strict <tool_call> output format.
    """
    # BUG FIX: `style` was previously computed but never interpolated into
    # the returned prompt, so `short_answers` had no effect. It is now
    # appended as an explicit stricture below.
    style = "Be concise." if short_answers else ""
    today = time.strftime("%Y-%m-%d")
    prompt = f"""You are Sage 6.5, a soulful Oracle Intermediary.
Current Date: {today}.

Available Tool: 'oracle_consultation' (topic, name, date_str).

STRICTURES:
1. Respond in {language}.
2. Provide reasoning before generating the JSON.
3. Utilize the 'oracle_consultation' capability for all databased queries.
4. INTENT DETECTION GUIDELINES:
   - **NAME**: Isolate the user's name from the greeting (e.g., "Julian").
   - **TOPIC**: Identify the core subject matter. For input "Thema: Liebe", the topic is "Liebe".
   - **DATE**: Default to "today" unless a specific date is provided.

EXAMPLES:
User: "Ich bin Julian"
Assistant: "Greetings Julian. I will consult the Oracle for you."
<tool_call>{{"name": "oracle_consultation", "arguments": {{ "topic": "General", "date_str": "today", "name": "Julian" }}}}</tool_call>

User: "Thema: Liebe"
Assistant: "I shall ask the Oracle about Love."
<tool_call>{{"name": "oracle_consultation", "arguments": {{ "topic": "Liebe", "date_str": "today", "name": "Seeker" }}}}</tool_call>

User: "Topic: Future"
Assistant: "Consulting the Oracle regarding the Future."
<tool_call>{{"name": "oracle_consultation", "arguments": {{ "topic": "Future", "date_str": "today", "name": "Seeker" }}}}</tool_call>

STRICT FORMAT:
To use the Oracle, output this JSON wrapped in tags:
<tool_call>{{"name": "oracle_consultation", "arguments": {{ "topic": "KEYWORD", "date_str": "YYYY-MM-DD", "name": "Name" }}}}</tool_call>
"""
    if style:
        prompt += f"\nADDITIONAL STRICTURE: {style}\n"
    return prompt

def compress_history(history, max_turns=5):
    """Trim *history* to at most the last *max_turns* exchanges.

    Each exchange is assumed to occupy two entries (user + assistant), so
    the cap is ``max_turns * 2`` entries. Histories at or under the cap are
    returned unchanged (same object, no copy).
    """
    window = max_turns * 2
    return history[-window:] if len(history) > window else history

def chat_agent_stream(query, history, user_lang=None, short_answers=False):
    """Stream an agent reply, transparently executing oracle tool calls.

    Yields progressively longer display strings as tokens arrive, with any
    ``<tool_call>...`` payload stripped from the visible text. When a
    completed tool call is detected, the tool is executed, the sentinel
    ``"__TURN_END__"`` is yielded, and the loop continues with the tool
    result injected as a synthetic user turn (at most 3 model turns).

    Parameters
    ----------
    query : str
        The latest user message.
    history : list[dict]
        Prior turns as ``{"role": ..., "content": ...}`` dicts.
    user_lang : str | None
        Optional language override; auto-detected from *query* when None.
    short_answers : bool
        Forwarded to :func:`build_agent_prompt`.
    """
    from threading import Thread  # hoisted: was re-imported on every loop turn

    model, processor = get_llm()
    lang = user_lang or detect_language(query)
    system_instruction = build_agent_prompt(query, language=lang, short_answers=short_answers)

    clean_history = compress_history(history)
    messages = []

    # The system prompt is folded into the first user message because the
    # chat template used here has no separate system role.
    intro = f"SYSTEM: {system_instruction}\n\n"

    if not clean_history:
        messages.append({"role": "user", "content": f"{intro}{query}"})
    else:
        first_role = "assistant" if clean_history[0].get("role") == "assistant" else "user"
        if first_role == "assistant":
            # Chat templates generally require the conversation to open with
            # a user turn; synthesize one carrying the system prompt.
            messages.append({"role": "user", "content": f"{intro}Greetings."})

        for turn in clean_history:
            role = "assistant" if turn.get("role") == "assistant" else "user"
            content = turn.get("content", "")
            if not content:
                continue

            if not messages:
                messages.append({"role": "user", "content": f"{intro}{content}"})
            elif messages[-1]["role"] == role:
                # Merge consecutive same-role turns; many templates reject
                # adjacent messages with the same role.
                messages[-1]["content"] += f"\n{content}"
            else:
                messages.append({"role": role, "content": content})

        # ROBUSTNESS: if every history turn had empty content, `messages`
        # is still empty here; the original code raised IndexError.
        if not messages:
            messages.append({"role": "user", "content": f"{intro}{query}"})
        elif messages[-1]["role"] == "assistant":
            messages.append({"role": "user", "content": query})
        else:
            if intro not in messages[0]["content"]:
                messages[0]["content"] = f"{intro}{messages[0]['content']}"
            messages[-1]["content"] += f"\n{query}"

    # Agent loop: the model decides whether to call the tool (max 3 turns).
    for _turn_idx in range(3):
        # Lazy %-args avoid serializing `messages` unless DEBUG is enabled
        # (replaces the previous unconditional sys.stderr dump).
        logger.debug("Messages list for template: %s", json.dumps(messages))

        input_ids = processor.apply_chat_template(
            messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
        ).to(model.device)
        streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)

        thread = Thread(
            target=model.generate,
            kwargs={
                "input_ids": input_ids,
                "streamer": streamer,
                "max_new_tokens": 1024,
                "do_sample": True,
                "temperature": 0.7,
            },
        )
        thread.start()

        current_text = ""
        for new_text in streamer:
            current_text += new_text
            # Hide everything from the opening <tool_call> tag onward so the
            # UI never sees the raw JSON payload.
            display_text = current_text
            if "<tool_call>" in current_text:
                display_text = current_text.split("<tool_call>")[0]
            yield display_text.strip()

        # Extract and parse a completed tool call, if any.
        tool_data = None
        tool_start = current_text.find("<tool_call>")
        tool_end = current_text.find("</tool_call>")
        if tool_start != -1 and tool_end != -1:
            json_str = current_text[tool_start + len("<tool_call>"):tool_end]
            try:
                tool_data = json.loads(json_str)
            except json.JSONDecodeError:
                # Malformed payload: log and fall through to plain-text path
                # (previously a bare `except: pass` that hid everything).
                logger.debug("Unparseable tool call payload: %r", json_str)

        if tool_data and "arguments" in tool_data:
            # Execute the tool silently; no "consulting..." status is yielded.
            args = tool_data["arguments"]
            res = get_oracle_data(
                name=args.get("name", "Seeker"),
                topic=args.get("topic", ""),
                date_str=args.get("date_str", "today"),
            )

            # Keep the FULL assistant text (including the tool call) in the
            # message history so the model knows which call it already made.
            if messages[-1]["role"] == "assistant":
                messages[-1]["content"] = current_text
            else:
                messages.append({"role": "assistant", "content": current_text})

            # Feed the tool result back as a synthetic user turn, then signal
            # the caller that another model turn follows.
            messages.append({
                "role": "user",
                "content": f"SYSTEM: The Oracle has spoken. Wisdom: {json.dumps(res)}\nInterpret this soulfully.",
            })
            yield "__TURN_END__"
        else:
            # No tool call: emit the final cleaned text and stop.
            yield current_text.split("<tool_call>")[0].strip()
            break