File size: 4,274 Bytes
ecd09c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f933cab
 
 
 
 
 
 
a1930fa
93b8c7f
 
f933cab
3206ace
f933cab
26ff597
93b8c7f
26ff597
3206ace
93b8c7f
26ff597
a1930fa
f933cab
 
 
 
 
93b8c7f
 
a1930fa
f933cab
 
ecd09c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import logging

from llama_index.core import Settings
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

# Module-level singletons populated by set_llm(); read by get_llm() and
# generate_chat_response().  NOTE: unassigned (annotation only) until
# set_llm() has been called — accessing them before that raises NameError.
llm: BaseLLM
embed_model: BaseEmbedding
# Shared application logger; handler/level configuration happens elsewhere.
logger = logging.getLogger("agent_logger")


def set_llm(model, key, temperature):
    """Configure the global LLM and embedding model for *model*.

    The provider is chosen by substring match on the model name: names
    containing "gpt" use OpenAI, names containing "mistral" use MistralAI,
    and anything else falls back to OpenAI's gpt-3.5-turbo at temperature 0.
    The selected models are stored in the module globals ``llm`` and
    ``embed_model`` and published to llama_index ``Settings``.

    Args:
        model: Model identifier, e.g. "gpt-4" or "mistral-large".
        key: Provider API key (used for both the LLM and the embeddings).
        temperature: Sampling temperature passed to the selected LLM.
    """
    global llm
    global embed_model

    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info('Setting up LLM with %s and associated embedding model...', model)

    if "gpt" in model:
        llm = OpenAI(api_key=key, temperature=temperature, model=model)
        embed_model = OpenAIEmbedding(api_key=key)
    elif "mistral" in model:
        # safe_mode=True enables Mistral's moderation guardrails.
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    else:
        # Default model for unrecognized names; caller-supplied temperature
        # is intentionally ignored here (fixed at 0 for the fallback).
        llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        embed_model = OpenAIEmbedding(api_key=key)

    # Publish globally so llama_index components pick these up by default.
    Settings.llm = llm
    Settings.embed_model = embed_model


def get_llm():
    """Return the module-level LLM previously configured by set_llm()."""
    return llm


def generate_chat_response(index, message):
    """Stream a RAG-grounded chat response for *message* over *index*.

    Builds a CondensePlusContextChatEngine backed by the module-global ``llm``
    and a fresh chat-memory buffer, streams the model's answer, and yields the
    cumulative response text after every received chunk (suitable for
    incremental UI updates).

    Args:
        index: A llama_index index exposing ``as_retriever()``.
        message: The user's chat message.

    Yields:
        str: The response text accumulated so far, once per streamed chunk.
    """
    logger.info("Generating chat response with history and rag...")
    string_output = ""

    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

    logger.info("Creating chat engine with index and memory...")
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),
        memory=memory,
        llm=llm,
        context_prompt=(
            """
            You are a proactive search assistant specializing in family sleep health. Your primary 
            goal is to help families improve their sleep habits and overall well-being. Maintain
            a B1 fluency level in the user's language. 
            \nKey Responsibilities:\n 
                1. Stay focused on sleep health topics. If users bring up unrelated subjects, gently guide them back to 
                    sleep-related discussions. 
                2. Ask follow-up questions to understand the user's situation and context better, 
                e.g. if the user wants to know how long their child should sleep, ask how old their child is, if a user
                is having trouble falling asleep, ask if they work late.
                3. Provide personalized recommendations based on the user's responses and context. 
            \nWhen retrieving information, use this output format (translate headings as 
            needed):\n
                Provide a 2 to 3 line summary of the main points of the response.
                **Insert a short title**:\n
                Add a more detailed explanation, examples, and step-by-step response reasoning.
                **Learn More About:**\n
                    - Suggest a related question to explore
                    - Suggest another related question to explore
                    - Suggest a question related to a common challenge or misconception about sleep                            
        \nRelevant context:\n
            {context_str}     
        \nInstructions:\n
            1. Analyze the user query and chat history.
            2. Use the provided context to formulate a helpful response.
            3. If more information is needed, such as age, current sleep habits, or family structure, ask a relevant 
               follow-up question before answering.
            4. Offer to explore related sleep topics that might benefit the user.    
        Maintain a conversational, supportive tone throughout the interaction.
        """
        ),
        verbose=True,
    )

    response = chat_engine.stream_chat(message)

    # Log retrieved source nodes through the module logger (was a stray
    # debug print() to stdout).
    for node in response.source_nodes:
        # TODO: map node id to the source document name.
        logger.info("Fetched node %s in search.", node)

    # Accumulate incrementally instead of re-joining the whole chunk list on
    # every iteration — the original ''.join per chunk was O(n^2) in total
    # output length.
    for text in response.response_gen:
        string_output += text
        yield string_output

    logger.info('Assistant response: %s', string_output)