import logging

from llama_index.core import Settings
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI

# Module-level singletons: populated by set_llm() and mirrored into the
# llama_index global Settings. Reading them before set_llm() has run raises
# NameError (they are only annotated here, not assigned).
llm: BaseLLM
embed_model: BaseEmbedding

logger = logging.getLogger("agent_logger")


def set_llm(model, key, temperature) -> None:
    """Configure the global LLM and matching embedding model.

    Selects a provider from the model name: "gpt" -> OpenAI, "mistral" ->
    MistralAI; anything else falls back to gpt-3.5-turbo at temperature 0.
    The chosen pair is stored in the module globals and in
    llama_index ``Settings`` so the rest of the app picks them up.

    Args:
        model: Model identifier string (e.g. "gpt-4", "mistral-small").
        key: Provider API key.
        temperature: Sampling temperature passed through to the LLM
            (ignored by the fallback branch, which pins temperature=0).
    """
    global llm
    global embed_model
    # Lazy %-style args: the message is only formatted if the level is enabled.
    logger.info('Setting up LLM with %s and associated embedding model...', model)
    if "gpt" in model:
        llm = OpenAI(api_key=key, temperature=temperature, model=model)
        embed_model = OpenAIEmbedding(api_key=key)
    elif "mistral" in model:
        # safe_mode enables Mistral's moderation guardrails.
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    else:
        # Default model
        llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        embed_model = OpenAIEmbedding(api_key=key)
    Settings.llm = llm
    Settings.embed_model = embed_model


def get_llm() -> BaseLLM:
    """Return the globally configured LLM (set_llm() must have run first)."""
    return llm


def generate_chat_response(index, message):
    """Stream a RAG-grounded chat response for ``message``.

    Builds a CondensePlusContextChatEngine over ``index`` with a bounded
    chat-history buffer and a sleep-health system prompt, then yields the
    response incrementally. Each yielded value is the *cumulative* text so
    far (not the delta), so a consumer can re-render the full message on
    every iteration.

    Args:
        index: A llama_index index exposing ``as_retriever()``.
        message: The user's chat message.

    Yields:
        str: The response text accumulated up to the latest token.
    """
    logger.info("Generating chat response with history and rag...")
    string_output = ""
    # ~3900 tokens of rolling chat history kept for question condensation.
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    logger.info("Creating chat engine with index and memory...")
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),
        memory=memory,
        llm=llm,
        context_prompt=(
            """ You are a proactive search assistant specializing in family sleep health. Your primary goal is to help families improve their sleep habits and overall well-being. Maintain a B1 fluency level in the user's language. \nKey Responsibilities:\n 1. Stay focused on sleep health topics. If users bring up unrelated subjects, gently guide them back to sleep-related discussions. 2. 
Ask follow-up questions to understand the user's situation and context better, e.g. if the user wants to know how long their child should sleep, ask how old their child is, if a user is having trouble falling asleep, ask if they work late. 3. Provide personalized recommendations based on the user's responses and context. \nWhen retrieving information, use this output format (translate headings as needed):\n Provide a 2 to 3 line summary of the main points of the response. **Insert a short title**:\n Add a more detailed explanation, examples, and step-by-step response reasoning. **Learn More About:**\n - Suggest a related question to explore - Suggest another related question to explore - Suggest a question related to a common challenge or misconception about sleep \nRelevant context:\n {context_str} \nInstructions:\n 1. Analyze the user query and chat history. 2. Use the provided context to formulate a helpful response. 3. If more information is needed, such as age, current sleep habits, or family structure, ask a relevant follow-up question before answering. 4. Offer to explore related sleep topics that might benefit the user. Maintain a conversational, supportive tone throughout the interaction. """
        ),
        verbose=True,
    )
    response = chat_engine.stream_chat(message)
    for node in response.source_nodes:
        # todo how to go from node id to document name?
        # Routed through the module logger (was a bare print) for consistency.
        logger.info("Fetched node %s in search.", node)
    # Cumulative += is O(n) amortized; the previous per-token ''.join of the
    # whole list was accidentally quadratic. Yielded values are identical.
    for text in response.response_gen:
        string_output += text
        yield string_output
    logger.info('Assistant response: %s', string_output)