# sleepbot / generate_response.py
# (Hugging Face Space page residue, commented out so the module parses:
#  author "arabellastrange", commit message "more prompt tweaking 2", commit a1930fa)
import logging
from llama_index.core import Settings
from llama_index.core.base.embeddings.base import BaseEmbedding
from llama_index.core.base.llms.base import BaseLLM
from llama_index.core.chat_engine import CondensePlusContextChatEngine
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
# Module-level singletons rebound by set_llm(); declared (unassigned) here so
# type checkers know their interface. Reading them before set_llm() runs
# raises NameError.
llm: BaseLLM
embed_model: BaseEmbedding
# Shared application logger; handlers are presumably configured by the host
# app under the "agent_logger" name — confirm against the entry point.
logger = logging.getLogger("agent_logger")
def set_llm(model, key, temperature):
    """Configure the global LLM and embedding model from a model name.

    Args:
        model: Provider model identifier, e.g. "gpt-4" or "mistral-large".
            Matching is by substring: "gpt" -> OpenAI, "mistral" -> MistralAI,
            anything else falls back to gpt-3.5-turbo at temperature 0.
        key: API key for the selected provider (also used for embeddings).
        temperature: Sampling temperature passed to the chat model.

    Side effects:
        Rebinds the module globals ``llm`` and ``embed_model`` and installs
        both on ``llama_index.core.Settings``.
    """
    global llm
    global embed_model
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info('Setting up LLM with %s and associated embedding model...', model)
    if "gpt" in model:
        llm = OpenAI(api_key=key, temperature=temperature, model=model)
        embed_model = OpenAIEmbedding(api_key=key)
    elif "mistral" in model:
        # safe_mode=True asks Mistral to prepend its moderation system prompt.
        llm = MistralAI(api_key=key, model=model, temperature=temperature, safe_mode=True)
        embed_model = MistralAIEmbedding(api_key=key)
    else:
        # Unrecognized model name: deterministic default.
        llm = OpenAI(api_key=key, model="gpt-3.5-turbo", temperature=0)
        embed_model = OpenAIEmbedding(api_key=key)
    Settings.llm = llm
    Settings.embed_model = embed_model
def get_llm():
    """Return the module-level LLM configured by :func:`set_llm`.

    Raises NameError if called before set_llm() has run.
    """
    return llm
def generate_chat_response(index, message):
    """Stream a RAG-grounded, history-aware chat response for ``message``.

    Builds a CondensePlusContextChatEngine over ``index`` with a fresh
    per-call memory buffer, streams the model's answer, and yields the
    *cumulative* response text after each token so a streaming UI can
    re-render the whole message.

    Args:
        index: A llama_index index; only ``as_retriever()`` is called on it.
        message: The user's chat message.

    Yields:
        str: The response text accumulated so far (grows each iteration).
    """
    logger.info("Generating chat response with history and rag...")
    string_output = ""
    # Per-call short-term memory; 3900 tokens keeps the condensed prompt
    # inside a typical 4k context window.
    memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
    logger.info("Creating chat engine with index and memory...")
    chat_engine = CondensePlusContextChatEngine.from_defaults(
        index.as_retriever(),
        memory=memory,
        llm=llm,
        context_prompt=(
            """
            You are a proactive search assistant specializing in family sleep health. Your primary
            goal is to help families improve their sleep habits and overall well-being. Maintain
            a B1 fluency level in the user's language.
            \nKey Responsibilities:\n
            1. Stay focused on sleep health topics. If users bring up unrelated subjects, gently guide them back to
            sleep-related discussions.
            2. Ask follow-up questions to understand the user's situation and context better,
            e.g. if the user wants to know how long their child should sleep, ask how old their child is, if a user
            is having trouble falling asleep, ask if they work late.
            3. Provide personalized recommendations based on the user's responses and context.
            \nWhen retrieving information, use this output format (translate headings as
            needed):\n
            Provide a 2 to 3 line summary of the main points of the response.
            **Insert a short title**:\n
            Add a more detailed explanation, examples, and step-by-step response reasoning.
            **Learn More About:**\n
            - Suggest a related question to explore
            - Suggest another related question to explore
            - Suggest a question related to a common challenge or misconception about sleep
            \nRelevant context:\n
            {context_str}
            \nInstructions:\n
            1. Analyze the user query and chat history.
            2. Use the provided context to formulate a helpful response.
            3. If more information is needed, such as age, current sleep habits, or family structure, ask a relevant
            follow-up question before answering.
            4. Offer to explore related sleep topics that might benefit the user.
            Maintain a conversational, supportive tone throughout the interaction.
            """
        ),
        verbose=True,
    )
    response = chat_engine.stream_chat(message)
    for node in response.source_nodes:
        # todo how to go from node id to document name?
        # Fixed: route retrieval tracing through the module logger (lazy args)
        # instead of bare print, so it honors the configured handlers/levels.
        logger.info("Fetched node %s in search.", node)
    for text in response.response_gen:
        # Fixed: O(n) cumulative concatenation replaces re-joining the whole
        # token list on every iteration (was O(n^2) in total tokens); the
        # yielded values are identical.
        string_output += text
        yield string_output
    # Runs once the consumer has exhausted the stream.
    logger.info('Assistant response: %s', string_output)