# backend/chain.py
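# Streaming chat chain: trims conversation history to a token budget, applies a
# language-aware system prompt, and answers with a chat model whose provider and
# sampling settings can be overridden per request. Per-session history is kept
# in memory.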
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chat_models import init_chat_model
from langchain_openai import ChatOpenAI
from langchain_core.chat_history import (
BaseChatMessageHistory,
InMemoryChatMessageHistory,
)
from langchain_core.messages import SystemMessage, HumanMessage, trim_messages
from langchain_core.runnables.history import RunnableWithMessageHistory
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough
from backend.models import models
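
# Configurable chat model: defaults to gpt-4o-mini, but "model", "model_provider",
# "temperature" and "max_tokens" can be overridden per call through config keys
# prefixed with "first_" (see chainRespond below).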
first_llm = init_chat_model(
model="gpt-4o-mini",
streaming=True,
temperature=0,
configurable_fields=("model", "model_provider", "temperature", "max_tokens"),
config_prefix="first", # useful when you have a chain with multiple models
)
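
# Plain ChatOpenAI instance, used only as the token counter for trim_messages below.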
model = ChatOpenAI(model="gpt-4o-mini", streaming=True)
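
# Keep only the most recent messages that fit within 100k tokens: the system
# message is always retained, messages are never split, and the kept window
# starts on a human message.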
trimmer = trim_messages(
max_tokens=100000,
strategy="last",
token_counter=model,
include_system=True,
allow_partial=False,
start_on="human",
)
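
# Per-session chat histories, keyed by session_id. Stored in process memory, so
# they are lost on restart.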
store: dict[str, InMemoryChatMessageHistory] = {}
def get_session_history(session_id: str) -> BaseChatMessageHistory:
if session_id not in store:
store[session_id] = InMemoryChatMessageHistory()
return store[session_id]
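
# Prompt: a {language}-parameterized system message followed by the (trimmed)
# conversation messages.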
prompt = ChatPromptTemplate.from_messages(
[
(
"system",
"You are a helpful assistant. Answer all questions to the best of your ability in {language}"
),
MessagesPlaceholder(variable_name="messages"),
]
)
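
# Chain: trim the incoming messages, format them with the prompt, then call the
# configurable model.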
chain = (
RunnablePassthrough.assign(messages=itemgetter("messages") | trimmer)
| prompt
| first_llm
)
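
# Wrap the chain so history is loaded from / saved to the session store; the new
# user messages arrive under the "messages" input key.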
with_message_history = RunnableWithMessageHistory(
    chain, get_session_history, input_messages_key="messages"
)
def chainRespond(
    message: str,
    history: list[tuple[str, str]],
    model: str,
    language: str,
    max_tokens: int,
    temperature: float,
):
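    """Stream an answer to `message`, yielding the cumulative response text.

    `history` is accepted for compatibility with the caller's chat signature but
    is not read here: conversation state is managed by RunnableWithMessageHistory
    under the fixed session id "abc11", so all callers currently share one
    history. Model, provider, max_tokens and temperature are applied per request
    through the "first_"-prefixed configurable fields.
    """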
    provider = models[model] if model in models else "openai"
response = ""
    for r in with_message_history.stream(
        {"messages": [HumanMessage(content=message)], "language": language},
        config={
            "configurable": {
                "session_id": "abc11",
                "first_model": model,
                "first_model_provider": provider,
                "first_max_tokens": max_tokens,
                "first_temperature": temperature,
            }
        },
    ):
response += r.content
yield response
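

# Minimal manual smoke test; a sketch under these assumptions: OPENAI_API_KEY is
# set in the environment, the module is run from the repo root (e.g.
# `python -m backend.chain`), and "gpt-4o-mini" is either a key of
# backend.models.models or falls through to the "openai" provider default above.
if __name__ == "__main__":
    final = ""
    for partial in chainRespond(
        "Hello, what can you help me with?",
        history=[],
        model="gpt-4o-mini",
        language="English",
        max_tokens=256,
        temperature=0,
    ):
        final = partial  # chainRespond yields the cumulative response so far
    print(final)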