# app/champ/agent.py
from typing import Literal

from langchain.agents import create_agent
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS as LCFAISS
from opentelemetry import trace

from .prompts import CHAMP_SYSTEM_PROMPT_V12

tracer = trace.get_tracer(__name__)
def _build_retrieval_query(messages) -> str:
    """Build a retrieval query from the most recent user turns."""
    user_turns = []
    for m in messages:
        # LangChain HumanMessage
        if hasattr(m, "type") and m.type == "human":
            user_turns.append(m.text)
    # Fallback: just use the last message
    if not user_turns:
        return messages[-1].text
    # Join the last two user turns so follow-up questions keep their context
    return " ".join(user_turns[-2:])
def make_prompt_with_context(
    vector_store: LCFAISS,
    lang: Literal["en", "fr"],
    k: int = 4,
    prompt_template: str | None = None,
):
    context_store = {"last_retrieved_docs": []}  # shared mutable container

    @dynamic_prompt
    def prompt_with_context(request: ModelRequest) -> str:
        with tracer.start_as_current_span("retrieving documents"):
            retrieval_query = _build_retrieval_query(request.state["messages"])
            fetch_k = 20
            try:
                retrieved_docs = vector_store.max_marginal_relevance_search(
                    retrieval_query,
                    k=k,
                    fetch_k=fetch_k,
                    lambda_mult=0.5,  # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
                )
            except Exception:
                # Fall back to plain similarity search if MMR fails
                retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)

            # Drop empty and duplicate chunks while preserving order
            seen = set()
            unique_docs = []
            for doc in retrieved_docs:
                text = (doc.page_content or "").strip()
                if not text or text in seen:
                    continue
                seen.add(text)
                unique_docs.append(doc)

            docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
            context_store["last_retrieved_docs"] = [doc.page_content for doc in unique_docs]

        language = "English" if lang == "en" else "French"
        template = CHAMP_SYSTEM_PROMPT_V12 if prompt_template is None else prompt_template
        return template.format(
            last_query=retrieval_query,
            context=docs_content,
            language=language,
        )

    return prompt_with_context, context_store
def build_champ_agent(
    vector_store: LCFAISS,
    lang: Literal["en", "fr"],
    repo_id: str = "openai/gpt-oss-20b",
    prompt_template: str | None = None,
):
    # Reducing the temperature and increasing top_p is not recommended: the model
    # starts answering in a very unnatural manner.
    hf_llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        task="text-generation",
        max_new_tokens=1024,
        temperature=0.2,
        top_p=0.9,
        # huggingfacehub_api_token=... (optional; see service.py)
    )
    # Unfortunately, LangChain and Ecologits do not work together.
    model_chat = ChatHuggingFace(llm=hf_llm)
    prompt_middleware, context_store = make_prompt_with_context(
        vector_store, lang, prompt_template=prompt_template
    )
    return create_agent(
        model_chat,
        tools=[],
        middleware=[
            prompt_middleware,
        ],
    ), context_store
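

# A minimal sketch of how build_champ_agent might be wired up. It assumes a FAISS
# index was previously saved locally with a HuggingFace embedding model; the index
# path, embedding model name, and sample question below are illustrative
# placeholders, not values taken from this repository.
if __name__ == "__main__":
    from langchain_huggingface import HuggingFaceEmbeddings

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = LCFAISS.load_local(
        "faiss_index_en", embeddings, allow_dangerous_deserialization=True
    )

    agent, context_store = build_champ_agent(vector_store, lang="en")
    result = agent.invoke(
        {"messages": [{"role": "user", "content": "How do I file a claim?"}]}
    )
    print(result["messages"][-1].content)
    # The chunks that were injected into the system prompt for this turn:
    print(context_store["last_retrieved_docs"])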