# app/champ/agent.py
"""Factory for the CHAMP retrieval-augmented agent.

Wires a FAISS vector store into a LangChain agent via a dynamic-prompt
middleware: on every model call, the middleware retrieves context
documents for the latest user turns and injects them into the system
prompt template.
"""

from typing import Literal

from langchain.agents import create_agent
from langchain.agents.middleware import dynamic_prompt, ModelRequest
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_community.vectorstores import FAISS as LCFAISS
from opentelemetry import trace

from .prompts import CHAMP_SYSTEM_PROMPT_V12

tracer = trace.get_tracer(__name__)


def _build_retrieval_query(messages) -> str:
    """Build a retrieval query from the conversation history.

    Joins the last two human turns so that short follow-up questions
    stay grounded in their immediate context without diluting the
    query with the whole conversation.

    Falls back to the most recent message of any role when no human
    turn is present, and returns an empty string for an empty history
    (instead of raising IndexError).
    """
    user_turns = [
        m.text
        for m in messages
        # LangChain HumanMessage exposes type == "human".
        if hasattr(m, "type") and m.type == "human"
    ]
    if user_turns:
        return " ".join(user_turns[-2:])
    if messages:
        # Fallback: just use the last message, whatever its role.
        return messages[-1].text
    return ""


def make_prompt_with_context(
    vector_store: LCFAISS,
    lang: Literal["en", "fr"],
    k: int = 4,
    prompt_template: str | None = None,
):
    """Build a dynamic-prompt middleware that injects retrieved context.

    Args:
        vector_store: FAISS store to retrieve context documents from.
        lang: Answer language ("en" or "fr"), interpolated into the prompt.
        k: Number of documents to inject into the prompt.
        prompt_template: Optional override for CHAMP_SYSTEM_PROMPT_V12;
            must accept ``last_query``, ``context`` and ``language`` keys.

    Returns:
        A ``(middleware, context_store)`` pair. ``context_store`` is a
        shared mutable dict whose ``"last_retrieved_docs"`` entry is
        refreshed on every model call, so callers can inspect what the
        agent last saw (e.g. for citations or debugging).
    """
    context_store = {"last_retrieved_docs": []}  # shared mutable container

    @dynamic_prompt
    def prompt_with_context(request: ModelRequest) -> str:
        with tracer.start_as_current_span("retrieving documents"):
            retrieval_query = _build_retrieval_query(request.state["messages"])
            fetch_k = 20  # candidate pool size for MMR re-ranking
            try:
                retrieved_docs = vector_store.max_marginal_relevance_search(
                    retrieval_query,
                    k=k,
                    fetch_k=fetch_k,
                    lambda_mult=0.5,  # 0.0 = diverse, 1.0 = similar; 0.3–0.7 is typical
                )
            except Exception:
                # MMR is not supported by every backend/configuration;
                # degrade to plain similarity search rather than failing
                # the whole model call.
                retrieved_docs = vector_store.similarity_search(retrieval_query, k=k)

        # Deduplicate by exact (stripped) page content, preserving the
        # retrieval order, and drop empty documents.
        seen = set()
        unique_docs = []
        for doc in retrieved_docs:
            text = (doc.page_content or "").strip()
            if not text or text in seen:
                continue
            seen.add(text)
            unique_docs.append(doc)

        docs_content = "\n\n".join(doc.page_content for doc in unique_docs)
        # Expose what was retrieved for this turn to the caller.
        context_store["last_retrieved_docs"] = [doc.page_content for doc in unique_docs]

        language = "English" if lang == "en" else "French"
        template = CHAMP_SYSTEM_PROMPT_V12 if prompt_template is None else prompt_template
        return template.format(
            last_query=retrieval_query,
            context=docs_content,
            language=language,
        )

    return prompt_with_context, context_store


def build_champ_agent(
    vector_store: LCFAISS,
    lang: Literal["en", "fr"],
    repo_id: str = "openai/gpt-oss-20b",
    prompt_template: str | None = None,
):
    """Build the CHAMP agent backed by a Hugging Face endpoint.

    Args:
        vector_store: FAISS store used for context retrieval.
        lang: Answer language ("en" or "fr").
        repo_id: Hugging Face model repository to serve the chat model.
        prompt_template: Optional system-prompt override (see
            ``make_prompt_with_context``).

    Returns:
        An ``(agent, context_store)`` pair; ``context_store`` holds the
        documents retrieved for the most recent model call.
    """
    # Reducing the temperature and increasing top_p is not recommended,
    # because the model would start answering in a very unnatural manner.
    hf_llm = HuggingFaceEndpoint(
        repo_id=repo_id,
        task="text-generation",
        max_new_tokens=1024,
        temperature=0.2,
        top_p=0.9,
        # huggingfacehub_api_token=... (optional; see service.py)
    )
    # Unfortunately, LangChain and EcoLogits do not work together.
    model_chat = ChatHuggingFace(llm=hf_llm)

    prompt_middleware, context_store = make_prompt_with_context(
        vector_store, lang, prompt_template=prompt_template
    )
    agent = create_agent(
        model_chat,
        tools=[],
        middleware=[
            prompt_middleware,
        ],
    )
    return agent, context_store