| from typing import Any, Optional |
|
|
| import weave |
| from langchain_core.documents import Document |
| from langchain_core.prompts import ChatPromptTemplate |
| from langchain_groq import ChatGroq |
|
|
| from rag_pipelines.prompts import USEFULNESS_EVALUATOR_PROMPT, UsefulnessEvaluatorResult |
|
|
# Inclusive bounds of the usefulness score scale; used to validate the
# `score_threshold` passed to UsefulnessEvaluator (scores run 1-5).
SCORE_UPPER_LIMIT = 5
SCORE_LOWER_LIMIT = 1
|
|
|
|
class UsefulnessEvaluator:
    """Evaluates and filters document contexts based on their usefulness score.

    Uses a language model chain to assess document usefulness and retains only contexts
    meeting or exceeding the specified score threshold.

    Attributes:
        llm (ChatGroq): Language model instance for scoring.
        prompt (ChatPromptTemplate): Template for usefulness evaluation prompt.
        evaluator_chain (RunnableSequence): LangChain pipeline for evaluation.
        score_threshold (int): Minimum score (1-5) required for context inclusion.
    """

    def __init__(self, llm: ChatGroq, score_threshold: int = 3) -> None:
        """Initialize evaluator with language model and scoring threshold.

        Args:
            llm (ChatGroq): Configured ChatGroq instance for evaluation.
            score_threshold (int): Minimum usefulness score (1-5) to retain.
                Defaults to 3. Must be between 1 and 5 inclusive.

        Raises:
            ValueError: If ``score_threshold`` is outside the
                [SCORE_LOWER_LIMIT, SCORE_UPPER_LIMIT] range.
        """
        # Validate before building the chain so misconfiguration fails fast.
        # NOTE: the parameter was previously annotated Optional[int], but None
        # would raise TypeError here — the annotation is now plain int.
        if not (SCORE_LOWER_LIMIT <= score_threshold <= SCORE_UPPER_LIMIT):
            msg = f"score_threshold must be between {SCORE_LOWER_LIMIT} and {SCORE_UPPER_LIMIT}"
            raise ValueError(msg)

        self.llm = llm
        self.prompt = ChatPromptTemplate.from_messages([("system", USEFULNESS_EVALUATOR_PROMPT)])
        # Structured output constrains the model's reply to the
        # UsefulnessEvaluatorResult schema, so `.decision` is always present.
        self.evaluator_chain = self.prompt | self.llm.with_structured_output(UsefulnessEvaluatorResult)
        self.score_threshold = score_threshold

    @weave.op()
    def score_context(self, question: str, context: str) -> int:
        """Calculate usefulness score for a document context relative to a question.

        Args:
            question (str): User question to evaluate against.
            context (str): Document text content to assess.

        Returns:
            int: Numerical usefulness score (1-5, per the evaluator prompt).
        """
        result = self.evaluator_chain.invoke({"question": question, "context": context})
        # The structured result's decision may arrive as a string — coerce to int.
        return int(result.decision)

    @weave.op()
    def __call__(self, state: dict[str, Any]) -> dict[str, Any]:
        """Filter document contexts based on their usefulness scores.

        Args:
            state (dict[str, Any]): Processing state containing:
                - "question" (str): Original user question
                - "documents" (list[Document]): Retrieved documents
                - "context" (list[str]): Document texts to filter

        Returns:
            dict[str, Any]: Updated state with filtered contexts:
                - "question": Original question
                - "context": Texts with score >= threshold
                - "documents": Original documents (unfiltered)

        Raises:
            KeyError: If any of the required keys is missing from ``state``.
        """
        question: str = state["question"]
        documents: list[Document] = state["documents"]
        relevant_context: list[str] = state["context"]

        # One LLM call per context; only contexts meeting the threshold survive.
        filtered_context: list[str] = [
            context for context in relevant_context if self.score_context(question, context) >= self.score_threshold
        ]

        return {
            "question": question,
            "context": filtered_context,
            "documents": documents,
        }
|
|