| """Qwen-self chunk-relevance verifier. |
| |
| Uses the same SPChat (frozen LLM + pooler) to answer a strict Yes/No |
| question: "Does this chunk contain enough info to answer the query?" |
| |
| Greedy + rp=1.0 + small max_resp to keep the verification fast. The R1 |
| distill tends to emit <think> first, so we look for "yes"/"no" anywhere |
| in the first few hundred chars. |
| |
| This is intentionally lightweight (one short Qwen pass per chunk). For |
| production with stricter quality control, replace with a dedicated |
| cross-encoder or fine-tuned NLI head. |
| """ |
|
|
| import re |
|
|
| from spchat import SPChat |
|
|
|
|
# System prompt sent at the start of every verification session. It frames the
# model as a strict judge and restricts the reply to a single Yes/No word.
VERIFIER_SYSTEM = (
    # Task statement.
    "You are a strict relevance judge. "
    "Given a question and a context, decide whether the context contains "
    "a SPECIFIC, CONCRETE answer (a name, number, date, place, or "
    "verifiable fact) that directly answers the question.\n\n"
    # Decision rules, one bullet per rule.
    "Rules:\n"
    "- Answer YES only if the exact answer can be verbatim extracted "
    "from the context.\n"
    "- Answer NO if the context is only on the same topic but does not "
    "contain the specific answer (e.g. question asks 'Who founded X?' and "
    "context says 'X is a company that does Y' without naming founders).\n"
    "- Answer NO if the question is about a specific year/date and that "
    "year is not explicitly mentioned in the context.\n\n"
    # Required output format.
    "Respond with exactly one word: Yes or No. No explanation."
)
|
|
|
|
class QwenVerifier:
    """Yes/No relevance judge that delegates to a chat model.

    Every call to :meth:`is_relevant` opens a new session on the wrapped chat
    object with ``VERIFIER_SYSTEM`` as the system prompt, asks one question
    about a single chunk, and parses the reply into a boolean.
    """

    def __init__(self, chat: SPChat, max_chunk_chars: int = 600):
        """Remember the chat backend and the chunk truncation limit.

        Args:
            chat: Object whose ``start_session`` and ``turn`` methods are used
                to run the verification prompt.
            max_chunk_chars: Number of leading chunk characters included in
                the prompt; the rest of the chunk is not shown to the model.
        """
        self.chat = chat
        self.max_chunk_chars = max_chunk_chars

    def is_relevant(self, query: str, chunk: str) -> bool:
        """Ask the model whether ``chunk`` holds a concrete answer to ``query``.

        Args:
            query: The question being answered.
            chunk: Candidate context text; only its first
                ``max_chunk_chars`` characters are sent.

        Returns:
            ``True`` only when the inspected part of the reply contains the
            word "yes" and does not contain the word "no". A "no", both
            words, or neither word all yield ``False``.
        """
        excerpt = chunk[: self.max_chunk_chars]
        user_msg = (
            f"Question: {query}\nContext: {excerpt}\n\n"
            "Does the context contain the SPECIFIC concrete answer "
            "(a name, date, or number) verbatim? "
            "One word only: Yes or No."
        )

        # One fresh session per chunk; generation settings are passed through
        # to ``turn`` unchanged.
        session = self.chat.start_session(VERIFIER_SYSTEM)
        reply = self.chat.turn(session, user_msg, max_resp=80, rp=1.0, nr=0)

        # Drop any complete <think>...</think> block so that only the visible
        # answer is inspected.
        visible = re.sub(r"<think>.*?</think>", "", reply, flags=re.DOTALL)
        visible = visible.strip().lower()

        # If nothing remains outside the think block, scan the raw reply
        # instead. Only the first 200 characters are examined.
        scanned = (visible or reply.lower())[:200]

        says_yes = re.search(r"\byes\b", scanned) is not None
        says_no = re.search(r"\bno\b", scanned) is not None

        # Ambiguous (both words) and empty (neither word) replies count as
        # "not relevant".
        return says_yes and not says_no
|
|