PersonaRag / rag_core /rag_chain_helper.py
github-actions[bot]
Deploy snapshot to HF (binaries stripped)
a0b643a
import re
from rag_core.models_groq import get_judge_llm
# LLM client used for question rewriting, created once at import time from the
# judge-model factory and shared by every call in this module.
_rewriter_llm = get_judge_llm()
# Number of most recent history turns passed to the rewriter.
_MAX_HISTORY_TURNS = 4
# Character cap on the compacted history text; when exceeded, only the tail
# (the most recent part of the conversation) is kept.
_MAX_HISTORY_CHARS = 2500
# Prompt template for turning a follow-up question into a standalone one.
# It is filled with ``str.format`` and expects two placeholders:
# ``{chat_history}`` (the compacted conversation) and ``{question}`` (the new
# user question). The model is told to answer inside <rewrite>...</rewrite>,
# which is the tag pair the response parser in this module looks for.
QUESTION_REWRITE_PROMPT = """
You are a helpful assistant that rewrites follow-up questions into standalone questions.
You are given a chat history between a user and an assistant, plus the user's new question.
Your job is to rewrite the new question so that it is self-contained and can be understood
without the previous turns.
The rewritten question MUST stay faithful to the user's intent and be about Ritam's
career, projects, research, or education.
If the question is already standalone, return it as-is.
Important rules:
- Preserve the user's original wording for project names, aliases, shorthand references,
and distinctive phrases whenever possible.
- If the user says things like "the improper face one", "that Paytm one", "the PersonaRAG project",
or similar shorthand, keep those phrases in the rewritten question rather than replacing them
with a paraphrased interpretation.
- Use chat history only to add missing context, not to overwrite or over-interpret the user's wording.
- Prefer minimal rewriting. Make the question self-contained, but do not add speculative details.
- If a follow-up refers to a previously mentioned item, rewrite by naming that item while preserving
any distinctive user phrase that helps retrieval.
- Return ONLY the rewritten question inside <rewrite>...</rewrite>.
- Do not add explanations, bullets, labels, markdown, or surrounding quotes.
Good rewrite examples:
- "explain the improper face one as well"
-> "Explain the 'improper face' project as well."
- "what about the Paytm one?"
-> "What about the Paytm-related project?"
Bad rewrite examples:
- "explain the improper face one as well"
-> "What project at Paytm Money involved using image processing and deep learning to alert users about potential issues with their photos?"
Chat history:
------------
{chat_history}
New user question:
------------
{question}
Rewrite the new question into a single, self-contained question.
Return ONLY:
<rewrite>your rewritten question here</rewrite>
""".strip()
def _response_text(response) -> str:
    """Return the stripped text carried by an LLM response object.

    A truthy ``text`` attribute is used when the response has one; otherwise
    the response's ``str()`` form is used instead.

    Args:
        response: Object returned by the LLM client, or any value with a
            meaningful ``str()`` representation.

    Returns:
        The chosen text with leading and trailing whitespace removed.
    """
    body = getattr(response, "text", None)
    # A missing, None or empty ``text`` falls through to the string form.
    source = body if body else str(response)
    return source.strip()
def _compact_history(history) -> str:
    """Render the most recent chat turns as a compact ``User:``/``Assistant:`` transcript.

    Only the last ``_MAX_HISTORY_TURNS`` turns are considered. Each turn is
    expected to be an indexable pair ``(user_message, assistant_message)``;
    falsy turns and turns with fewer than two items are skipped.

    Args:
        history: Sequence of chat turns, oldest first.

    Returns:
        The transcript, one message per line, limited to the last
        ``_MAX_HISTORY_CHARS`` characters. Empty string when no usable turn
        exists.
    """
    lines = []
    for turn in history[-_MAX_HISTORY_TURNS:]:
        if not turn or len(turn) < 2:
            continue
        user_msg, assistant_msg = turn[0], turn[1]
        # A side that is None (for example a reply that has not been produced
        # yet) is left out instead of being rendered as the literal "None",
        # which would otherwise be fed to the rewriter as if it were a message.
        if user_msg is not None:
            lines.append(f"User: {user_msg}")
        if assistant_msg is not None:
            lines.append(f"Assistant: {assistant_msg}")

    history_text = "\n".join(lines).strip()
    if len(history_text) > _MAX_HISTORY_CHARS:
        # Keep the tail: the most recent messages matter most for resolving
        # what a follow-up question refers to.
        history_text = history_text[-_MAX_HISTORY_CHARS:]
    return history_text
def _extract_rewrite(raw_text: str, fallback_question: str) -> str:
    """Pull the rewritten question out of the raw rewriter output.

    The text inside the first complete ``<rewrite>...</rewrite>`` pair is
    preferred; when no complete pair exists the whole output is used. The
    candidate is then cleaned of leftover ``<rewrite>`` tags, Markdown code
    fences, a leading ``rewrite:`` / ``rewritten question:`` label, quotes
    that wrap the entire text, and redundant whitespace.

    Args:
        raw_text: Raw text produced by the rewriter model; may be empty or None.
        fallback_question: Question to return when nothing usable is found.

    Returns:
        The cleaned rewritten question, or ``fallback_question`` when the
        output is empty, contains nothing but quotes, or exceeds 400 characters.
    """
    raw = (raw_text or "").strip()
    if not raw:
        return fallback_question

    match = re.search(r"<rewrite>\s*(.*?)\s*</rewrite>", raw, flags=re.IGNORECASE | re.DOTALL)
    candidate = match.group(1).strip() if match else raw

    # An unclosed or duplicated tag (e.g. truncated output) fails the pair
    # match above; drop any leftover tag so it never reaches the final question.
    candidate = re.sub(r"</?rewrite>", " ", candidate, flags=re.IGNORECASE).strip()

    candidate = re.sub(r"^```(?:text)?\s*", "", candidate, flags=re.IGNORECASE).strip()
    candidate = re.sub(r"\s*```$", "", candidate).strip()
    candidate = re.sub(r'^(rewritten question|rewrite)\s*:\s*', "", candidate, flags=re.IGNORECASE).strip()

    # Remove quotes only when the same quote character wraps the whole text.
    # Stripping each side independently would break a question that merely
    # starts or ends with a quoted phrase, e.g. 'Tell me about "PersonaRAG"'.
    while len(candidate) >= 2 and candidate[0] in "\"'" and candidate[0] == candidate[-1]:
        candidate = candidate[1:-1].strip()

    candidate = " ".join(candidate.split())

    # Text made only of quote characters carries no question.
    if not candidate.strip("\"'"):
        return fallback_question
    # An overly long answer is most likely an explanation, not a rewrite.
    if len(candidate) > 400:
        return fallback_question
    return candidate
def rewrite_question_with_history(history, question: str) -> str:
    """Rewrite a follow-up into a standalone question using the judge model.

    The rewrite is a best-effort enhancement: whenever it cannot be produced,
    the original question is returned unchanged.

    Args:
        history: Previous chat turns, oldest first. Falsy values mean there is
            no conversation to draw context from.
        question: The user's new question.

    Returns:
        The standalone rewrite, or ``question`` itself when there is no usable
        history, the model call fails, or no usable rewrite can be extracted.
    """
    import logging  # local import: keeps this block independent of the file's import header

    if not history:
        return question

    history_text = _compact_history(history)
    if not history_text:
        # Nothing usable survived compaction, so there is no context to add;
        # skip the model round trip entirely.
        return question

    prompt = QUESTION_REWRITE_PROMPT.format(
        chat_history=history_text,
        question=question,
    )

    try:
        response = _rewriter_llm.complete(prompt)
    except Exception:
        # The client's concrete error types are not visible here. A failed
        # rewrite must not abort the whole request, so log it and continue
        # with the user's own wording.
        logging.getLogger(__name__).warning(
            "Question rewrite failed; using the original question.",
            exc_info=True,
        )
        return question

    standalone = _extract_rewrite(_response_text(response), question)
    return standalone or question