# Rag_chatbot/app/rag/pipeline.py
# suhail
# changes
# 8119fcc
"""
RAG pipeline – concise, to-the-point answers using LLM on book content.
"""
from typing import Dict, Any, List, Optional
import logging
from app.rag.retriever import VectorRetriever
from app.rag.generator import AnswerGenerator
logger = logging.getLogger(__name__)
class RAGPipeline:
    """Answer course questions from retrieved book chunks using an LLM.

    ``query`` retrieves the most similar chunks, drops chunks whose text is
    too short, builds a prompt from the remaining text and asks the generator
    for a concise answer restricted to that text.
    """

    # Minimum semantic length: chunks whose stripped text is not longer than
    # this many characters are skipped.
    MIN_CONTENT_CHARS = 120
    # Upper bound on the number of chunks placed in the prompt (and therefore
    # on the number of chunks that can be cited as sources).
    MAX_CONTEXT_CHUNKS = 5
    # Length of the content preview returned with each source entry.
    SOURCE_PREVIEW_CHARS = 500

    SYSTEM_PROMPT = (
        "You are a helpful course assistant for Physical AI & Humanoid Robotics. "
        "Answer the user's question using ONLY the provided course book content. "
        "Be concise, clear, and to-the-point. Use bullet points if needed. "
        "Do not add extra information."
    )

    def __init__(self):
        self.retriever = VectorRetriever()
        # Low temperature for accurate, concise answers.
        self.generator = AnswerGenerator(temperature=0.2)

    async def query(self, question: str, top_k: int = 5) -> Dict[str, Any]:
        """Answer ``question`` from the course book.

        Args:
            question: The user's question; also used as the retrieval query.
            top_k: Number of chunks requested from the retriever.

        Returns:
            A dict with keys ``'answer'`` (str), ``'sources'`` (list of dicts
            with ``source_file``, ``chapter``, ``section`` and a ``content``
            preview, de-duplicated) and ``'mode'`` (always ``'book'``).

        Any ``Exception`` raised while retrieving or generating is logged with
        its traceback and reported as a generic "temporary error" answer
        instead of propagating to the caller.
        """
        try:
            # Step 1: Retrieve relevant chunks.
            retrieved_chunks = await self.retriever.retrieve_similar_chunks(
                query_text=question,
                top_k=top_k,
            )
            retrieved_chunks = retrieved_chunks or []
            logger.info(
                "Retrieved %d chunks for question: %s",
                len(retrieved_chunks),
                question,
            )

            # Step 2: Extract usable content and the matching source entries.
            context_texts, sources = self._collect_context(retrieved_chunks)
            unique_sources = self._dedupe_sources(sources)

            if not context_texts:
                return self._build_response(
                    "No relevant section found in the course book for this question.",
                    unique_sources,
                )

            # Step 3: Combine context.
            combined_context = "\n\n---\n\n".join(context_texts)

            # Step 4: Smart prompt for a concise answer.
            user_prompt = (
                f"Question: {question}\n"
                "Course Book Content:\n"
                f"{combined_context}\n"
                "Answer the question based on the above content only. "
                "Keep it short and direct."
            )

            # Step 5: Generate the concise answer.
            result = await self.generator.generate_answer(
                system_message=self.SYSTEM_PROMPT,
                user_message=user_prompt,
            )

            if not result:
                answer = "I found relevant material but couldn't summarize it."
            else:
                # ``or ''`` covers an explicit ``None`` answer, which would
                # otherwise raise on ``.strip()`` and hide the fallback below.
                answer = (result.get('answer') or '').strip()
            if not answer:
                answer = (
                    "Relevant content found in the course book, "
                    "but I couldn't formulate a clear answer."
                )

            return self._build_response(answer, unique_sources)
        except Exception as e:
            # Top-level boundary: keep the traceback in the log, give the
            # caller a stable response shape.
            logger.exception("RAG error: %s", e)
            return self._build_response("Temporary error – please try again.", [])

    def _collect_context(self, chunks):
        """Return ``(context_texts, sources)`` for the usable chunks, in order.

        At most ``MAX_CONTEXT_CHUNKS`` chunks are kept, so every returned
        source corresponds to text that is actually placed in the prompt.
        """
        context_texts = []
        sources = []
        for chunk in chunks:
            if len(context_texts) >= self.MAX_CONTEXT_CHUNKS:
                break
            # A key may be present with a ``None`` value; treat it as missing.
            payload = chunk.get('payload') or {}
            content = (payload.get('content') or '').strip()
            if len(content) <= self.MIN_CONTENT_CHARS:
                continue

            # Avoid rendering the literal text "None" in the prompt label.
            chapter_label = payload.get('chapter') or 'Unknown'
            context_texts.append(f"[Source: {chapter_label}]\n{content}")

            preview = content[:self.SOURCE_PREVIEW_CHARS]
            if len(content) > self.SOURCE_PREVIEW_CHARS:
                preview += "..."
            sources.append({
                'source_file': payload.get('source_file', 'unknown'),
                'chapter': payload.get('chapter', ''),
                'section': payload.get('section', 'Unknown'),
                'content': preview,
            })
        return context_texts, sources

    @staticmethod
    def _dedupe_sources(sources):
        """Drop sources repeating an earlier (file, chapter, section) triple."""
        seen = set()
        unique_sources = []
        for source in sources:
            key = (source['source_file'], source['chapter'], source['section'])
            if key not in seen:
                seen.add(key)
                unique_sources.append(source)
        return unique_sources

    @staticmethod
    def _build_response(answer, sources):
        """Assemble the response dict shared by every exit path of ``query``."""
        return {
            'answer': answer,
            'sources': sources,
            'mode': 'book',
        }