Spaces:
Sleeping
Sleeping
"""
RAG pipeline – concise, to-the-point answers using LLM on book content.
"""
import logging
from typing import Any, Dict, List, Optional

from app.rag.generator import AnswerGenerator
from app.rag.retriever import VectorRetriever

# Module-level logger named after this module, per logging convention.
logger = logging.getLogger(__name__)
class RAGPipeline:
    """Retrieval-augmented pipeline: answers questions concisely from book content.

    Retrieves similar chunks from the vector store, filters out chunks that
    are too short to be meaningful, and asks the LLM to answer using only
    that retrieved context.
    """

    # Chunks shorter than this are considered semantically too thin to use.
    MIN_CONTENT_LENGTH = 120
    # Maximum characters of chunk content echoed back in a source citation.
    SOURCE_PREVIEW_LENGTH = 500
    # Hard cap on how many context chunks are sent to the LLM.
    MAX_CONTEXT_CHUNKS = 5

    def __init__(self):
        self.retriever = VectorRetriever()
        # Low temperature for accurate, concise answers.
        self.generator = AnswerGenerator(temperature=0.2)

    async def query(self, question: str, top_k: int = 5) -> Dict[str, Any]:
        """Answer `question` using only retrieved course-book content.

        Args:
            question: The user's natural-language question.
            top_k: How many chunks to request from the retriever.

        Returns:
            Dict with 'answer' (str), 'sources' (deduplicated citation
            dicts), and 'mode' (always 'book'). Never raises: failures are
            logged and reported through a fallback answer.
        """
        try:
            # Step 1: Retrieve relevant chunks from the vector store.
            retrieved_chunks = await self.retriever.retrieve_similar_chunks(
                query_text=question,
                top_k=top_k,
            )
            # Lazy %-args keep log-message rendering off the hot path.
            logger.info("Retrieved %d chunks for question: %s",
                        len(retrieved_chunks), question)

            # Step 2: Extract content and build source citations.
            context_texts: List[str] = []
            sources: List[Dict[str, Any]] = []
            for chunk in retrieved_chunks:
                payload = chunk.get('payload', {})
                content = payload.get('content', '').strip()
                # Single length gate (the original tested this twice).
                if len(content) <= self.MIN_CONTENT_LENGTH:
                    continue
                # NOTE(review): chapter may be missing, which would render
                # "[Source: None]" — kept as-is to preserve prompt text.
                context_texts.append(
                    f"[Source: {payload.get('chapter')}]\n{content}"
                )
                preview = content[:self.SOURCE_PREVIEW_LENGTH]
                if len(content) > self.SOURCE_PREVIEW_LENGTH:
                    preview += "..."
                sources.append({
                    'source_file': payload.get('source_file', 'unknown'),
                    'chapter': payload.get('chapter', ''),
                    'section': payload.get('section', 'Unknown'),
                    'content': preview,
                })

            # Remove duplicate sources (same file/chapter/section),
            # preserving first-seen order.
            seen = set()
            unique_sources: List[Dict[str, Any]] = []
            for src in sources:
                key = (src['source_file'], src['chapter'], src['section'])
                if key not in seen:
                    seen.add(key)
                    unique_sources.append(src)

            if not context_texts:
                return {
                    'answer': "No relevant section found in the course book for this question.",
                    'sources': unique_sources,
                    'mode': 'book',
                }

            # Step 3: Combine context (hard-capped, independent of top_k).
            combined_context = "\n\n---\n\n".join(
                context_texts[:self.MAX_CONTEXT_CHUNKS]
            )

            # Step 4: Smart prompt for a concise answer.
            system_prompt = "You are a helpful course assistant for Physical AI & Humanoid Robotics. Answer the user's question using ONLY the provided course book content. Be concise, clear, and to-the-point. Use bullet points if needed. Do not add extra information."
            user_prompt = f"""Question: {question}
Course Book Content:
{combined_context}
Answer the question based on the above content only. Keep it short and direct."""

            # Step 5: Generate the concise answer.
            result = await self.generator.generate_answer(
                system_message=system_prompt,
                user_message=user_prompt,
            )
            if result:
                answer = result.get('answer', '').strip()
            else:
                answer = "I found relevant material but couldn't summarize it."
            # `answer` is already stripped above; plain truthiness suffices.
            if not answer:
                answer = "Relevant content found in the course book, but I couldn't formulate a clear answer."
            return {
                'answer': answer,
                'sources': unique_sources,
                'mode': 'book',
            }
        except Exception as e:
            # Top-level boundary: log with traceback, return a safe fallback.
            logger.exception("RAG error: %s", e)
            return {
                'answer': "Temporary error – please try again.",
                'sources': [],
                'mode': 'book',
            }