Spaces:

Chirag20
/

MindBot-v0

Running

App Files Files Community

MindBot-v0 / query.py

Chirag20

added knowledge

edabb92 about 1 month ago

raw

history blame contribute delete

12.2 kB

	import os
	import logging
	from qdrant_client import QdrantClient
	from langchain_qdrant import QdrantVectorStore
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_groq import ChatGroq
	from dotenv import load_dotenv
	from collections import defaultdict

	load_dotenv()

	# --------------------------
	# GLOBALS (cached)
	# --------------------------

	EMBEDDINGS = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
	VECTORSTORE = None
	LLM_ROUTER = None
	LOGGER = logging.getLogger(__name__)

	# Simple in-memory session history: list of {"role": "user"/"assistant", "content": "..."}
	CONVERSATION_HISTORY = []
	MAX_HISTORY_TURNS = 6 # keep last 6 turns (3 user + 3 assistant)


	# --------------------------
	# QDRANT / LLM SETUP
	# --------------------------

	def get_client():
	return QdrantClient(url=os.getenv("QDRANT_URL"), api_key=os.getenv("QDRANT_API_KEY"))

	def get_vectorstore():
	global VECTORSTORE
	if VECTORSTORE is None:
	VECTORSTORE = QdrantVectorStore(
	client=get_client(),
	collection_name="psychology_books",
	embedding=EMBEDDINGS,
	)
	return VECTORSTORE


	def _collect_groq_api_keys() -> list[str]:
	"""
	Collect Groq keys from supported env vars in priority order:
	1) GROQ_API_KEYS (comma-separated)
	2) GROQ_API_KEY
	3) GROQ_API_KEY_2, GROQ_API_KEY_3, ...
	"""
	keys = []

	combined = os.getenv("GROQ_API_KEYS", "").strip()
	if combined:
	keys.extend([key.strip() for key in combined.split(",") if key.strip()])

	primary = os.getenv("GROQ_API_KEY", "").strip()
	if primary:
	keys.append(primary)

	numbered_names = []
	prefix = "GROQ_API_KEY_"
	for env_name in os.environ.keys():
	if not env_name.startswith(prefix):
	continue
	suffix = env_name[len(prefix):]
	if suffix.isdigit():
	numbered_names.append(env_name)

	numbered_names.sort(key=lambda name: int(name[len(prefix):]))
	for env_name in numbered_names:
	value = os.getenv(env_name, "").strip()
	if value:
	keys.append(value)

	deduped = []
	seen = set()
	for key in keys:
	if key not in seen:
	seen.add(key)
	deduped.append(key)

	return deduped


	class GroqFailoverLLM:
	def __init__(self, api_keys: list[str], model: str, temperature: float):
	self.api_keys = api_keys
	self.model = model
	self.temperature = temperature
	self.clients = [
	ChatGroq(model=self.model, temperature=self.temperature, api_key=key)
	for key in self.api_keys
	]
	self.active_idx = 0

	@staticmethod
	def _is_rate_limit_error(error: Exception) -> bool:
	text = str(error).lower()
	markers = [
	"rate limit",
	"too many requests",
	"429",
	"quota",
	"resource exhausted",
	]
	return any(marker in text for marker in markers)

	def invoke(self, prompt):
	total = len(self.clients)
	last_error = None

	for offset in range(total):
	idx = (self.active_idx + offset) % total
	client = self.clients[idx]

	try:
	result = client.invoke(prompt)
	if idx != self.active_idx:
	LOGGER.warning("Groq fallback active: switched to key #%s", idx + 1)
	self.active_idx = idx
	return result
	except Exception as exc:
	last_error = exc
	if total > 1 and self._is_rate_limit_error(exc):
	LOGGER.warning(
	"Groq key #%s hit limit/quota. Trying next configured key.",
	idx + 1,
	)
	continue
	raise

	if last_error is not None:
	raise last_error
	raise RuntimeError("No Groq clients available")

	def get_llm():
	global LLM_ROUTER

	if LLM_ROUTER is None:
	api_keys = _collect_groq_api_keys()
	if not api_keys:
	raise ValueError(
	"No Groq keys configured. Set GROQ_API_KEY, GROQ_API_KEYS, or GROQ_API_KEY_2+ in .env"
	)
	LLM_ROUTER = GroqFailoverLLM(
	api_keys=api_keys,
	model="llama-3.1-8b-instant",
	temperature=0,
	)

	return LLM_ROUTER


	# --------------------------
	# LAYER 1: INTENT ROUTER
	# --------------------------

	INTENT_PROMPT = """Classify this message into EXACTLY one category.

	Categories:
	- CHITCHAT : greetings, thanks, small talk, "how are you", jokes
	- IDENTITY : asking who/what you are, your name, your purpose
	- CRISIS : suicidal thoughts, self-harm, abuse, urgent danger
	- PSYCHOLOGY : anything about mental health, behavior, emotions, habits, thinking, relationships

	Message: "{query}"

	Reply with only ONE word: CHITCHAT, IDENTITY, CRISIS, or PSYCHOLOGY"""

	def classify_intent(query: str, llm) -> str:
	result = llm.invoke(INTENT_PROMPT.format(query=query))
	text = getattr(result, "content", str(result)).strip().upper()

	for intent in ["CRISIS", "CHITCHAT", "IDENTITY", "PSYCHOLOGY"]:
	if intent in text:
	return intent
	return "PSYCHOLOGY" # safe default


	# --------------------------
	# LAYER 2: QUERY REWRITER
	# --------------------------

	def rewrite_query(query: str, history_context: str, llm) -> str:
	"""
	Rephrase emotional/vague queries into retrieval-friendly ones.
	E.g. "I feel so empty" → "emotional numbness causes and psychological explanation"
	"""
	prompt = f"""You are a psychology search assistant.

	Conversation so far:
	{history_context if history_context else "None"}

	User query: "{query}"

	Rewrite this query to be more specific and retrieval-friendly for a psychology book search.
	Keep it under 15 words. Return ONLY the rewritten query, nothing else."""

	result = llm.invoke(prompt)
	rewritten = getattr(result, "content", str(result)).strip().strip('"')
	return rewritten if rewritten else query


	# --------------------------
	# LAYER 3: CRAG RELEVANCE GRADER
	# --------------------------

	def grade_chunks(query: str, docs: list, llm) -> list:
	"""
	Score each retrieved chunk 1-5 for relevance.
	Filter out chunks scoring below threshold.
	Returns (filtered_docs, all_relevant: bool)
	"""
	relevant_docs = []

	for doc in docs:
	prompt = f"""Rate this text chunk's relevance to the query on a scale of 1 to 5.

	Query: "{query}"

	Chunk:
	{doc.page_content[:400]}

	Reply with ONLY a number: 1, 2, 3, 4, or 5
	(1=completely irrelevant, 5=directly answers the query)"""

	result = llm.invoke(prompt)
	text = getattr(result, "content", str(result)).strip()

	try:
	score = int(text[0])
	except (ValueError, IndexError):
	score = 3 # default to neutral

	if score >= 3:
	relevant_docs.append(doc)

	all_relevant = len(relevant_docs) > 0
	return relevant_docs, all_relevant


	# --------------------------
	# CONVERSATION MEMORY HELPERS
	# --------------------------

	def get_history_context() -> str:
	"""Format last N turns for injection into prompts."""
	if not CONVERSATION_HISTORY:
	return ""

	lines = []
	for turn in CONVERSATION_HISTORY[-MAX_HISTORY_TURNS:]:
	role = "User" if turn["role"] == "user" else "Assistant"
	lines.append(f"{role}: {turn['content']}")
	return "\n".join(lines)

	def save_to_history(user_msg: str, assistant_msg: str):
	CONVERSATION_HISTORY.append({"role": "user", "content": user_msg})
	CONVERSATION_HISTORY.append({"role": "assistant", "content": assistant_msg})

	def clear_history():
	CONVERSATION_HISTORY.clear()


	# --------------------------
	# INTENT HANDLERS
	# --------------------------

	def handle_chitchat(query: str, history_context: str, llm) -> str:
	prompt = f"""You are a warm, supportive psychology assistant named MindBot.
	Have a natural, friendly conversation. Keep it brief (1-2 sentences).

	{f'Conversation so far: {history_context}' if history_context else ''}

	User: {query}
	MindBot:"""
	result = llm.invoke(prompt)
	return getattr(result, "content", str(result)).strip()

	def handle_identity(query: str, llm) -> str:
	prompt = f"""The user asked: "{query}"

	You are MindBot, a psychology assistant trained on books covering therapy, behavior,
	cognitive psychology, habits, and emotional well-being. Answer in 2-3 sentences."""
	result = llm.invoke(prompt)
	return getattr(result, "content", str(result)).strip()

	def handle_crisis(query: str) -> str:
	return (
	"I can hear that you're going through something really difficult right now. "
	"Please know you're not alone.\n\n"
	"iCall (India): 9152987821\n"
	"Vandrevala Foundation (24/7): 1860-2662-345\n"
	"iCall email: icall@tiss.edu\n\n"
	"Would you like to talk about what's on your mind? I'm here to listen."
	)


	# --------------------------
	# CORE RAG PIPELINE (with CRAG)
	# --------------------------

	def run_rag_with_crag(query: str, history_context: str, llm, max_retries: int = 2) -> tuple:
	vectorstore = get_vectorstore()

	# Step 1: rewrite query for better retrieval
	search_query = rewrite_query(query, history_context, llm)

	for attempt in range(max_retries):
	# Step 2: retrieve
	retriever = vectorstore.as_retriever(
	search_type="mmr",
	search_kwargs={"k": 5},
	)
	docs = retriever.invoke(search_query)

	if not docs:
	return "I couldn't find relevant information in the books.", []

	# Step 3: CRAG grade
	relevant_docs, has_relevant = grade_chunks(search_query, docs, llm)

	if has_relevant:
	break

	# If nothing relevant: rewrite more aggressively and retry
	if attempt < max_retries - 1:
	search_query = rewrite_query(
	f"explain in detail: {query}",
	history_context,
	llm
	)

	# Step 4: build grouped context
	if not relevant_docs:
	relevant_docs = docs[:2] # fallback: use top 2 anyway

	grouped = defaultdict(list)
	for doc in relevant_docs:
	book = doc.metadata.get("book", "Unknown")
	grouped[book].append(doc.page_content)

	context_parts = []
	for book, texts in grouped.items():
	joined = "\n".join(texts[:2])
	context_parts.append(f"[Source: {book}]\n{joined}")
	context = "\n\n---\n\n".join(context_parts)

	# Step 5: generate with memory + empathy constraint
	prompt = f"""You are MindBot, a compassionate psychology assistant.

	Conversation history:
	{history_context if history_context else "This is the start of the conversation."}

	Knowledge from books:
	{context}

	User's question: {query}

	Instructions:
	- Be warm and empathetic first, then informative
	- Answer using ONLY the provided book context
	- If multiple perspectives exist, present them clearly
	- If the question isn't covered in the context, say so honestly
	- Keep response focused and under 200 words unless depth is needed"""

	result = llm.invoke(prompt)
	answer = getattr(result, "content", str(result)).strip()

	return answer, relevant_docs


	# --------------------------
	# MAIN ENTRY POINT
	# --------------------------

	def ask_question(query: str) -> tuple:
	llm = get_llm()
	history_context = get_history_context()

	# Route intent
	intent = classify_intent(query, llm)

	if intent == "CRISIS":
	response = handle_crisis(query)
	save_to_history(query, response)
	return response, []

	elif intent == "CHITCHAT":
	response = handle_chitchat(query, history_context, llm)
	save_to_history(query, response)
	return response, []

	elif intent == "IDENTITY":
	response = handle_identity(query, llm)
	save_to_history(query, response)
	return response, []

	else: # PSYCHOLOGY
	response, docs = run_rag_with_crag(query, history_context, llm)
	save_to_history(query, response)
	return response, docs