Spaces:

mishrabp
/

interview-assistant

Sleeping

App Files Files Community

interview-assistant / common /mcp /tools /rag_tool.py

mishrabp

Upload folder using huggingface_hub

226b286 verified 21 days ago

raw

history blame contribute delete

4.02 kB

	"""RAG Search Tool - Search the local healthcare knowledge base"""
	import os
	from pathlib import Path
	from agents import function_tool, RunContextWrapper

	from common.rag.rag import Retriever
	from dataclasses import dataclass

	@dataclass
	class UserContext:
	    """Per-run settings that ``rag_search`` reads from its run context.

	    Only ``uid`` is required; every other field has a default.
	    """

	    # Identifier supplied by the caller; rag_search itself never reads it.
	    uid: str
	    # Passed straight through to Retriever as its ``db_path`` argument.
	    db_path: str = ""
	    # Passed straight through to Retriever as its ``file_path`` argument.
	    file_path: str = ""
	    # FAISS L2 distance threshold for RAG relevance: lower distance means a
	    # better match, so hits scoring above this value are discarded.
	    similarity_threshold: float = 0.4


	# ---------------------------------------------------------
	# RAG Search Tool
	# ---------------------------------------------------------
	@function_tool
	def rag_search(wrapper: RunContextWrapper[UserContext], query: str) -> str:
	    """
	    Search the local healthcare knowledge base for relevant information.

	    Args:
	        query: The medical question or topic to search for

	    Returns:
	        Relevant information from the healthcare knowledge base
	    """
	    # NOTE(review): the docstring wording above is kept as-is on purpose;
	    # function_tool presumably exposes it to the model as the tool
	    # description - confirm before rewording.

	    # Single reply used for every "nothing usable" outcome. The debug output
	    # suggests the calling agent treats it as the cue for a web-search
	    # fallback (not visible from here).
	    no_match = "No relevant information found in the knowledge base."

	    print(f"[DEBUG] RAG_SEARCH called with query: '{query}'")

	    # The relevance cutoff is configured per run on the user context.
	    ctx = wrapper.context
	    threshold = ctx.similarity_threshold
	    print(f"[DEBUG] RAG_SEARCH: Using similarity threshold: {threshold}")

	    try:
	        # Build a retriever from the context's paths and fetch the top 5 hits
	        # as (document, distance) pairs.
	        hits = Retriever(
	            db_path=ctx.db_path,
	            file_path=ctx.file_path,
	        ).retrieve_with_scores(query, k=5)

	        if not hits:
	            print("[DEBUG] RAG_SEARCH: No results found in knowledge base")
	            return no_match

	        print(f"[DEBUG] RAG_SEARCH: Found {len(hits)} results")

	        # Lower distance = better match; the first hit is taken as the best
	        # one (assumes the retriever returns hits ordered by distance).
	        top_distance = hits[0][1]
	        print(f"[DEBUG] RAG_SEARCH: Best similarity score (distance): {top_distance:.4f} (threshold: {threshold})")

	        if top_distance > threshold:
	            print(f"[DEBUG] RAG_SEARCH: Best match score {top_distance:.4f} is above threshold {threshold}")
	            print("[DEBUG] RAG_SEARCH: Results not relevant enough, triggering web search fallback")
	            return no_match

	        print(f"[DEBUG] RAG_SEARCH: Results are relevant (score: {top_distance:.4f} <= {threshold})")

	        # Dump every distance to make threshold tuning easier.
	        score_list = ", ".join(f"{distance:.4f}" for _, distance in hits)
	        print(f"[DEBUG] RAG_SEARCH: All scores: {score_list}")

	        # Keep only hits within the threshold; each keeps its original rank
	        # number even when earlier-ranked hits are filtered out.
	        kept = [
	            f"Result {rank} (score: {distance:.4f}):\n{doc.page_content.strip()}\n"
	            for rank, (doc, distance) in enumerate(hits[:5], start=1)
	            if distance <= threshold
	        ]

	        if not kept:
	            print("[DEBUG] RAG_SEARCH: No results met the similarity threshold")
	            print("[DEBUG] RAG_SEARCH: Triggering web search fallback")
	            return no_match

	        answer = "\n".join(kept)
	        print(f"[DEBUG] RAG_SEARCH: Returning {len(kept)} results, total length: {len(answer)} characters")
	        print(f"[DEBUG] RAG_SEARCH: First 300 chars: {answer[:300]}...")
	        return answer

	    except Exception as err:
	        # Tool boundary: report the failure as text instead of raising, so the
	        # caller always receives a string.
	        print(f"[DEBUG] RAG_SEARCH: Error occurred - {err!s}")
	        return f"Error retrieving from knowledge base: {err!s}"



	# Public API of this module. Every entry must be a name bound at module
	# level, otherwise a star-import of this module raises AttributeError.
	# "retriever" is only a local variable inside rag_search, so it is not
	# listed; UserContext is exported because callers need it to build the
	# run context that rag_search reads.
	__all__ = ["rag_search", "UserContext"]