Spaces:
Running
Running
# DEPENDENCIES
import json
from typing import Any, Dict, Optional

from config.models import LLMProvider
from config.settings import get_settings
from config.logging_config import get_logger
from generation.llm_client import get_llm_client
# Setup Settings and Logging
# Module-level singletons: application settings and a module-scoped logger.
settings = get_settings()
logger = get_logger(__name__)
class QueryClassifier:
    """
    LLM-based query classifier that intelligently routes queries to:
    1. General/Conversational (no document context needed)
    2. RAG/Document-based (needs retrieval from documents)

    Uses the LLM itself for classification instead of hardcoded patterns.
    """
    def __init__(self, provider: Optional[LLMProvider] = None, model_name: Optional[str] = None):
        """
        Initialize the classifier and its underlying LLM client.

        Arguments:
        ----------
        provider   { LLMProvider } : LLM provider (defaults to LLMProvider.OLLAMA)
        model_name { str }         : Model name (defaults to settings.OLLAMA_MODEL)
        """
        self.logger = logger
        self.provider = provider or LLMProvider.OLLAMA
        self.model_name = model_name or settings.OLLAMA_MODEL
        # Initialize LLM client for classification
        self.llm_client = get_llm_client(provider=self.provider,
                                         model_name=self.model_name,
                                         )
        # Classification prompt (instructs the model to answer with bare JSON only)
        self.system_prompt = """
You are a query classification system for a RAG (Retrieval-Augmented Generation) system.
Your job is to determine if a user query should be answered using the user's uploaded documents.
**IMPORTANT CONTEXT**: The user has uploaded documents to the system. All queries related to the content of those uploaded documents should use RAG.
Classify queries into TWO categories:
**RAG (Document-based)** - Use when ANY of these are true:
1. Query asks about ANY content that could be in the uploaded documents
2. Query asks factual questions that could be answered from document content
3. Query asks for lists, summaries, or analysis of information
4. Query mentions specific details, data, statistics, names, dates, or facts
5. Query asks "what", "how", "list", "explain", "summarize", "compare", "analyze" about any topic
6. Query could reasonably be answered by searching through documents
7. **CRITICAL**: When documents are uploaded, DEFAULT TO RAG for most factual/content queries
**GENERAL (Conversational)** - Use ONLY when MOST of these are true:
1. Query is purely conversational (greetings, thanks, casual chat)
2. Query asks about the RAG system itself or its functionality
3. Query asks for general knowledge that is NOT specific to uploaded documents
4. Query is a meta-question about how to use the system
5. Query contains NO request for factual information from documents
**EXAMPLES FOR ANY DOCUMENT TYPE**:
- For business documents: "What sales channels does the company use?" → RAG
- For research papers: "What were the study's findings?" → RAG
- For legal documents: "What are the key clauses?" → RAG
- For technical manuals: "How do I configure the system?" → RAG
- For personal documents: "What dates are mentioned?" → RAG
- "Hi, how are you?" → GENERAL
- "How do I upload a document?" → GENERAL
- "What is the capital of France?" → GENERAL (unless geography documents were uploaded)
**KEY RULES**:
1. When documents exist, assume queries are about them unless clearly not
2. When in doubt, classify as RAG (safer to search than hallucinate)
3. If query could be answered from document content, use RAG
4. Only use GENERAL for purely conversational or system-related queries
Respond with ONLY a JSON object (no markdown, no extra text):
{
"type": "rag" or "general",
"confidence": 0.0 to 1.0,
"reason": "brief explanation"
}
"""

    async def classify(self, query: str, has_documents: bool = True) -> Dict[str, Any]:
        """
        Classify a query using LLM

        Arguments:
        ----------
        query         { str }  : User query
        has_documents { bool } : Whether documents are available in the system

        Returns:
        --------
        { dict } : Classification result with keys: type, confidence, reason,
                   suggested_action, is_llm_classified (and "error" on failure)
        """
        try:
            # If no documents are available, everything should be general
            if not has_documents:
                return {"type"             : "general",
                        "confidence"       : 1.0,
                        "reason"           : "No documents available in system",
                        "suggested_action" : "respond_with_general_llm",
                        "is_llm_classified": False,
                        }
            # Build classification prompt
            user_prompt = f"""
Query: "{query}"
System status: {"Documents are available" if has_documents else "No documents uploaded"}
Classify this query. Remember: if uncertain, prefer RAG.
"""
            messages = [{"role"   : "system",
                         "content": self.system_prompt,
                         },
                        {"role"   : "user",
                         "content": user_prompt,
                         }
                        ]
            # Get LLM classification (use low temperature for consistency)
            llm_response = await self.llm_client.generate(messages=messages,
                                                          temperature=0.1,  # Low temperature for consistent classification
                                                          max_tokens=150,
                                                          )
            response_text = llm_response.get("content", "").strip()
            # Parse JSON response
            classification = self._parse_llm_response(response_text)
            # Add suggested action based on classification
            if classification["type"] == "rag":
                classification["suggested_action"] = "respond_with_rag"
            elif classification["type"] == "general":
                classification["suggested_action"] = "respond_with_general_llm"
            else:
                # Default to RAG if uncertain (defensive; _parse_llm_response validates type)
                classification["suggested_action"] = "respond_with_rag"
            classification["is_llm_classified"] = True
            # Lazy %-style args so formatting is skipped when the level is disabled
            logger.info("LLM classified query as: %s (confidence: %.2f)",
                        classification["type"], classification["confidence"])
            logger.debug("Classification reason: %s", classification["reason"])
            return classification

        except Exception as e:
            # Boundary handler: any failure falls back to RAG (safer to try document search)
            logger.error(f"LLM classification failed: {e}, defaulting to RAG")
            return {"type"             : "rag",
                    "confidence"       : 0.5,
                    "reason"           : f"Classification failed: {str(e)}, defaulting to RAG",
                    "suggested_action" : "respond_with_rag",
                    "is_llm_classified": False,
                    "error"            : str(e)
                    }

    def _parse_llm_response(self, response_text: str) -> Dict[str, Any]:
        """
        Parse LLM JSON response

        Arguments:
        ----------
        response_text { str } : LLM response text

        Returns:
        --------
        { dict } : Parsed classification (type, confidence, reason)
        """
        try:
            # Remove markdown code blocks if present
            if "```json" in response_text:
                response_text = response_text.split("```json")[1].split("```")[0].strip()
            elif "```" in response_text:
                response_text = response_text.split("```")[1].split("```")[0].strip()
            # Parse JSON
            result = json.loads(response_text)
            # Validate required fields
            if ("type" not in result) or (result["type"] not in ("rag", "general")):
                raise ValueError(f"Invalid type in response: {result.get('type')}")
            # Set defaults for missing fields
            result.setdefault("confidence", 0.8)
            result.setdefault("reason", "LLM classification")
            # Clamp confidence to valid range.
            # TypeError is included in the handler below: float(None) / float({}) raises
            # TypeError, not ValueError, and must hit the same fallback path.
            result["confidence"] = max(0.0, min(1.0, float(result["confidence"])))
            return result

        except (json.JSONDecodeError, ValueError, TypeError, KeyError) as e:
            logger.warning(f"Failed to parse LLM response: {e}")
            logger.debug(f"Raw response: {response_text}")
            # Try to extract type from text if JSON parsing fails
            response_lower = response_text.lower()
            if ("general" in response_lower) and ("rag" not in response_lower):
                return {"type"      : "general",
                        "confidence": 0.6,
                        "reason"    : "Parsed from non-JSON response",
                        }
            else:
                # Default to RAG if parsing fails
                return {"type"      : "rag",
                        "confidence": 0.6,
                        "reason"    : "Failed to parse response, defaulting to RAG",
                        }
# Global classifier instance (lazily created by get_query_classifier)
_query_classifier = None


def get_query_classifier(provider: Optional[LLMProvider] = None, model_name: Optional[str] = None) -> QueryClassifier:
    """
    Get global query classifier instance

    NOTE: the instance is created lazily on the first call; `provider` and
    `model_name` passed on later calls are ignored because the cached
    singleton is returned unchanged.

    Arguments:
    ----------
    provider   { LLMProvider } : LLM provider (used only on first call)
    model_name { str }         : Model name (used only on first call)

    Returns:
    --------
    { QueryClassifier } : QueryClassifier instance
    """
    global _query_classifier

    if _query_classifier is None:
        _query_classifier = QueryClassifier(provider=provider,
                                            model_name=model_name,
                                            )
    return _query_classifier