# NOTE: this file was captured from a Hugging Face Spaces page; the page's
# status banner ("Spaces: Sleeping") leaked into the capture and is not code.
import json
import logging
import re
from typing import Any, Dict, List, Optional

from src.domain.models.conversation import QueryClassification, ConversationContext, Entity
from src.infrastructure.providers.llm_provider import LLMClient
from src.application.services.dynamic_pattern_manager import dynamic_pattern_manager
from config import settings
from config.conversation_config import conversation_config
logger = logging.getLogger(__name__)


class QueryClassifier:
    """Classifies user queries to determine if they need new tool data or can be answered from history."""

    def __init__(self):
        # One shared client; every LLM-backed classification reuses it.
        self.llm_client = LLMClient()

    async def classify_query(self, user_message: str, conversation_context: ConversationContext) -> QueryClassification:
        """Classify a user query using LLM assistance and rule-based logic.

        Cheap rule-based heuristics run first; the LLM is consulted only when
        the rule-based confidence is not decisive (<= 0.8).
        """
        # First, try rule-based classification for obvious cases
        rule_based_result = self._rule_based_classification(user_message, conversation_context)
        if rule_based_result.confidence > 0.8:
            return rule_based_result
        # Use LLM for more nuanced classification
        return await self._llm_classification(user_message, conversation_context, rule_based_result)

    def _rule_based_classification(self, user_message: str, conversation_context: ConversationContext) -> QueryClassification:
        """Fast rule-based classification for obvious cases.

        Scores the message against dynamically managed regex pattern sets
        (history / tool / pronoun indicators) and combines the scores with
        conversation state using thresholds from ``conversation_config``.
        """
        message_lower = user_message.lower()

        # Get patterns dynamically from pattern manager
        dynamic_pattern_manager.refresh_if_needed()
        history_indicators = dynamic_pattern_manager.get_history_patterns()
        tool_indicators = dynamic_pattern_manager.get_tool_patterns()
        pronoun_patterns = dynamic_pattern_manager.get_pronoun_patterns()

        # Check for pronouns without clear antecedents (suggesting reference to history)
        has_pronouns = any(bool(re.search(pattern, message_lower)) for pattern in pronoun_patterns)

        # One point per matching indicator pattern.
        history_score = sum(1 for pattern in history_indicators if re.search(pattern, message_lower))
        tool_score = sum(1 for pattern in tool_indicators if re.search(pattern, message_lower))

        # Get configuration
        config = conversation_config.query_classification

        # Check if there's relevant history
        has_history = len(conversation_context.turns) > 0
        recent_tool_usage = any(turn.tool_used for turn in conversation_context.get_recent_turns(config.recent_turns_check))

        # Classification logic using configurable thresholds.
        # Order matters: explicit history indicators win over tool indicators.
        if history_score > 0 and has_history:
            confidence = min(config.max_confidence, config.min_history_confidence + (history_score * config.confidence_increment))
            return QueryClassification(
                query_type="history",
                confidence=confidence,
                reasoning=f"Found {history_score} dynamic history indicators and conversation history exists"
            )

        if tool_score > 0 or not has_history:
            # NOTE(review): reuses min_history_confidence + 0.1 as the tool
            # baseline — presumably a stand-in for a dedicated
            # min_tool_confidence setting; confirm against conversation_config.
            confidence = min(config.max_confidence, config.min_history_confidence + 0.1 + (tool_score * config.confidence_increment))
            return QueryClassification(
                query_type="tool",
                confidence=confidence,
                reasoning=f"Found {tool_score} dynamic tool indicators or no conversation history",
                needs_tool_data=True
            )

        if has_pronouns and recent_tool_usage:
            return QueryClassification(
                query_type="history",
                confidence=config.pronoun_confidence,
                reasoning="Contains pronouns and recent tool usage suggests reference to history"
            )

        # Default to tool query with low confidence
        return QueryClassification(
            query_type="tool",
            confidence=config.default_confidence,
            reasoning="Uncertain classification, defaulting to tool query",
            needs_tool_data=True
        )

    async def _llm_classification(self, user_message: str, conversation_context: ConversationContext,
                                  rule_based_result: QueryClassification) -> QueryClassification:
        """Use LLM for sophisticated query classification.

        Falls back to ``rule_based_result`` when no LLM token is configured or
        the LLM call fails — classification is best-effort, never fatal.
        """
        if not settings.HF_TOKEN:
            # Fallback to rule-based if no LLM available
            return rule_based_result

        # Prepare context for LLM
        recent_context = self._prepare_context_for_llm(conversation_context)
        classification_prompt = self._build_classification_prompt(user_message, recent_context, rule_based_result)

        try:
            # Get LLM decision; any field the LLM omits falls back to the
            # rule-based value so the result is always fully populated.
            response = await self.llm_client.complete_json(classification_prompt)
            return QueryClassification(
                query_type=response.get("query_type", rule_based_result.query_type),
                confidence=response.get("confidence", rule_based_result.confidence),
                reasoning=response.get("reasoning", rule_based_result.reasoning),
                referenced_entities=response.get("referenced_entities", []),
                needs_tool_data=response.get("needs_tool_data", rule_based_result.needs_tool_data)
            )
        except Exception as e:
            logger.exception("LLM classification failed: %s", e)
            # Fallback to rule-based result
            return rule_based_result

    def _prepare_context_for_llm(self, conversation_context: ConversationContext) -> str:
        """Prepare conversation context for LLM analysis.

        Summarizes up to the last 5 turns; tool turns show tool/params/result,
        plain turns show a truncated response, plus any extracted entities.
        """
        recent_turns = conversation_context.get_recent_turns(5)
        if not recent_turns:
            return "No previous conversation history."

        context_parts = []
        for i, turn in enumerate(recent_turns, 1):
            turn_summary = f"Turn {i}:\n"
            turn_summary += f"  User: {turn.user_message}\n"
            if turn.tool_used:
                turn_summary += f"  Tool used: {turn.tool_used}\n"
                if turn.tool_params:
                    # Compact separators keep the prompt short.
                    turn_summary += f"  Parameters: {json.dumps(turn.tool_params, separators=(',', ':'))}\n"
                turn_summary += f"  Result: {turn.response_summary}\n"
            else:
                turn_summary += f"  Response: {turn.full_response[:100]}...\n"
            if turn.extracted_entities:
                entities = [f"{e.type}:{e.name}" for e in turn.extracted_entities]
                turn_summary += f"  Entities: {', '.join(entities)}\n"
            context_parts.append(turn_summary)

        return "\n".join(context_parts)

    def _build_classification_prompt(self, user_message: str, context: str, rule_based_result: QueryClassification) -> str:
        """Build the LLM prompt for query classification."""
        return f"""You are a query classification assistant for a Topcoder MCP agent.
Your task is to determine if a user query can be answered from conversation history or needs new external data.
CONVERSATION CONTEXT:
{context}
USER'S NEW MESSAGE: "{user_message}"
RULE-BASED ANALYSIS:
- Preliminary classification: {rule_based_result.query_type}
- Confidence: {rule_based_result.confidence}
- Reasoning: {rule_based_result.reasoning}
CLASSIFICATION TYPES:
1. "history" - Can be answered from conversation context (previous results, mentioned entities, etc.)
2. "tool" - Needs new data from external APIs/tools
3. "chat" - General conversation, greetings, or explanations
ANALYZE THE QUERY FOR:
- References to previous results ("the last challenge", "that user", "it")
- Temporal references ("what you just told me", "from before")
- Pronouns without clear new antecedents
- Requests for new/different/more data vs clarification of existing data
- Entity references that were mentioned in previous conversation
Respond ONLY with a JSON object:
{{
"query_type": "history|tool|chat",
"confidence": 0.0-1.0,
"reasoning": "Brief explanation of your decision",
"referenced_entities": ["entity1", "entity2"],
"needs_tool_data": true/false
}}
Be especially careful to identify when users are asking about previously retrieved data vs requesting new data."""

    def extract_entities(self, text: str, previous_entities: Optional[List[Entity]] = None) -> List[Entity]:
        """Extract entities from text that might be referenced later.

        ``previous_entities`` is accepted for interface compatibility but is
        currently unused. Returns an empty list when dynamic patterns are
        disabled in configuration.
        """
        entities: List[Entity] = []

        if not conversation_config.entity_extraction.use_dynamic_patterns:
            return entities

        # Get patterns dynamically from pattern manager
        dynamic_pattern_manager.refresh_if_needed()
        patterns = dynamic_pattern_manager.get_entity_patterns()
        config = conversation_config.entity_extraction

        for pattern_name, pattern in patterns.items():
            try:
                matches = re.finditer(pattern, text, re.IGNORECASE)
                for match in matches:
                    # Get the first non-None group
                    value = next((group for group in match.groups() if group), None)
                    if value:
                        # Determine entity type based on pattern name
                        entity_type = self._determine_entity_type(pattern_name, value)
                        entities.append(Entity(
                            name=value.strip(),
                            type=entity_type,
                            value=value.strip(),
                            confidence=config.default_entity_confidence
                        ))
            except Exception as e:
                # Patterns come from an external manager and may be malformed;
                # a bad pattern must not abort extraction for the rest.
                logger.warning("Error in pattern %s: %s", pattern_name, e)
                continue

        return entities

    def _determine_entity_type(self, pattern_name: str, value: str) -> str:
        """Determine entity type from pattern name and value.

        Pattern-name keywords ("id" / "name" / "handle" plus a domain word)
        take priority; otherwise the value's shape is used as a fallback.
        """
        pattern_lower = pattern_name.lower()

        if "id" in pattern_lower:
            if "challenge" in pattern_lower:
                return "challenge_id"
            elif "skill" in pattern_lower:
                return "skill_id"
            elif "member" in pattern_lower or "user" in pattern_lower:
                return "user_id"
            else:
                return "entity_id"
        elif "name" in pattern_lower:
            if "challenge" in pattern_lower:
                return "challenge_name"
            elif "skill" in pattern_lower:
                return "skill_name"
            else:
                return "entity_name"
        elif "handle" in pattern_lower:
            return "user_handle"
        else:
            # Try to infer from value pattern
            if value.isdigit():
                return "entity_id"
            elif len(value) <= 30 and "_" in value:
                return "user_handle"
            else:
                return "entity_name"