Spaces:

ChiragPatankar
/

RAG_backend

Running

App Files Files Community

RAG_backend / app /rag /intent.py

ChiragPatankar

Add all RAG backend files - force add

c19c7bf 3 months ago

raw

history blame contribute delete

5.13 kB

	"""
	Intent detection module for RAG pipeline.
	Detects user intent from queries to enable intent-based gating.
	"""
	import logging
	import re
	from typing import Dict, List, Optional, Set

	# Configure the root logger at INFO level as soon as this module is imported,
	# then create the module-level logger used by the functions in this file.
	# NOTE(review): basicConfig() at import time touches process-wide logging
	# configuration; confirm this is intended for a library-style module.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)


	# Keyword table driving intent detection.
	#
	# Each key is an intent label; each value is the list of lowercase words or
	# phrases that signal it. Labels are kept in this order because the table is
	# iterated in insertion order. A keyword may appear under more than one
	# label (e.g. "pricing" and "billing" share several terms).
	INTENT_KEYWORDS: Dict[str, List[str]] = {
	    # Third-party services, APIs and automation hooks.
	    "integration": [
	        "integrate", "integration", "api", "connect",
	        "connection", "webhook", "shopify", "woocommerce",
	        "stripe", "paypal", "payment gateway", "whatsapp",
	        "telegram", "slack", "zapier", "ifttt",
	        "automation",
	    ],
	    # Invoices, charges and subscription lifecycle.
	    "billing": [
	        "billing", "invoice", "payment", "subscription",
	        "plan", "pricing", "cost", "price",
	        "charge", "fee", "refund", "cancel",
	        "renew",
	    ],
	    # Profile, settings and sign-in related terms.
	    "account": [
	        "account", "profile", "settings", "preferences",
	        "user", "login", "signup", "register",
	        "authentication", "auth",
	    ],
	    # Forgotten or expiring credentials.
	    "password_reset": [
	        "password", "reset", "forgot", "change password",
	        "update password", "password reset link", "expire", "expiry",
	    ],
	    # Plans, tiers and billing periods.
	    "pricing": [
	        "pricing", "price", "plan", "cost",
	        "subscription", "tier", "starter", "pro",
	        "enterprise", "monthly", "yearly", "billing",
	    ],
	    # Catch-all label for general queries; it has no keywords of its own.
	    "general": [],
	}


	def detect_intents(query: str) -> List[str]:
	    """
	    Detect intents from a user query.

	    Every intent in INTENT_KEYWORDS except the "general" catch-all is
	    reported when at least one of its keywords occurs in the query as a
	    whole word or phrase (case-insensitive). Intents come back in the
	    iteration order of INTENT_KEYWORDS, each at most once.

	    Args:
	        query: User's question. A missing or blank query is treated as
	            carrying no specific intent instead of raising.

	    Returns:
	        List of detected intent labels (e.g., ["integration", "billing"]),
	        or ["general"] when no keyword matches.
	    """
	    detected: List[str] = []

	    # Only scan the keyword table when there is actual text to inspect.
	    if query and query.strip():
	        query_lower = query.lower()
	        for intent, keywords in INTENT_KEYWORDS.items():
	            if intent == "general":
	                continue  # Catch-all label; only used as the fallback below

	            # Word-boundary matching avoids hits inside longer words
	            # (e.g. "pro" must not match "product").
	            if any(
	                re.search(r'\b' + re.escape(keyword.lower()) + r'\b', query_lower)
	                for keyword in keywords
	            ):
	                detected.append(intent)

	    # If no specific intent detected, return general
	    if not detected:
	        detected = ["general"]

	    # Log a short preview only; add the ellipsis just when text was cut off.
	    preview = (query or "")[:50]
	    ellipsis = "..." if query and len(query) > 50 else ""
	    logger.info("Detected intents for query '%s%s': %s", preview, ellipsis, detected)
	    return detected


	def get_intent_keywords(intents: List[str]) -> Set[str]:
	    """
	    Collect the union of keywords registered for the given intents.

	    Args:
	        intents: Intent labels to look up in INTENT_KEYWORDS. Labels that
	            are not present in the table are ignored.

	    Returns:
	        A new set holding every keyword of the recognised intents
	        (empty when none of the labels is known).
	    """
	    return {
	        term
	        for label in intents
	        if label in INTENT_KEYWORDS
	        for term in INTENT_KEYWORDS[label]
	    }


	# Words too common to count as evidence that a chunk addresses the query.
	_STOP_WORDS = frozenset({
	    "the", "a", "an", "is", "are", "was", "were", "be", "been",
	    "to", "of", "and", "or", "but", "in", "on", "at", "for",
	    "with", "how", "what", "when", "where", "why", "do", "does",
	})


	def check_direct_match(
	    query: str,
	    retrieved_chunks: List[str],
	    intent_keywords: Optional[Set[str]] = None
	) -> bool:
	    """
	    Check if at least one retrieved chunk contains direct matches for query intent.

	    A chunk counts as a direct match when both conditions hold:
	      1. If intent keywords are available, at least one of them occurs in
	         the chunk as a whole word or phrase.
	      2. The chunk shares whole words with the query's important words
	         (query words minus stop words): at least two when the query has
	         two or more important words, or the single word when it has one.

	    All comparisons are case-insensitive and word-based, so "pro" does not
	    match "improve".

	    Args:
	        query: User's question
	        retrieved_chunks: List of retrieved chunk texts; empty or missing
	            entries are skipped
	        intent_keywords: Optional set of intent keywords to check. When
	            None, they are derived from the query via detect_intents() and
	            get_intent_keywords(). An empty set disables condition 1.

	    Returns:
	        True if at least one chunk has direct match, False otherwise
	    """
	    if not retrieved_chunks:
	        return False
	    if not query:
	        # Nothing to compare against; report it the same way as a miss.
	        logger.warning("No direct match found in retrieved chunks")
	        return False

	    query_lower = query.lower()
	    important_words = set(re.findall(r'\b\w+\b', query_lower)) - _STOP_WORDS

	    # Get intent keywords if not provided
	    if intent_keywords is None:
	        intent_keywords = get_intent_keywords(detect_intents(query))

	    # Build one alternation for all intent keywords up front instead of
	    # re-creating a pattern per keyword for every chunk.
	    intent_pattern = None
	    if intent_keywords:
	        alternatives = "|".join(re.escape(kw.lower()) for kw in intent_keywords)
	        intent_pattern = re.compile(r'\b(?:' + alternatives + r')\b')

	    # Two shared words are required, unless the query only has one
	    # important word. Zero means the query consists of stop words only.
	    required = min(2, len(important_words))

	    if required:
	        for chunk in retrieved_chunks:
	            if not chunk:
	                continue
	            chunk_lower = chunk.lower()

	            # Check 1: Intent keywords must be present in chunk
	            if intent_pattern is not None and not intent_pattern.search(chunk_lower):
	                continue

	            # Check 2: enough important query words appear as whole words
	            matched = important_words.intersection(
	                re.findall(r'\b\w+\b', chunk_lower)
	            )
	            if len(matched) < required:
	                continue

	            if len(important_words) == 1:
	                logger.info(
	                    "Direct match found: single important word '%s' matched",
	                    next(iter(matched)),
	                )
	            else:
	                logger.info(
	                    "Direct match found: %d important words matched in chunk",
	                    len(matched),
	                )
	            return True

	    logger.warning("No direct match found in retrieved chunks")
	    return False