Spaces:

stellar413
/

masterllm

Sleeping

App Files Files Community

masterllm / services /intent_classifier.py

redhairedshanks1

Update services/intent_classifier.py

e3fee32 2 months ago

raw

history blame contribute delete

10.7 kB

	"""
	Intent Classification Service
	Determines user intent to make the chatbot behave naturally
	"""

	import re
	from typing import Dict, Any


	class IntentClassifier:
	    """Rule-based classifier that maps a chat message to a bot intent.

	    The classifier is stateless: every method is a ``@staticmethod`` and the
	    only data it relies on are the keyword/pattern tables defined below.
	    """

	    # Keywords that indicate pipeline creation intent
	    PIPELINE_KEYWORDS = [
	        "extract", "summarize", "translate", "classify", "detect",
	        "analyze", "process", "generate", "create pipeline", "build pipeline",
	        "run", "execute", "perform", "do", "get", "find", "identify",
	        "table", "text", "image", "signature", "stamp", "ner", "entities",
	    ]

	    # Keywords this short ("do", "get", "run", "ner") must match as whole
	    # words; otherwise "do" fires on "document" and "ner" on "partner".
	    _SHORT_KEYWORD_MAX_LEN = 3

	    # Casual chat patterns. Alternatives are separated by a plain "|"
	    # (an escaped "\|" would match a literal pipe character), and the
	    # trailing \b stops "hi" from matching "highlight", "ok" from "okra", etc.
	    CASUAL_PATTERNS = [
	        r"^(hi|hello|hey|greetings|good morning|good afternoon|good evening)\b",
	        r"^(how are you|what's up|wassup)\b",
	        r"^(thanks|thank you|appreciate)\b",
	        r"^(bye|goodbye|see you|later)\b",
	        r"^(ok|okay|cool|nice|great|awesome)\b",
	        r"^(what can you do|what do you do|help|capabilities)\b",
	        r"^(who are you|what are you)\b",
	    ]

	    # Question patterns that need informational response
	    QUESTION_PATTERNS = [
	        r"^(what|how|why|when|where|who|can you|do you|are you|is it)\b",
	        r"\b(help|explain|tell me|show me)\b",
	    ]

	    # Exact (whole-message) replies used to confirm or cancel a proposed pipeline.
	    _APPROVAL_REPLIES = frozenset(
	        ["approve", "yes", "y", "ok", "okay", "proceed", "go ahead", "do it"]
	    )
	    _REJECTION_REPLIES = frozenset(
	        ["reject", "no", "n", "cancel", "stop", "don't"]
	    )

	    # Substrings that mark a question as being about the assistant itself.
	    _SELF_REFERENCE_TERMS = (
	        "masterllm", "you", "this", "system", "bot", "chatbot", "assistant",
	    )

	    @staticmethod
	    def _result(intent, confidence, requires_pipeline, response_type, **extra):
	        """Build the result dict shared by every classification branch."""
	        result = {
	            "intent": intent,
	            "confidence": confidence,
	            "requires_pipeline": requires_pipeline,
	            "suggested_response_type": response_type,
	        }
	        result.update(extra)
	        return result

	    @staticmethod
	    def _matches_any(patterns, message: str) -> bool:
	        """Return True if any regex in *patterns* matches *message*."""
	        return any(re.search(pattern, message) for pattern in patterns)

	    @staticmethod
	    def _count_pipeline_keywords(message: str) -> int:
	        """Count how many distinct PIPELINE_KEYWORDS occur in *message*.

	        Keywords must start at a word boundary. Longer keywords may be
	        followed by a suffix ("extracting", "tables"); short ones must match
	        as whole words to avoid accidental hits inside unrelated words.
	        """
	        count = 0
	        for keyword in IntentClassifier.PIPELINE_KEYWORDS:
	            is_short = len(keyword) <= IntentClassifier._SHORT_KEYWORD_MAX_LEN
	            pattern = r"\b" + re.escape(keyword) + (r"\b" if is_short else "")
	            if re.search(pattern, message):
	                count += 1
	        return count

	    @staticmethod
	    def classify_intent(user_message: str) -> Dict[str, Any]:
	        """
	        Classify user intent from their message.

	        Checks run in this order and the first hit wins: empty message,
	        exact approval/rejection reply, casual-chat pattern, question,
	        pipeline keywords, short message, and finally "unclear".

	        Args:
	            user_message: Raw text typed by the user. ``None`` is treated
	                like an empty message.

	        Returns:
	            {
	                "intent": "casual_chat" | "question" | "pipeline_request"
	                          | "approval" | "rejection" | "unclear",
	                "confidence": float (0-1),
	                "requires_pipeline": bool,
	                "suggested_response_type": "friendly" | "informational"
	                          | "pipeline_generation" | "execute_pipeline"
	                          | "clarification",
	                "keyword_matches": int  (only for "pipeline_request")
	            }
	        """
	        make = IntentClassifier._result
	        message = (user_message or "").strip().lower()

	        # Empty message
	        if not message:
	            return make("casual_chat", 1.0, False, "friendly")

	        # Approval/Rejection replies (for pipeline confirmation). These are
	        # whole-message matches and deliberately run before the casual
	        # patterns, so a bare "ok" approves instead of chatting.
	        if message in IntentClassifier._APPROVAL_REPLIES:
	            return make("approval", 1.0, False, "execute_pipeline")

	        if message in IntentClassifier._REJECTION_REPLIES:
	            return make("rejection", 1.0, False, "friendly")

	        # Casual chat patterns
	        if IntentClassifier._matches_any(IntentClassifier.CASUAL_PATTERNS, message):
	            return make("casual_chat", 0.9, False, "friendly")

	        is_question = IntentClassifier._matches_any(
	            IntentClassifier.QUESTION_PATTERNS, message
	        )
	        pipeline_keyword_count = IntentClassifier._count_pipeline_keywords(message)

	        # Questions take priority over pipeline keywords:
	        # "tell me about X" is a question, not a pipeline request.
	        if is_question:
	            # Asking about the system itself is always informational.
	            if any(term in message for term in IntentClassifier._SELF_REFERENCE_TERMS):
	                return make("question", 0.95, False, "informational")
	            # A question with several explicit pipeline keywords
	            # (e.g. "how to extract text from page 5") is treated as a request.
	            if pipeline_keyword_count >= 2 and not message.startswith("tell me"):
	                return make(
	                    "pipeline_request", 0.7, True, "pipeline_generation",
	                    keyword_matches=pipeline_keyword_count,
	                )
	            # Otherwise, it's just a question
	            return make("question", 0.85, False, "informational")

	        # Has pipeline keywords and is NOT a question: pipeline request.
	        # Confidence grows with the number of keywords, capped at 1.0.
	        if pipeline_keyword_count > 0:
	            return make(
	                "pipeline_request",
	                min(0.6 + (pipeline_keyword_count * 0.1), 1.0),
	                True,
	                "pipeline_generation",
	                keyword_matches=pipeline_keyword_count,
	            )

	        # Default: treat as casual if short
	        if len(message.split()) < 3:
	            return make("casual_chat", 0.6, False, "friendly")

	        # Longer messages without clear intent - ask for clarification
	        return make("unclear", 0.4, False, "clarification")

	    @staticmethod
	    def get_friendly_response(intent: str, user_message: str = "") -> str:
	        """Generate friendly chatbot responses for non-pipeline intents.

	        The reply is chosen from the message text; the first matching rule
	        wins. *intent* is only consulted to pick the "unclear" fallback.

	        Args:
	            intent: Intent label, e.g. as produced by ``classify_intent``.
	            user_message: Raw user text. ``None`` is treated as empty.

	        Returns:
	            The canned reply text to show to the user.
	        """

	        message_lower = (user_message or "").lower().strip()

	        # Greetings
	        if re.search(r"^(hi|hello|hey)\b", message_lower):
	            return "Hello! 👋 I'm MasterLLM, your AI document processing assistant. Upload a document and tell me what you'd like to do with it!"

	        # How are you
	        if "how are you" in message_lower:
	            return "I'm doing great, thank you! 🤖 Ready to help you process documents. Upload a file to get started!"

	        # Thanks
	        if re.search(r"^(thanks|thank you)\b", message_lower):
	            return "You're welcome! 😊 Let me know if you need anything else!"

	        # Goodbye
	        if re.search(r"^(bye|goodbye)\b", message_lower):
	            return "Goodbye! 👋 Feel free to come back anytime you need document processing help!"

	        # Capabilities question
	        if "what can you do" in message_lower or "capabilities" in message_lower:
	            return """I can help you with various document processing tasks:

	📄 Text Operations:
	- Extract text from PDFs and images
	- Summarize documents
	- Translate to different languages
	- Classify text content
	- Extract named entities (NER)

	📊 Table Operations:
	- Extract tables from documents
	- Analyze tabular data

	🖼️ Image Operations:
	- Describe images
	- Detect signatures
	- Detect stamps

	🔧 How to use:
	1. Upload a document (PDF or image)
	2. Tell me what you want to do (e.g., "extract text and summarize")
	3. I'll create a pipeline for you to approve
	4. Watch the magic happen! ✨"""

	        # Who are you / What are you / Tell me about MasterLLM
	        if any(phrase in message_lower for phrase in ["who are you", "what are you", "about masterllm", "about you", "tell me about"]):
	            return """About MasterLLM 🤖

	I'm MasterLLM, an intelligent AI-powered document processing orchestrator that helps you automate complex document workflows.

	What I Do:
	- 🔍 Analyze your requests and understand what you need
	- 🛠️ Create custom processing pipelines automatically
	- ⚡ Execute multi-step document workflows
	- 📊 Deliver structured results

	My Capabilities:
	- Extract text from PDFs and images (OCR)
	- Summarize long documents
	- Translate content to different languages
	- Classify documents
	- Extract tables and structured data
	- Detect signatures and stamps
	- Describe images
	- Extract named entities (people, places, organizations)

	How I'm Different:
	I'm not just a single tool - I'm a smart orchestrator that:
	1. Understands your natural language requests
	2. Determines the best sequence of operations
	3. Uses powerful AI models (AWS Bedrock Claude & Google Gemini)
	4. Combines multiple tools into efficient pipelines
	5. Executes everything seamlessly

	My Role:
	Think of me as your AI assistant for document processing. I have a natural conversation with you, understand what you need, propose a plan, and execute it - all while keeping you informed every step of the way!

	Want to see me in action? Just upload a document and tell me what you'd like to do with it! 🚀"""


	        # Help
	        if message_lower in ["help", "?"] or "help me" in message_lower:
	            return """Here's how to use me:

	1️⃣ Upload Document: Click the upload button and select a PDF or image
	2️⃣ Describe Task: Tell me what you want (e.g., "extract text from pages 1-5 and summarize")
	3️⃣ Review Pipeline: I'll show you the processing plan
	4️⃣ Approve: Type 'approve' to execute or 'reject' to cancel
	5️⃣ Get Results: Watch real-time progress and get your results!

	Example requests:
	- "extract text and summarize"
	- "get tables from pages 2-4"
	- "translate to Spanish"
	- "detect signatures and stamps"

	Need anything else?"""

	        # Unclear intent
	        if intent == "unclear":
	            return "I'm not sure what you'd like me to do. Could you please:\n- Upload a document first, or\n- Tell me what processing task you need (e.g., 'extract text', 'summarize', 'translate')\n\nType 'help' to see what I can do!"

	        # Default friendly response
	        return "I'm here to help! Upload a document and tell me what you'd like to do with it. Type 'help' if you need examples! 😊"


	# Shared module-level instance. IntentClassifier defines only static methods
	# and no instance state, so this object is purely a convenience handle.
	intent_classifier = IntentClassifier()