# Source: masterllm/services/intent_classifier.py
# Commit e3fee32 ("Update services/intent_classifier.py"), redhairedshanks1
"""
Intent Classification Service
Determines user intent to make the chatbot behave naturally
"""
import re
from typing import Dict, Any
class IntentClassifier:
    """Rule-based classifier that decides how the chatbot should react to a message.

    All matching is done on whole words (or word prefixes) rather than raw
    substrings, so that e.g. "document" does not count as the keyword "do"
    and "highlight" is not mistaken for the greeting "hi".
    """

    # Keywords that indicate pipeline creation intent.
    PIPELINE_KEYWORDS = [
        "extract", "summarize", "translate", "classify", "detect",
        "analyze", "process", "generate", "create pipeline", "build pipeline",
        "run", "execute", "perform", "do", "get", "find", "identify",
        "table", "text", "image", "signature", "stamp", "ner", "entities",
    ]

    # Keywords of this length or shorter must match as a whole word; longer
    # keywords may carry a suffix ("tables", "extracting", "detection").
    # Short keywords are too ambiguous as prefixes ("do" -> "document").
    _SHORT_KEYWORD_LEN = 3

    # Casual chat patterns, anchored at the start of the message. The trailing
    # \b keeps "hi" from matching "history" and "ok" from matching "okra".
    CASUAL_PATTERNS = [
        r"^(hi|hello|hey|greetings|good morning|good afternoon|good evening)\b",
        r"^(how are you|what's up|wassup)\b",
        r"^(thanks|thank you|appreciate\w*)\b",
        r"^(bye|goodbye|see you|later)\b",
        r"^(ok|okay|cool|nice|great|awesome)\b",
        r"^(what can you do|what do you do|help|capabilities)\b",
        r"^(who are you|what are you)\b",
    ]

    # Question patterns that need an informational response. The optional "s"
    # keeps apostrophe-less contractions such as "whats" / "hows" matching,
    # while "however" / "whenever" no longer count as questions.
    QUESTION_PATTERNS = [
        r"^((what|how|why|when|where|who)s?|can you|do you|are you|is it)\b",
        r"\b(help|explain|tell me|show me)\b",
    ]

    # Words suggesting a question is about the assistant itself. Matched as
    # whole words so that "both" does not count as "bot".
    _SYSTEM_TERMS_PATTERN = (
        r"\b(masterllm|you|your|yourself|this|system|bot|chatbot|assistant)\b"
    )

    # Exact (whole-message) replies used to confirm or cancel a proposed pipeline.
    _APPROVAL_WORDS = ("approve", "yes", "y", "ok", "okay", "proceed", "go ahead", "do it")
    _REJECTION_WORDS = ("reject", "no", "n", "cancel", "stop", "don't")

    # Canned long-form replies, one list item per output line.
    _CAPABILITIES_TEXT = "\n".join([
        "I can help you with various document processing tasks:",
        "📄 **Text Operations:**",
        "- Extract text from PDFs and images",
        "- Summarize documents",
        "- Translate to different languages",
        "- Classify text content",
        "- Extract named entities (NER)",
        "📊 **Table Operations:**",
        "- Extract tables from documents",
        "- Analyze tabular data",
        "🖼️ **Image Operations:**",
        "- Describe images",
        "- Detect signatures",
        "- Detect stamps",
        "🔧 **How to use:**",
        "1. Upload a document (PDF or image)",
        '2. Tell me what you want to do (e.g., "extract text and summarize")',
        "3. I'll create a pipeline for you to approve",
        "4. Watch the magic happen! ✨",
    ])

    _ABOUT_TEXT = "\n".join([
        "**About MasterLLM** 🤖",
        "I'm MasterLLM, an intelligent AI-powered document processing orchestrator that helps you automate complex document workflows.",
        "**What I Do:**",
        "- 🔍 **Analyze** your requests and understand what you need",
        "- 🛠️ **Create** custom processing pipelines automatically",
        "- ⚡ **Execute** multi-step document workflows",
        "- 📊 **Deliver** structured results",
        "**My Capabilities:**",
        "- Extract text from PDFs and images (OCR)",
        "- Summarize long documents",
        "- Translate content to different languages",
        "- Classify documents",
        "- Extract tables and structured data",
        "- Detect signatures and stamps",
        "- Describe images",
        "- Extract named entities (people, places, organizations)",
        "**How I'm Different:**",
        "I'm not just a single tool - I'm a **smart orchestrator** that:",
        "1. Understands your natural language requests",
        "2. Determines the best sequence of operations",
        "3. Uses powerful AI models (AWS Bedrock Claude & Google Gemini)",
        "4. Combines multiple tools into efficient pipelines",
        "5. Executes everything seamlessly",
        "**My Role:**",
        "Think of me as your AI assistant for document processing. I have a natural conversation with you, understand what you need, propose a plan, and execute it - all while keeping you informed every step of the way!",
        "Want to see me in action? Just upload a document and tell me what you'd like to do with it! 🚀",
    ])

    _HELP_TEXT = "\n".join([
        "Here's how to use me:",
        "1️⃣ **Upload Document**: Click the upload button and select a PDF or image",
        '2️⃣ **Describe Task**: Tell me what you want (e.g., "extract text from pages 1-5 and summarize")',
        "3️⃣ **Review Pipeline**: I'll show you the processing plan",
        "4️⃣ **Approve**: Type 'approve' to execute or 'reject' to cancel",
        "5️⃣ **Get Results**: Watch real-time progress and get your results!",
        "**Example requests:**",
        '- "extract text and summarize"',
        '- "get tables from pages 2-4"',
        '- "translate to Spanish"',
        '- "detect signatures and stamps"',
        "Need anything else?",
    ])

    @staticmethod
    def _make_result(
        intent: str,
        confidence: float,
        requires_pipeline: bool,
        response_type: str,
        **extra: Any
    ) -> Dict[str, Any]:
        """Build the classification dict returned by ``classify_intent``."""
        result: Dict[str, Any] = {
            "intent": intent,
            "confidence": confidence,
            "requires_pipeline": requires_pipeline,
            "suggested_response_type": response_type,
        }
        result.update(extra)
        return result

    @staticmethod
    def _count_pipeline_keywords(message: str) -> int:
        """Return how many distinct PIPELINE_KEYWORDS occur in *message* as words."""
        short_len = IntentClassifier._SHORT_KEYWORD_LEN
        count = 0
        for keyword in IntentClassifier.PIPELINE_KEYWORDS:
            # Short keywords: whole word only. Longer ones: word prefix.
            tail = r"\b" if len(keyword) <= short_len else r"\w*"
            if re.search(r"\b" + re.escape(keyword) + tail, message):
                count += 1
        return count

    @staticmethod
    def classify_intent(user_message: str) -> Dict[str, Any]:
        """
        Classify user intent from their message.

        Checks run in this order, first match wins: empty message, exact
        approval/rejection word, casual pattern, question, pipeline keywords,
        short-message fallback, unclear.

        Args:
            user_message: Raw text typed by the user. ``None`` is treated
                like an empty message.

        Returns:
            {
                "intent": "casual_chat" | "question" | "pipeline_request"
                          | "approval" | "rejection" | "unclear",
                "confidence": float (0-1),
                "requires_pipeline": bool,
                "suggested_response_type": "friendly" | "informational"
                          | "pipeline_generation" | "execute_pipeline"
                          | "clarification",
                "keyword_matches": int  (only for "pipeline_request")
            }
        """
        make = IntentClassifier._make_result
        message = (user_message or "").strip().lower()

        # Empty message
        if not message:
            return make("casual_chat", 1.0, False, "friendly")

        # Approval/rejection (pipeline confirmation) only when the whole
        # message is one of the known replies.
        if message in IntentClassifier._APPROVAL_WORDS:
            return make("approval", 1.0, False, "execute_pipeline")
        if message in IntentClassifier._REJECTION_WORDS:
            return make("rejection", 1.0, False, "friendly")

        # Casual chat patterns
        if any(
            re.search(pattern, message, re.IGNORECASE)
            for pattern in IntentClassifier.CASUAL_PATTERNS
        ):
            return make("casual_chat", 0.9, False, "friendly")

        is_question = any(
            re.search(pattern, message, re.IGNORECASE)
            for pattern in IntentClassifier.QUESTION_PATTERNS
        )
        keyword_count = IntentClassifier._count_pipeline_keywords(message)

        # Questions take priority over pipeline keywords:
        # "tell me about X" is a question, not a pipeline request.
        if is_question:
            # Asking about the system itself -> informational.
            if re.search(IntentClassifier._SYSTEM_TERMS_PATTERN, message):
                return make("question", 0.95, False, "informational")
            # A question with several explicit pipeline keywords is treated
            # as a (lower-confidence) pipeline request.
            if keyword_count >= 2 and not message.startswith("tell me"):
                return make(
                    "pipeline_request", 0.7, True, "pipeline_generation",
                    keyword_matches=keyword_count,
                )
            # Otherwise, it's just a question.
            return make("question", 0.85, False, "informational")

        # Pipeline keywords without a question -> pipeline request; confidence
        # grows with the number of matched keywords, capped at 1.0.
        if keyword_count > 0:
            return make(
                "pipeline_request",
                min(0.6 + (keyword_count * 0.1), 1.0),
                True,
                "pipeline_generation",
                keyword_matches=keyword_count,
            )

        # Default: very short messages are treated as casual chat.
        if len(message.split()) < 3:
            return make("casual_chat", 0.6, False, "friendly")

        # Longer messages without clear intent - ask for clarification.
        return make("unclear", 0.4, False, "clarification")

    @staticmethod
    def get_friendly_response(intent: str, user_message: str = "") -> str:
        """Generate friendly chatbot responses for non-pipeline intents.

        Args:
            intent: Intent label; only the value "unclear" changes the result,
                and only when no message-based rule matched first.
            user_message: The user's text; ``None`` is treated as empty.

        Returns:
            A canned reply chosen from the message content, falling back to a
            generic invitation to upload a document.
        """
        message_lower = (user_message or "").lower().strip()

        # Greetings (\b so that "history" / "heyday" are not greeted)
        if re.search(r"^(hi|hello|hey)\b", message_lower):
            return "Hello! 👋 I'm MasterLLM, your AI document processing assistant. Upload a document and tell me what you'd like to do with it!"

        # How are you
        if "how are you" in message_lower:
            return "I'm doing great, thank you! 🤖 Ready to help you process documents. Upload a file to get started!"

        # Thanks
        if re.search(r"^(thanks|thank you)\b", message_lower):
            return "You're welcome! 😊 Let me know if you need anything else!"

        # Goodbye (\b so that "bytes ..." is not a farewell)
        if re.search(r"^(bye|goodbye)\b", message_lower):
            return "Goodbye! 👋 Feel free to come back anytime you need document processing help!"

        # Capabilities question
        if "what can you do" in message_lower or "capabilities" in message_lower:
            return IntentClassifier._CAPABILITIES_TEXT

        # Who are you / What are you / Tell me about MasterLLM
        about_phrases = ("who are you", "what are you", "about masterllm", "about you", "tell me about")
        if any(phrase in message_lower for phrase in about_phrases):
            return IntentClassifier._ABOUT_TEXT

        # Help
        if message_lower in ("help", "?") or "help me" in message_lower:
            return IntentClassifier._HELP_TEXT

        # Unclear intent
        if intent == "unclear":
            return "I'm not sure what you'd like me to do. Could you please:\n- Upload a document first, or\n- Tell me what processing task you need (e.g., 'extract text', 'summarize', 'translate')\n\nType 'help' to see what I can do!"

        # Default friendly response
        return "I'm here to help! Upload a document and tell me what you'd like to do with it. Type 'help' if you need examples! 😊"
# Shared module-level instance. Both public methods of IntentClassifier are
# static, so this object only serves as a convenient import handle.
intent_classifier = IntentClassifier()