""" Intent Classification Service Determines user intent to make the chatbot behave naturally """ import re from typing import Dict, Any class IntentClassifier: """Classifies user intents to determine appropriate bot response""" # Keywords that indicate pipeline creation intent PIPELINE_KEYWORDS = [ "extract", "summarize", "translate", "classify", "detect", "analyze", "process", "generate", "create pipeline", "build pipeline", "run", "execute", "perform", "do", "get", "find", "identify", "table", "text", "image", "signature", "stamp", "ner", "entities" ] # Casual chat patterns CASUAL_PATTERNS = [ r"^(hi|hello|hey|greetings|good morning|good afternoon|good evening)", r"^(how are you|what's up|wassup)", r"^(thanks|thank you|appreciate)", r"^(bye|goodbye|see you|later)", r"^(ok|okay|cool|nice|great|awesome)", r"^(what can you do|what do you do|help|capabilities)", r"^(who are you|what are you)" ] # Question patterns that need informational response QUESTION_PATTERNS = [ r"^(what|how|why|when|where|who|can you|do you|are you|is it)", r"(help|explain|tell me|show me)" ] @staticmethod def classify_intent(user_message: str) -> Dict[str, Any]: """ Classify user intent from their message Returns: { "intent": "casual_chat" | "question" | "pipeline_request" | "approval" | "rejection", "confidence": float (0-1), "requires_pipeline": bool, "suggested_response_type": "friendly" | "informational" | "pipeline_generation" } """ message = user_message.strip().lower() # Empty message if not message: return { "intent": "casual_chat", "confidence": 1.0, "requires_pipeline": False, "suggested_response_type": "friendly" } # Approval/Rejection patterns (for pipeline confirmation) if message in ["approve", "yes", "y", "ok", "okay", "proceed", "go ahead", "do it"]: return { "intent": "approval", "confidence": 1.0, "requires_pipeline": False, "suggested_response_type": "execute_pipeline" } if message in ["reject", "no", "n", "cancel", "stop", "don't"]: return { "intent": "rejection", "confidence": 1.0, "requires_pipeline": False, "suggested_response_type": "friendly" } # Casual chat patterns for pattern in IntentClassifier.CASUAL_PATTERNS: if re.search(pattern, message, re.IGNORECASE): return { "intent": "casual_chat", "confidence": 0.9, "requires_pipeline": False, "suggested_response_type": "friendly" } # Question patterns (informational) - CHECK THIS FIRST! is_question = False for pattern in IntentClassifier.QUESTION_PATTERNS: if re.search(pattern, message, re.IGNORECASE): is_question = True break # Check for pipeline keywords pipeline_keyword_count = sum( 1 for keyword in IntentClassifier.PIPELINE_KEYWORDS if keyword in message ) # IMPORTANT: Questions take priority over pipeline keywords! # "tell me about X" is a question, not a pipeline request if is_question: # If it's clearly asking about the system itself, it's informational if any(term in message for term in ["masterllm", "you", "this", "system", "bot", "chatbot", "assistant"]): return { "intent": "question", "confidence": 0.95, "requires_pipeline": False, "suggested_response_type": "informational" } # If it's a question but with strong pipeline keywords, could be pipeline request # But only if the keywords are very explicit (e.g., "extract text from page 5") elif pipeline_keyword_count >= 2 and not message.startswith("tell me"): return { "intent": "pipeline_request", "confidence": 0.7, "requires_pipeline": True, "suggested_response_type": "pipeline_generation", "keyword_matches": pipeline_keyword_count } # Otherwise, it's just a question else: return { "intent": "question", "confidence": 0.85, "requires_pipeline": False, "suggested_response_type": "informational" } # If has pipeline keywords but is NOT a question, then pipeline request if pipeline_keyword_count > 0: return { "intent": "pipeline_request", "confidence": min(0.6 + (pipeline_keyword_count * 0.1), 1.0), "requires_pipeline": True, "suggested_response_type": "pipeline_generation", "keyword_matches": pipeline_keyword_count } # Default: treat as casual if short, otherwise might be pipeline request if len(message.split()) < 3: return { "intent": "casual_chat", "confidence": 0.6, "requires_pipeline": False, "suggested_response_type": "friendly" } # Longer messages without clear intent - ask for clarification return { "intent": "unclear", "confidence": 0.4, "requires_pipeline": False, "suggested_response_type": "clarification" } @staticmethod def get_friendly_response(intent: str, user_message: str = "") -> str: """Generate friendly chatbot responses for non-pipeline intents""" message_lower = user_message.lower().strip() # Greetings if re.search(r"^(hi|hello|hey)", message_lower): return "Hello! 👋 I'm MasterLLM, your AI document processing assistant. Upload a document and tell me what you'd like to do with it!" # How are you if "how are you" in message_lower: return "I'm doing great, thank you! 🤖 Ready to help you process documents. Upload a file to get started!" # Thanks if re.search(r"^(thanks|thank you)", message_lower): return "You're welcome! 😊 Let me know if you need anything else!" # Goodbye if re.search(r"^(bye|goodbye)", message_lower): return "Goodbye! 👋 Feel free to come back anytime you need document processing help!" # Capabilities question if "what can you do" in message_lower or "capabilities" in message_lower: return """I can help you with various document processing tasks: 📄 **Text Operations:** - Extract text from PDFs and images - Summarize documents - Translate to different languages - Classify text content - Extract named entities (NER) 📊 **Table Operations:** - Extract tables from documents - Analyze tabular data 🖼️ **Image Operations:** - Describe images - Detect signatures - Detect stamps 🔧 **How to use:** 1. Upload a document (PDF or image) 2. Tell me what you want to do (e.g., "extract text and summarize") 3. I'll create a pipeline for you to approve 4. Watch the magic happen! ✨""" # Who are you / What are you / Tell me about MasterLLM if any(phrase in message_lower for phrase in ["who are you", "what are you", "about masterllm", "about you", "tell me about"]): return """**About MasterLLM** 🤖 I'm MasterLLM, an intelligent AI-powered document processing orchestrator that helps you automate complex document workflows. **What I Do:** - 🔍 **Analyze** your requests and understand what you need - 🛠️ **Create** custom processing pipelines automatically - ⚡ **Execute** multi-step document workflows - 📊 **Deliver** structured results **My Capabilities:** - Extract text from PDFs and images (OCR) - Summarize long documents - Translate content to different languages - Classify documents - Extract tables and structured data - Detect signatures and stamps - Describe images - Extract named entities (people, places, organizations) **How I'm Different:** I'm not just a single tool - I'm a **smart orchestrator** that: 1. Understands your natural language requests 2. Determines the best sequence of operations 3. Uses powerful AI models (AWS Bedrock Claude & Google Gemini) 4. Combines multiple tools into efficient pipelines 5. Executes everything seamlessly **My Role:** Think of me as your AI assistant for document processing. I have a natural conversation with you, understand what you need, propose a plan, and execute it - all while keeping you informed every step of the way! Want to see me in action? Just upload a document and tell me what you'd like to do with it! 🚀""" # Help if message_lower in ["help", "?"] or "help me" in message_lower: return """Here's how to use me: 1️⃣ **Upload Document**: Click the upload button and select a PDF or image 2️⃣ **Describe Task**: Tell me what you want (e.g., "extract text from pages 1-5 and summarize") 3️⃣ **Review Pipeline**: I'll show you the processing plan 4️⃣ **Approve**: Type 'approve' to execute or 'reject' to cancel 5️⃣ **Get Results**: Watch real-time progress and get your results! **Example requests:** - "extract text and summarize" - "get tables from pages 2-4" - "translate to Spanish" - "detect signatures and stamps" Need anything else?""" # Unclear intent if intent == "unclear": return "I'm not sure what you'd like me to do. Could you please:\n- Upload a document first, or\n- Tell me what processing task you need (e.g., 'extract text', 'summarize', 'translate')\n\nType 'help' to see what I can do!" # Default friendly response return "I'm here to help! Upload a document and tell me what you'd like to do with it. Type 'help' if you need examples! 😊" # Singleton instance intent_classifier = IntentClassifier()