Spaces:
Sleeping
Sleeping
| """ | |
| Intent Classification Service | |
| Determines user intent to make the chatbot behave naturally | |
| """ | |
| import re | |
| from typing import Dict, Any | |
class IntentClassifier:
    """Rule-based classifier that maps a chat message to a coarse intent.

    Classification is driven entirely by the keyword and regex tables below.
    The class keeps no instance state, so both public methods are static and
    can be called on the class itself or on an instance.
    """

    # Keywords that indicate pipeline creation intent.  They are matched as
    # whole words (with simple inflections), never as raw substrings.
    PIPELINE_KEYWORDS = [
        "extract", "summarize", "translate", "classify", "detect",
        "analyze", "process", "generate", "create pipeline", "build pipeline",
        "run", "execute", "perform", "do", "get", "find", "identify",
        "table", "text", "image", "signature", "stamp", "ner", "entities",
    ]

    # Casual chat patterns.  Each is anchored at the start of the message and
    # closed with \b so that e.g. "hi" does not match "highlight".
    CASUAL_PATTERNS = [
        r"^(hi|hello|hey|greetings|good morning|good afternoon|good evening)\b",
        r"^(how are you|what's up|wassup)\b",
        r"^(thanks|thank you|appreciate[ds]?)\b",
        r"^(bye|goodbye|see you|later)\b",
        r"^(ok|okay|cool|nice|great|awesome)\b",
        r"^(what can you do|what do you do|help|capabilities)\b",
        r"^(who are you|what are you)\b",
    ]

    # Question patterns that need informational response
    QUESTION_PATTERNS = [
        r"^(what|how|why|when|where|who|can you|do you|are you|is it)\b",
        r"\b(help|explain|tell me|show me)\b",
    ]

    # Exact replies used to confirm or cancel a proposed pipeline.
    _APPROVAL_PHRASES = frozenset(
        ["approve", "yes", "y", "ok", "okay", "proceed", "go ahead", "do it"]
    )
    _REJECTION_PHRASES = frozenset(
        ["reject", "no", "n", "cancel", "stop", "don't"]
    )

    # Words that mark a question as being about the assistant itself.
    _SYSTEM_TERMS = (
        "masterllm", "you", "this", "system", "bot", "chatbot", "assistant",
    )

    # NOTE(review): the emoji in the reply strings below and in
    # get_friendly_response appear mis-decoded (e.g. "π").  They are kept
    # exactly as found; restore them from the original UTF-8 source if it is
    # available.
    _CAPABILITIES_REPLY = "\n".join((
        "I can help you with various document processing tasks:",
        "π **Text Operations:**",
        "- Extract text from PDFs and images",
        "- Summarize documents",
        "- Translate to different languages",
        "- Classify text content",
        "- Extract named entities (NER)",
        "π **Table Operations:**",
        "- Extract tables from documents",
        "- Analyze tabular data",
        "πΌοΈ **Image Operations:**",
        "- Describe images",
        "- Detect signatures",
        "- Detect stamps",
        "π§ **How to use:**",
        "1. Upload a document (PDF or image)",
        '2. Tell me what you want to do (e.g., "extract text and summarize")',
        "3. I'll create a pipeline for you to approve",
        "4. Watch the magic happen! β¨",
    ))

    _ABOUT_REPLY = "\n".join((
        "**About MasterLLM** π€",
        "I'm MasterLLM, an intelligent AI-powered document processing orchestrator that helps you automate complex document workflows.",
        "**What I Do:**",
        "- π **Analyze** your requests and understand what you need",
        "- π οΈ **Create** custom processing pipelines automatically",
        "- β‘ **Execute** multi-step document workflows",
        "- π **Deliver** structured results",
        "**My Capabilities:**",
        "- Extract text from PDFs and images (OCR)",
        "- Summarize long documents",
        "- Translate content to different languages",
        "- Classify documents",
        "- Extract tables and structured data",
        "- Detect signatures and stamps",
        "- Describe images",
        "- Extract named entities (people, places, organizations)",
        "**How I'm Different:**",
        "I'm not just a single tool - I'm a **smart orchestrator** that:",
        "1. Understands your natural language requests",
        "2. Determines the best sequence of operations",
        "3. Uses powerful AI models (AWS Bedrock Claude & Google Gemini)",
        "4. Combines multiple tools into efficient pipelines",
        "5. Executes everything seamlessly",
        "**My Role:**",
        "Think of me as your AI assistant for document processing. I have a natural conversation with you, understand what you need, propose a plan, and execute it - all while keeping you informed every step of the way!",
        "Want to see me in action? Just upload a document and tell me what you'd like to do with it! π",
    ))

    _HELP_REPLY = "\n".join((
        "Here's how to use me:",
        "1οΈβ£ **Upload Document**: Click the upload button and select a PDF or image",
        '2οΈβ£ **Describe Task**: Tell me what you want (e.g., "extract text from pages 1-5 and summarize")',
        "3οΈβ£ **Review Pipeline**: I'll show you the processing plan",
        "4οΈβ£ **Approve**: Type 'approve' to execute or 'reject' to cancel",
        "5οΈβ£ **Get Results**: Watch real-time progress and get your results!",
        "**Example requests:**",
        '- "extract text and summarize"',
        '- "get tables from pages 2-4"',
        '- "translate to Spanish"',
        '- "detect signatures and stamps"',
        "Need anything else?",
    ))

    @staticmethod
    def _mentions(message: str, term: str) -> bool:
        """Return True if *term* occurs in *message* as a whole word.

        A short inflection suffix (s, es, d, ed, ing) is tolerated so that
        "tables" still counts for "table", while "document" no longer counts
        for "do".
        """
        pattern = r"\b" + re.escape(term) + r"(?:s|es|d|ed|ing)?\b"
        return re.search(pattern, message) is not None

    @staticmethod
    def _result(
        intent: str,
        confidence: float,
        requires_pipeline: bool,
        response_type: str,
        **extra: Any
    ) -> Dict[str, Any]:
        """Build the classification dict returned by classify_intent."""
        result = {
            "intent": intent,
            "confidence": confidence,
            "requires_pipeline": requires_pipeline,
            "suggested_response_type": response_type,
        }
        result.update(extra)
        return result

    @staticmethod
    def classify_intent(user_message: str) -> Dict[str, Any]:
        """
        Classify user intent from their message.

        The message is stripped and lower-cased, then checked in this order:
        empty input, approval/rejection replies, casual chat patterns,
        questions, pipeline keywords, and finally a length-based fallback.

        Returns:
            {
                "intent": "casual_chat" | "question" | "pipeline_request"
                          | "approval" | "rejection" | "unclear",
                "confidence": float (0-1),
                "requires_pipeline": bool,
                "suggested_response_type": "friendly" | "informational"
                          | "pipeline_generation" | "execute_pipeline"
                          | "clarification",
                "keyword_matches": int  (only for "pipeline_request")
            }
        """
        cls = IntentClassifier
        # Treat None like an empty message instead of raising.
        message = (user_message or "").strip().lower()

        # Empty message
        if not message:
            return cls._result("casual_chat", 1.0, False, "friendly")

        # Approval/Rejection replies (for pipeline confirmation).  Trailing
        # "." / "!" is ignored so "yes!" is still an approval.
        confirmation = message.rstrip(".! ")
        if confirmation in cls._APPROVAL_PHRASES:
            return cls._result("approval", 1.0, False, "execute_pipeline")
        if confirmation in cls._REJECTION_PHRASES:
            return cls._result("rejection", 1.0, False, "friendly")

        # Casual chat patterns
        if any(re.search(pattern, message) for pattern in cls.CASUAL_PATTERNS):
            return cls._result("casual_chat", 0.9, False, "friendly")

        is_question = any(
            re.search(pattern, message) for pattern in cls.QUESTION_PATTERNS
        )

        # Number of distinct pipeline keywords mentioned in the message.
        pipeline_keyword_count = sum(
            1 for keyword in cls.PIPELINE_KEYWORDS
            if cls._mentions(message, keyword)
        )

        # Questions take priority over pipeline keywords:
        # "tell me about X" is a question, not a pipeline request.
        if is_question:
            # Asking about the system itself -> informational
            if any(cls._mentions(message, term) for term in cls._SYSTEM_TERMS):
                return cls._result("question", 0.95, False, "informational")
            # A question with several explicit pipeline keywords
            # (e.g. "where to find and extract signatures") is a request.
            if pipeline_keyword_count >= 2 and not message.startswith("tell me"):
                return cls._result(
                    "pipeline_request", 0.7, True, "pipeline_generation",
                    keyword_matches=pipeline_keyword_count,
                )
            # Otherwise, it's just a question
            return cls._result("question", 0.85, False, "informational")

        # Pipeline keywords in a non-question -> pipeline request; confidence
        # grows with the number of keywords, capped at 1.0.
        if pipeline_keyword_count > 0:
            return cls._result(
                "pipeline_request",
                min(0.6 + (pipeline_keyword_count * 0.1), 1.0),
                True,
                "pipeline_generation",
                keyword_matches=pipeline_keyword_count,
            )

        # Default: short messages are treated as casual chat
        if len(message.split()) < 3:
            return cls._result("casual_chat", 0.6, False, "friendly")

        # Longer messages without clear intent - ask for clarification
        return cls._result("unclear", 0.4, False, "clarification")

    @staticmethod
    def get_friendly_response(intent: str, user_message: str = "") -> str:
        """Generate friendly chatbot responses for non-pipeline intents.

        The reply is chosen from the message text first (greeting, thanks,
        goodbye, capabilities, about, help); *intent* is only consulted for
        the "unclear" case.  A generic prompt is returned when nothing matches.
        """
        cls = IntentClassifier
        message_lower = (user_message or "").lower().strip()

        # Greetings (\b keeps "highlight ..." from being read as "hi")
        if re.search(r"^(hi|hello|hey)\b", message_lower):
            return "Hello! π I'm MasterLLM, your AI document processing assistant. Upload a document and tell me what you'd like to do with it!"

        # How are you
        if "how are you" in message_lower:
            return "I'm doing great, thank you! π€ Ready to help you process documents. Upload a file to get started!"

        # Thanks
        if re.search(r"^(thanks|thank you)\b", message_lower):
            return "You're welcome! π Let me know if you need anything else!"

        # Goodbye
        if re.search(r"^(bye|goodbye)\b", message_lower):
            return "Goodbye! π Feel free to come back anytime you need document processing help!"

        # Capabilities question
        if "what can you do" in message_lower or "capabilities" in message_lower:
            return cls._CAPABILITIES_REPLY

        # Who are you / What are you / Tell me about MasterLLM
        about_phrases = (
            "who are you", "what are you", "about masterllm",
            "about you", "tell me about",
        )
        if any(phrase in message_lower for phrase in about_phrases):
            return cls._ABOUT_REPLY

        # Help
        if message_lower in ("help", "?") or "help me" in message_lower:
            return cls._HELP_REPLY

        # Unclear intent
        if intent == "unclear":
            return "I'm not sure what you'd like me to do. Could you please:\n- Upload a document first, or\n- Tell me what processing task you need (e.g., 'extract text', 'summarize', 'translate')\n\nType 'help' to see what I can do!"

        # Default friendly response
        return "I'm here to help! Upload a document and tell me what you'd like to do with it. Type 'help' if you need examples! π"


# Module-level instance; the methods are static, so calls through this
# object and through the class behave identically.
intent_classifier = IntentClassifier()