Spaces:

stellar413
/

masterllm

Sleeping

File size: 10,687 Bytes

"""
Intent Classification Service
Determines user intent to make the chatbot behave naturally
"""

import re
from typing import Dict, Any


class IntentClassifier:
    """Rule-based classifier that maps a chat message to a bot intent.

    All matching is done on the lower-cased, stripped message using the
    keyword and regex tables below. Matching is word-based: a keyword or
    greeting only counts when it appears as a whole word (optionally with a
    common inflection), never as a fragment of a longer word.
    """

    # Keywords that indicate pipeline creation intent
    PIPELINE_KEYWORDS = [
        "extract", "summarize", "translate", "classify", "detect",
        "analyze", "process", "generate", "create pipeline", "build pipeline",
        "run", "execute", "perform", "do", "get", "find", "identify",
        "table", "text", "image", "signature", "stamp", "ner", "entities"
    ]

    # Inflections tolerated after a pipeline keyword ("tables", "extracted",
    # "processing", "extraction"). Anything else after the keyword means it
    # is part of a different word ("do" in "document") and must not count.
    _KEYWORD_SUFFIX = r"(?:s|es|d|ed|ing|ion|ions)?"

    # Casual chat patterns. The trailing \b keeps prefixes from matching
    # inside longer words ("hi" must not match "highlight" or "history").
    CASUAL_PATTERNS = [
        r"^(hi|hello|hey|greetings|good morning|good afternoon|good evening)\b",
        r"^(how are you|what's up|wassup)\b",
        r"^(thanks|thank you|appreciate[ds]?)\b",
        r"^(bye|goodbye|see you|later)\b",
        r"^(ok|okay|cool|nice|great|awesome)\b",
        r"^(what can you do|what do you do|help|capabilities)\b",
        r"^(who are you|what are you)\b"
    ]

    # Question patterns that need informational response. The leading
    # pattern is word-bounded so "however"/"whole" are not read as questions.
    QUESTION_PATTERNS = [
        r"^(what|how|why|when|where|who|can you|do you|are you|is it)\b",
        r"(help|explain|tell me|show me)"
    ]

    # Words showing that a question is about the assistant itself. Matched as
    # whole words so "both" does not count as "bot".
    _SYSTEM_TERMS = (
        r"\b(?:masterllm|you(?:r|rs|rself)?|this|systems?|bots?|chatbots?|assistants?)\b"
    )

    # Exact replies used to confirm or cancel a proposed pipeline
    _APPROVALS = frozenset(
        ["approve", "yes", "y", "ok", "okay", "proceed", "go ahead", "do it"]
    )
    _REJECTIONS = frozenset(["reject", "no", "n", "cancel", "stop", "don't"])

    @staticmethod
    def _result(intent, confidence, requires_pipeline, response_type, **extra) -> Dict[str, Any]:
        """Build the classification dict in its canonical key order."""
        result = {
            "intent": intent,
            "confidence": confidence,
            "requires_pipeline": requires_pipeline,
            "suggested_response_type": response_type
        }
        result.update(extra)
        return result

    @staticmethod
    def _matches_any(patterns, message: str) -> bool:
        """Return True when any regex in ``patterns`` matches ``message``."""
        return any(re.search(pattern, message, re.IGNORECASE) for pattern in patterns)

    @staticmethod
    def _count_pipeline_keywords(message: str) -> int:
        """Count the distinct pipeline keywords present as whole words."""
        suffix = IntentClassifier._KEYWORD_SUFFIX
        return sum(
            1 for keyword in IntentClassifier.PIPELINE_KEYWORDS
            if re.search(r"\b" + re.escape(keyword) + suffix + r"\b", message)
        )

    @staticmethod
    def classify_intent(user_message: str) -> Dict[str, Any]:
        """
        Classify user intent from their message

        Args:
            user_message: Raw text typed by the user. ``None`` is treated
                like an empty message.

        Returns:
            {
                "intent": "casual_chat" | "question" | "pipeline_request"
                          | "approval" | "rejection" | "unclear",
                "confidence": float (0-1),
                "requires_pipeline": bool,
                "suggested_response_type": "friendly" | "informational"
                          | "pipeline_generation" | "execute_pipeline"
                          | "clarification",
                "keyword_matches": int  (only for "pipeline_request")
            }
        """
        message = (user_message or "").strip().lower()

        # Empty message
        if not message:
            return IntentClassifier._result("casual_chat", 1.0, False, "friendly")

        # Approval/Rejection patterns (for pipeline confirmation).
        # Trailing "." / "!" is dropped so "yes!" or "approve." still count.
        confirmation = message.rstrip(".! ")
        if confirmation in IntentClassifier._APPROVALS:
            return IntentClassifier._result("approval", 1.0, False, "execute_pipeline")

        if confirmation in IntentClassifier._REJECTIONS:
            return IntentClassifier._result("rejection", 1.0, False, "friendly")

        # Casual chat patterns
        if IntentClassifier._matches_any(IntentClassifier.CASUAL_PATTERNS, message):
            return IntentClassifier._result("casual_chat", 0.9, False, "friendly")

        is_question = IntentClassifier._matches_any(
            IntentClassifier.QUESTION_PATTERNS, message
        )
        pipeline_keyword_count = IntentClassifier._count_pipeline_keywords(message)

        # IMPORTANT: Questions take priority over pipeline keywords!
        # "tell me about X" is a question, not a pipeline request
        if is_question:
            # If it's clearly asking about the system itself, it's informational
            if re.search(IntentClassifier._SYSTEM_TERMS, message):
                return IntentClassifier._result("question", 0.95, False, "informational")
            # A question with several explicit pipeline keywords is treated as
            # a pipeline request, unless it is a "tell me ..." style question.
            if pipeline_keyword_count >= 2 and not message.startswith("tell me"):
                return IntentClassifier._result(
                    "pipeline_request", 0.7, True, "pipeline_generation",
                    keyword_matches=pipeline_keyword_count
                )
            # Otherwise, it's just a question
            return IntentClassifier._result("question", 0.85, False, "informational")

        # If has pipeline keywords but is NOT a question, then pipeline request
        if pipeline_keyword_count > 0:
            return IntentClassifier._result(
                "pipeline_request",
                min(0.6 + (pipeline_keyword_count * 0.1), 1.0),
                True,
                "pipeline_generation",
                keyword_matches=pipeline_keyword_count
            )

        # Default: treat as casual if short, otherwise ask for clarification
        if len(message.split()) < 3:
            return IntentClassifier._result("casual_chat", 0.6, False, "friendly")

        # Longer messages without clear intent - ask for clarification
        return IntentClassifier._result("unclear", 0.4, False, "clarification")

    @staticmethod
    def get_friendly_response(intent: str, user_message: str = "") -> str:
        """Generate friendly chatbot responses for non-pipeline intents

        Args:
            intent: Intent label from ``classify_intent``; only "unclear"
                changes the reply, every other value falls back to the
                default message when no phrase below matches.
            user_message: Original user text, used to pick a canned reply.
                ``None`` is treated like an empty message.

        Returns:
            The reply text (may contain Markdown and emoji).
        """

        message_lower = (user_message or "").lower().strip()

        # Greetings (word-bounded so "highlight ..." is not greeted)
        if re.search(r"^(hi|hello|hey)\b", message_lower):
            return "Hello! 👋 I'm MasterLLM, your AI document processing assistant. Upload a document and tell me what you'd like to do with it!"

        # How are you
        if "how are you" in message_lower:
            return "I'm doing great, thank you! 🤖 Ready to help you process documents. Upload a file to get started!"

        # Thanks
        if re.search(r"^(thanks|thank you)\b", message_lower):
            return "You're welcome! 😊 Let me know if you need anything else!"

        # Goodbye
        if re.search(r"^(bye|goodbye)\b", message_lower):
            return "Goodbye! 👋 Feel free to come back anytime you need document processing help!"

        # Capabilities question
        if "what can you do" in message_lower or "capabilities" in message_lower:
            return """I can help you with various document processing tasks:

📄 **Text Operations:**
- Extract text from PDFs and images
- Summarize documents
- Translate to different languages
- Classify text content
- Extract named entities (NER)

📊 **Table Operations:**
- Extract tables from documents
- Analyze tabular data

🖼️ **Image Operations:**
- Describe images
- Detect signatures
- Detect stamps

🔧 **How to use:**
1. Upload a document (PDF or image)
2. Tell me what you want to do (e.g., "extract text and summarize")
3. I'll create a pipeline for you to approve
4. Watch the magic happen! ✨"""

        # Who are you / What are you / Tell me about MasterLLM
        if any(phrase in message_lower for phrase in ["who are you", "what are you", "about masterllm", "about you", "tell me about"]):
            return """**About MasterLLM** 🤖

I'm MasterLLM, an intelligent AI-powered document processing orchestrator that helps you automate complex document workflows.

**What I Do:**
- 🔍 **Analyze** your requests and understand what you need
- 🛠️ **Create** custom processing pipelines automatically
- ⚡ **Execute** multi-step document workflows
- 📊 **Deliver** structured results

**My Capabilities:**
- Extract text from PDFs and images (OCR)
- Summarize long documents
- Translate content to different languages
- Classify documents
- Extract tables and structured data
- Detect signatures and stamps
- Describe images
- Extract named entities (people, places, organizations)

**How I'm Different:**
I'm not just a single tool - I'm a **smart orchestrator** that:
1. Understands your natural language requests
2. Determines the best sequence of operations
3. Uses powerful AI models (AWS Bedrock Claude & Google Gemini)
4. Combines multiple tools into efficient pipelines
5. Executes everything seamlessly

**My Role:**
Think of me as your AI assistant for document processing. I have a natural conversation with you, understand what you need, propose a plan, and execute it - all while keeping you informed every step of the way!

Want to see me in action? Just upload a document and tell me what you'd like to do with it! 🚀"""

        # Help
        if message_lower in ["help", "?"] or "help me" in message_lower:
            return """Here's how to use me:

1️⃣ **Upload Document**: Click the upload button and select a PDF or image
2️⃣ **Describe Task**: Tell me what you want (e.g., "extract text from pages 1-5 and summarize")
3️⃣ **Review Pipeline**: I'll show you the processing plan
4️⃣ **Approve**: Type 'approve' to execute or 'reject' to cancel
5️⃣ **Get Results**: Watch real-time progress and get your results!

**Example requests:**
- "extract text and summarize"
- "get tables from pages 2-4"
- "translate to Spanish"
- "detect signatures and stamps"

Need anything else?"""

        # Unclear intent
        if intent == "unclear":
            return "I'm not sure what you'd like me to do. Could you please:\n- Upload a document first, or\n- Tell me what processing task you need (e.g., 'extract text', 'summarize', 'translate')\n\nType 'help' to see what I can do!"

        # Default friendly response
        return "I'm here to help! Upload a document and tell me what you'd like to do with it. Type 'help' if you need examples! 😊"


# Singleton instance: created once when this module is imported and exposed
# under the module-level name `intent_classifier`.
intent_classifier = IntentClassifier()