File size: 8,206 Bytes
6d6b815
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
import logging
import re
import string
from collections import Counter
from typing import Dict, List, Optional, Any, Tuple

# Configure logging.
# NOTE: basicConfig() configures the *root* logger as an import-time side
# effect; it does nothing if the root logger already has handlers attached.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)


class NLPProcessor:
    """
    Handles natural language processing tasks including text analysis,
    content filtering, and conversation management.

    Simplified version without external NLP libraries: everything is built
    on regular expressions and plain string operations.

    Attributes:
        stopwords: Set of lowercase English stopwords that
            extract_keywords() ignores.
        unsafe_patterns: List of regex source strings; text matching any of
            them (case-insensitively) is rejected by filter_unsafe_content().
    """

    # Intent name -> compiled keyword pattern, applied to lower-cased input.
    # Dict order matters: when several intents match, the first one listed
    # becomes the primary intent.
    # Every keyword is anchored at the start of a word so that, for example,
    # "hi" does not fire inside "this". Short keywords (three letters or
    # fewer) must also end at a word boundary; longer ones may be followed by
    # a suffix so inflected forms such as "settings", "downloading" or
    # "helpful" still match.
    _INTENT_PATTERNS = {
        "greeting": re.compile(r"\b(?:hello|greetings|hi\b|hey\b)"),
        "question": re.compile(r"\?|\b(?:what|when|where|why\b|how\b|who\b)"),
        "command": re.compile(
            r"\b(?:execute|perform|download|clone|modify|do\b|run\b)"
        ),
        "farewell": re.compile(r"\b(?:goodbye|exit|quit|bye\b|end\b)"),
        "help": re.compile(r"\b(?:help|assist)"),
        "settings": re.compile(r"\b(?:setting|configure|preference|option)"),
    }

    # Translation table mapping every ASCII punctuation character to a space,
    # so punctuation acts as a token separator in extract_keywords().
    _PUNCTUATION_TO_SPACE = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )

    def __init__(self):
        """Initialize the NLP processor with required resources"""
        # Common English stopwords. The fragments 's', 't' and 'don' are what
        # remains of contractions once punctuation has been replaced by spaces.
        self.stopwords = {
            'i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you',
            'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself',
            'she', 'her', 'hers', 'herself', 'it', 'its', 'itself', 'they', 'them',
            'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this',
            'that', 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been',
            'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing',
            'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until',
            'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between',
            'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to',
            'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again',
            'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how',
            'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such',
            'no', 'nor', 'not', 'only', 'own', 'same', 'so', 'than', 'too', 'very',
            's', 't', 'can', 'will', 'just', 'don', 'should', 'now'
        }

        # Unsafe content patterns: "<verb> <target>" pairs separated by
        # whitespace, matched case-insensitively anywhere in the text.
        self.unsafe_patterns = [
            r'(hack|exploit|attack|compromise)\s+(system|server|computer|network)',
            r'(illegal|unlawful)\s+(activity|operation|action)',
            r'(bypass|circumvent)\s+(security|protection|filter)',
            r'(steal|obtain)\s+(password|credentials|sensitive\s+data)',
            r'(launch|execute)\s+(malware|virus|ransomware)',
        ]

        logger.info("Simplified NLP Processor initialized successfully")

    def process_text(self, text: str) -> str:
        """
        Normalize whitespace in a piece of text.

        Leading and trailing whitespace is removed and every internal run of
        whitespace is collapsed to a single space.

        Args:
            text: Input text to process

        Returns:
            The cleaned text. If cleaning fails (for example because ``text``
            is not a string) the error is logged and ``text`` is returned
            unchanged.
        """
        try:
            return re.sub(r'\s+', ' ', text.strip())
        except Exception as e:
            # Best effort: never raise, hand the input back untouched.
            logger.error("Error processing text: %s", e)
            return text

    def analyze_intent(self, text: str) -> Dict[str, Any]:
        """
        Analyze the user's intent from their input using keyword matching.

        Keywords are matched on word boundaries (see ``_INTENT_PATTERNS``),
        so a keyword buried inside an unrelated word does not count.

        Args:
            text: User input text

        Returns:
            Dictionary with three keys:
                ``primary_intent``: name of the first matching intent, or
                    ``"general"`` when none matched.
                ``intents``: mapping of every intent name to a bool telling
                    whether it matched.
                ``confidence``: 0.7 when some intent matched, otherwise 0.3.
            On failure the error is logged and
            ``{"primary_intent": "general", "intents": {}, "confidence": 0.0}``
            is returned.
        """
        try:
            text_lower = text.lower()

            intents = {
                name: bool(pattern.search(text_lower))
                for name, pattern in self._INTENT_PATTERNS.items()
            }

            # The first matching intent (in declaration order) wins.
            primary_intent = next(
                (name for name, matched in intents.items() if matched),
                "general",
            )

            return {
                "primary_intent": primary_intent,
                "intents": intents,
                # Simple two-level confidence score
                "confidence": 0.7 if primary_intent != "general" else 0.3,
            }

        except Exception as e:
            logger.error("Error analyzing intent: %s", e)
            return {"primary_intent": "general", "intents": {}, "confidence": 0.0}

    def filter_unsafe_content(self, text: str) -> str:
        """
        Filter potentially unsafe content from text.

        Args:
            text: Text to filter

        Returns:
            ``text`` unchanged when it matches none of ``unsafe_patterns``;
            otherwise a fixed refusal message. If the check itself fails, the
            error is logged and a fixed error message is returned instead.
        """
        try:
            is_unsafe = any(
                re.search(pattern, text, re.IGNORECASE)
                for pattern in self.unsafe_patterns
            )
            if is_unsafe:
                return "I apologize, but I cannot provide that information or perform that action due to safety constraints."

            return text

        except Exception as e:
            logger.error("Error filtering content: %s", e)
            return "I apologize, but I encountered an error processing your request."

    def extract_keywords(
        self, text: str, max_keywords: int = 10, min_length: int = 4
    ) -> List[str]:
        """
        Extract the most frequent non-stopword terms from text.

        The text is lower-cased, punctuation is replaced by spaces, and the
        result is split on whitespace. Stopwords and tokens shorter than
        ``min_length`` are discarded.

        Args:
            text: Input text
            max_keywords: Maximum number of keywords to return
            min_length: Minimum number of characters a token needs in order
                to be kept

        Returns:
            List of keywords ordered by descending frequency; words with the
            same frequency keep the order in which they first appeared.
            An empty list is returned (and the error logged) on failure.
        """
        try:
            tokens = text.lower().translate(self._PUNCTUATION_TO_SPACE).split()

            counts = Counter(
                word
                for word in tokens
                if len(word) >= min_length and word not in self.stopwords
            )

            # most_common() keeps first-seen order among equal counts.
            return [word for word, _ in counts.most_common(max_keywords)]

        except Exception as e:
            logger.error("Error extracting keywords: %s", e)
            return []

    def summarize_conversation(self, messages: List[Dict[str, Any]]) -> str:
        """
        Generate a brief, keyword-based summary of the conversation.

        Args:
            messages: List of conversation messages; the text of each one is
                read from its ``'content'`` key. Messages whose content is
                missing or ``None`` contribute nothing, and non-string
                content is converted with ``str()``.

        Returns:
            A one-sentence summary naming up to three keywords for
            conversations of three messages or fewer and up to five keywords
            for longer ones. A fixed message is returned when ``messages`` is
            empty, and another (after logging) when summarizing fails.
        """
        try:
            if not messages:
                return "No conversation to summarize."

            # Collect the textual content, tolerating None / non-string values
            # so one odd message does not make the whole summary fail.
            contents = []
            for msg in messages:
                content = msg.get('content', '')
                if content is not None:
                    contents.append(str(content))

            keywords = self.extract_keywords(' '.join(contents))

            # Short conversations get a shorter keyword list and wording.
            if len(messages) <= 3:
                keyword_str = ', '.join(keywords[:3]) if keywords else "various topics"
                return f"Brief conversation about {keyword_str}."

            keyword_str = ', '.join(keywords[:5]) if keywords else "various topics"
            return f"Extended conversation covering {keyword_str}."

        except Exception as e:
            logger.error("Error summarizing conversation: %s", e)
            return "Unable to summarize conversation."