File size: 17,822 Bytes
22eeb7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
# app/intents.py
"""

๐ŸŽฏ Penny's Intent Classification System

Rule-based intent classifier designed for civic engagement queries.



CURRENT: Simple keyword matching (fast, predictable, debuggable)

FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)



This approach allows Penny to understand resident needs and route them

to the right civic systems โ€” weather, resources, events, translation, etc.

"""

import logging
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Dict, List, Optional

# --- LOGGING SETUP (Azure-friendly) ---
# Module-level logger named after this module. No handlers or levels are
# configured here; every function below logs through this object.
logger = logging.getLogger(__name__)


# --- INTENT CATEGORIES (Enumerated for type safety) ---
class IntentType(str, Enum):
    """
    Penny's supported intent categories.

    Each member's value is the lowercase string that ``classify_intent``
    returns for that category. Because the class also derives from
    ``str``, members compare equal to their plain string values.
    """
    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"
    HELP = "help"
    EMERGENCY = "emergency"  # Critical safety routing
    # Returned when no pattern matches, the message is empty, or
    # classification raises an unexpected error.
    UNKNOWN = "unknown"


@dataclass
class IntentMatch:
    """
    Result of one intent classification.

    Carries the winning intent together with a confidence score and the
    keywords that triggered it, so a decision can be inspected afterwards.
    """
    # Primary intent chosen for the message.
    intent: IntentType
    # Score between 0.0 and 1.0.
    confidence: float
    # Keywords from the pattern lists that were found in the message.
    matched_keywords: List[str]
    # True when the query spans more than one intent.
    is_compound: bool = False
    # Additional intents that apply alongside the primary one.
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return a plain-dict view suitable for logging and API responses.

        Enum members are replaced by their string values; the other fields
        are passed through unchanged.
        """
        secondary_values = []
        for secondary in self.secondary_intents:
            secondary_values.append(secondary.value)

        return dict(
            intent=self.intent.value,
            confidence=self.confidence,
            matched_keywords=self.matched_keywords,
            is_compound=self.is_compound,
            secondary_intents=secondary_values,
        )


# --- INTENT KEYWORD PATTERNS (Organized by priority) ---
class IntentPatterns:
    """
    Penny's keyword patterns for intent matching.

    Every attribute is a list of lowercase keywords or phrases. The lists
    are grouped roughly in the order ``classify_intent_detailed`` consults
    them; that function, not this class, defines the actual precedence.
    """

    # --- EMERGENCY & SAFETY (always checked first) ---
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening"
    ]

    # --- TRANSLATION (high civic value) ---
    # Deliberately no bare "what is": translation is checked right after
    # emergency, so such a generic phrase would capture every
    # "what is ..." question (weather, events, resources). Requests like
    # "what is hello in spanish" are still caught by the "in <language>"
    # phrases below.
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "in hindi"
    ]

    # --- DOCUMENT PROCESSING (forms, PDFs) ---
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit"
    ]

    # --- ANALYSIS TOOLS ---
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone"
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory"
    ]

    # --- WEATHER + EVENTS (compound intent handling) ---
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit"
    ]

    # Date/time phrases. Not an intent on their own: the classifier uses
    # them together with EVENTS to flag a compound event query.
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening"
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration"
    ]

    # --- LOCAL RESOURCES (core civic mission) ---
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill"
    ]

    # --- CONVERSATIONAL ---
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya"
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how"
    ]


def classify_intent(message: str) -> str:
    """
    Classify a message and return only the intent name.

    Backward-compatible wrapper around ``classify_intent_detailed`` for
    callers that expect a plain string.

    Args:
        message: User's query text.

    Returns:
        Intent string (e.g., "weather", "events", "translation").
        "unknown" is returned if classification raises.
    """
    try:
        return classify_intent_detailed(message).intent.value
    except Exception as e:
        # Never let a classifier failure propagate to the caller.
        logger.error(f"Intent classification failed: {e}", exc_info=True)
    return IntentType.UNKNOWN.value


def classify_intent_detailed(message: str) -> IntentMatch:
    """
    Classify a message and return the intent with confidence and metadata.

    Checks run in a fixed order and the first hit wins:

    1. Emergency keywords (always first, confidence 1.0)
    2. Translation, document processing, sentiment analysis, bias detection
    3. Compound "events + weather/date" queries (primary intent EVENTS,
       secondary intent WEATHER)
    4. Weather, then local resources, then events
    5. Greeting, then help
    6. Otherwise UNKNOWN with confidence 0.0

    Args:
        message: User's query text. It is lowercased and stripped before
            matching.

    Returns:
        IntentMatch describing the winning intent. An empty or
        whitespace-only message, or any unexpected error raised while
        classifying, yields UNKNOWN with confidence 0.0.
    """
    if not message or not message.strip():
        logger.warning("Empty message received for intent classification")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    try:
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # Emergency is handled on its own: it must run first and it logs
        # at warning level.
        emergency_hits = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if emergency_hits:
            logger.warning(f"🚨 EMERGENCY intent detected: {emergency_hits}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,  # Always high confidence for safety
                matched_keywords=emergency_hits,
            )

        # Single-intent checks that outrank weather/events, in order.
        early_rules = (
            (IntentType.TRANSLATION, IntentPatterns.TRANSLATION, 0.9),
            (IntentType.DOCUMENT_PROCESSING, IntentPatterns.DOCUMENT_PROCESSING, 0.9),
            (IntentType.SENTIMENT_ANALYSIS, IntentPatterns.SENTIMENT_ANALYSIS, 0.85),
            (IntentType.BIAS_DETECTION, IntentPatterns.BIAS_DETECTION, 0.85),
        )
        for intent, patterns, score in early_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent,
                    confidence=score,
                    matched_keywords=hits,
                )

        # Weather, events and date phrases are gathered together because
        # the compound check needs all three.
        weather_hits = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_hits = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_hits = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # Compound detection: "What events are happening this weekend?"
        # or "What's the weather like for Sunday's festival?"
        # WEATHER is reported as secondary even when only a date phrase
        # (and no weather keyword) accompanied the event keyword.
        if event_hits and (weather_hits or date_hits):
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,  # Primary intent
                confidence=0.85,
                matched_keywords=event_hits + weather_hits + date_hits,
                is_compound=True,
                secondary_intents=[IntentType.WEATHER],
            )

        if weather_hits:
            return IntentMatch(
                intent=IntentType.WEATHER,
                confidence=0.9,
                matched_keywords=weather_hits,
            )

        resource_hits = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        if resource_hits:
            return IntentMatch(
                intent=IntentType.LOCAL_RESOURCES,
                confidence=0.9,
                matched_keywords=resource_hits,
            )

        # Plain event query (no date or weather context).
        if event_hits:
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_hits,
            )

        # Conversational intents come last; greeting outranks help.
        late_rules = (
            (IntentType.GREETING, IntentPatterns.GREETING, 0.8),
            (IntentType.HELP, IntentPatterns.HELP, 0.9),
        )
        for intent, patterns, score in late_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent,
                    confidence=score,
                    matched_keywords=hits,
                )

        # Nothing matched.
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    except Exception as e:
        # Best-effort: a classifier failure degrades to UNKNOWN rather
        # than propagating to the caller.
        logger.error(f"Error during intent classification: {e}", exc_info=True)
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )


# --- HELPER FUNCTIONS ---

def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
    """

    Finds which keywords from a pattern list appear in the user's message.

    

    Args:

        text: Normalized user message (lowercase)

        keywords: List of keywords to search for

        

    Returns:

        List of matched keywords (for debugging/logging)

    """
    try:
        matches = []
        for keyword in keywords:
            if keyword in text:
                matches.append(keyword)
        return matches
    except Exception as e:
        logger.error(f"Error finding keyword matches: {e}", exc_info=True)
        return []


def get_intent_description(intent: IntentType) -> str:
    """
    Return Penny's plain-English explanation of what an intent does.

    Useful for help systems and debugging.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description of the intent, or
        "Unknown intent type" if the value has no entry in the table.
    """
    descriptions = {
        IntentType.WEATHER: "Get current weather conditions and forecasts for your area",
        IntentType.GREETING: "Start a conversation with Penny",
        IntentType.LOCAL_RESOURCES: "Find community resources like shelters, libraries, and services",
        IntentType.EVENTS: "Discover local events and activities happening in your city",
        IntentType.TRANSLATION: "Translate text between 27 languages",
        IntentType.SENTIMENT_ANALYSIS: "Analyze the emotional tone of text",
        IntentType.BIAS_DETECTION: "Check text for potential bias or fairness issues",
        IntentType.DOCUMENT_PROCESSING: "Process PDFs and forms to extract information",
        IntentType.HELP: "Learn how to use Penny's features",
        IntentType.EMERGENCY: "Connect with emergency services and crisis support",
        # The dash below was mis-encoded; this text is user-facing.
        IntentType.UNKNOWN: "I'm not sure what you're asking — can you rephrase?",
    }
    return descriptions.get(intent, "Unknown intent type")


def get_all_supported_intents() -> Dict[str, str]:
    """
    List every supported intent together with its description.

    Useful for /help endpoints and documentation. The UNKNOWN fallback is
    left out because it is not something a user can ask for.

    Returns:
        Dictionary mapping intent values to descriptions, in enum
        declaration order. An empty dict is returned if building it fails.
    """
    try:
        supported = {}
        for intent in IntentType:
            if intent == IntentType.UNKNOWN:
                continue
            supported[intent.value] = get_intent_description(intent)
        return supported
    except Exception as e:
        logger.error(f"Error getting supported intents: {e}", exc_info=True)
        return {}


# --- FUTURE ML UPGRADE HOOK ---
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    Placeholder entry point for future ML-based classification.

    Planned upgrade path from keyword matching to embeddings:

    1. Load Gemma-7B or a sentence-transformers model
    2. Generate message embeddings
    3. Compare to intent prototype embeddings
    4. Return top match with confidence score

    Args:
        message: User's query
        use_embedding_model: If True, request the ML model. It is not
            implemented yet, so a warning is logged and the flag has no
            other effect.

    Returns:
        IntentMatch produced by the rule-based classifier.
    """
    ml_requested = bool(use_embedding_model)
    if ml_requested:
        logger.warning("ML-based classification not yet implemented. Falling back to rules.")

    # Rule-based classification is the only implementation for now.
    rule_based_result = classify_intent_detailed(message)
    return rule_based_result


# --- TESTING & VALIDATION ---
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    Report keywords that are listed under more than one intent.

    Only exact duplicates are detected; a keyword that is merely a
    substring of another intent's phrase is not reported. The DATE_TIME
    list is not part of the comparison.

    Returns:
        Dictionary keyed by "<intent1>_vs_<intent2>" (names in
        alphabetical order) whose values are the shared keywords, sorted
        so the result is stable from run to run. An empty dict means no
        overlaps, or that validation itself failed (which is logged).
    """
    try:
        all_patterns = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP,
        }

        overlaps = {}

        for intent1, keywords1 in all_patterns.items():
            for intent2, keywords2 in all_patterns.items():
                # Visit each unordered pair once: only keep the ordering
                # where the first name sorts before the second.
                if intent1 >= intent2:
                    continue

                shared = set(keywords1) & set(keywords2)
                if shared:
                    # sorted(): set iteration order for strings varies
                    # between interpreter runs.
                    overlaps[f"{intent1}_vs_{intent2}"] = sorted(shared)

        if overlaps:
            logger.warning(f"Found keyword overlaps between intents: {overlaps}")

        return overlaps

    except Exception as e:
        logger.error(f"Error validating intent patterns: {e}", exc_info=True)
        return {}


# --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    Write one structured log line describing a classification result.

    Intended for monitoring intent distribution and confidence patterns.
    Any failure while formatting or logging is itself logged and
    swallowed.

    Args:
        message: Original user message; only the first 50 characters are
            logged.
        result: IntentMatch classification result
    """
    try:
        # Keep at most 50 characters of the user's text in the log.
        if len(message) > 50:
            safe_message = message[:50] + "..."
        else:
            safe_message = message

        log_fields = (
            "Intent classified",
            f"intent={result.intent.value}",
            f"confidence={result.confidence:.2f}",
            f"compound={result.is_compound}",
            f"keywords={result.matched_keywords[:5]}",  # Limit logged keywords
            f"message_preview='{safe_message}'",
        )
        logger.info(" | ".join(log_fields))
    except Exception as e:
        logger.error(f"Error logging intent classification: {e}", exc_info=True)