pythonprincess committed on
Commit
fb720d8
·
verified ·
1 Parent(s): 5fcd544

Upload intents.py

Browse files
Files changed (1) hide show
  1. app/intents.py +516 -0
app/intents.py ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/intents.py
2
+ """
3
+ 🎯 Penny's Intent Classification System
4
+ Rule-based intent classifier designed for civic engagement queries.
5
+
6
+ CURRENT: Simple keyword matching (fast, predictable, debuggable)
7
+ FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)
8
+
9
+ This approach allows Penny to understand resident needs and route them
10
+ to the right civic systems — weather, resources, events, translation, etc.
11
+ """
12
+
13
+ import logging
14
+ from typing import Dict, List, Optional
15
+ from dataclasses import dataclass, field
16
+ from enum import Enum
17
+
18
+ # --- LOGGING SETUP (Azure-friendly) ---
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ # --- INTENT CATEGORIES (Enumerated for type safety) ---
23
class IntentType(str, Enum):
    """
    Closed set of intents that Penny can assign to a resident's message.

    Every member also subclasses ``str``, so a member compares equal to its
    plain string value (``IntentType.WEATHER == "weather"``). Iterating the
    enum yields members in the order they are declared below.
    """

    # Everyday informational and conversational intents
    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"

    # Text and document tooling
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"

    HELP = "help"
    EMERGENCY = "emergency"  # Critical safety routing
    GOVERNMENT = "government"  # Government officials and representatives

    # Fallback when nothing matches
    UNKNOWN = "unknown"
40
+
41
+
42
@dataclass
class IntentMatch:
    """
    Outcome of classifying one message.

    Bundles the chosen intent with the confidence assigned to it and the
    keywords that triggered the match, which helps when debugging routing.
    """
    intent: IntentType
    confidence: float  # expected to lie between 0.0 and 1.0
    matched_keywords: List[str]
    is_compound: bool = False  # set when the query touches several intents
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return a plain-dict view suitable for logs and API responses."""
        payload: Dict = {}
        payload["intent"] = self.intent.value
        payload["confidence"] = self.confidence
        payload["matched_keywords"] = self.matched_keywords
        payload["is_compound"] = self.is_compound
        # Enum members are flattened to their string values.
        payload["secondary_intents"] = [
            secondary.value for secondary in self.secondary_intents
        ]
        return payload
63
+
64
+
65
+ # --- INTENT KEYWORD PATTERNS (Organized by priority) ---
66
class IntentPatterns:
    """
    Keyword lists used by the rule-based intent classifier.

    Each attribute is a list of lowercase keywords or phrases. The order in
    which the lists are consulted is decided by ``classify_intent_detailed``,
    not by their position in this class.

    NOTE: the generic phrase "what is" is deliberately NOT part of
    ``TRANSLATION``. Translation is checked immediately after the emergency
    check, so that phrase routed any "what is ..." question (for example
    "what is the weather today") to translation. Questions such as
    "what is hello in spanish" are still caught by the "in <language>"
    phrases.
    """

    # 🚨 EMERGENCY & SAFETY (checked first by the classifier)
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening",
    ]

    # 🌍 TRANSLATION (high civic value)
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "in hindi",
    ]

    # 📄 DOCUMENT PROCESSING (forms, PDFs)
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit",
    ]

    # 🔍 ANALYSIS TOOLS
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone",
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory",
    ]

    # 🌤️ WEATHER + EVENTS (combined by the classifier for compound queries)
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit",
    ]

    # Date/time words; together with an event keyword they mark a compound query
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening",
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration",
    ]

    # 🏛️ GOVERNMENT & OFFICIALS (checked before local resources)
    GOVERNMENT = [
        "city council", "council member", "council representative", "councilman", "councilwoman",
        "mayor", "city manager", "commissioner", "representative", "representatives",
        "who is my", "who represents me", "my representative", "my council",
        "district", "ward", "elected official", "government official",
        "city hall", "municipal", "alderman", "aldermanic", "supervisor",
    ]

    # 🏛️ LOCAL RESOURCES (core civic mission)
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill",
    ]

    # 💬 CONVERSATIONAL (checked last)
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya",
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how",
    ]
156
+
157
+
158
def classify_intent(message: str) -> str:
    """
    🎯 Backward-compatible entry point that yields the intent as a string.

    Delegates to ``classify_intent_detailed`` and unwraps the enum value so
    existing API callers keep receiving a plain string.

    Args:
        message: User's query text

    Returns:
        Intent string (e.g., "weather", "events", "translation"), or
        "unknown" if classification raised for any reason
    """
    try:
        return classify_intent_detailed(message).intent.value
    except Exception as exc:
        # Never let a classifier failure propagate to the caller.
        logger.error(f"Intent classification failed: {exc}", exc_info=True)
    return IntentType.UNKNOWN.value
175
+
176
+
177
def classify_intent_detailed(message: str) -> IntentMatch:
    """
    🧠 Classify a message and report confidence plus the matched keywords.

    Decision order (first hit wins):
        1. Emergency keywords (safety routing, confidence 1.0)
        2. Translation, document processing, sentiment analysis,
           bias detection, government
        3. Resource + event keywords together, when a specific resource
           (library, shelter, bus, ...) is named -> local resources
        4. Event keywords combined with weather or date keywords ->
           compound "events" result with weather as secondary intent
        5. Weather, then local resources, then events
        6. Greeting, then help
        7. Otherwise unknown with confidence 0.0

    Args:
        message: User's query text

    Returns:
        IntentMatch object with full classification details. Blank input,
        non-string input and unexpected internal errors all produce an
        UNKNOWN match instead of raising.
    """
    # Non-string input used to reach ``message.strip()`` outside the try block
    # and raise AttributeError; treat it as unclassifiable instead.
    if message and not isinstance(message, str):
        logger.warning(
            "Non-string message of type %s received for intent classification",
            type(message).__name__,
        )
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    if not message or not message.strip():
        logger.warning("Empty message received for intent classification")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    try:
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # --- EMERGENCY: always evaluated first (critical safety routing) ---
        emergency_matches = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if emergency_matches:
            logger.warning(f"🚨 EMERGENCY intent detected: {emergency_matches}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,  # Always high confidence for safety
                matched_keywords=emergency_matches,
            )

        # --- Single-list intents, evaluated strictly in this order ---
        early_routes = (
            (IntentPatterns.TRANSLATION, IntentType.TRANSLATION, 0.9),
            (IntentPatterns.DOCUMENT_PROCESSING, IntentType.DOCUMENT_PROCESSING, 0.9),
            (IntentPatterns.SENTIMENT_ANALYSIS, IntentType.SENTIMENT_ANALYSIS, 0.85),
            (IntentPatterns.BIAS_DETECTION, IntentType.BIAS_DETECTION, 0.85),
            (IntentPatterns.GOVERNMENT, IntentType.GOVERNMENT, 0.9),
        )
        for keywords, intent, confidence in early_routes:
            matches = _find_keyword_matches(text, keywords)
            if matches:
                return IntentMatch(
                    intent=intent,
                    confidence=confidence,
                    matched_keywords=matches,
                )

        # --- Resources / weather / events need to be looked at together ---
        resource_matches = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        weather_matches = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_matches = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_matches = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # A concrete resource (e.g. "library") outranks a generic event word
        # (e.g. "show") when both kinds of keyword are present.
        if resource_matches and event_matches:
            specific_resource_keywords = (
                "library", "libraries", "food bank", "shelter", "clinic",
                "hospital", "pharmacy", "trash", "recycling", "transit", "bus",
            )
            if any(kw in text for kw in specific_resource_keywords):
                return IntentMatch(
                    intent=IntentType.LOCAL_RESOURCES,
                    confidence=0.9,
                    matched_keywords=resource_matches,
                )

        # Compound detection: "What events are happening this weekend?"
        # or "What's the weather like for Sunday's festival?"
        # Weather is reported as the secondary intent even when only a date
        # keyword accompanied the event keyword.
        if event_matches and (weather_matches or date_matches):
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,  # Primary intent
                confidence=0.85,
                matched_keywords=event_matches + weather_matches + date_matches,
                is_compound=True,
                secondary_intents=[IntentType.WEATHER],
            )

        # --- Plain weather, then resources, then events ---
        if weather_matches:
            return IntentMatch(
                intent=IntentType.WEATHER,
                confidence=0.9,
                matched_keywords=weather_matches,
            )

        if resource_matches:
            return IntentMatch(
                intent=IntentType.LOCAL_RESOURCES,
                confidence=0.9,
                matched_keywords=resource_matches,
            )

        if event_matches:
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_matches,
            )

        # --- Conversational intents come last ---
        late_routes = (
            (IntentPatterns.GREETING, IntentType.GREETING, 0.8),
            (IntentPatterns.HELP, IntentType.HELP, 0.9),
        )
        for keywords, intent, confidence in late_routes:
            matches = _find_keyword_matches(text, keywords)
            if matches:
                return IntentMatch(
                    intent=intent,
                    confidence=confidence,
                    matched_keywords=matches,
                )

        # --- FALLBACK: UNKNOWN ---
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    except Exception as e:
        # Classification must never raise; degrade to UNKNOWN instead.
        logger.error(f"Error during intent classification: {e}", exc_info=True)
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )
348
+
349
+
350
+ # --- HELPER FUNCTIONS ---
351
+
352
+ def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
353
+ """
354
+ Finds which keywords from a pattern list appear in the user's message.
355
+
356
+ Args:
357
+ text: Normalized user message (lowercase)
358
+ keywords: List of keywords to search for
359
+
360
+ Returns:
361
+ List of matched keywords (for debugging/logging)
362
+ """
363
+ try:
364
+ matches = []
365
+ for keyword in keywords:
366
+ if keyword in text:
367
+ matches.append(keyword)
368
+ return matches
369
+ except Exception as e:
370
+ logger.error(f"Error finding keyword matches: {e}", exc_info=True)
371
+ return []
372
+
373
+
374
def get_intent_description(intent: IntentType) -> str:
    """
    🗣️ Look up the plain-English summary of what an intent is for.
    Handy for help screens and for debugging.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description of the intent, or a generic
        "Unknown intent type" message when no description is registered
    """
    fallback = "Unknown intent type"
    catalogue = dict([
        (IntentType.WEATHER, "Get current weather conditions and forecasts for your area"),
        (IntentType.GREETING, "Start a conversation with Penny"),
        (IntentType.LOCAL_RESOURCES, "Find community resources like shelters, libraries, and services"),
        (IntentType.EVENTS, "Discover local events and activities happening in your city"),
        (IntentType.TRANSLATION, "Translate text between 27 languages"),
        (IntentType.SENTIMENT_ANALYSIS, "Analyze the emotional tone of text"),
        (IntentType.BIAS_DETECTION, "Check text for potential bias or fairness issues"),
        (IntentType.DOCUMENT_PROCESSING, "Process PDFs and forms to extract information"),
        (IntentType.HELP, "Learn how to use Penny's features"),
        (IntentType.EMERGENCY, "Connect with emergency services and crisis support"),
        (IntentType.GOVERNMENT, "Find information about city officials, council members, and representatives"),
        (IntentType.UNKNOWN, "I'm not sure what you're asking — can you rephrase?"),
    ])
    if intent in catalogue:
        return catalogue[intent]
    return fallback
400
+
401
+
402
def get_all_supported_intents() -> Dict[str, str]:
    """
    📋 Build a catalogue of every supported intent and its description.
    Intended for /help endpoints and documentation.

    Returns:
        Dictionary mapping intent values to descriptions. The UNKNOWN
        fallback intent is left out. An empty dictionary is returned if
        building the catalogue fails.
    """
    try:
        supported: Dict[str, str] = {}
        for member in IntentType:
            if member == IntentType.UNKNOWN:
                # The fallback is not something a user can ask for.
                continue
            supported[member.value] = get_intent_description(member)
        return supported
    except Exception as exc:
        logger.error(f"Error getting supported intents: {exc}", exc_info=True)
        return {}
419
+
420
+
421
+ # --- FUTURE ML UPGRADE HOOK ---
422
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    🔮 Stand-in for a future ML-based classifier.

    Planned upgrade path away from keyword matching:
        1. Load Gemma-7B or a sentence-transformers model
        2. Generate message embeddings
        3. Compare to intent prototype embeddings
        4. Return top match with confidence score

    Args:
        message: User's query
        use_embedding_model: If True, use ML model (not implemented yet);
            a warning is logged and the rule-based path is used anyway

    Returns:
        IntentMatch object (currently always produced by the rule-based
        classifier)
    """
    ml_requested = bool(use_embedding_model)
    if ml_requested:
        logger.warning("ML-based classification not yet implemented. Falling back to rules.")

    # Rule-based classification is the only implementation for now.
    rule_based_result = classify_intent_detailed(message)
    return rule_based_result
445
+
446
+
447
+ # --- TESTING & VALIDATION ---
448
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    🧪 Reports keywords that are listed under more than one intent.

    Only exact duplicates are detected (the same keyword string present in
    two lists); a keyword that is merely contained in another one, such as
    "help" and "help me", is not reported. The government keyword list is
    part of the check so it is validated like every other intent.

    Returns:
        Dictionary of overlapping keywords between intent pairs. Keys have
        the form "<intent_a>_vs_<intent_b>" with the two names in
        alphabetical order; values are sorted keyword lists. An empty
        dictionary means no overlaps (or that validation itself failed,
        in which case the error is logged).
    """
    from itertools import combinations  # local import keeps the block self-contained

    try:
        all_patterns = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "government": IntentPatterns.GOVERNMENT,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP,
        }

        overlaps = {}

        # Sorting the names keeps each pair alphabetical, so the keys stay
        # in the same "<a>_vs_<b>" form, and visits every pair once.
        for intent1, intent2 in combinations(sorted(all_patterns), 2):
            overlap = set(all_patterns[intent1]) & set(all_patterns[intent2])
            if overlap:
                # Sorted for deterministic output (set order is arbitrary).
                overlaps[f"{intent1}_vs_{intent2}"] = sorted(overlap)

        if overlaps:
            logger.warning(f"Found keyword overlaps between intents: {overlaps}")

        return overlaps

    except Exception as e:
        logger.error(f"Error validating intent patterns: {e}", exc_info=True)
        return {}
491
+
492
+
493
+ # --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
494
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    📊 Emit one structured log line describing a classification result.
    Meant for tracking intent distribution and confidence patterns.

    Args:
        message: Original user message; only the first 50 characters are
            logged (longer messages get a trailing "...")
        result: IntentMatch classification result
    """
    try:
        # Keep only a short preview of the user's text in the logs.
        if len(message) > 50:
            safe_message = message[:50] + "..."
        else:
            safe_message = message

        log_fields = [
            "Intent classified",
            f"intent={result.intent.value}",
            f"confidence={result.confidence:.2f}",
            f"compound={result.is_compound}",
            f"keywords={result.matched_keywords[:5]}",  # at most five keywords
            f"message_preview='{safe_message}'",
        ]
        logger.info(" | ".join(log_fields))
    except Exception as exc:
        logger.error(f"Error logging intent classification: {exc}", exc_info=True)