pythonprincess committed on
Commit
5fcd544
·
verified ·
1 Parent(s): b8dfce3

Delete app/intents.py

Browse files
Files changed (1) hide show
  1. app/intents.py +0 -496
app/intents.py DELETED
@@ -1,496 +0,0 @@
1
- # app/intents.py
2
- """
3
- 🎯 Penny's Intent Classification System
4
- Rule-based intent classifier designed for civic engagement queries.
5
-
6
- CURRENT: Simple keyword matching (fast, predictable, debuggable)
7
- FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)
8
-
9
- This approach allows Penny to understand resident needs and route them
10
- to the right civic systems — weather, resources, events, translation, etc.
11
- """
12
-
13
- import logging
14
- from typing import Dict, List, Optional
15
- from dataclasses import dataclass, field
16
- from enum import Enum
17
-
18
- # --- LOGGING SETUP (Azure-friendly) ---
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- # --- INTENT CATEGORIES (Enumerated for type safety) ---
23
class IntentType(str, Enum):
    """
    Closed set of intents that Penny can assign to a message.

    Members also subclass ``str``, so each one compares equal to its string
    value (for example ``IntentType.WEATHER == "weather"``). Declaration
    order is preserved when the enum is iterated.
    """

    # Everyday civic lookups
    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"

    # Text tooling
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"

    # Conversation management
    HELP = "help"

    # Critical safety routing
    EMERGENCY = "emergency"

    # Returned when nothing else applies
    UNKNOWN = "unknown"
39
-
40
-
41
@dataclass
class IntentMatch:
    """
    Result of classifying one message.

    Carries the chosen intent together with the evidence behind it so the
    decision can be logged and debugged.
    """

    # Primary intent selected for the message
    intent: IntentType
    # Score in the range 0.0 - 1.0
    confidence: float
    # Keywords from the pattern lists that were found in the message
    matched_keywords: List[str]
    # True if the query spans multiple intents
    is_compound: bool = False
    # Additional intents reported alongside the primary one
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return a plain-dict view suitable for logging and API responses."""
        secondary_values = [secondary.value for secondary in self.secondary_intents]
        return dict(
            intent=self.intent.value,
            confidence=self.confidence,
            matched_keywords=self.matched_keywords,
            is_compound=self.is_compound,
            secondary_intents=secondary_values,
        )
62
-
63
-
64
- # --- INTENT KEYWORD PATTERNS (Organized by priority) ---
65
class IntentPatterns:
    """
    Penny's keyword patterns for intent matching.

    Each attribute is a list of lowercase keywords or phrases. The classifier
    lowercases the message and looks for these entries in it, so every entry
    must be lowercase and specific enough not to fire on unrelated questions.
    """

    # 🚨 EMERGENCY & SAFETY (always checked first by the classifier)
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening",
    ]

    # 🌍 TRANSLATION (high civic value)
    # The bare phrase "what is" is deliberately NOT listed: translation is
    # checked right after emergencies, so that phrase routed every
    # "what is the weather / bus schedule / ..." question to translation.
    # Genuine requests are still caught by "translate", the "in <language>"
    # phrases, "say this in" and "how do i say".
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "in hindi",
    ]

    # 📄 DOCUMENT PROCESSING (forms, PDFs)
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit",
    ]

    # 🔍 ANALYSIS TOOLS
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone",
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory",
    ]

    # 🌤️ WEATHER + EVENTS (used together for compound intent handling)
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit",
    ]

    # Date/time keywords that suggest event context; not an intent by itself
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening",
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration",
    ]

    # 🏛️ LOCAL RESOURCES (core civic mission)
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill",
    ]

    # 💬 CONVERSATIONAL
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya",
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how",
    ]
146
-
147
-
148
def classify_intent(message: str) -> str:
    """
    🎯 Backward-compatible entry point that returns the intent as a string.

    Delegates to classify_intent_detailed() and keeps only the intent value.

    Args:
        message: User's query text

    Returns:
        Intent string (e.g., "weather", "events", "translation"). Any
        exception raised during classification is logged and reported as
        the "unknown" intent instead of propagating.
    """
    try:
        return classify_intent_detailed(message).intent.value
    except Exception as exc:
        logger.error(f"Intent classification failed: {exc}", exc_info=True)
        return IntentType.UNKNOWN.value
165
-
166
-
167
def classify_intent_detailed(message: str) -> IntentMatch:
    """
    🧠 Classify a message and report how the decision was reached.

    Evaluation order (the first rule that matches wins):
        1. Emergency keywords (always confidence 1.0)
        2. Translation, document processing, sentiment analysis, bias detection
        3. Local resources, when a resource keyword and an event keyword both
           match and the text names a concrete service (library, shelter, ...)
        4. Events combined with weather or date keywords (compound intent)
        5. Weather, then local resources, then events on their own
        6. Greeting, then help
        7. Unknown

    Args:
        message: User's query text

    Returns:
        IntentMatch with the chosen intent, its confidence and the keywords
        that triggered it. Blank input and internal errors both produce
        IntentType.UNKNOWN with confidence 0.0.
    """

    def _no_match() -> IntentMatch:
        # Fresh object per call so callers never share the keyword list.
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    if not (message and message.strip()):
        logger.warning("Empty message received for intent classification")
        return _no_match()

    try:
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # --- Safety routing comes before everything else ---
        emergency_hits = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if emergency_hits:
            logger.warning(f"🚨 EMERGENCY intent detected: {emergency_hits}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,  # Always high confidence for safety
                matched_keywords=emergency_hits,
            )

        # --- Single-intent tools, evaluated lazily in priority order ---
        tool_rules = (
            (IntentPatterns.TRANSLATION, IntentType.TRANSLATION, 0.9),
            (IntentPatterns.DOCUMENT_PROCESSING, IntentType.DOCUMENT_PROCESSING, 0.9),
            (IntentPatterns.SENTIMENT_ANALYSIS, IntentType.SENTIMENT_ANALYSIS, 0.85),
            (IntentPatterns.BIAS_DETECTION, IntentType.BIAS_DETECTION, 0.85),
        )
        for patterns, intent_type, score in tool_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent_type,
                    confidence=score,
                    matched_keywords=hits,
                )

        # --- Resources / weather / events need to be looked at together ---
        resource_hits = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        weather_hits = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_hits = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_hits = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # A concrete service name outranks a generic event word such as "show".
        if resource_hits and event_hits:
            concrete_services = [
                "library", "libraries", "food bank", "shelter", "clinic",
                "hospital", "pharmacy", "trash", "recycling", "transit", "bus",
            ]
            names_concrete_service = False
            for service in concrete_services:
                if service in text:
                    names_concrete_service = True
                    break

            if names_concrete_service:
                return IntentMatch(
                    intent=IntentType.LOCAL_RESOURCES,
                    confidence=0.9,
                    matched_keywords=resource_hits,
                )

        # Compound query, e.g. "What events are happening this weekend?"
        # or "What's the weather like for Sunday's festival?"
        has_time_or_weather = bool(weather_hits or date_hits)
        if event_hits and has_time_or_weather:
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,  # Primary intent
                confidence=0.85,
                matched_keywords=[*event_hits, *weather_hits, *date_hits],
                is_compound=True,
                secondary_intents=[IntentType.WEATHER],
            )

        # --- Plain weather, then resources, then events ---
        standalone_rules = (
            (weather_hits, IntentType.WEATHER, 0.9),
            (resource_hits, IntentType.LOCAL_RESOURCES, 0.9),
            (event_hits, IntentType.EVENTS, 0.85),
        )
        for hits, intent_type, score in standalone_rules:
            if hits:
                return IntentMatch(
                    intent=intent_type,
                    confidence=score,
                    matched_keywords=hits,
                )

        # --- Conversational intents are the last resort before UNKNOWN ---
        chat_rules = (
            (IntentPatterns.GREETING, IntentType.GREETING, 0.8),
            (IntentPatterns.HELP, IntentType.HELP, 0.9),
        )
        for patterns, intent_type, score in chat_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent_type,
                    confidence=score,
                    matched_keywords=hits,
                )

        # --- Nothing matched ---
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return _no_match()

    except Exception as exc:
        logger.error(f"Error during intent classification: {exc}", exc_info=True)
        return _no_match()
329
-
330
-
331
- # --- HELPER FUNCTIONS ---
332
-
333
- def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
334
- """
335
- Finds which keywords from a pattern list appear in the user's message.
336
-
337
- Args:
338
- text: Normalized user message (lowercase)
339
- keywords: List of keywords to search for
340
-
341
- Returns:
342
- List of matched keywords (for debugging/logging)
343
- """
344
- try:
345
- matches = []
346
- for keyword in keywords:
347
- if keyword in text:
348
- matches.append(keyword)
349
- return matches
350
- except Exception as e:
351
- logger.error(f"Error finding keyword matches: {e}", exc_info=True)
352
- return []
353
-
354
-
355
def get_intent_description(intent: IntentType) -> str:
    """
    🗣️ Look up the plain-English description for an intent.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description of the intent, or "Unknown intent type"
        when the value has no entry in the table.
    """
    catalog = {
        IntentType.WEATHER: "Get current weather conditions and forecasts for your area",
        IntentType.GREETING: "Start a conversation with Penny",
        IntentType.LOCAL_RESOURCES: "Find community resources like shelters, libraries, and services",
        IntentType.EVENTS: "Discover local events and activities happening in your city",
        IntentType.TRANSLATION: "Translate text between 27 languages",
        IntentType.SENTIMENT_ANALYSIS: "Analyze the emotional tone of text",
        IntentType.BIAS_DETECTION: "Check text for potential bias or fairness issues",
        IntentType.DOCUMENT_PROCESSING: "Process PDFs and forms to extract information",
        IntentType.HELP: "Learn how to use Penny's features",
        IntentType.EMERGENCY: "Connect with emergency services and crisis support",
        IntentType.UNKNOWN: "I'm not sure what you're asking — can you rephrase?",
    }
    try:
        return catalog[intent]
    except KeyError:
        return "Unknown intent type"
380
-
381
-
382
def get_all_supported_intents() -> Dict[str, str]:
    """
    📋 Build a catalogue of every intent except UNKNOWN.

    Returns:
        Dictionary mapping intent values to descriptions, in enum
        declaration order. An empty dictionary is returned (and the error
        logged) if building the catalogue fails.
    """
    try:
        supported = {}
        for member in IntentType:
            if member == IntentType.UNKNOWN:
                # UNKNOWN is a fallback, not something a user can ask for.
                continue
            supported[member.value] = get_intent_description(member)
        return supported
    except Exception as exc:
        logger.error(f"Error getting supported intents: {exc}", exc_info=True)
        return {}
399
-
400
-
401
- # --- FUTURE ML UPGRADE HOOK ---
402
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    🔮 PLACEHOLDER for future ML-based classification.

    No model is wired in yet: whatever the flag says, the message is
    classified by the rule-based classify_intent_detailed(). Asking for the
    embedding model only adds a warning to the log.

    Args:
        message: User's query
        use_embedding_model: If True, use ML model (not implemented yet)

    Returns:
        IntentMatch object (currently always produced by the rule-based
        classifier)
    """
    if not use_embedding_model:
        return classify_intent_detailed(message)

    logger.warning("ML-based classification not yet implemented. Falling back to rules.")
    return classify_intent_detailed(message)
425
-
426
-
427
- # --- TESTING & VALIDATION ---
428
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    🧪 Report keywords that are listed verbatim under more than one intent.

    Only exact duplicates are detected; a keyword that merely contains
    another one (such as "help me" and "help") is not reported.

    Returns:
        Dictionary keyed by "<intent_a>_vs_<intent_b>" (names in alphabetical
        order) whose values list the shared keywords. Empty when there are
        no duplicates or when validation itself fails.
    """
    try:
        pattern_table = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP,
        }

        conflicts = {}

        for left_name, left_keywords in pattern_table.items():
            left_set = set(left_keywords)
            for right_name, right_keywords in pattern_table.items():
                # Visit each unordered pair once, and never an intent with itself.
                if left_name < right_name:
                    shared = left_set & set(right_keywords)
                    if shared:
                        conflicts[f"{left_name}_vs_{right_name}"] = list(shared)

        if conflicts:
            logger.warning(f"Found keyword overlaps between intents: {conflicts}")

        return conflicts

    except Exception as exc:
        logger.error(f"Error validating intent patterns: {exc}", exc_info=True)
        return {}
471
-
472
-
473
- # --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
474
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    📊 Write one INFO log line summarising a classification.

    The line contains the intent, confidence, compound flag, at most five
    matched keywords and a preview of the message limited to 50 characters.
    Failures while building or writing the line are logged and swallowed.

    Args:
        message: Original user message (only a truncated preview is logged)
        result: IntentMatch classification result
    """
    try:
        # Keep only a short preview of the user's text in the log.
        if len(message) > 50:
            preview = message[:50] + "..."
        else:
            preview = message

        fields = [
            "Intent classified",
            f"intent={result.intent.value}",
            f"confidence={result.confidence:.2f}",
            f"compound={result.is_compound}",
            f"keywords={result.matched_keywords[:5]}",  # Limit logged keywords
            f"message_preview='{preview}'",
        ]
        logger.info(" | ".join(fields))
    except Exception as exc:
        logger.error(f"Error logging intent classification: {exc}", exc_info=True)