pythonprincess committed on
Commit
ab2887c
·
verified ·
1 Parent(s): 45d5985

Delete intents.py

Browse files
Files changed (1) hide show
  1. intents.py +0 -481
intents.py DELETED
@@ -1,481 +0,0 @@
1
- # app/intents.py
2
- """
3
- 🎯 Penny's Intent Classification System
4
- Rule-based intent classifier designed for civic engagement queries.
5
-
6
- CURRENT: Simple keyword matching (fast, predictable, debuggable)
7
- FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)
8
-
9
- This approach allows Penny to understand resident needs and route them
10
- to the right civic systems — weather, resources, events, translation, etc.
11
- """
12
-
13
- import logging
14
- from typing import Dict, List, Optional
15
- from dataclasses import dataclass, field
16
- from enum import Enum
17
-
18
- # --- LOGGING SETUP (Azure-friendly) ---
19
- logger = logging.getLogger(__name__)
20
-
21
-
22
- # --- INTENT CATEGORIES (Enumerated for type safety) ---
23
class IntentType(str, Enum):
    """
    Closed set of intent labels that Penny's classifier can return.

    Each member also subclasses ``str``, so a member compares equal to its
    plain string value (``IntentType.WEATHER == "weather"``). Declaration
    order is preserved when the enum is iterated.
    """

    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"
    HELP = "help"
    # Safety-critical label: the classifier checks for this one before any other.
    EMERGENCY = "emergency"
    # Fallback label used when nothing matched or classification failed.
    UNKNOWN = "unknown"
39
-
40
-
41
@dataclass
class IntentMatch:
    """
    Result record produced by the intent classifier.

    Bundles the chosen intent with a confidence score and the keywords that
    triggered it, plus optional information about secondary intents when a
    single message spans more than one topic.
    """
    intent: IntentType
    confidence: float  # intended range is 0.0 - 1.0 (not enforced here)
    matched_keywords: List[str]
    is_compound: bool = False  # True when the query spans multiple intents
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Return a plain dictionary view for logging and API responses."""
        # Enum members are flattened to their string values so the result
        # contains only built-in types.
        secondary_values = [secondary.value for secondary in self.secondary_intents]
        return dict(
            intent=self.intent.value,
            confidence=self.confidence,
            matched_keywords=self.matched_keywords,
            is_compound=self.is_compound,
            secondary_intents=secondary_values,
        )
62
-
63
-
64
- # --- INTENT KEYWORD PATTERNS (Organized by priority) ---
65
class IntentPatterns:
    """
    Keyword and phrase lists used by the rule-based intent classifier.

    Every entry is lowercase because the classifier lowercases the message
    before matching. The lists are grouped roughly by how early
    ``classify_intent_detailed`` consults them: safety-critical phrases
    first, conversational phrases last.
    """

    # Safety routing: consulted before every other list.
    # NOTE(review): "help me" and "urgent" are broad and also occur in
    # ordinary requests ("can you help me find a library") - confirm that
    # routing those to EMERGENCY is the intended trade-off.
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening",
    ]

    # Translation requests.
    # The generic phrase "what is" was removed from this list: translation is
    # checked ahead of weather, events and resources, so any question that
    # starts with "what is ..." (e.g. "what is the weather today") was being
    # classified as a translation request. Real translation queries are still
    # caught by "translate", "how do i say" and the "in <language>" phrases.
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "in hindi",
    ]

    # Document processing (forms, PDFs).
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit",
    ]

    # Text-analysis tools.
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone",
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory",
    ]

    # Weather vocabulary.
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit",
    ]

    # Date/time phrases. These do not map to an intent of their own; the
    # classifier combines them with EVENTS matches to detect compound queries.
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening",
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration",
    ]

    # Local civic resources and services.
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill",
    ]

    # Conversational phrases, consulted last.
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya",
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how",
    ]
146
-
147
-
148
def classify_intent(message: str) -> str:
    """
    Classify a message and return only the intent label.

    Thin string-returning wrapper around ``classify_intent_detailed`` kept
    for callers that expect a plain label rather than a result object.

    Args:
        message: User's query text

    Returns:
        Intent string (e.g., "weather", "events", "translation"); the
        "unknown" label if classification raises.
    """
    try:
        return classify_intent_detailed(message).intent.value
    except Exception as exc:
        # Never let a classifier failure propagate to the caller.
        logger.error(f"Intent classification failed: {exc}", exc_info=True)
        return IntentType.UNKNOWN.value
165
-
166
-
167
def classify_intent_detailed(message: str) -> IntentMatch:
    """
    Classify a message and return the full match details.

    Rules are evaluated in a fixed order and the first one that matches wins:

    1. Emergency keywords (always confidence 1.0)
    2. Translation, document processing, sentiment analysis, bias detection
    3. Events combined with weather or date/time keywords (compound result
       with EVENTS as primary and WEATHER as secondary intent)
    4. Weather, local resources, events
    5. Greeting, help
    6. Otherwise UNKNOWN with confidence 0.0

    Args:
        message: User's query text

    Returns:
        IntentMatch describing the winning intent, its confidence and the
        keywords that matched. UNKNOWN is returned for blank input and when
        an exception occurs during classification.
    """
    # Blank or missing input cannot match anything.
    if not message or not message.strip():
        logger.warning("Empty message received for intent classification")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    try:
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # Safety routing is checked before anything else.
        emergency_hits = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if emergency_hits:
            logger.warning(f"🚨 EMERGENCY intent detected: {emergency_hits}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,
                matched_keywords=emergency_hits,
            )

        # Single-list rules that outrank weather/events/resources.
        leading_rules = (
            (IntentPatterns.TRANSLATION, IntentType.TRANSLATION, 0.9),
            (IntentPatterns.DOCUMENT_PROCESSING, IntentType.DOCUMENT_PROCESSING, 0.9),
            (IntentPatterns.SENTIMENT_ANALYSIS, IntentType.SENTIMENT_ANALYSIS, 0.85),
            (IntentPatterns.BIAS_DETECTION, IntentType.BIAS_DETECTION, 0.85),
        )
        for patterns, intent, confidence in leading_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent,
                    confidence=confidence,
                    matched_keywords=hits,
                )

        # Weather, events and date phrases are gathered together because the
        # compound rule needs all three.
        weather_hits = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_hits = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_hits = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # Compound query, e.g. "What events are happening this weekend?" or
        # "What's the weather like for Sunday's festival?"
        if event_hits and (weather_hits or date_hits):
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_hits + weather_hits + date_hits,
                is_compound=True,
                secondary_intents=[IntentType.WEATHER],
            )

        if weather_hits:
            return IntentMatch(
                intent=IntentType.WEATHER,
                confidence=0.9,
                matched_keywords=weather_hits,
            )

        resource_hits = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        if resource_hits:
            return IntentMatch(
                intent=IntentType.LOCAL_RESOURCES,
                confidence=0.9,
                matched_keywords=resource_hits,
            )

        # Events on their own (no weather or date context).
        if event_hits:
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_hits,
            )

        # Conversational intents are the lowest-priority rules.
        trailing_rules = (
            (IntentPatterns.GREETING, IntentType.GREETING, 0.8),
            (IntentPatterns.HELP, IntentType.HELP, 0.9),
        )
        for patterns, intent, confidence in trailing_rules:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=intent,
                    confidence=confidence,
                    matched_keywords=hits,
                )

        # Nothing matched.
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )

    except Exception as exc:
        logger.error(f"Error during intent classification: {exc}", exc_info=True)
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )
314
-
315
-
316
- # --- HELPER FUNCTIONS ---
317
-
318
- def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
319
- """
320
- Finds which keywords from a pattern list appear in the user's message.
321
-
322
- Args:
323
- text: Normalized user message (lowercase)
324
- keywords: List of keywords to search for
325
-
326
- Returns:
327
- List of matched keywords (for debugging/logging)
328
- """
329
- try:
330
- matches = []
331
- for keyword in keywords:
332
- if keyword in text:
333
- matches.append(keyword)
334
- return matches
335
- except Exception as e:
336
- logger.error(f"Error finding keyword matches: {e}", exc_info=True)
337
- return []
338
-
339
-
340
def get_intent_description(intent: IntentType) -> str:
    """
    Look up the plain-English description for an intent.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description of the intent, or "Unknown intent type"
        when the value has no entry in the table.
    """
    description_table = {
        IntentType.WEATHER: "Get current weather conditions and forecasts for your area",
        IntentType.GREETING: "Start a conversation with Penny",
        IntentType.LOCAL_RESOURCES: "Find community resources like shelters, libraries, and services",
        IntentType.EVENTS: "Discover local events and activities happening in your city",
        IntentType.TRANSLATION: "Translate text between 27 languages",
        IntentType.SENTIMENT_ANALYSIS: "Analyze the emotional tone of text",
        IntentType.BIAS_DETECTION: "Check text for potential bias or fairness issues",
        IntentType.DOCUMENT_PROCESSING: "Process PDFs and forms to extract information",
        IntentType.HELP: "Learn how to use Penny's features",
        IntentType.EMERGENCY: "Connect with emergency services and crisis support",
        IntentType.UNKNOWN: "I'm not sure what you're asking — can you rephrase?",
    }
    if intent in description_table:
        return description_table[intent]
    return "Unknown intent type"
365
-
366
-
367
def get_all_supported_intents() -> Dict[str, str]:
    """
    Build a label-to-description mapping for every user-facing intent.

    The UNKNOWN fallback is left out because it is not something a user
    can ask for.

    Returns:
        Dictionary mapping intent values to descriptions, in enum
        declaration order; an empty dictionary if an error occurs.
    """
    try:
        supported = {}
        for intent in IntentType:
            if intent == IntentType.UNKNOWN:
                continue
            supported[intent.value] = get_intent_description(intent)
        return supported
    except Exception as exc:
        logger.error(f"Error getting supported intents: {exc}", exc_info=True)
        return {}
384
-
385
-
386
- # --- FUTURE ML UPGRADE HOOK ---
387
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    Placeholder entry point for a future ML-based classifier.

    No model is loaded or used here. Whatever the flag says, the call is
    answered by the rule-based ``classify_intent_detailed``; requesting the
    embedding model only adds a warning to the log.

    Args:
        message: User's query
        use_embedding_model: If True, use ML model (not implemented yet)

    Returns:
        IntentMatch object produced by the rule-based classifier
    """
    if not use_embedding_model:
        return classify_intent_detailed(message)

    # The caller asked for the ML path, which does not exist yet.
    logger.warning("ML-based classification not yet implemented. Falling back to rules.")
    return classify_intent_detailed(message)
410
-
411
-
412
- # --- TESTING & VALIDATION ---
413
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    Report keywords that appear verbatim in more than one intent list.

    Only exact string equality is checked, so a phrase that merely contains
    another keyword (e.g. "help me" vs "help") is not reported. The
    DATE_TIME list is not part of the comparison.

    Returns:
        Dictionary keyed by "<intent1>_vs_<intent2>" whose values list the
        shared keywords; empty when there are no overlaps or an error occurs.
    """
    try:
        pattern_table = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP,
        }

        conflicts = {}

        for left_name, left_keywords in pattern_table.items():
            for right_name, right_keywords in pattern_table.items():
                # Visit each unordered pair once, and never a list against
                # itself: only the alphabetically ordered pairing proceeds.
                if left_name < right_name:
                    shared = set(left_keywords) & set(right_keywords)
                    if shared:
                        conflicts[f"{left_name}_vs_{right_name}"] = list(shared)

        if conflicts:
            logger.warning(f"Found keyword overlaps between intents: {conflicts}")

        return conflicts

    except Exception as exc:
        logger.error(f"Error validating intent patterns: {exc}", exc_info=True)
        return {}
456
-
457
-
458
- # --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
459
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    Write a one-line summary of a classification result to the module logger.

    Only the first 50 characters of the message and the first five matched
    keywords are included, to limit how much user text ends up in logs.
    Any error raised while logging is caught and logged instead of
    propagating.

    Args:
        message: Original user message (truncated before logging)
        result: IntentMatch classification result
    """
    try:
        # Keep at most 50 characters of user text in the log line.
        if len(message) > 50:
            safe_message = message[:50] + "..."
        else:
            safe_message = message

        summary_line = (
            f"Intent classified | "
            f"intent={result.intent.value} | "
            f"confidence={result.confidence:.2f} | "
            f"compound={result.is_compound} | "
            f"keywords={result.matched_keywords[:5]} | "
            f"message_preview='{safe_message}'"
        )
        logger.info(summary_line)
    except Exception as exc:
        logger.error(f"Error logging intent classification: {exc}", exc_info=True)