Voucher-Bot / enhanced_semantic_router_v2.py
Raj718's picture
Fix dark mode input font color and improve UI accessibility: fixed the dark mode input textbox font color for better visibility; increased the chatbot response font size; enhanced Spanish language support for search intent detection; improved email request detection and transit/school context handling; integrated the violation checker into the search flow; added comprehensive test suites for regex and LLM fallback validation.
67e153c
#!/usr/bin/env python3
"""
Enhanced Semantic Router V2 - Comprehensive Pattern Matching
This version addresses the gaps revealed by comprehensive testing,
including better handling of:
- More diverse what-if trigger patterns
- Expanded borough extraction patterns
- Better bedroom expression handling
- Improved rent/budget pattern matching
- Enhanced voucher type detection
- Better handling of informal language
"""
import re
from enum import Enum
from typing import Dict, List, Tuple, Optional, Any
from dataclasses import dataclass
class Intent(Enum):
    """Intents that the router can assign to a user message.

    Each member's value is the snake_case string form of its name.
    """

    # Request to find or browse listings
    SEARCH_LISTINGS = "search_listings"
    # Request to check violations
    CHECK_VIOLATIONS = "check_violations"
    # Question about voucher programs
    VOUCHER_INFO = "voucher_info"
    # Help or general informational question
    SHOW_HELP = "show_help"
    # Conversational "what if / try / how about" style adjustment
    WHAT_IF = "what_if"
    # Budget-style refinement such as "under $2000"
    PARAMETER_REFINEMENT = "parameter_refinement"
    # Returned by classify_intent when no pattern matches
    UNCLASSIFIED = "unclassified"
@dataclass
class PatternGroup:
    """Group of patterns with priority for intent classification"""
    # Regex source strings; classify_intent tests each one with re.search
    patterns: List[str]
    # classify_intent tries groups in descending priority order
    priority: int = 1
    # When True, classify_intent matches the patterns with re.IGNORECASE
    case_insensitive: bool = True
class EnhancedSemanticRouterV2:
    """Regex-driven router for housing-search chat messages.

    Two pattern tables are built once per instance:

    * ``intent_patterns`` maps each ``Intent`` to a ``PatternGroup``.
    * ``parameter_patterns`` maps a parameter name (``borough``,
      ``bedrooms``, ``max_rent``, ``voucher_type``) to an ordered list of
      regexes; for each parameter the first pattern that yields a usable
      value wins.
    """

    def __init__(self):
        self.intent_patterns = self._build_intent_patterns()
        self.parameter_patterns = self._build_parameter_patterns()

    def _build_intent_patterns(self) -> "Dict[Intent, PatternGroup]":
        """Build the intent classification table.

        Insertion order matters: ``classify_intent`` sorts by priority with a
        stable sort, so groups that share a priority are tried in the order
        they appear here.

        NOTE(review): WHAT_IF (priority 2) contains very generic triggers such
        as a bare "check" or "about", and is tried before the priority-1
        groups. A message like "check violations" therefore resolves to
        WHAT_IF, not CHECK_VIOLATIONS. Left unchanged because callers may
        rely on the current routing.
        """
        return {
            Intent.WHAT_IF: PatternGroup([
                # Core what-if patterns
                r'\b(?:what if|how about|what about)\b',
                r'\b(?:try|check|look)\b',
                r'\b(?:search|find|show)\s+(?:in|around|near)\b',
                r'\b(?:look|search)\s+(?:in|around|near|for)\b',
                r'\b(?:can you|could you|would you|should i)\s+(?:try|check|look|search)\b',
                r'\bmaybe\s+(?:try|check|look)\b',
                r'\b(?:let\'s|lets)\s+(?:try|check|look)\b',
                r'\b(?:actually|wait|hmm),?\s+(?:try|check|look|how about|what about)\b',
                r'\binstead\b',
                r'\b(?:please|por favor)\s+(?:try|check|look|search)\b',
                r'\b(?:i\'d like to|i want to)\s+(?:try|check|look|see)\b',
                # Informal variations
                r'\b(?:yo|tho|though)\b',
                r'\b(?:bout|about)\b',
                # "with" abbreviation.
                # NOTE(review): the trailing \b only matches when a word
                # character follows the slash ("w/section", not "w/ section").
                r'\bw/\b',
                r'@',  # "at" symbol
                # More specific question patterns
                r'\b(?:what about|how about|what happens if)\b.*\?\s*$',
                r'\b(?:would|could|should|might)\b.*\?\s*$',
                # Borough context. A bare borough-name pattern is deliberately
                # absent: it matched any borough mention and was too aggressive.
                r'\b(?:the city|downtown|uptown)\b',
                # Bedroom patterns in what-if context
                r'\b\d+\s*(?:br|bed|bedroom|bedrooms?)\b',
                r'\b(?:studio|one|two|three|four|five)\s+(?:bed|bedroom)\b',
                # Budget patterns in what-if context
                r'\$\d+',
                r'\b(?:under|max|budget|around|about)\s+\$?\d+\b',
                # Voucher patterns in what-if context
                r'\b(?:section\s*8|hasa|cityfheps|housing\s+voucher)\b',
            ], priority=2),
            Intent.PARAMETER_REFINEMENT: PatternGroup([
                r'\b(?:under|max|maximum|up to)\s+\$?\d+',
                r'\$\d+(?:\.\d{2})?(?:\s*max|\s*maximum|\s*or\s+less)?$',
                r'\bbudget\s+(?:of\s+)?\$?\d+',
                r'\b(?:less than|no more than)\s+\$?\d+',
            ], priority=3),
            Intent.SEARCH_LISTINGS: PatternGroup([
                # English patterns
                r'\b(?:show|get|find|display)\s+(?:me\s+)?(?:listings|apartments|places)',
                r'\b(?:i want|i need|looking for)\s+(?:listings|apartments|places)',
                r'\bsearch\s+(?:for\s+)?(?:listings|apartments|places)',
                r'\b(?:browse|look at)\s+(?:available\s+)?(?:listings|apartments|places)',
                r'\b(?:available|open)\s+(?:units?|apartments?|places?)\b',
                r'\blooking\s+(?:for|to rent|to find)\s+(?:a\s+)?(?:room|apartment|place|spot)\b',
                # Spanish search patterns
                r'\b(?:busco|estoy buscando|quiero|necesito)\s+(?:un\s+)?(?:apartamento|departamento|vivienda|casa|lugar|opción|opciones|listado|listados|alojamiento|habitación|habitaciones)\b',
                r'\btengo un vale\b.*(?:sección\s*8|section\s*8|voucher)',
                r'\bbuscar\s+(?:apartamento|vivienda|casa|lugar|listado|listados|alojamiento|habitación|habitaciones)\b',
                r'\b(?:sección\s*8|section\s*8|voucher)\b.*(?:bronx|brooklyn|manhattan|queens|staten\s+island)',
                r'\b(?:busco|estoy buscando)\s+(?:vivienda|apartamento|casa)\s+(?:en|en el|en la)\s+(?:bronx|brooklyn|manhattan|queens|staten\s+island)\b',
                r'\b(?:tengo|tiene)\s+(?:un\s+)?(?:vale|voucher)\s+(?:de\s+)?(?:sección\s*8|section\s*8)\b',
                r'\b(?:busco|estoy buscando)\s+(?:un\s+)?(?:apartamento|departamento|vivienda)\s+(?:que\s+)?(?:acepte|acepten|reciba|reciban)\s+(?:vales|vouchers|sección\s*8|section\s*8)\b',
            ], priority=1),
            Intent.CHECK_VIOLATIONS: PatternGroup([
                r'\b(?:check|verify|look up)\s+violations?\b',
                r'\bviolations?\s+(?:for|at|on)\b',
                r'\b(?:any|check for)\s+violations?\b',
            ], priority=1),
            Intent.VOUCHER_INFO: PatternGroup([
                r'\b(?:what is|tell me about|explain)\s+(?:section\s*8|hasa|cityfheps|housing\s+vouchers?|vouchers?)',
                r'\b(?:voucher|section\s*8|hasa|cityfheps)\s+(?:info|information|details)',
                r'\bhow\s+(?:does|do)\s+(?:vouchers?|section\s*8|hasa|cityfheps|housing\s+vouchers?)\s+work',
                r'\b(?:what are|what\'s)\s+(?:the\s+)?(?:requirements|eligibility|criteria)\s+for\s+(?:section\s*8|hasa|cityfheps|vouchers?)',
                r'\bhow\s+(?:do i|can i)\s+apply\s+for\s+(?:section\s*8|hasa|cityfheps|vouchers?)',
                r'\b(?:difference|differences)\s+between\s+(?:section\s*8|hasa|cityfheps)',
                r'\b(?:can you|could you)\s+explain\s+(?:voucher|section\s*8|hasa|cityfheps)',
                r'\b(?:what|which)\s+voucher\s+(?:types|programs|options)\b',
            ], priority=3),
            Intent.SHOW_HELP: PatternGroup([
                # Informational patterns (priority 2, so tried before SEARCH_LISTINGS)
                r'\b(?:what|how|why|tell me|explain)\b.*\b(?:benefits|definition|mean|process|steps|work|involve)\b',
                r'\b(?:what are|what is|what does)\b.*\b(?:housing|apartment|listing|search|finding|looking)\b',
                r'\b(?:how do|how does)\b.*\b(?:housing|apartment|listing|search|finding|looking)\b.*\bwork\b',
                r'\b(?:explain|tell me about)\b.*\b(?:housing|apartment|listing|search|finding|looking)\b',
                r'\b(?:how do people|how do most people|how do tenants|how do renters)\b.*\b(?:find|search|look for)\b',
                r'\b(?:what should i know|what do i need to know)\b.*\b(?:finding|searching|looking)\b',
                # Original help patterns
                r'\b(?:help|assistance|support)\b',
                r'\b(?:what can you do|how do i|how can i)\b',
                r'\b(?:commands|options|features)\b',
            ], priority=2),
        }

    def _build_parameter_patterns(self) -> Dict[str, List[str]]:
        """Build the parameter extraction table.

        Each list is ordered from most to least preferred; the extracted
        value is capture group 1 of the first pattern that matches.
        """
        # Shared pieces for the rent patterns. Plain concatenation is used
        # (not f-strings) because the regexes contain literal braces.
        amount = r'(\d{1,5}(?:,\d{3})*(?:\.\d{2})?)'
        # Rejects a number that is really a bedroom count ("about 2 bedrooms").
        # The leading \d* stops the engine from backtracking to a shorter
        # number just to slip past the lookahead.
        not_bedrooms = (
            r'(?!\d*\s*(?:br|beds?|bedrooms?|habitaci[oó]n(?:es)?|dormitorios?)\b)'
        )
        return {
            'borough': [
                # With prepositions (more specific, checked first)
                r'\b(?:in|around|near|at|from)\s+(manhattan|brooklyn|queens|bronx|staten\s+island|bk|si|bx|mnh|qns)\b',
                r'\b(?:search|look|check|try|find)\s+(?:in|around|near)\s+(manhattan|brooklyn|queens|bronx|staten\s+island|bk|si|bx|mnh|qns)\b',
                # Full borough names
                r'\b(manhattan)\b',
                r'\b(brooklyn)\b',
                r'\b(queens)\b',
                r'\b(?:the\s+)?(bronx)\b',
                r'\b(staten\s+island)\b',
                # Abbreviations
                r'\b(bk)\b',
                r'\b(si)\b',
                r'\b(bx)\b',
                r'\b(mnh)\b',
                r'\b(qns)\b',
                # Informal reference to Manhattan. The lookahead keeps the
                # voucher name "city fheps" from being read as a borough.
                r'\b(?:the\s+)?(city)\b(?!\s*fheps)',
                # Spanish borough patterns
                r'\b(?:en|en el|en la|del|de la)\s+(manhattan|brooklyn|queens|bronx|staten\s+island)\b',
                r'\b(?:busco|estoy buscando|quiero|necesito)\s+(?:vivienda|apartamento|casa)\s+(?:en|en el|en la)\s+(manhattan|brooklyn|queens|bronx|staten\s+island)\b',
                r'\b(?:vivienda|apartamento|casa)\s+(?:en|en el|en la)\s+(manhattan|brooklyn|queens|bronx|staten\s+island)\b',
            ],
            'bedrooms': [
                # Numeric + abbreviations
                r'\b(\d+)\s*(?:br|bed|bedroom|bedrooms?)\b',
                r'\b(\d+)(?:br|bed)\b',
                # Spelled out numbers
                r'\b(one|1)\s+(?:bed|bedroom)\b',
                r'\b(two|2)\s+(?:bed|bedroom)\b',
                r'\b(three|3)\s+(?:bed|bedroom)\b',
                r'\b(four|4)\s+(?:bed|bedroom)\b',
                r'\b(five|5)\s+(?:bed|bedroom)\b',
                # Studio handling (converted to 0 during post-processing)
                r'\b(studio)\b',
                # With context words
                r'\b(?:with|for|having)\s+(\d+)\s+(?:bed|bedroom|bedrooms?)\b',
                r'\b(\d+)(?:br|bed|bedroom)\s+(?:apartment|unit|place)\b',
                # Spanish bedroom patterns
                r'\b(\d+)\s+(?:habitación|habitaciones|dormitorio|dormitorios)\b',
                r'\b(?:con|de|para)\s+(\d+)\s+(?:habitación|habitaciones|dormitorio|dormitorios)\b',
                r'\b(?:apartamento|departamento|vivienda|casa)\s+(?:de|con)\s+(\d+)\s+(?:habitación|habitaciones|dormitorio|dormitorios)\b',
                r'\b(?:busco|estoy buscando|quiero|necesito)\s+(?:un\s+)?(?:apartamento|departamento|vivienda|casa)\s+(?:de|con)\s+(\d+)\s+(?:habitación|habitaciones|dormitorio|dormitorios)\b',
                # Spanish spelled out numbers
                r'\b(uno|una|1)\s+(?:habitación|dormitorio)\b',
                r'\b(dos|2)\s+(?:habitaciones|dormitorios)\b',
                r'\b(tres|3)\s+(?:habitaciones|dormitorios)\b',
                r'\b(cuatro|4)\s+(?:habitaciones|dormitorios)\b',
                r'\b(cinco|5)\s+(?:habitaciones|dormitorios)\b',
                # Spanish studio (converted to 0 during post-processing)
                r'\b(estudio)\b',
            ],
            'max_rent': [
                # "k" shorthand ("2k", "2.5k", "around 2k"). These come first
                # and capture the suffix, so the post-processor can scale by
                # 1000; otherwise "around 2k" would be read as 2 dollars.
                r'\b(\d+(?:\.\d+)?k)\b',
                r'\b(?:around|about|roughly)\s+(\d+(?:\.\d+)?k)\b',
                # Standard formats
                r'\$(\d{1,5}(?:,\d{3})*(?:\.\d{2})?)',
                r'\b(\d{1,5}(?:,\d{3})*)\s+dollars?\b',
                # With context words
                r'\b(?:under|max|maximum|up\s+to|budget(?:\s+of)?|around|about|roughly)\s+\$?'
                + amount + not_bedrooms,
                r'\bbudget\s+(?:of\s+)?\$?(\d{1,5}(?:,\d{3})*(?:\.\d{2})?)',
                # Range formats (extract first number)
                r'\$?(\d{1,5}(?:,\d{3})*)\s*(?:-|to)\s*\$?\d+',
                r'\bbetween\s+\$?(\d{1,5}(?:,\d{3})*)\s*(?:and|-|to)',
            ],
            'voucher_type': [
                # Section 8 variations
                r'\b(section\s*8|section-8)\b',
                r'\b(sec\s*8)\b',
                # HASA variations
                r'\b(hasa)\b',
                # CityFHEPS variations
                r'\b(cityfheps|city\s*fheps)\b',
                # Housing voucher
                r'\b(housing\s+voucher)\b',
                # Generic voucher references
                r'\b(voucher)s?\b',
                # Other NYC assistance programs
                r'\b(dss)\b',
                r'\b(hra)\b',
                # Context patterns
                r'\b(?:with|using|accepts?|welcome)\s+(section\s*8|hasa|cityfheps|housing\s+voucher)\b',
                r'\b(section\s*8|hasa|cityfheps|housing\s+voucher)\s+(?:ok|accepted?|welcome)\b',
                # Spanish voucher patterns
                r'\b(sección\s*8|section\s*8)\b',
                r'\b(vale|voucher)s?\b',
                r'\b(?:tengo|tiene)\s+(?:un\s+)?(vale|voucher)\s+(?:de\s+)?(?:sección\s*8|section\s*8)\b',
                # The next two now capture the voucher token; without a
                # capture group, group(1) would raise IndexError.
                r'\b(?:vale|voucher)\s+(?:de\s+)?(sección\s*8|section\s*8)\b',
                r'\b(?:apartamento|vivienda|casa)\s+(?:que\s+)?(?:acepte|acepten|reciba|reciban)\s+(vales|vouchers|sección\s*8|section\s*8)\b',
            ],
        }

    def classify_intent(self, message: str, context: Optional[Dict] = None) -> "Intent":
        """Return the first intent whose pattern group matches *message*.

        Groups are tried in descending ``priority``; ties keep the insertion
        order of ``intent_patterns`` because ``sorted`` is stable. The
        message is lower-cased before matching. ``context`` is accepted for
        interface compatibility and is not consulted.

        Returns ``Intent.UNCLASSIFIED`` when nothing matches or when
        *message* is empty/``None``.
        """
        if not message:
            return Intent.UNCLASSIFIED

        text = message.lower()
        ranked_groups = sorted(
            self.intent_patterns.items(),
            key=lambda item: item[1].priority,
            reverse=True,
        )
        for intent, group in ranked_groups:
            flags = re.IGNORECASE if group.case_insensitive else 0
            if any(re.search(pattern, text, flags) for pattern in group.patterns):
                return intent
        return Intent.UNCLASSIFIED

    def extract_parameters(self, message: str) -> Dict[str, Any]:
        """Extract normalised search parameters from *message*.

        For every parameter the patterns are tried in order. The first one
        that matches AND post-processes to a non-``None`` value is kept.
        Patterns without a capture group contribute their whole match.

        Returns an empty dict for an empty/``None`` message.
        """
        params: Dict[str, Any] = {}
        if not message:
            return params

        text = message.lower()
        for param_name, patterns in self.parameter_patterns.items():
            for pattern in patterns:
                match = re.search(pattern, text, re.IGNORECASE)
                if not match:
                    continue
                # Fall back to the whole match for group-less patterns
                # instead of raising IndexError.
                raw = match.group(1) if match.re.groups else match.group(0)
                if raw is None:
                    continue
                processed = self._process_parameter_value(param_name, raw.strip())
                if processed is not None:
                    params[param_name] = processed
                    break  # first usable match wins for this parameter
        return params

    def _process_parameter_value(self, param_name: str, value: str) -> Any:
        """Normalise one raw captured value.

        * ``borough``: canonical name where one is defined, else the
          whitespace-collapsed input.
        * ``bedrooms``: ``int`` (studio -> 0), or ``None`` if unparseable.
        * ``max_rent``: ``int`` dollars ("2.5k" -> 2500), or ``None`` if
          unparseable.
        * ``voucher_type``: canonical program key where one is defined,
          else the input unchanged.

        Any other parameter name returns the lower-cased, stripped value.
        """
        value = value.lower().strip()

        if param_name == 'borough':
            # Collapse inner whitespace so "staten   island" still maps.
            value = ' '.join(value.split())
            # NOTE(review): the abbreviations bk/si/bx/mnh/qns are passed
            # through unchanged, as before. Confirm whether downstream code
            # wants them expanded to full borough names.
            borough_mapping = {
                'staten island': 'staten_island',
                'city': 'manhattan',  # "the city" = Manhattan
            }
            return borough_mapping.get(value, value)

        if param_name == 'bedrooms':
            bedroom_words = {
                'studio': 0, 'estudio': 0,
                'one': 1, 'uno': 1, 'una': 1,
                'two': 2, 'dos': 2,
                'three': 3, 'tres': 3,
                'four': 4, 'cuatro': 4,
                'five': 5, 'cinco': 5,
            }
            if value in bedroom_words:
                return bedroom_words[value]
            try:
                return int(value)
            except ValueError:
                return None

        if param_name == 'max_rent':
            try:
                if value.endswith('k'):
                    # round() guards against float artefacts when scaling.
                    return int(round(float(value[:-1]) * 1000))
                # Commas are thousands separators; cents are truncated.
                return int(float(value.replace(',', '')))
            except ValueError:
                return None

        if param_name == 'voucher_type':
            # Compare on a separator-free key so "section 8", "section-8",
            # "section8" and "section  8" all normalise the same way.
            compact = re.sub(r'[\s\-]+', '', value)
            voucher_mapping = {
                'section8': 'section_8',
                'sec8': 'section_8',
                'sección8': 'section_8',
                'seccion8': 'section_8',
                'hasa': 'hasa',
                'cityfheps': 'cityfheps',
                'housingvoucher': 'housing_voucher',
                'voucher': 'housing_voucher',   # Generic
                'vouchers': 'housing_voucher',
                'vale': 'housing_voucher',      # Spanish generic
                'vales': 'housing_voucher',
                'dss': 'dss',
                'hra': 'hra',
            }
            return voucher_mapping.get(compact, value)

        return value

    def analyze_parameter_changes(self, new_params: Dict, context: Optional[Dict] = None) -> Dict[str, str]:
        """Label each new parameter relative to ``context['parameters']``.

        Labels:
            ``"new"``            not present previously (or no context).
            ``"refinement"``     present previously with a different value.
            ``"redundant"``      present previously with the same value.
            ``"retry_allowed"``  same borough as before, but the previous
                                 search reported ``last_result_count == 0``.
        """
        if not context:
            return {param: "new" for param in new_params}

        # Treat a missing or None parameter snapshot as empty.
        previous = context.get('parameters') or {}
        found_nothing_last_time = context.get('last_result_count', 0) == 0

        analysis: Dict[str, str] = {}
        for param, value in new_params.items():
            if param not in previous:
                analysis[param] = "new"
            elif previous[param] != value:
                analysis[param] = "refinement"
            elif param == 'borough' and found_nothing_last_time:
                analysis[param] = "retry_allowed"
            else:
                analysis[param] = "redundant"
        return analysis

    def _describe_what_if(self, params: Dict, param_analysis: Optional[Dict]) -> str:
        """Compose the WHAT_IF sentence from known parameters ('' if none apply)."""
        parts: List[str] = []

        if 'borough' in params:
            borough_name = params['borough'].replace('_', ' ').title()
            status = param_analysis.get('borough') if param_analysis else None
            if status == 'retry_allowed':
                parts.append(f"I'll search {borough_name} again (previous search found no listings)")
            elif status == 'redundant':
                parts.append(f"I'll search {borough_name} again")
            else:
                parts.append(f"I'll search {borough_name}")

        if 'bedrooms' in params:
            bedrooms = params['bedrooms']
            parts.append(
                "for studio apartments" if bedrooms == 0
                else f"for {bedrooms} bedroom apartments"
            )

        if 'max_rent' in params:
            rent = params['max_rent']
            parts.append(f"under ${rent:,}")

        if 'voucher_type' in params:
            voucher = params['voucher_type'].replace('_', ' ').title()
            parts.append(f"accepting {voucher}")

        return " ".join(parts) + "." if parts else ""

    def generate_response(self, intent: "Intent", params: Dict, param_analysis: Optional[Dict] = None, context: Optional[Dict] = None) -> str:
        """Return a short acknowledgement for *intent* and *params*.

        WHAT_IF responses are assembled from whichever of borough, bedrooms,
        max_rent and voucher_type are present. The other intents use fixed
        sentences. ``context`` is accepted for interface compatibility and
        is not consulted.
        """
        fallback = "I'll help you with that search."

        if intent == Intent.WHAT_IF:
            return self._describe_what_if(params, param_analysis) or fallback

        if intent == Intent.PARAMETER_REFINEMENT:
            if 'max_rent' in params:
                return f"I'll refine the search to show listings under ${params['max_rent']:,}."
            return "I'll refine the search parameters."

        fixed_replies = {
            Intent.SEARCH_LISTINGS: "I'll search for listings matching your criteria.",
            Intent.CHECK_VIOLATIONS: "I'll check for violations on that property.",
            Intent.VOUCHER_INFO: "I'll provide information about voucher programs.",
            Intent.SHOW_HELP: "I can help you search for apartments, check violations, and provide voucher information.",
        }
        return fixed_replies.get(intent, fallback)

    def process_message(self, message: str, context: Optional[Dict] = None) -> "Tuple[Intent, Dict, str]":
        """Classify *message*, extract its parameters and build a reply.

        Returns:
            ``(intent, params, response)``.
        """
        intent = self.classify_intent(message, context)
        params = self.extract_parameters(message)
        param_analysis = self.analyze_parameter_changes(params, context)
        response = self.generate_response(intent, params, param_analysis, context)
        return intent, params, response
# Convenience functions for backward compatibility
def classify_intent(message: str, context: Dict = None) -> Intent:
    """Module-level shortcut: classify *message* with a freshly built router.

    Kept so that callers written against the function-style API keep
    working; a new EnhancedSemanticRouterV2 is created on every call.
    """
    return EnhancedSemanticRouterV2().classify_intent(message, context)
def extract_parameters(message: str) -> Dict[str, Any]:
    """Module-level shortcut: extract parameters with a freshly built router.

    Kept so that callers written against the function-style API keep
    working; a new EnhancedSemanticRouterV2 is created on every call.
    """
    return EnhancedSemanticRouterV2().extract_parameters(message)
if __name__ == "__main__":
    # Manual smoke test: route a handful of sample messages and print the
    # intent, extracted parameters and generated reply for each.
    demo_router = EnhancedSemanticRouterV2()
    sample_messages = (
        "Look in Staten Island",
        "Try 2 bedrooms",
        "Budget of $3000",
        "With Section 8",
        "Check Brooklyn yo",
        "Around 2k",
        "Search in Manhattan",
        "Look for 3 bedroom",
    )

    print("🧪 Testing Enhanced Semantic Router V2")
    print("=" * 50)

    for msg in sample_messages:
        intent, params, response = demo_router.process_message(msg)
        print(f"\nMessage: '{msg}'")
        print(f"Intent: {intent.value}")
        print(f"Params: {params}")
        print(f"Response: {response}")