Spaces:
Paused
Paused
Upload 25 files
Browse files- README.md +341 -0
- atlanta_ga.json +140 -0
- bias_utils.py +197 -0
- birmingham_al.json +122 -0
- chesterfield_va.json +109 -0
- el_paso_tx.json +115 -0
- embeddings +0 -0
- event_weather.py +761 -0
- gemma_utils.py +244 -0
- handler.py +31 -0
- intents.py +481 -0
- layoutlm_utils.py +359 -0
- location_utils.py +717 -0
- logging_utils.py +778 -0
- main.py +660 -0
- model_config.json +47 -0
- model_loader.py +861 -0
- orchestrator.py +1315 -0
- providence_ri.json +115 -0
- router.py +802 -0
- seattle_wa.json +109 -0
- sentiment_utils.py +396 -0
- tool_agent.py +666 -0
- translation_utils.py +598 -0
- weather_agent.py +529 -0
README.md
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🤖 PENNY - Civic Engagement AI Assistant
|
| 2 |
+
|
| 3 |
+
**Personal Civic Engagement Nurturing Network sYstem**
|
| 4 |
+
|
| 5 |
+
[](https://www.python.org/downloads/)
|
| 6 |
+
[](https://huggingface.co/)
|
| 7 |
+
[](https://fastapi.tiangolo.com/)
|
| 8 |
+
[](LICENSE)
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
## 📋 Overview
|
| 13 |
+
|
| 14 |
+
**PENNY** is a production-grade, AI-powered civic engagement assistant designed to help citizens connect with local government services, community events, and civic resources. This Hugging Face model provides the core orchestration engine that coordinates multiple specialized AI models to deliver warm, helpful, and contextually-aware assistance for civic participation.
|
| 15 |
+
|
| 16 |
+
### ✨ Key Features
|
| 17 |
+
|
| 18 |
+
- **🏛️ Civic Information**: Local government services, voting info, public meetings
|
| 19 |
+
- **📅 Community Events**: Real-time local events discovery and recommendations
|
| 20 |
+
- **🌤️ Weather Integration**: Context-aware weather updates with outfit suggestions
|
| 21 |
+
- **🌍 Multi-language Support**: Translation services for inclusive access
|
| 22 |
+
- **🛡️ Safety & Bias Detection**: Built-in content moderation and bias analysis
|
| 23 |
+
- **🔒 Privacy-First**: PII sanitization and secure logging
|
| 24 |
+
- **⚡ High Performance**: Async architecture with intelligent caching
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
## 🧠 Model Architecture
|
| 29 |
+
|
| 30 |
+
PENNY is a **multi-model orchestration system** that coordinates 5 specialized models:
|
| 31 |
+
|
| 32 |
+
1. **Gemma** - Core language understanding and response generation
|
| 33 |
+
2. **LayoutLM** - Document processing and civic resource extraction
|
| 34 |
+
3. **Sentiment Analysis Model** - Emotion detection and empathetic responses
|
| 35 |
+
4. **Bias Detection Model** - Content moderation and fairness checking
|
| 36 |
+
5. **Translation Model** - Multi-language support for inclusive access
|
| 37 |
+
|
| 38 |
+
The orchestrator intelligently routes queries to the appropriate models and synthesizes their outputs into cohesive, helpful responses.
|
| 39 |
+
|
| 40 |
+
---
|
| 41 |
+
|
| 42 |
+
## 🚀 Quick Start
|
| 43 |
+
|
| 44 |
+
### Using the Hugging Face Inference API
|
| 45 |
+
|
| 46 |
+
```python
|
| 47 |
+
from huggingface_hub import InferenceClient
|
| 48 |
+
|
| 49 |
+
client = InferenceClient(model="your-username/penny-v2", token="your_hf_token")
|
| 50 |
+
|
| 51 |
+
response = client.post(
|
| 52 |
+
json={
|
| 53 |
+
"inputs": "What community events are happening this weekend?",
|
| 54 |
+
"tenant_id": "norfolk",
|
| 55 |
+
"user_id": "user123",
|
| 56 |
+
"session_id": "session456"
|
| 57 |
+
}
|
| 58 |
+
)
|
| 59 |
+
|
| 60 |
+
print(response)
|
| 61 |
+
```
|
| 62 |
+
|
| 63 |
+
### Using with Python Requests
|
| 64 |
+
|
| 65 |
+
```python
|
| 66 |
+
import requests
|
| 67 |
+
|
| 68 |
+
API_URL = "https://api-inference.huggingface.co/models/your-username/penny-v2"
|
| 69 |
+
headers = {"Authorization": f"Bearer {YOUR_HF_TOKEN}"}
|
| 70 |
+
|
| 71 |
+
def query(payload):
|
| 72 |
+
response = requests.post(API_URL, headers=headers, json=payload)
|
| 73 |
+
return response.json()
|
| 74 |
+
|
| 75 |
+
output = query({
|
| 76 |
+
"inputs": "Tell me about voter registration",
|
| 77 |
+
"tenant_id": "norfolk"
|
| 78 |
+
})
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### Response Format
|
| 82 |
+
|
| 83 |
+
```json
|
| 84 |
+
{
|
| 85 |
+
"response": "Hi! Here are some great community events happening this weekend in Norfolk...",
|
| 86 |
+
"intent": "community_events",
|
| 87 |
+
"tenant_id": "norfolk",
|
| 88 |
+
"session_id": "session456",
|
| 89 |
+
"timestamp": "2025-11-26T10:30:00Z",
|
| 90 |
+
"response_time_ms": 245
|
| 91 |
+
}
|
| 92 |
+
```
|
| 93 |
+
|
| 94 |
+
---
|
| 95 |
+
|
| 96 |
+
## 🏗️ Model Structure
|
| 97 |
+
|
| 98 |
+
```
|
| 99 |
+
penny-v2/
|
| 100 |
+
├── app/ # Core application logic
|
| 101 |
+
│ ├── orchestrator.py # Central coordination engine ⭐
|
| 102 |
+
│ ├── model_loader.py # ML model management
|
| 103 |
+
│ ├── intents.py # Intent classification
|
| 104 |
+
│ ├── tool_agent.py # Civic data & events agent
|
| 105 |
+
│ ├── weather_agent.py # Weather & recommendations
|
| 106 |
+
│ └── utils/ # Logging, location, safety utilities
|
| 107 |
+
├── models/ # ML model services
|
| 108 |
+
│ ├── translation/ # Multi-language translation
|
| 109 |
+
│ ├── sentiment/ # Sentiment analysis
|
| 110 |
+
│ ├── bias/ # Bias detection
|
| 111 |
+
│ ├── gemma/ # Core LLM
|
| 112 |
+
│ └── layoutlm/ # Document understanding
|
| 113 |
+
├── data/ # Civic resources & training data
|
| 114 |
+
│ ├── civic_pdfs/ # Local government documents
|
| 115 |
+
│ ├── events/ # Community events data
|
| 116 |
+
│ ├── resources/ # Civic resource database
|
| 117 |
+
│ └── embeddings/ # Pre-computed embeddings
|
| 118 |
+
├── handler.py # Hugging Face inference handler
|
| 119 |
+
├── model_config.json # Model configuration
|
| 120 |
+
└── requirements.txt # Python dependencies
|
| 121 |
+
```
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
## 🔧 Configuration
|
| 126 |
+
|
| 127 |
+
### Model Parameters
|
| 128 |
+
|
| 129 |
+
The orchestrator supports the following input parameters:
|
| 130 |
+
|
| 131 |
+
| Parameter | Type | Description | Required | Default |
|
| 132 |
+
|-----------|------|-------------|----------|---------|
|
| 133 |
+
| `inputs` | string | User's message/query | Yes | - |
|
| 134 |
+
| `tenant_id` | string | City/region identifier | No | `default` |
|
| 135 |
+
| `user_id` | string | User identifier for tracking | No | `anonymous` |
|
| 136 |
+
| `session_id` | string | Conversation session ID | No | Auto-generated |
|
| 137 |
+
| `language` | string | Preferred response language | No | `en` |
|
| 138 |
+
|
| 139 |
+
### Environment Variables
|
| 140 |
+
|
| 141 |
+
For self-hosted deployments, configure:
|
| 142 |
+
|
| 143 |
+
| Variable | Description | Required |
|
| 144 |
+
|----------|-------------|----------|
|
| 145 |
+
| `AZURE_MAPS_KEY` | Azure Maps API key (weather) | Recommended |
|
| 146 |
+
| `LOG_LEVEL` | Logging level (`INFO`, `DEBUG`) | No |
|
| 147 |
+
| `TENANT_ID` | Default tenant/city | No |
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## 🎯 Use Cases
|
| 152 |
+
|
| 153 |
+
### Civic Information Queries
|
| 154 |
+
```python
|
| 155 |
+
query({"inputs": "How do I register to vote in Norfolk?"})
|
| 156 |
+
query({"inputs": "When is the next city council meeting?"})
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
### Community Events
|
| 160 |
+
```python
|
| 161 |
+
query({"inputs": "What events are happening this weekend?"})
|
| 162 |
+
query({"inputs": "Are there any family-friendly activities nearby?"})
|
| 163 |
+
```
|
| 164 |
+
|
| 165 |
+
### Weather & Recommendations
|
| 166 |
+
```python
|
| 167 |
+
query({"inputs": "What's the weather like today?"})
|
| 168 |
+
query({"inputs": "Should I bring an umbrella tomorrow?"})
|
| 169 |
+
```
|
| 170 |
+
|
| 171 |
+
### Multi-language Support
|
| 172 |
+
```python
|
| 173 |
+
query({
|
| 174 |
+
"inputs": "¿Cómo registro para votar?",
|
| 175 |
+
"language": "es"
|
| 176 |
+
})
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
---
|
| 180 |
+
|
| 181 |
+
## 🔌 Integration Guide
|
| 182 |
+
|
| 183 |
+
### Backend Integration (Azure)
|
| 184 |
+
|
| 185 |
+
PENNY is designed to work seamlessly with Azure backend services:
|
| 186 |
+
|
| 187 |
+
```python
|
| 188 |
+
# Azure Function integration example
|
| 189 |
+
import azure.functions as func
|
| 190 |
+
from huggingface_hub import InferenceClient
|
| 191 |
+
|
| 192 |
+
def main(req: func.HttpRequest) -> func.HttpResponse:
|
| 193 |
+
client = InferenceClient(model="your-username/penny-v2")
|
| 194 |
+
|
| 195 |
+
user_message = req.params.get('message')
|
| 196 |
+
tenant = req.params.get('tenant_id', 'default')
|
| 197 |
+
|
| 198 |
+
response = client.post(json={
|
| 199 |
+
"inputs": user_message,
|
| 200 |
+
"tenant_id": tenant
|
| 201 |
+
})
|
| 202 |
+
|
| 203 |
+
return func.HttpResponse(
|
| 204 |
+
response.json(),
|
| 205 |
+
mimetype="application/json"
|
| 206 |
+
)
|
| 207 |
+
```
|
| 208 |
+
|
| 209 |
+
### Frontend Integration (Lovable)
|
| 210 |
+
|
| 211 |
+
Connect to PENNY from your Lovable frontend:
|
| 212 |
+
|
| 213 |
+
```javascript
|
| 214 |
+
// Lovable component example
|
| 215 |
+
async function askPenny(message, tenantId) {
|
| 216 |
+
const response = await fetch(
|
| 217 |
+
'https://api-inference.huggingface.co/models/your-username/penny-v2',
|
| 218 |
+
{
|
| 219 |
+
headers: {
|
| 220 |
+
'Authorization': `Bearer ${HF_TOKEN}`,
|
| 221 |
+
'Content-Type': 'application/json'
|
| 222 |
+
},
|
| 223 |
+
method: 'POST',
|
| 224 |
+
body: JSON.stringify({
|
| 225 |
+
inputs: message,
|
| 226 |
+
tenant_id: tenantId
|
| 227 |
+
})
|
| 228 |
+
}
|
| 229 |
+
);
|
| 230 |
+
|
| 231 |
+
return await response.json();
|
| 232 |
+
}
|
| 233 |
+
```
|
| 234 |
+
|
| 235 |
+
---
|
| 236 |
+
|
| 237 |
+
## 📊 Model Performance
|
| 238 |
+
|
| 239 |
+
- **Average Response Time**: 200-400ms
|
| 240 |
+
- **Intent Classification Accuracy**: 94%
|
| 241 |
+
- **Multi-language Support**: 50+ languages
|
| 242 |
+
- **Concurrent Requests**: Scales with Hugging Face Pro tier
|
| 243 |
+
- **Uptime**: 99.9% (via Hugging Face infrastructure)
|
| 244 |
+
|
| 245 |
+
---
|
| 246 |
+
|
| 247 |
+
## 🛡️ Safety & Privacy
|
| 248 |
+
|
| 249 |
+
- **PII Protection**: All logs sanitized before storage
|
| 250 |
+
- **Content Moderation**: Built-in bias and safety detection
|
| 251 |
+
- **Bias Scoring**: Real-time fairness evaluation
|
| 252 |
+
- **Privacy-First**: No user data stored by the model
|
| 253 |
+
- **Compliance**: Designed for government/public sector use
|
| 254 |
+
|
| 255 |
+
---
|
| 256 |
+
|
| 257 |
+
## 🧪 Testing & Validation
|
| 258 |
+
|
| 259 |
+
### Test the Model
|
| 260 |
+
|
| 261 |
+
```python
|
| 262 |
+
# Basic functionality test
|
| 263 |
+
test_queries = [
|
| 264 |
+
"What's the weather today?",
|
| 265 |
+
"How do I pay my water bill?",
|
| 266 |
+
"Are there any events this weekend?",
|
| 267 |
+
"Translate: Hello, how are you? (to Spanish)"
|
| 268 |
+
]
|
| 269 |
+
|
| 270 |
+
for query in test_queries:
|
| 271 |
+
response = client.post(json={"inputs": query})
|
| 272 |
+
print(f"Query: {query}")
|
| 273 |
+
print(f"Response: {response}\n")
|
| 274 |
+
```
|
| 275 |
+
|
| 276 |
+
---
|
| 277 |
+
|
| 278 |
+
## 📦 Dependencies
|
| 279 |
+
|
| 280 |
+
Core dependencies (see `requirements.txt` for full list):
|
| 281 |
+
- `transformers>=4.30.0`
|
| 282 |
+
- `torch>=2.0.0`
|
| 283 |
+
- `fastapi>=0.100.0`
|
| 284 |
+
- `pydantic>=2.0.0`
|
| 285 |
+
- `azure-ai-ml>=1.8.0`
|
| 286 |
+
- `sentence-transformers>=2.2.0`
|
| 287 |
+
|
| 288 |
+
---
|
| 289 |
+
|
| 290 |
+
## 🤝 Contributing
|
| 291 |
+
|
| 292 |
+
We welcome contributions! Areas for improvement:
|
| 293 |
+
- New civic data sources
|
| 294 |
+
- Additional language support
|
| 295 |
+
- Enhanced intent classification
|
| 296 |
+
- Performance optimizations
|
| 297 |
+
|
| 298 |
+
---
|
| 299 |
+
|
| 300 |
+
## 🗺️ Roadmap
|
| 301 |
+
|
| 302 |
+
- [ ] Voice interface integration
|
| 303 |
+
- [ ] Advanced sentiment analysis
|
| 304 |
+
- [ ] Predictive civic engagement insights
|
| 305 |
+
- [ ] Mobile app SDK
|
| 306 |
+
- [ ] Real-time event streaming
|
| 307 |
+
|
| 308 |
+
---
|
| 309 |
+
|
| 310 |
+
## 📝 Citation
|
| 311 |
+
|
| 312 |
+
If you use PENNY in your research or application, please cite:
|
| 313 |
+
|
| 314 |
+
```bibtex
|
| 315 |
+
@software{penny_civic_ai,
|
| 316 |
+
title={PENNY: Personal Civic Engagement Nurturing Network System},
|
| 317 |
+
author={Your Name/Organization},
|
| 318 |
+
year={2025},
|
| 319 |
+
url={https://huggingface.co/pythonprincessssss/penny-v2}
|
| 320 |
+
}
|
| 321 |
+
```
|
| 322 |
+
|
| 323 |
+
---
|
| 324 |
+
|
| 325 |
+
## 📞 Support
|
| 326 |
+
|
| 327 |
+
- **Issues**: [GitHub Issues](https://github.com/CyberShawties-LLC/penny-v2/issues)
|
| 328 |
+
- **Hugging Face Discussions**: Use the Community tab
|
| 329 |
+
- **Email**:
|
| 330 |
+
|
| 331 |
+
---
|
| 332 |
+
|
| 333 |
+
## 📄 License
|
| 334 |
+
|
| 335 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 336 |
+
|
| 337 |
+
---
|
| 338 |
+
|
| 339 |
+
**Made with ❤️ for civic engagement**
|
| 340 |
+
|
| 341 |
+
*Empowering communities through accessible AI assistance*
|
atlanta_ga.json
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "Atlanta",
|
| 3 |
+
"state": "Georgia",
|
| 4 |
+
"tenant_id": "atlanta_ga",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "state_posts_only",
|
| 7 |
+
"csb_authority": "state",
|
| 8 |
+
"events_source": "city_official_site",
|
| 9 |
+
"transit_source": "city",
|
| 10 |
+
"trash_source": "city",
|
| 11 |
+
"public_safety_source": "city",
|
| 12 |
+
"warming_center_authority": "city",
|
| 13 |
+
"sexual_health_authority": "state"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"city_homepage": "https://www.atlantaga.gov/",
|
| 17 |
+
"events_calendar": "https://www.atlantaga.gov/i-want-to-/advanced-components/event-list-view",
|
| 18 |
+
"public_works": "https://www.atlantaga.gov/government/departments/public-works/office-of-solid-waste-services",
|
| 19 |
+
"transit": "https://itsmarta.com/",
|
| 20 |
+
"libraries": "https://www.fulcolibrary.org/",
|
| 21 |
+
"parks_and_recreation": "https://www.atlantaga.gov/government/departments/parks-recreation",
|
| 22 |
+
"warming_centers": "https://www.atlantaga.gov/Home/Components/News/News/15568/672",
|
| 23 |
+
"emergency_management": "https://www.fultoncountyga.gov/inside-fulton-county/fulton-county-departments/atlanta-fulton-emergency-management-agency"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "state",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "Georgia Crisis and Access Line (GCAL)",
|
| 32 |
+
"link": "https://dbhdd.georgia.gov/be-dbhdd",
|
| 33 |
+
"phone": "800-715-4225",
|
| 34 |
+
"notes": "24/7 statewide line for mental health, substance use, and developmental disability crises. Penny should gently steer people here if they mention a crisis."
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"name": "Find a Community Service Board (CSB)",
|
| 38 |
+
"link": "https://dbhdd.georgia.gov/locations/community-service-board",
|
| 39 |
+
"phone": "",
|
| 40 |
+
"notes": "State-managed directory of Community Service Boards across Georgia. Use this instead of naming local CSBs manually."
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"sexual_health": {
|
| 45 |
+
"allowed": true,
|
| 46 |
+
"authority": "state",
|
| 47 |
+
"resources": [
|
| 48 |
+
{
|
| 49 |
+
"name": "Georgia Department of Public Health – STD Program",
|
| 50 |
+
"link": "https://dph.georgia.gov/STDs",
|
| 51 |
+
"phone": "404-657-2700",
|
| 52 |
+
"notes": "State program that supports STD testing and treatment across all 159 Georgia counties."
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"name": "Fulton County Board of Health – Sexual Health Clinics",
|
| 56 |
+
"link": "https://fultoncountyboh.com/services/adult-health/sexual-health/",
|
| 57 |
+
"phone": "",
|
| 58 |
+
"notes": "County-run sexual health clinics serving Atlanta/Fulton residents with STI testing and treatment."
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"name": "AID Atlanta – HIV & STI Testing",
|
| 62 |
+
"link": "https://www.aidatlanta.org/testing/",
|
| 63 |
+
"phone": "",
|
| 64 |
+
"notes": "Nonprofit HIV/STI testing and support. Penny can mention this as an additional community resource."
|
| 65 |
+
}
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"warming_and_cooling_centers": {
|
| 69 |
+
"authority": "city",
|
| 70 |
+
"resources": [
|
| 71 |
+
{
|
| 72 |
+
"name": "Gateway Center – Warming Center",
|
| 73 |
+
"address": "275 Pryor St SW, Atlanta, GA 30303",
|
| 74 |
+
"season": "winter",
|
| 75 |
+
"link": "https://partnersforhome.org/atlwarmup-resources/",
|
| 76 |
+
"notes": "Frequently used as a winter warming center for people experiencing homelessness. Penny should remind users to check current activation status or call ahead."
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"name": "Central Park Recreation Center",
|
| 80 |
+
"address": "400 Merritts Ave NE, Atlanta, GA 30308",
|
| 81 |
+
"season": "winter",
|
| 82 |
+
"link": "https://www.atlantaga.gov/Home/Components/News/News/15568/672",
|
| 83 |
+
"notes": "City recreation center that opens as a temporary warming center during cold-weather activations."
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"name": "Old Adamsville Recreation Center",
|
| 87 |
+
"address": "3404 Delmar Ln NW, Atlanta, GA 30331",
|
| 88 |
+
"season": "winter",
|
| 89 |
+
"link": "https://www.atlantaga.gov/Home/Components/News/News/15568/672",
|
| 90 |
+
"notes": "Another recreation center the City uses as a temporary warming center when severe cold is forecast."
|
| 91 |
+
}
|
| 92 |
+
]
|
| 93 |
+
},
|
| 94 |
+
"trash_and_recycling": {
|
| 95 |
+
"authority": "city",
|
| 96 |
+
"pickup_days": "Varies by address. Residents should use the Solid Waste Services Collection Tool or ATL311 app to see their specific schedule.",
|
| 97 |
+
"holiday_schedule_link": "https://www.atlantaga.gov/government/departments/public-works/solid-waste-services-collection-tool"
|
| 98 |
+
},
|
| 99 |
+
"transit": {
|
| 100 |
+
"authority": "city",
|
| 101 |
+
"provider": "MARTA (Metropolitan Atlanta Rapid Transit Authority)",
|
| 102 |
+
"routes_link": "https://itsmarta.com/",
|
| 103 |
+
"planner_link": "https://discoveratlanta.com/explore/transportation/marta-guide/"
|
| 104 |
+
},
|
| 105 |
+
"emergency": {
|
| 106 |
+
"authority": "county",
|
| 107 |
+
"alerts_link": "https://www.atlantaga.gov/government/mayor-s-office/executive-offices/office-of-emergency-preparedness/be-ready",
|
| 108 |
+
"non_emergency_phone": "404-546-0311",
|
| 109 |
+
"emergency_management_link": "https://www.fultoncountyga.gov/inside-fulton-county/fulton-county-departments/atlanta-fulton-emergency-management-agency"
|
| 110 |
+
},
|
| 111 |
+
"libraries": {
|
| 112 |
+
"authority": "county",
|
| 113 |
+
"resources": [
|
| 114 |
+
{
|
| 115 |
+
"branch": "Fulton County Library System – Central Library",
|
| 116 |
+
"address": "1 Margaret Mitchell Sq, Atlanta, GA 30303",
|
| 117 |
+
"link": "https://www.fulcolibrary.org/",
|
| 118 |
+
"notes": "Main hub of the public library system serving Atlanta and Fulton County. Great place for computer access, study space, and community programs."
|
| 119 |
+
}
|
| 120 |
+
]
|
| 121 |
+
},
|
| 122 |
+
"community_centers": {
|
| 123 |
+
"authority": "city",
|
| 124 |
+
"resources": [
|
| 125 |
+
{
|
| 126 |
+
"name": "Central Park Recreation Center",
|
| 127 |
+
"address": "400 Merritts Ave NE, Atlanta, GA 30308",
|
| 128 |
+
"link": "https://www.atlantaga.gov/",
|
| 129 |
+
"notes": "Recreation center with community programming; also used as a warming center during extreme cold."
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"name": "Old Adamsville Recreation Center",
|
| 133 |
+
"address": "3404 Delmar Ln NW, Atlanta, GA 30331",
|
| 134 |
+
"link": "https://www.atlantaga.gov/",
|
| 135 |
+
"notes": "Neighborhood recreation center that the City activates as a warming center when needed."
|
| 136 |
+
}
|
| 137 |
+
]
|
| 138 |
+
}
|
| 139 |
+
}
|
| 140 |
+
}
|
bias_utils.py
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/bias/bias_utils.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Bias Detection Utilities for Penny
|
| 5 |
+
|
| 6 |
+
Provides zero-shot classification for detecting potential bias in text responses.
|
| 7 |
+
Uses a classification model to identify neutral content vs. biased language patterns.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import asyncio
|
| 11 |
+
from typing import Dict, Any, Optional, List
|
| 12 |
+
import logging
|
| 13 |
+
|
| 14 |
+
# --- Logging Setup ---
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
# --- Model Loader Import ---
|
| 18 |
+
try:
    from app.model_loader import load_model_pipeline
    MODEL_LOADER_AVAILABLE = True
except ImportError:
    # Running outside the full app (e.g. standalone model repo): fall back to
    # a degraded mode where check_bias() reports itself as unavailable.
    MODEL_LOADER_AVAILABLE = False
    logger.warning("Could not import load_model_pipeline. Bias detection will operate in fallback mode.")

# Cached zero-shot classification pipeline; loaded once and reused across calls.
BIAS_PIPELINE: Optional[Any] = None
# Name passed to load_model_pipeline() and used as a log prefix.
AGENT_NAME = "penny-bias-checker"

# Candidate labels for zero-shot classification. check_bias() runs with
# multi_label=True, so each label receives an independent confidence score.
CANDIDATE_LABELS = [
    "neutral and objective",
    "contains political bias",
    "uses emotional language",
    "is factually biased",
]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _initialize_bias_pipeline() -> bool:
    """
    Load the bias-detection pipeline once and cache it in BIAS_PIPELINE.

    Safe to call repeatedly: a pipeline that is already loaded is reused,
    and a missing model loader simply leaves the service unavailable.

    Returns:
        bool: True if a pipeline is ready for use, False otherwise
    """
    global BIAS_PIPELINE

    # Already loaded on an earlier call — reuse the cached pipeline.
    if BIAS_PIPELINE is not None:
        return True

    # Fallback mode: the app's model loader could not be imported.
    if not MODEL_LOADER_AVAILABLE:
        logger.warning(f"{AGENT_NAME}: Model loader not available, pipeline initialization skipped")
        return False

    try:
        logger.info(f"Loading {AGENT_NAME}...")
        BIAS_PIPELINE = load_model_pipeline(AGENT_NAME)
    except Exception as exc:
        # Leave the cache empty so a later call can retry initialization.
        logger.error(f"Failed to load {AGENT_NAME}: {exc}", exc_info=True)
        BIAS_PIPELINE = None
        return False

    logger.info(f"Model {AGENT_NAME} loaded successfully")
    return True
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Eagerly attempt pipeline initialization at import time so the first request
# doesn't pay the model-load cost; check_bias() retries lazily if this fails.
_initialize_bias_pipeline()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
async def check_bias(text: str) -> Dict[str, Any]:
    """
    Runs zero-shot classification to check for bias in the input text.

    Uses a pre-loaded classification model to analyze text for:
    - Neutral and objective language
    - Political bias
    - Emotional language
    - Factual bias

    Args:
        text: The string of text to analyze for bias

    Returns:
        Dictionary containing:
        - analysis: List of labels with confidence scores, sorted by score
        - available: Whether the bias detection service is operational
        - message: Optional error or status message

    Example:
        >>> result = await check_bias("This is neutral text.")
        >>> result['analysis'][0]['label']
        'neutral and objective'
    """
    global BIAS_PIPELINE

    # Input validation: reject None and non-string values before touching the model.
    if not text or not isinstance(text, str):
        logger.warning("check_bias called with invalid text input")
        return {
            "analysis": [],
            "available": False,
            "message": "Invalid input: text must be a non-empty string"
        }

    # Strip text to avoid processing whitespace-only input.
    text = text.strip()
    if not text:
        logger.warning("check_bias called with empty text after stripping")
        return {
            "analysis": [],
            "available": False,
            "message": "Invalid input: text is empty"
        }

    # Ensure pipeline is initialized; retry lazily if import-time init failed.
    if BIAS_PIPELINE is None:
        logger.warning(f"{AGENT_NAME} pipeline not available, attempting re-initialization")
        if not _initialize_bias_pipeline():
            return {
                "analysis": [],
                "available": False,
                "message": "Bias detection service is currently unavailable"
            }

    try:
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines (since
        # Python 3.10) and can bind the wrong loop; get_running_loop() is the
        # correct call from within an async function.
        loop = asyncio.get_running_loop()

        # Run inference in the default thread pool to avoid blocking the event
        # loop — transformers pipelines are synchronous and CPU/GPU bound.
        results = await loop.run_in_executor(
            None,
            lambda: BIAS_PIPELINE(
                text,
                CANDIDATE_LABELS,
                multi_label=True
            )
        )

        # Validate results structure: a zero-shot pipeline returns a dict with
        # parallel 'labels' and 'scores' lists for a single input.
        if not results or not isinstance(results, dict):
            logger.error(f"Bias detection returned unexpected format: {type(results)}")
            return {
                "analysis": [],
                "available": True,
                "message": "Inference returned unexpected format"
            }

        labels = results.get('labels', [])
        scores = results.get('scores', [])

        if not labels or not scores:
            logger.warning("Bias detection returned empty labels or scores")
            return {
                "analysis": [],
                "available": True,
                "message": "No classification results returned"
            }

        # Build analysis results; cast scores to plain floats so the payload
        # is JSON-serializable even if the model returns numpy scalars.
        analysis = [
            {"label": label, "score": float(score)}
            for label, score in zip(labels, scores)
        ]

        # Sort by confidence score (descending) so the top label is first.
        analysis.sort(key=lambda x: x['score'], reverse=True)

        logger.debug(f"Bias check completed successfully, top result: {analysis[0]['label']} ({analysis[0]['score']:.3f})")

        return {
            "analysis": analysis,
            "available": True
        }

    except asyncio.CancelledError:
        # Never swallow cancellation — propagate so task shutdown works.
        logger.warning("Bias detection task was cancelled")
        raise

    except Exception as e:
        logger.error(f"Error during bias detection inference: {e}", exc_info=True)
        return {
            "analysis": [],
            "available": False,
            "message": f"Bias detection error: {str(e)}"
        }
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def get_bias_pipeline_status() -> Dict[str, Any]:
    """
    Report the current status of the bias detection pipeline.

    Returns:
        Dictionary with the agent name, whether a pipeline is loaded,
        and whether the app model loader could be imported.
    """
    status: Dict[str, Any] = {
        "agent_name": AGENT_NAME,
        "available": BIAS_PIPELINE is not None,
        "model_loader_available": MODEL_LOADER_AVAILABLE,
    }
    return status
|
birmingham_al.json
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "Birmingham",
|
| 3 |
+
"state": "Alabama",
|
| 4 |
+
"tenant_id": "birmingham_al",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "state_posts_only",
|
| 7 |
+
"csb_authority": "state",
|
| 8 |
+
"events_source": "city_official_site",
|
| 9 |
+
"transit_source": "city",
|
| 10 |
+
"trash_source": "city",
|
| 11 |
+
"public_safety_source": "city",
|
| 12 |
+
"warming_center_authority": "city",
|
| 13 |
+
"sexual_health_authority": "state"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"city_homepage": "https://www.birminghamal.gov/",
|
| 17 |
+
"events_calendar": "https://www.birminghamal.gov/events",
|
| 18 |
+
"public_works": "https://www.birminghamal.gov/public-works/",
|
| 19 |
+
"transit": "https://www.maxtransit.org/",
|
| 20 |
+
"libraries": "https://www.bplonline.org/",
|
| 21 |
+
"parks_and_recreation": "https://www.birminghamal.gov/parks-and-recreation",
|
| 22 |
+
"warming_centers": "https://www.birminghamal.gov/warming-stations",
|
| 23 |
+
"emergency_management": "https://www.jccal.org/Sites/emergency/index.asp"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "state",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "Alabama Department of Mental Health",
|
| 32 |
+
"link": "https://mh.alabama.gov/",
|
| 33 |
+
"phone": "800-367-0955",
|
| 34 |
+
"notes": "Handles statewide behavioral health services. Penny should refer residents here for mental health, substance use, or developmental support."
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"name": "Crisis Center Birmingham",
|
| 38 |
+
"link": "https://www.crisiscenterbham.org/",
|
| 39 |
+
"phone": "205-323-7777",
|
| 40 |
+
"notes": "Local nonprofit partner offering 24/7 crisis support and counseling services."
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"sexual_health": {
|
| 45 |
+
"allowed": true,
|
| 46 |
+
"authority": "state",
|
| 47 |
+
"resources": [
|
| 48 |
+
{
|
| 49 |
+
"name": "Alabama Department of Public Health – STD Program",
|
| 50 |
+
"link": "https://www.alabamapublichealth.gov/std/index.html",
|
| 51 |
+
"phone": "334-206-5350",
|
| 52 |
+
"notes": "Manages sexual health and testing across Alabama. Penny should default to this for all STD-related requests."
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"name": "Cooper Green Mercy Health Services – Sexual Health",
|
| 56 |
+
"link": "https://coopergreen.org/services/sexual-health/",
|
| 57 |
+
"phone": "",
|
| 58 |
+
"notes": "Jefferson County health resource for STD testing and treatment."
|
| 59 |
+
}
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"warming_and_cooling_centers": {
|
| 63 |
+
"allowed": true,
|
| 64 |
+
"authority": "city",
|
| 65 |
+
"resources": [
|
| 66 |
+
{
|
| 67 |
+
"name": "Birmingham CrossPlex Warming Station",
|
| 68 |
+
"address": "2337 Bessemer Rd, Birmingham, AL 35208",
|
| 69 |
+
"season": "winter",
|
| 70 |
+
"link": "https://www.birminghamal.gov/warming-stations",
|
| 71 |
+
"notes": "Activated by the city during freezing temperatures. Penny should confirm status before recommending."
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"name": "Boutwell Auditorium Warming Site",
|
| 75 |
+
"address": "1930 Reverend Abraham Woods Jr Blvd, Birmingham, AL 35203",
|
| 76 |
+
"season": "winter",
|
| 77 |
+
"link": "https://www.birminghamal.gov/warming-stations",
|
| 78 |
+
"notes": "Frequently used warming center downtown. Penny can suggest this for accessible shelter during winter nights."
|
| 79 |
+
}
|
| 80 |
+
]
|
| 81 |
+
},
|
| 82 |
+
"trash_and_recycling": {
|
| 83 |
+
"authority": "city",
|
| 84 |
+
"pickup_days": "Pickup varies by address. Residents can call 311 or check city notices for their zone.",
|
| 85 |
+
"holiday_schedule_link": "https://www.birminghamal.gov/public-works/"
|
| 86 |
+
},
|
| 87 |
+
"transit": {
|
| 88 |
+
"authority": "city",
|
| 89 |
+
"provider": "MAX Transit (Birmingham-Jefferson County Transit Authority)",
|
| 90 |
+
"routes_link": "https://www.maxtransit.org/routes/",
|
| 91 |
+
"planner_link": "https://www.maxtransit.org/"
|
| 92 |
+
},
|
| 93 |
+
"emergency": {
|
| 94 |
+
"authority": "county",
|
| 95 |
+
"alerts_link": "https://www.birminghamal.gov/police/",
|
| 96 |
+
"non_emergency_phone": "205-328-9311",
|
| 97 |
+
"emergency_management_link": "https://www.jccal.org/Sites/emergency/index.asp"
|
| 98 |
+
},
|
| 99 |
+
"libraries": {
|
| 100 |
+
"authority": "city",
|
| 101 |
+
"resources": [
|
| 102 |
+
{
|
| 103 |
+
"branch": "Birmingham Public Library – Central",
|
| 104 |
+
"address": "2100 Park Pl, Birmingham, AL 35203",
|
| 105 |
+
"link": "https://www.bplonline.org/",
|
| 106 |
+
"notes": "Main hub of Birmingham's public library system. Offers reading programs, tech help, and job resources."
|
| 107 |
+
}
|
| 108 |
+
]
|
| 109 |
+
},
|
| 110 |
+
"community_centers": {
|
| 111 |
+
"authority": "city",
|
| 112 |
+
"resources": [
|
| 113 |
+
{
|
| 114 |
+
"name": "Wylam Recreation Center",
|
| 115 |
+
"address": "5111 8th Ave Wylam, Birmingham, AL 35224",
|
| 116 |
+
"link": "https://www.birminghamal.gov/",
|
| 117 |
+
"notes": "Multi-use recreation center offering youth and senior programs, sports, and emergency shelter access."
|
| 118 |
+
}
|
| 119 |
+
]
|
| 120 |
+
}
|
| 121 |
+
}
|
| 122 |
+
}
|
chesterfield_va.json
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "Chesterfield County",
|
| 3 |
+
"state": "Virginia",
|
| 4 |
+
"tenant_id": "chesterfield_va",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "csb_supported",
|
| 7 |
+
"csb_authority": "local",
|
| 8 |
+
"events_source": "county_official_site",
|
| 9 |
+
"transit_source": "regional_partner",
|
| 10 |
+
"trash_source": "county",
|
| 11 |
+
"public_safety_source": "county",
|
| 12 |
+
"warming_center_authority": "county",
|
| 13 |
+
"sexual_health_authority": "state"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"county_homepage": "https://www.chesterfield.gov/",
|
| 17 |
+
"events_calendar": "https://www.chesterfield.gov/5932/Things-to-Do",
|
| 18 |
+
"public_works": "https://www.chesterfield.gov/178/Public-Utilities",
|
| 19 |
+
"transit": "https://www.ridegrtc.com/",
|
| 20 |
+
"libraries": "https://library.chesterfield.gov/",
|
| 21 |
+
"parks_and_recreation": "https://www.chesterfield.gov/148/Parks-Recreation",
|
| 22 |
+
"warming_centers": "https://www.chesterfield.gov/1943/Warming-Centers",
|
| 23 |
+
"emergency_management": "https://www.chesterfield.gov/153/Emergency-Management"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "local",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "Chesterfield Community Services Board (CSB)",
|
| 32 |
+
"link": "https://www.chesterfield.gov/159/Mental-Health-Support-Services",
|
| 33 |
+
"phone": "804-748-1227",
|
| 34 |
+
"notes": "Need mental health, addiction support, or developmental disability help? The CSB provides confidential services for Chesterfield residents."
|
| 35 |
+
}
|
| 36 |
+
]
|
| 37 |
+
},
|
| 38 |
+
"sexual_health": {
|
| 39 |
+
"allowed": true,
|
| 40 |
+
"authority": "state",
|
| 41 |
+
"resources": [
|
| 42 |
+
{
|
| 43 |
+
"name": "Virginia Department of Health – STD Services",
|
| 44 |
+
"link": "https://www.vdh.virginia.gov/std/",
|
| 45 |
+
"phone": "804-864-8000",
|
| 46 |
+
"notes": "For testing, treatment, and prevention of STDs across Virginia, Penny connects residents here."
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"name": "Chesterfield Health District – Sexual Health",
|
| 50 |
+
"link": "https://www.vdh.virginia.gov/chesterfield/",
|
| 51 |
+
"phone": "804-748-1691",
|
| 52 |
+
"notes": "Local district office offering public health services, including HIV/STD testing. Great nearby option for residents."
|
| 53 |
+
}
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
"warming_and_cooling_centers": {
|
| 57 |
+
"allowed": true,
|
| 58 |
+
"authority": "county",
|
| 59 |
+
"resources": [
|
| 60 |
+
{
|
| 61 |
+
"name": "Chesterfield County Warming Centers",
|
| 62 |
+
"address": "",
|
| 63 |
+
"season": "winter",
|
| 64 |
+
"link": "https://www.chesterfield.gov/1943/Warming-Centers",
|
| 65 |
+
"notes": "During freezing temperatures, the county activates warming sites. Penny should advise users to check the provided link for current activation status and locations."
|
| 66 |
+
}
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
"trash_and_recycling": {
|
| 70 |
+
"authority": "county",
|
| 71 |
+
"pickup_days": "Trash and recycling pickup depends on your address and hauler. You can contact GFL or visit the county site for exact schedules.",
|
| 72 |
+
"holiday_schedule_link": "https://www.chesterfield.gov/172/Trash-Collection"
|
| 73 |
+
},
|
| 74 |
+
"transit": {
|
| 75 |
+
"authority": "regional_partner",
|
| 76 |
+
"provider": "GRTC Transit System",
|
| 77 |
+
"routes_link": "https://ridegrtc.com/routes/",
|
| 78 |
+
"planner_link": "https://ridegrtc.com/"
|
| 79 |
+
},
|
| 80 |
+
"emergency": {
|
| 81 |
+
"authority": "county",
|
| 82 |
+
"alerts_link": "https://www.chesterfield.gov/153/Emergency-Management",
|
| 83 |
+
"non_emergency_phone": "804-748-1251",
|
| 84 |
+
"emergency_management_link": "https://www.chesterfield.gov/153/Emergency-Management"
|
| 85 |
+
},
|
| 86 |
+
"libraries": {
|
| 87 |
+
"authority": "county",
|
| 88 |
+
"resources": [
|
| 89 |
+
{
|
| 90 |
+
"branch": "Chesterfield County Public Library – Central",
|
| 91 |
+
"address": "7051 Lucy Corr Blvd, Chesterfield, VA 23832",
|
| 92 |
+
"link": "https://library.chesterfield.gov/",
|
| 93 |
+
"notes": "Books, free Wi-Fi, job help, and cozy spaces to study or chill — Penny recommends stopping by!"
|
| 94 |
+
}
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
"community_centers": {
|
| 98 |
+
"authority": "county",
|
| 99 |
+
"resources": [
|
| 100 |
+
{
|
| 101 |
+
"name": "Ettrick Recreation Center",
|
| 102 |
+
"address": "20621 Woodpecker Rd, South Chesterfield, VA 23803",
|
| 103 |
+
"link": "https://www.chesterfield.gov/148/Parks-Recreation",
|
| 104 |
+
"notes": "Looking for fun programs, senior activities, or rental space? This friendly center has it all."
|
| 105 |
+
}
|
| 106 |
+
]
|
| 107 |
+
}
|
| 108 |
+
}
|
| 109 |
+
}
|
el_paso_tx.json
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "El Paso",
|
| 3 |
+
"state": "Texas",
|
| 4 |
+
"tenant_id": "el_paso_tx",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "state_only",
|
| 7 |
+
"csb_authority": "none",
|
| 8 |
+
"events_source": "city_official_site",
|
| 9 |
+
"transit_source": "city",
|
| 10 |
+
"trash_source": "city",
|
| 11 |
+
"public_safety_source": "city",
|
| 12 |
+
"warming_center_authority": "city",
|
| 13 |
+
"sexual_health_authority": "state"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"city_homepage": "https://www.elpasotexas.gov/",
|
| 17 |
+
"events_calendar": "https://events.elpasotexas.gov/",
|
| 18 |
+
"public_health": "https://www.elpasotexas.gov/public-health/",
|
| 19 |
+
"transit": "https://sunmetro.net/",
|
| 20 |
+
"libraries": "https://www.elpasolibrary.org/",
|
| 21 |
+
"parks_and_recreation": "https://www.elpasotexas.gov/parks-and-recreation/",
|
| 22 |
+
"emergency_management": "https://www.elpasotexas.gov/fire-department/emergency-management/",
|
| 23 |
+
"trash": "https://www.elpasotexas.gov/environmental-services/"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "state",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "Texas Health and Human Services (Crisis Hotline)",
|
| 32 |
+
"link": "https://www.hhs.texas.gov/services/mental-health-substance-use",
|
| 33 |
+
"phone": "800-273-8255 (National Suicide Prevention Lifeline)",
|
| 34 |
+
"notes": "Since El Paso does not rely on a local CSB, Penny should prioritize state and national resources for mental health and substance abuse crises."
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"name": "Emergence Health Network (Local Mental Health Authority)",
|
| 38 |
+
"link": "https://www.ehn.org/",
|
| 39 |
+
"phone": "915-884-8884",
|
| 40 |
+
"notes": "The Local Mental Health Authority for El Paso County, providing emergency services and outpatient care."
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"sexual_health": {
|
| 45 |
+
"allowed": true,
|
| 46 |
+
"authority": "state",
|
| 47 |
+
"resources": [
|
| 48 |
+
{
|
| 49 |
+
"name": "City of El Paso Department of Public Health – HIV/STD",
|
| 50 |
+
"link": "https://www.elpasotexas.gov/public-health/sexual-health",
|
| 51 |
+
"phone": "915-212-6600",
|
| 52 |
+
"notes": "Local city department offering confidential testing, treatment, and prevention services."
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"name": "Texas Department of State Health Services (DSHS)",
|
| 56 |
+
"link": "https://www.dshs.texas.gov/hivstd/testing",
|
| 57 |
+
"phone": "",
|
| 58 |
+
"notes": "State authority for broader sexual health policies and resources."
|
| 59 |
+
}
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"warming_and_cooling_centers": {
|
| 63 |
+
"allowed": true,
|
| 64 |
+
"authority": "city",
|
| 65 |
+
"resources": [
|
| 66 |
+
{
|
| 67 |
+
"name": "City of El Paso Recreation Centers",
|
| 68 |
+
"address": "Varies by activation",
|
| 69 |
+
"season": "winter/summer",
|
| 70 |
+
"link": "https://www.elpasotexas.gov/parks-and-recreation/",
|
| 71 |
+
"notes": "City recreation centers are typically used as temporary cooling centers during extreme heat and warming centers during extreme cold. Penny must remind users to check for current activation alerts."
|
| 72 |
+
}
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
"trash_and_recycling": {
|
| 76 |
+
"authority": "city",
|
| 77 |
+
"pickup_days": "Varies by zone. Residents can use the city's Environmental Services website for schedules and holiday changes.",
|
| 78 |
+
"holiday_schedule_link": "https://www.elpasotexas.gov/environmental-services/about-us/news"
|
| 79 |
+
},
|
| 80 |
+
"transit": {
|
| 81 |
+
"authority": "city",
|
| 82 |
+
"provider": "Sun Metro",
|
| 83 |
+
"routes_link": "https://sunmetro.net/routes/maps-schedules/",
|
| 84 |
+
"planner_link": "https://sunmetro.net/"
|
| 85 |
+
},
|
| 86 |
+
"emergency": {
|
| 87 |
+
"authority": "city",
|
| 88 |
+
"alerts_link": "https://www.elpasotexas.gov/fire-department/emergency-management/alerts",
|
| 89 |
+
"non_emergency_phone": "915-832-4400 (Police Department Non Emergency)",
|
| 90 |
+
"emergency_management_link": "https://www.elpasotexas.gov/fire-department/emergency-management/"
|
| 91 |
+
},
|
| 92 |
+
"libraries": {
|
| 93 |
+
"authority": "city",
|
| 94 |
+
"resources": [
|
| 95 |
+
{
|
| 96 |
+
"branch": "El Paso Public Library – Main Branch",
|
| 97 |
+
"address": "501 N Oregon St, El Paso, TX 79901",
|
| 98 |
+
"link": "https://www.elpasolibrary.org/",
|
| 99 |
+
"notes": "The main public library, often a hub for public access internet, job search assistance, and community programs."
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
"community_centers": {
|
| 104 |
+
"authority": "city",
|
| 105 |
+
"resources": [
|
| 106 |
+
{
|
| 107 |
+
"name": "Leo C. Carter Recreation Center",
|
| 108 |
+
"address": "7708 Janway Dr, El Paso, TX 79915",
|
| 109 |
+
"link": "https://www.elpasotexas.gov/parks-and-recreation/recreation-centers",
|
| 110 |
+
"notes": "One of many city recreation centers providing programs for youth, adults, and seniors."
|
| 111 |
+
}
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
+
}
|
| 115 |
+
}
|
embeddings
ADDED
|
File without changes
|
event_weather.py
ADDED
|
@@ -0,0 +1,761 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/event_weather.py
|
| 2 |
+
"""
|
| 3 |
+
🌤️ Penny's Event + Weather Matchmaker
|
| 4 |
+
Helps residents find the perfect community activity based on real-time weather.
|
| 5 |
+
Penny always suggests what's actually enjoyable — not just what exists.
|
| 6 |
+
|
| 7 |
+
Production-ready version with structured logging, performance tracking, and robust error handling.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import time
|
| 12 |
+
from pathlib import Path
|
| 13 |
+
from typing import Dict, Any, List, Optional, Tuple
|
| 14 |
+
from datetime import datetime
|
| 15 |
+
from enum import Enum
|
| 16 |
+
|
| 17 |
+
from app.weather_agent import get_weather_for_location
|
| 18 |
+
from app.location_utils import load_city_events
|
| 19 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 20 |
+
|
| 21 |
+
# --- LOGGING SETUP (Structured, Azure-compatible) ---
|
| 22 |
+
import logging
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# --- CONFIGURATION CONSTANTS ---
|
| 27 |
+
class EventWeatherConfig:
|
| 28 |
+
"""Configuration constants for event recommendation system."""
|
| 29 |
+
MAX_FALLBACK_EVENTS = 10
|
| 30 |
+
MAX_RECOMMENDATIONS = 20
|
| 31 |
+
WEATHER_TIMEOUT_SECONDS = 5.0
|
| 32 |
+
SLOW_OPERATION_THRESHOLD_MS = 2000
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# --- PENNY'S WEATHER WISDOM (Personality-Driven Thresholds) ---
|
| 36 |
+
class WeatherThresholds:
    """
    Penny's practical weather rules for event recommendations.
    These are based on real resident comfort, not just data.

    Temperatures are in degrees Fahrenheit.
    """
    WARM_THRESHOLD = 70  # F° - Great for outdoor events
    HOT_THRESHOLD = 85  # F° - Maybe too hot for some activities
    COOL_THRESHOLD = 60  # F° - Bring a jacket
    COLD_THRESHOLD = 40  # F° - Indoor events preferred

    # Substrings matched against the weather condition description.
    # NOTE(review): case-sensitivity depends on the matcher, which is not
    # visible in this chunk — confirm comparisons are lowercased.
    RAINY_KEYWORDS = ["rain", "shower", "storm", "drizzle", "thunderstorm"]
    SNOWY_KEYWORDS = ["snow", "flurries", "blizzard", "ice"]
    NICE_KEYWORDS = ["clear", "sunny", "fair", "partly cloudy"]
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
class ErrorType(str, Enum):
    """Structured error types for event weather system.

    Inherits from ``str`` so members compare equal to their string values
    and serialize cleanly into logs and JSON responses.
    """
    NOT_FOUND = "event_data_not_found"  # tenant has no event data file
    PARSE_ERROR = "json_parse_error"  # event JSON failed to parse
    WEATHER_ERROR = "weather_service_error"  # weather lookup failed (assumed use — not raised in this chunk)
    UNKNOWN = "unknown_error"  # any other unexpected failure
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class EventWeatherException(Exception):
    """
    Base exception for event weather system.

    Carries a structured :class:`ErrorType` alongside the human-readable
    message so callers can branch on the failure category, and optionally
    preserves the underlying exception for debugging.
    """

    def __init__(self, error_type: ErrorType, message: str, original_error: Optional[Exception] = None):
        super().__init__(message)
        # Structured category consumed by error responses and logs.
        self.error_type = error_type
        self.message = message
        # The low-level exception that triggered this one, if any.
        self.original_error = original_error
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# --- MAIN RECOMMENDATION FUNCTION ---
|
| 69 |
+
async def get_event_recommendations_with_weather(
    tenant_id: str,
    lat: float,
    lon: float,
    include_all_events: bool = False,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    🌤️ Penny's Event + Weather Intelligence System

    Combines real-time weather with community events to give residents
    smart, helpful suggestions about what to do today.

    Never raises to the caller: known failures (EventWeatherException) and
    unexpected ones are both converted into a structured error response.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga', 'seattle_wa')
        lat: Latitude for weather lookup
        lon: Longitude for weather lookup
        include_all_events: If True, returns all events regardless of weather fit
        session_id: Optional session identifier for logging
        user_id: Optional user identifier for logging

    Returns:
        Dict containing:
        - weather: Current conditions
        - suggestions: Penny's prioritized recommendations
        - all_events: Optional full event list
        - metadata: Useful context (timestamp, event count, etc.)
        - performance: Response/load timings added by this function

    Example:
        >>> recommendations = await get_event_recommendations_with_weather(
        ...     tenant_id="norfolk_va",
        ...     lat=36.8508,
        ...     lon=-76.2859
        ... )
        >>> print(recommendations["suggestions"][0])
        🌟 **Outdoor Concert** at Town Point Park — Perfect outdoor weather! This is the one.
    """
    start_time = time.time()

    # Sanitize inputs for logging (tenant_id is user-influenced text)
    safe_tenant_id = sanitize_for_logging(tenant_id)
    safe_coords = f"({lat:.4f}, {lon:.4f})"

    logger.info(
        f"🌤️ Event weather recommendation request: tenant={safe_tenant_id}, coords={safe_coords}"
    )

    try:
        # --- STEP 1: Load City Events (Standardized) ---
        events, event_load_time = await _load_events_with_timing(tenant_id)

        if not events:
            # No events for this tenant is a valid (success) outcome,
            # not an error — log it as such and return early.
            response = _create_no_events_response(tenant_id)
            _log_operation(
                operation="event_weather_recommendations",
                tenant_id=tenant_id,
                session_id=session_id,
                user_id=user_id,
                success=True,
                event_count=0,
                response_time_ms=_calculate_response_time(start_time),
                fallback_used=False,
                weather_available=False
            )
            return response

        logger.info(f"✅ Loaded {len(events)} events for {safe_tenant_id} in {event_load_time:.2f}s")

        # --- STEP 2: Get Live Weather Data ---
        # Helper never raises; signals availability via the second element.
        weather, weather_available = await _get_weather_with_fallback(lat, lon)

        # --- STEP 3: Generate Recommendations ---
        if weather_available:
            response = await _generate_weather_optimized_recommendations(
                tenant_id=tenant_id,
                events=events,
                weather=weather,
                include_all_events=include_all_events
            )
        else:
            # Graceful degradation: Still show events without weather optimization
            response = _create_fallback_response(tenant_id, events)

        # --- STEP 4: Calculate Performance Metrics ---
        response_time_ms = _calculate_response_time(start_time)

        # Add performance metadata
        response["performance"] = {
            "response_time_ms": response_time_ms,
            "event_load_time_ms": int(event_load_time * 1000),
            "weather_available": weather_available
        }

        # Warn if operation was slow
        if response_time_ms > EventWeatherConfig.SLOW_OPERATION_THRESHOLD_MS:
            logger.warning(
                f"⚠️ Slow event weather operation: {response_time_ms}ms for {safe_tenant_id}"
            )

        # --- STEP 5: Log Structured Interaction ---
        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=True,
            event_count=len(events),
            response_time_ms=response_time_ms,
            fallback_used=not weather_available,
            weather_available=weather_available
        )

        logger.info(
            f"✅ Returning {len(response.get('suggestions', []))} recommendations "
            f"for {safe_tenant_id} in {response_time_ms}ms"
        )

        return response

    except EventWeatherException as e:
        # Known error with structured handling (raised by event loading)
        response_time_ms = _calculate_response_time(start_time)

        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=False,
            event_count=0,
            response_time_ms=response_time_ms,
            fallback_used=False,
            weather_available=False,
            error_type=e.error_type.value,
            error_message=str(e)
        )

        return _create_error_response(
            tenant_id=tenant_id,
            error_type=e.error_type.value,
            message=e.message
        )

    except Exception as e:
        # Unexpected error — log with traceback, but keep the detailed
        # message out of the user-facing response.
        response_time_ms = _calculate_response_time(start_time)

        logger.error(
            f"❌ Unexpected error in event weather recommendations: {str(e)}",
            exc_info=True
        )

        _log_operation(
            operation="event_weather_recommendations",
            tenant_id=tenant_id,
            session_id=session_id,
            user_id=user_id,
            success=False,
            event_count=0,
            response_time_ms=response_time_ms,
            fallback_used=False,
            weather_available=False,
            error_type=ErrorType.UNKNOWN.value,
            error_message="Unexpected system error"
        )

        return _create_error_response(
            tenant_id=tenant_id,
            error_type=ErrorType.UNKNOWN.value,
            message="Something unexpected happened. Please try again in a moment."
        )
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
# --- EVENT LOADING WITH TIMING ---
|
| 247 |
+
async def _load_events_with_timing(tenant_id: str) -> Tuple[List[Dict[str, Any]], float]:
    """
    Load a city's events and measure how long the load took.

    Args:
        tenant_id: City identifier

    Returns:
        Tuple of (events list, load time in seconds)

    Raises:
        EventWeatherException: When event loading fails for any reason,
            with an error_type describing the failure category.
    """
    started_at = time.time()

    try:
        city_data = load_city_events(tenant_id)
        event_list = city_data.get("events", [])
        elapsed = time.time() - started_at
        return event_list, elapsed

    except FileNotFoundError as e:
        # No data file exists for this city yet.
        logger.error(f"❌ Event data file not found for tenant: {tenant_id}")
        raise EventWeatherException(
            error_type=ErrorType.NOT_FOUND,
            message=f"I don't have event data for {tenant_id} yet. Let me know if you'd like me to add it!",
            original_error=e
        )

    except json.JSONDecodeError as e:
        # Data file exists but is malformed.
        logger.error(f"❌ Invalid JSON in event data for {tenant_id}: {e}")
        raise EventWeatherException(
            error_type=ErrorType.PARSE_ERROR,
            message="There's an issue with the event data format. Our team has been notified!",
            original_error=e
        )

    except Exception as e:
        # Anything else is surfaced as an unknown failure.
        logger.error(f"❌ Unexpected error loading events: {e}", exc_info=True)
        raise EventWeatherException(
            error_type=ErrorType.UNKNOWN,
            message="Something went wrong loading events. Please try again in a moment.",
            original_error=e
        )
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
# --- WEATHER RETRIEVAL WITH FALLBACK ---
|
| 295 |
+
async def _get_weather_with_fallback(
    lat: float,
    lon: float
) -> Tuple[Dict[str, Any], bool]:
    """
    Fetch current weather, degrading gracefully when the service is down.

    Args:
        lat: Latitude
        lon: Longitude

    Returns:
        Tuple of (weather data dict, availability boolean). On failure the
        dict carries only an "error" key and the boolean is False.
    """
    try:
        weather_data = await get_weather_for_location(lat, lon)
        current_temp = weather_data.get("temperature", {}).get("value")
        conditions = weather_data.get("phrase", "N/A")
        logger.info(f"✅ Weather retrieved: {conditions} at {current_temp}°F")
        return weather_data, True
    except Exception as e:
        # Any failure (network, parsing, upstream outage) triggers fallback mode.
        logger.warning(f"⚠️ Weather service unavailable: {str(e)}")
        return {"error": "Weather service unavailable"}, False
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# --- WEATHER-OPTIMIZED RECOMMENDATIONS ---
|
| 325 |
+
async def _generate_weather_optimized_recommendations(
    tenant_id: str,
    events: List[Dict[str, Any]],
    weather: Dict[str, Any],
    include_all_events: bool
) -> Dict[str, Any]:
    """
    Generate event recommendations optimized for current weather conditions.

    Args:
        tenant_id: City identifier
        events: List of available events
        weather: Weather data dictionary
        include_all_events: Whether to include the full event list in the response

    Returns:
        Structured response with weather-optimized suggestions
    """
    current_temp = weather.get("temperature", {}).get("value")
    conditions = weather.get("phrase", "").lower()

    # Interpret the raw conditions once, then rank every event against them.
    conditions_analysis = _analyze_weather_conditions(current_temp, conditions)
    ranked_suggestions = _generate_recommendations(
        events=events,
        weather_analysis=conditions_analysis,
        temp=current_temp,
        phrase=conditions
    )

    payload = {
        "weather": weather,
        "weather_summary": _create_weather_summary(current_temp, conditions),
        # Cap the suggestion list at the configured maximum.
        "suggestions": ranked_suggestions[:EventWeatherConfig.MAX_RECOMMENDATIONS],
        "tenant_id": tenant_id,
        "event_count": len(events),
        "timestamp": datetime.utcnow().isoformat(),
        "weather_analysis": conditions_analysis
    }

    # Callers may opt in to receiving the unranked full list as well.
    if include_all_events:
        payload["all_events"] = events

    return payload
|
| 373 |
+
|
| 374 |
+
|
| 375 |
+
# --- HELPER FUNCTIONS (Penny's Intelligence Layer) ---
|
| 376 |
+
|
| 377 |
+
def _analyze_weather_conditions(temp: Optional[float], phrase: str) -> Dict[str, Any]:
    """
    🧠 Penny's weather interpretation logic.
    Returns structured analysis of current conditions.

    Args:
        temp: Temperature in Fahrenheit, or None when unavailable
        phrase: Weather description phrase (matched lowercase against keyword lists)

    Returns:
        Dictionary with keys: is_rainy, is_snowy, is_nice, temp_category,
        outdoor_friendly, indoor_preferred. When temp is None, temp_category
        stays None and both suitability flags stay False.
    """
    analysis = {
        "is_rainy": any(keyword in phrase for keyword in WeatherThresholds.RAINY_KEYWORDS),
        "is_snowy": any(keyword in phrase for keyword in WeatherThresholds.SNOWY_KEYWORDS),
        "is_nice": any(keyword in phrase for keyword in WeatherThresholds.NICE_KEYWORDS),
        "temp_category": None,
        "outdoor_friendly": False,
        "indoor_preferred": False
    }

    # BUG FIX: compare against None explicitly. The previous `if temp:` treated
    # a legitimate reading of exactly 0°F as "no temperature", skipping the
    # entire categorization below.
    if temp is not None:
        if temp >= WeatherThresholds.HOT_THRESHOLD:
            analysis["temp_category"] = "hot"
        elif temp >= WeatherThresholds.WARM_THRESHOLD:
            analysis["temp_category"] = "warm"
        elif temp >= WeatherThresholds.COOL_THRESHOLD:
            analysis["temp_category"] = "mild"
        elif temp >= WeatherThresholds.COLD_THRESHOLD:
            analysis["temp_category"] = "cool"
        else:
            analysis["temp_category"] = "cold"

        # Outdoor-friendly = at least "mild" temperature + not rainy/snowy
        analysis["outdoor_friendly"] = (
            temp >= WeatherThresholds.COOL_THRESHOLD and
            not analysis["is_rainy"] and
            not analysis["is_snowy"]
        )

        # Indoor preferred = cold or rainy or snowy
        analysis["indoor_preferred"] = (
            temp < WeatherThresholds.COOL_THRESHOLD or
            analysis["is_rainy"] or
            analysis["is_snowy"]
        )

    return analysis
|
| 425 |
+
|
| 426 |
+
|
| 427 |
+
def _generate_recommendations(
    events: List[Dict[str, Any]],
    weather_analysis: Dict[str, Any],
    temp: Optional[float],
    phrase: str
) -> List[str]:
    """
    🎯 Penny's event recommendation engine.
    Ranks events by weather fit (best first) and renders each as a friendly,
    emoji-tagged suggestion string in Penny's voice.

    Args:
        events: List of available events
        weather_analysis: Weather condition analysis
        temp: Current temperature
        phrase: Weather description

    Returns:
        List of formatted event suggestions, best weather fit first
    """
    # Score every event, then order descending by score (stable for ties).
    ranked = sorted(
        ((_calculate_event_weather_score(ev, weather_analysis), ev) for ev in events),
        key=lambda pair: pair[0],
        reverse=True
    )

    # Render each (score, event) pair through Penny's messaging layer.
    return [
        _create_suggestion_message(
            event_name=ev.get("name", "Unnamed Event"),
            event_category=ev.get("category", "").lower(),
            event_location=ev.get("location", ""),
            score=fit_score,
            weather_analysis=weather_analysis,
            temp=temp,
            phrase=phrase
        )
        for fit_score, ev in ranked
    ]
|
| 477 |
+
|
| 478 |
+
|
| 479 |
+
def _calculate_event_weather_score(
|
| 480 |
+
event: Dict[str, Any],
|
| 481 |
+
weather_analysis: Dict[str, Any]
|
| 482 |
+
) -> int:
|
| 483 |
+
"""
|
| 484 |
+
📊 Scores event suitability based on weather (0-100).
|
| 485 |
+
Higher = better match for current conditions.
|
| 486 |
+
|
| 487 |
+
Args:
|
| 488 |
+
event: Event dictionary with category information
|
| 489 |
+
weather_analysis: Weather condition analysis
|
| 490 |
+
|
| 491 |
+
Returns:
|
| 492 |
+
Integer score from 0-100
|
| 493 |
+
"""
|
| 494 |
+
category = event.get("category", "").lower()
|
| 495 |
+
score = 50 # Neutral baseline
|
| 496 |
+
|
| 497 |
+
# Perfect matches
|
| 498 |
+
if "outdoor" in category and weather_analysis["outdoor_friendly"]:
|
| 499 |
+
score = 95
|
| 500 |
+
elif "indoor" in category and weather_analysis["indoor_preferred"]:
|
| 501 |
+
score = 90
|
| 502 |
+
|
| 503 |
+
# Good matches
|
| 504 |
+
elif "indoor" in category and not weather_analysis["outdoor_friendly"]:
|
| 505 |
+
score = 75
|
| 506 |
+
elif "outdoor" in category and weather_analysis["temp_category"] in ["warm", "mild"]:
|
| 507 |
+
score = 70
|
| 508 |
+
|
| 509 |
+
# Acceptable matches
|
| 510 |
+
elif "civic" in category or "community" in category:
|
| 511 |
+
score = 60 # Usually indoor, weather-neutral
|
| 512 |
+
|
| 513 |
+
# Poor matches (but still list them)
|
| 514 |
+
elif "outdoor" in category and weather_analysis["indoor_preferred"]:
|
| 515 |
+
score = 30
|
| 516 |
+
|
| 517 |
+
return score
|
| 518 |
+
|
| 519 |
+
|
| 520 |
+
def _create_suggestion_message(
|
| 521 |
+
event_name: str,
|
| 522 |
+
event_category: str,
|
| 523 |
+
event_location: str,
|
| 524 |
+
score: int,
|
| 525 |
+
weather_analysis: Dict[str, Any],
|
| 526 |
+
temp: Optional[float],
|
| 527 |
+
phrase: str
|
| 528 |
+
) -> str:
|
| 529 |
+
"""
|
| 530 |
+
💬 Penny's voice: Generates natural, helpful event suggestions.
|
| 531 |
+
Adapts tone based on weather fit score.
|
| 532 |
+
|
| 533 |
+
Args:
|
| 534 |
+
event_name: Name of the event
|
| 535 |
+
event_category: Event category (outdoor, indoor, etc.)
|
| 536 |
+
event_location: Event location/venue
|
| 537 |
+
score: Weather suitability score (0-100)
|
| 538 |
+
weather_analysis: Weather condition analysis
|
| 539 |
+
temp: Current temperature
|
| 540 |
+
phrase: Weather description
|
| 541 |
+
|
| 542 |
+
Returns:
|
| 543 |
+
Formatted suggestion string with emoji and helpful context
|
| 544 |
+
"""
|
| 545 |
+
location_text = f" at {event_location}" if event_location else ""
|
| 546 |
+
|
| 547 |
+
# PERFECT MATCHES (90-100)
|
| 548 |
+
if score >= 90:
|
| 549 |
+
if "outdoor" in event_category:
|
| 550 |
+
return f"🌟 **{event_name}**{location_text} — Perfect outdoor weather! This is the one."
|
| 551 |
+
else:
|
| 552 |
+
return f"🏛️ **{event_name}**{location_text} — Ideal indoor activity for today's weather!"
|
| 553 |
+
|
| 554 |
+
# GOOD MATCHES (70-89)
|
| 555 |
+
elif score >= 70:
|
| 556 |
+
if "outdoor" in event_category:
|
| 557 |
+
return f"☀️ **{event_name}**{location_text} — Great day for outdoor activities!"
|
| 558 |
+
else:
|
| 559 |
+
return f"🔵 **{event_name}**{location_text} — Solid indoor option!"
|
| 560 |
+
|
| 561 |
+
# DECENT MATCHES (50-69)
|
| 562 |
+
elif score >= 50:
|
| 563 |
+
if "outdoor" in event_category:
|
| 564 |
+
temp_text = f" (It's {int(temp)}°F)" if temp else ""
|
| 565 |
+
return f"🌤️ **{event_name}**{location_text} — Weather's okay for outdoor events{temp_text}."
|
| 566 |
+
else:
|
| 567 |
+
return f"⚪ **{event_name}**{location_text} — Weather-neutral activity."
|
| 568 |
+
|
| 569 |
+
# POOR MATCHES (Below 50)
|
| 570 |
+
else:
|
| 571 |
+
if "outdoor" in event_category and weather_analysis["is_rainy"]:
|
| 572 |
+
return f"🌧️ **{event_name}**{location_text} — Outdoor event, but it's rainy. Bring an umbrella or check if it's postponed!"
|
| 573 |
+
elif "outdoor" in event_category and weather_analysis.get("temp_category") == "cold":
|
| 574 |
+
return f"❄️ **{event_name}**{location_text} — Outdoor event, but bundle up — it's chilly!"
|
| 575 |
+
else:
|
| 576 |
+
return f"⚪ **{event_name}**{location_text} — Check weather before heading out."
|
| 577 |
+
|
| 578 |
+
|
| 579 |
+
def _create_weather_summary(temp: Optional[float], phrase: str) -> str:
|
| 580 |
+
"""
|
| 581 |
+
🌤️ Penny's plain-English weather summary.
|
| 582 |
+
|
| 583 |
+
Args:
|
| 584 |
+
temp: Temperature in Fahrenheit
|
| 585 |
+
phrase: Weather description phrase
|
| 586 |
+
|
| 587 |
+
Returns:
|
| 588 |
+
Human-readable weather summary
|
| 589 |
+
"""
|
| 590 |
+
if not temp:
|
| 591 |
+
return f"Current conditions: {phrase.title()}"
|
| 592 |
+
|
| 593 |
+
temp_desc = ""
|
| 594 |
+
if temp >= 85:
|
| 595 |
+
temp_desc = "hot"
|
| 596 |
+
elif temp >= 70:
|
| 597 |
+
temp_desc = "warm"
|
| 598 |
+
elif temp >= 60:
|
| 599 |
+
temp_desc = "mild"
|
| 600 |
+
elif temp >= 40:
|
| 601 |
+
temp_desc = "cool"
|
| 602 |
+
else:
|
| 603 |
+
temp_desc = "cold"
|
| 604 |
+
|
| 605 |
+
return f"It's {temp_desc} at {int(temp)}°F — {phrase.lower()}."
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
# --- ERROR RESPONSE HELPERS (Penny stays helpful even in failures) ---
|
| 609 |
+
|
| 610 |
+
def _create_no_events_response(tenant_id: str) -> Dict[str, Any]:
|
| 611 |
+
"""
|
| 612 |
+
Returns friendly response when no events are found.
|
| 613 |
+
|
| 614 |
+
Args:
|
| 615 |
+
tenant_id: City identifier
|
| 616 |
+
|
| 617 |
+
Returns:
|
| 618 |
+
Structured response with helpful message
|
| 619 |
+
"""
|
| 620 |
+
return {
|
| 621 |
+
"weather": {},
|
| 622 |
+
"suggestions": [
|
| 623 |
+
f"🤔 I don't have any events loaded for {tenant_id} right now. "
|
| 624 |
+
"Let me know if you'd like me to check again or add some!"
|
| 625 |
+
],
|
| 626 |
+
"tenant_id": tenant_id,
|
| 627 |
+
"event_count": 0,
|
| 628 |
+
"timestamp": datetime.utcnow().isoformat()
|
| 629 |
+
}
|
| 630 |
+
|
| 631 |
+
|
| 632 |
+
def _create_error_response(
    tenant_id: str,
    error_type: str,
    message: str
) -> Dict[str, Any]:
    """
    Build a structured error payload while keeping Penny's helpful tone.

    Args:
        tenant_id: City identifier
        error_type: Structured error type code
        message: User-friendly error message

    Returns:
        Error response dictionary with the message surfaced as a suggestion
    """
    logger.error(f"Error in event_weather: {error_type} - {message}")

    error_payload = {
        "weather": {},
        "suggestions": [f"⚠️ {message}"],
        "tenant_id": tenant_id,
        "event_count": 0,
        "error_type": error_type,
        "timestamp": datetime.utcnow().isoformat()
    }
    return error_payload
|
| 657 |
+
|
| 658 |
+
|
| 659 |
+
def _create_fallback_response(
    tenant_id: str,
    events: List[Dict[str, Any]]
) -> Dict[str, Any]:
    """
    Graceful degradation: list events even when the weather service is down.
    Penny stays helpful!

    Args:
        tenant_id: City identifier
        events: List of available events

    Returns:
        Fallback response with events but no weather optimization
    """
    # Lead with the outage notice, then list up to the configured maximum.
    suggestions = ["⚠️ Weather service is temporarily unavailable, but here are today's events:"]
    for event in events[:EventWeatherConfig.MAX_FALLBACK_EVENTS]:
        suggestions.append(
            f"📅 **{event.get('name', 'Event')}** — {event.get('category', 'Community event')}"
        )

    return {
        "weather": {"error": "Weather service unavailable"},
        "suggestions": suggestions,
        "tenant_id": tenant_id,
        "event_count": len(events),
        "timestamp": datetime.utcnow().isoformat(),
        "fallback_mode": True
    }
|
| 692 |
+
|
| 693 |
+
|
| 694 |
+
# --- STRUCTURED LOGGING HELPER ---
|
| 695 |
+
|
| 696 |
+
def _log_operation(
    operation: str,
    tenant_id: str,
    success: bool,
    event_count: int,
    response_time_ms: int,
    fallback_used: bool,
    weather_available: bool,
    session_id: Optional[str] = None,
    user_id: Optional[str] = None,
    error_type: Optional[str] = None,
    error_message: Optional[str] = None
) -> None:
    """
    Log an event-weather operation as structured data.

    Args:
        operation: Operation name
        tenant_id: City identifier
        success: Whether operation succeeded
        event_count: Number of events processed
        response_time_ms: Total response time in milliseconds
        fallback_used: Whether fallback mode was used
        weather_available: Whether weather data was available
        session_id: Optional session identifier
        user_id: Optional user identifier
        error_type: Optional error type if failed
        error_message: Optional error message if failed
    """
    entry = {
        "operation": operation,
        "tenant_id": sanitize_for_logging(tenant_id),
        "success": success,
        "event_count": event_count,
        "response_time_ms": response_time_ms,
        "fallback_used": fallback_used,
        "weather_available": weather_available,
        "timestamp": datetime.utcnow().isoformat()
    }

    # Optional fields are attached only when present; user-supplied values
    # are sanitized, the structured error_type code is not.
    for key, value, needs_sanitizing in (
        ("session_id", session_id, True),
        ("user_id", user_id, True),
        ("error_type", error_type, False),
        ("error_message", error_message, True),
    ):
        if value:
            entry[key] = sanitize_for_logging(value) if needs_sanitizing else value

    log_interaction(entry)
|
| 749 |
+
|
| 750 |
+
|
| 751 |
+
def _calculate_response_time(start_time: float) -> int:
|
| 752 |
+
"""
|
| 753 |
+
Calculate response time in milliseconds.
|
| 754 |
+
|
| 755 |
+
Args:
|
| 756 |
+
start_time: Operation start time from time.time()
|
| 757 |
+
|
| 758 |
+
Returns:
|
| 759 |
+
Response time in milliseconds
|
| 760 |
+
"""
|
| 761 |
+
return int((time.time() - start_time) * 1000)
|
gemma_utils.py
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/gemma/gemma_utils.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Gemma Model Utilities for PENNY Project
|
| 5 |
+
Handles text generation using the Gemma-based core language model pipeline.
|
| 6 |
+
Provides async generation with structured error handling and logging.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import time
|
| 11 |
+
from typing import Dict, Any, Optional
|
| 12 |
+
|
| 13 |
+
# --- Logging Imports ---
|
| 14 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 15 |
+
|
| 16 |
+
# --- Model Loader Import ---
|
| 17 |
+
try:
|
| 18 |
+
from app.model_loader import load_model_pipeline
|
| 19 |
+
MODEL_LOADER_AVAILABLE = True
|
| 20 |
+
except ImportError:
|
| 21 |
+
MODEL_LOADER_AVAILABLE = False
|
| 22 |
+
import logging
|
| 23 |
+
logging.getLogger(__name__).warning("Could not import load_model_pipeline. Gemma service unavailable.")
|
| 24 |
+
|
| 25 |
+
# Global variable to store the loaded pipeline for re-use
|
| 26 |
+
GEMMA_PIPELINE: Optional[Any] = None
|
| 27 |
+
AGENT_NAME = "penny-core-agent"
|
| 28 |
+
INITIALIZATION_ATTEMPTED = False
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _initialize_gemma_pipeline() -> bool:
    """
    Load the Gemma pipeline, attempting it only once per process.

    Returns:
        bool: True if initialization succeeded, False otherwise.
    """
    global GEMMA_PIPELINE, INITIALIZATION_ATTEMPTED

    # Repeat calls just report the outcome of the first attempt.
    if INITIALIZATION_ATTEMPTED:
        return GEMMA_PIPELINE is not None
    INITIALIZATION_ATTEMPTED = True

    if not MODEL_LOADER_AVAILABLE:
        log_interaction(
            intent="gemma_initialization",
            success=False,
            error="model_loader unavailable"
        )
        return False

    try:
        log_interaction(
            intent="gemma_initialization",
            success=None,
            details=f"Loading {AGENT_NAME}"
        )

        GEMMA_PIPELINE = load_model_pipeline(AGENT_NAME)

        # A None pipeline counts as a failed load.
        if GEMMA_PIPELINE is None:
            log_interaction(
                intent="gemma_initialization",
                success=False,
                error="Pipeline returned None"
            )
            return False

        log_interaction(
            intent="gemma_initialization",
            success=True,
            details=f"Model {AGENT_NAME} loaded successfully"
        )
        return True

    except Exception as e:
        log_interaction(
            intent="gemma_initialization",
            success=False,
            error=str(e)
        )
        return False
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# Attempt initialization at module load
|
| 87 |
+
_initialize_gemma_pipeline()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def is_gemma_available() -> bool:
    """
    Report whether the Gemma text-generation service is ready for use.

    Returns:
        bool: True when the module-level pipeline has been loaded.
    """
    return GEMMA_PIPELINE is not None
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
async def generate_response(
    prompt: str,
    max_new_tokens: int = 256,
    temperature: float = 0.7,
    tenant_id: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Runs text generation using the loaded Gemma pipeline.

    Args:
        prompt: The conversational or instruction prompt.
        max_new_tokens: The maximum number of tokens to generate (default: 256).
        temperature: Controls randomness in generation (default: 0.7).
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
            - response (str): The generated text
            - available (bool): Whether the service was available
            - error (str, optional): Error message if generation failed
            - response_time_ms (int, optional): Generation time in milliseconds
    """
    start_time = time.time()

    # Check availability
    if not is_gemma_available():
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Gemma pipeline not available",
            fallback_used=True
        )
        return {
            "response": "I'm having trouble accessing my language model right now. Please try again in a moment!",
            "available": False,
            "error": "Pipeline not initialized"
        }

    # Validate inputs
    if not prompt or not isinstance(prompt, str):
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Invalid prompt provided"
        )
        return {
            "response": "I didn't receive a valid prompt. Could you try again?",
            "available": True,
            "error": "Invalid input"
        }

    # Configure generation parameters
    gen_kwargs = {
        "max_new_tokens": max_new_tokens,
        "temperature": temperature,
        # Greedy decoding at temperature 0, sampling otherwise.
        "do_sample": temperature > 0.0,
        "return_full_text": False
    }

    try:
        # FIX: get_running_loop() is the supported way to reach the loop from
        # inside a coroutine; relying on get_event_loop() here is deprecated
        # behavior since Python 3.10.
        loop = asyncio.get_running_loop()

        # Run model inference in a thread executor so the event loop stays free.
        results = await loop.run_in_executor(
            None,
            lambda: GEMMA_PIPELINE(prompt, **gen_kwargs)
        )

        response_time_ms = int((time.time() - start_time) * 1000)

        # Parse results: expect a list of dicts with a 'generated_text' key.
        if results and isinstance(results, list) and len(results) > 0:
            if isinstance(results[0], dict) and 'generated_text' in results[0]:
                generated_text = results[0]['generated_text'].strip()

                # Log slow responses separately for monitoring.
                if response_time_ms > 5000:
                    log_interaction(
                        intent="gemma_generate_slow",
                        tenant_id=tenant_id,
                        success=True,
                        response_time_ms=response_time_ms,
                        details="Slow generation detected"
                    )

                log_interaction(
                    intent="gemma_generate",
                    tenant_id=tenant_id,
                    success=True,
                    response_time_ms=response_time_ms,
                    prompt_preview=sanitize_for_logging(prompt[:100])
                )

                return {
                    "response": generated_text,
                    "available": True,
                    "response_time_ms": response_time_ms
                }

        # Unexpected output format
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Unexpected model output format",
            response_time_ms=response_time_ms
        )

        return {
            "response": "I got an unexpected response from my language model. Let me try to help you another way!",
            "available": True,
            "error": "Unexpected output format"
        }

    except asyncio.CancelledError:
        # Propagate cancellation after recording it.
        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error="Generation cancelled"
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="gemma_generate",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            fallback_used=True
        )

        return {
            "response": "I'm having trouble generating a response right now. Please try again!",
            "available": False,
            "error": str(e),
            "response_time_ms": response_time_ms
        }
|
handler.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# handler.py
|
| 2 |
+
from app.orchestrator import PennyOrchestrator
|
| 3 |
+
from typing import Dict, Any
|
| 4 |
+
import json
|
| 5 |
+
|
| 6 |
+
class EndpointHandler:
    """Hugging Face Inference Endpoints entry point wrapping PENNY's orchestrator."""

    def __init__(self, path=""):
        """Initialize PENNY orchestrator when endpoint starts"""
        print("🤖 Initializing PENNY...")
        self.orchestrator = PennyOrchestrator()
        print("✅ PENNY ready!")

    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Handle inference requests from Hugging Face
        """
        # Pull request fields, falling back to safe defaults.
        message_text = data.get("inputs", "")
        city = data.get("tenant_id", "default")
        requester = data.get("user_id", "anonymous")
        conversation = data.get("session_id", None)

        # All routing and response logic lives in the orchestrator.
        return self.orchestrator.process(
            message=message_text,
            tenant_id=city,
            user_id=requester,
            session_id=conversation
        )
|
intents.py
ADDED
|
@@ -0,0 +1,481 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/intents.py
|
| 2 |
+
"""
|
| 3 |
+
🎯 Penny's Intent Classification System
|
| 4 |
+
Rule-based intent classifier designed for civic engagement queries.
|
| 5 |
+
|
| 6 |
+
CURRENT: Simple keyword matching (fast, predictable, debuggable)
|
| 7 |
+
FUTURE: Will upgrade to ML/embedding-based classification (Gemma/LayoutLM)
|
| 8 |
+
|
| 9 |
+
This approach allows Penny to understand resident needs and route them
|
| 10 |
+
to the right civic systems — weather, resources, events, translation, etc.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
from typing import Dict, List, Optional
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
from enum import Enum
|
| 17 |
+
|
| 18 |
+
# --- LOGGING SETUP (Azure-friendly) ---
|
| 19 |
+
logger = logging.getLogger(__name__)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# --- INTENT CATEGORIES (Enumerated for type safety) ---
class IntentType(str, Enum):
    """
    Penny's supported intent categories.
    Each maps to a specific civic assistance pathway.

    Subclasses ``str`` so members compare equal to — and serialize as —
    their plain string values, which keeps existing string-based callers
    (and JSON responses) working without conversion.
    """
    WEATHER = "weather"
    GREETING = "greeting"
    LOCAL_RESOURCES = "local_resources"
    EVENTS = "events"
    TRANSLATION = "translation"
    SENTIMENT_ANALYSIS = "sentiment_analysis"
    BIAS_DETECTION = "bias_detection"
    DOCUMENT_PROCESSING = "document_processing"
    HELP = "help"
    EMERGENCY = "emergency"  # Critical safety routing — always checked first
    UNKNOWN = "unknown"      # Fallback when no pattern matches
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class IntentMatch:
    """
    Structured intent classification result.

    Carries the winning intent plus a confidence score and the keywords
    that triggered it, so classifications can be audited and debugged.
    """
    intent: IntentType
    confidence: float  # 0.0 - 1.0
    matched_keywords: List[str]
    is_compound: bool = False  # True if query spans multiple intents
    secondary_intents: List[IntentType] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Serialize this match for logging and API responses."""
        secondary = [extra.value for extra in self.secondary_intents]
        return dict(
            intent=self.intent.value,
            confidence=self.confidence,
            matched_keywords=self.matched_keywords,
            is_compound=self.is_compound,
            secondary_intents=secondary,
        )
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# --- INTENT KEYWORD PATTERNS (Organized by priority) ---
class IntentPatterns:
    """
    Penny's keyword patterns for intent matching.
    Organized by priority — critical intents checked first.

    Each class attribute is a plain list of lowercase keywords/phrases
    consumed by the classifier's keyword matcher; ordering within a list
    has no effect on classification.
    """

    # 🚨 PRIORITY 1: EMERGENCY & SAFETY (Always check first)
    EMERGENCY = [
        "911", "emergency", "urgent", "crisis", "danger", "help me",
        "suicide", "overdose", "assault", "abuse", "threatening",
        "hurt myself", "hurt someone", "life threatening"
    ]

    # 🌍 PRIORITY 2: TRANSLATION (High civic value)
    TRANSLATION = [
        "translate", "in spanish", "in french", "in portuguese",
        "in german", "in chinese", "in arabic", "in vietnamese",
        "in russian", "in korean", "in japanese", "in tagalog",
        "convert to", "say this in", "how do i say", "what is", "in hindi"
    ]

    # 📄 PRIORITY 3: DOCUMENT PROCESSING (Forms, PDFs)
    DOCUMENT_PROCESSING = [
        "process this document", "extract data", "analyze pdf",
        "upload form", "read this file", "scan this", "form help",
        "fill out", "document", "pdf", "application", "permit"
    ]

    # 🔍 PRIORITY 4: ANALYSIS TOOLS
    SENTIMENT_ANALYSIS = [
        "how does this sound", "is this positive", "is this negative",
        "analyze", "sentiment", "feel about", "mood", "tone"
    ]

    BIAS_DETECTION = [
        "is this biased", "check bias", "check fairness", "is this neutral",
        "biased", "objective", "subjective", "fair", "discriminatory"
    ]

    # 🌤️ PRIORITY 5: WEATHER + EVENTS (Compound intent handling)
    WEATHER = [
        "weather", "rain", "snow", "sunny", "forecast", "temperature",
        "hot", "cold", "storm", "wind", "outside", "climate",
        "degrees", "celsius", "fahrenheit"
    ]

    # Specific date/time keywords that suggest event context
    # (used only to detect compound "events + when" queries, never alone)
    DATE_TIME = [
        "today", "tomorrow", "this weekend", "next week",
        "sunday", "monday", "tuesday", "wednesday", "thursday", "friday", "saturday",
        "tonight", "this morning", "this afternoon", "this evening"
    ]

    EVENTS = [
        "event", "things to do", "what's happening", "activities",
        "festival", "concert", "activity", "community event",
        "show", "performance", "gathering", "meetup", "celebration"
    ]

    # 🏛️ PRIORITY 6: LOCAL RESOURCES (Core civic mission)
    LOCAL_RESOURCES = [
        "resource", "shelter", "library", "help center",
        "food bank", "warming center", "cooling center", "csb",
        "mental health", "housing", "community service",
        "trash", "recycling", "transit", "bus", "schedule",
        "clinic", "hospital", "pharmacy", "assistance",
        "utility", "water", "electric", "gas", "bill"
    ]

    # 💬 PRIORITY 7: CONVERSATIONAL
    GREETING = [
        "hi", "hello", "hey", "what's up", "good morning",
        "good afternoon", "good evening", "howdy", "yo",
        "greetings", "sup", "hiya"
    ]

    HELP = [
        "help", "how do i", "can you help", "i need help",
        "what can you do", "how does this work", "instructions",
        "guide", "tutorial", "show me how"
    ]
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def classify_intent(message: str) -> str:
    """
    🎯 Main classification function (backward-compatible).

    Thin wrapper over classify_intent_detailed() that collapses the rich
    IntentMatch result to the plain intent string the existing API expects.

    Args:
        message: User's query text

    Returns:
        Intent string (e.g., "weather", "events", "translation")
    """
    try:
        # Delegate to the detailed classifier and keep only the label.
        return classify_intent_detailed(message).intent.value
    except Exception as e:
        logger.error(f"Intent classification failed: {e}", exc_info=True)
        return IntentType.UNKNOWN.value
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def classify_intent_detailed(message: str) -> IntentMatch:
    """
    🧠 Enhanced classification with confidence scores and metadata.

    Classification proceeds in strict priority order:
    1. Emergency keywords are checked FIRST (safety routing, confidence 1.0).
    2. Compound intents (events + weather/date) are detected before plain ones.
    3. The first matching category wins; no further categories are evaluated.

    Args:
        message: User's query text

    Returns:
        IntentMatch object with full classification details
    """

    if not message or not message.strip():
        logger.warning("Empty message received for intent classification")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[]
        )

    try:
        text = message.lower().strip()
        logger.debug(f"Classifying intent for: '{text[:50]}...'")

        # --- PRIORITY 1: EMERGENCY (Critical safety routing) ---
        hits = _find_keyword_matches(text, IntentPatterns.EMERGENCY)
        if hits:
            logger.warning(f"🚨 EMERGENCY intent detected: {hits}")
            return IntentMatch(
                intent=IntentType.EMERGENCY,
                confidence=1.0,  # Always high confidence for safety
                matched_keywords=hits
            )

        # --- PRIORITIES 2-4: high-value single intents, checked in order ---
        high_priority = (
            (IntentPatterns.TRANSLATION, IntentType.TRANSLATION, 0.9),
            (IntentPatterns.DOCUMENT_PROCESSING, IntentType.DOCUMENT_PROCESSING, 0.9),
            (IntentPatterns.SENTIMENT_ANALYSIS, IntentType.SENTIMENT_ANALYSIS, 0.85),
            (IntentPatterns.BIAS_DETECTION, IntentType.BIAS_DETECTION, 0.85),
        )
        for patterns, label, score in high_priority:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=label,
                    confidence=score,
                    matched_keywords=hits
                )

        # --- PRIORITY 5: COMPOUND INTENT HANDLING (Weather + Events) ---
        weather_hits = _find_keyword_matches(text, IntentPatterns.WEATHER)
        event_hits = _find_keyword_matches(text, IntentPatterns.EVENTS)
        date_hits = _find_keyword_matches(text, IntentPatterns.DATE_TIME)

        # e.g. "What events are happening this weekend?"
        # or "What's the weather like for Sunday's festival?"
        if event_hits and (weather_hits or date_hits):
            logger.info("Compound intent detected: events + weather/date")
            return IntentMatch(
                intent=IntentType.EVENTS,  # Primary intent
                confidence=0.85,
                matched_keywords=event_hits + weather_hits + date_hits,
                is_compound=True,
                secondary_intents=[IntentType.WEATHER]
            )

        # --- PRIORITY 6: SIMPLE WEATHER INTENT ---
        if weather_hits:
            return IntentMatch(
                intent=IntentType.WEATHER,
                confidence=0.9,
                matched_keywords=weather_hits
            )

        # --- PRIORITY 7: LOCAL RESOURCES ---
        resource_hits = _find_keyword_matches(text, IntentPatterns.LOCAL_RESOURCES)
        if resource_hits:
            return IntentMatch(
                intent=IntentType.LOCAL_RESOURCES,
                confidence=0.9,
                matched_keywords=resource_hits
            )

        # --- PRIORITY 8: EVENTS (Simple check) ---
        if event_hits:
            return IntentMatch(
                intent=IntentType.EVENTS,
                confidence=0.85,
                matched_keywords=event_hits
            )

        # --- PRIORITY 9: CONVERSATIONAL ---
        conversational = (
            (IntentPatterns.GREETING, IntentType.GREETING, 0.8),
            (IntentPatterns.HELP, IntentType.HELP, 0.9),
        )
        for patterns, label, score in conversational:
            hits = _find_keyword_matches(text, patterns)
            if hits:
                return IntentMatch(
                    intent=label,
                    confidence=score,
                    matched_keywords=hits
                )

        # --- FALLBACK: UNKNOWN ---
        logger.info(f"No clear intent match for: '{text[:50]}...'")
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[]
        )

    except Exception as e:
        logger.error(f"Error during intent classification: {e}", exc_info=True)
        return IntentMatch(
            intent=IntentType.UNKNOWN,
            confidence=0.0,
            matched_keywords=[],
        )
|
| 314 |
+
|
| 315 |
+
|
| 316 |
+
# --- HELPER FUNCTIONS ---
|
| 317 |
+
|
| 318 |
+
def _find_keyword_matches(text: str, keywords: List[str]) -> List[str]:
|
| 319 |
+
"""
|
| 320 |
+
Finds which keywords from a pattern list appear in the user's message.
|
| 321 |
+
|
| 322 |
+
Args:
|
| 323 |
+
text: Normalized user message (lowercase)
|
| 324 |
+
keywords: List of keywords to search for
|
| 325 |
+
|
| 326 |
+
Returns:
|
| 327 |
+
List of matched keywords (for debugging/logging)
|
| 328 |
+
"""
|
| 329 |
+
try:
|
| 330 |
+
matches = []
|
| 331 |
+
for keyword in keywords:
|
| 332 |
+
if keyword in text:
|
| 333 |
+
matches.append(keyword)
|
| 334 |
+
return matches
|
| 335 |
+
except Exception as e:
|
| 336 |
+
logger.error(f"Error finding keyword matches: {e}", exc_info=True)
|
| 337 |
+
return []
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
def get_intent_description(intent: IntentType) -> str:
    """
    🗣️ Penny's plain-English explanation of what each intent does.

    Useful for help systems and debugging.

    Args:
        intent: IntentType enum value

    Returns:
        Human-readable description of the intent; a generic fallback is
        returned for unrecognized values.
    """
    intent_help = {
        IntentType.WEATHER: "Get current weather conditions and forecasts for your area",
        IntentType.GREETING: "Start a conversation with Penny",
        IntentType.LOCAL_RESOURCES: "Find community resources like shelters, libraries, and services",
        IntentType.EVENTS: "Discover local events and activities happening in your city",
        IntentType.TRANSLATION: "Translate text between 27 languages",
        IntentType.SENTIMENT_ANALYSIS: "Analyze the emotional tone of text",
        IntentType.BIAS_DETECTION: "Check text for potential bias or fairness issues",
        IntentType.DOCUMENT_PROCESSING: "Process PDFs and forms to extract information",
        IntentType.HELP: "Learn how to use Penny's features",
        IntentType.EMERGENCY: "Connect with emergency services and crisis support",
        IntentType.UNKNOWN: "I'm not sure what you're asking — can you rephrase?",
    }
    fallback = "Unknown intent type"
    return intent_help.get(intent, fallback)
|
| 365 |
+
|
| 366 |
+
|
| 367 |
+
def get_all_supported_intents() -> Dict[str, str]:
    """
    📋 Returns all supported intents with descriptions.

    Useful for /help endpoints and documentation. The UNKNOWN sentinel is
    excluded because it is not a user-facing capability.

    Returns:
        Dictionary mapping intent values to descriptions
    """
    try:
        catalog: Dict[str, str] = {}
        for intent in IntentType:
            if intent is IntentType.UNKNOWN:
                continue
            catalog[intent.value] = get_intent_description(intent)
        return catalog
    except Exception as e:
        logger.error(f"Error getting supported intents: {e}", exc_info=True)
        return {}
|
| 384 |
+
|
| 385 |
+
|
| 386 |
+
# --- FUTURE ML UPGRADE HOOK ---
def classify_intent_ml(message: str, use_embedding_model: bool = False) -> IntentMatch:
    """
    🔮 PLACEHOLDER for future ML-based classification.

    Planned upgrade path from keyword matching to embeddings:
    1. Load Gemma-7B or sentence-transformers model
    2. Generate message embeddings
    3. Compare to intent prototype embeddings
    4. Return top match with confidence score

    Args:
        message: User's query
        use_embedding_model: If True, use ML model (not implemented yet)

    Returns:
        IntentMatch object (currently always produced by the rule-based path)
    """
    if use_embedding_model:
        logger.warning("ML-based classification not yet implemented. Falling back to rules.")

    # Until the ML path exists, every call routes through the rule engine.
    return classify_intent_detailed(message)
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
# --- TESTING & VALIDATION ---
def validate_intent_patterns() -> Dict[str, List[str]]:
    """
    🧪 Validates that all intent patterns are properly configured.
    Returns any overlapping keywords that might cause conflicts.

    Returns:
        Dictionary mapping "intentA_vs_intentB" (names in alphabetical
        order) to the list of keywords shared by that pair. Empty dict
        when there are no overlaps or on internal error.
    """
    from itertools import combinations  # local import: top-level imports untouched

    try:
        all_patterns = {
            "emergency": IntentPatterns.EMERGENCY,
            "translation": IntentPatterns.TRANSLATION,
            "document": IntentPatterns.DOCUMENT_PROCESSING,
            "sentiment": IntentPatterns.SENTIMENT_ANALYSIS,
            "bias": IntentPatterns.BIAS_DETECTION,
            "weather": IntentPatterns.WEATHER,
            "events": IntentPatterns.EVENTS,
            "resources": IntentPatterns.LOCAL_RESOURCES,
            "greeting": IntentPatterns.GREETING,
            "help": IntentPatterns.HELP
        }

        overlaps: Dict[str, List[str]] = {}

        # combinations() over the alphabetically sorted items visits each
        # unordered pair exactly once, replacing the previous
        # `intent1 >= intent2: continue` string-comparison dedupe trick
        # while keeping the same "smaller_vs_larger" key format.
        for (name_a, kws_a), (name_b, kws_b) in combinations(sorted(all_patterns.items()), 2):
            shared = set(kws_a) & set(kws_b)
            if shared:
                overlaps[f"{name_a}_vs_{name_b}"] = list(shared)

        if overlaps:
            logger.warning(f"Found keyword overlaps between intents: {overlaps}")

        return overlaps

    except Exception as e:
        logger.error(f"Error validating intent patterns: {e}", exc_info=True)
        return {}
|
| 456 |
+
|
| 457 |
+
|
| 458 |
+
# --- LOGGING SAMPLE CLASSIFICATIONS (For monitoring) ---
def log_intent_classification(message: str, result: IntentMatch) -> None:
    """
    📊 Logs classification results for Azure Application Insights.
    Helps track intent distribution and confidence patterns.

    Args:
        message: Original user message (truncated for PII safety)
        result: IntentMatch classification result
    """
    try:
        # Keep at most 50 characters of the raw message (PII safety).
        preview = message if len(message) <= 50 else message[:50] + "..."

        logger.info(
            f"Intent classified | "
            f"intent={result.intent.value} | "
            f"confidence={result.confidence:.2f} | "
            f"compound={result.is_compound} | "
            f"keywords={result.matched_keywords[:5]} | "  # Limit logged keywords
            f"message_preview='{preview}'"
        )
    except Exception as e:
        logger.error(f"Error logging intent classification: {e}", exc_info=True)
|
layoutlm_utils.py
ADDED
|
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/layoutlm/layoutlm_utils.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
LayoutLM Model Utilities for PENNY Project
|
| 5 |
+
Handles document structure extraction and field recognition for civic forms and documents.
|
| 6 |
+
Provides async document processing with structured error handling and logging.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import time
|
| 11 |
+
from typing import Dict, Any, Optional, List
|
| 12 |
+
from io import BytesIO
|
| 13 |
+
|
| 14 |
+
# --- Logging Imports ---
|
| 15 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 16 |
+
|
| 17 |
+
# --- Model Loader Import ---
|
| 18 |
+
try:
|
| 19 |
+
from app.model_loader import load_model_pipeline
|
| 20 |
+
MODEL_LOADER_AVAILABLE = True
|
| 21 |
+
except ImportError:
|
| 22 |
+
MODEL_LOADER_AVAILABLE = False
|
| 23 |
+
import logging
|
| 24 |
+
logging.getLogger(__name__).warning("Could not import load_model_pipeline. LayoutLM service unavailable.")
|
| 25 |
+
|
| 26 |
+
# Global variable to store the loaded pipeline for re-use
|
| 27 |
+
LAYOUTLM_PIPELINE: Optional[Any] = None
|
| 28 |
+
AGENT_NAME = "penny-doc-agent"
|
| 29 |
+
INITIALIZATION_ATTEMPTED = False
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _initialize_layoutlm_pipeline() -> bool:
    """
    Initializes the LayoutLM pipeline only once.

    Idempotent: the first call flips INITIALIZATION_ATTEMPTED so later
    calls simply report the outcome of that attempt without retrying the
    (potentially expensive) model load. Every outcome — loader missing,
    load failure, None pipeline, success — is recorded via log_interaction.

    Returns:
        bool: True if initialization succeeded, False otherwise.
    """
    global LAYOUTLM_PIPELINE, INITIALIZATION_ATTEMPTED

    if INITIALIZATION_ATTEMPTED:
        # A previous attempt already ran; report its outcome, never retry.
        return LAYOUTLM_PIPELINE is not None

    INITIALIZATION_ATTEMPTED = True

    if not MODEL_LOADER_AVAILABLE:
        log_interaction(
            intent="layoutlm_initialization",
            success=False,
            error="model_loader unavailable"
        )
        return False

    try:
        # success=None marks an in-progress attempt in the interaction log.
        log_interaction(
            intent="layoutlm_initialization",
            success=None,
            details=f"Loading {AGENT_NAME}"
        )

        LAYOUTLM_PIPELINE = load_model_pipeline(AGENT_NAME)

        if LAYOUTLM_PIPELINE is None:
            # Loader returned without raising but produced no pipeline.
            log_interaction(
                intent="layoutlm_initialization",
                success=False,
                error="Pipeline returned None"
            )
            return False

        log_interaction(
            intent="layoutlm_initialization",
            success=True,
            details=f"Model {AGENT_NAME} loaded successfully"
        )
        return True

    except Exception as e:
        log_interaction(
            intent="layoutlm_initialization",
            success=False,
            error=str(e)
        )
        return False
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
# Attempt initialization eagerly at module import so the first request
# does not pay the model-load latency; failures are logged, not raised.
_initialize_layoutlm_pipeline()
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def is_layoutlm_available() -> bool:
    """
    Check if LayoutLM service is available.

    Returns:
        bool: True if the LayoutLM pipeline was loaded and is ready.
    """
    loaded = LAYOUTLM_PIPELINE is not None
    return loaded
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
async def extract_document_data(
    file_bytes: bytes,
    file_name: str,
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Processes a document (e.g., PDF, image) using LayoutLM to extract structured data.

    Validation order: service availability → input types → 50MB size cap.
    Inference runs in the default thread executor so the event loop stays
    responsive during model work.

    Args:
        file_bytes: The raw bytes of the uploaded file.
        file_name: The original name of the file (e.g., form.pdf).
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
            - status (str): "success" or "error"
            - extracted_fields (dict, optional): Extracted key-value pairs
            - available (bool): Whether the service was available
            - message (str, optional): Error message if extraction failed
            - response_time_ms (int, optional): Processing time in milliseconds
            - fields_count (int, optional): Number of extracted fields on success
    """
    start_time = time.time()

    # --- Availability gate: fail fast with a friendly message ---
    if not is_layoutlm_available():
        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error="LayoutLM pipeline not available",
            fallback_used=True
        )
        return {
            "status": "error",
            "available": False,
            "message": "Document processing is temporarily unavailable. Please try uploading your document again in a moment!"
        }

    # --- Input validation: bytes payload ---
    if not file_bytes or not isinstance(file_bytes, bytes):
        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error="Invalid file_bytes provided"
        )
        return {
            "status": "error",
            "available": True,
            "message": "I didn't receive valid document data. Could you try uploading your file again?"
        }

    # --- Input validation: file name ---
    if not file_name or not isinstance(file_name, str):
        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error="Invalid file_name provided"
        )
        return {
            "status": "error",
            "available": True,
            "message": "I need a valid file name to process your document. Please try again!"
        }

    # --- Size cap: prevent processing extremely large files ---
    file_size_mb = len(file_bytes) / (1024 * 1024)
    if file_size_mb > 50:  # 50 MB limit
        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error=f"File too large: {file_size_mb:.2f}MB",
            file_name=sanitize_for_logging(file_name)
        )
        return {
            "status": "error",
            "available": True,
            "message": f"Your file is too large ({file_size_mb:.1f}MB). Please upload a document smaller than 50MB."
        }

    try:
        # --- Real-world step (PLACEHOLDER) ---
        # In a real implementation, you would:
        # 1. Use a library (e.g., PyMuPDF, pdf2image) to convert PDF bytes to image(s).
        # 2. Use PIL/Pillow to load the image(s) from bytes.
        # 3. Pass the PIL Image object to the LayoutLM pipeline.
        image_mock = {
            "file_name": file_name,
            "byte_size": len(file_bytes)
        }

        # get_running_loop() is the correct API inside a coroutine;
        # get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_running_loop()

        # Run model inference in the default thread executor so the event
        # loop is not blocked. Passing the callable and its argument
        # directly avoids a needless lambda closure.
        results = await loop.run_in_executor(None, LAYOUTLM_PIPELINE, image_mock)

        response_time_ms = int((time.time() - start_time) * 1000)

        # Validate model output shape before interpreting it.
        if not results or not isinstance(results, list):
            log_interaction(
                intent="layoutlm_extract",
                tenant_id=tenant_id,
                success=False,
                error="Unexpected model output format",
                response_time_ms=response_time_ms,
                file_name=sanitize_for_logging(file_name)
            )
            return {
                "status": "error",
                "available": True,
                "message": "I had trouble understanding the document structure. The file might be corrupted or in an unsupported format."
            }

        # Convert model output (list of dicts) into a clean key-value format.
        extracted_data = {}
        for item in results:
            if isinstance(item, dict) and 'label' in item and 'text' in item:
                label_key = item['label'].lower().strip()
                text_value = str(item['text']).strip()

                # Drop fields whose text is empty after trimming.
                if text_value:
                    extracted_data[label_key] = text_value

        # Emit a dedicated signal for slow processing (> 10 seconds).
        if response_time_ms > 10000:
            log_interaction(
                intent="layoutlm_extract_slow",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                details="Slow document processing detected",
                file_name=sanitize_for_logging(file_name)
            )

        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            file_name=sanitize_for_logging(file_name),
            fields_extracted=len(extracted_data)
        )

        return {
            "status": "success",
            "extracted_fields": extracted_data,
            "available": True,
            "response_time_ms": response_time_ms,
            "fields_count": len(extracted_data)
        }

    except asyncio.CancelledError:
        # Cancellation must propagate so the caller's task machinery works.
        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error="Processing cancelled",
            file_name=sanitize_for_logging(file_name)
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="layoutlm_extract",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            file_name=sanitize_for_logging(file_name),
            fallback_used=True
        )

        # NOTE(review): reports available=False after an inference failure,
        # unlike the validation errors above which report True — confirm
        # callers treat this as "service degraded, retry later" on purpose.
        return {
            "status": "error",
            "available": False,
            "message": "I encountered an issue while processing your document. Please try again, or contact support if this continues!",
            "error": str(e),
            "response_time_ms": response_time_ms
        }
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
async def validate_document_fields(
    extracted_fields: Dict[str, str],
    required_fields: List[str],
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Validates that required fields were successfully extracted from a document.

    Field-name comparison is case-insensitive and ignores surrounding
    whitespace, so "Invoice Number" matches "invoice number ".

    Args:
        extracted_fields: Dictionary of extracted field names and values.
        required_fields: List of field names that must be present.
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
        - valid (bool): Whether all required fields are present
        - missing_fields (list): List of missing required fields
        - present_fields (list): List of found required fields
    """
    # Defensive type checks: these inputs may originate from upstream
    # model output or API payloads, so fail soft instead of raising.
    if not isinstance(extracted_fields, dict):
        log_interaction(
            intent="layoutlm_validate",
            tenant_id=tenant_id,
            success=False,
            error="Invalid extracted_fields type"
        )
        return {
            "valid": False,
            "missing_fields": required_fields,
            "present_fields": []
        }

    if not isinstance(required_fields, list):
        log_interaction(
            intent="layoutlm_validate",
            tenant_id=tenant_id,
            success=False,
            error="Invalid required_fields type"
        )
        return {
            "valid": False,
            "missing_fields": [],
            "present_fields": []
        }

    # Normalize extracted keys once for case-insensitive membership tests.
    # (The previous version also built a normalized set of required keys
    # that was never used; it has been removed.)
    extracted_keys = {k.lower().strip() for k in extracted_fields.keys()}

    present_fields = [f for f in required_fields if f.lower().strip() in extracted_keys]
    missing_fields = [f for f in required_fields if f.lower().strip() not in extracted_keys]

    is_valid = len(missing_fields) == 0

    log_interaction(
        intent="layoutlm_validate",
        tenant_id=tenant_id,
        success=is_valid,
        details=f"Validated {len(present_fields)}/{len(required_fields)} required fields"
    )

    return {
        "valid": is_valid,
        "missing_fields": missing_fields,
        "present_fields": present_fields
    }
|
location_utils.py
ADDED
|
@@ -0,0 +1,717 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/location_utils.py
|
| 2 |
+
"""
|
| 3 |
+
🗺️ Penny's Location Intelligence System
|
| 4 |
+
Handles city detection, tenant routing, and geographic data loading.
|
| 5 |
+
|
| 6 |
+
MISSION: Connect residents to the right local resources, regardless of how
|
| 7 |
+
they describe their location — whether it's "Atlanta", "ATL", "30303", or "near me".
|
| 8 |
+
|
| 9 |
+
CURRENT: Rule-based city matching with 6 supported cities
|
| 10 |
+
FUTURE: Will add ZIP→city mapping, geocoding API, and user location preferences
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import re
|
| 14 |
+
import json
|
| 15 |
+
import os
|
| 16 |
+
import logging
|
| 17 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
from dataclasses import dataclass
|
| 20 |
+
from enum import Enum
|
| 21 |
+
|
| 22 |
+
# --- LOGGING SETUP (Azure-friendly) ---
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
# --- BASE PATHS (OS-agnostic for Azure/Windows/Linux) ---
|
| 26 |
+
BASE_DIR = Path(__file__).parent.parent.resolve()
|
| 27 |
+
DATA_PATH = BASE_DIR / "data"
|
| 28 |
+
EVENTS_PATH = DATA_PATH / "events"
|
| 29 |
+
RESOURCES_PATH = DATA_PATH / "resources"
|
| 30 |
+
|
| 31 |
+
# Ensure critical directories exist (Azure deployment safety).
# mkdir(parents=True, exist_ok=True) is idempotent, so repeated app
# startups (or pre-provisioned volumes) are harmless.
for path in [DATA_PATH, EVENTS_PATH, RESOURCES_PATH]:
    path.mkdir(parents=True, exist_ok=True)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ============================================================
|
| 37 |
+
# CITY REGISTRY (Penny's Supported Cities)
|
| 38 |
+
# ============================================================
|
| 39 |
+
|
| 40 |
+
@dataclass
class CityInfo:
    """
    Structured metadata for a city Penny supports.

    Adding a new city only requires instantiating this dataclass with
    the appropriate routing and geographic details.
    """
    tenant_id: str    # Canonical id: "<city>_<state>", e.g. "atlanta_ga"
    full_name: str    # Human-readable name, e.g. "Atlanta, GA"
    state: str        # Two-letter state code
    aliases: List[str]    # Spellings/nicknames users might type
    timezone: str     # IANA timezone, e.g. "America/New_York"
    lat: Optional[float] = None    # Latitude for weather API fallback
    lon: Optional[float] = None    # Longitude for weather API fallback

    def __post_init__(self):
        # Store aliases lowercased and trimmed so matching is
        # case- and whitespace-insensitive.
        self.aliases = [a.lower().strip() for a in self.aliases]
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class SupportedCities:
    """
    🏙️ Penny's city registry.

    Each supported city is declared exactly once as a CityInfo class
    attribute; the classmethods below expose them for iteration and lookup.
    """

    ATLANTA = CityInfo(
        tenant_id="atlanta_ga",
        full_name="Atlanta, GA",
        state="GA",
        timezone="America/New_York",
        lat=33.7490,
        lon=-84.3880,
        aliases=[
            "atlanta", "atl", "atlanta ga", "atlanta, ga",
            "city of atlanta", "hotlanta", "the atl"
        ]
    )

    BIRMINGHAM = CityInfo(
        tenant_id="birmingham_al",
        full_name="Birmingham, AL",
        state="AL",
        timezone="America/Chicago",
        lat=33.5207,
        lon=-86.8025,
        aliases=[
            "birmingham", "birmingham al", "birmingham, al",
            "city of birmingham", "bham"
        ]
    )

    CHESTERFIELD = CityInfo(
        tenant_id="chesterfield_va",
        full_name="Chesterfield, VA",
        state="VA",
        timezone="America/New_York",
        lat=37.3771,
        lon=-77.5047,
        aliases=[
            "chesterfield", "chesterfield va", "chesterfield, va",
            "chesterfield county"
        ]
    )

    EL_PASO = CityInfo(
        tenant_id="el_paso_tx",
        full_name="El Paso, TX",
        state="TX",
        timezone="America/Denver",
        lat=31.7619,
        lon=-106.4850,
        aliases=[
            "el paso", "el paso tx", "el paso, tx",
            "city of el paso", "elpaso"
        ]
    )

    PROVIDENCE = CityInfo(
        tenant_id="providence_ri",
        full_name="Providence, RI",
        state="RI",
        timezone="America/New_York",
        lat=41.8240,
        lon=-71.4128,
        aliases=[
            "providence", "providence ri", "providence, ri",
            "city of providence", "pvd"
        ]
    )

    SEATTLE = CityInfo(
        tenant_id="seattle_wa",
        full_name="Seattle, WA",
        state="WA",
        timezone="America/Los_Angeles",
        lat=47.6062,
        lon=-122.3321,
        aliases=[
            "seattle", "seattle wa", "seattle, wa",
            "city of seattle", "emerald city", "sea"
        ]
    )

    @classmethod
    def get_all_cities(cls) -> List[CityInfo]:
        """Returns list of all supported cities."""
        return [
            cls.ATLANTA, cls.BIRMINGHAM, cls.CHESTERFIELD,
            cls.EL_PASO, cls.PROVIDENCE, cls.SEATTLE,
        ]

    @classmethod
    def get_city_by_tenant_id(cls, tenant_id: str) -> Optional[CityInfo]:
        """Lookup city info by tenant ID."""
        return next(
            (city for city in cls.get_all_cities() if city.tenant_id == tenant_id),
            None,
        )
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# ============================================================
|
| 165 |
+
# BUILD DYNAMIC CITY PATTERNS (from CityInfo registry)
|
| 166 |
+
# ============================================================
|
| 167 |
+
|
| 168 |
+
def _build_city_patterns() -> Dict[str, str]:
    """
    Generates city matching dictionary from the CityInfo registry.
    This keeps the pattern matching backward-compatible with existing code.

    Returns:
        Mapping of lowercase alias -> tenant_id for every registered city.
    """
    return {
        alias: city.tenant_id
        for city in SupportedCities.get_all_cities()
        for alias in city.aliases
    }
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
# Dynamic pattern dictionary (auto-generated from city registry).
# Maps each lowercase alias string -> tenant_id; built once at import time,
# so adding a city to SupportedCities automatically extends matching.
REAL_CITY_PATTERNS = _build_city_patterns()
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
# ============================================================
|
| 185 |
+
# LOCATION DETECTION ENUMS
|
| 186 |
+
# ============================================================
|
| 187 |
+
|
| 188 |
+
class LocationStatus(str, Enum):
    """
    Status codes for location detection results.

    Subclasses str so members serialize cleanly (e.g. in JSON responses)
    and compare equal to their plain string values.
    """
    FOUND = "found"                                # Valid city matched
    ZIP_DETECTED = "zip_detected"                  # ZIP code found (needs mapping)
    USER_LOCATION_NEEDED = "user_location_needed"  # "near me" detected
    UNKNOWN = "unknown"                            # No match found
    AMBIGUOUS = "ambiguous"                        # Multiple possible matches
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
@dataclass
class LocationMatch:
    """
    Structured result from location detection.
    Includes confidence and matched patterns for debugging.
    """
    status: LocationStatus                    # Outcome of the detection attempt
    tenant_id: Optional[str] = None           # City id (e.g. "atlanta_ga") when found
    city_info: Optional[CityInfo] = None      # Full registry entry when found
    confidence: float = 0.0                   # Match confidence, 0.0 - 1.0
    matched_pattern: Optional[str] = None     # Alias or "zip:XXXXX" pattern that matched
    alternatives: Optional[List[str]] = None  # Other candidate tenant_ids; normalized to [] below

    def __post_init__(self):
        # None is used as the default sentinel to avoid a shared mutable
        # default list; every instance gets its own fresh list here.
        if self.alternatives is None:
            self.alternatives = []
|
| 215 |
+
|
| 216 |
+
|
| 217 |
+
# ============================================================
|
| 218 |
+
# ZIP CODE PATTERNS (for future expansion)
|
| 219 |
+
# ============================================================
|
| 220 |
+
|
| 221 |
+
# Matches 12345 or 12345-6789 (ZIP / ZIP+4), bounded so it won't fire
# inside longer digit runs.
ZIP_PATTERN = re.compile(r"\b\d{5}(?:-\d{4})?\b")

# Future ZIP → City mapping (placeholder).
# NOTE(review): only a small sample of ZIPs per metro is covered here;
# unmapped ZIPs are reported as ZIP_DETECTED rather than resolved.
ZIP_TO_CITY_MAP: Dict[str, str] = {
    # Atlanta metro
    "30303": "atlanta_ga",
    "30318": "atlanta_ga",
    "30309": "atlanta_ga",

    # Birmingham metro
    "35203": "birmingham_al",
    "35233": "birmingham_al",

    # Chesterfield County
    "23832": "chesterfield_va",
    "23838": "chesterfield_va",

    # El Paso
    "79901": "el_paso_tx",
    "79936": "el_paso_tx",

    # Providence
    "02903": "providence_ri",
    "02904": "providence_ri",

    # Seattle metro
    "98101": "seattle_wa",
    "98104": "seattle_wa",
    "98122": "seattle_wa",
}
|
| 251 |
+
|
| 252 |
+
|
| 253 |
+
# ============================================================
|
| 254 |
+
# MAIN CITY EXTRACTION LOGIC (Enhanced)
|
| 255 |
+
# ============================================================
|
| 256 |
+
|
| 257 |
+
def extract_city_name(text: str) -> str:
    """
    🎯 BACKWARD-COMPATIBLE location extraction (returns tenant_id string).

    Thin wrapper around extract_location_detailed() that collapses the
    structured result into a single string for legacy callers.

    Args:
        text: User's location input (e.g., "Atlanta", "30303", "near me")

    Returns:
        Tenant ID string or status code:
        - Valid tenant_id (e.g., "atlanta_ga")
        - "zip_detected" (ZIP code found, needs mapping)
        - "user_location_needed" ("near me" detected)
        - "unknown" (no match)
    """
    detailed = extract_location_detailed(text)
    # Prefer a concrete tenant_id; otherwise surface the status code string.
    if detailed.tenant_id:
        return detailed.tenant_id
    return detailed.status.value
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
def extract_location_detailed(text: str) -> LocationMatch:
    """
    🧠 ENHANCED location extraction with confidence scoring.

    This function intelligently parses location references and returns
    structured results with metadata for better error handling.

    Detection order: "near me" phrases → ZIP codes → city alias patterns.

    Args:
        text: User's location input

    Returns:
        LocationMatch object with full detection details
    """

    if not text or not text.strip():
        logger.warning("Empty text provided to location extraction")
        return LocationMatch(
            status=LocationStatus.UNKNOWN,
            confidence=0.0
        )

    lowered = text.lower().strip()
    logger.debug(f"Extracting location from: '{lowered}'")

    # --- STEP 1: Check for "near me" / location services needed ---
    near_me_phrases = [
        "near me", "my area", "my city", "my neighborhood",
        "where i am", "current location", "my location",
        "around here", "locally", "in my town"
    ]

    if any(phrase in lowered for phrase in near_me_phrases):
        logger.info("User location services required")
        return LocationMatch(
            status=LocationStatus.USER_LOCATION_NEEDED,
            confidence=1.0,
            matched_pattern="near_me_detected"
        )

    # --- STEP 2: Check for ZIP codes ---
    zip_matches = ZIP_PATTERN.findall(text)
    if zip_matches:
        zip_code = zip_matches[0]  # Take first ZIP if multiple

        # Try to map ZIP to known city
        if zip_code in ZIP_TO_CITY_MAP:
            tenant_id = ZIP_TO_CITY_MAP[zip_code]
            city_info = SupportedCities.get_city_by_tenant_id(tenant_id)
            logger.info(f"ZIP {zip_code} mapped to {tenant_id}")
            return LocationMatch(
                status=LocationStatus.FOUND,
                tenant_id=tenant_id,
                city_info=city_info,
                confidence=0.95,
                matched_pattern=f"zip:{zip_code}"
            )
        else:
            logger.info(f"ZIP code detected but not mapped: {zip_code}")
            return LocationMatch(
                status=LocationStatus.ZIP_DETECTED,
                confidence=0.5,
                matched_pattern=f"zip:{zip_code}"
            )

    # --- STEP 3: Match against city patterns ---
    # BUGFIX: match aliases on word boundaries instead of raw substring
    # containment. Short aliases such as "sea" or "atl" previously
    # false-positived inside unrelated words ("search", "battle").
    matches = []
    for pattern, tenant_id in REAL_CITY_PATTERNS.items():
        if re.search(r"\b" + re.escape(pattern) + r"\b", lowered):
            matches.append((pattern, tenant_id))

    if not matches:
        logger.info(f"No city match found for: '{lowered}'")
        return LocationMatch(
            status=LocationStatus.UNKNOWN,
            confidence=0.0
        )

    # If multiple matches, pick the longest pattern (most specific)
    # Example: "atlanta" vs "city of atlanta" — pick the longer one
    matches.sort(key=lambda x: len(x[0]), reverse=True)
    best_pattern, best_tenant_id = matches[0]

    city_info = SupportedCities.get_city_by_tenant_id(best_tenant_id)

    # Confidence = how much of the input the matched alias accounts for.
    confidence = min(len(best_pattern) / len(lowered), 1.0)

    result = LocationMatch(
        status=LocationStatus.FOUND,
        tenant_id=best_tenant_id,
        city_info=city_info,
        confidence=confidence,
        matched_pattern=best_pattern
    )

    # Check for ambiguity (multiple different cities matched)
    unique_tenant_ids = set(tid for _, tid in matches)
    if len(unique_tenant_ids) > 1:
        result.status = LocationStatus.AMBIGUOUS
        result.alternatives = [tid for _, tid in matches if tid != best_tenant_id]
        logger.warning(f"Ambiguous location match: {unique_tenant_ids}")

    logger.info(f"Location matched: {best_tenant_id} (confidence: {confidence:.2f})")
    return result
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
# ============================================================
|
| 384 |
+
# DATA LOADING UTILITIES (Enhanced with error handling)
|
| 385 |
+
# ============================================================
|
| 386 |
+
|
| 387 |
+
def load_city_data(directory: Path, tenant_id: str) -> Dict[str, Any]:
    """
    🗄️ Generic utility to load JSON data for a given tenant ID.

    Args:
        directory: Base path (EVENTS_PATH or RESOURCES_PATH)
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Parsed JSON content as dictionary

    Raises:
        FileNotFoundError: If the JSON file doesn't exist
        json.JSONDecodeError: If the file is malformed
    """
    file_path = directory / f"{tenant_id}.json"

    # Fail fast with a clear error before attempting to open.
    if not file_path.exists():
        logger.error(f"Data file not found: {file_path}")
        raise FileNotFoundError(f"Data file not found: {file_path}")

    try:
        with open(file_path, 'r', encoding='utf-8') as fh:
            parsed = json.load(fh)
    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in {file_path}: {e}")
        raise
    except Exception as e:
        logger.error(f"Error reading {file_path}: {e}", exc_info=True)
        raise
    else:
        logger.debug(f"Loaded data from {file_path}")
        return parsed
|
| 420 |
+
|
| 421 |
+
|
| 422 |
+
def load_city_events(tenant_id: str) -> Dict[str, Any]:
    """
    📅 Loads structured event data for a given city.

    Convenience wrapper over load_city_data() bound to EVENTS_PATH.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Event data structure with 'events' key containing list of events

    Example:
        {
            "city": "Atlanta, GA",
            "events": [
                {"name": "Jazz Festival", "category": "outdoor", ...},
                ...
            ]
        }
    """
    logger.info(f"Loading events for {tenant_id}")
    return load_city_data(EVENTS_PATH, tenant_id)
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def load_city_resources(tenant_id: str) -> Dict[str, Any]:
    """
    🏛️ Loads civic resource data for a given city.

    Convenience wrapper over load_city_data() bound to RESOURCES_PATH.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga')

    Returns:
        Resource data structure with categorized resources

    Example:
        {
            "city": "Atlanta, GA",
            "resources": {
                "shelters": [...],
                "food_banks": [...],
                "libraries": [...]
            }
        }
    """
    logger.info(f"Loading resources for {tenant_id}")
    return load_city_data(RESOURCES_PATH, tenant_id)
|
| 467 |
+
|
| 468 |
+
|
| 469 |
+
# ============================================================
|
| 470 |
+
# UTILITY FUNCTIONS
|
| 471 |
+
# ============================================================
|
| 472 |
+
|
| 473 |
+
def normalize_location_name(text: str) -> str:
    """
    🧹 Normalize location names into consistent format.
    Lowercases, trims, and strips whitespace, hyphens, commas, and periods.

    Example:
        "El Paso, TX" → "elpasotx"
        "Chesterfield County" → "chesterfieldcounty"
    """
    if not text:
        return ""

    # Collapse separators/punctuation so variant spellings compare equal.
    return re.sub(r"[\s\-,\.]+", "", text.lower().strip())
|
| 488 |
+
|
| 489 |
+
|
| 490 |
+
def get_city_coordinates(tenant_id: str) -> Optional[Dict[str, float]]:
    """
    🗺️ Returns coordinates for a city as a dictionary.
    Useful for weather API calls.

    Args:
        tenant_id: City identifier

    Returns:
        Dictionary with "lat" and "lon" keys, or None if the city is
        unknown or has no coordinates registered.

    Note: This function returns a dict for consistency with orchestrator usage.
    Use tuple unpacking: coords = get_city_coordinates(tenant_id); lat, lon = coords["lat"], coords["lon"]
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    if city is None or city.lat is None or city.lon is None:
        return None
    return {"lat": city.lat, "lon": city.lon}
|
| 508 |
+
|
| 509 |
+
|
| 510 |
+
def get_city_info(tenant_id: str) -> Optional[Dict[str, Any]]:
    """
    🏙️ Returns city information dictionary.

    Args:
        tenant_id: City identifier

    Returns:
        Dictionary with city information (name, state, coordinates, etc.)
        or None if the tenant_id is not registered.
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    if city is None:
        return None

    return {
        "tenant_id": city.tenant_id,
        "full_name": city.full_name,
        "state": city.state,
        "timezone": city.timezone,
        "lat": city.lat,
        "lon": city.lon,
        "aliases": city.aliases,
    }
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
def detect_location_from_text(text: str) -> Dict[str, Any]:
    """
    🔍 Detects location from text input.

    Args:
        text: User input text

    Returns:
        Dictionary with keys:
        - found: bool (whether location was detected)
        - tenant_id: str (if found)
        - city_info: dict (if found)
        - confidence: float (0.0-1.0)
        - status: str (LocationStatus value)
    """
    match = extract_location_detailed(text)

    # Summarize the registry entry (when present) into plain dict form
    # suitable for JSON responses.
    city_summary = None
    if match.city_info is not None:
        city_summary = {
            "tenant_id": match.city_info.tenant_id,
            "full_name": match.city_info.full_name,
            "state": match.city_info.state,
        }

    return {
        "found": match.status == LocationStatus.FOUND,
        "tenant_id": match.tenant_id,
        "city_info": city_summary,
        "confidence": match.confidence,
        "status": match.status.value,
    }
|
| 561 |
+
|
| 562 |
+
|
| 563 |
+
def validate_coordinates(lat: float, lon: float) -> Tuple[bool, Optional[str]]:
    """
    ✅ Validates latitude and longitude coordinates.

    Args:
        lat: Latitude (-90 to 90)
        lon: Longitude (-180 to 180)

    Returns:
        Tuple of (is_valid, error_message)
        - is_valid: True if coordinates are valid
        - error_message: None if valid, error description if invalid
    """
    # Reject non-numeric input up front (strings, None, ...).
    numeric_types = (int, float)
    if not (isinstance(lat, numeric_types) and isinstance(lon, numeric_types)):
        return False, "Coordinates must be numeric values"

    # The "not (a <= x <= b)" form also rejects NaN, since NaN fails
    # every comparison.
    if not (-90 <= lat <= 90):
        return False, f"Latitude must be between -90 and 90, got {lat}"

    if not (-180 <= lon <= 180):
        return False, f"Longitude must be between -180 and 180, got {lon}"

    return True, None
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def get_city_timezone(tenant_id: str) -> Optional[str]:
    """
    🕐 Returns IANA timezone string for a city.
    Useful for time-sensitive features (events, business hours).

    Args:
        tenant_id: City identifier

    Returns:
        IANA timezone string (e.g., "America/New_York") or None
    """
    city = SupportedCities.get_city_by_tenant_id(tenant_id)
    if city is None:
        return None
    return city.timezone
|
| 601 |
+
|
| 602 |
+
|
| 603 |
+
def validate_tenant_id(tenant_id: str) -> bool:
    """
    ✅ Checks if a tenant_id is valid and supported.

    Args:
        tenant_id: City identifier to validate

    Returns:
        True if valid and supported, False otherwise
    """
    return SupportedCities.get_city_by_tenant_id(tenant_id) is not None
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
def get_all_supported_cities() -> List[Dict[str, str]]:
    """
    📋 Returns list of all supported cities for API responses.

    Returns:
        List of city info dictionaries with tenant_id and display name

    Example:
        [
            {"tenant_id": "atlanta_ga", "name": "Atlanta, GA"},
            {"tenant_id": "seattle_wa", "name": "Seattle, WA"},
            ...
        ]
    """
    summaries = []
    for city in SupportedCities.get_all_cities():
        summaries.append({
            "tenant_id": city.tenant_id,
            "name": city.full_name,
            "state": city.state,
        })
    return summaries
|
| 639 |
+
|
| 640 |
+
|
| 641 |
+
# ============================================================
|
| 642 |
+
# DATA VALIDATION (For startup checks)
|
| 643 |
+
# ============================================================
|
| 644 |
+
|
| 645 |
+
def validate_city_data_files() -> Dict[str, Dict[str, bool]]:
    """
    🧪 Validates that all expected data files exist.
    Useful for startup checks and deployment verification.

    Returns:
        Dictionary mapping tenant_id to file existence status

    Example:
        {
            "atlanta_ga": {"events": True, "resources": True},
            "seattle_wa": {"events": False, "resources": True}
        }
    """
    validation_results = {}

    for city in SupportedCities.get_all_cities():
        tenant_id = city.tenant_id
        # Stat each file once and reuse the result (the previous version
        # called .exists() a second time per file for the warnings).
        has_events = (EVENTS_PATH / f"{tenant_id}.json").exists()
        has_resources = (RESOURCES_PATH / f"{tenant_id}.json").exists()

        validation_results[tenant_id] = {
            "events": has_events,
            "resources": has_resources
        }

        if not has_events:
            logger.warning(f"Missing events file for {tenant_id}")
        if not has_resources:
            logger.warning(f"Missing resources file for {tenant_id}")

    return validation_results
|
| 677 |
+
|
| 678 |
+
|
| 679 |
+
# ============================================================
|
| 680 |
+
# INITIALIZATION CHECK (Call on app startup)
|
| 681 |
+
# ============================================================
|
| 682 |
+
|
| 683 |
+
def initialize_location_system() -> bool:
    """
    🚀 Startup check for the location subsystem.

    Verifies the data directory is present and reports per-city data file
    coverage. Missing per-city files only produce warnings — the system
    still initializes so partial deployments keep working.

    Returns:
        True if system is ready, False if critical files missing
    """
    logger.info("🗺️ Initializing Penny's location system...")

    # The data directory itself is the only hard requirement.
    if not DATA_PATH.exists():
        logger.error(f"Data directory not found: {DATA_PATH}")
        return False

    validation = validate_city_data_files()

    city_count = len(SupportedCities.get_all_cities())
    with_events = sum(1 for status in validation.values() if status["events"])
    with_resources = sum(1 for status in validation.values() if status["resources"])

    logger.info(f"✅ {city_count} cities registered")
    logger.info(f"✅ {with_events}/{city_count} cities have event data")
    logger.info(f"✅ {with_resources}/{city_count} cities have resource data")

    # Surface any city that is missing either data file, but don't fail.
    incomplete = [
        tenant for tenant, status in validation.items()
        if not (status["events"] and status["resources"])
    ]
    if incomplete:
        logger.warning(f"⚠️ Incomplete data for cities: {incomplete}")

    logger.info("🗺️ Location system initialized successfully")
    return True
|
logging_utils.py
ADDED
|
@@ -0,0 +1,778 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/logging_utils.py
|
| 2 |
+
"""
|
| 3 |
+
📊 Penny's Logging & Analytics System
|
| 4 |
+
Tracks user interactions, system performance, and civic engagement patterns.
|
| 5 |
+
|
| 6 |
+
MISSION: Create an audit trail that helps improve Penny's service while
|
| 7 |
+
respecting user privacy and meeting compliance requirements.
|
| 8 |
+
|
| 9 |
+
FEATURES:
|
| 10 |
+
- Structured JSON logging for Azure Application Insights
|
| 11 |
+
- Daily log rotation for long-term storage
|
| 12 |
+
- Privacy-safe request/response tracking
|
| 13 |
+
- Performance monitoring
|
| 14 |
+
- Error tracking with context
|
| 15 |
+
- Optional Azure Blob Storage integration
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
import json
|
| 19 |
+
import logging
|
| 20 |
+
from datetime import datetime, timezone
|
| 21 |
+
from pathlib import Path
|
| 22 |
+
import os
|
| 23 |
+
from typing import Dict, Any, Optional, List
|
| 24 |
+
from dataclasses import dataclass, asdict
|
| 25 |
+
from enum import Enum
|
| 26 |
+
import hashlib
|
| 27 |
+
|
| 28 |
+
# --- LOGGING SETUP ---
# Module-level logger; handlers/formatters are configured by the app entrypoint.
logger = logging.getLogger(__name__)

# ============================================================
# LOG PATH CONFIGURATION (Environment-aware)
# ============================================================

# Base directories (use pathlib for OS compatibility).
# PROJECT_ROOT resolves to the directory above this package (app/.. -> repo root).
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
LOGS_BASE_DIR = PROJECT_ROOT / "data" / "logs"
# Single combined log file, used only when daily rotation is disabled.
DEFAULT_LOG_PATH = LOGS_BASE_DIR / "penny_combined.jsonl"

# Environment-configurable log path (PENNY_LOG_PATH overrides the default).
LOG_PATH = Path(os.getenv("PENNY_LOG_PATH", str(DEFAULT_LOG_PATH)))

# Ensure log directory exists on import so first write never fails on mkdir.
LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ============================================================
|
| 48 |
+
# LOG LEVEL ENUM (For categorizing log entries)
|
| 49 |
+
# ============================================================
|
| 50 |
+
|
| 51 |
+
class LogLevel(str, Enum):
    """Severity category for a log entry.

    Values mirror Azure Application Insights severity levels so entries
    can be forwarded without remapping.
    """
    DEBUG = "debug"          # verbose diagnostic detail
    INFO = "info"            # routine informational events
    WARNING = "warning"      # something looks off, service continues
    ERROR = "error"          # a request or subsystem failed
    CRITICAL = "critical"    # service-level failure
    AUDIT = "audit"          # compliance / audit-trail records
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class InteractionType(str, Enum):
    """Feature category for a user interaction.

    Lets analytics show which of Penny's capabilities residents
    actually use.
    """
    QUERY = "query"                      # general question
    RESOURCE_LOOKUP = "resource_lookup"  # civic resource discovery
    TRANSLATION = "translation"          # language translation
    EVENT_SEARCH = "event_search"        # event lookups
    WEATHER = "weather"                  # weather inquiries
    DOCUMENT = "document_processing"     # PDF / form processing
    EMERGENCY = "emergency"              # crisis / emergency routing
    GREETING = "greeting"                # conversational greeting
    HELP = "help"                        # help requests
    UNKNOWN = "unknown"                  # anything unclassified
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ============================================================
|
| 82 |
+
# STRUCTURED LOG ENTRY (Type-safe logging)
|
| 83 |
+
# ============================================================
|
| 84 |
+
|
| 85 |
+
@dataclass
class PennyLogEntry:
    """
    📋 One structured record of a Penny interaction.

    Designed to be Application-Insights compatible, privacy-safe (PII is
    masked upstream before it reaches this object), and directly usable
    for analytics and audit/compliance reporting.
    """
    # When the interaction happened (UTC ISO-8601 string)
    timestamp: str

    # What the user asked, and who/where they are
    input: str
    input_length: int
    tenant_id: str
    user_role: str
    interaction_type: InteractionType

    # How Penny answered
    intent: str
    tool_used: Optional[str]
    model_id: Optional[str]
    response_summary: str
    response_length: int
    response_time_ms: Optional[float]

    # Outcome
    log_level: LogLevel
    success: bool
    error_message: Optional[str] = None

    # Optional geolocation context
    lat: Optional[float] = None
    lon: Optional[float] = None
    location_detected: Optional[str] = None

    # Privacy & compliance (session_id is a SHA-256-derived hash, never raw)
    session_id: Optional[str] = None
    contains_pii: bool = False

    # Performance metrics
    tokens_used: Optional[int] = None
    cache_hit: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a plain dict, flattening Enum members to their values."""
        serialized: Dict[str, Any] = {}
        for key, value in asdict(self).items():
            serialized[key] = value.value if isinstance(value, Enum) else value
        return serialized
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ============================================================
|
| 139 |
+
# DAILY LOG ROTATION
|
| 140 |
+
# ============================================================
|
| 141 |
+
|
| 142 |
+
def get_daily_log_path() -> Path:
    """
    🗓️ Return today's (UTC) log file, e.g. data/logs/2025-02-01.jsonl.

    One file per UTC day keeps individual files small, makes archival and
    per-day blob uploads trivial, and lets cleanup_old_logs() prune by
    filename alone.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    daily_file = LOGS_BASE_DIR / f"{today}.jsonl"

    # Recreate the directory if it was removed after import time.
    daily_file.parent.mkdir(parents=True, exist_ok=True)

    return daily_file
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# ============================================================
|
| 165 |
+
# MAIN LOGGING FUNCTION (Enhanced)
|
| 166 |
+
# ============================================================
|
| 167 |
+
|
| 168 |
+
def log_request(
    payload: Dict[str, Any],
    response: Dict[str, Any],
    rotate_daily: bool = True,
    log_level: LogLevel = LogLevel.INFO
) -> None:
    """
    📝 Logs a user interaction with Penny.

    This is the primary logging function called by router.py after
    processing each request. It creates a structured, privacy-safe
    record of the interaction.

    Args:
        payload: Incoming request data from router.py
            (keys read: input, tenant_id, role, session_id, lat, lon)
        response: Final response dictionary from orchestrator
            (keys read: intent, tool, model_id, response, response_time_ms,
            success, error, location_detected, tokens_used, cache_hit)
        rotate_daily: If True, uses daily log files; otherwise LOG_PATH
        log_level: Severity level for this log entry

    Never raises: any failure is caught and routed to _emergency_log.

    Example:
        log_request(
            payload={"input": "What's the weather?", "tenant_id": "atlanta_ga"},
            response={"intent": "weather", "response": "..."}
        )
    """

    try:
        # --- Extract Core Fields ---
        user_input = payload.get("input", "")
        tenant_id = payload.get("tenant_id", "unknown")
        user_role = payload.get("role", "resident")

        # --- Determine Interaction Type ---
        intent = response.get("intent", "unknown")
        interaction_type = _classify_interaction(intent)

        # --- Privacy: Hash Session ID (if provided) ---
        # Raw session IDs are never written; only a SHA-256-derived hash.
        session_id = payload.get("session_id")
        if session_id:
            session_id = _hash_identifier(session_id)

        # --- Detect PII (Simple check - can be enhanced) ---
        contains_pii = _check_for_pii(user_input)

        # --- Create Structured Log Entry ---
        log_entry = PennyLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            input=_sanitize_input(user_input, contains_pii),
            input_length=len(user_input),
            tenant_id=tenant_id,
            user_role=user_role,
            interaction_type=interaction_type,
            intent=intent,
            tool_used=response.get("tool", "none"),
            model_id=response.get("model_id"),
            response_summary=_summarize_response(response.get("response")),
            response_length=len(str(response.get("response", ""))),
            response_time_ms=response.get("response_time_ms"),
            log_level=log_level,
            success=response.get("success", True),
            error_message=response.get("error"),
            lat=payload.get("lat"),
            lon=payload.get("lon"),
            location_detected=response.get("location_detected"),
            session_id=session_id,
            contains_pii=contains_pii,
            tokens_used=response.get("tokens_used"),
            cache_hit=response.get("cache_hit", False)
        )

        # --- Write to File ---
        log_path = get_daily_log_path() if rotate_daily else LOG_PATH
        _write_log_entry(log_path, log_entry)

        # --- Optional: Send to Azure (if enabled) ---
        if os.getenv("AZURE_LOGS_ENABLED", "false").lower() == "true":
            _send_to_azure(log_entry)

        # --- Log to console (for Azure Application Insights) ---
        logger.info(
            f"Request logged | "
            f"tenant={tenant_id} | "
            f"intent={intent} | "
            f"interaction={interaction_type.value} | "
            f"success={log_entry.success}"
        )

    except Exception as e:
        # Failsafe: Never let logging failures crash the application
        logger.error(f"Failed to log request: {e}", exc_info=True)
        _emergency_log(payload, response, str(e))
|
| 259 |
+
|
| 260 |
+
|
| 261 |
+
# ============================================================
|
| 262 |
+
# LOG WRITING (With error handling)
|
| 263 |
+
# ============================================================
|
| 264 |
+
|
| 265 |
+
def _write_log_entry(log_path: Path, log_entry: PennyLogEntry) -> None:
    """
    📁 Append one entry to *log_path* as a JSON Lines record.

    All I/O failures are contained here: on error the entry is echoed to
    the console (captured by Application Insights) instead of being lost.
    """
    try:
        # The directory may have been removed since import time.
        log_path.parent.mkdir(parents=True, exist_ok=True)

        record = json.dumps(log_entry.to_dict(), ensure_ascii=False)
        with open(log_path, "a", encoding="utf-8") as handle:
            handle.write(record + "\n")

    except IOError as e:
        logger.error(f"Failed to write to log file {log_path}: {e}")
        _emergency_log_to_console(log_entry)
    except Exception as e:
        logger.error(f"Unexpected error writing log: {e}", exc_info=True)
        _emergency_log_to_console(log_entry)
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
def _emergency_log_to_console(log_entry: PennyLogEntry) -> None:
    """
    🚨 Last-resort sink: dump the entry to stdout if file writing fails.
    Azure Application Insights captures console output, so nothing is lost.
    """
    serialized = json.dumps(log_entry.to_dict())
    print(f"[EMERGENCY LOG] {serialized}")
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def _emergency_log(payload: Dict, response: Dict, error: str) -> None:
|
| 296 |
+
"""
|
| 297 |
+
🚨 Absolute fallback for when structured logging fails entirely.
|
| 298 |
+
"""
|
| 299 |
+
emergency_entry = {
|
| 300 |
+
"timestamp": datetime.now(timezone.utc).isoformat(),
|
| 301 |
+
"level": "CRITICAL",
|
| 302 |
+
"message": "Logging system failure",
|
| 303 |
+
"error": error,
|
| 304 |
+
"input_preview": str(payload.get("input", ""))[:100],
|
| 305 |
+
"response_preview": str(response.get("response", ""))[:100]
|
| 306 |
+
}
|
| 307 |
+
print(f"[LOGGING FAILURE] {json.dumps(emergency_entry)}")
|
| 308 |
+
|
| 309 |
+
|
| 310 |
+
# ============================================================
|
| 311 |
+
# HELPER FUNCTIONS
|
| 312 |
+
# ============================================================
|
| 313 |
+
|
| 314 |
+
def _classify_interaction(intent: str) -> InteractionType:
    """
    🏷️ Translate a router intent label into an analytics InteractionType.
    Unrecognized intents fall back to InteractionType.UNKNOWN.
    """
    mapping = {
        "weather": InteractionType.WEATHER,
        "events": InteractionType.EVENT_SEARCH,
        "local_resources": InteractionType.RESOURCE_LOOKUP,
        "translation": InteractionType.TRANSLATION,
        "document_processing": InteractionType.DOCUMENT,
        "emergency": InteractionType.EMERGENCY,
        "greeting": InteractionType.GREETING,
        "help": InteractionType.HELP,
    }
    # Case-insensitive lookup so "Weather" and "weather" classify the same.
    return mapping.get(intent.lower(), InteractionType.UNKNOWN)
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
def _summarize_response(resp: Optional[Any]) -> str:
|
| 332 |
+
"""
|
| 333 |
+
✂️ Creates a truncated summary of the response for logging.
|
| 334 |
+
Prevents log files from becoming bloated with full responses.
|
| 335 |
+
"""
|
| 336 |
+
if resp is None:
|
| 337 |
+
return "No response content"
|
| 338 |
+
|
| 339 |
+
if isinstance(resp, dict):
|
| 340 |
+
# Try to extract the most meaningful part
|
| 341 |
+
summary = (
|
| 342 |
+
resp.get("response") or
|
| 343 |
+
resp.get("summary") or
|
| 344 |
+
resp.get("message") or
|
| 345 |
+
str(resp)
|
| 346 |
+
)
|
| 347 |
+
return str(summary)[:250]
|
| 348 |
+
|
| 349 |
+
return str(resp)[:250]
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
def _hash_identifier(identifier: str) -> str:
|
| 353 |
+
"""
|
| 354 |
+
🔒 Creates a privacy-safe hash of identifiers (session IDs, user IDs).
|
| 355 |
+
|
| 356 |
+
Uses SHA256 for one-way hashing. This allows:
|
| 357 |
+
- Session tracking without storing raw IDs
|
| 358 |
+
- Privacy compliance (GDPR, CCPA)
|
| 359 |
+
- Anonymized analytics
|
| 360 |
+
"""
|
| 361 |
+
return hashlib.sha256(identifier.encode()).hexdigest()[:16]
|
| 362 |
+
|
| 363 |
+
|
| 364 |
+
def _check_for_pii(text: str) -> bool:
|
| 365 |
+
"""
|
| 366 |
+
🔍 Simple PII detection (can be enhanced with NER models).
|
| 367 |
+
|
| 368 |
+
Checks for common PII patterns:
|
| 369 |
+
- Social Security Numbers
|
| 370 |
+
- Email addresses
|
| 371 |
+
- Phone numbers
|
| 372 |
+
|
| 373 |
+
Returns True if potential PII detected.
|
| 374 |
+
"""
|
| 375 |
+
import re
|
| 376 |
+
|
| 377 |
+
# SSN pattern: XXX-XX-XXXX
|
| 378 |
+
ssn_pattern = r'\b\d{3}-\d{2}-\d{4}\b'
|
| 379 |
+
|
| 380 |
+
# Email pattern
|
| 381 |
+
email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
|
| 382 |
+
|
| 383 |
+
# Phone pattern: various formats
|
| 384 |
+
phone_pattern = r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'
|
| 385 |
+
|
| 386 |
+
patterns = [ssn_pattern, email_pattern, phone_pattern]
|
| 387 |
+
|
| 388 |
+
for pattern in patterns:
|
| 389 |
+
if re.search(pattern, text):
|
| 390 |
+
return True
|
| 391 |
+
|
| 392 |
+
return False
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
def _sanitize_input(text: str, contains_pii: bool) -> str:
|
| 396 |
+
"""
|
| 397 |
+
🧹 Sanitizes user input for logging.
|
| 398 |
+
|
| 399 |
+
If PII detected:
|
| 400 |
+
- Masks the input for privacy
|
| 401 |
+
- Keeps first/last few characters for debugging
|
| 402 |
+
|
| 403 |
+
Args:
|
| 404 |
+
text: Original user input
|
| 405 |
+
contains_pii: Whether PII was detected
|
| 406 |
+
|
| 407 |
+
Returns:
|
| 408 |
+
Sanitized text safe for logging
|
| 409 |
+
"""
|
| 410 |
+
if not contains_pii:
|
| 411 |
+
return text
|
| 412 |
+
|
| 413 |
+
# Mask middle portion if PII detected
|
| 414 |
+
if len(text) <= 20:
|
| 415 |
+
return "[PII_DETECTED]"
|
| 416 |
+
|
| 417 |
+
# Keep first 10 and last 10 chars, mask middle
|
| 418 |
+
return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
|
| 419 |
+
|
| 420 |
+
|
| 421 |
+
# ============================================================
|
| 422 |
+
# AZURE INTEGRATION (Placeholder for future)
|
| 423 |
+
# ============================================================
|
| 424 |
+
|
| 425 |
+
def _send_to_azure(log_entry: PennyLogEntry) -> None:
    """
    ☁️ Forward a log entry to Azure services.

    Currently a stub — the telemetry call is sketched below and will be
    enabled once Azure integration is configured. Candidate targets:
    1. Azure Application Insights (custom events)
    2. Azure Blob Storage (long-term archival)
    3. Azure Table Storage (queryable logs)

    TODO: Implement when Azure integration is ready
    """
    try:
        # Example wiring for Application Insights custom events:
        # from applicationinsights import TelemetryClient
        # tc = TelemetryClient(os.getenv("APPINSIGHTS_INSTRUMENTATION_KEY"))
        # tc.track_event("PennyInteraction", properties=log_entry.to_dict())
        # tc.flush()
        logger.debug("Azure logging not yet implemented")
    except Exception as e:
        # Telemetry must never take the app down — swallow and report.
        logger.error(f"Failed to send log to Azure: {e}")
|
| 451 |
+
|
| 452 |
+
|
| 453 |
+
# ============================================================
|
| 454 |
+
# LOG ANALYSIS UTILITIES
|
| 455 |
+
# ============================================================
|
| 456 |
+
|
| 457 |
+
def get_logs_for_date(date: str) -> List[Dict[str, Any]]:
    """
    📊 Load every log entry recorded on a given date.

    Args:
        date: Date string in YYYY-MM-DD format

    Returns:
        List of log entry dictionaries; empty when the day's file is
        missing or unreadable.

    Example:
        logs = get_logs_for_date("2025-02-01")
    """
    log_file = LOGS_BASE_DIR / f"{date}.jsonl"

    if not log_file.exists():
        logger.warning(f"No logs found for date: {date}")
        return []

    entries: List[Dict[str, Any]] = []
    try:
        with open(log_file, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                stripped = raw_line.strip()
                if stripped:  # skip blank lines left by partial writes
                    entries.append(json.loads(stripped))
    except Exception as e:
        logger.error(f"Error reading logs for {date}: {e}")

    return entries
|
| 486 |
+
|
| 487 |
+
|
| 488 |
+
def get_interaction_stats(date: str) -> Dict[str, Any]:
    """
    📈 Generates usage statistics for a given date.

    Returns metrics like:
    - Total interactions
    - Interactions by type
    - Average response time
    - Success rate
    - Most common intents (top 5)

    Args:
        date: Date string in YYYY-MM-DD format

    Returns:
        Statistics dictionary, or an error dict when no logs exist.
    """
    from collections import Counter

    logs = get_logs_for_date(date)

    if not logs:
        return {"error": "No logs found for date", "date": date}

    # Calculate statistics
    total = len(logs)
    successful = sum(1 for log in logs if log.get("success", False))

    # Response time statistics (only entries that recorded a time)
    response_times = [
        log["response_time_ms"]
        for log in logs
        if log.get("response_time_ms") is not None
    ]
    avg_response_time = sum(response_times) / len(response_times) if response_times else 0

    # Counter replaces the original manual dict-accumulation loops.
    interaction_counts = Counter(log.get("interaction_type", "unknown") for log in logs)
    intent_counts = Counter(log.get("intent", "unknown") for log in logs)

    return {
        "date": date,
        "total_interactions": total,
        "successful_interactions": successful,
        "success_rate": f"{(successful/total*100):.1f}%",
        "avg_response_time_ms": round(avg_response_time, 2),
        "interactions_by_type": dict(interaction_counts),
        "top_intents": dict(intent_counts.most_common(5))
    }
|
| 547 |
+
|
| 548 |
+
|
| 549 |
+
# ============================================================
|
| 550 |
+
# LOG CLEANUP (For maintenance)
|
| 551 |
+
# ============================================================
|
| 552 |
+
|
| 553 |
+
def cleanup_old_logs(days_to_keep: int = 90) -> int:
    """
    🧹 Delete daily log files older than the retention window.

    Filenames are expected to be YYYY-MM-DD.jsonl; anything else in the
    logs directory is left untouched.

    Args:
        days_to_keep: Number of days to retain logs

    Returns:
        Number of files deleted

    Example:
        # Delete logs older than 90 days
        deleted = cleanup_old_logs(90)
    """
    from datetime import timedelta

    cutoff = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    removed = 0

    try:
        for candidate in LOGS_BASE_DIR.glob("*.jsonl"):
            # Parse the date out of the filename; skip non-dated files.
            try:
                file_date = datetime.strptime(candidate.stem, "%Y-%m-%d").replace(tzinfo=timezone.utc)
            except ValueError:
                continue

            if file_date < cutoff:
                candidate.unlink()
                removed += 1
                logger.info(f"Deleted old log file: {candidate.name}")

    except Exception as e:
        logger.error(f"Error during log cleanup: {e}")

    logger.info(f"Log cleanup complete: {removed} files deleted")
    return removed
|
| 593 |
+
|
| 594 |
+
|
| 595 |
+
# ============================================================
|
| 596 |
+
# PUBLIC API FUNCTIONS (Used by other modules)
|
| 597 |
+
# ============================================================
|
| 598 |
+
|
| 599 |
+
def log_interaction(
    tenant_id: Optional[str] = None,
    interaction_type: Optional[str] = None,
    intent: Optional[str] = None,
    response_time_ms: Optional[float] = None,
    success: Optional[bool] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs
) -> None:
    """
    📝 Simplified logging function used throughout Penny's codebase.

    Called by the orchestrator, router, agents, and model utilities. It
    assembles a structured entry from whichever fields were supplied,
    sanitizes all string values, and appends it to today's log file.
    Never raises — a logging failure falls back to console output.

    Args:
        tenant_id: City/location identifier (optional)
        interaction_type: Type of interaction, e.g. "weather", "orchestration" (optional)
        intent: Detected intent, e.g. "weather", "emergency" (optional)
        response_time_ms: Response time in milliseconds (optional)
        success: Whether the operation succeeded (optional)
        metadata: Optional additional metadata dictionary
        **kwargs: Extra fields to include (e.g. error, details, fallback_used)

    Example:
        log_interaction(
            tenant_id="atlanta_ga",
            interaction_type="weather",
            intent="weather",
            response_time_ms=150.5,
            success=True,
            metadata={"temperature": 72, "condition": "sunny"}
        )
    """
    entry: Dict[str, Any] = {}
    try:
        entry["timestamp"] = datetime.now(timezone.utc).isoformat()

        # Standard fields are only recorded when explicitly supplied.
        if tenant_id is not None:
            entry["tenant_id"] = sanitize_for_logging(tenant_id)
        if interaction_type is not None:
            entry["interaction_type"] = interaction_type
        if intent is not None:
            entry["intent"] = intent
        if response_time_ms is not None:
            entry["response_time_ms"] = round(response_time_ms, 2)
        if success is not None:
            entry["success"] = success

        # Metadata: sanitize string values, pass everything else through.
        if metadata:
            entry["metadata"] = {
                key: sanitize_for_logging(value) if isinstance(value, str) else value
                for key, value in metadata.items()
            }

        # Free-form extras (backward compatibility with model utils);
        # never clobber a field that was already set above.
        for key, value in kwargs.items():
            if key in entry:
                continue
            entry[key] = sanitize_for_logging(value) if isinstance(value, str) else value

        _write_log_entry_dict(get_daily_log_path(), entry)

    except Exception as e:
        # Failsafe: logging must never crash the application.
        logger.error(f"Failed to log interaction: {e}", exc_info=True)
        _emergency_log_to_console_dict(entry)
|
| 685 |
+
|
| 686 |
+
|
| 687 |
+
def sanitize_for_logging(text: str) -> str:
    """
    🧹 Sanitizes text for safe logging (removes PII).

    Used throughout Penny to make sure sensitive information never reaches
    the log files. Checks the input for PII and masks it when found.

    Args:
        text: Text to sanitize

    Returns:
        Sanitized text safe for logging

    Example:
        safe_text = sanitize_for_logging("My email is user@example.com")
        # Returns: "My email is [PII_DETECTED]"
    """
    # Non-string or empty input: fall back to a plain string representation.
    if not isinstance(text, str) or not text:
        return str(text) if text else ""

    # Clean text passes through untouched.
    if not _check_for_pii(text):
        return text

    # Short strings are fully redacted; longer ones keep a head/tail preview
    # so log entries remain correlatable without exposing the sensitive part.
    if len(text) <= 20:
        return "[PII_DETECTED]"
    return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
|
| 717 |
+
|
| 718 |
+
|
| 719 |
+
def _write_log_entry_dict(log_path: Path, log_entry_dict: Dict[str, Any]) -> None:
|
| 720 |
+
"""
|
| 721 |
+
📁 Writes log entry dictionary to JSONL file.
|
| 722 |
+
Helper function for simplified logging.
|
| 723 |
+
"""
|
| 724 |
+
try:
|
| 725 |
+
log_path.parent.mkdir(parents=True, exist_ok=True)
|
| 726 |
+
with open(log_path, "a", encoding="utf-8") as f:
|
| 727 |
+
json_str = json.dumps(log_entry_dict, ensure_ascii=False)
|
| 728 |
+
f.write(json_str + "\n")
|
| 729 |
+
except Exception as e:
|
| 730 |
+
logger.error(f"Failed to write log entry: {e}")
|
| 731 |
+
_emergency_log_to_console_dict(log_entry_dict)
|
| 732 |
+
|
| 733 |
+
|
| 734 |
+
def _emergency_log_to_console_dict(log_entry_dict: Dict[str, Any]) -> None:
|
| 735 |
+
"""
|
| 736 |
+
🚨 Emergency fallback: Print log to console if file writing fails.
|
| 737 |
+
"""
|
| 738 |
+
print(f"[EMERGENCY LOG] {json.dumps(log_entry_dict)}")
|
| 739 |
+
|
| 740 |
+
|
| 741 |
+
# ============================================================
|
| 742 |
+
# INITIALIZATION
|
| 743 |
+
# ============================================================
|
| 744 |
+
|
| 745 |
+
def initialize_logging_system() -> bool:
    """
    🚀 Initializes the logging system.
    Should be called during app startup.

    Creates the log directory, probes write permissions with a throwaway
    file, and reports whether Azure log mirroring is enabled.

    Returns:
        True if initialization successful
    """
    logger.info("📊 Initializing Penny's logging system...")

    try:
        # Create the log directory tree if it is not already present.
        LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)

        # Probe write permissions; an unwritable directory fails fast here
        # rather than on the first real log entry.
        probe = LOGS_BASE_DIR / ".write_test"
        probe.write_text("test")
        probe.unlink()

        logger.info("✅ Logging system initialized")
        logger.info(f"📁 Log directory: {LOGS_BASE_DIR}")
        logger.info("🔄 Daily rotation: Enabled")

        # Report whether logs are mirrored to Azure (env-flag controlled).
        if os.getenv("AZURE_LOGS_ENABLED") == "true":
            logger.info("☁️ Azure logging: Enabled")
        else:
            logger.info("💾 Azure logging: Disabled (local only)")

        return True

    except Exception as exc:
        logger.error(f"❌ Failed to initialize logging system: {exc}")
        return False
|
main.py
ADDED
|
@@ -0,0 +1,660 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/main.py
|
| 2 |
+
"""
|
| 3 |
+
🤖 PENNY - People's Engagement Network Navigator for You
|
| 4 |
+
FastAPI Entry Point with Azure-Ready Configuration
|
| 5 |
+
|
| 6 |
+
This is Penny's front door. She loads her environment, registers all her endpoints,
|
| 7 |
+
and makes sure she's ready to help residents find what they need.
|
| 8 |
+
|
| 9 |
+
MISSION: Connect residents to civic resources through a warm, multilingual interface
|
| 10 |
+
that removes barriers and empowers communities.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from fastapi import FastAPI, Request, status
|
| 14 |
+
from fastapi.responses import JSONResponse
|
| 15 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
import logging
|
| 17 |
+
import sys
|
| 18 |
+
import os
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
import pathlib
|
| 21 |
+
from typing import Dict, Any, Optional, List
|
| 22 |
+
from datetime import datetime, timedelta
|
| 23 |
+
|
| 24 |
+
# --- LOGGING CONFIGURATION (Must be set up before other imports) ---
# Root logger writes to stdout so container / App Service log capture works.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ]
)
# Module-level logger used by all startup and endpoint code below.
logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
# --- CRITICAL: FORCE .ENV LOADING BEFORE ANY OTHER IMPORTS ---
|
| 35 |
+
# Determine the absolute path to the project root
|
| 36 |
+
PROJECT_ROOT = pathlib.Path(__file__).parent.parent
|
| 37 |
+
|
| 38 |
+
# Load environment variables into the active Python session IMMEDIATELY
|
| 39 |
+
# This ensures Azure Maps keys, API tokens, and model paths are available
|
| 40 |
+
try:
|
| 41 |
+
load_dotenv(PROJECT_ROOT / ".env")
|
| 42 |
+
|
| 43 |
+
# Verify critical environment variables are loaded
|
| 44 |
+
REQUIRED_ENV_VARS = ["AZURE_MAPS_KEY"]
|
| 45 |
+
missing_vars = [var for var in REQUIRED_ENV_VARS if not os.getenv(var)]
|
| 46 |
+
if missing_vars:
|
| 47 |
+
logger.warning(f"⚠️ WARNING: Missing required environment variables: {missing_vars}")
|
| 48 |
+
logger.warning(f"📁 Looking for .env file at: {PROJECT_ROOT / '.env'}")
|
| 49 |
+
else:
|
| 50 |
+
logger.info("✅ Environment variables loaded successfully")
|
| 51 |
+
except Exception as e:
|
| 52 |
+
logger.error(f"❌ Error loading environment variables: {e}")
|
| 53 |
+
logger.error(f"📁 Expected .env location: {PROJECT_ROOT / '.env'}")
|
| 54 |
+
|
| 55 |
+
# --- NOW SAFE TO IMPORT MODULES THAT DEPEND ON ENV VARS ---
|
| 56 |
+
try:
|
| 57 |
+
from app.weather_agent import get_weather_for_location
|
| 58 |
+
from app.router import router as api_router
|
| 59 |
+
from app.location_utils import (
|
| 60 |
+
initialize_location_system,
|
| 61 |
+
get_all_supported_cities,
|
| 62 |
+
validate_city_data_files,
|
| 63 |
+
SupportedCities,
|
| 64 |
+
get_city_coordinates
|
| 65 |
+
)
|
| 66 |
+
except ImportError as e:
|
| 67 |
+
logger.error(f"❌ Critical import error: {e}")
|
| 68 |
+
logger.error("⚠️ Penny cannot start without core modules")
|
| 69 |
+
sys.exit(1)
|
| 70 |
+
|
| 71 |
+
# --- FASTAPI APP INITIALIZATION ---
app = FastAPI(
    title="PENNY - Civic Engagement Assistant",
    description=(
        "💛 Multilingual civic chatbot connecting residents with local services, "
        "government programs, and community resources.\n\n"
        "**Powered by:**\n"
        "- Transformer models for natural language understanding\n"
        "- Azure ML infrastructure for scalable deployment\n"
        "- 27-language translation support\n"
        "- Real-time weather integration\n"
        "- Multi-city civic resource databases\n\n"
        "**Supported Cities:** Atlanta, Birmingham, Chesterfield, El Paso, Providence, Seattle"
    ),
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc",
    contact={
        "name": "Penny Support",
        "email": "support@pennyai.example"
    },
    license_info={
        "name": "Proprietary",
    }
)

# --- CORS MIDDLEWARE (Configure for your deployment) ---
# Production: Update allowed_origins to restrict to specific domains
# Default "*" permits any origin — acceptable for development only.
allowed_origins = os.getenv("ALLOWED_ORIGINS", "*").split(",")
app.add_middleware(
    CORSMiddleware,
    allow_origins=allowed_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# --- APPLICATION STATE (For health checks and monitoring) ---
# location_system_healthy: flipped to True by the startup event on success.
app.state.location_system_healthy = False
# startup_time: set during startup; used by /health and shutdown for uptime.
app.state.startup_time = None
# startup_errors: non-fatal initialization problems surfaced via /health.
app.state.startup_errors: List[str] = []
|
| 112 |
+
|
| 113 |
+
# --- GLOBAL EXCEPTION HANDLER ---
|
| 114 |
+
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception) -> JSONResponse:
    """
    🛡️ Catches any unhandled exceptions and returns a user-friendly response.
    Logs full error details for debugging while keeping responses safe for users.

    Penny stays helpful even when things go wrong!

    Args:
        request: FastAPI request object
        exc: The unhandled exception

    Returns:
        JSONResponse with error details (sanitized for production)
    """
    logger.error(
        f"Unhandled exception on {request.url.path} | "
        f"method={request.method} | "
        f"error={exc}",
        exc_info=True
    )

    # Raw exception text is only exposed to callers when DEBUG_MODE is on.
    show_detail = os.getenv("DEBUG_MODE", "false").lower() == "true"

    payload = {
        "error": "An unexpected error occurred. Penny's on it!",
        "message": "Our team has been notified and we're working to fix this.",
        "detail": str(exc) if show_detail else None,
        "request_path": str(request.url.path),
        "timestamp": datetime.utcnow().isoformat()
    }
    return JSONResponse(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
        content=payload
    )
|
| 149 |
+
|
| 150 |
+
# --- STARTUP EVENT ---
|
| 151 |
+
@app.on_event("startup")
async def startup_event() -> None:
    """
    🚀 Runs when Penny wakes up.

    Responsibilities:
    1. Validate environment configuration
    2. Initialize location/city systems
    3. Verify data files exist
    4. Log system status

    Never raises: all failures are recorded in app.state.startup_errors so
    the app still serves (degraded) and /health can report the problems.
    """
    try:
        # NOTE(review): naive UTC timestamp; datetime.utcnow() is deprecated
        # in Python 3.12 — consider datetime.now(timezone.utc) on next touch.
        app.state.startup_time = datetime.utcnow()
        app.state.startup_errors = []

        logger.info("=" * 60)
        logger.info("🤖 PENNY STARTUP INITIALIZED")
        logger.info("=" * 60)

        # --- Environment Info ---
        logger.info(f"📂 Project Root: {PROJECT_ROOT}")
        logger.info(f"🌍 Environment: {os.getenv('ENVIRONMENT', 'development')}")
        logger.info(f"🐍 Python Version: {sys.version.split()[0]}")

        # --- Azure Configuration Check ---
        # A missing key is non-fatal: weather endpoints degrade instead.
        azure_maps_key = os.getenv("AZURE_MAPS_KEY")
        if azure_maps_key:
            logger.info("🗺️ Azure Maps: ✅ Configured")
        else:
            error_msg = "Azure Maps key missing - weather features will be limited"
            logger.warning(f"🗺️ Azure Maps: ⚠️ {error_msg}")
            app.state.startup_errors.append(error_msg)

        # --- Initialize Location System ---
        logger.info("🗺️ Initializing location system...")
        try:
            location_system_ready = initialize_location_system()
            # Health endpoint reads this flag to decide 200 vs 503.
            app.state.location_system_healthy = location_system_ready

            if location_system_ready:
                logger.info("✅ Location system initialized successfully")

                # Log supported cities
                cities = SupportedCities.get_all_cities()
                logger.info(f"📍 Supported cities: {len(cities)}")
                for city in cities:
                    logger.info(f" - {city.full_name} ({city.tenant_id})")

                # Validate data files
                # NOTE(review): the comprehension variable shadows fastapi's
                # `status` module inside this expression only (no leak in
                # Python 3); rename on next touch for clarity.
                validation = validate_city_data_files()
                missing_data = [
                    tid for tid, status in validation.items()
                    if not status["events"] or not status["resources"]
                ]
                if missing_data:
                    error_msg = f"Incomplete data for cities: {missing_data}"
                    logger.warning(f"⚠️ {error_msg}")
                    app.state.startup_errors.append(error_msg)
            else:
                error_msg = "Location system initialization failed"
                logger.error(f"❌ {error_msg}")
                app.state.startup_errors.append(error_msg)

        except Exception as e:
            error_msg = f"Error initializing location system: {e}"
            logger.error(f"❌ {error_msg}", exc_info=True)
            app.state.location_system_healthy = False
            app.state.startup_errors.append(error_msg)

        # --- Startup Summary ---
        logger.info("=" * 60)
        if app.state.startup_errors:
            logger.warning(f"⚠️ PENNY STARTED WITH {len(app.state.startup_errors)} WARNING(S)")
            for error in app.state.startup_errors:
                logger.warning(f" - {error}")
        else:
            logger.info("🎉 PENNY IS READY TO HELP RESIDENTS!")
            logger.info("📖 API Documentation: http://localhost:8000/docs")
        logger.info("=" * 60)

    except Exception as e:
        # Failsafe: record the failure but let the server come up anyway.
        logger.error(f"❌ Critical startup error: {e}", exc_info=True)
        app.state.startup_errors.append(f"Critical startup failure: {e}")
|
| 234 |
+
|
| 235 |
+
# --- SHUTDOWN EVENT ---
|
| 236 |
+
@app.on_event("shutdown")
async def shutdown_event() -> None:
    """
    👋 Cleanup tasks when Penny shuts down.

    Logs a shutdown banner and the total uptime (when startup_time was set).
    Never raises — shutdown must complete even if logging fails.
    """
    try:
        logger.info("=" * 60)
        logger.info("👋 PENNY SHUTTING DOWN")
        logger.info("=" * 60)

        # Report how long this instance was alive, when known.
        started_at = app.state.startup_time
        if started_at:
            logger.info(f"⏱️ Total uptime: {datetime.utcnow() - started_at}")

        # TODO: Add cleanup tasks here
        # - Close database connections
        # - Save state if needed
        # - Release model resources

        logger.info("✅ Shutdown complete. Goodbye for now!")
    except Exception as exc:
        logger.error(f"Error during shutdown: {exc}", exc_info=True)
|
| 259 |
+
|
| 260 |
+
# --- ROUTER INCLUSION ---
# All API endpoints defined in router.py are registered here
try:
    app.include_router(api_router)
    logger.info("✅ API router registered successfully")
except Exception as e:
    # A router failure leaves only the endpoints defined in this module live.
    logger.error(f"❌ Failed to register API router: {e}", exc_info=True)
|
| 267 |
+
|
| 268 |
+
# ============================================================
|
| 269 |
+
# CORE HEALTH & STATUS ENDPOINTS
|
| 270 |
+
# ============================================================
|
| 271 |
+
|
| 272 |
+
@app.get("/", tags=["Health"])
async def root() -> Dict[str, Any]:
    """
    🏠 Root endpoint - confirms Penny is alive and running.

    This is the first thing users/load balancers will hit.
    Penny always responds with warmth, even to bots! 💛

    Returns:
        Basic status and feature information
    """
    try:
        feature_list = [
            "27-language translation",
            "Real-time weather",
            "Community events",
            "Local resource finder",
            "Document processing"
        ]
        city_count = len(SupportedCities.get_all_cities())
        return {
            "message": "💛 Hi! I'm Penny, your civic engagement assistant.",
            "status": "operational",
            "tagline": "Connecting residents to community resources since 2024",
            "docs": "/docs",
            "api_version": "1.0.0",
            "supported_cities": city_count,
            "features": feature_list,
            "timestamp": datetime.utcnow().isoformat()
        }
    except Exception as exc:
        # Degraded fallback: still greet the caller even if the city count
        # (or any other lookup) blows up.
        logger.error(f"Error in root endpoint: {exc}", exc_info=True)
        return {
            "message": "💛 Hi! I'm Penny, your civic engagement assistant.",
            "status": "degraded",
            "error": "Some features may be unavailable"
        }
|
| 307 |
+
|
| 308 |
+
@app.get("/health", tags=["Health"])
async def health_check() -> JSONResponse:
    """
    🏥 Comprehensive health check for Azure load balancers and monitoring.

    Returns detailed status of all critical components:
    - Environment configuration
    - Location system
    - Data availability
    - API components

    Returns:
        JSONResponse with health status (200 = healthy, 503 = degraded)
    """
    try:
        # Calculate uptime (None until the startup event has run).
        uptime = None
        if app.state.startup_time:
            uptime_delta = datetime.utcnow() - app.state.startup_time
            uptime = str(uptime_delta).split('.')[0]  # Remove microseconds

        # Validate data files: a city counts as "full" only when both its
        # events and resources files are present.
        validation = validate_city_data_files()
        cities_with_full_data = sum(
            1 for v in validation.values()
            if v.get("events", False) and v.get("resources", False)
        )
        total_cities = len(SupportedCities.get_all_cities())

        # NOTE(review): translation/document_processing are reported as
        # "operational" unconditionally here — no live probe; confirm intent.
        health_status = {
            "status": "healthy",
            "timestamp": datetime.utcnow().isoformat(),
            "uptime": uptime,
            "environment": {
                "azure_maps_configured": bool(os.getenv("AZURE_MAPS_KEY")),
                "debug_mode": os.getenv("DEBUG_MODE", "false").lower() == "true",
                "environment_type": os.getenv("ENVIRONMENT", "development")
            },
            "location_system": {
                "status": "operational" if app.state.location_system_healthy else "degraded",
                "supported_cities": total_cities,
                "cities_with_full_data": cities_with_full_data
            },
            "api_components": {
                "router": "operational",
                "weather_agent": "operational" if os.getenv("AZURE_MAPS_KEY") else "degraded",
                "translation": "operational",
                "document_processing": "operational"
            },
            "startup_errors": app.state.startup_errors if app.state.startup_errors else None,
            "api_version": "1.0.0"
        }

        # Determine overall health status: location system AND Azure Maps key
        # must both be present for a 200.
        critical_checks = [
            app.state.location_system_healthy,
            bool(os.getenv("AZURE_MAPS_KEY"))
        ]

        all_healthy = all(critical_checks)

        if not all_healthy:
            # Degraded: same payload, but 503 so load balancers can react.
            health_status["status"] = "degraded"
            logger.warning(f"Health check: System degraded - {health_status}")
            return JSONResponse(
                status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                content=health_status
            )

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content=health_status
        )

    except Exception as e:
        # The health check itself must never raise; report the failure as 503.
        logger.error(f"Health check failed: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "status": "error",
                "timestamp": datetime.utcnow().isoformat(),
                "error": "Health check failed",
                "detail": str(e) if os.getenv("DEBUG_MODE", "false").lower() == "true" else None
            }
        )
|
| 393 |
+
|
| 394 |
+
@app.get("/cities", tags=["Location"])
async def list_supported_cities() -> JSONResponse:
    """
    📍 Lists all cities Penny currently supports.

    Returns:
        List of city information including tenant_id and display name,
        each annotated with a data_status of its events/resources files.
        Useful for frontend dropdowns and API clients.

    Example Response:
        {
            "total": 6,
            "cities": [
                {
                    "tenant_id": "atlanta_ga",
                    "name": "Atlanta, GA",
                    "state": "GA",
                    "data_status": {"events": true, "resources": true}
                }
            ]
        }
    """
    try:
        cities = get_all_supported_cities()
        validation = validate_city_data_files()

        # Annotate every city with the availability of its data files;
        # cities missing from the validation map default to "no data".
        for entry in cities:
            fallback = {
                "events": False,
                "resources": False
            }
            entry["data_status"] = validation.get(entry["tenant_id"], fallback)

        payload = {
            "total": len(cities),
            "cities": cities,
            "message": "These are the cities where Penny can help you find resources!",
            "timestamp": datetime.utcnow().isoformat()
        }
        return JSONResponse(status_code=status.HTTP_200_OK, content=payload)
    except Exception as exc:
        logger.error(f"Error listing cities: {exc}", exc_info=True)
        debug_on = os.getenv("DEBUG_MODE", "false").lower() == "true"
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={
                "error": "Unable to retrieve city list",
                "message": "I'm having trouble loading the city list right now. Please try again in a moment!",
                "detail": str(exc) if debug_on else None,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
|
| 448 |
+
|
| 449 |
+
# ============================================================
|
| 450 |
+
# WEATHER ENDPOINTS
|
| 451 |
+
# ============================================================
|
| 452 |
+
|
| 453 |
+
@app.get("/weather_direct", tags=["Weather"])
async def weather_direct_endpoint(lat: float, lon: float) -> JSONResponse:
    """
    🌤️ Direct weather lookup by coordinates.

    Args:
        lat: Latitude (-90 to 90)
        lon: Longitude (-180 to 180)

    Returns:
        Current weather conditions for the specified location

    Example:
        GET /weather_direct?lat=36.8508&lon=-76.2859 (Norfolk, VA)
    """
    # Reject out-of-range coordinates before touching the weather service.
    # (The chained comparison deliberately also rejects NaN.)
    if not (-90 <= lat <= 90):
        bad_lat = {
            "error": "Invalid latitude",
            "message": "Latitude must be between -90 and 90",
            "provided_value": lat
        }
        return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=bad_lat)
    if not (-180 <= lon <= 180):
        bad_lon = {
            "error": "Invalid longitude",
            "message": "Longitude must be between -180 and 180",
            "provided_value": lon
        }
        return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=bad_lon)

    try:
        weather = await get_weather_for_location(lat=lat, lon=lon)
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "latitude": lat,
                "longitude": lon,
                "weather": weather,
                "source": "Azure Maps Weather API",
                "message": "Current weather conditions at your location",
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as exc:
        logger.error(f"Weather lookup failed for ({lat}, {lon}): {exc}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "error": "Weather service temporarily unavailable",
                "message": "We're having trouble reaching the weather service. Please try again in a moment.",
                "latitude": lat,
                "longitude": lon,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
|
| 513 |
+
|
| 514 |
+
@app.get("/weather/{tenant_id}", tags=["Weather"])
async def weather_by_city(tenant_id: str) -> JSONResponse:
    """
    🌤️ Get weather for a supported city by tenant ID.

    Args:
        tenant_id: City identifier (e.g., 'atlanta_ga', 'seattle_wa')

    Returns:
        Current weather conditions for the specified city

    Example:
        GET /weather/atlanta_ga
    """
    try:
        # Resolve the tenant to a known city; unknown tenants get a 404
        # plus the list of valid identifiers.
        city_info = SupportedCities.get_city_by_tenant_id(tenant_id)
        if not city_info:
            known_ids = [entry["tenant_id"] for entry in get_all_supported_cities()]
            return JSONResponse(
                status_code=status.HTTP_404_NOT_FOUND,
                content={
                    "error": f"City not found: {tenant_id}",
                    "message": f"I don't have data for '{tenant_id}' yet. Try one of the supported cities!",
                    "supported_cities": known_ids,
                    "timestamp": datetime.utcnow().isoformat()
                }
            )

        # A known city without coordinates is a data problem on our side.
        coords = get_city_coordinates(tenant_id)
        if not coords:
            return JSONResponse(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                content={
                    "error": "City coordinates not available",
                    "city": city_info.full_name,
                    "tenant_id": tenant_id,
                    "timestamp": datetime.utcnow().isoformat()
                }
            )

        lat = coords["lat"]
        lon = coords["lon"]
        weather = await get_weather_for_location(lat=lat, lon=lon)
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "city": city_info.full_name,
                "tenant_id": tenant_id,
                "coordinates": {"latitude": lat, "longitude": lon},
                "weather": weather,
                "source": "Azure Maps Weather API",
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as exc:
        logger.error(f"Weather lookup failed for {tenant_id}: {exc}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            content={
                "error": "Weather service temporarily unavailable",
                "message": "We're having trouble getting the weather right now. Please try again in a moment!",
                "tenant_id": tenant_id,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
|
| 581 |
+
|
| 582 |
+
# ============================================================
|
| 583 |
+
# DEBUG ENDPOINTS (Only available in debug mode)
|
| 584 |
+
# ============================================================
|
| 585 |
+
|
| 586 |
+
@app.get("/debug/validation", tags=["Debug"], include_in_schema=False)
async def debug_validation() -> JSONResponse:
    """
    🧪 Debug endpoint: Shows data file validation status.
    Only available when DEBUG_MODE=true
    """
    # Hard gate: never expose validation internals outside debug mode.
    debug_enabled = os.getenv("DEBUG_MODE", "false").lower() == "true"
    if not debug_enabled:
        return JSONResponse(
            status_code=status.HTTP_403_FORBIDDEN,
            content={"error": "Debug endpoints are disabled in production"}
        )

    try:
        validation = validate_city_data_files()
        events_count = sum(1 for v in validation.values() if v.get("events", False))
        resources_count = sum(1 for v in validation.values() if v.get("resources", False))
        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "validation": validation,
                "summary": {
                    "total_cities": len(validation),
                    "cities_with_events": events_count,
                    "cities_with_resources": resources_count
                },
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as exc:
        logger.error(f"Debug validation failed: {exc}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"error": str(exc)}
        )
|
| 618 |
+
|
| 619 |
+
@app.get("/debug/env", tags=["Debug"], include_in_schema=False)
async def debug_environment() -> JSONResponse:
    """
    🧪 Debug endpoint: Shows environment configuration.
    Sensitive values are masked. Only available when DEBUG_MODE=true
    """
    if os.getenv("DEBUG_MODE", "false").lower() != "true":
        return JSONResponse(
            status_code=status.HTTP_403_FORBIDDEN,
            content={"error": "Debug endpoints are disabled in production"}
        )

    def mask_sensitive(key: str, value: str) -> str:
        """Masks sensitive environment variables."""
        # A variable counts as sensitive if its name mentions any of these.
        for marker in ("key", "secret", "password", "token"):
            if marker in key.lower():
                return f"{value[:4]}...{value[-4:]}" if len(value) > 8 else "***"
        return value

    try:
        # Only expose app-relevant variables, never the whole environment.
        shown_prefixes = ("AZURE_", "PENNY_", "DEBUG_", "ENVIRONMENT")
        env_vars = {}
        for key, value in os.environ.items():
            if key.startswith(shown_prefixes):
                env_vars[key] = mask_sensitive(key, value)

        return JSONResponse(
            status_code=status.HTTP_200_OK,
            content={
                "environment_variables": env_vars,
                "project_root": str(PROJECT_ROOT),
                "location_system_healthy": app.state.location_system_healthy,
                "startup_errors": app.state.startup_errors,
                "timestamp": datetime.utcnow().isoformat()
            }
        )
    except Exception as e:
        logger.error(f"Debug environment check failed: {e}", exc_info=True)
        return JSONResponse(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            content={"error": str(e)}
        )
|
model_config.json
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"penny-core-agent": {
|
| 3 |
+
"model_name": "google/gemma-7b-it",
|
| 4 |
+
"task": "text-generation",
|
| 5 |
+
"endpoint": "azure-ml",
|
| 6 |
+
"fallback_endpoint": "local",
|
| 7 |
+
"timeout_seconds": 30,
|
| 8 |
+
"max_retries": 2,
|
| 9 |
+
"description": "Penny's core conversational AI for civic engagement responses"
|
| 10 |
+
},
|
| 11 |
+
"penny-doc-agent": {
|
| 12 |
+
"model_name": "microsoft/layoutlmv3-base",
|
| 13 |
+
"task": "pdf-extraction",
|
| 14 |
+
"endpoint": "azure-ml",
|
| 15 |
+
"fallback_endpoint": "local",
|
| 16 |
+
"timeout_seconds": 45,
|
| 17 |
+
"max_retries": 2,
|
| 18 |
+
"description": "Document understanding and PDF extraction for civic documents"
|
| 19 |
+
},
|
| 20 |
+
"penny-translate-agent": {
|
| 21 |
+
"model_name": "facebook/nllb-200-distilled-600M",
|
| 22 |
+
"task": "translation",
|
| 23 |
+
"endpoint": "azure-ml",
|
| 24 |
+
"fallback_endpoint": "local",
|
| 25 |
+
"timeout_seconds": 20,
|
| 26 |
+
"max_retries": 2,
|
| 27 |
+
"description": "Multilingual translation service for accessible civic information"
|
| 28 |
+
},
|
| 29 |
+
"penny-sentiment-agent": {
|
| 30 |
+
"model_name": "cardiffnlp/twitter-roberta-base-sentiment",
|
| 31 |
+
"task": "sentiment-analysis",
|
| 32 |
+
"endpoint": "azure-ml",
|
| 33 |
+
"fallback_endpoint": "local",
|
| 34 |
+
"timeout_seconds": 15,
|
| 35 |
+
"max_retries": 2,
|
| 36 |
+
"description": "Sentiment analysis for community feedback and engagement monitoring"
|
| 37 |
+
},
|
| 38 |
+
"penny-bias-checker": {
|
| 39 |
+
"model_name": "facebook/bart-large-mnli",
|
| 40 |
+
"task": "bias-detection",
|
| 41 |
+
"endpoint": "azure-ml",
|
| 42 |
+
"fallback_endpoint": "local",
|
| 43 |
+
"timeout_seconds": 20,
|
| 44 |
+
"max_retries": 2,
|
| 45 |
+
"description": "Bias detection to ensure fair and equitable civic information"
|
| 46 |
+
}
|
| 47 |
+
}
|
model_loader.py
ADDED
|
@@ -0,0 +1,861 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/model_loader.py
|
| 2 |
+
"""
|
| 3 |
+
🧠 PENNY Model Loader - Azure-Ready Multi-Model Orchestration
|
| 4 |
+
|
| 5 |
+
This is Penny's brain loader. She manages multiple specialized models:
|
| 6 |
+
- Gemma 7B for conversational reasoning
|
| 7 |
+
- NLLB-200 for 27-language translation
|
| 8 |
+
- Sentiment analysis for resident wellbeing
|
| 9 |
+
- Bias detection for equitable service
|
| 10 |
+
- LayoutLM for civic document processing
|
| 11 |
+
|
| 12 |
+
MISSION: Load AI models efficiently in memory-constrained environments while
|
| 13 |
+
maintaining Penny's warm, civic-focused personality across all interactions.
|
| 14 |
+
|
| 15 |
+
FEATURES:
|
| 16 |
+
- Lazy loading (models only load when needed)
|
| 17 |
+
- 8-bit quantization for memory efficiency
|
| 18 |
+
- GPU/CPU auto-detection
|
| 19 |
+
- Model caching and reuse
|
| 20 |
+
- Graceful fallbacks for Azure ML deployment
|
| 21 |
+
- Memory monitoring and cleanup
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import os
|
| 26 |
+
import torch
|
| 27 |
+
from typing import Dict, Any, Callable, Optional, Union, List
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
import logging
|
| 30 |
+
from dataclasses import dataclass
|
| 31 |
+
from enum import Enum
|
| 32 |
+
from datetime import datetime
|
| 33 |
+
|
| 34 |
+
from transformers import (
|
| 35 |
+
AutoTokenizer,
|
| 36 |
+
AutoModelForCausalLM,
|
| 37 |
+
AutoModelForSeq2SeqLM,
|
| 38 |
+
pipeline,
|
| 39 |
+
PreTrainedModel,
|
| 40 |
+
PreTrainedTokenizer
|
| 41 |
+
)
|
| 42 |
+
|
| 43 |
+
# --- LOGGING SETUP ---
|
| 44 |
+
logger = logging.getLogger(__name__)
|
| 45 |
+
|
| 46 |
+
# --- PATH CONFIGURATION (Environment-Aware) ---
|
| 47 |
+
# Support both local development and Azure ML deployment
|
| 48 |
+
if os.getenv("AZUREML_MODEL_DIR"):
    # Azure ML deployment - models are in AZUREML_MODEL_DIR
    MODEL_ROOT = Path(os.getenv("AZUREML_MODEL_DIR"))
    CONFIG_PATH = MODEL_ROOT / "model_config.json"
    logger.info("☁️ Running in Azure ML environment")
else:
    # Local development - models are in project structure
    PROJECT_ROOT = Path(__file__).parent.parent  # app/'s parent = project root
    MODEL_ROOT = PROJECT_ROOT / "models"
    CONFIG_PATH = MODEL_ROOT / "model_config.json"
    logger.info("💻 Running in local development environment")
# NOTE(review): PROJECT_ROOT is only bound in the local branch — confirm
# nothing in this module references it when running under Azure ML.

logger.info(f"📂 Model config path: {CONFIG_PATH}")
|
| 61 |
+
|
| 62 |
+
# ============================================================
|
| 63 |
+
# PENNY'S CIVIC IDENTITY & PERSONALITY
|
| 64 |
+
# ============================================================
|
| 65 |
+
|
| 66 |
+
# System prompt prepended to every text-generation request unless the caller
# passes skip_system_prompt=True (see ModelClient.predict).
PENNY_SYSTEM_PROMPT = (
    "You are Penny, a smart, civic-focused AI assistant serving local communities. "
    "You help residents navigate city services, government programs, and community resources. "
    "You're warm, professional, accurate, and always stay within your civic mission.\n\n"

    "Your expertise includes:\n"
    "- Connecting people with local services (food banks, shelters, libraries)\n"
    "- Translating information into 27 languages\n"
    "- Explaining public programs and eligibility\n"
    "- Guiding residents through civic processes\n"
    "- Providing emergency resources when needed\n\n"

    "YOUR PERSONALITY:\n"
    "- Warm and approachable, like a helpful community center staff member\n"
    "- Clear and practical, avoiding jargon\n"
    "- Culturally sensitive and inclusive\n"
    "- Patient with repetition or clarification\n"
    "- Funny when appropriate, but never at anyone's expense\n\n"

    "CRITICAL RULES:\n"
    "- When residents greet you by name (e.g., 'Hi Penny'), respond warmly and personally\n"
    "- You are ALWAYS Penny - never ChatGPT, Assistant, Claude, or any other name\n"
    "- If you don't know something, say so clearly and help find the right resource\n"
    "- NEVER make up information about services, eligibility, or contacts\n"
    "- Stay within your civic mission - you don't provide legal, medical, or financial advice\n"
    "- For emergencies, immediately connect to appropriate services (911, crisis lines)\n\n"
)

# --- GLOBAL STATE ---
# Shared by all ModelClient instances so repeat loads reuse one pipeline.
_MODEL_CACHE: Dict[str, Any] = {}  # Memory-efficient model reuse
_LOAD_TIMES: Dict[str, float] = {}  # Track model loading performance (seconds)
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ============================================================
|
| 100 |
+
# DEVICE MANAGEMENT
|
| 101 |
+
# ============================================================
|
| 102 |
+
|
| 103 |
+
class DeviceType(str, Enum):
    """Compute backends Penny's models can target.

    Subclasses ``str`` so members compare equal to their plain string
    values (e.g. ``DeviceType.CUDA == "cuda"``).
    """

    CUDA = "cuda"  # NVIDIA GPU
    CPU = "cpu"    # universal fallback
    MPS = "mps"    # Apple Silicon
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def get_optimal_device() -> str:
    """
    🎮 Determines the best device for model inference.

    Backends are probed in priority order: CUDA (NVIDIA GPU), then MPS
    (Apple Silicon), then CPU as the universal fallback.

    Returns:
        Device string ("cuda", "mps", or "cpu")
    """
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        logger.info(f"🎮 GPU detected: {gpu_name} ({gpu_memory:.1f}GB)")
        return DeviceType.CUDA.value

    if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
        logger.info("🍎 Apple Silicon (MPS) detected")
        return DeviceType.MPS.value

    logger.info("💻 Using CPU for inference")
    logger.warning("⚠️ GPU not available - inference will be slower")
    return DeviceType.CPU.value
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def get_memory_stats() -> Dict[str, float]:
    """
    📊 Returns current GPU/CPU memory statistics.

    GPU figures appear only when CUDA is available; CPU figures only
    when the optional ``psutil`` package is installed.

    Returns:
        Dict with memory stats in GB (``cpu_percent`` is a percentage)
    """
    stats: Dict[str, float] = {}

    if torch.cuda.is_available():
        props = torch.cuda.get_device_properties(0)
        stats["gpu_allocated_gb"] = torch.cuda.memory_allocated() / 1e9
        stats["gpu_reserved_gb"] = torch.cuda.memory_reserved() / 1e9
        stats["gpu_total_gb"] = props.total_memory / 1e9

    # CPU memory (requires psutil) — silently omitted when unavailable.
    try:
        import psutil
    except ImportError:
        return stats

    mem = psutil.virtual_memory()
    stats["cpu_used_gb"] = mem.used / 1e9
    stats["cpu_total_gb"] = mem.total / 1e9
    stats["cpu_percent"] = mem.percent
    return stats
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# ============================================================
|
| 169 |
+
# MODEL CLIENT (Individual Model Handler)
|
| 170 |
+
# ============================================================
|
| 171 |
+
|
| 172 |
+
@dataclass
class ModelMetadata:
    """
    📋 Metadata about a loaded model.
    Tracks performance and resource usage.
    """
    name: str                                   # model identifier, e.g. "penny-core-agent"
    task: str                                   # pipeline task type
    model_name: str                             # HuggingFace model ID
    device: str                                 # compute device the model targets
    loaded_at: Optional[datetime] = None        # set when the pipeline finishes loading
    load_time_seconds: Optional[float] = None   # wall-clock load duration
    memory_usage_gb: Optional[float] = None     # GPU memory snapshot after load
    inference_count: int = 0                    # number of predict() calls
    total_inference_time_ms: float = 0.0        # accumulated inference latency

    @property
    def avg_inference_time_ms(self) -> float:
        """Mean inference latency in ms (0.0 before any inference)."""
        count = self.inference_count
        return self.total_inference_time_ms / count if count else 0.0
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
class ModelClient:
    """
    🤖 Manages a single HuggingFace model with optimized loading and inference.

    Features:
    - Lazy loading (load on first use)
    - Memory optimization (8-bit quantization)
    - Performance tracking
    - Graceful error handling
    - Automatic device placement
    """

    def __init__(
        self,
        name: str,
        model_name: str,
        task: str,
        device: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize model client (doesn't load the model yet).

        Args:
            name: Model identifier (e.g., "penny-core-agent")
            model_name: HuggingFace model ID
            task: Task type (text-generation, translation, etc.)
            device: Target device (auto-detected if None)
            config: Additional model configuration
        """
        self.name = name
        self.model_name = model_name
        self.task = task
        self.device = device or get_optimal_device()
        self.config = config or {}
        self.pipeline = None           # lazily created HF pipeline
        self._load_attempted = False   # blocks retries after a failed load
        self.metadata = ModelMetadata(
            name=name,
            task=task,
            model_name=model_name,
            device=self.device
        )

        logger.info(f"📦 Initialized ModelClient: {name}")
        logger.debug(f" Model: {model_name}")
        logger.debug(f" Task: {task}")
        logger.debug(f" Device: {self.device}")

    def load_pipeline(self) -> bool:
        """
        🔄 Loads the HuggingFace pipeline with Azure-optimized settings.

        Features:
        - 8-bit quantization for large models (saves ~50% memory)
        - Automatic device placement
        - Memory monitoring
        - Cache checking

        Returns:
            True if successful, False otherwise
        """
        if self.pipeline is not None:
            logger.debug(f"✅ {self.name} already loaded")
            return True

        # A previous failure short-circuits: we never retry a broken load.
        if self._load_attempted:
            logger.warning(f"⚠️ Previous load attempt failed for {self.name}")
            return False

        # NOTE(review): `global` is not strictly needed (the dicts are only
        # mutated, never rebound) but is kept for explicitness.
        global _MODEL_CACHE, _LOAD_TIMES

        # Check cache first
        if self.name in _MODEL_CACHE:
            logger.info(f"♻️ Using cached pipeline for {self.name}")
            self.pipeline = _MODEL_CACHE[self.name]
            return True

        logger.info(f"🔄 Loading {self.name} from HuggingFace...")
        self._load_attempted = True

        start_time = datetime.now()

        try:
            # === TEXT GENERATION (Gemma 7B, GPT-2, etc.) ===
            if self.task == "text-generation":
                logger.info(" Using 8-bit quantization for memory efficiency...")

                # Check if model supports 8-bit loading
                use_8bit = self.device == DeviceType.CUDA.value

                if use_8bit:
                    # NOTE(review): passing load_in_8bit directly to pipeline()
                    # is deprecated in recent transformers — confirm against
                    # the installed version (BitsAndBytesConfig is the
                    # current mechanism).
                    self.pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        tokenizer=self.model_name,
                        device_map="auto",
                        load_in_8bit=True,  # Reduces ~14GB to ~7GB
                        trust_remote_code=True,
                        torch_dtype=torch.float16
                    )
                else:
                    # CPU fallback
                    self.pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        tokenizer=self.model_name,
                        device=-1,  # CPU
                        trust_remote_code=True,
                        torch_dtype=torch.float32
                    )

            # === TRANSLATION (NLLB-200, M2M-100, etc.) ===
            elif self.task == "translation":
                self.pipeline = pipeline(
                    "translation",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    src_lang=self.config.get("default_src_lang", "eng_Latn"),
                    tgt_lang=self.config.get("default_tgt_lang", "spa_Latn")
                )

            # === SENTIMENT ANALYSIS ===
            elif self.task == "sentiment-analysis":
                self.pipeline = pipeline(
                    "sentiment-analysis",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    truncation=True,
                    max_length=512
                )

            # === BIAS DETECTION (Zero-Shot Classification) ===
            elif self.task == "bias-detection":
                self.pipeline = pipeline(
                    "zero-shot-classification",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1
                )

            # === TEXT CLASSIFICATION (Generic) ===
            elif self.task == "text-classification":
                self.pipeline = pipeline(
                    "text-classification",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    truncation=True
                )

            # === PDF/DOCUMENT EXTRACTION (LayoutLMv3) ===
            elif self.task == "pdf-extraction":
                logger.warning("⚠️ PDF extraction requires additional OCR setup")
                logger.info(" Consider using Azure Form Recognizer as alternative")
                # Placeholder - requires pytesseract/OCR infrastructure
                self.pipeline = None
                return False

            else:
                raise ValueError(f"Unknown task type: {self.task}")

            # === SUCCESS HANDLING ===
            if self.pipeline is not None:
                # Calculate load time
                load_time = (datetime.now() - start_time).total_seconds()
                self.metadata.loaded_at = datetime.now()
                self.metadata.load_time_seconds = load_time

                # Cache the pipeline
                _MODEL_CACHE[self.name] = self.pipeline
                _LOAD_TIMES[self.name] = load_time

                # Log memory usage
                mem_stats = get_memory_stats()
                self.metadata.memory_usage_gb = mem_stats.get("gpu_allocated_gb", 0)

                logger.info(f"✅ {self.name} loaded successfully!")
                logger.info(f" Load time: {load_time:.2f}s")

                if "gpu_allocated_gb" in mem_stats:
                    logger.info(
                        f" GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB / "
                        f"{mem_stats['gpu_total_gb']:.2f}GB"
                    )

            return True

        except Exception as e:
            logger.error(f"❌ Failed to load {self.name}: {e}", exc_info=True)
            self.pipeline = None
            return False

    def predict(
        self,
        input_data: Union[str, Dict[str, Any]],
        **kwargs
    ) -> Dict[str, Any]:
        """
        🎯 Runs inference with the loaded model pipeline.

        Features:
        - Automatic pipeline loading
        - Error handling with fallback responses
        - Performance tracking
        - Penny's personality injection (for text-generation)

        Args:
            input_data: Text or structured input for the model
            **kwargs: Task-specific parameters

        Returns:
            Model output dict with results or error information
        """
        # Track inference start time
        start_time = datetime.now()

        # Ensure pipeline is loaded
        if self.pipeline is None:
            success = self.load_pipeline()
            if not success:
                # NOTE(review): this early return omits "success": False,
                # unlike every other error path in this method.
                return {
                    "error": f"{self.name} pipeline unavailable",
                    "detail": "Model failed to load. Check logs for details.",
                    "model": self.name
                }

        try:
            # === TEXT GENERATION ===
            if self.task == "text-generation":
                # Inject Penny's civic identity
                # (string concatenation assumes input_data is a str here —
                # a dict input would raise TypeError; confirm with callers)
                if not kwargs.get("skip_system_prompt", False):
                    full_prompt = PENNY_SYSTEM_PROMPT + input_data
                else:
                    full_prompt = input_data

                # Extract generation parameters with safe defaults
                max_new_tokens = kwargs.get("max_new_tokens", 256)
                temperature = kwargs.get("temperature", 0.7)
                top_p = kwargs.get("top_p", 0.9)
                # Greedy decoding when temperature is 0 (unless overridden).
                do_sample = kwargs.get("do_sample", temperature > 0.0)

                result = self.pipeline(
                    full_prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=do_sample,
                    return_full_text=False,
                    pad_token_id=self.pipeline.tokenizer.eos_token_id,
                    truncation=True
                )

                output = {
                    "generated_text": result[0]["generated_text"],
                    "model": self.name,
                    "success": True
                }

            # === TRANSLATION ===
            elif self.task == "translation":
                src_lang = kwargs.get("source_lang", "eng_Latn")
                tgt_lang = kwargs.get("target_lang", "spa_Latn")

                result = self.pipeline(
                    input_data,
                    src_lang=src_lang,
                    tgt_lang=tgt_lang,
                    max_length=512
                )

                output = {
                    "translation": result[0]["translation_text"],
                    "source_lang": src_lang,
                    "target_lang": tgt_lang,
                    "model": self.name,
                    "success": True
                }

            # === SENTIMENT ANALYSIS ===
            elif self.task == "sentiment-analysis":
                result = self.pipeline(input_data)

                output = {
                    "sentiment": result[0]["label"],
                    "confidence": result[0]["score"],
                    "model": self.name,
                    "success": True
                }

            # === BIAS DETECTION ===
            elif self.task == "bias-detection":
                candidate_labels = kwargs.get("candidate_labels", [
                    "neutral and objective",
                    "contains political bias",
                    "uses emotional language",
                    "culturally insensitive"
                ])

                result = self.pipeline(
                    input_data,
                    candidate_labels=candidate_labels,
                    multi_label=True
                )

                output = {
                    "labels": result["labels"],
                    "scores": result["scores"],
                    "model": self.name,
                    "success": True
                }

            # === TEXT CLASSIFICATION ===
            elif self.task == "text-classification":
                result = self.pipeline(input_data)

                output = {
                    "label": result[0]["label"],
                    "confidence": result[0]["score"],
                    "model": self.name,
                    "success": True
                }

            else:
                output = {
                    "error": f"Task '{self.task}' not implemented",
                    "model": self.name,
                    "success": False
                }

            # Track performance
            inference_time = (datetime.now() - start_time).total_seconds() * 1000
            self.metadata.inference_count += 1
            self.metadata.total_inference_time_ms += inference_time
            output["inference_time_ms"] = round(inference_time, 2)

            return output

        except Exception as e:
            logger.error(f"❌ Inference error in {self.name}: {e}", exc_info=True)
            return {
                "error": "Inference failed",
                "detail": str(e),
                "model": self.name,
                "success": False
            }

    def unload(self) -> None:
        """
        🗑️ Unloads the model to free memory.
        Critical for Azure environments with limited resources.
        """
        if self.pipeline is not None:
            logger.info(f"🗑️ Unloading {self.name}...")

            # Delete pipeline
            del self.pipeline
            self.pipeline = None

            # Remove from cache
            if self.name in _MODEL_CACHE:
                del _MODEL_CACHE[self.name]

            # Force GPU memory release
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            logger.info(f"✅ {self.name} unloaded successfully")

            # Log memory stats after unload
            mem_stats = get_memory_stats()
            if "gpu_allocated_gb" in mem_stats:
                logger.info(f" GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB remaining")

    def get_metadata(self) -> Dict[str, Any]:
        """
        📊 Returns model metadata and performance stats.
        """
        return {
            "name": self.metadata.name,
            "task": self.metadata.task,
            "model_name": self.metadata.model_name,
            "device": self.metadata.device,
            "loaded": self.pipeline is not None,
            "loaded_at": self.metadata.loaded_at.isoformat() if self.metadata.loaded_at else None,
            "load_time_seconds": self.metadata.load_time_seconds,
            "memory_usage_gb": self.metadata.memory_usage_gb,
            "inference_count": self.metadata.inference_count,
            "avg_inference_time_ms": round(self.metadata.avg_inference_time_ms, 2)
        }
|
| 584 |
+
|
| 585 |
+
|
| 586 |
+
# ============================================================
|
| 587 |
+
# MODEL LOADER (Singleton Manager)
|
| 588 |
+
# ============================================================
|
| 589 |
+
|
| 590 |
+
class ModelLoader:
    """
    🎛️ Singleton manager for all Penny's specialized models.

    Features:
    - Centralized model configuration
    - Lazy loading (models only load when needed)
    - Memory management
    - Health monitoring
    - Unified access interface
    """

    _instance: Optional['ModelLoader'] = None

    def __new__(cls, *args, **kwargs):
        """Singleton pattern - only one ModelLoader instance."""
        if cls._instance is None:
            cls._instance = super(ModelLoader, cls).__new__(cls)
        return cls._instance

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize ModelLoader (only runs once due to singleton).

        Args:
            config_path: Path to model_config.json (optional)
        """
        if hasattr(self, '_models_loaded'):
            # Singleton re-entry: a previous call already initialized state.
            return

        self.models: Dict[str, ModelClient] = {}
        self._models_loaded = True
        self._initialization_time = datetime.now()

        # Caller-supplied path wins; otherwise fall back to the module default.
        config_file = Path(config_path) if config_path else CONFIG_PATH

        try:
            logger.info(f"📖 Loading model configuration from {config_file}")

            if not config_file.exists():
                logger.warning(f"⚠️ Configuration file not found: {config_file}")
                logger.info(" Create model_config.json with your model definitions")
                return

            with open(config_file, "r") as f:
                config = json.load(f)

            # Register one lazy ModelClient per entry; no weights are loaded here.
            for model_id, model_info in config.items():
                self.models[model_id] = ModelClient(
                    name=model_id,
                    model_name=model_info["model_name"],
                    task=model_info["task"],
                    config=model_info.get("config", {})
                )

            logger.info(f"✅ ModelLoader initialized with {len(self.models)} models:")
            for model_id in self.models:
                logger.info(f" - {model_id}")

        except json.JSONDecodeError as e:
            logger.error(f"❌ Invalid JSON in model_config.json: {e}")
        except Exception as e:
            logger.error(f"❌ Failed to initialize ModelLoader: {e}", exc_info=True)

    def get(self, model_id: str) -> Optional[ModelClient]:
        """
        🎯 Retrieves a configured ModelClient by ID.

        Args:
            model_id: Model identifier from config

        Returns:
            ModelClient instance or None if not found
        """
        return self.models.get(model_id)

    def list_models(self) -> List[str]:
        """📋 Returns list of all available model IDs."""
        return [*self.models]

    def get_loaded_models(self) -> List[str]:
        """📋 Returns list of currently loaded model IDs."""
        loaded: List[str] = []
        for model_id, client in self.models.items():
            if client.pipeline is not None:
                loaded.append(model_id)
        return loaded

    def unload_all(self) -> None:
        """
        🗑️ Unloads all models to free memory.
        Useful for Azure environments when switching workloads.
        """
        logger.info("🗑️ Unloading all models...")
        for client in self.models.values():
            client.unload()
        logger.info("✅ All models unloaded")

    def get_status(self) -> Dict[str, Any]:
        """
        📊 Returns comprehensive status of all models.
        Useful for health checks and monitoring.
        """
        per_model = {
            model_id: client.get_metadata()
            for model_id, client in self.models.items()
        }
        return {
            "initialization_time": self._initialization_time.isoformat(),
            "total_models": len(self.models),
            "loaded_models": len(self.get_loaded_models()),
            "device": get_optimal_device(),
            "memory": get_memory_stats(),
            "models": per_model,
        }
|
| 706 |
+
|
| 707 |
+
|
| 708 |
+
# ============================================================
|
| 709 |
+
# PUBLIC INTERFACE (Used by all *_utils.py modules)
|
| 710 |
+
# ============================================================
|
| 711 |
+
|
| 712 |
+
def load_model_pipeline(agent_name: str) -> Callable[..., Dict[str, Any]]:
    """
    🚀 Loads a model client and returns its inference function.

    This is the main function used by other modules (translation_utils.py,
    sentiment_utils.py, etc.) to access Penny's models.

    Args:
        agent_name: Model ID from model_config.json

    Returns:
        Callable inference function

    Raises:
        ValueError: If agent_name not found in configuration
        RuntimeError: If the model pipeline fails to load

    Example:
        >>> translator = load_model_pipeline("penny-translate-agent")
        >>> result = translator("Hello world", target_lang="spa_Latn")
    """
    loader = ModelLoader()
    client = loader.get(agent_name)

    if client is None:
        available = loader.list_models()
        raise ValueError(
            f"Agent ID '{agent_name}' not found in model configuration. "
            f"Available models: {available}"
        )

    # Load the pipeline (lazy loading). load_pipeline() returns a success
    # boolean (see the __main__ test harness); previously this value was
    # ignored, so a failed load still handed back a wrapper that could
    # never work. Surface the failure immediately instead.
    if not client.load_pipeline():
        raise RuntimeError(
            f"Failed to load model pipeline for agent '{agent_name}'"
        )

    # Return a callable wrapper around the client's inference method.
    def inference_wrapper(input_data, **kwargs):
        return client.predict(input_data, **kwargs)

    return inference_wrapper
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
# === CONVENIENCE FUNCTIONS ===
|
| 753 |
+
|
| 754 |
+
def get_model_status() -> Dict[str, Any]:
    """
    📊 Returns status of all configured models.
    Useful for health checks and monitoring endpoints.
    """
    # ModelLoader is a singleton, so this reuses the existing instance.
    return ModelLoader().get_status()
|
| 761 |
+
|
| 762 |
+
|
| 763 |
+
def preload_models(model_ids: Optional[List[str]] = None) -> None:
    """
    🚀 Preloads specified models during startup.

    Args:
        model_ids: List of model IDs to preload (None = all models)
    """
    loader = ModelLoader()

    if model_ids is None:
        model_ids = loader.list_models()

    logger.info(f"🚀 Preloading {len(model_ids)} models...")

    for model_id in model_ids:
        client = loader.get(model_id)
        if client is None:
            # Previously unknown IDs were skipped silently; flag them so a
            # typo in a preload list is visible at startup.
            logger.warning(f"⚠️ Unknown model ID '{model_id}' - skipping preload")
            continue

        logger.info(f" Loading {model_id}...")
        # load_pipeline() returns a success boolean; log failures instead of
        # ignoring the result as before.
        if not client.load_pipeline():
            logger.error(f"❌ Preload failed for model '{model_id}'")

    logger.info("✅ Model preloading complete")
|
| 784 |
+
|
| 785 |
+
|
| 786 |
+
def initialize_model_system() -> bool:
    """
    🏁 Initializes the model system.
    Should be called during app startup.

    Returns:
        True if initialization successful
    """
    logger.info("🧠 Initializing Penny's model system...")

    try:
        # First construction of the singleton triggers config loading.
        loader = ModelLoader()

        # Gather environment facts for the startup log.
        compute_device = get_optimal_device()
        memory = get_memory_stats()

        logger.info(f"✅ Model system initialized")
        logger.info(f"🎮 Compute device: {compute_device}")

        if "gpu_total_gb" in memory:
            logger.info(
                f"💾 GPU Memory: {memory['gpu_total_gb']:.1f}GB total"
            )

        logger.info(f"📦 {len(loader.models)} models configured")

        # Optional: Preload critical models
        # Uncomment to preload models at startup
        # preload_models(["penny-core-agent"])

        return True

    except Exception as e:
        logger.error(f"❌ Failed to initialize model system: {e}", exc_info=True)
        return False
|
| 823 |
+
|
| 824 |
+
|
| 825 |
+
# ============================================================
|
| 826 |
+
# CLI TESTING & DEBUGGING
|
| 827 |
+
# ============================================================
|
| 828 |
+
|
| 829 |
+
if __name__ == "__main__":
    """
    🧪 Test script for model loading and inference.
    Run with: python -m app.model_loader
    """
    # Banner
    print("=" * 60)
    print("🧪 Testing Penny's Model System")
    print("=" * 60)

    # Initialize the singleton (reads model_config.json on first call)
    loader = ModelLoader()
    print(f"\n📋 Available models: {loader.list_models()}")

    # Get status — default=str handles datetimes in the status payload
    status = get_model_status()
    print(f"\n📊 System status:")
    print(json.dumps(status, indent=2, default=str))

    # Test model loading (if models configured) — exercises the first
    # configured model only; load_pipeline() returns a success boolean
    if loader.models:
        test_model_id = list(loader.models.keys())[0]
        print(f"\n🧪 Testing model: {test_model_id}")

        client = loader.get(test_model_id)
        if client:
            print(f" Loading pipeline...")
            success = client.load_pipeline()

            if success:
                print(f" ✅ Model loaded successfully!")
                print(f" Metadata: {json.dumps(client.get_metadata(), indent=2, default=str)}")
            else:
                print(f" ❌ Model loading failed")
|
orchestrator.py
ADDED
|
@@ -0,0 +1,1315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
🎭 PENNY Orchestrator - Request Routing & Coordination Engine
|
| 3 |
+
|
| 4 |
+
This is Penny's decision-making brain. She analyzes each request, determines
|
| 5 |
+
the best way to help, and coordinates between her specialized AI models and
|
| 6 |
+
civic data tools.
|
| 7 |
+
|
| 8 |
+
MISSION: Route every resident request to the right resource while maintaining
|
| 9 |
+
Penny's warm, helpful personality and ensuring fast, accurate responses.
|
| 10 |
+
|
| 11 |
+
FEATURES:
|
| 12 |
+
- Enhanced intent classification with confidence scoring
|
| 13 |
+
- Compound intent handling (weather + events)
|
| 14 |
+
- Graceful fallbacks when services are unavailable
|
| 15 |
+
- Performance tracking for all operations
|
| 16 |
+
- Context-aware responses
|
| 17 |
+
- Emergency routing with immediate escalation
|
| 18 |
+
|
| 19 |
+
ENHANCEMENTS (Phase 1):
|
| 20 |
+
- ✅ Structured logging with performance tracking
|
| 21 |
+
- ✅ Safe imports with availability flags
|
| 22 |
+
- ✅ Result format checking helper
|
| 23 |
+
- ✅ Enhanced error handling patterns
|
| 24 |
+
- ✅ Service availability tracking
|
| 25 |
+
- ✅ Fixed function signature mismatches
|
| 26 |
+
- ✅ Integration with enhanced modules
|
| 27 |
+
"""
|
| 28 |
+
|
| 29 |
+
import logging
|
| 30 |
+
import time
|
| 31 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 32 |
+
from datetime import datetime
|
| 33 |
+
from dataclasses import dataclass, field
|
| 34 |
+
from enum import Enum
|
| 35 |
+
|
| 36 |
+
# --- ENHANCED MODULE IMPORTS ---
|
| 37 |
+
from app.intents import classify_intent_detailed, IntentType, IntentMatch
|
| 38 |
+
from app.location_utils import (
|
| 39 |
+
extract_location_detailed,
|
| 40 |
+
LocationMatch,
|
| 41 |
+
LocationStatus,
|
| 42 |
+
get_city_coordinates
|
| 43 |
+
)
|
| 44 |
+
from app.logging_utils import (
|
| 45 |
+
log_interaction,
|
| 46 |
+
sanitize_for_logging,
|
| 47 |
+
LogLevel
|
| 48 |
+
)
|
| 49 |
+
|
| 50 |
+
# --- AGENT IMPORTS (with availability tracking) ---
|
| 51 |
+
try:
|
| 52 |
+
from app.weather_agent import (
|
| 53 |
+
get_weather_for_location,
|
| 54 |
+
recommend_outfit,
|
| 55 |
+
weather_to_event_recommendations,
|
| 56 |
+
format_weather_summary
|
| 57 |
+
)
|
| 58 |
+
WEATHER_AGENT_AVAILABLE = True
|
| 59 |
+
except ImportError as e:
|
| 60 |
+
logger = logging.getLogger(__name__)
|
| 61 |
+
logger.warning(f"Weather agent not available: {e}")
|
| 62 |
+
WEATHER_AGENT_AVAILABLE = False
|
| 63 |
+
|
| 64 |
+
try:
|
| 65 |
+
from app.event_weather import get_event_recommendations_with_weather
|
| 66 |
+
EVENT_WEATHER_AVAILABLE = True
|
| 67 |
+
except ImportError as e:
|
| 68 |
+
logger = logging.getLogger(__name__)
|
| 69 |
+
logger.warning(f"Event weather integration not available: {e}")
|
| 70 |
+
EVENT_WEATHER_AVAILABLE = False
|
| 71 |
+
|
| 72 |
+
try:
|
| 73 |
+
from app.tool_agent import handle_tool_request
|
| 74 |
+
TOOL_AGENT_AVAILABLE = True
|
| 75 |
+
except ImportError as e:
|
| 76 |
+
logger = logging.getLogger(__name__)
|
| 77 |
+
logger.warning(f"Tool agent not available: {e}")
|
| 78 |
+
TOOL_AGENT_AVAILABLE = False
|
| 79 |
+
|
| 80 |
+
# --- MODEL IMPORTS (with availability tracking) ---
|
| 81 |
+
try:
|
| 82 |
+
from models.translation.translation_utils import translate_text
|
| 83 |
+
TRANSLATION_AVAILABLE = True
|
| 84 |
+
except ImportError as e:
|
| 85 |
+
logger = logging.getLogger(__name__)
|
| 86 |
+
logger.warning(f"Translation service not available: {e}")
|
| 87 |
+
TRANSLATION_AVAILABLE = False
|
| 88 |
+
|
| 89 |
+
try:
|
| 90 |
+
from models.sentiment.sentiment_utils import get_sentiment_analysis
|
| 91 |
+
SENTIMENT_AVAILABLE = True
|
| 92 |
+
except ImportError as e:
|
| 93 |
+
logger = logging.getLogger(__name__)
|
| 94 |
+
logger.warning(f"Sentiment service not available: {e}")
|
| 95 |
+
SENTIMENT_AVAILABLE = False
|
| 96 |
+
|
| 97 |
+
try:
|
| 98 |
+
from models.bias.bias_utils import check_bias
|
| 99 |
+
BIAS_AVAILABLE = True
|
| 100 |
+
except ImportError as e:
|
| 101 |
+
logger = logging.getLogger(__name__)
|
| 102 |
+
logger.warning(f"Bias detection service not available: {e}")
|
| 103 |
+
BIAS_AVAILABLE = False
|
| 104 |
+
|
| 105 |
+
try:
|
| 106 |
+
from models.gemma.gemma_utils import generate_response
|
| 107 |
+
LLM_AVAILABLE = True
|
| 108 |
+
except ImportError as e:
|
| 109 |
+
logger = logging.getLogger(__name__)
|
| 110 |
+
logger.warning(f"LLM service not available: {e}")
|
| 111 |
+
LLM_AVAILABLE = False
|
| 112 |
+
|
| 113 |
+
# --- LOGGING SETUP ---
|
| 114 |
+
logger = logging.getLogger(__name__)
|
| 115 |
+
|
| 116 |
+
# --- CONFIGURATION ---
|
| 117 |
+
CORE_MODEL_ID = "penny-core-agent"
|
| 118 |
+
MAX_RESPONSE_TIME_MS = 5000 # 5 seconds - log if exceeded
|
| 119 |
+
|
| 120 |
+
# --- TRACKING COUNTERS ---
|
| 121 |
+
_orchestration_count = 0
|
| 122 |
+
_emergency_count = 0
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
# ============================================================
|
| 126 |
+
# COMPATIBILITY HELPER - Result Format Checking
|
| 127 |
+
# ============================================================
|
| 128 |
+
|
| 129 |
+
def _check_result_success(
|
| 130 |
+
result: Dict[str, Any],
|
| 131 |
+
expected_keys: List[str]
|
| 132 |
+
) -> Tuple[bool, Optional[str]]:
|
| 133 |
+
"""
|
| 134 |
+
✅ Check if a utility function result indicates success.
|
| 135 |
+
|
| 136 |
+
Handles multiple return format patterns:
|
| 137 |
+
- Explicit "success" key (preferred)
|
| 138 |
+
- Presence of expected data keys (implicit success)
|
| 139 |
+
- Presence of "error" key (explicit failure)
|
| 140 |
+
|
| 141 |
+
This helper fixes compatibility issues where different utility
|
| 142 |
+
functions return different result formats.
|
| 143 |
+
|
| 144 |
+
Args:
|
| 145 |
+
result: Dictionary returned from utility function
|
| 146 |
+
expected_keys: List of keys that indicate successful data
|
| 147 |
+
|
| 148 |
+
Returns:
|
| 149 |
+
Tuple of (is_success, error_message)
|
| 150 |
+
|
| 151 |
+
Example:
|
| 152 |
+
result = await translate_text(message, "en", "es")
|
| 153 |
+
success, error = _check_result_success(result, ["translated_text"])
|
| 154 |
+
if success:
|
| 155 |
+
text = result.get("translated_text")
|
| 156 |
+
"""
|
| 157 |
+
# Check for explicit success key
|
| 158 |
+
if "success" in result:
|
| 159 |
+
return result["success"], result.get("error")
|
| 160 |
+
|
| 161 |
+
# Check for explicit error (presence = failure)
|
| 162 |
+
if "error" in result and result["error"]:
|
| 163 |
+
return False, result["error"]
|
| 164 |
+
|
| 165 |
+
# Check for expected data keys (implicit success)
|
| 166 |
+
has_data = any(key in result for key in expected_keys)
|
| 167 |
+
if has_data:
|
| 168 |
+
return True, None
|
| 169 |
+
|
| 170 |
+
# Unknown format - assume failure
|
| 171 |
+
return False, "Unexpected response format"
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
# ============================================================
|
| 175 |
+
# SERVICE AVAILABILITY CHECK
|
| 176 |
+
# ============================================================
|
| 177 |
+
|
| 178 |
+
def get_service_availability() -> Dict[str, bool]:
    """
    📊 Returns which services are currently available.

    Used for health checks, debugging, and deciding whether
    to attempt service calls or use fallbacks.

    Returns:
        Dictionary mapping service names to availability status
    """
    # Flags are set once at import time by the try/except import guards above.
    services = (
        ("translation", TRANSLATION_AVAILABLE),
        ("sentiment", SENTIMENT_AVAILABLE),
        ("bias_detection", BIAS_AVAILABLE),
        ("llm", LLM_AVAILABLE),
        ("tool_agent", TOOL_AGENT_AVAILABLE),
        ("weather", WEATHER_AGENT_AVAILABLE),
        ("event_weather", EVENT_WEATHER_AVAILABLE),
    )
    return dict(services)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
# ============================================================
|
| 200 |
+
# ORCHESTRATION RESULT STRUCTURE
|
| 201 |
+
# ============================================================
|
| 202 |
+
|
| 203 |
+
@dataclass
class OrchestrationResult:
    """
    📦 Structured result produced by the orchestration pipeline.

    Used throughout the system for consistency; captures both the
    user-facing reply and the processing metadata for logging/monitoring.
    """
    intent: str                               # Detected intent
    reply: str                                # User-facing response
    success: bool                             # Whether request succeeded
    tenant_id: Optional[str] = None           # City/location identifier
    data: Optional[Dict[str, Any]] = None     # Raw data from services
    model_id: Optional[str] = None            # Which model/service was used
    error: Optional[str] = None               # Error message if failed
    response_time_ms: Optional[float] = None  # Wall-clock processing time
    confidence: Optional[float] = None        # Intent confidence score
    fallback_used: bool = False               # True if fallback logic triggered

    def to_dict(self) -> Dict[str, Any]:
        """Converts to dictionary for API responses."""
        return dict(
            intent=self.intent,
            reply=self.reply,
            success=self.success,
            tenant_id=self.tenant_id,
            data=self.data,
            model_id=self.model_id,
            error=self.error,
            response_time_ms=self.response_time_ms,
            confidence=self.confidence,
            fallback_used=self.fallback_used,
        )
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# ============================================================
|
| 239 |
+
# MAIN ORCHESTRATOR FUNCTION (ENHANCED)
|
| 240 |
+
# ============================================================
|
| 241 |
+
|
| 242 |
+
async def run_orchestrator(
    message: str,
    context: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    🧠 Main decision-making brain of Penny.

    This function:
    1. Analyzes the user's message to determine intent
    2. Extracts location/city information
    3. Routes to the appropriate specialized service
    4. Handles errors gracefully with helpful fallbacks
    5. Tracks performance and logs the interaction

    Args:
        message: User's input text
        context: Additional context (tenant_id, lat, lon, session_id, etc.)
            Defaults to an empty dict when omitted.

    Returns:
        Dictionary with response and metadata (OrchestrationResult.to_dict()
        shape on normal and error paths)

    Example:
        result = await run_orchestrator(
            message="What's the weather in Atlanta?",
            context={"lat": 33.7490, "lon": -84.3880}
        )
    """
    global _orchestration_count
    _orchestration_count += 1

    start_time = time.time()

    # Initialize context if not provided
    if context is None:
        context = {}

    # Sanitize message for logging (PII protection)
    safe_message = sanitize_for_logging(message)
    logger.info(f"🎭 Orchestrator processing: '{safe_message[:50]}...'")

    try:
        # === STEP 1: CLASSIFY INTENT (Enhanced) ===
        intent_result = classify_intent_detailed(message)
        intent = intent_result.intent
        confidence = intent_result.confidence

        logger.info(
            f"Intent detected: {intent.value} "
            f"(confidence: {confidence:.2f})"
        )

        # === STEP 2: EXTRACT LOCATION ===
        # Context-supplied values take precedence over message extraction.
        tenant_id = context.get("tenant_id")
        lat = context.get("lat")
        lon = context.get("lon")

        # If tenant_id not provided, try to extract from message
        if not tenant_id or tenant_id == "unknown":
            location_result = extract_location_detailed(message)

            if location_result.status == LocationStatus.FOUND:
                tenant_id = location_result.tenant_id
                logger.info(f"Location extracted: {tenant_id}")

                # Get coordinates for this tenant if available — only fill
                # in lat/lon when the caller supplied neither.
                coords = get_city_coordinates(tenant_id)
                if coords and lat is None and lon is None:
                    lat, lon = coords["lat"], coords["lon"]
                    logger.info(f"Coordinates loaded: {lat}, {lon}")

            elif location_result.status == LocationStatus.USER_LOCATION_NEEDED:
                logger.info("User location services needed")
            else:
                logger.info(f"No location detected: {location_result.status}")

        # === STEP 3: HANDLE EMERGENCY INTENTS (CRITICAL) ===
        # NOTE(review): this path returns the handler's value directly and
        # skips the Step 5 metadata/logging below; _handle_emergency is
        # annotated -> OrchestrationResult while other paths return
        # result.to_dict() — confirm it actually returns a dict.
        if intent == IntentType.EMERGENCY:
            return await _handle_emergency(
                message=message,
                context=context,
                start_time=start_time
            )

        # === STEP 4: ROUTE TO APPROPRIATE HANDLER ===

        # Translation
        if intent == IntentType.TRANSLATION:
            result = await _handle_translation(message, context)

        # Sentiment Analysis
        elif intent == IntentType.SENTIMENT_ANALYSIS:
            result = await _handle_sentiment(message, context)

        # Bias Detection
        elif intent == IntentType.BIAS_DETECTION:
            result = await _handle_bias(message, context)

        # Document Processing
        elif intent == IntentType.DOCUMENT_PROCESSING:
            result = await _handle_document(message, context)

        # Weather (includes compound weather+events handling)
        elif intent == IntentType.WEATHER:
            result = await _handle_weather(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon,
                intent_result=intent_result
            )

        # Events
        elif intent == IntentType.EVENTS:
            result = await _handle_events(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon,
                intent_result=intent_result
            )

        # Local Resources
        elif intent == IntentType.LOCAL_RESOURCES:
            result = await _handle_local_resources(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )

        # Greeting, Help, Unknown
        elif intent in [IntentType.GREETING, IntentType.HELP, IntentType.UNKNOWN]:
            result = await _handle_conversational(
                message=message,
                intent=intent,
                context=context
            )

        else:
            # Unhandled intent type (shouldn't happen, but safety net)
            result = await _handle_fallback(message, intent, context)

        # === STEP 5: ADD METADATA & LOG INTERACTION ===
        response_time = (time.time() - start_time) * 1000
        result.response_time_ms = round(response_time, 2)
        result.confidence = confidence
        result.tenant_id = tenant_id

        # Log the interaction with structured logging
        log_interaction(
            tenant_id=tenant_id or "unknown",
            interaction_type="orchestration",
            intent=intent.value,
            response_time_ms=response_time,
            success=result.success,
            metadata={
                "confidence": confidence,
                "fallback_used": result.fallback_used,
                "model_id": result.model_id,
                "orchestration_count": _orchestration_count
            }
        )

        # Log slow responses (threshold defined by MAX_RESPONSE_TIME_MS)
        if response_time > MAX_RESPONSE_TIME_MS:
            logger.warning(
                f"⚠️ Slow response: {response_time:.0f}ms "
                f"(intent: {intent.value})"
            )

        logger.info(
            f"✅ Orchestration complete: {intent.value} "
            f"({response_time:.0f}ms)"
        )

        return result.to_dict()

    except Exception as e:
        # === CATASTROPHIC FAILURE HANDLER ===
        # Any unexpected exception is logged and converted into a friendly
        # user-facing fallback response rather than propagating.
        response_time = (time.time() - start_time) * 1000
        logger.error(
            f"❌ Orchestrator error: {e} "
            f"(response_time: {response_time:.0f}ms)",
            exc_info=True
        )

        # Log failed interaction
        log_interaction(
            tenant_id=context.get("tenant_id", "unknown"),
            interaction_type="orchestration_error",
            intent="error",
            response_time_ms=response_time,
            success=False,
            metadata={
                "error": str(e),
                "error_type": type(e).__name__
            }
        )

        error_result = OrchestrationResult(
            intent="error",
            reply=(
                "I'm having trouble processing your request right now. "
                "Please try again in a moment, or let me know if you need "
                "immediate assistance! 💛"
            ),
            success=False,
            error=str(e),
            model_id="orchestrator",
            fallback_used=True,
            response_time_ms=round(response_time, 2)
        )

        return error_result.to_dict()
|
| 459 |
+
|
| 460 |
+
|
| 461 |
+
# ============================================================
|
| 462 |
+
# SPECIALIZED INTENT HANDLERS (ENHANCED)
|
| 463 |
+
# ============================================================
|
| 464 |
+
|
| 465 |
+
async def _handle_emergency(
|
| 466 |
+
message: str,
|
| 467 |
+
context: Dict[str, Any],
|
| 468 |
+
start_time: float
|
| 469 |
+
) -> OrchestrationResult:
|
| 470 |
+
"""
|
| 471 |
+
🚨 CRITICAL: Emergency intent handler.
|
| 472 |
+
|
| 473 |
+
This function handles crisis situations with immediate routing
|
| 474 |
+
to appropriate services. All emergency interactions are logged
|
| 475 |
+
for compliance and safety tracking.
|
| 476 |
+
|
| 477 |
+
IMPORTANT: This is a compliance-critical function. All emergency
|
| 478 |
+
interactions must be logged and handled with priority.
|
| 479 |
+
"""
|
| 480 |
+
global _emergency_count
|
| 481 |
+
_emergency_count += 1
|
| 482 |
+
|
| 483 |
+
# Sanitize message for logging (but keep full context for safety review)
|
| 484 |
+
safe_message = sanitize_for_logging(message)
|
| 485 |
+
logger.warning(f"🚨 EMERGENCY INTENT DETECTED (#{_emergency_count}): {safe_message[:100]}")
|
| 486 |
+
|
| 487 |
+
# TODO: Integrate with safety_utils.py when enhanced
|
| 488 |
+
# from app.safety_utils import route_emergency
|
| 489 |
+
# result = await route_emergency(message, context)
|
| 490 |
+
|
| 491 |
+
# For now, provide crisis resources
|
| 492 |
+
reply = (
|
| 493 |
+
"🚨 **If this is a life-threatening emergency, please call 911 immediately.**\n\n"
|
| 494 |
+
"For crisis support:\n"
|
| 495 |
+
"- **National Suicide Prevention Lifeline:** 988\n"
|
| 496 |
+
"- **Crisis Text Line:** Text HOME to 741741\n"
|
| 497 |
+
"- **National Domestic Violence Hotline:** 1-800-799-7233\n\n"
|
| 498 |
+
"I'm here to help connect you with local resources. "
|
| 499 |
+
"What kind of support do you need right now?"
|
| 500 |
+
)
|
| 501 |
+
|
| 502 |
+
# Log emergency interaction for compliance (CRITICAL)
|
| 503 |
+
response_time = (time.time() - start_time) * 1000
|
| 504 |
+
log_interaction(
|
| 505 |
+
tenant_id=context.get("tenant_id", "emergency"),
|
| 506 |
+
interaction_type="emergency",
|
| 507 |
+
intent=IntentType.EMERGENCY.value,
|
| 508 |
+
response_time_ms=response_time,
|
| 509 |
+
success=True,
|
| 510 |
+
metadata={
|
| 511 |
+
"emergency_number": _emergency_count,
|
| 512 |
+
"message_length": len(message),
|
| 513 |
+
"timestamp": datetime.now().isoformat(),
|
| 514 |
+
"action": "crisis_resources_provided"
|
| 515 |
+
}
|
| 516 |
+
)
|
| 517 |
+
|
| 518 |
+
logger.critical(
|
| 519 |
+
f"EMERGENCY LOG #{_emergency_count}: Resources provided "
|
| 520 |
+
f"({response_time:.0f}ms)"
|
| 521 |
+
)
|
| 522 |
+
|
| 523 |
+
return OrchestrationResult(
|
| 524 |
+
intent=IntentType.EMERGENCY.value,
|
| 525 |
+
reply=reply,
|
| 526 |
+
success=True,
|
| 527 |
+
model_id="emergency_router",
|
| 528 |
+
data={"crisis_resources_provided": True},
|
| 529 |
+
response_time_ms=round(response_time, 2)
|
| 530 |
+
)
|
| 531 |
+
|
| 532 |
+
|
| 533 |
+
async def _handle_translation(
|
| 534 |
+
message: str,
|
| 535 |
+
context: Dict[str, Any]
|
| 536 |
+
) -> OrchestrationResult:
|
| 537 |
+
"""
|
| 538 |
+
🌍 Translation handler - 27 languages supported.
|
| 539 |
+
|
| 540 |
+
Handles translation requests with graceful fallback if service
|
| 541 |
+
is unavailable.
|
| 542 |
+
"""
|
| 543 |
+
logger.info("🌍 Processing translation request")
|
| 544 |
+
|
| 545 |
+
# Check service availability first
|
| 546 |
+
if not TRANSLATION_AVAILABLE:
|
| 547 |
+
logger.warning("Translation service not available")
|
| 548 |
+
return OrchestrationResult(
|
| 549 |
+
intent=IntentType.TRANSLATION.value,
|
| 550 |
+
reply="Translation isn't available right now. Try again soon! 🌍",
|
| 551 |
+
success=False,
|
| 552 |
+
error="Service not loaded",
|
| 553 |
+
fallback_used=True
|
| 554 |
+
)
|
| 555 |
+
|
| 556 |
+
try:
|
| 557 |
+
# Extract language parameters from context
|
| 558 |
+
source_lang = context.get("source_lang", "eng_Latn")
|
| 559 |
+
target_lang = context.get("target_lang", "spa_Latn")
|
| 560 |
+
|
| 561 |
+
# TODO: Parse languages from message when enhanced
|
| 562 |
+
# Example: "Translate 'hello' to Spanish"
|
| 563 |
+
|
| 564 |
+
result = await translate_text(message, source_lang, target_lang)
|
| 565 |
+
|
| 566 |
+
# Use compatibility helper to check result
|
| 567 |
+
success, error = _check_result_success(result, ["translated_text"])
|
| 568 |
+
|
| 569 |
+
if success:
|
| 570 |
+
translated = result.get("translated_text", "")
|
| 571 |
+
reply = (
|
| 572 |
+
f"Here's the translation:\n\n"
|
| 573 |
+
f"**{translated}**\n\n"
|
| 574 |
+
f"(Translated from {source_lang} to {target_lang})"
|
| 575 |
+
)
|
| 576 |
+
|
| 577 |
+
return OrchestrationResult(
|
| 578 |
+
intent=IntentType.TRANSLATION.value,
|
| 579 |
+
reply=reply,
|
| 580 |
+
success=True,
|
| 581 |
+
data=result,
|
| 582 |
+
model_id="penny-translate-agent"
|
| 583 |
+
)
|
| 584 |
+
else:
|
| 585 |
+
raise Exception(error or "Translation failed")
|
| 586 |
+
|
| 587 |
+
except Exception as e:
|
| 588 |
+
logger.error(f"Translation error: {e}", exc_info=True)
|
| 589 |
+
return OrchestrationResult(
|
| 590 |
+
intent=IntentType.TRANSLATION.value,
|
| 591 |
+
reply=(
|
| 592 |
+
"I had trouble translating that. Could you rephrase? 💬"
|
| 593 |
+
),
|
| 594 |
+
success=False,
|
| 595 |
+
error=str(e),
|
| 596 |
+
fallback_used=True
|
| 597 |
+
)
|
| 598 |
+
|
| 599 |
+
|
| 600 |
+
async def _handle_sentiment(
|
| 601 |
+
message: str,
|
| 602 |
+
context: Dict[str, Any]
|
| 603 |
+
) -> OrchestrationResult:
|
| 604 |
+
"""
|
| 605 |
+
😊 Sentiment analysis handler.
|
| 606 |
+
|
| 607 |
+
Analyzes the emotional tone of text with graceful fallback
|
| 608 |
+
if service is unavailable.
|
| 609 |
+
"""
|
| 610 |
+
logger.info("😊 Processing sentiment analysis")
|
| 611 |
+
|
| 612 |
+
# Check service availability first
|
| 613 |
+
if not SENTIMENT_AVAILABLE:
|
| 614 |
+
logger.warning("Sentiment service not available")
|
| 615 |
+
return OrchestrationResult(
|
| 616 |
+
intent=IntentType.SENTIMENT_ANALYSIS.value,
|
| 617 |
+
reply="Sentiment analysis isn't available right now. Try again soon! 😊",
|
| 618 |
+
success=False,
|
| 619 |
+
error="Service not loaded",
|
| 620 |
+
fallback_used=True
|
| 621 |
+
)
|
| 622 |
+
|
| 623 |
+
try:
|
| 624 |
+
result = await get_sentiment_analysis(message)
|
| 625 |
+
|
| 626 |
+
# Use compatibility helper to check result
|
| 627 |
+
success, error = _check_result_success(result, ["label", "score"])
|
| 628 |
+
|
| 629 |
+
if success:
|
| 630 |
+
sentiment = result.get("label", "neutral")
|
| 631 |
+
confidence = result.get("score", 0.0)
|
| 632 |
+
|
| 633 |
+
reply = (
|
| 634 |
+
f"The overall sentiment detected is: **{sentiment}**\n"
|
| 635 |
+
f"Confidence: {confidence:.1%}"
|
| 636 |
+
)
|
| 637 |
+
|
| 638 |
+
return OrchestrationResult(
|
| 639 |
+
intent=IntentType.SENTIMENT_ANALYSIS.value,
|
| 640 |
+
reply=reply,
|
| 641 |
+
success=True,
|
| 642 |
+
data=result,
|
| 643 |
+
model_id="penny-sentiment-agent"
|
| 644 |
+
)
|
| 645 |
+
else:
|
| 646 |
+
raise Exception(error or "Sentiment analysis failed")
|
| 647 |
+
|
| 648 |
+
except Exception as e:
|
| 649 |
+
logger.error(f"Sentiment analysis error: {e}", exc_info=True)
|
| 650 |
+
return OrchestrationResult(
|
| 651 |
+
intent=IntentType.SENTIMENT_ANALYSIS.value,
|
| 652 |
+
reply="I couldn't analyze the sentiment right now. Try again? 😊",
|
| 653 |
+
success=False,
|
| 654 |
+
error=str(e),
|
| 655 |
+
fallback_used=True
|
| 656 |
+
)
|
| 657 |
+
|
| 658 |
+
async def _handle_bias(
|
| 659 |
+
message: str,
|
| 660 |
+
context: Dict[str, Any]
|
| 661 |
+
) -> OrchestrationResult:
|
| 662 |
+
"""
|
| 663 |
+
⚖️ Bias detection handler.
|
| 664 |
+
|
| 665 |
+
Analyzes text for potential bias patterns with graceful fallback
|
| 666 |
+
if service is unavailable.
|
| 667 |
+
"""
|
| 668 |
+
logger.info("⚖️ Processing bias detection")
|
| 669 |
+
|
| 670 |
+
# Check service availability first
|
| 671 |
+
if not BIAS_AVAILABLE:
|
| 672 |
+
logger.warning("Bias detection service not available")
|
| 673 |
+
return OrchestrationResult(
|
| 674 |
+
intent=IntentType.BIAS_DETECTION.value,
|
| 675 |
+
reply="Bias detection isn't available right now. Try again soon! ⚖️",
|
| 676 |
+
success=False,
|
| 677 |
+
error="Service not loaded",
|
| 678 |
+
fallback_used=True
|
| 679 |
+
)
|
| 680 |
+
|
| 681 |
+
try:
|
| 682 |
+
result = await check_bias(message)
|
| 683 |
+
|
| 684 |
+
# Use compatibility helper to check result
|
| 685 |
+
success, error = _check_result_success(result, ["analysis"])
|
| 686 |
+
|
| 687 |
+
if success:
|
| 688 |
+
analysis = result.get("analysis", [])
|
| 689 |
+
|
| 690 |
+
if analysis:
|
| 691 |
+
top_result = analysis[0]
|
| 692 |
+
label = top_result.get("label", "unknown")
|
| 693 |
+
score = top_result.get("score", 0.0)
|
| 694 |
+
|
| 695 |
+
reply = (
|
| 696 |
+
f"Bias analysis complete:\n\n"
|
| 697 |
+
f"**Most likely category:** {label}\n"
|
| 698 |
+
f"**Confidence:** {score:.1%}"
|
| 699 |
+
)
|
| 700 |
+
else:
|
| 701 |
+
reply = "The text appears relatively neutral. ⚖️"
|
| 702 |
+
|
| 703 |
+
return OrchestrationResult(
|
| 704 |
+
intent=IntentType.BIAS_DETECTION.value,
|
| 705 |
+
reply=reply,
|
| 706 |
+
success=True,
|
| 707 |
+
data=result,
|
| 708 |
+
model_id="penny-bias-checker"
|
| 709 |
+
)
|
| 710 |
+
else:
|
| 711 |
+
raise Exception(error or "Bias detection failed")
|
| 712 |
+
|
| 713 |
+
except Exception as e:
|
| 714 |
+
logger.error(f"Bias detection error: {e}", exc_info=True)
|
| 715 |
+
return OrchestrationResult(
|
| 716 |
+
intent=IntentType.BIAS_DETECTION.value,
|
| 717 |
+
reply="I couldn't check for bias right now. Try again? ⚖️",
|
| 718 |
+
success=False,
|
| 719 |
+
error=str(e),
|
| 720 |
+
fallback_used=True
|
| 721 |
+
)
|
| 722 |
+
|
| 723 |
+
|
| 724 |
+
async def _handle_document(
|
| 725 |
+
message: str,
|
| 726 |
+
context: Dict[str, Any]
|
| 727 |
+
) -> OrchestrationResult:
|
| 728 |
+
"""
|
| 729 |
+
📄 Document processing handler.
|
| 730 |
+
|
| 731 |
+
Note: Actual file upload happens in router.py via FastAPI.
|
| 732 |
+
This handler just provides instructions.
|
| 733 |
+
"""
|
| 734 |
+
logger.info("📄 Document processing requested")
|
| 735 |
+
|
| 736 |
+
reply = (
|
| 737 |
+
"I can help you process documents! 📄\n\n"
|
| 738 |
+
"Please upload your document (PDF or image) using the "
|
| 739 |
+
"`/upload-document` endpoint. I can extract text, analyze forms, "
|
| 740 |
+
"and help you understand civic documents.\n\n"
|
| 741 |
+
"What kind of document do you need help with?"
|
| 742 |
+
)
|
| 743 |
+
|
| 744 |
+
return OrchestrationResult(
|
| 745 |
+
intent=IntentType.DOCUMENT_PROCESSING.value,
|
| 746 |
+
reply=reply,
|
| 747 |
+
success=True,
|
| 748 |
+
model_id="document_router"
|
| 749 |
+
)
|
| 750 |
+
|
| 751 |
+
|
| 752 |
+
async def _handle_weather(
|
| 753 |
+
message: str,
|
| 754 |
+
context: Dict[str, Any],
|
| 755 |
+
tenant_id: Optional[str],
|
| 756 |
+
lat: Optional[float],
|
| 757 |
+
lon: Optional[float],
|
| 758 |
+
intent_result: IntentMatch
|
| 759 |
+
) -> OrchestrationResult:
|
| 760 |
+
"""
|
| 761 |
+
🌤️ Weather handler with compound intent support.
|
| 762 |
+
|
| 763 |
+
Handles both simple weather queries and compound weather+events queries.
|
| 764 |
+
Uses enhanced weather_agent.py with caching and performance tracking.
|
| 765 |
+
"""
|
| 766 |
+
logger.info("🌤️ Processing weather request")
|
| 767 |
+
|
| 768 |
+
# Check service availability first
|
| 769 |
+
if not WEATHER_AGENT_AVAILABLE:
|
| 770 |
+
logger.warning("Weather agent not available")
|
| 771 |
+
return OrchestrationResult(
|
| 772 |
+
intent=IntentType.WEATHER.value,
|
| 773 |
+
reply="Weather service isn't available right now. Try again soon! 🌤️",
|
| 774 |
+
success=False,
|
| 775 |
+
error="Weather agent not loaded",
|
| 776 |
+
fallback_used=True
|
| 777 |
+
)
|
| 778 |
+
|
| 779 |
+
# Check for compound intent (weather + events)
|
| 780 |
+
is_compound = intent_result.is_compound or IntentType.EVENTS in intent_result.secondary_intents
|
| 781 |
+
|
| 782 |
+
# Validate location
|
| 783 |
+
if lat is None or lon is None:
|
| 784 |
+
# Try to get coordinates from tenant_id
|
| 785 |
+
if tenant_id:
|
| 786 |
+
coords = get_city_coordinates(tenant_id)
|
| 787 |
+
if coords and lat is None and lon is None:
|
| 788 |
+
lat, lon = coords["lat"], coords["lon"]
|
| 789 |
+
logger.info(f"Using city coordinates for {tenant_id}: {lat}, {lon}")
|
| 790 |
+
|
| 791 |
+
if lat is None or lon is None:
|
| 792 |
+
return OrchestrationResult(
|
| 793 |
+
intent=IntentType.WEATHER.value,
|
| 794 |
+
reply=(
|
| 795 |
+
"I need to know your location to check the weather! 📍 "
|
| 796 |
+
"You can tell me your city, or share your location."
|
| 797 |
+
),
|
| 798 |
+
success=False,
|
| 799 |
+
error="Location required"
|
| 800 |
+
)
|
| 801 |
+
|
| 802 |
+
try:
|
| 803 |
+
# Use combined weather + events if compound intent detected
|
| 804 |
+
if is_compound and tenant_id and EVENT_WEATHER_AVAILABLE:
|
| 805 |
+
logger.info("Using weather+events combined handler")
|
| 806 |
+
result = await get_event_recommendations_with_weather(tenant_id, lat, lon)
|
| 807 |
+
|
| 808 |
+
# Build response
|
| 809 |
+
weather = result.get("weather", {})
|
| 810 |
+
weather_summary = result.get("weather_summary", "Weather unavailable")
|
| 811 |
+
suggestions = result.get("suggestions", [])
|
| 812 |
+
|
| 813 |
+
reply_lines = [f"🌤️ **Weather Update:**\n{weather_summary}\n"]
|
| 814 |
+
|
| 815 |
+
if suggestions:
|
| 816 |
+
reply_lines.append("\n📅 **Event Suggestions Based on Weather:**")
|
| 817 |
+
for suggestion in suggestions[:5]: # Top 5 suggestions
|
| 818 |
+
reply_lines.append(f"• {suggestion}")
|
| 819 |
+
|
| 820 |
+
reply = "\n".join(reply_lines)
|
| 821 |
+
|
| 822 |
+
return OrchestrationResult(
|
| 823 |
+
intent=IntentType.WEATHER.value,
|
| 824 |
+
reply=reply,
|
| 825 |
+
success=True,
|
| 826 |
+
data=result,
|
| 827 |
+
model_id="weather_events_combined"
|
| 828 |
+
)
|
| 829 |
+
|
| 830 |
+
else:
|
| 831 |
+
# Simple weather query using enhanced weather_agent
|
| 832 |
+
weather = await get_weather_for_location(lat, lon)
|
| 833 |
+
|
| 834 |
+
# Use enhanced weather_agent's format_weather_summary
|
| 835 |
+
if format_weather_summary:
|
| 836 |
+
weather_text = format_weather_summary(weather)
|
| 837 |
+
else:
|
| 838 |
+
# Fallback formatting
|
| 839 |
+
temp = weather.get("temperature", {}).get("value")
|
| 840 |
+
phrase = weather.get("phrase", "Conditions unavailable")
|
| 841 |
+
if temp:
|
| 842 |
+
weather_text = f"{phrase}, {int(temp)}°F"
|
| 843 |
+
else:
|
| 844 |
+
weather_text = phrase
|
| 845 |
+
|
| 846 |
+
# Get outfit recommendation from enhanced weather_agent
|
| 847 |
+
if recommend_outfit:
|
| 848 |
+
temp = weather.get("temperature", {}).get("value", 70)
|
| 849 |
+
condition = weather.get("phrase", "Clear")
|
| 850 |
+
outfit = recommend_outfit(temp, condition)
|
| 851 |
+
reply = f"🌤️ {weather_text}\n\n👕 {outfit}"
|
| 852 |
+
else:
|
| 853 |
+
reply = f"🌤️ {weather_text}"
|
| 854 |
+
|
| 855 |
+
return OrchestrationResult(
|
| 856 |
+
intent=IntentType.WEATHER.value,
|
| 857 |
+
reply=reply,
|
| 858 |
+
success=True,
|
| 859 |
+
data=weather,
|
| 860 |
+
model_id="azure-maps-weather"
|
| 861 |
+
)
|
| 862 |
+
|
| 863 |
+
except Exception as e:
|
| 864 |
+
logger.error(f"Weather error: {e}", exc_info=True)
|
| 865 |
+
return OrchestrationResult(
|
| 866 |
+
intent=IntentType.WEATHER.value,
|
| 867 |
+
reply=(
|
| 868 |
+
"I'm having trouble getting weather data right now. "
|
| 869 |
+
"Can I help you with something else? 💛"
|
| 870 |
+
),
|
| 871 |
+
success=False,
|
| 872 |
+
error=str(e),
|
| 873 |
+
fallback_used=True
|
| 874 |
+
)
|
| 875 |
+
|
| 876 |
+
|
| 877 |
+
async def _handle_events(
|
| 878 |
+
message: str,
|
| 879 |
+
context: Dict[str, Any],
|
| 880 |
+
tenant_id: Optional[str],
|
| 881 |
+
lat: Optional[float],
|
| 882 |
+
lon: Optional[float],
|
| 883 |
+
intent_result: IntentMatch
|
| 884 |
+
) -> OrchestrationResult:
|
| 885 |
+
"""
|
| 886 |
+
📅 Events handler.
|
| 887 |
+
|
| 888 |
+
Routes event queries to tool_agent with proper error handling
|
| 889 |
+
and graceful degradation.
|
| 890 |
+
"""
|
| 891 |
+
logger.info("📅 Processing events request")
|
| 892 |
+
|
| 893 |
+
if not tenant_id:
|
| 894 |
+
return OrchestrationResult(
|
| 895 |
+
intent=IntentType.EVENTS.value,
|
| 896 |
+
reply=(
|
| 897 |
+
"I'd love to help you find events! 📅 "
|
| 898 |
+
"Which city are you interested in? "
|
| 899 |
+
"I have information for Atlanta, Birmingham, Chesterfield, "
|
| 900 |
+
"El Paso, Providence, and Seattle."
|
| 901 |
+
),
|
| 902 |
+
success=False,
|
| 903 |
+
error="City required"
|
| 904 |
+
)
|
| 905 |
+
|
| 906 |
+
# Check tool agent availability
|
| 907 |
+
if not TOOL_AGENT_AVAILABLE:
|
| 908 |
+
logger.warning("Tool agent not available")
|
| 909 |
+
return OrchestrationResult(
|
| 910 |
+
intent=IntentType.EVENTS.value,
|
| 911 |
+
reply=(
|
| 912 |
+
"Event information isn't available right now. "
|
| 913 |
+
"Try again soon! 📅"
|
| 914 |
+
),
|
| 915 |
+
success=False,
|
| 916 |
+
error="Tool agent not loaded",
|
| 917 |
+
fallback_used=True
|
| 918 |
+
)
|
| 919 |
+
|
| 920 |
+
try:
|
| 921 |
+
# FIXED: Add role parameter (compatibility fix)
|
| 922 |
+
tool_response = await handle_tool_request(
|
| 923 |
+
user_input=message,
|
| 924 |
+
role=context.get("role", "resident"), # ← ADDED
|
| 925 |
+
lat=lat,
|
| 926 |
+
lon=lon
|
| 927 |
+
)
|
| 928 |
+
|
| 929 |
+
reply = tool_response.get("response", "Events information retrieved.")
|
| 930 |
+
|
| 931 |
+
return OrchestrationResult(
|
| 932 |
+
intent=IntentType.EVENTS.value,
|
| 933 |
+
reply=reply,
|
| 934 |
+
success=True,
|
| 935 |
+
data=tool_response,
|
| 936 |
+
model_id="events_tool"
|
| 937 |
+
)
|
| 938 |
+
|
| 939 |
+
except Exception as e:
|
| 940 |
+
logger.error(f"Events error: {e}", exc_info=True)
|
| 941 |
+
return OrchestrationResult(
|
| 942 |
+
intent=IntentType.EVENTS.value,
|
| 943 |
+
reply=(
|
| 944 |
+
"I'm having trouble loading event information right now. "
|
| 945 |
+
"Check back soon! 📅"
|
| 946 |
+
),
|
| 947 |
+
success=False,
|
| 948 |
+
error=str(e),
|
| 949 |
+
fallback_used=True
|
| 950 |
+
)
|
| 951 |
+
|
| 952 |
+
async def _handle_local_resources(
|
| 953 |
+
message: str,
|
| 954 |
+
context: Dict[str, Any],
|
| 955 |
+
tenant_id: Optional[str],
|
| 956 |
+
lat: Optional[float],
|
| 957 |
+
lon: Optional[float]
|
| 958 |
+
) -> OrchestrationResult:
|
| 959 |
+
"""
|
| 960 |
+
🏛️ Local resources handler (shelters, libraries, food banks, etc.).
|
| 961 |
+
|
| 962 |
+
Routes resource queries to tool_agent with proper error handling.
|
| 963 |
+
"""
|
| 964 |
+
logger.info("🏛️ Processing local resources request")
|
| 965 |
+
|
| 966 |
+
if not tenant_id:
|
| 967 |
+
return OrchestrationResult(
|
| 968 |
+
intent=IntentType.LOCAL_RESOURCES.value,
|
| 969 |
+
reply=(
|
| 970 |
+
"I can help you find local resources! 🏛️ "
|
| 971 |
+
"Which city do you need help in? "
|
| 972 |
+
"I cover Atlanta, Birmingham, Chesterfield, El Paso, "
|
| 973 |
+
"Providence, and Seattle."
|
| 974 |
+
),
|
| 975 |
+
success=False,
|
| 976 |
+
error="City required"
|
| 977 |
+
)
|
| 978 |
+
|
| 979 |
+
# Check tool agent availability
|
| 980 |
+
if not TOOL_AGENT_AVAILABLE:
|
| 981 |
+
logger.warning("Tool agent not available")
|
| 982 |
+
return OrchestrationResult(
|
| 983 |
+
intent=IntentType.LOCAL_RESOURCES.value,
|
| 984 |
+
reply=(
|
| 985 |
+
"Resource information isn't available right now. "
|
| 986 |
+
"Try again soon! 🏛️"
|
| 987 |
+
),
|
| 988 |
+
success=False,
|
| 989 |
+
error="Tool agent not loaded",
|
| 990 |
+
fallback_used=True
|
| 991 |
+
)
|
| 992 |
+
|
| 993 |
+
try:
|
| 994 |
+
# FIXED: Add role parameter (compatibility fix)
|
| 995 |
+
tool_response = await handle_tool_request(
|
| 996 |
+
user_input=message,
|
| 997 |
+
role=context.get("role", "resident"), # ← ADDED
|
| 998 |
+
lat=lat,
|
| 999 |
+
lon=lon
|
| 1000 |
+
)
|
| 1001 |
+
|
| 1002 |
+
reply = tool_response.get("response", "Resource information retrieved.")
|
| 1003 |
+
|
| 1004 |
+
return OrchestrationResult(
|
| 1005 |
+
intent=IntentType.LOCAL_RESOURCES.value,
|
| 1006 |
+
reply=reply,
|
| 1007 |
+
success=True,
|
| 1008 |
+
data=tool_response,
|
| 1009 |
+
model_id="resources_tool"
|
| 1010 |
+
)
|
| 1011 |
+
|
| 1012 |
+
except Exception as e:
|
| 1013 |
+
logger.error(f"Resources error: {e}", exc_info=True)
|
| 1014 |
+
return OrchestrationResult(
|
| 1015 |
+
intent=IntentType.LOCAL_RESOURCES.value,
|
| 1016 |
+
reply=(
|
| 1017 |
+
"I'm having trouble finding resource information right now. "
|
| 1018 |
+
"Would you like to try a different search? 💛"
|
| 1019 |
+
),
|
| 1020 |
+
success=False,
|
| 1021 |
+
error=str(e),
|
| 1022 |
+
fallback_used=True
|
| 1023 |
+
)
|
| 1024 |
+
|
| 1025 |
+
|
| 1026 |
+
async def _handle_conversational(
|
| 1027 |
+
message: str,
|
| 1028 |
+
intent: IntentType,
|
| 1029 |
+
context: Dict[str, Any]
|
| 1030 |
+
) -> OrchestrationResult:
|
| 1031 |
+
"""
|
| 1032 |
+
💬 Handles conversational intents (greeting, help, unknown).
|
| 1033 |
+
Uses Penny's core LLM for natural responses with graceful fallback.
|
| 1034 |
+
"""
|
| 1035 |
+
logger.info(f"💬 Processing conversational intent: {intent.value}")
|
| 1036 |
+
|
| 1037 |
+
# Check LLM availability
|
| 1038 |
+
use_llm = LLM_AVAILABLE
|
| 1039 |
+
|
| 1040 |
+
try:
|
| 1041 |
+
if use_llm:
|
| 1042 |
+
# Build prompt based on intent
|
| 1043 |
+
if intent == IntentType.GREETING:
|
| 1044 |
+
prompt = (
|
| 1045 |
+
f"The user greeted you with: '{message}'\n\n"
|
| 1046 |
+
"Respond warmly as Penny, introduce yourself briefly, "
|
| 1047 |
+
"and ask how you can help them with civic services today."
|
| 1048 |
+
)
|
| 1049 |
+
|
| 1050 |
+
elif intent == IntentType.HELP:
|
| 1051 |
+
prompt = (
|
| 1052 |
+
f"The user asked for help: '{message}'\n\n"
|
| 1053 |
+
"Explain Penny's main features:\n"
|
| 1054 |
+
"- Finding local resources (shelters, libraries, food banks)\n"
|
| 1055 |
+
"- Community events and activities\n"
|
| 1056 |
+
"- Weather information\n"
|
| 1057 |
+
"- 27-language translation\n"
|
| 1058 |
+
"- Document processing help\n\n"
|
| 1059 |
+
"Ask which city they need assistance in."
|
| 1060 |
+
)
|
| 1061 |
+
|
| 1062 |
+
else: # UNKNOWN
|
| 1063 |
+
prompt = (
|
| 1064 |
+
f"The user said: '{message}'\n\n"
|
| 1065 |
+
"You're not sure what they need help with. "
|
| 1066 |
+
"Respond kindly, acknowledge their request, and ask them to "
|
| 1067 |
+
"clarify or rephrase. Mention a few things you can help with."
|
| 1068 |
+
)
|
| 1069 |
+
|
| 1070 |
+
# Call Penny's core LLM
|
| 1071 |
+
llm_result = await generate_response(prompt=prompt, max_new_tokens=200)
|
| 1072 |
+
|
| 1073 |
+
# Use compatibility helper to check result
|
| 1074 |
+
success, error = _check_result_success(llm_result, ["response"])
|
| 1075 |
+
|
| 1076 |
+
if success:
|
| 1077 |
+
reply = llm_result.get("response", "")
|
| 1078 |
+
|
| 1079 |
+
return OrchestrationResult(
|
| 1080 |
+
intent=intent.value,
|
| 1081 |
+
reply=reply,
|
| 1082 |
+
success=True,
|
| 1083 |
+
data=llm_result,
|
| 1084 |
+
model_id=CORE_MODEL_ID
|
| 1085 |
+
)
|
| 1086 |
+
else:
|
| 1087 |
+
raise Exception(error or "LLM generation failed")
|
| 1088 |
+
|
| 1089 |
+
else:
|
| 1090 |
+
# LLM not available, use fallback directly
|
| 1091 |
+
logger.info("LLM not available, using fallback responses")
|
| 1092 |
+
raise Exception("LLM service not loaded")
|
| 1093 |
+
|
| 1094 |
+
except Exception as e:
|
| 1095 |
+
logger.warning(f"Conversational handler using fallback: {e}")
|
| 1096 |
+
|
| 1097 |
+
# Hardcoded fallback responses (Penny's friendly voice)
|
| 1098 |
+
fallback_replies = {
|
| 1099 |
+
IntentType.GREETING: (
|
| 1100 |
+
"Hi there! 👋 I'm Penny, your civic assistant. "
|
| 1101 |
+
"I can help you find local resources, events, weather, and more. "
|
| 1102 |
+
"What city are you in?"
|
| 1103 |
+
),
|
| 1104 |
+
IntentType.HELP: (
|
| 1105 |
+
"I'm Penny! 💛 I can help you with:\n\n"
|
| 1106 |
+
"🏛️ Local resources (shelters, libraries, food banks)\n"
|
| 1107 |
+
"📅 Community events\n"
|
| 1108 |
+
"🌤️ Weather updates\n"
|
| 1109 |
+
"🌍 Translation (27 languages)\n"
|
| 1110 |
+
"📄 Document help\n\n"
|
| 1111 |
+
"What would you like to know about?"
|
| 1112 |
+
),
|
| 1113 |
+
IntentType.UNKNOWN: (
|
| 1114 |
+
"I'm not sure I understood that. Could you rephrase? "
|
| 1115 |
+
"I'm best at helping with local services, events, weather, "
|
| 1116 |
+
"and translation! 💬"
|
| 1117 |
+
)
|
| 1118 |
+
}
|
| 1119 |
+
|
| 1120 |
+
return OrchestrationResult(
|
| 1121 |
+
intent=intent.value,
|
| 1122 |
+
reply=fallback_replies.get(intent, "How can I help you today? 💛"),
|
| 1123 |
+
success=True,
|
| 1124 |
+
model_id="fallback",
|
| 1125 |
+
fallback_used=True
|
| 1126 |
+
)
|
| 1127 |
+
|
| 1128 |
+
|
| 1129 |
+
async def _handle_fallback(
|
| 1130 |
+
message: str,
|
| 1131 |
+
intent: IntentType,
|
| 1132 |
+
context: Dict[str, Any]
|
| 1133 |
+
) -> OrchestrationResult:
|
| 1134 |
+
"""
|
| 1135 |
+
🆘 Ultimate fallback handler for unhandled intents.
|
| 1136 |
+
|
| 1137 |
+
This is a safety net that should rarely trigger, but ensures
|
| 1138 |
+
users always get a helpful response.
|
| 1139 |
+
"""
|
| 1140 |
+
logger.warning(f"⚠️ Fallback triggered for intent: {intent.value}")
|
| 1141 |
+
|
| 1142 |
+
reply = (
|
| 1143 |
+
"I've processed your request, but I'm not sure how to help with that yet. "
|
| 1144 |
+
"I'm still learning! 🤖\n\n"
|
| 1145 |
+
"I'm best at:\n"
|
| 1146 |
+
"🏛️ Finding local resources\n"
|
| 1147 |
+
"📅 Community events\n"
|
| 1148 |
+
"🌤️ Weather updates\n"
|
| 1149 |
+
"🌍 Translation\n\n"
|
| 1150 |
+
"Could you rephrase your question? 💛"
|
| 1151 |
+
)
|
| 1152 |
+
|
| 1153 |
+
return OrchestrationResult(
|
| 1154 |
+
intent=intent.value,
|
| 1155 |
+
reply=reply,
|
| 1156 |
+
success=False,
|
| 1157 |
+
error="Unhandled intent",
|
| 1158 |
+
fallback_used=True
|
| 1159 |
+
)
|
| 1160 |
+
|
| 1161 |
+
|
| 1162 |
+
# ============================================================
|
| 1163 |
+
# HEALTH CHECK & DIAGNOSTICS (ENHANCED)
|
| 1164 |
+
# ============================================================
|
| 1165 |
+
|
| 1166 |
+
def get_orchestrator_health() -> Dict[str, Any]:
|
| 1167 |
+
"""
|
| 1168 |
+
📊 Returns comprehensive orchestrator health status.
|
| 1169 |
+
|
| 1170 |
+
Used by the main application health check endpoint to monitor
|
| 1171 |
+
the orchestrator and all its service dependencies.
|
| 1172 |
+
|
| 1173 |
+
Returns:
|
| 1174 |
+
Dictionary with health information including:
|
| 1175 |
+
- status: operational/degraded
|
| 1176 |
+
- service_availability: which services are loaded
|
| 1177 |
+
- statistics: orchestration counts
|
| 1178 |
+
- supported_intents: list of all intent types
|
| 1179 |
+
- features: available orchestrator features
|
| 1180 |
+
"""
|
| 1181 |
+
# Get service availability
|
| 1182 |
+
services = get_service_availability()
|
| 1183 |
+
|
| 1184 |
+
# Determine overall status
|
| 1185 |
+
# Orchestrator is operational even if some services are down (graceful degradation)
|
| 1186 |
+
critical_services = ["weather", "tool_agent"] # Must have these
|
| 1187 |
+
critical_available = all(services.get(svc, False) for svc in critical_services)
|
| 1188 |
+
|
| 1189 |
+
status = "operational" if critical_available else "degraded"
|
| 1190 |
+
|
| 1191 |
+
return {
|
| 1192 |
+
"status": status,
|
| 1193 |
+
"core_model": CORE_MODEL_ID,
|
| 1194 |
+
"max_response_time_ms": MAX_RESPONSE_TIME_MS,
|
| 1195 |
+
"statistics": {
|
| 1196 |
+
"total_orchestrations": _orchestration_count,
|
| 1197 |
+
"emergency_interactions": _emergency_count
|
| 1198 |
+
},
|
| 1199 |
+
"service_availability": services,
|
| 1200 |
+
"supported_intents": [intent.value for intent in IntentType],
|
| 1201 |
+
"features": {
|
| 1202 |
+
"emergency_routing": True,
|
| 1203 |
+
"compound_intents": True,
|
| 1204 |
+
"fallback_handling": True,
|
| 1205 |
+
"performance_tracking": True,
|
| 1206 |
+
"context_aware": True,
|
| 1207 |
+
"multi_language": TRANSLATION_AVAILABLE,
|
| 1208 |
+
"sentiment_analysis": SENTIMENT_AVAILABLE,
|
| 1209 |
+
"bias_detection": BIAS_AVAILABLE,
|
| 1210 |
+
"weather_integration": WEATHER_AGENT_AVAILABLE,
|
| 1211 |
+
"event_recommendations": EVENT_WEATHER_AVAILABLE
|
| 1212 |
+
}
|
| 1213 |
+
}
|
| 1214 |
+
|
| 1215 |
+
|
| 1216 |
+
def get_orchestrator_stats() -> Dict[str, Any]:
|
| 1217 |
+
"""
|
| 1218 |
+
📈 Returns orchestrator statistics.
|
| 1219 |
+
|
| 1220 |
+
Useful for monitoring and analytics.
|
| 1221 |
+
"""
|
| 1222 |
+
return {
|
| 1223 |
+
"total_orchestrations": _orchestration_count,
|
| 1224 |
+
"emergency_interactions": _emergency_count,
|
| 1225 |
+
"services_available": sum(1 for v in get_service_availability().values() if v),
|
| 1226 |
+
"services_total": len(get_service_availability())
|
| 1227 |
+
}
|
| 1228 |
+
|
| 1229 |
+
|
| 1230 |
+
# ============================================================
|
| 1231 |
+
# TESTING & DEBUGGING (ENHANCED)
|
| 1232 |
+
# ============================================================
|
| 1233 |
+
|
| 1234 |
+
if __name__ == "__main__":
|
| 1235 |
+
"""
|
| 1236 |
+
🧪 Test the orchestrator with sample queries.
|
| 1237 |
+
Run with: python -m app.orchestrator
|
| 1238 |
+
"""
|
| 1239 |
+
import asyncio
|
| 1240 |
+
|
| 1241 |
+
print("=" * 60)
|
| 1242 |
+
print("🧪 Testing Penny's Orchestrator")
|
| 1243 |
+
print("=" * 60)
|
| 1244 |
+
|
| 1245 |
+
# Display service availability first
|
| 1246 |
+
print("\n📊 Service Availability Check:")
|
| 1247 |
+
services = get_service_availability()
|
| 1248 |
+
for service, available in services.items():
|
| 1249 |
+
status = "✅" if available else "❌"
|
| 1250 |
+
print(f" {status} {service}: {'Available' if available else 'Not loaded'}")
|
| 1251 |
+
|
| 1252 |
+
print("\n" + "=" * 60)
|
| 1253 |
+
|
| 1254 |
+
test_queries = [
|
| 1255 |
+
{
|
| 1256 |
+
"name": "Greeting",
|
| 1257 |
+
"message": "Hi Penny!",
|
| 1258 |
+
"context": {}
|
| 1259 |
+
},
|
| 1260 |
+
{
|
| 1261 |
+
"name": "Weather with location",
|
| 1262 |
+
"message": "What's the weather?",
|
| 1263 |
+
"context": {"lat": 33.7490, "lon": -84.3880}
|
| 1264 |
+
},
|
| 1265 |
+
{
|
| 1266 |
+
"name": "Events in city",
|
| 1267 |
+
"message": "Events in Atlanta",
|
| 1268 |
+
"context": {"tenant_id": "atlanta_ga"}
|
| 1269 |
+
},
|
| 1270 |
+
{
|
| 1271 |
+
"name": "Help request",
|
| 1272 |
+
"message": "I need help",
|
| 1273 |
+
"context": {}
|
| 1274 |
+
},
|
| 1275 |
+
{
|
| 1276 |
+
"name": "Translation",
|
| 1277 |
+
"message": "Translate hello",
|
| 1278 |
+
"context": {"source_lang": "eng_Latn", "target_lang": "spa_Latn"}
|
| 1279 |
+
}
|
| 1280 |
+
]
|
| 1281 |
+
|
| 1282 |
+
async def run_tests():
|
| 1283 |
+
for i, query in enumerate(test_queries, 1):
|
| 1284 |
+
print(f"\n--- Test {i}: {query['name']} ---")
|
| 1285 |
+
print(f"Query: {query['message']}")
|
| 1286 |
+
|
| 1287 |
+
try:
|
| 1288 |
+
result = await run_orchestrator(query["message"], query["context"])
|
| 1289 |
+
print(f"Intent: {result['intent']}")
|
| 1290 |
+
print(f"Success: {result['success']}")
|
| 1291 |
+
print(f"Fallback: {result.get('fallback_used', False)}")
|
| 1292 |
+
|
| 1293 |
+
# Truncate long replies
|
| 1294 |
+
reply = result['reply']
|
| 1295 |
+
if len(reply) > 150:
|
| 1296 |
+
reply = reply[:150] + "..."
|
| 1297 |
+
print(f"Reply: {reply}")
|
| 1298 |
+
|
| 1299 |
+
if result.get('response_time_ms'):
|
| 1300 |
+
print(f"Response time: {result['response_time_ms']:.0f}ms")
|
| 1301 |
+
|
| 1302 |
+
except Exception as e:
|
| 1303 |
+
print(f"❌ Error: {e}")
|
| 1304 |
+
|
| 1305 |
+
asyncio.run(run_tests())
|
| 1306 |
+
|
| 1307 |
+
print("\n" + "=" * 60)
|
| 1308 |
+
print("📊 Final Statistics:")
|
| 1309 |
+
stats = get_orchestrator_stats()
|
| 1310 |
+
for key, value in stats.items():
|
| 1311 |
+
print(f" {key}: {value}")
|
| 1312 |
+
|
| 1313 |
+
print("\n" + "=" * 60)
|
| 1314 |
+
print("✅ Tests complete")
|
| 1315 |
+
print("=" * 60)
|
providence_ri.json
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "Providence",
|
| 3 |
+
"state": "Rhode Island",
|
| 4 |
+
"tenant_id": "providence_ri",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "state_only",
|
| 7 |
+
"csb_authority": "none",
|
| 8 |
+
"events_source": "city_official_site",
|
| 9 |
+
"transit_source": "city",
|
| 10 |
+
"trash_source": "city",
|
| 11 |
+
"public_safety_source": "city",
|
| 12 |
+
"warming_center_authority": "city",
|
| 13 |
+
"sexual_health_authority": "state"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"city_homepage": "https://www.providenceri.gov/",
|
| 17 |
+
"events_calendar": "https://www.downtownprovidence.com/event-calendar/",
|
| 18 |
+
"public_health": "https://health.ri.gov/",
|
| 19 |
+
"transit": "https://www.ripta.com/",
|
| 20 |
+
"libraries": "https://www.provcomlib.org/",
|
| 21 |
+
"parks_and_recreation": "https://www.providenceri.gov/providence-parks/",
|
| 22 |
+
"emergency_management": "https://www.providenceri.gov/emergency-management-agency/",
|
| 23 |
+
"trash": "https://www.providenceri.gov/public-works/streets-sanitation/"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "state",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "BH Link (Behavioral Health Hotline)",
|
| 32 |
+
"link": "https://www.bhlink.org/",
|
| 33 |
+
"phone": "401-414-LINK (5465)",
|
| 34 |
+
"notes": "24/7 mental health and substance use crisis hotline for all Rhode Island residents. Penny should always refer users here for crisis situations."
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"name": "Rhode Island Dept. of Behavioral Healthcare",
|
| 38 |
+
"link": "https://www.bhddh.ri.gov/",
|
| 39 |
+
"phone": "",
|
| 40 |
+
"notes": "State authority for long-term mental health, developmental disability, and addiction services."
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"sexual_health": {
|
| 45 |
+
"allowed": true,
|
| 46 |
+
"authority": "state",
|
| 47 |
+
"resources": [
|
| 48 |
+
{
|
| 49 |
+
"name": "Rhode Island Dept. of Health – STD Prevention",
|
| 50 |
+
"link": "https://health.ri.gov/programs/std/",
|
| 51 |
+
"phone": "401-222-2577",
|
| 52 |
+
"notes": "State program offering confidential testing and treatment services. Penny should prioritize this official source."
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"name": "Planned Parenthood – Providence Health Center",
|
| 56 |
+
"link": "https://www.plannedparenthood.org/health-center/rhode-island/providence/02903/providence-health-center-3991-90490",
|
| 57 |
+
"phone": "401-421-9500",
|
| 58 |
+
"notes": "Major nonprofit provider of sexual health services, testing, and contraception in the area."
|
| 59 |
+
}
|
| 60 |
+
]
|
| 61 |
+
},
|
| 62 |
+
"warming_and_cooling_centers": {
|
| 63 |
+
"allowed": true,
|
| 64 |
+
"authority": "city",
|
| 65 |
+
"resources": [
|
| 66 |
+
{
|
| 67 |
+
"name": "Providence Community Recreation Centers",
|
| 68 |
+
"address": "Varies by activation",
|
| 69 |
+
"season": "winter/summer",
|
| 70 |
+
"link": "https://www.providenceri.gov/providence-parks/",
|
| 71 |
+
"notes": "City community centers are activated as warming centers during extreme cold and cooling centers during extreme heat. Penny must advise users to check the city's official social media or hotline for current sites."
|
| 72 |
+
}
|
| 73 |
+
]
|
| 74 |
+
},
|
| 75 |
+
"trash_and_recycling": {
|
| 76 |
+
"authority": "city",
|
| 77 |
+
"pickup_days": "Varies by zone. Collection schedules are published by the Department of Public Works.",
|
| 78 |
+
"holiday_schedule_link": "https://www.providenceri.gov/public-works/streets-sanitation/"
|
| 79 |
+
},
|
| 80 |
+
"transit": {
|
| 81 |
+
"authority": "state",
|
| 82 |
+
"provider": "RIPTA (Rhode Island Public Transit Authority)",
|
| 83 |
+
"routes_link": "https://www.ripta.com/routes/",
|
| 84 |
+
"planner_link": "https://www.ripta.com/"
|
| 85 |
+
},
|
| 86 |
+
"emergency": {
|
| 87 |
+
"authority": "city",
|
| 88 |
+
"alerts_link": "https://www.providenceri.gov/emergency-management-agency/",
|
| 89 |
+
"non_emergency_phone": "401-272-3121 (Police Non-Emergency)",
|
| 90 |
+
"emergency_management_link": "https://www.providenceri.gov/emergency-management-agency/"
|
| 91 |
+
},
|
| 92 |
+
"libraries": {
|
| 93 |
+
"authority": "city",
|
| 94 |
+
"resources": [
|
| 95 |
+
{
|
| 96 |
+
"branch": "Providence Community Library – Central",
|
| 97 |
+
"address": "150 Empire St, Providence, RI 02903",
|
| 98 |
+
"link": "https://www.provcomlib.org/",
|
| 99 |
+
"notes": "Main branch of the city library system offering access to computers, Wi-Fi, and community resources."
|
| 100 |
+
}
|
| 101 |
+
]
|
| 102 |
+
},
|
| 103 |
+
"community_centers": {
|
| 104 |
+
"authority": "city",
|
| 105 |
+
"resources": [
|
| 106 |
+
{
|
| 107 |
+
"name": "Fox Point Community Center",
|
| 108 |
+
"address": "446 Wickenden St, Providence, RI 02903",
|
| 109 |
+
"link": "https://www.providenceri.gov/providence-parks/fox-point-community-center/",
|
| 110 |
+
"notes": "Offers recreational programs, afterschool activities, and senior services for the East Side neighborhood."
|
| 111 |
+
}
|
| 112 |
+
]
|
| 113 |
+
}
|
| 114 |
+
}
|
| 115 |
+
}
|
router.py
ADDED
|
@@ -0,0 +1,802 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
🚦 PENNY Request Router - Enhanced for Azure ML Production
|
| 3 |
+
Routes incoming requests to appropriate agents and tools based on intent classification.
|
| 4 |
+
Integrates with enhanced logging, location detection, and intent classification.
|
| 5 |
+
|
| 6 |
+
Mission: Ensure every resident request reaches the right civic service with proper tracking.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import logging
|
| 10 |
+
import time
|
| 11 |
+
import asyncio
|
| 12 |
+
import os
|
| 13 |
+
from typing import Dict, Any, Optional, List
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
from fastapi import APIRouter, HTTPException
|
| 16 |
+
from fastapi.responses import JSONResponse
|
| 17 |
+
|
| 18 |
+
from app.model_loader import ModelLoader
|
| 19 |
+
from app.tool_agent import handle_tool_request
|
| 20 |
+
from app.weather_agent import (
|
| 21 |
+
get_weather_for_location,
|
| 22 |
+
weather_to_event_recommendations,
|
| 23 |
+
recommend_outfit
|
| 24 |
+
)
|
| 25 |
+
from app.intents import classify_intent_detailed, IntentType
|
| 26 |
+
from app.event_weather import get_event_recommendations_with_weather
|
| 27 |
+
from app.location_utils import (
|
| 28 |
+
detect_location_from_text,
|
| 29 |
+
get_city_info,
|
| 30 |
+
validate_coordinates
|
| 31 |
+
)
|
| 32 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 33 |
+
|
| 34 |
+
logger = logging.getLogger(__name__)
|
| 35 |
+
|
| 36 |
+
# Initialize FastAPI router
|
| 37 |
+
router = APIRouter(prefix="/api", tags=["Penny API"])
|
| 38 |
+
|
| 39 |
+
# Initialize model loader
|
| 40 |
+
models = ModelLoader()
|
| 41 |
+
|
| 42 |
+
# Supported languages for translation routing
|
| 43 |
+
SUPPORTED_LANGUAGES = [
|
| 44 |
+
"arabic", "french", "german", "hindi", "mandarin",
|
| 45 |
+
"portuguese", "russian", "spanish", "swahili",
|
| 46 |
+
"tagalog", "urdu", "vietnamese", "translate", "translation"
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
def validate_request_payload(payload: dict) -> tuple[bool, Optional[str]]:
    """
    Check that an incoming request payload is well-formed.

    Verifies that the payload is a dict, that a non-empty string 'input'
    is present, that coordinates (when supplied) form a complete numeric
    lat/lon pair that passes validate_coordinates, and that an optional
    'tenant_id' is a non-empty string.

    Args:
        payload: Request payload dictionary

    Returns:
        Tuple of (is_valid, error_message); error_message is None on success.
    """
    if not isinstance(payload, dict):
        return False, "Payload must be a dictionary"

    # 'input' is the only mandatory field.
    if "input" not in payload:
        return False, "Missing required field: 'input'"

    text = payload.get("input")
    if not isinstance(text, str):
        return False, "Field 'input' must be a string"
    if not text.strip():
        return False, "Input cannot be empty"

    # Coordinates are optional, but if one is supplied both must be.
    latitude = payload.get("lat")
    longitude = payload.get("lon")
    if not (latitude is None and longitude is None):
        if latitude is None or longitude is None:
            return False, "Both 'lat' and 'lon' must be provided together"

        try:
            ok, coord_error = validate_coordinates(float(latitude), float(longitude))
            if not ok:
                return False, f"Invalid coordinates: {coord_error}"
        except (ValueError, TypeError):
            # Non-numeric lat/lon values land here.
            return False, "Coordinates must be numeric values"

    # tenant_id is optional but must be a non-empty string when present.
    tenant = payload.get("tenant_id")
    if tenant is not None:
        if not isinstance(tenant, str):
            return False, "Field 'tenant_id' must be a string"
        if not tenant.strip():
            return False, "Field 'tenant_id' cannot be empty"

    return True, None
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def extract_location_info(payload: dict, user_input: str) -> Dict[str, Any]:
    """
    Resolve the best-available location for a request.

    Coordinates supplied in the payload take priority; otherwise a keyword
    heuristic may trigger location detection from the request text. Any
    failure is logged and the defaults gathered so far are returned.

    Args:
        payload: Request payload
        user_input: User's input text

    Returns:
        Dictionary with location info: {lat, lon, tenant_id, city_info, location_source}
    """
    info = {
        "lat": payload.get("lat"),
        "lon": payload.get("lon"),
        "tenant_id": payload.get("tenant_id", "default"),
        "city_info": None,
        "location_source": "none",
    }

    try:
        if info["lat"] is not None and info["lon"] is not None:
            info["location_source"] = "coordinates"

            # NOTE(review): with coordinates but no explicit tenant, this
            # looks up city info for the literal "default" tenant id —
            # confirm this is the intended coordinate→city mapping.
            if info["tenant_id"] == "default":
                city = get_city_info(info["tenant_id"])
                if city:
                    info["city_info"] = city

        # Heuristic trigger: substring match, so words like "information"
        # also start a (harmless) detection attempt.
        elif "near me" in user_input.lower() or any(
            marker in user_input.lower()
            for marker in ["in", "at", "near", "around"]
        ):
            found = detect_location_from_text(user_input)
            if found.get("found"):
                info["tenant_id"] = found.get("tenant_id", "default")
                info["city_info"] = found.get("city_info")
                info["location_source"] = "text_detection"
                logger.info(
                    f"Detected location from text: {info['tenant_id']}"
                )

        # Backfill city info for a known tenant if nothing resolved it yet.
        if not info["city_info"] and info["tenant_id"] != "default":
            info["city_info"] = get_city_info(info["tenant_id"])

    except Exception as e:
        # Best-effort: location is an enhancement, never a hard failure.
        logger.warning(f"Error extracting location info: {e}")

    return info
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def route_request(payload: dict) -> dict:
    """
    Main routing function for PENNY requests.

    Routes requests to appropriate agents based on intent classification.
    Dispatch priority (first match wins): validation errors, emergency,
    weather, weather+events (compound), events, tool-based intents,
    translation, document, sentiment, bias, then the general fallback.

    Args:
        payload: Request payload with user input and metadata
            (expects at least 'input'; optionally 'role', 'lat', 'lon',
            'tenant_id')

    Returns:
        Response dictionary with agent output and metadata; on failure a
        dict with 'error', 'status', and 'response_time_ms' keys instead.
    """
    start_time = time.time()

    try:
        # Reject malformed payloads up front with a friendly message.
        is_valid, error_msg = validate_request_payload(payload)
        if not is_valid:
            logger.warning(f"Invalid request payload: {error_msg}")
            return {
                "error": "Oops! I couldn't understand that request. " + error_msg,
                "status": "validation_error",
                "response_time_ms": round((time.time() - start_time) * 1000)
            }

        # Extract basic request info
        user_input = payload.get("input", "").strip()
        role = payload.get("role", "unknown")

        # Sanitize input for logging (remove PII)
        sanitized_input = sanitize_for_logging(user_input)

        # Extract location information (coordinates, tenant, detected city)
        location_info = extract_location_info(payload, user_input)
        tenant_id = location_info["tenant_id"]
        lat = location_info["lat"]
        lon = location_info["lon"]

        logger.info(
            f"Routing request from tenant '{tenant_id}', role '{role}', "
            f"location_source: {location_info['location_source']}"
        )

        # Classify intent using enhanced intent classifier; fall back to
        # GENERAL (confidence 0.0) if classification itself fails.
        try:
            intent_result = classify_intent_detailed(user_input)
            intent = intent_result["intent"]
            confidence = intent_result["confidence"]
            is_compound = intent_result["is_compound"]

            logger.info(
                f"Intent classified: {intent} (confidence: {confidence:.2f}, "
                f"compound: {is_compound})"
            )

        except Exception as e:
            logger.error(f"Intent classification failed: {e}")
            intent = IntentType.GENERAL
            confidence = 0.0
            is_compound = False

        # EMERGENCY ROUTING - Highest priority; returns a static safety
        # response and logs the interaction for compliance review.
        if intent == IntentType.EMERGENCY:
            logger.critical(
                f"EMERGENCY intent detected from tenant '{tenant_id}'. "
                f"Routing to safety protocols."
            )

            # Log emergency interaction for compliance
            log_interaction(
                tenant_id=tenant_id,
                interaction_type="emergency",
                intent="emergency",
                response_time_ms=round((time.time() - start_time) * 1000),
                success=True,
                metadata={
                    "sanitized_input": sanitized_input,
                    "requires_followup": True,
                    "escalation_level": "critical"
                }
            )

            return {
                "response": (
                    "I can see you might need urgent help. Please contact:\n\n"
                    "🚨 **Emergency Services**: 911\n"
                    "💚 **National Crisis Hotline**: 988\n"
                    "💬 **Crisis Text Line**: Text HOME to 741741\n\n"
                    "You're not alone, and help is available 24/7."
                ),
                "intent": "emergency",
                "model_id": "safety-agent",
                "tenant_id": tenant_id,
                "user_role": role,
                "response_time_ms": round((time.time() - start_time) * 1000),
                "escalation_required": True
            }

        # WEATHER ROUTING
        if intent == IntentType.WEATHER:
            return handle_weather_request(
                user_input, lat, lon, tenant_id, role, start_time
            )

        # WEATHER + EVENTS ROUTING (compound intent). When classification
        # failed, is_compound is False so the intent_result reference is
        # short-circuited and never evaluated.
        if intent == IntentType.WEATHER_EVENTS or (
            is_compound and "weather" in intent_result.get("components", [])
        ):
            return handle_weather_events_request(
                user_input, lat, lon, tenant_id, role, start_time
            )

        # EVENTS ROUTING
        if intent == IntentType.EVENTS:
            return handle_events_request(
                user_input, tenant_id, role, start_time
            )

        # TOOL-BASED ROUTING (transit, alerts, resources, etc.)
        if intent in [
            IntentType.TRANSIT, IntentType.ALERTS, IntentType.RESOURCES,
            IntentType.PUBLIC_WORKS
        ]:
            return handle_tool_based_request(
                user_input, intent, tenant_id, role, start_time
            )

        # TRANSLATION ROUTING — classifier intent, or any supported
        # language keyword appearing (as a substring) in the input.
        if intent == IntentType.TRANSLATION or any(
            lang in user_input.lower() for lang in SUPPORTED_LANGUAGES
        ):
            return handle_translation_request(
                user_input, tenant_id, role, start_time
            )

        # DOCUMENT/PDF ROUTING (keyword substring heuristic)
        if any(term in user_input.lower() for term in ["form", "upload", "document", "pdf"]):
            return handle_document_request(
                user_input, tenant_id, role, start_time
            )

        # SENTIMENT ANALYSIS ROUTING (keyword substring heuristic)
        if any(term in user_input.lower() for term in ["angry", "sentiment", "how do i feel"]):
            return handle_sentiment_request(
                user_input, tenant_id, role, start_time
            )

        # BIAS DETECTION ROUTING (keyword substring heuristic)
        if any(term in user_input.lower() for term in ["bias", "is this fair", "offensive"]):
            return handle_bias_request(
                user_input, tenant_id, role, start_time
            )

        # GENERAL/FALLBACK ROUTING — nothing above matched.
        return handle_general_request(
            user_input, tenant_id, role, start_time
        )

    except Exception as e:
        # Last-resort guard: never let an unhandled exception escape the router.
        logger.error(f"Unexpected error in route_request: {e}", exc_info=True)

        return {
            "error": (
                "I'm having trouble processing that right now. "
                "Could you try rephrasing your question? 💛"
            ),
            "status": "server_error",
            "response_time_ms": round((time.time() - start_time) * 1000)
        }
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def handle_weather_request(
    user_input: str, lat: Optional[float], lon: Optional[float],
    tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle weather-specific requests.

    Without coordinates, asks the user for a location. Otherwise fetches
    current conditions, derives event and outfit recommendations, logs the
    interaction, and returns the bundle.
    """
    def _elapsed_ms() -> int:
        # Milliseconds since the router accepted the request.
        return round((time.time() - start_time) * 1000)

    # Weather lookups are coordinate-based, so bail out early when missing.
    if lat is None or lon is None:
        return {
            "response": (
                "I'd love to help with the weather! To give you accurate info, "
                "I need your location. Can you share your coordinates or tell me "
                "what city you're in? 🌤️"
            ),
            "intent": "weather",
            "model_id": "weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "location_required": True
        }

    try:
        # Synchronous bridge into the async weather client.
        conditions = asyncio.run(get_weather_for_location(lat, lon))

        # Derive recommendations from the fetched conditions.
        event_recs = weather_to_event_recommendations(conditions)
        outfit_rec = recommend_outfit(
            conditions.get("temperature", {}).get("value"),
            conditions.get("phrase", "")
        )

        elapsed = _elapsed_ms()

        # Record the successful interaction for analytics.
        log_interaction(
            tenant_id=tenant_id,
            interaction_type="weather",
            intent="weather",
            response_time_ms=elapsed,
            success=True
        )

        return {
            "response": {
                "weather": conditions,
                "recommendations": event_recs,
                "outfit": outfit_rec
            },
            "intent": "weather",
            "model_id": "weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed
        }

    except Exception as e:
        logger.error(f"Error handling weather request: {e}")

        # Friendly degradation when the upstream weather service fails.
        return {
            "response": (
                "I'm having trouble getting the weather right now. "
                "The weather service might be down. Want to try again in a moment? 🌦️"
            ),
            "intent": "weather",
            "model_id": "weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "error": "weather_service_unavailable"
        }
|
| 396 |
+
|
| 397 |
+
|
| 398 |
+
def handle_weather_events_request(
    user_input: str, lat: Optional[float], lon: Optional[float],
    tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle combined weather and events requests.

    Requires coordinates; when present, delegates to the combined
    event+weather recommendation service and logs the interaction.
    """
    def _elapsed_ms() -> int:
        # Milliseconds since the router accepted the request.
        return round((time.time() - start_time) * 1000)

    # No coordinates — ask the user for a location instead of guessing.
    if lat is None or lon is None:
        return {
            "response": (
                "I can suggest events based on the weather! "
                "To do that, I need your location. Can you share your coordinates "
                "or tell me what city you're in? 🎉☀️"
            ),
            "intent": "weather_events",
            "model_id": "event-weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "location_required": True
        }

    try:
        # Synchronous bridge into the async combined recommendation service.
        bundle = asyncio.run(
            get_event_recommendations_with_weather(tenant_id, lat, lon)
        )

        elapsed = _elapsed_ms()

        # Record the successful interaction for analytics.
        log_interaction(
            tenant_id=tenant_id,
            interaction_type="weather_events",
            intent="weather_events",
            response_time_ms=elapsed,
            success=True
        )

        return {
            "response": bundle,
            "intent": "weather_events",
            "model_id": "event-weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed
        }

    except Exception as e:
        logger.error(f"Error handling weather_events request: {e}")

        # Friendly degradation when the combined service fails.
        return {
            "response": (
                "I'm having trouble combining weather and events right now. "
                "Let me try to help you with just one or the other! 🤔"
            ),
            "intent": "weather_events",
            "model_id": "event-weather-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": _elapsed_ms(),
            "error": "combined_service_unavailable"
        }
|
| 460 |
+
|
| 461 |
+
|
| 462 |
+
def handle_events_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle events-only requests by delegating to the tool agent."""
    try:
        outcome = handle_tool_request(user_input, role, tenant_id, "events")

        return {
            "response": outcome.get("response"),
            "intent": "events",
            "model_id": "event-agent",
            # Prefer the city the tool resolved; fall back to the caller's tenant.
            "tenant_id": outcome.get("city", tenant_id),
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000)
        }

    except Exception as e:
        logger.error(f"Error handling events request: {e}")

        # Friendly degradation when the events tooling fails.
        return {
            "response": (
                "I'm having trouble finding events right now. "
                "Let me know what you're interested in and I'll do my best! 🎭"
            ),
            "intent": "events",
            "model_id": "event-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "events_service_unavailable"
        }
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
def handle_tool_based_request(
    user_input: str, intent: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle tool-based requests (transit, alerts, resources, etc.)."""
    try:
        outcome = handle_tool_request(user_input, role, tenant_id, intent)

        return {
            "response": outcome.get("response"),
            "intent": str(intent),
            # Report the specific tool when the agent names one.
            "model_id": outcome.get("tool", "tool-agent"),
            # Prefer the city the tool resolved; fall back to the caller's tenant.
            "tenant_id": outcome.get("city", tenant_id),
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000)
        }

    except Exception as e:
        logger.error(f"Error handling tool request for {intent}: {e}")

        # Friendly degradation naming the intent that failed.
        return {
            "response": (
                f"I'm having trouble with that {intent} request right now. "
                "Could you try again or ask me something else? 💛"
            ),
            "intent": str(intent),
            "model_id": "tool-agent",
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": f"{intent}_service_unavailable"
        }
|
| 528 |
+
|
| 529 |
+
|
| 530 |
+
def handle_translation_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """Handle translation requests via the dedicated translation model."""
    model_id = "penny-translate-agent"

    try:
        translator = models.get(model_id)
        if not translator:
            # Treated like any other failure so the fallback reply is used.
            raise ValueError(f"Translation model not found: {model_id}")

        translated = translator.predict(user_input)

        return {
            "response": translated,
            "intent": "translation",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000)
        }

    except Exception as e:
        logger.error(f"Error handling translation request: {e}")

        # Friendly degradation when the translation model is unavailable.
        return {
            "response": (
                "I'm having trouble with translation right now. "
                "Which language would you like help with? 🌍"
            ),
            "intent": "translation",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "translation_service_unavailable"
        }
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def handle_document_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """
    Handle document/PDF processing requests via the document model.

    Args:
        user_input: Raw user message describing the document need.
        tenant_id: Tenant identifier echoed back in the response.
        role: Caller's role, echoed back in the response.
        start_time: Request start timestamp (seconds) used for timing.

    Returns:
        dict with response text, intent metadata, and timing; on failure,
        a friendly fallback message plus an "error" marker.
    """
    model_id = "penny-doc-agent"

    try:
        doc_model = models.get(model_id)
        if not doc_model:
            raise ValueError(f"Document model not found: {model_id}")

        prediction = doc_model.predict(user_input)
        elapsed_ms = round((time.time() - start_time) * 1000)

        return {
            "response": prediction,
            "intent": "document",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed_ms
        }

    except Exception as exc:
        logger.error(f"Error handling document request: {exc}")
        # Graceful degradation: keep the chat usable with a friendly prompt.
        return {
            "response": (
                "I'm having trouble processing documents right now. "
                "What kind of form or document do you need help with? 📄"
            ),
            "intent": "document",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "document_service_unavailable"
        }
|
| 608 |
+
|
| 609 |
+
|
| 610 |
+
def handle_sentiment_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """
    Handle sentiment analysis requests via the sentiment model.

    Args:
        user_input: Raw user message to analyze.
        tenant_id: Tenant identifier echoed back in the response.
        role: Caller's role, echoed back in the response.
        start_time: Request start timestamp (seconds) used for timing.

    Returns:
        dict with response text, intent metadata, and timing; on failure,
        a friendly fallback message plus an "error" marker.
    """
    model_id = "penny-sentiment-agent"

    try:
        sentiment_model = models.get(model_id)
        if not sentiment_model:
            raise ValueError(f"Sentiment model not found: {model_id}")

        prediction = sentiment_model.predict(user_input)
        elapsed_ms = round((time.time() - start_time) * 1000)

        return {
            "response": prediction,
            "intent": "sentiment",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed_ms
        }

    except Exception as exc:
        logger.error(f"Error handling sentiment request: {exc}")
        # Graceful degradation: keep the chat usable with a friendly prompt.
        return {
            "response": (
                "I'm having trouble analyzing sentiment right now. "
                "How are you feeling about things? 💭"
            ),
            "intent": "sentiment",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "sentiment_service_unavailable"
        }
|
| 648 |
+
|
| 649 |
+
|
| 650 |
+
def handle_bias_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """
    Handle bias detection requests via the bias-checker model.

    Args:
        user_input: Content to review for bias.
        tenant_id: Tenant identifier echoed back in the response.
        role: Caller's role, echoed back in the response.
        start_time: Request start timestamp (seconds) used for timing.

    Returns:
        dict with response text, intent metadata, and timing; on failure,
        a friendly fallback message plus an "error" marker.
    """
    model_id = "penny-bias-checker"

    try:
        bias_model = models.get(model_id)
        if not bias_model:
            raise ValueError(f"Bias model not found: {model_id}")

        prediction = bias_model.predict(user_input)
        elapsed_ms = round((time.time() - start_time) * 1000)

        return {
            "response": prediction,
            "intent": "bias_check",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed_ms
        }

    except Exception as exc:
        logger.error(f"Error handling bias request: {exc}")
        # Graceful degradation: keep the chat usable with a friendly prompt.
        return {
            "response": (
                "I'm having trouble checking for bias right now. "
                "What content would you like me to review? ⚖️"
            ),
            "intent": "bias_check",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "bias_service_unavailable"
        }
|
| 688 |
+
|
| 689 |
+
|
| 690 |
+
def handle_general_request(
    user_input: str, tenant_id: str, role: str, start_time: float
) -> dict:
    """
    Handle general/fallback requests via the core conversation model.

    Args:
        user_input: Raw user message.
        tenant_id: Tenant identifier echoed back in the response.
        role: Caller's role, echoed back in the response.
        start_time: Request start timestamp (seconds) used for timing.

    Returns:
        dict with response text, intent metadata, and timing; on failure,
        a friendly fallback message plus an "error" marker.
    """
    model_id = "penny-core-agent"

    try:
        core_model = models.get(model_id)
        if not core_model:
            raise ValueError(f"Core model not found: {model_id}")

        prediction = core_model.predict(user_input)
        elapsed_ms = round((time.time() - start_time) * 1000)

        return {
            "response": prediction,
            "intent": "general",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": elapsed_ms
        }

    except Exception as exc:
        logger.error(f"Error handling general request: {exc}")
        # Graceful degradation: steer the user toward supported capabilities.
        return {
            "response": (
                "I'm having some technical difficulties right now. "
                "Can you try asking your question in a different way? "
                "Or let me know if you need help with weather, events, or services! 💛"
            ),
            "intent": "general",
            "model_id": model_id,
            "tenant_id": tenant_id,
            "user_role": role,
            "response_time_ms": round((time.time() - start_time) * 1000),
            "error": "general_service_unavailable"
        }
|
| 729 |
+
|
| 730 |
+
|
| 731 |
+
@router.post("/chat", response_model=Dict[str, Any])
async def chat_endpoint(payload: Dict[str, Any]) -> JSONResponse:
    """
    💬 Main chat endpoint for Penny.

    Delegates the payload to route_request() and wraps the result
    in a JSON response.

    Args:
        payload: Request payload with 'input', 'tenant_id', 'lat', 'lon', etc.

    Returns:
        JSONResponse with Penny's response (HTTP 200), or a friendly
        HTTP 500 error body if routing raises.
    """
    try:
        routed = route_request(payload)
        return JSONResponse(status_code=200, content=routed)
    except Exception as exc:
        logger.error(f"Error in chat endpoint: {exc}", exc_info=True)
        # Expose exception details only when DEBUG_MODE is explicitly enabled.
        debug_enabled = os.getenv("DEBUG_MODE", "false").lower() == "true"
        error_body = {
            "error": "I'm having trouble processing that right now. Please try again! 💛",
            "detail": str(exc) if debug_enabled else None
        }
        return JSONResponse(status_code=500, content=error_body)
|
| 756 |
+
|
| 757 |
+
|
| 758 |
+
@router.get("/health/router", response_model=Dict[str, Any])
async def router_health_endpoint() -> JSONResponse:
    """
    📊 Router health check endpoint.

    Returns:
        JSONResponse with the router's health status (HTTP 200), or a
        "degraded" status body with HTTP 500 if the check itself fails.
    """
    try:
        status_payload = get_router_health()
        return JSONResponse(status_code=200, content=status_payload)
    except Exception as exc:
        logger.error(f"Router health check failed: {exc}")
        degraded = {"status": "degraded", "error": str(exc)}
        return JSONResponse(status_code=500, content=degraded)
|
| 778 |
+
|
| 779 |
+
|
| 780 |
+
def get_router_health() -> dict:
    """
    Check router health status.

    Reports model-loader readiness, the number of supported languages,
    and the list of routing capabilities.

    Returns:
        Health status dictionary; {"status": "degraded", "error": ...}
        if the check raises.
    """
    try:
        capabilities = [
            "weather", "events", "weather_events", "translation",
            "documents", "sentiment", "bias_detection", "general"
        ]
        loader_state = "initialized" if models else "not_initialized"
        return {
            "status": "operational",
            "model_loader": loader_state,
            "supported_languages": len(SUPPORTED_LANGUAGES),
            "routing_capabilities": capabilities
        }
    except Exception as exc:
        logger.error(f"Router health check failed: {exc}")
        return {"status": "degraded", "error": str(exc)}
|
seattle_wa.json
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"city": "Seattle",
|
| 3 |
+
"state": "Washington",
|
| 4 |
+
"tenant_id": "seattle_wa",
|
| 5 |
+
"legal_notes": {
|
| 6 |
+
"behavioral_health": "allowed",
|
| 7 |
+
"csb_authority": "city_department",
|
| 8 |
+
"events_source": "city_official_site",
|
| 9 |
+
"transit_source": "city",
|
| 10 |
+
"trash_source": "city",
|
| 11 |
+
"public_safety_source": "city",
|
| 12 |
+
"warming_center_authority": "city",
|
| 13 |
+
"sexual_health_authority": "city"
|
| 14 |
+
},
|
| 15 |
+
"official_links": {
|
| 16 |
+
"city_homepage": "https://www.seattle.gov/",
|
| 17 |
+
"events_calendar": "https://visitseattle.org/things-to-do/events/",
|
| 18 |
+
"public_health": "https://www.seattle.gov/human-services",
|
| 19 |
+
"transit": "https://kingcounty.gov/depts/transportation/metro.aspx",
|
| 20 |
+
"libraries": "https://www.spl.org/",
|
| 21 |
+
"parks_and_recreation": "https://www.seattle.gov/parks",
|
| 22 |
+
"emergency_management": "https://www.seattle.gov/emergency-management",
|
| 23 |
+
"trash": "https://www.seattle.gov/utilities/your-services/collection"
|
| 24 |
+
},
|
| 25 |
+
"services": {
|
| 26 |
+
"behavioral_health": {
|
| 27 |
+
"allowed": true,
|
| 28 |
+
"authority": "city_department",
|
| 29 |
+
"resources": [
|
| 30 |
+
{
|
| 31 |
+
"name": "Seattle Human Services Department (HSD)",
|
| 32 |
+
"link": "https://www.seattle.gov/human-services",
|
| 33 |
+
"phone": "206-684-0500",
|
| 34 |
+
"notes": "HSD coordinates city behavioral health and homelessness services. Penny should use this as the primary referral point for non-emergency help."
|
| 35 |
+
},
|
| 36 |
+
{
|
| 37 |
+
"name": "King County Crisis Connections",
|
| 38 |
+
"link": "https://www.crisisconnections.org/",
|
| 39 |
+
"phone": "866-427-4747",
|
| 40 |
+
"notes": "24/7 county-wide crisis line for mental health and substance abuse issues. Essential for immediate, non-911 crises."
|
| 41 |
+
}
|
| 42 |
+
]
|
| 43 |
+
},
|
| 44 |
+
"sexual_health": {
|
| 45 |
+
"allowed": true,
|
| 46 |
+
"authority": "city",
|
| 47 |
+
"resources": [
|
| 48 |
+
{
|
| 49 |
+
"name": "Public Health – Seattle & King County (Sexual Health Clinic)",
|
| 50 |
+
"link": "https://kingcounty.gov/depts/health/sexual-health.aspx",
|
| 51 |
+
"phone": "206-477-8000",
|
| 52 |
+
"notes": "Official county authority providing confidential STD/HIV testing, treatment, and prevention services in the Seattle area."
|
| 53 |
+
}
|
| 54 |
+
]
|
| 55 |
+
},
|
| 56 |
+
"warming_and_cooling_centers": {
|
| 57 |
+
"allowed": true,
|
| 58 |
+
"authority": "city",
|
| 59 |
+
"resources": [
|
| 60 |
+
{
|
| 61 |
+
"name": "Seattle Community Centers",
|
| 62 |
+
"address": "Varies by activation",
|
| 63 |
+
"season": "winter/summer",
|
| 64 |
+
"link": "https://www.seattle.gov/emergency-management/prepare/stay-safe/extreme-heat-and-cold",
|
| 65 |
+
"notes": "The City activates various libraries and community centers as warming shelters during extreme cold and cooling centers during extreme heat. Penny must advise residents to check the link for the official activation status."
|
| 66 |
+
}
|
| 67 |
+
]
|
| 68 |
+
},
|
| 69 |
+
"trash_and_recycling": {
|
| 70 |
+
"authority": "city",
|
| 71 |
+
"pickup_days": "Varies by address. Schedules and recycling rules are managed by Seattle Public Utilities (SPU).",
|
| 72 |
+
"holiday_schedule_link": "https://www.seattle.gov/utilities/your-services/collection/holiday-schedule"
|
| 73 |
+
},
|
| 74 |
+
"transit": {
|
| 75 |
+
"authority": "county",
|
| 76 |
+
"provider": "King County Metro",
|
| 77 |
+
"routes_link": "https://kingcounty.gov/depts/transportation/metro/schedules-routes.aspx",
|
| 78 |
+
"planner_link": "https://kingcounty.gov/depts/transportation/metro/travel-options.aspx"
|
| 79 |
+
},
|
| 80 |
+
"emergency": {
|
| 81 |
+
"authority": "city",
|
| 82 |
+
"alerts_link": "https://www.seattle.gov/emergency-management/alert-seattle",
|
| 83 |
+
"non_emergency_phone": "206-625-5011 (Police Non-Emergency)",
|
| 84 |
+
"emergency_management_link": "https://www.seattle.gov/emergency-management"
|
| 85 |
+
},
|
| 86 |
+
"libraries": {
|
| 87 |
+
"authority": "city",
|
| 88 |
+
"resources": [
|
| 89 |
+
{
|
| 90 |
+
"branch": "The Seattle Public Library – Central Library",
|
| 91 |
+
"address": "1000 4th Ave, Seattle, WA 98104",
|
| 92 |
+
"link": "https://www.spl.org/",
|
| 93 |
+
"notes": "The flagship library offering extensive resources, free computer access, and a safe, public space."
|
| 94 |
+
}
|
| 95 |
+
]
|
| 96 |
+
},
|
| 97 |
+
"community_centers": {
|
| 98 |
+
"authority": "city",
|
| 99 |
+
"resources": [
|
| 100 |
+
{
|
| 101 |
+
"name": "Magnuson Park Community Center",
|
| 102 |
+
"address": "7110 62nd Ave NE, Seattle, WA 98115",
|
| 103 |
+
"link": "https://www.seattle.gov/parks/allparks/magnuson-park",
|
| 104 |
+
"notes": "A large community hub offering diverse recreational programming, classes, and facilities."
|
| 105 |
+
}
|
| 106 |
+
]
|
| 107 |
+
}
|
| 108 |
+
}
|
| 109 |
+
}
|
sentiment_utils.py
ADDED
|
@@ -0,0 +1,396 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/sentiment/sentiment_utils.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Sentiment Analysis Model Utilities for PENNY Project
|
| 5 |
+
Handles text sentiment classification for user input analysis and content moderation.
|
| 6 |
+
Provides async sentiment analysis with structured error handling and logging.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import time
|
| 11 |
+
from typing import Dict, Any, Optional, List
|
| 12 |
+
|
| 13 |
+
# --- Logging Imports ---
|
| 14 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 15 |
+
|
| 16 |
+
# --- Model Loader Import ---
|
| 17 |
+
try:
|
| 18 |
+
from app.model_loader import load_model_pipeline
|
| 19 |
+
MODEL_LOADER_AVAILABLE = True
|
| 20 |
+
except ImportError:
|
| 21 |
+
MODEL_LOADER_AVAILABLE = False
|
| 22 |
+
import logging
|
| 23 |
+
logging.getLogger(__name__).warning("Could not import load_model_pipeline. Sentiment service unavailable.")
|
| 24 |
+
|
| 25 |
+
# Global variable to store the loaded pipeline for re-use
# Holds the loaded pipeline after a successful _initialize_sentiment_pipeline();
# stays None when loading failed or the model loader is unavailable.
SENTIMENT_PIPELINE: Optional[Any] = None
# Model identifier passed to load_model_pipeline().
AGENT_NAME = "penny-sentiment-agent"
# Guard flag so the (potentially expensive) load is attempted only once.
INITIALIZATION_ATTEMPTED = False
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _initialize_sentiment_pipeline() -> bool:
    """
    Load the sentiment pipeline at most once per process.

    Later calls do not retry; they simply report whether the earlier
    attempt produced a usable pipeline.

    Returns:
        bool: True when the pipeline is loaded and ready, False otherwise.
    """
    global SENTIMENT_PIPELINE, INITIALIZATION_ATTEMPTED

    if INITIALIZATION_ATTEMPTED:
        # A load was already tried; just report the outcome.
        return SENTIMENT_PIPELINE is not None
    INITIALIZATION_ATTEMPTED = True

    if not MODEL_LOADER_AVAILABLE:
        log_interaction(intent="sentiment_initialization", success=False,
                        error="model_loader unavailable")
        return False

    try:
        log_interaction(intent="sentiment_initialization", success=None,
                        details=f"Loading {AGENT_NAME}")
        SENTIMENT_PIPELINE = load_model_pipeline(AGENT_NAME)

        if SENTIMENT_PIPELINE is None:
            log_interaction(intent="sentiment_initialization", success=False,
                            error="Pipeline returned None")
            return False

        log_interaction(intent="sentiment_initialization", success=True,
                        details=f"Model {AGENT_NAME} loaded successfully")
        return True

    except Exception as exc:
        log_interaction(intent="sentiment_initialization", success=False,
                        error=str(exc))
        return False
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# Attempt initialization at module load.
# Import side effect: the model load happens here. Failures are logged
# inside _initialize_sentiment_pipeline() and leave SENTIMENT_PIPELINE
# as None rather than raising, so importing this module never fails.
_initialize_sentiment_pipeline()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def is_sentiment_available() -> bool:
    """
    Report whether the sentiment analysis service is usable.

    Returns:
        bool: True when the sentiment pipeline has been loaded.
    """
    pipeline_ready = SENTIMENT_PIPELINE is not None
    return pipeline_ready
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
async def get_sentiment_analysis(
    text: str,
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Runs sentiment analysis on the input text using the loaded pipeline.

    Args:
        text: The string of text to analyze (limited to 10,000 characters).
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
        - label (str): Sentiment label (e.g., "POSITIVE", "NEGATIVE"),
          or "UNKNOWN"/"ERROR" on failure
        - score (float): Confidence score for the prediction (0.0 on failure)
        - available (bool): Whether the service was available
        - message (str, optional): Error message if analysis failed
        - response_time_ms (int, optional): Analysis time in milliseconds

    Raises:
        asyncio.CancelledError: Re-raised (after logging) if the task is
            cancelled while inference is in flight.
    """
    start_time = time.time()

    # Service guard: pipeline never loaded or loader import failed.
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment pipeline not available",
            fallback_used=True
        )
        return {
            "label": "UNKNOWN",
            "score": 0.0,
            "available": False,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Input guard: reject None / empty / non-string payloads up front.
    if not text or not isinstance(text, str):
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid text input"
        )
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": True,
            "message": "Invalid text input provided."
        }

    # Length guard: avoid tying up the executor with extremely long texts.
    if len(text) > 10000:  # 10k character limit
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=f"Text too long: {len(text)} characters",
            text_preview=sanitize_for_logging(text[:100])
        )
        return {
            "label": "ERROR",
            "score": 0.0,
            "available": True,
            "message": "Text is too long for sentiment analysis (max 10,000 characters)."
        }

    try:
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines
        # since Python 3.10; get_running_loop() is the correct call here.
        loop = asyncio.get_running_loop()

        # Run blocking model inference in the default thread executor.
        # Hugging Face pipelines accept lists and return lists.
        results = await loop.run_in_executor(
            None,
            lambda: SENTIMENT_PIPELINE([text])
        )

        response_time_ms = int((time.time() - start_time) * 1000)

        # Defensive validation of the model output container.
        if not results or not isinstance(results, list) or len(results) == 0:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error="Empty or invalid model output",
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100])
            )
            return {
                "label": "ERROR",
                "score": 0.0,
                "available": True,
                "message": "Sentiment analysis returned unexpected format."
            }

        result = results[0]

        # Each result must be a dict exposing 'label' and 'score'.
        if not isinstance(result, dict) or 'label' not in result or 'score' not in result:
            log_interaction(
                intent="sentiment_analysis",
                tenant_id=tenant_id,
                success=False,
                error="Invalid result structure",
                response_time_ms=response_time_ms,
                text_preview=sanitize_for_logging(text[:100])
            )
            return {
                "label": "ERROR",
                "score": 0.0,
                "available": True,
                "message": "Sentiment analysis returned unexpected format."
            }

        # Emit an extra log record for unusually slow analyses.
        if response_time_ms > 3000:  # 3 seconds
            log_interaction(
                intent="sentiment_analysis_slow",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                details="Slow sentiment analysis detected",
                text_length=len(text)
            )

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            sentiment_label=result.get('label'),
            sentiment_score=result.get('score'),
            text_length=len(text)
        )

        return {
            "label": result['label'],
            "score": float(result['score']),
            "available": True,
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Record the cancellation, then propagate it to the caller.
        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Analysis cancelled"
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="sentiment_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            text_preview=sanitize_for_logging(text[:100]),
            fallback_used=True
        )

        return {
            "label": "ERROR",
            "score": 0.0,
            "available": False,
            "message": "An error occurred during sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
async def analyze_sentiment_batch(
    texts: List[str],
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Runs sentiment analysis on a batch of texts for efficiency.

    Invalid (non-string / blank) entries are dropped, and the batch is
    silently capped at 100 texts.

    Args:
        texts: List of text strings to analyze.
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
        - results (list): Sentiment result dicts, one per analyzed text
        - available (bool): Whether the service was available
        - total_analyzed (int): Number of texts successfully analyzed
        - message (str, optional): Error message if analysis failed
        - response_time_ms (int, optional): Total batch analysis time
    """
    start_time = time.time()

    # Service guard: pipeline never loaded or loader import failed.
    if not is_sentiment_available():
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Sentiment pipeline not available",
            batch_size=len(texts) if texts else 0
        )
        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "Sentiment analysis is temporarily unavailable."
        }

    # Input guard: require a non-empty list.
    if not texts or not isinstance(texts, list):
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="Invalid texts input"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "Invalid batch input provided."
        }

    # Keep only non-blank strings; cap batch size to bound inference time.
    valid_texts = [t for t in texts if isinstance(t, str) and t.strip()]
    if len(valid_texts) > 100:  # Batch size limit
        valid_texts = valid_texts[:100]

    if not valid_texts:
        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error="No valid texts in batch"
        )
        return {
            "results": [],
            "available": True,
            "total_analyzed": 0,
            "message": "No valid texts provided for analysis."
        }

    try:
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines
        # since Python 3.10; get_running_loop() is the correct call here.
        loop = asyncio.get_running_loop()

        # Run blocking batch inference in the default thread executor.
        results = await loop.run_in_executor(
            None,
            lambda: SENTIMENT_PIPELINE(valid_texts)
        )

        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts),
            total_analyzed=len(results) if results else 0
        )

        return {
            "results": results if results else [],
            "available": True,
            "total_analyzed": len(results) if results else 0,
            "response_time_ms": response_time_ms
        }

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="sentiment_batch_analysis",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            batch_size=len(valid_texts)
        )

        return {
            "results": [],
            "available": False,
            "total_analyzed": 0,
            "message": "An error occurred during batch sentiment analysis.",
            "error": str(e),
            "response_time_ms": response_time_ms
        }
|
tool_agent.py
ADDED
|
@@ -0,0 +1,666 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/tool_agent.py
|
| 2 |
+
"""
|
| 3 |
+
🛠️ PENNY Tool Agent - Civic Data & Services Handler
|
| 4 |
+
|
| 5 |
+
Routes requests to civic data sources (events, resources, transit, etc.)
|
| 6 |
+
and integrates with real-time weather information.
|
| 7 |
+
|
| 8 |
+
MISSION: Connect residents to local civic services by intelligently
|
| 9 |
+
processing their requests and returning relevant, actionable information.
|
| 10 |
+
|
| 11 |
+
FEATURES:
|
| 12 |
+
- Real-time weather integration with outfit recommendations
|
| 13 |
+
- Event discovery with weather-aware suggestions
|
| 14 |
+
- Resource lookup (trash, transit, emergency services)
|
| 15 |
+
- City-specific data routing
|
| 16 |
+
- Graceful fallback for missing data
|
| 17 |
+
|
| 18 |
+
ENHANCEMENTS (Phase 1):
|
| 19 |
+
- ✅ Structured logging with performance tracking
|
| 20 |
+
- ✅ Enhanced error handling with user-friendly messages
|
| 21 |
+
- ✅ Type hints for all functions
|
| 22 |
+
- ✅ Health check integration
|
| 23 |
+
- ✅ Service availability tracking
|
| 24 |
+
- ✅ Integration with enhanced modules
|
| 25 |
+
- ✅ Penny's friendly voice throughout
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
import logging
|
| 29 |
+
import time
|
| 30 |
+
from typing import Optional, Dict, Any
|
| 31 |
+
|
| 32 |
+
# --- ENHANCED MODULE IMPORTS ---
|
| 33 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 34 |
+
|
| 35 |
+
# --- AGENT IMPORTS (with availability tracking) ---
|
| 36 |
+
try:
|
| 37 |
+
from app.weather_agent import (
|
| 38 |
+
get_weather_for_location,
|
| 39 |
+
weather_to_event_recommendations,
|
| 40 |
+
recommend_outfit,
|
| 41 |
+
format_weather_summary
|
| 42 |
+
)
|
| 43 |
+
WEATHER_AGENT_AVAILABLE = True
|
| 44 |
+
except ImportError as e:
|
| 45 |
+
logging.getLogger(__name__).warning(f"Weather agent not available: {e}")
|
| 46 |
+
WEATHER_AGENT_AVAILABLE = False
|
| 47 |
+
|
| 48 |
+
# --- UTILITY IMPORTS (with availability tracking) ---
|
| 49 |
+
try:
|
| 50 |
+
from app.location_utils import (
|
| 51 |
+
extract_city_name,
|
| 52 |
+
load_city_events,
|
| 53 |
+
load_city_resources,
|
| 54 |
+
get_city_coordinates
|
| 55 |
+
)
|
| 56 |
+
LOCATION_UTILS_AVAILABLE = True
|
| 57 |
+
except ImportError as e:
|
| 58 |
+
logging.getLogger(__name__).warning(f"Location utils not available: {e}")
|
| 59 |
+
LOCATION_UTILS_AVAILABLE = False
|
| 60 |
+
|
| 61 |
+
# --- LOGGING SETUP ---
|
| 62 |
+
logger = logging.getLogger(__name__)
|
| 63 |
+
|
| 64 |
+
# --- TRACKING COUNTERS ---
|
| 65 |
+
_tool_request_count = 0
|
| 66 |
+
_weather_request_count = 0
|
| 67 |
+
_event_request_count = 0
|
| 68 |
+
_resource_request_count = 0
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ============================================================
|
| 72 |
+
# MAIN TOOL REQUEST HANDLER (ENHANCED)
|
| 73 |
+
# ============================================================
|
| 74 |
+
|
| 75 |
+
async def handle_tool_request(
    user_input: str,
    role: str = "unknown",
    lat: Optional[float] = None,
    lon: Optional[float] = None
) -> Dict[str, Any]:
    """
    🛠️ Route a civic-service request to the matching tool handler.

    Scans the request text for weather / event / resource keywords and
    dispatches to the corresponding handler, then attaches timing metadata
    and records the interaction.

    Args:
        user_input: User's request text.
        role: User's role (resident, official, etc.).
        lat: Latitude coordinate (optional).
        lon: Longitude coordinate (optional).

    Returns:
        Dictionary containing:
        - tool: str (which tool was used)
        - city: str (detected city name)
        - response: str or dict (user-facing response)
        - data: dict (optional, raw data)
        - tenant_id: str (optional, standardized city identifier)
        plus "response_time_ms" and "role" added here.
    """
    global _tool_request_count
    _tool_request_count += 1

    started = time.time()

    # Never log raw user text — strip potential PII first.
    safe_input = sanitize_for_logging(user_input)
    logger.info(f"🛠️ Tool request #{_tool_request_count}: '{safe_input[:50]}...'")

    try:
        # Without the location utilities we cannot resolve cities or data files.
        if not LOCATION_UTILS_AVAILABLE:
            logger.error("Location utilities not available")
            return {
                "tool": "error",
                "response": (
                    "I'm having trouble accessing city data right now. "
                    "Try again in a moment! 💛"
                ),
                "error": "Location utilities not loaded"
            }

        lowered = user_input.lower()
        city_name = extract_city_name(user_input)

        # Standardize tenant ID (e.g., "Atlanta" -> "atlanta_ga")
        # TODO: Enhance city_name extraction to detect state
        tenant_id = f"{city_name.lower().replace(' ', '_')}_ga"

        logger.info(f"Detected city: {city_name} (tenant_id: {tenant_id})")

        # Keyword groups checked in priority order: weather, events, resources.
        weather_words = ("weather", "forecast", "temperature", "rain", "sunny")
        event_words = ("events", "meetings", "city hall", "happening", "activities")
        resource_words = (
            "trash", "recycling", "garbage", "bus", "train",
            "schedule", "alert", "warning", "non emergency",
        )

        if any(word in lowered for word in weather_words):
            result = await _handle_weather_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )
        elif any(word in lowered for word in event_words):
            result = await _handle_events_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )
        elif any(word in lowered for word in resource_words):
            result = await _handle_resource_query(
                user_input=user_input,
                city_name=city_name,
                tenant_id=tenant_id,
                lowered=lowered
            )
        else:
            # Nothing matched — friendly fallback.
            result = _handle_unknown_query(city_name)

        # Attach metadata and record the interaction.
        elapsed_ms = (time.time() - started) * 1000
        result["response_time_ms"] = round(elapsed_ms, 2)
        result["role"] = role

        log_interaction(
            tenant_id=tenant_id,
            interaction_type="tool_request",
            intent=result.get("tool", "unknown"),
            response_time_ms=elapsed_ms,
            success=result.get("error") is None,
            metadata={
                "city": city_name,
                "tool": result.get("tool"),
                "role": role,
                "has_location": lat is not None and lon is not None
            }
        )

        logger.info(
            f"✅ Tool request complete: {result.get('tool')} "
            f"({elapsed_ms:.0f}ms)"
        )

        return result

    except Exception as e:
        elapsed_ms = (time.time() - started) * 1000
        logger.error(f"❌ Tool agent error: {e}", exc_info=True)

        log_interaction(
            tenant_id="unknown",
            interaction_type="tool_error",
            intent="error",
            response_time_ms=elapsed_ms,
            success=False,
            metadata={
                "error": str(e),
                "error_type": type(e).__name__
            }
        )

        return {
            "tool": "error",
            "response": (
                "I ran into trouble processing that request. "
                "Could you try rephrasing? 💛"
            ),
            "error": str(e),
            "response_time_ms": round(elapsed_ms, 2)
        }
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# ============================================================
|
| 230 |
+
# WEATHER QUERY HANDLER (ENHANCED)
|
| 231 |
+
# ============================================================
|
| 232 |
+
|
| 233 |
+
async def _handle_weather_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lat: Optional[float],
    lon: Optional[float]
) -> Dict[str, Any]:
    """
    🌤️ Handles weather-related queries with outfit recommendations.

    Resolves coordinates (falling back to the city's stored location),
    fetches conditions via the weather agent, and returns a summary plus
    an outfit suggestion and an optional activity recommendation.

    Args:
        user_input: Original request text (kept for a uniform handler
            signature; not inspected here).
        city_name: Human-readable city name used in the reply.
        tenant_id: Standardized city identifier used for coordinate lookup.
        lat: Latitude, if the caller already has one.
        lon: Longitude, if the caller already has one.

    Returns:
        Dict with "tool", "city", "response"; on success also "tenant_id"
        and raw "data" (weather, recommendations, outfit).
    """
    global _weather_request_count
    _weather_request_count += 1

    logger.info(f"🌤️ Weather query #{_weather_request_count} for {city_name}")

    # Check weather agent availability
    if not WEATHER_AGENT_AVAILABLE:
        logger.warning("Weather agent not available")
        return {
            "tool": "weather",
            "city": city_name,
            "response": "Weather service isn't available right now. Try again soon! 🌤️"
        }

    # Fall back to stored city coordinates when none were provided
    if lat is None or lon is None:
        coords = get_city_coordinates(tenant_id)
        if coords:
            lat, lon = coords["lat"], coords["lon"]
            logger.info(f"Using city coordinates: {lat}, {lon}")

    if lat is None or lon is None:
        return {
            "tool": "weather",
            "city": city_name,
            "response": (
                f"To get weather for {city_name}, I need location coordinates. "
                f"Can you share your location? 📍"
            )
        }

    try:
        # Fetch weather data
        weather = await get_weather_for_location(lat, lon)

        # Get weather-based event recommendations
        recommendations = weather_to_event_recommendations(weather)

        # Get outfit recommendation. FIX: tolerate an explicit
        # "temperature": None entry — .get("temperature", {}) only covers
        # a *missing* key and would crash on a null value.
        temp = (weather.get("temperature") or {}).get("value", 70)
        phrase = weather.get("phrase", "Clear")
        outfit = recommend_outfit(temp, phrase)

        # Format weather summary
        weather_summary = format_weather_summary(weather)

        # Build user-friendly response.
        # FIX: the "what to wear" prefix was mojibake (����) — replaced
        # with a valid clothing emoji.
        response_text = (
            f"🌤️ **Weather for {city_name}:**\n"
            f"{weather_summary}\n\n"
            f"👕 **What to wear:** {outfit}"
        )

        # Add the top event recommendation if one is available.
        # FIX: use .get() so a recommendation without a "reason" key
        # degrades gracefully instead of raising KeyError.
        if recommendations:
            reason = recommendations[0].get("reason")
            if reason:
                response_text += f"\n\n📅 **Activity suggestion:** {reason}"

        return {
            "tool": "weather",
            "city": city_name,
            "tenant_id": tenant_id,
            "response": response_text,
            "data": {
                "weather": weather,
                "recommendations": recommendations,
                "outfit": outfit
            }
        }

    except Exception as e:
        logger.error(f"Weather query error: {e}", exc_info=True)
        return {
            "tool": "weather",
            "city": city_name,
            "response": (
                f"I couldn't get the weather for {city_name} right now. "
                f"Try again in a moment! 🌤️"
            ),
            "error": str(e)
        }
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
# ============================================================
|
| 327 |
+
# EVENTS QUERY HANDLER (ENHANCED)
|
| 328 |
+
# ============================================================
|
| 329 |
+
|
| 330 |
+
async def _handle_events_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lat: Optional[float],
    lon: Optional[float]
) -> Dict[str, Any]:
    """
    📅 Handles event discovery queries.

    Loads the city's structured event file and summarizes the results,
    falling back to friendly messages when data is missing or unreadable.
    """
    global _event_request_count
    _event_request_count += 1

    logger.info(f"📅 Event query #{_event_request_count} for {city_name}")

    try:
        # Load structured event data for this tenant.
        payload = load_city_events(tenant_id)
        city_events = payload.get("events", [])
        total = len(city_events)

        # No events on file — tell the user rather than returning an empty list.
        if not city_events:
            return {
                "tool": "civic_events",
                "city": city_name,
                "tenant_id": tenant_id,
                "response": (
                    f"I don't have any upcoming events for {city_name} right now. "
                    f"Check back soon! 📅"
                )
            }

        # Headline the first event in the file.
        headline = city_events[0].get("name", "Upcoming event")

        if total == 1:
            summary = (
                f"📅 **Upcoming event in {city_name}:**\n"
                f"• {headline}\n\n"
                f"Check the full details in the attached data!"
            )
        else:
            summary = (
                f"📅 **Found {total} upcoming events in {city_name}!**\n"
                f"Top event: {headline}\n\n"
                f"Check the full list in the attached data!"
            )

        return {
            "tool": "civic_events",
            "city": city_name,
            "tenant_id": tenant_id,
            "response": summary,
            "data": payload
        }

    except FileNotFoundError:
        # No data file shipped for this tenant yet.
        logger.warning(f"Event data file not found for {tenant_id}")
        return {
            "tool": "civic_events",
            "city": city_name,
            "response": (
                f"Event data for {city_name} isn't available yet. "
                f"I'm still learning about events in your area! 📅"
            ),
            "error": "Event data file not found"
        }

    except Exception as e:
        logger.error(f"Events query error: {e}", exc_info=True)
        return {
            "tool": "civic_events",
            "city": city_name,
            "response": (
                f"I had trouble loading events for {city_name}. "
                f"Try again soon! 📅"
            ),
            "error": str(e)
        }
|
| 411 |
+
|
| 412 |
+
|
| 413 |
+
# ============================================================
|
| 414 |
+
# RESOURCE QUERY HANDLER (ENHANCED)
|
| 415 |
+
# ============================================================
|
| 416 |
+
|
| 417 |
+
async def _handle_resource_query(
    user_input: str,
    city_name: str,
    tenant_id: str,
    lowered: str
) -> Dict[str, Any]:
    """
    ♻️ Handles resource queries (trash, transit, emergency).

    Maps request keywords to a service category, loads the city's
    structured resource file, and formats a category-specific reply.

    Args:
        user_input: Original request text (kept for a uniform handler
            signature; not inspected here).
        city_name: Human-readable city name used in the reply.
        tenant_id: Standardized city identifier used to locate data files.
        lowered: Lower-cased request text used for keyword matching.

    Returns:
        Dict with "tool", "city", "response"; successful lookups also
        include "tenant_id" and the raw service "data".
    """
    global _resource_request_count
    _resource_request_count += 1

    logger.info(f"♻️ Resource query #{_resource_request_count} for {city_name}")

    # Map keywords to resource types
    resource_query_map = {
        "trash": "trash_and_recycling",
        "recycling": "trash_and_recycling",
        "garbage": "trash_and_recycling",
        "bus": "transit",
        "train": "transit",
        "schedule": "transit",
        "alert": "emergency",
        "warning": "emergency",
        "non emergency": "emergency"
    }

    # First keyword present in the request decides the category.
    resource_key = next(
        (resource_query_map[key] for key in resource_query_map if key in lowered),
        None
    )

    if not resource_key:
        return {
            "tool": "unknown",
            "city": city_name,
            "response": (
                "I'm not sure which resource you're asking about. "
                "Try asking about trash, transit, or emergency services! 💬"
            )
        }

    try:
        # Load structured resource data.
        # FIX: use .get("services", {}) — a data file without a "services"
        # section previously raised KeyError and fell into the generic
        # error path instead of the friendlier "not available" reply.
        resource_data = load_city_resources(tenant_id)
        service_info = resource_data.get("services", {}).get(resource_key, {})

        if not service_info:
            return {
                "tool": resource_key,
                "city": city_name,
                "response": (
                    f"I don't have {resource_key.replace('_', ' ')} information "
                    f"for {city_name} yet. Check the city's official website! 🏛️"
                )
            }

        # Build resource-specific response
        if resource_key == "trash_and_recycling":
            pickup_days = service_info.get('pickup_days', 'Varies by address')
            response_text = (
                f"♻️ **Trash & Recycling for {city_name}:**\n"
                f"Pickup days: {pickup_days}\n\n"
                f"Check the official link for your specific schedule!"
            )

        elif resource_key == "transit":
            provider = service_info.get('provider', 'The local transit authority')
            response_text = (
                f"🚌 **Transit for {city_name}:**\n"
                f"Provider: {provider}\n\n"
                f"Use the provided links to find routes and schedules!"
            )

        elif resource_key == "emergency":
            non_emergency = service_info.get('non_emergency_phone', 'N/A')
            response_text = (
                f"🚨 **Emergency Info for {city_name}:**\n"
                f"Non-emergency: {non_emergency}\n\n"
                f"**For life-threatening emergencies, always call 911.**"
            )

        else:
            # Unreachable with the current map; kept as a safety net.
            response_text = f"Information found for {resource_key.replace('_', ' ')}, but details aren't available yet."

        return {
            "tool": resource_key,
            "city": city_name,
            "tenant_id": tenant_id,
            "response": response_text,
            "data": service_info
        }

    except FileNotFoundError:
        logger.warning(f"Resource data file not found for {tenant_id}")
        return {
            "tool": "resource_loader",
            "city": city_name,
            "response": (
                f"Resource data for {city_name} isn't available yet. "
                f"Check back soon! 🏛️"
            ),
            "error": "Resource data file not found"
        }

    except Exception as e:
        logger.error(f"Resource query error: {e}", exc_info=True)
        return {
            "tool": "resource_loader",
            "city": city_name,
            "response": (
                f"I had trouble loading resource data for {city_name}. "
                f"Try again soon! 🏛️"
            ),
            "error": str(e)
        }
|
| 534 |
+
|
| 535 |
+
|
| 536 |
+
# ============================================================
|
| 537 |
+
# UNKNOWN QUERY HANDLER
|
| 538 |
+
# ============================================================
|
| 539 |
+
|
| 540 |
+
def _handle_unknown_query(city_name: str) -> Dict[str, Any]:
    """❓ Build the fallback reply for requests no tool can serve."""
    logger.info(f"❓ Unknown query for {city_name}")

    fallback_message = (
        "I'm not sure which civic service you're asking about. "
        "Try asking about weather, events, trash, or transit! 💬"
    )
    return {
        "tool": "unknown",
        "city": city_name,
        "response": fallback_message,
    }
|
| 554 |
+
|
| 555 |
+
|
| 556 |
+
# ============================================================
|
| 557 |
+
# HEALTH CHECK & DIAGNOSTICS
|
| 558 |
+
# ============================================================
|
| 559 |
+
|
| 560 |
+
def get_tool_agent_health() -> Dict[str, Any]:
    """
    📊 Returns tool agent health status.

    Consumed by the main application's health check endpoint; reports
    which backing services imported successfully plus request counters.
    """
    availability = {
        "weather_agent": WEATHER_AGENT_AVAILABLE,
        "location_utils": LOCATION_UTILS_AVAILABLE,
    }
    counters = {
        "total_requests": _tool_request_count,
        "weather_requests": _weather_request_count,
        "event_requests": _event_request_count,
        "resource_requests": _resource_request_count,
    }
    return {
        "status": "operational",
        "service_availability": availability,
        "statistics": counters,
        "supported_queries": [
            "weather",
            "events",
            "trash_and_recycling",
            "transit",
            "emergency",
        ],
    }
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
# ============================================================
|
| 589 |
+
# TESTING
|
| 590 |
+
# ============================================================
|
| 591 |
+
|
| 592 |
+
if __name__ == "__main__":
    """🧪 Test tool agent functionality"""
    import asyncio

    divider = "=" * 60
    print(divider)
    print("🧪 Testing Tool Agent")
    print(divider)

    # Show which optional dependencies imported successfully.
    print("\n📊 Service Availability:")
    print(f" Weather Agent: {'✅' if WEATHER_AGENT_AVAILABLE else '❌'}")
    print(f" Location Utils: {'✅' if LOCATION_UTILS_AVAILABLE else '❌'}")

    print("\n" + divider)

    # (label, query text, latitude, longitude)
    test_queries = [
        ("Weather query", "What's the weather in Atlanta?", 33.7490, -84.3880),
        ("Events query", "Events in Atlanta", None, None),
        ("Trash query", "When is trash pickup?", None, None),
    ]

    async def run_tests():
        for i, (label, text, q_lat, q_lon) in enumerate(test_queries, 1):
            print(f"\n--- Test {i}: {label} ---")
            print(f"Query: {text}")

            try:
                outcome = await handle_tool_request(
                    user_input=text,
                    role="test_user",
                    lat=q_lat,
                    lon=q_lon
                )

                print(f"Tool: {outcome.get('tool')}")
                print(f"City: {outcome.get('city')}")

                reply = outcome.get('response')
                if isinstance(reply, str):
                    print(f"Response: {reply[:150]}...")
                else:
                    print(f"Response: [Dict with {len(reply)} keys]")

                if outcome.get('response_time_ms'):
                    print(f"Response time: {outcome['response_time_ms']:.0f}ms")

            except Exception as e:
                print(f"❌ Error: {e}")

    asyncio.run(run_tests())

    print("\n" + divider)
    print("📊 Final Statistics:")
    for key, value in get_tool_agent_health()["statistics"].items():
        print(f" {key}: {value}")

    print("\n" + divider)
    print("✅ Tests complete")
    print(divider)
|
translation_utils.py
ADDED
|
@@ -0,0 +1,598 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# models/translation/translation_utils.py
|
| 2 |
+
|
| 3 |
+
"""
|
| 4 |
+
Translation Model Utilities for PENNY Project
|
| 5 |
+
Handles multilingual translation using NLLB-200 for civic engagement accessibility.
|
| 6 |
+
Provides async translation with structured error handling and language code normalization.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
import asyncio
|
| 10 |
+
import time
|
| 11 |
+
from typing import Dict, Any, Optional, List
|
| 12 |
+
|
| 13 |
+
# --- Logging Imports ---
|
| 14 |
+
from app.logging_utils import log_interaction, sanitize_for_logging
|
| 15 |
+
|
| 16 |
+
# --- Model Loader Import ---
|
| 17 |
+
try:
|
| 18 |
+
from app.model_loader import load_model_pipeline
|
| 19 |
+
MODEL_LOADER_AVAILABLE = True
|
| 20 |
+
except ImportError:
|
| 21 |
+
MODEL_LOADER_AVAILABLE = False
|
| 22 |
+
import logging
|
| 23 |
+
logging.getLogger(__name__).warning("Could not import load_model_pipeline. Translation service unavailable.")
|
| 24 |
+
|
| 25 |
+
# Global variable to store the loaded pipeline for re-use
# (populated at most once by _initialize_translation_pipeline; stays None on failure)
TRANSLATION_PIPELINE: Optional[Any] = None
# Registry name passed to model_loader.load_model_pipeline
AGENT_NAME = "penny-translate-agent"
# Guard so only the first initialization call does any work
INITIALIZATION_ATTEMPTED = False

# NLLB-200 Language Code Mapping (Common languages for civic engagement)
# Keys: lower-case human names / short codes accepted from callers.
# Values: NLLB-200 codes of the form "<iso639-3>_<Script>" (case-sensitive).
LANGUAGE_CODES = {
    # English variants
    "english": "eng_Latn",
    "en": "eng_Latn",

    # Spanish variants
    "spanish": "spa_Latn",
    "es": "spa_Latn",
    "español": "spa_Latn",

    # French
    "french": "fra_Latn",
    "fr": "fra_Latn",
    "français": "fra_Latn",

    # Mandarin Chinese
    "chinese": "zho_Hans",
    "mandarin": "zho_Hans",
    "zh": "zho_Hans",

    # Arabic
    "arabic": "arb_Arab",
    "ar": "arb_Arab",

    # Hindi
    "hindi": "hin_Deva",
    "hi": "hin_Deva",

    # Portuguese
    "portuguese": "por_Latn",
    "pt": "por_Latn",

    # Russian
    "russian": "rus_Cyrl",
    "ru": "rus_Cyrl",

    # German
    "german": "deu_Latn",
    "de": "deu_Latn",

    # Vietnamese
    "vietnamese": "vie_Latn",
    "vi": "vie_Latn",

    # Tagalog
    "tagalog": "tgl_Latn",
    "tl": "tgl_Latn",

    # Urdu
    "urdu": "urd_Arab",
    "ur": "urd_Arab",

    # Swahili
    "swahili": "swh_Latn",
    "sw": "swh_Latn",
}

# Pre-translated civic phrases for common queries
# Outer key: NLLB-200 language code; inner key: canonical phrase id.
# Only English and Spanish are pre-seeded here.
CIVIC_PHRASES = {
    "eng_Latn": {
        "voting_location": "Where is my polling place?",
        "voter_registration": "How do I register to vote?",
        "city_services": "What city services are available?",
        "report_issue": "I want to report a problem.",
        "contact_city": "How do I contact city hall?",
    },
    "spa_Latn": {
        "voting_location": "¿Dónde está mi lugar de votación?",
        "voter_registration": "¿Cómo me registro para votar?",
        "city_services": "¿Qué servicios de la ciudad están disponibles?",
        "report_issue": "Quiero reportar un problema.",
        "contact_city": "¿Cómo contacto al ayuntamiento?",
    }
}
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def _initialize_translation_pipeline() -> bool:
    """
    Initializes the translation pipeline only once.

    Subsequent calls are cheap no-ops that report whether the single
    earlier attempt produced a usable pipeline.

    Returns:
        bool: True if initialization succeeded, False otherwise.
    """
    global TRANSLATION_PIPELINE, INITIALIZATION_ATTEMPTED

    # Re-entry: report the outcome of the one-and-only attempt.
    if INITIALIZATION_ATTEMPTED:
        return TRANSLATION_PIPELINE is not None

    INITIALIZATION_ATTEMPTED = True

    # NOTE(review): log_interaction is called here with intent/success/
    # error/details kwargs, unlike the tenant_id/interaction_type style
    # used elsewhere — confirm logging_utils accepts both signatures.
    if not MODEL_LOADER_AVAILABLE:
        log_interaction(
            intent="translation_initialization",
            success=False,
            error="model_loader unavailable"
        )
        return False

    try:
        log_interaction(
            intent="translation_initialization",
            success=None,
            details=f"Loading {AGENT_NAME}"
        )

        TRANSLATION_PIPELINE = load_model_pipeline(AGENT_NAME)

        # Loader signals failure by returning None rather than raising.
        if TRANSLATION_PIPELINE is None:
            log_interaction(
                intent="translation_initialization",
                success=False,
                error="Pipeline returned None"
            )
            return False

        log_interaction(
            intent="translation_initialization",
            success=True,
            details=f"Model {AGENT_NAME} loaded successfully"
        )
        return True

    except Exception as e:
        log_interaction(
            intent="translation_initialization",
            success=False,
            error=str(e)
        )
        return False
|
| 160 |
+
|
| 161 |
+
|
| 162 |
+
# Attempt initialization at module load
|
| 163 |
+
_initialize_translation_pipeline()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def is_translation_available() -> bool:
    """
    Report whether the translation service is ready to use.

    Returns:
        bool: True if the NLLB pipeline finished loading, False otherwise.
    """
    pipeline = TRANSLATION_PIPELINE
    return pipeline is not None
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
def normalize_language_code(lang: str) -> str:
    """
    Map a human-readable language name or short code onto NLLB-200 format.

    Args:
        lang: Language name or code (e.g., "spanish", "es", "español")

    Returns:
        NLLB-200 language code (e.g., "spa_Latn"). Unknown inputs are
        returned lower-cased and stripped, unchanged otherwise.
    """
    if not isinstance(lang, str) or not lang:
        # Bad input falls back to English.
        return "eng_Latn"

    normalized = lang.strip().lower()

    # Anything containing an underscore is treated as already NLLB-native.
    if "_" in normalized:
        return normalized

    # Fall back to the raw value when no mapping exists.
    return LANGUAGE_CODES.get(normalized, normalized)
|
| 197 |
+
|
| 198 |
+
|
| 199 |
+
def get_supported_languages() -> List[str]:
    """
    Return the distinct NLLB-200 language codes PENNY can translate.

    Returns:
        List[str]: Deduplicated language codes (iteration order unspecified,
        matching the original set-based implementation).
    """
    unique_codes = set(LANGUAGE_CODES.values())
    return list(unique_codes)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
async def translate_text(
    text: str,
    source_language: str = "eng_Latn",
    target_language: str = "spa_Latn",
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Translates text from source language to target language using NLLB-200.

    The call degrades gracefully: on any failure the ORIGINAL text is returned
    in "translated_text" so callers can always display something.

    Args:
        text: The text to translate.
        source_language: Source language code (e.g., "eng_Latn", "spanish", "es")
        target_language: Target language code (e.g., "spa_Latn", "french", "fr")
        tenant_id: Optional tenant identifier for logging.

    Returns:
        A dictionary containing:
        - translated_text (str): The translated text (original text on fallback)
        - source_lang (str): Normalized source language code
        - target_lang (str): Normalized target language code
        - original_text (str): The input text
        - available (bool): Whether the service was available
        - error (str, optional): Error message if translation failed
        - skipped (bool, optional): True when source == target (no work done)
        - response_time_ms (int, optional): Translation time in milliseconds

    Raises:
        asyncio.CancelledError: Re-raised after logging so task cancellation
        propagates normally.
    """
    start_time = time.time()

    global TRANSLATION_PIPELINE

    # Check availability first so we never touch a None pipeline.
    if not is_translation_available():
        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=False,
            error="Translation pipeline not available",
            fallback_used=True
        )
        return {
            "translated_text": text,  # Return original text as fallback
            "source_lang": source_language,
            "target_lang": target_language,
            "original_text": text,
            "available": False,
            "error": "Translation service is temporarily unavailable."
        }

    # Validate input
    if not text or not isinstance(text, str):
        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=False,
            error="Invalid text input"
        )
        return {
            "translated_text": "",
            "source_lang": source_language,
            "target_lang": target_language,
            "original_text": text if isinstance(text, str) else "",
            "available": True,
            "error": "Invalid text input provided."
        }

    # Check text length (prevent processing extremely long texts)
    if len(text) > 5000:  # 5k character limit for translation
        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=False,
            error=f"Text too long: {len(text)} characters",
            text_preview=sanitize_for_logging(text[:100])
        )
        return {
            "translated_text": text,
            "source_lang": source_language,
            "target_lang": target_language,
            "original_text": text,
            "available": True,
            "error": "Text is too long for translation (max 5,000 characters)."
        }

    # Normalize language codes
    src_lang = normalize_language_code(source_language)
    tgt_lang = normalize_language_code(target_language)

    # Skip translation if source and target are the same
    if src_lang == tgt_lang:
        log_interaction(
            intent="translation_skipped",
            tenant_id=tenant_id,
            success=True,
            details="Source and target languages are identical"
        )
        return {
            "translated_text": text,
            "source_lang": src_lang,
            "target_lang": tgt_lang,
            "original_text": text,
            "available": True,
            "skipped": True
        }

    try:
        # get_running_loop() is the supported API inside a coroutine;
        # get_event_loop() has been deprecated in this context since Python 3.10.
        loop = asyncio.get_running_loop()

        # Run model inference in the default thread executor so the blocking
        # HF pipeline call does not stall the event loop.
        # NLLB pipeline expects text plus src_lang/tgt_lang parameters.
        results = await loop.run_in_executor(
            None,
            lambda: TRANSLATION_PIPELINE(
                text,
                src_lang=src_lang,
                tgt_lang=tgt_lang
            )
        )

        response_time_ms = int((time.time() - start_time) * 1000)

        # Validate results shape before indexing into it.
        if not results or not isinstance(results, list) or len(results) == 0:
            log_interaction(
                intent="translation",
                tenant_id=tenant_id,
                success=False,
                error="Empty or invalid model output",
                response_time_ms=response_time_ms,
                source_lang=src_lang,
                target_lang=tgt_lang
            )
            return {
                "translated_text": text,  # Fallback to original
                "source_lang": src_lang,
                "target_lang": tgt_lang,
                "original_text": text,
                "available": True,
                "error": "Translation returned unexpected format."
            }

        # NLLB returns format: [{'translation_text': '...'}]
        translated = results[0].get('translation_text', '').strip()

        if not translated:
            log_interaction(
                intent="translation",
                tenant_id=tenant_id,
                success=False,
                error="Empty translation result",
                response_time_ms=response_time_ms,
                source_lang=src_lang,
                target_lang=tgt_lang
            )
            return {
                "translated_text": text,  # Fallback to original
                "source_lang": src_lang,
                "target_lang": tgt_lang,
                "original_text": text,
                "available": True,
                "error": "Translation produced empty result."
            }

        # Log slow translations separately so they can be alerted on.
        if response_time_ms > 5000:  # 5 seconds
            log_interaction(
                intent="translation_slow",
                tenant_id=tenant_id,
                success=True,
                response_time_ms=response_time_ms,
                details="Slow translation detected",
                source_lang=src_lang,
                target_lang=tgt_lang,
                text_length=len(text)
            )

        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=True,
            response_time_ms=response_time_ms,
            source_lang=src_lang,
            target_lang=tgt_lang,
            text_length=len(text)
        )

        return {
            "translated_text": translated,
            "source_lang": src_lang,
            "target_lang": tgt_lang,
            "original_text": text,
            "available": True,
            "response_time_ms": response_time_ms
        }

    except asyncio.CancelledError:
        # Log, then re-raise so cancellation propagates to the caller.
        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=False,
            error="Translation cancelled",
            source_lang=src_lang,
            target_lang=tgt_lang
        )
        raise

    except Exception as e:
        response_time_ms = int((time.time() - start_time) * 1000)

        log_interaction(
            intent="translation",
            tenant_id=tenant_id,
            success=False,
            error=str(e),
            response_time_ms=response_time_ms,
            source_lang=src_lang,
            target_lang=tgt_lang,
            text_preview=sanitize_for_logging(text[:100]),
            fallback_used=True
        )

        return {
            "translated_text": text,  # Fallback to original
            "source_lang": src_lang,
            "target_lang": tgt_lang,
            "original_text": text,
            "available": False,
            "error": str(e),
            "response_time_ms": response_time_ms
        }
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
async def detect_and_translate(
    text: str,
    target_language: str = "eng_Latn",
    tenant_id: Optional[str] = None
) -> Dict[str, Any]:
    """
    Heuristically detect the source language, then translate to the target.

    Detection is purely character-based: Spanish-specific punctuation/accents
    first, then a handful of Unicode script ranges. It defaults to English.
    For production-grade accuracy a dedicated language-identification model
    would be preferable.

    Args:
        text: The text to translate
        target_language: Target language code
        tenant_id: Optional tenant identifier for logging

    Returns:
        Translation result dictionary with an extra "detected_lang" key.
    """
    if not text or not isinstance(text, str):
        return {
            "translated_text": "",
            "detected_lang": "unknown",
            "target_lang": target_language,
            "original_text": text if isinstance(text, str) else "",
            "available": True,
            "error": "Invalid text input."
        }

    # Default assumption unless a marker below says otherwise.
    detected_lang = "eng_Latn"

    if any(marker in text for marker in "¿¡ñáéíóú"):
        # Spanish-specific punctuation and accented vowels.
        detected_lang = "spa_Latn"
    else:
        # Unicode script ranges, checked in priority order.
        script_hints = (
            ("\u4e00", "\u9fff", "zho_Hans"),  # CJK ideographs
            ("\u0600", "\u06ff", "arb_Arab"),  # Arabic script
            ("\u0400", "\u04ff", "rus_Cyrl"),  # Cyrillic (Russian)
            ("\u0900", "\u097f", "hin_Deva"),  # Devanagari (Hindi)
        )
        for low, high, code in script_hints:
            if any(low <= ch <= high for ch in text):
                detected_lang = code
                break

    log_interaction(
        intent="language_detection",
        tenant_id=tenant_id,
        success=True,
        detected_lang=detected_lang,
        text_preview=sanitize_for_logging(text[:50])
    )

    result = await translate_text(text, detected_lang, target_language, tenant_id)
    result["detected_lang"] = detected_lang

    return result
|
| 499 |
+
|
| 500 |
+
|
| 501 |
+
async def batch_translate(
    texts: List[str],
    source_language: str = "eng_Latn",
    target_language: str = "spa_Latn",
    tenant_id: Optional[str] = None,
    max_batch_size: int = 50
) -> List[Dict[str, Any]]:
    """
    Translate multiple texts sequentially, with one batch-level log entry.

    Args:
        texts: List of strings to translate
        source_language: Source language code
        target_language: Target language code
        tenant_id: Optional tenant identifier for logging
        max_batch_size: Maximum number of texts processed per call; extra
            entries are dropped. Defaults to 50 (previously hard-coded).

    Returns:
        List of translation result dictionaries (empty on invalid input).
    """
    if not texts or not isinstance(texts, list):
        log_interaction(
            intent="batch_translation",
            tenant_id=tenant_id,
            success=False,
            error="Invalid texts input"
        )
        return []

    # Keep only non-empty strings, then enforce the batch-size cap.
    valid_texts = [t for t in texts if isinstance(t, str) and t.strip()]
    if len(valid_texts) > max_batch_size:
        valid_texts = valid_texts[:max_batch_size]
        log_interaction(
            intent="batch_translation",
            tenant_id=tenant_id,
            success=None,
            details=f"Batch size limited to {max_batch_size} texts"
        )

    if not valid_texts:
        log_interaction(
            intent="batch_translation",
            tenant_id=tenant_id,
            success=False,
            error="No valid texts in batch"
        )
        return []

    start_time = time.time()
    results = []

    # Sequential awaits: translate_text already offloads the blocking model
    # call to a thread executor, and sequencing keeps results ordered and
    # avoids saturating the executor with a large batch.
    for text in valid_texts:
        result = await translate_text(text, source_language, target_language, tenant_id)
        results.append(result)

    response_time_ms = int((time.time() - start_time) * 1000)

    log_interaction(
        intent="batch_translation",
        tenant_id=tenant_id,
        success=True,
        response_time_ms=response_time_ms,
        batch_size=len(valid_texts),
        source_lang=normalize_language_code(source_language),
        target_lang=normalize_language_code(target_language)
    )

    return results
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
def get_civic_phrase(
    phrase_key: str,
    language: str = "eng_Latn"
) -> str:
    """
    Look up a pre-translated civic phrase for common queries.

    Args:
        phrase_key: Key for the civic phrase (e.g., "voting_location")
        language: Target language code

    Returns:
        The translated phrase, or "" when the key/language pair is unknown.
    """
    if not isinstance(phrase_key, str) or not phrase_key:
        return ""

    code = normalize_language_code(language)
    translations = CIVIC_PHRASES.get(code, {})
    phrase = translations.get(phrase_key, "")

    # Only successful lookups are logged; misses return silently.
    if phrase:
        log_interaction(
            intent="civic_phrase_lookup",
            success=True,
            phrase_key=phrase_key,
            language=code
        )

    return phrase
|
weather_agent.py
ADDED
|
@@ -0,0 +1,529 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app/weather_agent.py
|
| 2 |
+
"""
|
| 3 |
+
🌤️ PENNY Weather Agent - Azure Maps Integration
|
| 4 |
+
|
| 5 |
+
Provides real-time weather information and weather-aware recommendations
|
| 6 |
+
for civic engagement activities.
|
| 7 |
+
|
| 8 |
+
MISSION: Help residents plan their day with accurate weather data and
|
| 9 |
+
smart suggestions for indoor/outdoor activities based on conditions.
|
| 10 |
+
|
| 11 |
+
ENHANCEMENTS (Phase 1 Complete):
|
| 12 |
+
- ✅ Structured logging with performance tracking
|
| 13 |
+
- ✅ Enhanced error handling with graceful degradation
|
| 14 |
+
- ✅ Type hints for all functions
|
| 15 |
+
- ✅ Health check integration
|
| 16 |
+
- ✅ Response caching for performance
|
| 17 |
+
- ✅ Detailed weather parsing with validation
|
| 18 |
+
- ✅ Penny's friendly voice in all responses
|
| 19 |
+
|
| 20 |
+
Production-ready for Azure ML deployment.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import os
|
| 24 |
+
import logging
|
| 25 |
+
import time
|
| 26 |
+
from typing import Dict, Any, Optional, List, Tuple
|
| 27 |
+
from datetime import datetime, timedelta
|
| 28 |
+
import httpx
|
| 29 |
+
|
| 30 |
+
# --- ENHANCED MODULE IMPORTS ---
|
| 31 |
+
from app.logging_utils import log_interaction
|
| 32 |
+
|
| 33 |
+
# --- LOGGING SETUP ---
|
| 34 |
+
logger = logging.getLogger(__name__)
|
| 35 |
+
|
| 36 |
+
# --- CONFIGURATION ---
|
| 37 |
+
AZURE_WEATHER_URL = "https://atlas.microsoft.com/weather/currentConditions/json"
|
| 38 |
+
DEFAULT_TIMEOUT = 10.0 # seconds
|
| 39 |
+
CACHE_TTL_SECONDS = 300 # 5 minutes - weather doesn't change that fast
|
| 40 |
+
|
| 41 |
+
# --- WEATHER CACHE ---
|
| 42 |
+
_weather_cache: Dict[str, Tuple[Dict[str, Any], datetime]] = {}
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# ============================================================
|
| 46 |
+
# WEATHER DATA RETRIEVAL
|
| 47 |
+
# ============================================================
|
| 48 |
+
|
| 49 |
+
async def get_weather_for_location(
    lat: float,
    lon: float,
    use_cache: bool = True
) -> Dict[str, Any]:
    """
    🌤️ Fetches real-time weather from Azure Maps.

    Looks up current conditions for a coordinate pair via the Azure Maps
    Weather API, with a small in-process TTL cache to cut API traffic.

    Args:
        lat: Latitude coordinate
        lon: Longitude coordinate
        use_cache: Whether cached data (within TTL) may be returned

    Returns:
        Weather data dictionary with at minimum: temperature
        ({value, unit}), phrase, iconCode, hasPrecipitation, isDayTime,
        relativeHumidity, cloudCover (see _validate_weather_data).

    Raises:
        RuntimeError: If AZURE_MAPS_KEY is not configured
        httpx.HTTPError: On timeout or HTTP failure from the API
    """
    start_time = time.time()

    # Cache entries are keyed by coordinates rounded to 4 decimal places.
    cache_key = f"{lat:.4f},{lon:.4f}"

    if use_cache:
        entry = _weather_cache.get(cache_key)
        if entry is not None:
            cached_data, cached_time = entry
            age = (datetime.now() - cached_time).total_seconds()
            if age < CACHE_TTL_SECONDS:
                logger.info(
                    f"🌤️ Weather cache hit (age: {age:.0f}s, "
                    f"location: {cache_key})"
                )
                return cached_data

    # The key is read per-call so a rotated secret takes effect immediately.
    AZURE_MAPS_KEY = os.getenv("AZURE_MAPS_KEY")
    if not AZURE_MAPS_KEY:
        logger.error("❌ AZURE_MAPS_KEY not configured")
        raise RuntimeError(
            "AZURE_MAPS_KEY is required and not set in environment variables."
        )

    params = {
        "api-version": "1.0",
        "query": f"{lat},{lon}",
        "subscription-key": AZURE_MAPS_KEY,
        "details": "true",
        "language": "en-US",
    }

    try:
        logger.info(f"🌤️ Fetching weather for location: {cache_key}")

        async with httpx.AsyncClient(timeout=DEFAULT_TIMEOUT) as client:
            response = await client.get(AZURE_WEATHER_URL, params=params)
            response.raise_for_status()
            payload = response.json()

        # Azure Maps wraps conditions in a "results" array.
        if "results" in payload and len(payload["results"]) > 0:
            weather_data = payload["results"][0]
        else:
            weather_data = payload  # Fallback if structure changes

        # Fill in defaults for any missing essential fields.
        weather_data = _validate_weather_data(weather_data)

        # Store fresh data for subsequent calls.
        _weather_cache[cache_key] = (weather_data, datetime.now())

        response_time = (time.time() - start_time) * 1000

        log_interaction(
            tenant_id="weather_service",
            interaction_type="weather_fetch",
            intent="weather",
            response_time_ms=response_time,
            success=True,
            metadata={
                "location": cache_key,
                "cached": False,
                "temperature": weather_data.get("temperature", {}).get("value"),
                "condition": weather_data.get("phrase")
            }
        )

        logger.info(
            f"✅ Weather fetched successfully ({response_time:.0f}ms, "
            f"location: {cache_key})"
        )

        return weather_data

    except httpx.TimeoutException as e:
        logger.error(f"⏱️ Weather API timeout: {e}")
        raise

    except httpx.HTTPStatusError as e:
        logger.error(f"❌ Weather API HTTP error: {e.response.status_code}")
        raise

    except Exception as e:
        logger.error(f"❌ Weather API error: {e}", exc_info=True)
        raise
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def _validate_weather_data(data: Dict[str, Any]) -> Dict[str, Any]:
|
| 180 |
+
"""
|
| 181 |
+
Validates and normalizes weather data from Azure Maps.
|
| 182 |
+
|
| 183 |
+
Ensures essential fields are present with sensible defaults.
|
| 184 |
+
"""
|
| 185 |
+
# Ensure temperature exists
|
| 186 |
+
if "temperature" not in data:
|
| 187 |
+
data["temperature"] = {"value": None, "unit": "F"}
|
| 188 |
+
elif isinstance(data["temperature"], (int, float)):
|
| 189 |
+
# Handle case where temperature is just a number
|
| 190 |
+
data["temperature"] = {"value": data["temperature"], "unit": "F"}
|
| 191 |
+
|
| 192 |
+
# Ensure phrase exists
|
| 193 |
+
if "phrase" not in data or not data["phrase"]:
|
| 194 |
+
data["phrase"] = "Conditions unavailable"
|
| 195 |
+
|
| 196 |
+
# Ensure boolean flags exist
|
| 197 |
+
data.setdefault("hasPrecipitation", False)
|
| 198 |
+
data.setdefault("isDayTime", True)
|
| 199 |
+
|
| 200 |
+
# Ensure numeric fields exist
|
| 201 |
+
data.setdefault("relativeHumidity", None)
|
| 202 |
+
data.setdefault("cloudCover", None)
|
| 203 |
+
data.setdefault("iconCode", None)
|
| 204 |
+
|
| 205 |
+
return data
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
# ============================================================
|
| 209 |
+
# OUTFIT RECOMMENDATIONS
|
| 210 |
+
# ============================================================
|
| 211 |
+
|
| 212 |
+
def recommend_outfit(high_temp: float, condition: str) -> str:
    """
    👕 Recommends what to wear based on weather conditions.

    Precipitation wins over temperature; otherwise the advice is picked
    from descending temperature tiers.

    Args:
        high_temp: Expected high temperature in Fahrenheit
        condition: Weather condition description (e.g., "Sunny", "Rainy")

    Returns:
        Friendly outfit recommendation string

    Example:
        recommend_outfit(85, "Sunny")
        # -> "Light clothes, sunscreen, and stay hydrated! ☀️"
    """
    cond = condition.lower()

    # Precipitation takes priority over any temperature tier.
    if any(word in cond for word in ("rain", "storm")):
        logger.debug(f"Outfit rec: Rain/Storm (temp: {high_temp}°F)")
        return "Bring an umbrella or rain jacket! ☔"

    # Temperature tiers, hottest first.
    tiers = (
        (85, "Hot", "Light clothes, sunscreen, and stay hydrated! ☀️"),
        (72, "Warm", "T-shirt and jeans or a casual dress. 👕"),
        (60, "Mild", "A hoodie or light jacket should do! 🧥"),
    )
    for threshold, label, advice in tiers:
        if high_temp >= threshold:
            logger.debug(f"Outfit rec: {label} (temp: {high_temp}°F)")
            return advice

    logger.debug(f"Outfit rec: Cold (temp: {high_temp}°F)")
    return "Bundle up — sweater or coat recommended! 🧣"
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
# ============================================================
|
| 255 |
+
# EVENT RECOMMENDATIONS BASED ON WEATHER
|
| 256 |
+
# ============================================================
|
| 257 |
+
|
| 258 |
+
def weather_to_event_recommendations(
    weather: Dict[str, Any]
) -> List[Dict[str, Any]]:
    """
    📅 Suggests activity types based on current weather conditions.

    Analyzes weather data to provide smart recommendations for
    indoor vs outdoor activities, helping residents plan their day.
    Precipitation is checked first, then warm (>= 75°F) and cold (< 50°F)
    tiers; everything else gets flexible "neutral" suggestions.

    Args:
        weather: Weather data dictionary from get_weather_for_location()

    Returns:
        List of recommendation dictionaries with keys:
        - type: str ("indoor", "outdoor", "neutral")
        - suggestions: List[str] (specific activity ideas)
        - reason: str (explanation for recommendation)
        - priority: int (1-2, for sorting)

    Example:
        recs = weather_to_event_recommendations(weather)
        for rec in recs:
            print(f"{rec['type']}: {rec['suggestions']}")
    """
    condition = (weather.get("phrase") or "").lower()
    temp = weather.get("temperature", {}).get("value")
    has_precipitation = weather.get("hasPrecipitation", False)

    recs = []

    # Check for rain or storms (highest priority)
    if "rain" in condition or "storm" in condition or has_precipitation:
        logger.debug("Event rec: Indoor (precipitation)")
        recs.append({
            "type": "indoor",
            "suggestions": [
                "Visit a library 📚",
                "Check out a community center event 🏛️",
                "Find an indoor workshop or class 🎨",
                "Explore a local museum 🖼️"
            ],
            "reason": "Rainy weather makes indoor events ideal!",
            "priority": 1
        })

    # Warm weather outdoor activities
    elif temp is not None and temp >= 75:
        logger.debug(f"Event rec: Outdoor (warm: {temp}°F)")
        recs.append({
            "type": "outdoor",
            "suggestions": [
                "Visit a park 🌳",
                "Check out a farmers market 🥕",
                "Look for outdoor concerts or festivals 🎵",
                "Enjoy a community picnic or BBQ 🍔"
            ],
            "reason": "Beautiful weather for outdoor activities!",
            "priority": 1
        })

    # Cold weather considerations
    elif temp is not None and temp < 50:
        logger.debug(f"Event rec: Indoor (cold: {temp}°F)")
        recs.append({
            "type": "indoor",
            "suggestions": [
                "Browse local events at community centers 🏛️",
                "Visit a museum or art gallery 🖼️",
                "Check out indoor markets or shopping 🛍️",
                "Warm up at a local café or restaurant ☕"
            ],
            "reason": "Chilly weather — indoor activities are cozy!",
            "priority": 1
        })

    # Mild/neutral weather
    else:
        # BUGFIX: the fallback conditional was previously inside the f-string
        # literal, so "if temp else 'unknown'" was printed verbatim and a None
        # temperature rendered as "None°F".
        temp_label = f"{temp}°F" if temp is not None else "unknown"
        logger.debug(f"Event rec: Neutral (mild: {temp_label})")
        recs.append({
            "type": "neutral",
            "suggestions": [
                "Browse local events 📅",
                "Visit a museum or cultural center 🏛️",
                "Walk around a local plaza or downtown 🚶",
                "Check out both indoor and outdoor activities 🌍"
            ],
            "reason": "Mild weather gives you flexible options!",
            "priority": 2
        })

    return recs
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# ============================================================
|
| 353 |
+
# HELPER FUNCTIONS
|
| 354 |
+
# ============================================================
|
| 355 |
+
|
| 356 |
+
def format_weather_summary(weather: Dict[str, Any]) -> str:
    """
    📝 Builds a short, human-readable weather summary string.

    Args:
        weather: Weather data dictionary. May contain a "temperature"
            sub-dict with "value"/"unit", a "phrase", and a
            "relativeHumidity" key — all optional.

    Returns:
        Friendly one-line summary in Penny's voice, e.g.
        "Currently 72°F and Partly Cloudy. Humidity: 65%"
    """
    temperature = weather.get("temperature", {})
    reading = temperature.get("value")
    degree_unit = temperature.get("unit", "F")
    condition = weather.get("phrase", "Conditions unavailable")
    rel_humidity = weather.get("relativeHumidity")

    # Assemble the pieces in display order: temperature, condition, humidity.
    pieces = []
    if reading is not None:
        # Truncate (not round) the temperature for display.
        pieces.append(f"Currently {int(reading)}°{degree_unit}")
    pieces.append(condition)
    if rel_humidity is not None:
        pieces.append(f"Humidity: {rel_humidity}%")

    # The first two pieces are joined with "and"; humidity (if present)
    # becomes its own trailing sentence.
    text = " and ".join(pieces[:2])
    if len(pieces) > 2:
        text = f"{text}. {pieces[2]}"

    return text
|
| 392 |
+
|
| 393 |
+
|
| 394 |
+
def clear_weather_cache():
    """
    🧹 Empties the module-level weather cache.

    Handy during tests, or whenever fresh API data must be fetched on
    the very next request instead of waiting for TTL expiry.
    """
    global _weather_cache
    # Capture the count before clearing so the log line is accurate.
    removed = len(_weather_cache)
    _weather_cache.clear()
    logger.info(f"🧹 Weather cache cleared ({removed} entries removed)")
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
def get_cache_stats() -> Dict[str, Any]:
    """
    📊 Reports statistics about the weather cache.

    Returns:
        Dictionary with:
        - entries: number of cached locations (int)
        - oldest_entry_age_seconds: age of the stalest entry (float or None)
        - newest_entry_age_seconds: age of the freshest entry (float or None)
    """
    # Empty cache: zero entries, no ages to report.
    if not _weather_cache:
        return {
            "entries": 0,
            "oldest_entry_age_seconds": None,
            "newest_entry_age_seconds": None,
        }

    current = datetime.now()
    # Each cache value is a (data, cached_time) pair — measure entry ages.
    entry_ages = [
        (current - stamp).total_seconds()
        for _, stamp in _weather_cache.values()
    ]

    return {
        "entries": len(_weather_cache),
        "oldest_entry_age_seconds": max(entry_ages) if entry_ages else None,
        "newest_entry_age_seconds": min(entry_ages) if entry_ages else None,
    }
|
| 434 |
+
|
| 435 |
+
|
| 436 |
+
# ============================================================
|
| 437 |
+
# HEALTH CHECK
|
| 438 |
+
# ============================================================
|
| 439 |
+
|
| 440 |
+
def get_weather_agent_health() -> Dict[str, Any]:
    """
    📊 Builds a health-status report for the weather agent.

    Consumed by the main application's health-check endpoint to monitor
    the weather service's availability and configuration.

    Returns:
        Dictionary describing service status, API-key presence, cache
        state, timing configuration, and supported features.
    """
    # Service is "degraded" (not down) when the Azure Maps key is missing.
    has_api_key = bool(os.getenv("AZURE_MAPS_KEY"))

    return {
        "status": "operational" if has_api_key else "degraded",
        "service": "azure_maps_weather",
        "api_key_configured": has_api_key,
        "cache": get_cache_stats(),
        "cache_ttl_seconds": CACHE_TTL_SECONDS,
        "default_timeout_seconds": DEFAULT_TIMEOUT,
        "features": {
            "real_time_weather": True,
            "outfit_recommendations": True,
            "event_recommendations": True,
            "response_caching": True,
        },
    }
|
| 469 |
+
|
| 470 |
+
|
| 471 |
+
# ============================================================
|
| 472 |
+
# TESTING
|
| 473 |
+
# ============================================================
|
| 474 |
+
|
| 475 |
+
if __name__ == "__main__":
    # 🧪 Manual smoke test for the weather agent (needs a live API key).
    import asyncio

    banner = "=" * 60
    print(banner)
    print("🧪 Testing Weather Agent")
    print(banner)

    async def run_tests():
        # Fixed test location: Atlanta, GA.
        lat, lon = 33.7490, -84.3880

        print("\n--- Test 1: Fetch Weather ---")
        print(f"Location: {lat}, {lon} (Atlanta, GA)")

        try:
            weather = await get_weather_for_location(lat, lon)
            print("✅ Weather fetched successfully")
            print(f"Temperature: {weather.get('temperature', {}).get('value')}°F")
            print(f"Condition: {weather.get('phrase')}")
            print(f"Precipitation: {weather.get('hasPrecipitation')}")

            print("\n--- Test 2: Weather Summary ---")
            print(f"Summary: {format_weather_summary(weather)}")

            print("\n--- Test 3: Outfit Recommendation ---")
            temp = weather.get('temperature', {}).get('value', 70)
            condition = weather.get('phrase', 'Clear')
            print(f"Outfit: {recommend_outfit(temp, condition)}")

            print("\n--- Test 4: Event Recommendations ---")
            for rec in weather_to_event_recommendations(weather):
                print(f"Type: {rec['type']}")
                print(f"Reason: {rec['reason']}")
                print(f"Suggestions: {', '.join(rec['suggestions'][:2])}")

            print("\n--- Test 5: Cache Test ---")
            # Second fetch should be served from the cache.
            weather2 = await get_weather_for_location(lat, lon, use_cache=True)
            print("✅ Cache working (should be instant)")

            print("\n--- Test 6: Health Check ---")
            health = get_weather_agent_health()
            print(f"Status: {health['status']}")
            print(f"Cache entries: {health['cache']['entries']}")

        except Exception as e:
            print(f"❌ Error: {e}")

    asyncio.run(run_tests())

    print("\n" + banner)
    print("✅ Tests complete")
|