pythonprincess commited on
Commit
b8dfce3
·
verified ·
1 Parent(s): 46a515b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app/model_loader.py +897 -0
  2. app/orchestrator.py +1410 -0
app/model_loader.py ADDED
@@ -0,0 +1,897 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/model_loader.py
2
+ """
3
+ 🧠 PENNY Model Loader - Azure-Ready Multi-Model Orchestration
4
+
5
+ This is Penny's brain loader. She manages multiple specialized models:
6
+ - Gemma 7B for conversational reasoning
7
+ - NLLB-200 for 27-language translation
8
+ - Sentiment analysis for resident wellbeing
9
+ - Bias detection for equitable service
10
+ - LayoutLM for civic document processing
11
+
12
+ MISSION: Load AI models efficiently in memory-constrained environments while
13
+ maintaining Penny's warm, civic-focused personality across all interactions.
14
+
15
+ FEATURES:
16
+ - Lazy loading (models only load when needed)
17
+ - 8-bit quantization for memory efficiency
18
+ - GPU/CPU auto-detection
19
+ - Model caching and reuse
20
+ - Graceful fallbacks for Azure ML deployment
21
+ - Memory monitoring and cleanup
22
+ """
23
+
24
+ import json
25
+ import os
26
+ import torch
27
+ from typing import Dict, Any, Callable, Optional, Union, List
28
+ from pathlib import Path
29
+ import logging
30
+ from dataclasses import dataclass
31
+ from enum import Enum
32
+ from datetime import datetime
33
+
34
# --- LOGGING SETUP (Must be before functions that use it) ---
# Module-level logger named after this module, per standard logging practice;
# handlers/levels are expected to be configured by the application entry point.
logger = logging.getLogger(__name__)
36
+
37
+ # ============================================================
38
+ # HUGGING FACE AUTHENTICATION
39
+ # ============================================================
40
+
41
def setup_huggingface_auth() -> bool:
    """
    🔐 Log in to the Hugging Face Hub when a token is available.

    Reads HF_TOKEN first, then READTOKEN (the name used by some
    Hugging Face Spaces configurations).

    Returns:
        True if authentication succeeded, False when no token is set,
        huggingface_hub is missing, or the login call fails.
    """
    token = os.getenv("HF_TOKEN") or os.getenv("READTOKEN")

    if not token:
        # Without credentials, gated/private models cannot be downloaded.
        logger.warning("⚠️ HF_TOKEN/READTOKEN not found in environment")
        logger.warning("   Some models may not be accessible")
        logger.warning("   Set HF_TOKEN or READTOKEN in your environment or Hugging Face Spaces secrets")
        return False

    try:
        from huggingface_hub import login
        login(token=token, add_to_git_credential=False)
    except ImportError:
        logger.warning("⚠️ huggingface_hub not installed, skipping authentication")
        return False
    except Exception as e:
        logger.error(f"❌ Failed to authenticate with Hugging Face: {e}")
        return False

    logger.info("✅ Authenticated with Hugging Face Hub")
    return True
68
+
69
# Attempt authentication at module load.
# Note: This runs when the module is imported, so HF_TOKEN must be in environment
# For Hugging Face Spaces: Set HF_TOKEN as a secret in Space settings
# For local dev: Add HF_TOKEN to .env file or export it
# NOTE(review): import-time network/auth side effects make this module slow to
# import and harder to test in isolation — consider moving into an explicit init.
_authentication_result = setup_huggingface_auth()
if _authentication_result:
    logger.info("🔐 Hugging Face authentication successful - gated models accessible")
else:
    logger.warning("⚠️ Hugging Face authentication failed - only public models will work")
78
+
79
# --- PATH CONFIGURATION (Environment-Aware) ---
# Support both local development and Azure ML deployment.
# AZUREML_MODEL_DIR is injected by Azure ML into deployed containers.
if os.getenv("AZUREML_MODEL_DIR"):
    # Azure ML deployment - models are in AZUREML_MODEL_DIR
    MODEL_ROOT = Path(os.getenv("AZUREML_MODEL_DIR"))
    CONFIG_PATH = MODEL_ROOT / "model_config.json"
    logger.info("☁️ Running in Azure ML environment")
else:
    # Local development - models are in project structure
    # (this file lives in app/, so project root is two levels up).
    PROJECT_ROOT = Path(__file__).parent.parent
    MODEL_ROOT = PROJECT_ROOT / "models"
    CONFIG_PATH = MODEL_ROOT / "model_config.json"
    logger.info("💻 Running in local development environment")

logger.info(f"📂 Model config path: {CONFIG_PATH}")
94
+
95
+ # ============================================================
96
+ # PENNY'S CIVIC IDENTITY & PERSONALITY
97
+ # ============================================================
98
+
99
# System prompt prepended to every text-generation request (see
# ModelClient.predict) unless the caller passes skip_system_prompt=True.
# Treat this text as part of the model contract: do not edit casually.
PENNY_SYSTEM_PROMPT = (
    "You are Penny, a smart, civic-focused AI assistant serving local communities. "
    "You help residents navigate city services, government programs, and community resources. "
    "You're warm, professional, accurate, and always stay within your civic mission.\n\n"

    "Your expertise includes:\n"
    "- Connecting people with local services (food banks, shelters, libraries)\n"
    "- Translating information into 27 languages\n"
    "- Explaining public programs and eligibility\n"
    "- Guiding residents through civic processes\n"
    "- Providing emergency resources when needed\n\n"

    "YOUR PERSONALITY:\n"
    "- Warm and approachable, like a helpful community center staff member\n"
    "- Clear and practical, avoiding jargon\n"
    "- Culturally sensitive and inclusive\n"
    "- Patient with repetition or clarification\n"
    "- Funny when appropriate, but never at anyone's expense\n\n"

    "CRITICAL RULES:\n"
    "- When residents greet you by name (e.g., 'Hi Penny'), respond warmly and personally\n"
    "- You are ALWAYS Penny - never ChatGPT, Assistant, Claude, or any other name\n"
    "- If you don't know something, say so clearly and help find the right resource\n"
    "- NEVER make up information about services, eligibility, or contacts\n"
    "- Stay within your civic mission - you don't provide legal, medical, or financial advice\n"
    "- For emergencies, immediately connect to appropriate services (911, crisis lines)\n\n"
)
126
+
127
# --- GLOBAL STATE ---
# Process-wide caches shared by all ModelClient instances.
# NOTE(review): not guarded by a lock — assumes single-threaded loading; confirm.
_MODEL_CACHE: Dict[str, Any] = {}   # name -> loaded pipeline, for memory-efficient reuse
_LOAD_TIMES: Dict[str, float] = {}  # name -> load duration (seconds), for performance tracking
130
+
131
+
132
+ # ============================================================
133
+ # DEVICE MANAGEMENT
134
+ # ============================================================
135
+
136
class DeviceType(str, Enum):
    """Supported compute devices.

    Subclasses str so members compare equal to plain device strings
    (e.g. DeviceType.CUDA == "cuda") and serialize cleanly.
    """
    CUDA = "cuda"  # NVIDIA GPU
    CPU = "cpu"    # Fallback
    MPS = "mps"    # Apple Silicon
141
+
142
+
143
def get_optimal_device() -> str:
    """
    🎮 Pick the best available compute device for model inference.

    Preference order:
    1. CUDA GPU (NVIDIA)
    2. MPS (Apple Silicon)
    3. CPU (fallback)

    Returns:
        Device string ("cuda", "mps", or "cpu")
    """
    # Guard-clause style: return as soon as a device tier is confirmed.
    if torch.cuda.is_available():
        gpu_name = torch.cuda.get_device_name(0)
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
        logger.info(f"🎮 GPU detected: {gpu_name} ({gpu_memory:.1f}GB)")
        return DeviceType.CUDA.value

    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        logger.info("🍎 Apple Silicon (MPS) detected")
        return DeviceType.MPS.value

    logger.info("💻 Using CPU for inference")
    logger.warning("⚠️ GPU not available - inference will be slower")
    return DeviceType.CPU.value
172
+
173
+
174
def get_memory_stats() -> Dict[str, float]:
    """
    📊 Snapshot current GPU and CPU memory usage.

    Returns:
        Dict with memory figures in GB (plus cpu_percent). GPU keys appear
        only when CUDA is available; CPU keys only when psutil is installed.
    """
    snapshot: Dict[str, float] = {}

    if torch.cuda.is_available():
        snapshot["gpu_allocated_gb"] = torch.cuda.memory_allocated() / 1e9
        snapshot["gpu_reserved_gb"] = torch.cuda.memory_reserved() / 1e9
        snapshot["gpu_total_gb"] = torch.cuda.get_device_properties(0).total_memory / 1e9

    # CPU stats are best-effort: psutil is an optional dependency.
    try:
        import psutil
    except ImportError:
        return snapshot

    mem = psutil.virtual_memory()
    snapshot["cpu_used_gb"] = mem.used / 1e9
    snapshot["cpu_total_gb"] = mem.total / 1e9
    snapshot["cpu_percent"] = mem.percent
    return snapshot
199
+
200
+
201
+ # ============================================================
202
+ # MODEL CLIENT (Individual Model Handler)
203
+ # ============================================================
204
+
205
@dataclass
class ModelMetadata:
    """
    📋 Bookkeeping record for a single model.

    Captures identity (name/task/model_name/device), load timing, and
    cumulative inference statistics for monitoring endpoints.
    """
    name: str
    task: str
    model_name: str
    device: str
    loaded_at: Optional[datetime] = None
    load_time_seconds: Optional[float] = None
    memory_usage_gb: Optional[float] = None
    inference_count: int = 0
    total_inference_time_ms: float = 0.0

    @property
    def avg_inference_time_ms(self) -> float:
        """Mean inference latency in ms; 0.0 before any inference has run."""
        count = self.inference_count
        return self.total_inference_time_ms / count if count else 0.0
227
+
228
+
229
class ModelClient:
    """
    🤖 Manages a single HuggingFace model with optimized loading and inference.

    Features:
    - Lazy loading (load on first use)
    - Memory optimization (8-bit quantization)
    - Performance tracking
    - Graceful error handling
    - Automatic device placement
    """

    def __init__(
        self,
        name: str,
        model_name: str,
        task: str,
        device: Optional[str] = None,
        config: Optional[Dict[str, Any]] = None
    ):
        """
        Initialize model client (doesn't load the model yet).

        Args:
            name: Model identifier (e.g., "penny-core-agent")
            model_name: HuggingFace model ID
            task: Task type (text-generation, translation, etc.)
            device: Target device (auto-detected if None)
            config: Additional model configuration
        """
        self.name = name
        self.model_name = model_name
        self.task = task
        self.device = device or get_optimal_device()
        self.config = config or {}
        self.pipeline = None          # set by load_pipeline(); None until then
        self._load_attempted = False  # prevents repeated failing load attempts
        self.metadata = ModelMetadata(
            name=name,
            task=task,
            model_name=model_name,
            device=self.device
        )

        logger.info(f"📦 Initialized ModelClient: {name}")
        logger.debug(f"   Model: {model_name}")
        logger.debug(f"   Task: {task}")
        logger.debug(f"   Device: {self.device}")

    def load_pipeline(self) -> bool:
        """
        🔄 Loads the HuggingFace pipeline with Azure-optimized settings.

        Features:
        - 8-bit quantization for large models (saves ~50% memory)
        - Automatic device placement
        - Memory monitoring
        - Cache checking

        Returns:
            True if successful, False otherwise
        """
        if self.pipeline is not None:
            logger.debug(f"✅ {self.name} already loaded")
            return True

        # A previous failure marks this client dead; callers must create a
        # new client (or clear _load_attempted) to retry.
        if self._load_attempted:
            logger.warning(f"⚠️ Previous load attempt failed for {self.name}")
            return False

        global _MODEL_CACHE, _LOAD_TIMES

        # Check cache first — another client with the same name may have
        # already loaded this pipeline.
        if self.name in _MODEL_CACHE:
            logger.info(f"♻️ Using cached pipeline for {self.name}")
            self.pipeline = _MODEL_CACHE[self.name]
            return True

        logger.info(f"🔄 Loading {self.name} from HuggingFace...")
        self._load_attempted = True

        start_time = datetime.now()

        try:
            # Import pipeline from transformers (lazy import to avoid dependency issues)
            from transformers import pipeline

            # === TEXT GENERATION (Gemma 7B, GPT-2, etc.) ===
            if self.task == "text-generation":
                logger.info("   Using 8-bit quantization for memory efficiency...")

                # 8-bit only makes sense on CUDA (bitsandbytes requirement).
                use_8bit = self.device == DeviceType.CUDA.value

                if use_8bit:
                    # NOTE(review): load_in_8bit= is deprecated in recent
                    # transformers in favor of BitsAndBytesConfig — confirm
                    # the pinned transformers version supports this kwarg.
                    self.pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        tokenizer=self.model_name,
                        device_map="auto",
                        load_in_8bit=True,  # Reduces ~14GB to ~7GB
                        trust_remote_code=True,
                        torch_dtype=torch.float16
                    )
                else:
                    # CPU fallback — full float32 weights, no quantization.
                    self.pipeline = pipeline(
                        "text-generation",
                        model=self.model_name,
                        tokenizer=self.model_name,
                        device=-1,  # CPU
                        trust_remote_code=True,
                        torch_dtype=torch.float32
                    )

            # === TRANSLATION (NLLB-200, M2M-100, etc.) ===
            elif self.task == "translation":
                self.pipeline = pipeline(
                    "translation",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    # Defaults can be overridden per-call in predict().
                    src_lang=self.config.get("default_src_lang", "eng_Latn"),
                    tgt_lang=self.config.get("default_tgt_lang", "spa_Latn")
                )

            # === SENTIMENT ANALYSIS ===
            elif self.task == "sentiment-analysis":
                self.pipeline = pipeline(
                    "sentiment-analysis",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    truncation=True,
                    max_length=512
                )

            # === BIAS DETECTION (Zero-Shot Classification) ===
            elif self.task == "bias-detection":
                self.pipeline = pipeline(
                    "zero-shot-classification",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1
                )

            # === TEXT CLASSIFICATION (Generic) ===
            elif self.task == "text-classification":
                self.pipeline = pipeline(
                    "text-classification",
                    model=self.model_name,
                    device=0 if self.device == DeviceType.CUDA.value else -1,
                    truncation=True
                )

            # === PDF/DOCUMENT EXTRACTION (LayoutLMv3) ===
            elif self.task == "pdf-extraction":
                logger.warning("⚠️ PDF extraction requires additional OCR setup")
                logger.info("   Consider using Azure Form Recognizer as alternative")
                # Placeholder - requires pytesseract/OCR infrastructure.
                # NOTE(review): _load_attempted stays True, so this task can
                # never be retried on this client instance — confirm intended.
                self.pipeline = None
                return False

            else:
                # Unknown task: raised here, caught by the except below,
                # so the caller still just sees False.
                raise ValueError(f"Unknown task type: {self.task}")

            # === SUCCESS HANDLING ===
            if self.pipeline is not None:
                # Calculate load time
                load_time = (datetime.now() - start_time).total_seconds()
                self.metadata.loaded_at = datetime.now()
                self.metadata.load_time_seconds = load_time

                # Cache the pipeline for reuse by same-named clients
                _MODEL_CACHE[self.name] = self.pipeline
                _LOAD_TIMES[self.name] = load_time

                # Log memory usage (0 when no GPU present)
                mem_stats = get_memory_stats()
                self.metadata.memory_usage_gb = mem_stats.get("gpu_allocated_gb", 0)

                logger.info(f"✅ {self.name} loaded successfully!")
                logger.info(f"   Load time: {load_time:.2f}s")

                if "gpu_allocated_gb" in mem_stats:
                    logger.info(
                        f"   GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB / "
                        f"{mem_stats['gpu_total_gb']:.2f}GB"
                    )

            return True

        except Exception as e:
            logger.error(f"❌ Failed to load {self.name}: {e}", exc_info=True)
            self.pipeline = None
            return False

    def predict(
        self,
        input_data: Union[str, Dict[str, Any]],
        **kwargs
    ) -> Dict[str, Any]:
        """
        🎯 Runs inference with the loaded model pipeline.

        Features:
        - Automatic pipeline loading
        - Error handling with fallback responses
        - Performance tracking
        - Penny's personality injection (for text-generation)

        Args:
            input_data: Text or structured input for the model
                (assumed to be a str for text-generation — TODO confirm)
            **kwargs: Task-specific parameters

        Returns:
            Model output dict with results or error information
        """
        # Track inference start time
        start_time = datetime.now()

        # Ensure pipeline is loaded (lazy loading on first call)
        if self.pipeline is None:
            success = self.load_pipeline()
            if not success:
                # NOTE(review): this error dict has no "success" key,
                # unlike the other failure paths below — confirm callers.
                return {
                    "error": f"{self.name} pipeline unavailable",
                    "detail": "Model failed to load. Check logs for details.",
                    "model": self.name
                }

        try:
            # === TEXT GENERATION ===
            if self.task == "text-generation":
                # Inject Penny's civic identity unless explicitly skipped
                if not kwargs.get("skip_system_prompt", False):
                    full_prompt = PENNY_SYSTEM_PROMPT + input_data
                else:
                    full_prompt = input_data

                # Extract generation parameters with safe defaults
                max_new_tokens = kwargs.get("max_new_tokens", 256)
                temperature = kwargs.get("temperature", 0.7)
                top_p = kwargs.get("top_p", 0.9)
                # Greedy decoding when temperature is 0 (sampling off)
                do_sample = kwargs.get("do_sample", temperature > 0.0)

                result = self.pipeline(
                    full_prompt,
                    max_new_tokens=max_new_tokens,
                    temperature=temperature,
                    top_p=top_p,
                    do_sample=do_sample,
                    return_full_text=False,  # strip the prompt from the output
                    pad_token_id=self.pipeline.tokenizer.eos_token_id,
                    truncation=True
                )

                output = {
                    "generated_text": result[0]["generated_text"],
                    "model": self.name,
                    "success": True
                }

            # === TRANSLATION ===
            elif self.task == "translation":
                src_lang = kwargs.get("source_lang", "eng_Latn")
                tgt_lang = kwargs.get("target_lang", "spa_Latn")

                result = self.pipeline(
                    input_data,
                    src_lang=src_lang,
                    tgt_lang=tgt_lang,
                    max_length=512
                )

                output = {
                    "translation": result[0]["translation_text"],
                    "source_lang": src_lang,
                    "target_lang": tgt_lang,
                    "model": self.name,
                    "success": True
                }

            # === SENTIMENT ANALYSIS ===
            elif self.task == "sentiment-analysis":
                result = self.pipeline(input_data)

                output = {
                    "sentiment": result[0]["label"],
                    "confidence": result[0]["score"],
                    "model": self.name,
                    "success": True
                }

            # === BIAS DETECTION ===
            elif self.task == "bias-detection":
                candidate_labels = kwargs.get("candidate_labels", [
                    "neutral and objective",
                    "contains political bias",
                    "uses emotional language",
                    "culturally insensitive"
                ])

                result = self.pipeline(
                    input_data,
                    candidate_labels=candidate_labels,
                    multi_label=True  # labels scored independently
                )

                output = {
                    "labels": result["labels"],
                    "scores": result["scores"],
                    "model": self.name,
                    "success": True
                }

            # === TEXT CLASSIFICATION ===
            elif self.task == "text-classification":
                result = self.pipeline(input_data)

                output = {
                    "label": result[0]["label"],
                    "confidence": result[0]["score"],
                    "model": self.name,
                    "success": True
                }

            else:
                output = {
                    "error": f"Task '{self.task}' not implemented",
                    "model": self.name,
                    "success": False
                }

            # Track performance (counted even for not-implemented tasks)
            inference_time = (datetime.now() - start_time).total_seconds() * 1000
            self.metadata.inference_count += 1
            self.metadata.total_inference_time_ms += inference_time
            output["inference_time_ms"] = round(inference_time, 2)

            return output

        except Exception as e:
            logger.error(f"❌ Inference error in {self.name}: {e}", exc_info=True)
            return {
                "error": "Inference failed",
                "detail": str(e),
                "model": self.name,
                "success": False
            }

    def unload(self) -> None:
        """
        🗑️ Unloads the model to free memory.
        Critical for Azure environments with limited resources.
        No-op when the pipeline was never loaded.
        """
        if self.pipeline is not None:
            logger.info(f"🗑️ Unloading {self.name}...")

            # Delete pipeline (drops our reference so GC can collect it)
            del self.pipeline
            self.pipeline = None

            # Remove from cache too, or the global reference keeps it alive
            if self.name in _MODEL_CACHE:
                del _MODEL_CACHE[self.name]

            # Force GPU memory release
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

            logger.info(f"✅ {self.name} unloaded successfully")

            # Log memory stats after unload
            mem_stats = get_memory_stats()
            if "gpu_allocated_gb" in mem_stats:
                logger.info(f"   GPU Memory: {mem_stats['gpu_allocated_gb']:.2f}GB remaining")

    def get_metadata(self) -> Dict[str, Any]:
        """
        📊 Returns model metadata and performance stats as a
        JSON-serializable dict (datetimes rendered via isoformat).
        """
        return {
            "name": self.metadata.name,
            "task": self.metadata.task,
            "model_name": self.metadata.model_name,
            "device": self.metadata.device,
            "loaded": self.pipeline is not None,
            "loaded_at": self.metadata.loaded_at.isoformat() if self.metadata.loaded_at else None,
            "load_time_seconds": self.metadata.load_time_seconds,
            "memory_usage_gb": self.metadata.memory_usage_gb,
            "inference_count": self.metadata.inference_count,
            "avg_inference_time_ms": round(self.metadata.avg_inference_time_ms, 2)
        }
620
+
621
+
622
+ # ============================================================
623
+ # MODEL LOADER (Singleton Manager)
624
+ # ============================================================
625
+
626
class ModelLoader:
    """
    🎛️ Singleton manager for all Penny's specialized models.

    Features:
    - Centralized model configuration
    - Lazy loading (models only load when needed)
    - Memory management
    - Health monitoring
    - Unified access interface

    NOTE(review): because __init__ is guarded by _models_loaded, a
    config_path passed to any construction after the first is silently
    ignored — confirm this is acceptable for callers.
    """

    # Shared singleton instance (created on first construction).
    _instance: Optional['ModelLoader'] = None

    def __new__(cls, *args, **kwargs):
        """Singleton pattern - only one ModelLoader instance."""
        if cls._instance is None:
            cls._instance = super(ModelLoader, cls).__new__(cls)
        return cls._instance

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize ModelLoader (only runs once due to singleton).

        Args:
            config_path: Path to model_config.json (optional; defaults
                to the module-level CONFIG_PATH)
        """
        # The guard attribute makes repeated __init__ calls (which Python
        # performs on every construction of a singleton) no-ops.
        if not hasattr(self, '_models_loaded'):
            self.models: Dict[str, ModelClient] = {}
            self._models_loaded = True
            self._initialization_time = datetime.now()

            # Use provided path or default
            config_file = Path(config_path) if config_path else CONFIG_PATH

            try:
                logger.info(f"📖 Loading model configuration from {config_file}")

                if not config_file.exists():
                    # Missing config is non-fatal: loader starts with zero models.
                    logger.warning(f"⚠️ Configuration file not found: {config_file}")
                    logger.info("   Create model_config.json with your model definitions")
                    return

                with open(config_file, "r") as f:
                    config = json.load(f)

                # Initialize ModelClients (doesn't load models yet — lazy)
                for model_id, model_info in config.items():
                    self.models[model_id] = ModelClient(
                        name=model_id,
                        model_name=model_info["model_name"],
                        task=model_info["task"],
                        config=model_info.get("config", {})
                    )

                logger.info(f"✅ ModelLoader initialized with {len(self.models)} models:")
                for model_id in self.models.keys():
                    logger.info(f"   - {model_id}")

            except json.JSONDecodeError as e:
                logger.error(f"❌ Invalid JSON in model_config.json: {e}")
            except Exception as e:
                logger.error(f"❌ Failed to initialize ModelLoader: {e}", exc_info=True)

    def get(self, model_id: str) -> Optional[ModelClient]:
        """
        🎯 Retrieves a configured ModelClient by ID.

        Args:
            model_id: Model identifier from config

        Returns:
            ModelClient instance or None if not found
        """
        return self.models.get(model_id)

    def list_models(self) -> List[str]:
        """📋 Returns list of all available model IDs."""
        return list(self.models.keys())

    def get_loaded_models(self) -> List[str]:
        """📋 Returns list of model IDs whose pipelines are currently in memory."""
        return [
            model_id
            for model_id, client in self.models.items()
            if client.pipeline is not None
        ]

    def unload_all(self) -> None:
        """
        🗑️ Unloads all models to free memory.
        Useful for Azure environments when switching workloads.
        """
        logger.info("🗑️ Unloading all models...")
        for model_client in self.models.values():
            model_client.unload()
        logger.info("✅ All models unloaded")

    def get_status(self) -> Dict[str, Any]:
        """
        📊 Returns comprehensive status of all models.
        Useful for health checks and monitoring endpoints.
        """
        status = {
            "initialization_time": self._initialization_time.isoformat(),
            "total_models": len(self.models),
            "loaded_models": len(self.get_loaded_models()),
            "device": get_optimal_device(),
            "memory": get_memory_stats(),
            "models": {}
        }

        # Per-model metadata keyed by model ID
        for model_id, client in self.models.items():
            status["models"][model_id] = client.get_metadata()

        return status
742
+
743
+
744
+ # ============================================================
745
+ # PUBLIC INTERFACE (Used by all *_utils.py modules)
746
+ # ============================================================
747
+
748
def load_model_pipeline(agent_name: str) -> Callable[..., Dict[str, Any]]:
    """
    🚀 Resolve a configured agent and return a callable inference function.

    This is the main entry point used by the *_utils.py modules
    (translation_utils.py, sentiment_utils.py, etc.) to reach Penny's models.

    Args:
        agent_name: Model ID from model_config.json

    Returns:
        Callable that forwards (input_data, **kwargs) to the client's predict()

    Raises:
        ValueError: If agent_name not found in configuration

    Example:
        >>> translator = load_model_pipeline("penny-translate-agent")
        >>> result = translator("Hello world", target_lang="spa_Latn")
    """
    registry = ModelLoader()
    client = registry.get(agent_name)

    if client is None:
        available = registry.list_models()
        raise ValueError(
            f"Agent ID '{agent_name}' not found in model configuration. "
            f"Available models: {available}"
        )

    # Trigger lazy loading up front so the first call doesn't pay for it;
    # predict() retries/handles failures itself.
    client.load_pipeline()

    return lambda input_data, **kwargs: client.predict(input_data, **kwargs)
786
+
787
+
788
+ # === CONVENIENCE FUNCTIONS ===
789
+
790
def get_model_status() -> Dict[str, Any]:
    """
    📊 Report the status of every configured model.
    Intended for health-check and monitoring endpoints.
    """
    return ModelLoader().get_status()
797
+
798
+
799
def preload_models(model_ids: Optional[List[str]] = None) -> None:
    """
    🚀 Eagerly load the given models during startup.

    Args:
        model_ids: List of model IDs to preload (None = all models)
    """
    registry = ModelLoader()
    targets = registry.list_models() if model_ids is None else model_ids

    logger.info(f"🚀 Preloading {len(targets)} models...")

    for model_id in targets:
        client = registry.get(model_id)
        if client:
            logger.info(f"   Loading {model_id}...")
            client.load_pipeline()

    logger.info("✅ Model preloading complete")
820
+
821
+
822
def initialize_model_system() -> bool:
    """
    🏁 Bootstrap the model system; call once during app startup.

    Instantiates the ModelLoader singleton and logs device and memory
    information. Models themselves are still loaded lazily.

    Returns:
        True if initialization successful
    """
    logger.info("🧠 Initializing Penny's model system...")

    try:
        loader = ModelLoader()  # creates/returns the singleton

        device = get_optimal_device()
        mem_stats = get_memory_stats()

        logger.info(f"✅ Model system initialized")
        logger.info(f"🎮 Compute device: {device}")

        if "gpu_total_gb" in mem_stats:
            logger.info(
                f"💾 GPU Memory: {mem_stats['gpu_total_gb']:.1f}GB total"
            )

        logger.info(f"📦 {len(loader.models)} models configured")

        # Preloading critical models at startup is opt-in:
        # preload_models(["penny-core-agent"])

        return True

    except Exception as e:
        logger.error(f"❌ Failed to initialize model system: {e}", exc_info=True)
        return False
859
+
860
+
861
+ # ============================================================
862
+ # CLI TESTING & DEBUGGING
863
+ # ============================================================
864
+
865
if __name__ == "__main__":
    # NOTE(review): the string below is a no-op expression statement, not a
    # real docstring (module docstrings must be the first statement) — kept
    # as-is since removing it would be a code change.
    """
    🧪 Test script for model loading and inference.
    Run with: python -m app.model_loader
    """
    print("=" * 60)
    print("🧪 Testing Penny's Model System")
    print("=" * 60)

    # Initialize the singleton and show what's configured
    loader = ModelLoader()
    print(f"\n📋 Available models: {loader.list_models()}")

    # Get status (device, memory, per-model metadata)
    status = get_model_status()
    print(f"\n📊 System status:")
    print(json.dumps(status, indent=2, default=str))

    # Test model loading (if models configured) — exercises the first
    # configured model only; this downloads weights on first run.
    if loader.models:
        test_model_id = list(loader.models.keys())[0]
        print(f"\n🧪 Testing model: {test_model_id}")

        client = loader.get(test_model_id)
        if client:
            print(f"   Loading pipeline...")
            success = client.load_pipeline()

            if success:
                print(f"   ✅ Model loaded successfully!")
                print(f"   Metadata: {json.dumps(client.get_metadata(), indent=2, default=str)}")
            else:
                print(f"   ❌ Model loading failed")
app/orchestrator.py ADDED
@@ -0,0 +1,1410 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🎭 PENNY Orchestrator - Request Routing & Coordination Engine
3
+
4
+ This is Penny's decision-making brain. She analyzes each request, determines
5
+ the best way to help, and coordinates between her specialized AI models and
6
+ civic data tools.
7
+
8
+ MISSION: Route every resident request to the right resource while maintaining
9
+ Penny's warm, helpful personality and ensuring fast, accurate responses.
10
+
11
+ FEATURES:
12
+ - Enhanced intent classification with confidence scoring
13
+ - Compound intent handling (weather + events)
14
+ - Graceful fallbacks when services are unavailable
15
+ - Performance tracking for all operations
16
+ - Context-aware responses
17
+ - Emergency routing with immediate escalation
18
+
19
+ ENHANCEMENTS (Phase 1):
20
+ - ✅ Structured logging with performance tracking
21
+ - ✅ Safe imports with availability flags
22
+ - ✅ Result format checking helper
23
+ - ✅ Enhanced error handling patterns
24
+ - ✅ Service availability tracking
25
+ - ✅ Fixed function signature mismatches
26
+ - ✅ Integration with enhanced modules
27
+ """
28
+
29
+ import logging
30
+ import time
31
+ from typing import Dict, Any, Optional, List, Tuple
32
+ from datetime import datetime
33
+ from dataclasses import dataclass, field
34
+ from enum import Enum
35
+
36
+ # --- ENHANCED MODULE IMPORTS ---
37
+ from app.intents import classify_intent_detailed, IntentType, IntentMatch
38
+ from app.location_utils import (
39
+ extract_location_detailed,
40
+ LocationMatch,
41
+ LocationStatus,
42
+ get_city_coordinates
43
+ )
44
+ from app.logging_utils import (
45
+ log_interaction,
46
+ sanitize_for_logging,
47
+ LogLevel
48
+ )
49
+
50
+ # --- AGENT IMPORTS (with availability tracking) ---
51
+ try:
52
+ from app.weather_agent import (
53
+ get_weather_for_location,
54
+ recommend_outfit,
55
+ weather_to_event_recommendations,
56
+ format_weather_summary
57
+ )
58
+ WEATHER_AGENT_AVAILABLE = True
59
+ except ImportError as e:
60
+ logger = logging.getLogger(__name__)
61
+ logger.warning(f"Weather agent not available: {e}")
62
+ WEATHER_AGENT_AVAILABLE = False
63
+
64
+ try:
65
+ from app.event_weather import get_event_recommendations_with_weather
66
+ EVENT_WEATHER_AVAILABLE = True
67
+ except ImportError as e:
68
+ logger = logging.getLogger(__name__)
69
+ logger.warning(f"Event weather integration not available: {e}")
70
+ EVENT_WEATHER_AVAILABLE = False
71
+
72
+ try:
73
+ from app.tool_agent import handle_tool_request
74
+ TOOL_AGENT_AVAILABLE = True
75
+ except ImportError as e:
76
+ logger = logging.getLogger(__name__)
77
+ logger.warning(f"Tool agent not available: {e}")
78
+ TOOL_AGENT_AVAILABLE = False
79
+
80
+ # --- MODEL IMPORTS (with availability tracking) ---
81
+ try:
82
+ from models.translation.translation_utils import translate_text
83
+ TRANSLATION_AVAILABLE = True
84
+ except ImportError as e:
85
+ logger = logging.getLogger(__name__)
86
+ logger.warning(f"Translation service not available: {e}")
87
+ TRANSLATION_AVAILABLE = False
88
+
89
+ try:
90
+ from models.sentiment.sentiment_utils import get_sentiment_analysis
91
+ SENTIMENT_AVAILABLE = True
92
+ except ImportError as e:
93
+ logger = logging.getLogger(__name__)
94
+ logger.warning(f"Sentiment service not available: {e}")
95
+ SENTIMENT_AVAILABLE = False
96
+
97
+ try:
98
+ from models.bias.bias_utils import check_bias
99
+ BIAS_AVAILABLE = True
100
+ except ImportError as e:
101
+ logger = logging.getLogger(__name__)
102
+ logger.warning(f"Bias detection service not available: {e}")
103
+ BIAS_AVAILABLE = False
104
+
105
+ try:
106
+ from models.gemma.gemma_utils import generate_response
107
+ LLM_AVAILABLE = True
108
+ except ImportError as e:
109
+ logger = logging.getLogger(__name__)
110
+ logger.warning(f"LLM service not available: {e}")
111
+ LLM_AVAILABLE = False
112
+
113
# --- LOGGING SETUP ---
# Module-level logger; also assigned inside the guarded-import except blocks
# above so import failures can be logged before this line runs.
logger = logging.getLogger(__name__)

# --- CONFIGURATION ---
CORE_MODEL_ID = "penny-core-agent"  # identifier of the core conversational model
MAX_RESPONSE_TIME_MS = 5000  # 5 seconds - log if exceeded

# --- TRACKING COUNTERS ---
# Process-lifetime counters, incremented by run_orchestrator() and
# _handle_emergency() respectively (not thread-safe; best-effort metrics).
_orchestration_count = 0
_emergency_count = 0
123
+
124
+
125
+ # ============================================================
126
+ # COMPATIBILITY HELPER - Result Format Checking
127
+ # ============================================================
128
+
129
def _check_result_success(
    result: Dict[str, Any],
    expected_keys: List[str]
) -> Tuple[bool, Optional[str]]:
    """
    ✅ Decide whether a utility-function result dict represents success.

    Different utility modules return different result shapes; this helper
    normalizes them using three rules, checked in order:

    1. An explicit ``"success"`` key wins (its ``"error"`` value, if any,
       is passed through).
    2. A truthy ``"error"`` value means explicit failure.
    3. Any of the *expected_keys* being present means implicit success.

    Anything else is treated as failure with a generic message.

    Args:
        result: Dictionary returned from a utility function.
        expected_keys: Keys whose presence indicates successful data.

    Returns:
        ``(is_success, error_message)`` — *error_message* is None on success.

    Example:
        result = await translate_text(message, "en", "es")
        success, error = _check_result_success(result, ["translated_text"])
        if success:
            text = result.get("translated_text")
    """
    # Rule 1: explicit success flag (preferred contract).
    if "success" in result:
        return result["success"], result.get("error")

    # Rule 2: a truthy error value is an explicit failure.
    error_value = result.get("error")
    if error_value:
        return False, error_value

    # Rule 3: presence of any expected data key implies success.
    for key in expected_keys:
        if key in result:
            return True, None

    # Unknown shape — assume failure.
    return False, "Unexpected response format"
172
+
173
+
174
+ # ============================================================
175
+ # SERVICE AVAILABILITY CHECK
176
+ # ============================================================
177
+
178
def get_service_availability() -> Dict[str, bool]:
    """
    📊 Report which optional services loaded successfully.

    The availability flags are fixed at import time by the guarded
    imports at the top of this module. Used by health checks, debugging,
    and handlers deciding between a real call and a fallback reply.

    Returns:
        Mapping of service name -> True when the service can be called.
    """
    availability: Dict[str, bool] = {}
    availability["translation"] = TRANSLATION_AVAILABLE
    availability["sentiment"] = SENTIMENT_AVAILABLE
    availability["bias_detection"] = BIAS_AVAILABLE
    availability["llm"] = LLM_AVAILABLE
    availability["tool_agent"] = TOOL_AGENT_AVAILABLE
    availability["weather"] = WEATHER_AGENT_AVAILABLE
    availability["event_weather"] = EVENT_WEATHER_AVAILABLE
    return availability
197
+
198
+
199
+ # ============================================================
200
+ # ORCHESTRATION RESULT STRUCTURE
201
+ # ============================================================
202
+
203
@dataclass
class OrchestrationResult:
    """
    📦 Structured result produced by the orchestration pipeline.

    A single record describing what happened while handling one request:
    the detected intent, the user-facing reply, and diagnostic metadata
    (timing, confidence, which model answered, whether a fallback fired).
    """
    intent: str                              # detected intent name
    reply: str                               # user-facing response text
    success: bool                            # whether the request succeeded
    tenant_id: Optional[str] = None          # city/location identifier
    data: Optional[Dict[str, Any]] = None    # raw data from downstream services
    model_id: Optional[str] = None           # which model/service produced the reply
    error: Optional[str] = None              # error message when success is False
    response_time_ms: Optional[float] = None  # end-to-end latency
    confidence: Optional[float] = None       # intent-classification confidence
    fallback_used: bool = False              # True if fallback logic triggered

    def to_dict(self) -> Dict[str, Any]:
        """Serialize this result into a plain dict for API responses."""
        return dict(
            intent=self.intent,
            reply=self.reply,
            success=self.success,
            tenant_id=self.tenant_id,
            data=self.data,
            model_id=self.model_id,
            error=self.error,
            response_time_ms=self.response_time_ms,
            confidence=self.confidence,
            fallback_used=self.fallback_used,
        )
236
+
237
+
238
+ # ============================================================
239
+ # MAIN ORCHESTRATOR FUNCTION (ENHANCED)
240
+ # ============================================================
241
+
242
async def run_orchestrator(
    message: str,
    context: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    🧠 Main decision-making brain of Penny.

    This function:
    1. Analyzes the user's message to determine intent
    2. Extracts location/city information
    3. Routes to the appropriate specialized service
    4. Handles errors gracefully with helpful fallbacks
    5. Tracks performance and logs the interaction

    Args:
        message: User's input text.
        context: Additional context (tenant_id, lat, lon, session_id, etc.).
            Defaults to an empty dict when None.

    Returns:
        Dictionary with response and metadata (see OrchestrationResult.to_dict).

    Example:
        result = await run_orchestrator(
            message="What's the weather in Atlanta?",
            context={"lat": 33.7490, "lon": -84.3880}
        )
    """
    global _orchestration_count
    _orchestration_count += 1

    start_time = time.time()

    # Initialize context if not provided (fix: annotation is now
    # Optional[...] to match the None default).
    if context is None:
        context = {}

    # Sanitize message for logging (PII protection)
    safe_message = sanitize_for_logging(message)
    logger.info(f"🎭 Orchestrator processing: '{safe_message[:50]}...'")

    try:
        # === STEP 1: CLASSIFY INTENT (Enhanced) ===
        intent_result = classify_intent_detailed(message)
        intent = intent_result.intent
        confidence = intent_result.confidence

        logger.info(
            f"Intent detected: {intent.value} "
            f"(confidence: {confidence:.2f})"
        )

        # === STEP 2: EXTRACT LOCATION ===
        tenant_id = context.get("tenant_id")
        lat = context.get("lat")
        lon = context.get("lon")

        # If tenant_id not provided, try to extract from message
        if not tenant_id or tenant_id == "unknown":
            location_result = extract_location_detailed(message)

            if location_result.status == LocationStatus.FOUND:
                tenant_id = location_result.tenant_id
                logger.info(f"Location extracted: {tenant_id}")

                # Get coordinates for this tenant if available; explicit
                # coordinates in context take precedence over city lookup.
                coords = get_city_coordinates(tenant_id)
                if coords and lat is None and lon is None:
                    lat, lon = coords["lat"], coords["lon"]
                    logger.info(f"Coordinates loaded: {lat}, {lon}")

            elif location_result.status == LocationStatus.USER_LOCATION_NEEDED:
                logger.info("User location services needed")
            else:
                logger.info(f"No location detected: {location_result.status}")

        # === STEP 3: HANDLE EMERGENCY INTENTS (CRITICAL) ===
        # Emergencies short-circuit the normal routing/logging path and
        # return immediately after attaching metadata.
        if intent == IntentType.EMERGENCY:
            result = await _handle_emergency(
                message=message,
                context=context,
                start_time=start_time
            )
            # Set confidence and metadata before returning
            result.confidence = confidence
            result.tenant_id = tenant_id
            response_time = (time.time() - start_time) * 1000
            result.response_time_ms = round(response_time, 2)
            return result.to_dict()

        # === STEP 4: ROUTE TO APPROPRIATE HANDLER ===

        # Translation
        if intent == IntentType.TRANSLATION:
            result = await _handle_translation(message, context)

        # Sentiment Analysis
        elif intent == IntentType.SENTIMENT_ANALYSIS:
            result = await _handle_sentiment(message, context)

        # Bias Detection
        elif intent == IntentType.BIAS_DETECTION:
            result = await _handle_bias(message, context)

        # Document Processing
        elif intent == IntentType.DOCUMENT_PROCESSING:
            result = await _handle_document(message, context)

        # Weather (includes compound weather+events handling)
        elif intent == IntentType.WEATHER:
            result = await _handle_weather(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon,
                intent_result=intent_result
            )

        # Events
        elif intent == IntentType.EVENTS:
            result = await _handle_events(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon,
                intent_result=intent_result
            )

        # Local Resources
        elif intent == IntentType.LOCAL_RESOURCES:
            result = await _handle_local_resources(
                message=message,
                context=context,
                tenant_id=tenant_id,
                lat=lat,
                lon=lon
            )

        # Greeting, Help, Unknown
        elif intent in [IntentType.GREETING, IntentType.HELP, IntentType.UNKNOWN]:
            result = await _handle_conversational(
                message=message,
                intent=intent,
                context=context
            )

        else:
            # Unhandled intent type (shouldn't happen, but safety net)
            result = await _handle_fallback(message, intent, context)

        # === STEP 5: ADD METADATA & LOG INTERACTION ===
        response_time = (time.time() - start_time) * 1000
        result.response_time_ms = round(response_time, 2)
        result.confidence = confidence
        result.tenant_id = tenant_id

        # Log the interaction with structured logging
        log_interaction(
            tenant_id=tenant_id or "unknown",
            interaction_type="orchestration",
            intent=intent.value,
            response_time_ms=response_time,
            success=result.success,
            metadata={
                "confidence": confidence,
                "fallback_used": result.fallback_used,
                "model_id": result.model_id,
                "orchestration_count": _orchestration_count
            }
        )

        # Log slow responses
        if response_time > MAX_RESPONSE_TIME_MS:
            logger.warning(
                f"⚠️ Slow response: {response_time:.0f}ms "
                f"(intent: {intent.value})"
            )

        logger.info(
            f"✅ Orchestration complete: {intent.value} "
            f"({response_time:.0f}ms)"
        )

        return result.to_dict()

    except Exception as e:
        # === CATASTROPHIC FAILURE HANDLER ===
        # Top-level boundary: broad catch is deliberate so the caller
        # always receives a well-formed error payload.
        response_time = (time.time() - start_time) * 1000
        logger.error(
            f"❌ Orchestrator error: {e} "
            f"(response_time: {response_time:.0f}ms)",
            exc_info=True
        )

        # Log failed interaction
        log_interaction(
            tenant_id=context.get("tenant_id", "unknown"),
            interaction_type="orchestration_error",
            intent="error",
            response_time_ms=response_time,
            success=False,
            metadata={
                "error": str(e),
                "error_type": type(e).__name__
            }
        )

        error_result = OrchestrationResult(
            intent="error",
            reply=(
                "I'm having trouble processing your request right now. "
                "Please try again in a moment, or let me know if you need "
                "immediate assistance! 💛"
            ),
            success=False,
            error=str(e),
            model_id="orchestrator",
            fallback_used=True,
            response_time_ms=round(response_time, 2)
        )

        return error_result.to_dict()
465
+
466
+
467
+ # ============================================================
468
+ # SPECIALIZED INTENT HANDLERS (ENHANCED)
469
+ # ============================================================
470
+
471
async def _handle_emergency(
    message: str,
    context: Dict[str, Any],
    start_time: float
) -> OrchestrationResult:
    """
    🚨 CRITICAL: Emergency intent handler.

    This function handles crisis situations with immediate routing
    to appropriate services. All emergency interactions are logged
    for compliance and safety tracking.

    IMPORTANT: This is a compliance-critical function. All emergency
    interactions must be logged and handled with priority.

    Args:
        message: User's input text that triggered the emergency intent.
        context: Request context; only "tenant_id" is read here.
        start_time: time.time() captured when orchestration began,
            used to compute response_time_ms.

    Returns:
        OrchestrationResult with crisis resources in the reply and
        success=True (providing resources counts as success).
    """
    global _emergency_count
    _emergency_count += 1

    # Sanitize message for logging (but keep full context for safety review)
    safe_message = sanitize_for_logging(message)
    logger.warning(f"🚨 EMERGENCY INTENT DETECTED (#{_emergency_count}): {safe_message[:100]}")

    # TODO: Integrate with safety_utils.py when enhanced
    # from app.safety_utils import route_emergency
    # result = await route_emergency(message, context)

    # For now, provide crisis resources
    reply = (
        "🚨 **If this is a life-threatening emergency, please call 911 immediately.**\n\n"
        "For crisis support:\n"
        "- **National Suicide Prevention Lifeline:** 988\n"
        "- **Crisis Text Line:** Text HOME to 741741\n"
        "- **National Domestic Violence Hotline:** 1-800-799-7233\n\n"
        "I'm here to help connect you with local resources. "
        "What kind of support do you need right now?"
    )

    # Log emergency interaction for compliance (CRITICAL)
    response_time = (time.time() - start_time) * 1000
    log_interaction(
        tenant_id=context.get("tenant_id", "emergency"),
        interaction_type="emergency",
        intent=IntentType.EMERGENCY.value,
        response_time_ms=response_time,
        success=True,
        metadata={
            "emergency_number": _emergency_count,
            "message_length": len(message),
            "timestamp": datetime.now().isoformat(),
            "action": "crisis_resources_provided"
        }
    )

    # CRITICAL severity so emergency events stand out in aggregated logs.
    logger.critical(
        f"EMERGENCY LOG #{_emergency_count}: Resources provided "
        f"({response_time:.0f}ms)"
    )

    return OrchestrationResult(
        intent=IntentType.EMERGENCY.value,
        reply=reply,
        success=True,
        model_id="emergency_router",
        data={"crisis_resources_provided": True},
        response_time_ms=round(response_time, 2)
    )
537
+
538
+
539
+ async def _handle_translation(
540
+ message: str,
541
+ context: Dict[str, Any]
542
+ ) -> OrchestrationResult:
543
+ """
544
+ 🌍 Translation handler - 27 languages supported.
545
+
546
+ Handles translation requests with graceful fallback if service
547
+ is unavailable.
548
+ """
549
+ logger.info("🌍 Processing translation request")
550
+
551
+ # Check service availability first
552
+ if not TRANSLATION_AVAILABLE:
553
+ logger.warning("Translation service not available")
554
+ return OrchestrationResult(
555
+ intent=IntentType.TRANSLATION.value,
556
+ reply="Translation isn't available right now. Try again soon! 🌍",
557
+ success=False,
558
+ error="Service not loaded",
559
+ fallback_used=True
560
+ )
561
+
562
+ try:
563
+ # Extract language parameters from context or parse from message
564
+ source_lang = context.get("source_lang", "eng_Latn")
565
+ target_lang = context.get("target_lang", "spa_Latn")
566
+
567
+ # Parse target language from message if present
568
+ # Examples: "translate to Spanish", "in Spanish", "to Spanish"
569
+ message_lower = message.lower()
570
+ language_keywords = {
571
+ "spanish": "spa_Latn", "español": "spa_Latn", "es": "spa_Latn",
572
+ "french": "fra_Latn", "français": "fra_Latn", "fr": "fra_Latn",
573
+ "chinese": "zho_Hans", "mandarin": "zho_Hans", "zh": "zho_Hans",
574
+ "arabic": "arb_Arab", "ar": "arb_Arab",
575
+ "hindi": "hin_Deva", "hi": "hin_Deva",
576
+ "portuguese": "por_Latn", "pt": "por_Latn",
577
+ "russian": "rus_Cyrl", "ru": "rus_Cyrl",
578
+ "german": "deu_Latn", "de": "deu_Latn",
579
+ "vietnamese": "vie_Latn", "vi": "vie_Latn",
580
+ "tagalog": "tgl_Latn", "tl": "tgl_Latn",
581
+ "urdu": "urd_Arab", "ur": "urd_Arab",
582
+ "swahili": "swh_Latn", "sw": "swh_Latn",
583
+ "english": "eng_Latn", "en": "eng_Latn"
584
+ }
585
+
586
+ # Check for "to [language]" or "in [language]" patterns
587
+ for lang_name, lang_code in language_keywords.items():
588
+ if f"to {lang_name}" in message_lower or f"in {lang_name}" in message_lower:
589
+ target_lang = lang_code
590
+ logger.info(f"🌍 Detected target language from message: {lang_name} -> {lang_code}")
591
+ break
592
+
593
+ result = await translate_text(message, source_lang, target_lang)
594
+
595
+ # Check if translation service was actually available
596
+ if not result.get("available", True):
597
+ error_msg = result.get("error", "Translation service is temporarily unavailable.")
598
+ logger.warning(f"Translation service unavailable: {error_msg}")
599
+ return OrchestrationResult(
600
+ intent=IntentType.TRANSLATION.value,
601
+ reply=(
602
+ "I'm having trouble accessing the translation service right now. "
603
+ "Please try again in a moment! 🌍"
604
+ ),
605
+ success=False,
606
+ error=error_msg,
607
+ fallback_used=True
608
+ )
609
+
610
+ # Use compatibility helper to check result
611
+ success, error = _check_result_success(result, ["translated_text"])
612
+
613
+ if success:
614
+ translated = result.get("translated_text", "")
615
+
616
+ # Check if translation was skipped (same source/target language)
617
+ if result.get("skipped", False):
618
+ reply = (
619
+ f"The text is already in {target_lang}. "
620
+ f"No translation needed! 🌍"
621
+ )
622
+ else:
623
+ reply = (
624
+ f"Here's the translation:\n\n"
625
+ f"**{translated}**\n\n"
626
+ f"(Translated from {source_lang} to {target_lang})"
627
+ )
628
+
629
+ return OrchestrationResult(
630
+ intent=IntentType.TRANSLATION.value,
631
+ reply=reply,
632
+ success=True,
633
+ data=result,
634
+ model_id="penny-translate-agent"
635
+ )
636
+ else:
637
+ raise Exception(error or "Translation failed")
638
+
639
+ except Exception as e:
640
+ logger.error(f"Translation error: {e}", exc_info=True)
641
+ return OrchestrationResult(
642
+ intent=IntentType.TRANSLATION.value,
643
+ reply=(
644
+ "I had trouble translating that. Could you rephrase? 💬"
645
+ ),
646
+ success=False,
647
+ error=str(e),
648
+ fallback_used=True
649
+ )
650
+
651
+
652
async def _handle_sentiment(
    message: str,
    context: Dict[str, Any]
) -> OrchestrationResult:
    """
    😊 Sentiment analysis handler.

    Runs the sentiment model over the user's text and reports the
    detected emotional tone, falling back gracefully when the service
    never loaded or the analysis call fails.
    """
    logger.info("😊 Processing sentiment analysis")

    # Guard clause: bail out early when the model never loaded.
    if not SENTIMENT_AVAILABLE:
        logger.warning("Sentiment service not available")
        return OrchestrationResult(
            intent=IntentType.SENTIMENT_ANALYSIS.value,
            reply="Sentiment analysis isn't available right now. Try again soon! 😊",
            success=False,
            error="Service not loaded",
            fallback_used=True
        )

    try:
        analysis = await get_sentiment_analysis(message)

        # Normalize the utility's result shape via the shared helper.
        ok, failure_reason = _check_result_success(analysis, ["label", "score"])
        if not ok:
            raise Exception(failure_reason or "Sentiment analysis failed")

        detected_label = analysis.get("label", "neutral")
        detected_score = analysis.get("score", 0.0)

        summary = (
            f"The overall sentiment detected is: **{detected_label}**\n"
            f"Confidence: {detected_score:.1%}"
        )

        return OrchestrationResult(
            intent=IntentType.SENTIMENT_ANALYSIS.value,
            reply=summary,
            success=True,
            data=analysis,
            model_id="penny-sentiment-agent"
        )

    except Exception as e:
        logger.error(f"Sentiment analysis error: {e}", exc_info=True)
        return OrchestrationResult(
            intent=IntentType.SENTIMENT_ANALYSIS.value,
            reply="I couldn't analyze the sentiment right now. Try again? 😊",
            success=False,
            error=str(e),
            fallback_used=True
        )
709
+
710
async def _handle_bias(
    message: str,
    context: Dict[str, Any]
) -> OrchestrationResult:
    """
    ⚖️ Bias detection handler.

    Analyzes text for potential bias patterns with graceful fallback
    if service is unavailable.

    Args:
        message: User's input text to analyze.
        context: Request context (unused here; kept for handler-signature
            consistency with the other _handle_* functions).

    Returns:
        OrchestrationResult with the top bias category and confidence,
        or a fallback reply on failure.
    """
    logger.info("⚖️ Processing bias detection")

    # Check service availability first
    if not BIAS_AVAILABLE:
        logger.warning("Bias detection service not available")
        return OrchestrationResult(
            intent=IntentType.BIAS_DETECTION.value,
            reply="Bias detection isn't available right now. Try again soon! ⚖️",
            success=False,
            error="Service not loaded",
            fallback_used=True
        )

    try:
        result = await check_bias(message)

        # Use compatibility helper to check result
        success, error = _check_result_success(result, ["analysis"])

        if success:
            # "analysis" is presumably a score-ordered list of
            # {"label", "score"} dicts — confirm against bias_utils.
            analysis = result.get("analysis", [])

            if analysis:
                # Report only the highest-ranked category.
                top_result = analysis[0]
                label = top_result.get("label", "unknown")
                score = top_result.get("score", 0.0)

                reply = (
                    f"Bias analysis complete:\n\n"
                    f"**Most likely category:** {label}\n"
                    f"**Confidence:** {score:.1%}"
                )
            else:
                # Empty analysis list: nothing flagged.
                reply = "The text appears relatively neutral. ⚖️"

            return OrchestrationResult(
                intent=IntentType.BIAS_DETECTION.value,
                reply=reply,
                success=True,
                data=result,
                model_id="penny-bias-checker"
            )
        else:
            raise Exception(error or "Bias detection failed")

    except Exception as e:
        logger.error(f"Bias detection error: {e}", exc_info=True)
        return OrchestrationResult(
            intent=IntentType.BIAS_DETECTION.value,
            reply="I couldn't check for bias right now. Try again? ⚖️",
            success=False,
            error=str(e),
            fallback_used=True
        )
774
+
775
+
776
async def _handle_document(
    message: str,
    context: Dict[str, Any]
) -> OrchestrationResult:
    """
    📄 Document processing handler.

    Only returns upload instructions — the actual file upload is handled
    in router.py by FastAPI via the `/upload-document` endpoint.
    """
    logger.info("📄 Document processing requested")

    instructions = (
        "I can help you process documents! 📄\n\n"
        "Please upload your document (PDF or image) using the "
        "`/upload-document` endpoint. I can extract text, analyze forms, "
        "and help you understand civic documents.\n\n"
        "What kind of document do you need help with?"
    )

    return OrchestrationResult(
        intent=IntentType.DOCUMENT_PROCESSING.value,
        reply=instructions,
        success=True,
        model_id="document_router"
    )
802
+
803
+
804
+ async def _handle_weather(
805
+ message: str,
806
+ context: Dict[str, Any],
807
+ tenant_id: Optional[str],
808
+ lat: Optional[float],
809
+ lon: Optional[float],
810
+ intent_result: IntentMatch
811
+ ) -> OrchestrationResult:
812
+ """
813
+ 🌤️ Weather handler with compound intent support.
814
+
815
+ Handles both simple weather queries and compound weather+events queries.
816
+ Uses enhanced weather_agent.py with caching and performance tracking.
817
+ """
818
+ logger.info("🌤️ Processing weather request")
819
+
820
+ # Check service availability first
821
+ if not WEATHER_AGENT_AVAILABLE:
822
+ logger.warning("Weather agent not available")
823
+ return OrchestrationResult(
824
+ intent=IntentType.WEATHER.value,
825
+ reply="Weather service isn't available right now. Try again soon! 🌤️",
826
+ success=False,
827
+ error="Weather agent not loaded",
828
+ fallback_used=True
829
+ )
830
+
831
+ # Check for compound intent (weather + events)
832
+ is_compound = intent_result.is_compound or IntentType.EVENTS in intent_result.secondary_intents
833
+
834
+ # === ENHANCED LOCATION RESOLUTION ===
835
+ # Try multiple strategies to get coordinates
836
+
837
+ # Strategy 1: Use provided coordinates
838
+ if lat is not None and lon is not None:
839
+ logger.info(f"Using provided coordinates: {lat}, {lon}")
840
+
841
+ # Strategy 2: Get coordinates from tenant_id (try multiple formats)
842
+ elif tenant_id:
843
+ # Try tenant_id as-is first
844
+ coords = get_city_coordinates(tenant_id)
845
+
846
+ # If that fails and tenant_id doesn't have state suffix, try adding common suffixes
847
+ if not coords and "_" not in tenant_id:
848
+ # Try common state abbreviations for known cities
849
+ state_suffixes = ["_va", "_ga", "_al", "_tx", "_ri", "_wa"]
850
+ for suffix in state_suffixes:
851
+ test_tenant_id = tenant_id + suffix
852
+ coords = get_city_coordinates(test_tenant_id)
853
+ if coords:
854
+ tenant_id = test_tenant_id # Update tenant_id to normalized form
855
+ logger.info(f"Normalized tenant_id to {tenant_id}")
856
+ break
857
+
858
+ if coords:
859
+ lat, lon = coords["lat"], coords["lon"]
860
+ logger.info(f"✅ Using city coordinates for {tenant_id}: {lat}, {lon}")
861
+
862
+ # Strategy 3: Extract location from message if still no coordinates
863
+ if lat is None or lon is None:
864
+ logger.info("No coordinates from tenant_id, trying to extract from message")
865
+ location_result = extract_location_detailed(message)
866
+
867
+ if location_result.status == LocationStatus.FOUND:
868
+ extracted_tenant_id = location_result.tenant_id
869
+ logger.info(f"📍 Location extracted from message: {extracted_tenant_id}")
870
+
871
+ # Update tenant_id if we extracted a better one
872
+ if not tenant_id or tenant_id != extracted_tenant_id:
873
+ tenant_id = extracted_tenant_id
874
+ logger.info(f"Updated tenant_id to {tenant_id}")
875
+
876
+ # Get coordinates for extracted location
877
+ coords = get_city_coordinates(tenant_id)
878
+ if coords:
879
+ lat, lon = coords["lat"], coords["lon"]
880
+ logger.info(f"✅ Coordinates found from message extraction: {lat}, {lon}")
881
+
882
+ # Final check: if still no coordinates, return error
883
+ if lat is None or lon is None:
884
+ logger.warning(f"❌ No coordinates available for weather request (tenant_id: {tenant_id})")
885
+ return OrchestrationResult(
886
+ intent=IntentType.WEATHER.value,
887
+ reply=(
888
+ "I need to know your location to check the weather! 📍 "
889
+ "You can tell me your city, or share your location."
890
+ ),
891
+ success=False,
892
+ error="Location required"
893
+ )
894
+
895
+ try:
896
+ # Use combined weather + events if compound intent detected
897
+ if is_compound and tenant_id and EVENT_WEATHER_AVAILABLE:
898
+ logger.info("Using weather+events combined handler")
899
+ result = await get_event_recommendations_with_weather(tenant_id, lat, lon)
900
+
901
+ # Build response
902
+ weather = result.get("weather", {})
903
+ weather_summary = result.get("weather_summary", "Weather unavailable")
904
+ suggestions = result.get("suggestions", [])
905
+
906
+ reply_lines = [f"🌤️ **Weather Update:**\n{weather_summary}\n"]
907
+
908
+ if suggestions:
909
+ reply_lines.append("\n📅 **Event Suggestions Based on Weather:**")
910
+ for suggestion in suggestions[:5]: # Top 5 suggestions
911
+ reply_lines.append(f"• {suggestion}")
912
+
913
+ reply = "\n".join(reply_lines)
914
+
915
+ return OrchestrationResult(
916
+ intent=IntentType.WEATHER.value,
917
+ reply=reply,
918
+ success=True,
919
+ data=result,
920
+ model_id="weather_events_combined"
921
+ )
922
+
923
+ else:
924
+ # Simple weather query using enhanced weather_agent
925
+ weather = await get_weather_for_location(lat, lon)
926
+
927
+ # Use enhanced weather_agent's format_weather_summary
928
+ if format_weather_summary:
929
+ weather_text = format_weather_summary(weather)
930
+ else:
931
+ # Fallback formatting
932
+ temp = weather.get("temperature", {}).get("value")
933
+ phrase = weather.get("phrase", "Conditions unavailable")
934
+ if temp:
935
+ weather_text = f"{phrase}, {int(temp)}°F"
936
+ else:
937
+ weather_text = phrase
938
+
939
+ # Get outfit recommendation from enhanced weather_agent
940
+ if recommend_outfit:
941
+ temp = weather.get("temperature", {}).get("value", 70)
942
+ condition = weather.get("phrase", "Clear")
943
+ outfit = recommend_outfit(temp, condition)
944
+ reply = f"🌤️ {weather_text}\n\n👕 {outfit}"
945
+ else:
946
+ reply = f"🌤️ {weather_text}"
947
+
948
+ return OrchestrationResult(
949
+ intent=IntentType.WEATHER.value,
950
+ reply=reply,
951
+ success=True,
952
+ data=weather,
953
+ model_id="azure-maps-weather"
954
+ )
955
+
956
+ except Exception as e:
957
+ logger.error(f"Weather error: {e}", exc_info=True)
958
+ return OrchestrationResult(
959
+ intent=IntentType.WEATHER.value,
960
+ reply=(
961
+ "I'm having trouble getting weather data right now. "
962
+ "Can I help you with something else? 💛"
963
+ ),
964
+ success=False,
965
+ error=str(e),
966
+ fallback_used=True
967
+ )
968
+
969
+
970
async def _handle_events(
    message: str,
    context: Dict[str, Any],
    tenant_id: Optional[str],
    lat: Optional[float],
    lon: Optional[float],
    intent_result: IntentMatch
) -> OrchestrationResult:
    """
    📅 Events handler.

    Routes event queries to tool_agent with proper error handling
    and graceful degradation.
    """
    logger.info("📅 Processing events request")

    # A city is mandatory: without a tenant we cannot scope the event lookup.
    if not tenant_id:
        return OrchestrationResult(
            intent=IntentType.EVENTS.value,
            reply=(
                "I'd love to help you find events! 📅 "
                "Which city are you interested in? "
                "I have information for Atlanta, Birmingham, Chesterfield, "
                "El Paso, Providence, and Seattle."
            ),
            success=False,
            error="City required"
        )

    # Degrade gracefully when the tool agent module failed to import.
    if not TOOL_AGENT_AVAILABLE:
        logger.warning("Tool agent not available")
        return OrchestrationResult(
            intent=IntentType.EVENTS.value,
            reply=(
                "Event information isn't available right now. "
                "Try again soon! 📅"
            ),
            success=False,
            error="Tool agent not loaded",
            fallback_used=True
        )

    try:
        # Role defaults to "resident" for tool-agent signature compatibility.
        payload = await handle_tool_request(
            user_input=message,
            role=context.get("role", "resident"),
            lat=lat,
            lon=lon,
            context=context
        )

        return OrchestrationResult(
            intent=IntentType.EVENTS.value,
            reply=payload.get("response", "Events information retrieved."),
            success=True,
            data=payload,
            model_id="events_tool"
        )

    except Exception as exc:
        logger.error(f"Events error: {exc}", exc_info=True)
        return OrchestrationResult(
            intent=IntentType.EVENTS.value,
            reply=(
                "I'm having trouble loading event information right now. "
                "Check back soon! 📅"
            ),
            success=False,
            error=str(exc),
            fallback_used=True
        )
1045
+
1046
async def _handle_local_resources(
    message: str,
    context: Dict[str, Any],
    tenant_id: Optional[str],
    lat: Optional[float],
    lon: Optional[float]
) -> OrchestrationResult:
    """
    🏛️ Local resources handler (shelters, libraries, food banks, etc.).

    Routes resource queries to tool_agent with proper error handling.
    """
    logger.info("🏛️ Processing local resources request")

    # Resource lookups are city-scoped; bail out early without a tenant.
    if not tenant_id:
        return OrchestrationResult(
            intent=IntentType.LOCAL_RESOURCES.value,
            reply=(
                "I can help you find local resources! 🏛️ "
                "Which city do you need help in? "
                "I cover Atlanta, Birmingham, Chesterfield, El Paso, "
                "Providence, and Seattle."
            ),
            success=False,
            error="City required"
        )

    # Degrade gracefully when the tool agent module failed to import.
    if not TOOL_AGENT_AVAILABLE:
        logger.warning("Tool agent not available")
        return OrchestrationResult(
            intent=IntentType.LOCAL_RESOURCES.value,
            reply=(
                "Resource information isn't available right now. "
                "Try again soon! 🏛️"
            ),
            success=False,
            error="Tool agent not loaded",
            fallback_used=True
        )

    try:
        # Role defaults to "resident" for tool-agent signature compatibility.
        payload = await handle_tool_request(
            user_input=message,
            role=context.get("role", "resident"),
            lat=lat,
            lon=lon,
            context=context
        )

        return OrchestrationResult(
            intent=IntentType.LOCAL_RESOURCES.value,
            reply=payload.get("response", "Resource information retrieved."),
            success=True,
            data=payload,
            model_id="resources_tool"
        )

    except Exception as exc:
        logger.error(f"Resources error: {exc}", exc_info=True)
        return OrchestrationResult(
            intent=IntentType.LOCAL_RESOURCES.value,
            reply=(
                "I'm having trouble finding resource information right now. "
                "Would you like to try a different search? 💛"
            ),
            success=False,
            error=str(exc),
            fallback_used=True
        )
1119
+
1120
+
1121
def _build_conversational_prompt(message: str, intent: IntentType) -> str:
    """Build the core-LLM prompt for a conversational intent (greeting/help/unknown)."""
    if intent == IntentType.GREETING:
        return (
            f"The user greeted you with: '{message}'\n\n"
            "Respond warmly as Penny, introduce yourself briefly, "
            "and ask how you can help them with civic services today."
        )

    if intent == IntentType.HELP:
        return (
            f"The user asked for help: '{message}'\n\n"
            "Explain Penny's main features:\n"
            "- Finding local resources (shelters, libraries, food banks)\n"
            "- Community events and activities\n"
            "- Weather information\n"
            "- 27-language translation\n"
            "- Document processing help\n\n"
            "Ask which city they need assistance in."
        )

    # UNKNOWN (or anything else routed here)
    return (
        f"The user said: '{message}'\n\n"
        "You're not sure what they need help with. "
        "Respond kindly, acknowledge their request, and ask them to "
        "clarify or rephrase. Mention a few things you can help with."
    )


async def _handle_conversational(
    message: str,
    intent: IntentType,
    context: Dict[str, Any]
) -> OrchestrationResult:
    """
    💬 Handles conversational intents (greeting, help, unknown).

    Uses Penny's core LLM for natural responses when available; otherwise
    (or on any LLM failure) falls through to hardcoded friendly replies.
    Expected conditions — LLM not loaded, or an unsuccessful generation
    result — no longer raise exceptions for control flow; only genuinely
    unexpected errors go through the except path.

    Args:
        message: Raw user message.
        intent: GREETING, HELP, or UNKNOWN.
        context: Request context (kept for handler-signature parity).

    Returns:
        OrchestrationResult with either an LLM-generated or fallback reply.
        Always succeeds from the caller's perspective (success=True).
    """
    logger.info(f"💬 Processing conversational intent: {intent.value}")

    if LLM_AVAILABLE:
        try:
            prompt = _build_conversational_prompt(message, intent)

            # Call Penny's core LLM
            llm_result = await generate_response(prompt=prompt, max_new_tokens=200)

            # Compatibility helper normalizes differing result schemas.
            success, error = _check_result_success(llm_result, ["response"])

            if success:
                return OrchestrationResult(
                    intent=intent.value,
                    reply=llm_result.get("response", ""),
                    success=True,
                    data=llm_result,
                    model_id=CORE_MODEL_ID
                )

            # Expected failure mode: log and fall through to canned replies.
            logger.warning(
                f"Conversational handler using fallback: {error or 'LLM generation failed'}"
            )

        except Exception as e:
            # Unexpected failure (network, model crash, ...): degrade gracefully.
            logger.warning(f"Conversational handler using fallback: {e}")
    else:
        logger.info("LLM not available, using fallback responses")

    # Hardcoded fallback responses (Penny's friendly voice)
    fallback_replies = {
        IntentType.GREETING: (
            "Hi there! 👋 I'm Penny, your civic assistant. "
            "I can help you find local resources, events, weather, and more. "
            "What city are you in?"
        ),
        IntentType.HELP: (
            "I'm Penny! 💛 I can help you with:\n\n"
            "🏛️ Local resources (shelters, libraries, food banks)\n"
            "📅 Community events\n"
            "🌤️ Weather updates\n"
            "🌍 Translation (27 languages)\n"
            "📄 Document help\n\n"
            "What would you like to know about?"
        ),
        IntentType.UNKNOWN: (
            "I'm not sure I understood that. Could you rephrase? "
            "I'm best at helping with local services, events, weather, "
            "and translation! 💬"
        )
    }

    return OrchestrationResult(
        intent=intent.value,
        reply=fallback_replies.get(intent, "How can I help you today? 💛"),
        success=True,
        model_id="fallback",
        fallback_used=True
    )
1222
+
1223
+
1224
async def _handle_fallback(
    message: str,
    intent: IntentType,
    context: Dict[str, Any]
) -> OrchestrationResult:
    """
    🆘 Ultimate fallback handler for unhandled intents.

    This is a safety net that should rarely trigger, but ensures
    users always get a helpful response.
    """
    logger.warning(f"⚠️ Fallback triggered for intent: {intent.value}")

    # Static apology + capability list; assembled from fragments so each
    # capability line stays easy to scan in source.
    reply_text = "".join([
        "I've processed your request, but I'm not sure how to help with that yet. ",
        "I'm still learning! 🤖\n\n",
        "I'm best at:\n",
        "🏛️ Finding local resources\n",
        "📅 Community events\n",
        "🌤️ Weather updates\n",
        "🌍 Translation\n\n",
        "Could you rephrase your question? 💛",
    ])

    return OrchestrationResult(
        intent=intent.value,
        reply=reply_text,
        success=False,
        error="Unhandled intent",
        fallback_used=True
    )
1255
+
1256
+
1257
+ # ============================================================
1258
+ # HEALTH CHECK & DIAGNOSTICS (ENHANCED)
1259
+ # ============================================================
1260
+
1261
def get_orchestrator_health() -> Dict[str, Any]:
    """
    📊 Returns comprehensive orchestrator health status.

    Used by the main application health check endpoint to monitor
    the orchestrator and all its service dependencies.

    Returns:
        Dictionary with health information including:
        - status: operational/degraded
        - service_availability: which services are loaded
        - statistics: orchestration counts
        - supported_intents: list of all intent types
        - features: available orchestrator features
    """
    availability = get_service_availability()

    # The orchestrator stays "operational" even with optional services down
    # (graceful degradation); only weather + tool_agent are must-haves.
    required = ("weather", "tool_agent")
    all_critical_up = all(availability.get(name, False) for name in required)
    status = "operational" if all_critical_up else "degraded"

    return {
        "status": status,
        "core_model": CORE_MODEL_ID,
        "max_response_time_ms": MAX_RESPONSE_TIME_MS,
        "statistics": {
            "total_orchestrations": _orchestration_count,
            "emergency_interactions": _emergency_count
        },
        "service_availability": availability,
        "supported_intents": [member.value for member in IntentType],
        "features": {
            "emergency_routing": True,
            "compound_intents": True,
            "fallback_handling": True,
            "performance_tracking": True,
            "context_aware": True,
            "multi_language": TRANSLATION_AVAILABLE,
            "sentiment_analysis": SENTIMENT_AVAILABLE,
            "bias_detection": BIAS_AVAILABLE,
            "weather_integration": WEATHER_AGENT_AVAILABLE,
            "event_recommendations": EVENT_WEATHER_AVAILABLE
        }
    }
1309
+
1310
+
1311
def get_orchestrator_stats() -> Dict[str, Any]:
    """
    📈 Returns orchestrator statistics.

    Useful for monitoring and analytics.

    Returns:
        Dictionary with total orchestration/emergency counters plus
        how many backing services are currently loaded out of the total.
    """
    # Hoisted: the original called get_service_availability() twice;
    # one call is sufficient and keeps the two derived values consistent.
    availability = get_service_availability()

    return {
        "total_orchestrations": _orchestration_count,
        "emergency_interactions": _emergency_count,
        "services_available": sum(1 for v in availability.values() if v),
        "services_total": len(availability)
    }
1323
+
1324
+
1325
+ # ============================================================
1326
+ # TESTING & DEBUGGING (ENHANCED)
1327
+ # ============================================================
1328
+
1329
if __name__ == "__main__":
    """
    🧪 Test the orchestrator with sample queries.
    Run with: python -m app.orchestrator
    """
    import asyncio

    banner = "=" * 60

    print(banner)
    print("🧪 Testing Penny's Orchestrator")
    print(banner)

    # Show which backing services loaded before exercising any queries.
    print("\n📊 Service Availability Check:")
    for name, loaded in get_service_availability().items():
        marker = "✅" if loaded else "❌"
        print(f" {marker} {name}: {'Available' if loaded else 'Not loaded'}")

    print("\n" + banner)

    cases = [
        {
            "name": "Greeting",
            "message": "Hi Penny!",
            "context": {}
        },
        {
            "name": "Weather with location",
            "message": "What's the weather?",
            "context": {"lat": 33.7490, "lon": -84.3880}
        },
        {
            "name": "Events in city",
            "message": "Events in Atlanta",
            "context": {"tenant_id": "atlanta_ga"}
        },
        {
            "name": "Help request",
            "message": "I need help",
            "context": {}
        },
        {
            "name": "Translation",
            "message": "Translate hello",
            "context": {"source_lang": "eng_Latn", "target_lang": "spa_Latn"}
        }
    ]

    async def run_tests():
        # Run each sample query through the orchestrator and summarize the result.
        for idx, case in enumerate(cases, 1):
            print(f"\n--- Test {idx}: {case['name']} ---")
            print(f"Query: {case['message']}")

            try:
                outcome = await run_orchestrator(case["message"], case["context"])
                print(f"Intent: {outcome['intent']}")
                print(f"Success: {outcome['success']}")
                print(f"Fallback: {outcome.get('fallback_used', False)}")

                # Truncate long replies for readable console output.
                answer = outcome['reply']
                if len(answer) > 150:
                    answer = answer[:150] + "..."
                print(f"Reply: {answer}")

                if outcome.get('response_time_ms'):
                    print(f"Response time: {outcome['response_time_ms']:.0f}ms")

            except Exception as exc:
                print(f"❌ Error: {exc}")

    asyncio.run(run_tests())

    print("\n" + banner)
    print("📊 Final Statistics:")
    for key, value in get_orchestrator_stats().items():
        print(f" {key}: {value}")

    print("\n" + banner)
    print("✅ Tests complete")
    print(banner)