pythonprincess commited on
Commit
b90ee2a
ยท
verified ยท
1 Parent(s): c47fcc7

Upload logging_utils.py

Browse files
Files changed (1) hide show
  1. app/logging_utils.py +815 -0
app/logging_utils.py ADDED
@@ -0,0 +1,815 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/logging_utils.py
2
+ """
3
+ ๐Ÿ“Š Penny's Logging & Analytics System
4
+ Tracks user interactions, system performance, and civic engagement patterns.
5
+
6
+ MISSION: Create an audit trail that helps improve Penny's service while
7
+ respecting user privacy and meeting compliance requirements.
8
+
9
+ FEATURES:
10
+ - Structured JSON logging for Azure Application Insights
11
+ - Daily log rotation for long-term storage
12
+ - Privacy-safe request/response tracking
13
+ - Performance monitoring
14
+ - Error tracking with context
15
+ - Optional Azure Blob Storage integration
16
+ """
17
+
18
+ import json
19
+ import logging
20
+ from datetime import datetime, timezone
21
+ from pathlib import Path
22
+ import os
23
+ from typing import Dict, Any, Optional, List
24
+ from dataclasses import dataclass, asdict
25
+ from enum import Enum
26
+ import hashlib
27
+
28
# --- LOGGING SETUP ---
# Module-level logger; handler/format configuration is left to the host app.
logger = logging.getLogger(__name__)

# ============================================================
# LOG PATH CONFIGURATION (Environment-aware)
# ============================================================

# Base directories (use pathlib for OS compatibility)
# PROJECT_ROOT resolves two levels above this file (repo root for app/...).
PROJECT_ROOT = Path(__file__).parent.parent.resolve()
LOGS_BASE_DIR = PROJECT_ROOT / "data" / "logs"
DEFAULT_LOG_PATH = LOGS_BASE_DIR / "penny_combined.jsonl"

# Environment-configurable log path
# PENNY_LOG_PATH overrides only the combined (non-rotated) log file location;
# daily-rotated files always live under LOGS_BASE_DIR.
LOG_PATH = Path(os.getenv("PENNY_LOG_PATH", str(DEFAULT_LOG_PATH)))

# Ensure log directory exists on import
# NOTE(review): this creates LOGS_BASE_DIR even when PENNY_LOG_PATH points
# elsewhere — a custom path's parent is only created lazily at write time.
LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)
45
+
46
+
47
+ # ============================================================
48
+ # LOG LEVEL ENUM (For categorizing log entries)
49
+ # ============================================================
50
+
51
class LogLevel(str, Enum):
    """Severity category attached to each log entry.

    Values mirror Azure Application Insights severity levels, plus an
    ``audit`` level used for the compliance trail.
    """

    DEBUG = "debug"        # fine-grained diagnostic detail
    INFO = "info"          # routine informational events
    WARNING = "warning"    # something looks off, but we recovered
    ERROR = "error"        # an operation failed
    CRITICAL = "critical"  # severe failure requiring attention
    AUDIT = "audit"        # compliance / audit-trail records
62
+
63
+
64
class InteractionType(str, Enum):
    """Bucket describing what a resident asked Penny to do.

    Drives analytics on which features get the most use.
    """

    QUERY = "query"                      # general question
    RESOURCE_LOOKUP = "resource_lookup"  # finding civic resources
    TRANSLATION = "translation"          # language translation
    EVENT_SEARCH = "event_search"        # looking for events
    WEATHER = "weather"                  # weather inquiry
    DOCUMENT = "document_processing"     # PDF/form processing
    EMERGENCY = "emergency"              # crisis/emergency routing
    GREETING = "greeting"                # conversational greeting
    HELP = "help"                        # help request
    UNKNOWN = "unknown"                  # unclassified
79
+
80
+
81
+ # ============================================================
82
+ # STRUCTURED LOG ENTRY (Type-safe logging)
83
+ # ============================================================
84
+
85
@dataclass
class PennyLogEntry:
    """One structured, privacy-safe record of a Penny interaction.

    The layout is Azure Application Insights compatible, analytics-ready,
    and designed so no raw PII needs to be stored.
    """

    # When the interaction happened (ISO-8601, UTC).
    timestamp: str

    # Request context
    input: str
    input_length: int
    tenant_id: str
    user_role: str
    interaction_type: InteractionType

    # Response context
    intent: str
    tool_used: Optional[str]
    model_id: Optional[str]
    response_summary: str
    response_length: int
    response_time_ms: Optional[float]

    # Technical context
    log_level: LogLevel
    success: bool
    error_message: Optional[str] = None

    # Location context (optional)
    lat: Optional[float] = None
    lon: Optional[float] = None
    location_detected: Optional[str] = None

    # Privacy & compliance
    session_id: Optional[str] = None  # hashed session identifier, never the raw ID
    contains_pii: bool = False

    # Performance metrics
    tokens_used: Optional[int] = None
    cache_hit: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict, flattening enums to their values."""
        serialized: Dict[str, Any] = {}
        for key, value in asdict(self).items():
            serialized[key] = value.value if isinstance(value, Enum) else value
        return serialized
136
+
137
+
138
+ # ============================================================
139
+ # DAILY LOG ROTATION
140
+ # ============================================================
141
+
142
def get_daily_log_path() -> Path:
    """Return today's log file path (UTC), creating its directory if needed.

    One file per UTC day (e.g. ``data/logs/2025-02-01.jsonl``) keeps files
    small, simplifies archival, and maps cleanly onto per-day blob uploads.
    """
    today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    path = LOGS_BASE_DIR / f"{today}.jsonl"

    # Directory may have been removed since import; recreate defensively.
    path.parent.mkdir(parents=True, exist_ok=True)
    return path
162
+
163
+
164
+ # ============================================================
165
+ # MAIN LOGGING FUNCTION (Enhanced)
166
+ # ============================================================
167
+
168
def log_request(
    payload: Dict[str, Any],
    response: Dict[str, Any],
    rotate_daily: bool = True,
    log_level: LogLevel = LogLevel.INFO
) -> None:
    """
    📝 Logs a user interaction with Penny.

    Primary logging entry point called by router.py after each request.
    Builds a structured, privacy-safe PennyLogEntry and writes it to disk
    (daily-rotated by default), optionally mirroring it to Azure.

    Args:
        payload: Incoming request data. Keys read here: input, tenant_id,
            role, session_id, lat, lon.
        response: Final response dict from the orchestrator. Keys read here:
            intent, tool, model_id, response, response_time_ms, success,
            error, location_detected, tokens_used, cache_hit.
        rotate_daily: If True, write to today's dated file; otherwise append
            to the combined LOG_PATH.
        log_level: Severity attached to this entry.

    Never raises: any failure is caught and routed to the emergency log.

    Example:
        log_request(
            payload={"input": "What's the weather?", "tenant_id": "atlanta_ga"},
            response={"intent": "weather", "response": "..."}
        )
    """

    try:
        # --- Extract Core Fields (with safe defaults) ---
        user_input = payload.get("input", "")
        tenant_id = payload.get("tenant_id", "unknown")
        user_role = payload.get("role", "resident")

        # --- Determine Interaction Type from the detected intent ---
        intent = response.get("intent", "unknown")
        interaction_type = _classify_interaction(intent)

        # --- Privacy: never store raw session IDs; hash when present ---
        session_id = payload.get("session_id")
        if session_id:
            session_id = _hash_identifier(session_id)

        # --- Detect PII so the stored input can be masked below ---
        contains_pii = _check_for_pii(user_input)

        # --- Create Structured Log Entry ---
        log_entry = PennyLogEntry(
            timestamp=datetime.now(timezone.utc).isoformat(),
            input=_sanitize_input(user_input, contains_pii),
            input_length=len(user_input),  # length of the ORIGINAL, unmasked input
            tenant_id=tenant_id,
            user_role=user_role,
            interaction_type=interaction_type,
            intent=intent,
            tool_used=response.get("tool", "none"),
            model_id=response.get("model_id"),
            response_summary=_summarize_response(response.get("response")),
            response_length=len(str(response.get("response", ""))),
            response_time_ms=response.get("response_time_ms"),
            log_level=log_level,
            success=response.get("success", True),  # assume success unless told otherwise
            error_message=response.get("error"),
            lat=payload.get("lat"),
            lon=payload.get("lon"),
            location_detected=response.get("location_detected"),
            session_id=session_id,
            contains_pii=contains_pii,
            tokens_used=response.get("tokens_used"),
            cache_hit=response.get("cache_hit", False)
        )

        # --- Write to File (daily-rotated unless caller opted out) ---
        log_path = get_daily_log_path() if rotate_daily else LOG_PATH
        _write_log_entry(log_path, log_entry)

        # --- Optional: mirror to Azure when explicitly enabled via env var ---
        if os.getenv("AZURE_LOGS_ENABLED", "false").lower() == "true":
            _send_to_azure(log_entry)

        # --- Console summary line (picked up by Azure Application Insights) ---
        logger.info(
            f"Request logged | "
            f"tenant={tenant_id} | "
            f"intent={intent} | "
            f"interaction={interaction_type.value} | "
            f"success={log_entry.success}"
        )

    except Exception as e:
        # Failsafe: Never let logging failures crash the application
        logger.error(f"Failed to log request: {e}", exc_info=True)
        _emergency_log(payload, response, str(e))
259
+
260
+
261
+ # ============================================================
262
+ # LOG WRITING (With error handling)
263
+ # ============================================================
264
+
265
def _write_log_entry(log_path: Path, log_entry: PennyLogEntry) -> None:
    """Append one entry to *log_path* as a JSON line.

    File-system problems are logged and routed to the console fallback so a
    failed write never propagates to the caller.
    """
    try:
        # The parent directory may not exist yet (custom LOG_PATH, first run).
        log_path.parent.mkdir(parents=True, exist_ok=True)

        with open(log_path, "a", encoding="utf-8") as handle:
            handle.write(json.dumps(log_entry.to_dict(), ensure_ascii=False) + "\n")

    except IOError as e:
        logger.error(f"Failed to write to log file {log_path}: {e}")
        _emergency_log_to_console(log_entry)
    except Exception as e:
        logger.error(f"Unexpected error writing log: {e}", exc_info=True)
        _emergency_log_to_console(log_entry)
285
+
286
+
287
def _emergency_log_to_console(log_entry: PennyLogEntry) -> None:
    """Last-ditch fallback: dump the entry to stdout when file writes fail.

    Console output is still captured by Azure Application Insights, so the
    record is not lost entirely.
    """
    serialized = json.dumps(log_entry.to_dict())
    print(f"[EMERGENCY LOG] {serialized}")
293
+
294
+
295
+ def _emergency_log(payload: Dict, response: Dict, error: str) -> None:
296
+ """
297
+ ๐Ÿšจ Absolute fallback for when structured logging fails entirely.
298
+ """
299
+ emergency_entry = {
300
+ "timestamp": datetime.now(timezone.utc).isoformat(),
301
+ "level": "CRITICAL",
302
+ "message": "Logging system failure",
303
+ "error": error,
304
+ "input_preview": str(payload.get("input", ""))[:100],
305
+ "response_preview": str(response.get("response", ""))[:100]
306
+ }
307
+ print(f"[LOGGING FAILURE] {json.dumps(emergency_entry)}")
308
+
309
+
310
+ # ============================================================
311
+ # HELPER FUNCTIONS
312
+ # ============================================================
313
+
314
def _classify_interaction(intent: str) -> InteractionType:
    """Translate a router intent string into an InteractionType bucket.

    Unrecognized intents fall back to ``InteractionType.UNKNOWN`` so the
    analytics breakdown always has a category.
    """
    mapping = {
        "weather": InteractionType.WEATHER,
        "events": InteractionType.EVENT_SEARCH,
        "local_resources": InteractionType.RESOURCE_LOOKUP,
        "translation": InteractionType.TRANSLATION,
        "document_processing": InteractionType.DOCUMENT,
        "emergency": InteractionType.EMERGENCY,
        "greeting": InteractionType.GREETING,
        "help": InteractionType.HELP,
    }
    return mapping.get(intent.lower(), InteractionType.UNKNOWN)
329
+
330
+
331
+ def _summarize_response(resp: Optional[Any]) -> str:
332
+ """
333
+ โœ‚๏ธ Creates a truncated summary of the response for logging.
334
+ Prevents log files from becoming bloated with full responses.
335
+ """
336
+ if resp is None:
337
+ return "No response content"
338
+
339
+ if isinstance(resp, dict):
340
+ # Try to extract the most meaningful part
341
+ summary = (
342
+ resp.get("response") or
343
+ resp.get("summary") or
344
+ resp.get("message") or
345
+ str(resp)
346
+ )
347
+ return str(summary)[:250]
348
+
349
+ return str(resp)[:250]
350
+
351
+
352
+ def _hash_identifier(identifier: str) -> str:
353
+ """
354
+ ๐Ÿ”’ Creates a privacy-safe hash of identifiers (session IDs, user IDs).
355
+
356
+ Uses SHA256 for one-way hashing. This allows:
357
+ - Session tracking without storing raw IDs
358
+ - Privacy compliance (GDPR, CCPA)
359
+ - Anonymized analytics
360
+ """
361
+ return hashlib.sha256(identifier.encode()).hexdigest()[:16]
362
+
363
+
364
+ def _check_for_pii(text: str) -> bool:
365
+ """
366
+ ๐Ÿ” Simple PII detection (can be enhanced with NER models).
367
+
368
+ Checks for common PII patterns:
369
+ - Social Security Numbers
370
+ - Email addresses
371
+ - Phone numbers
372
+
373
+ Returns True if potential PII detected.
374
+ """
375
+ import re
376
+
377
+ # SSN pattern: XXX-XX-XXXX
378
+ ssn_pattern = r'\b\d{3}-\d{2}-\d{4}\b'
379
+
380
+ # Email pattern
381
+ email_pattern = r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b'
382
+
383
+ # Phone pattern: various formats
384
+ phone_pattern = r'\b\d{3}[-.\s]?\d{3}[-.\s]?\d{4}\b'
385
+
386
+ patterns = [ssn_pattern, email_pattern, phone_pattern]
387
+
388
+ for pattern in patterns:
389
+ if re.search(pattern, text):
390
+ return True
391
+
392
+ return False
393
+
394
+
395
+ def _sanitize_input(text: str, contains_pii: bool) -> str:
396
+ """
397
+ ๐Ÿงน Sanitizes user input for logging.
398
+
399
+ If PII detected:
400
+ - Masks the input for privacy
401
+ - Keeps first/last few characters for debugging
402
+
403
+ Args:
404
+ text: Original user input
405
+ contains_pii: Whether PII was detected
406
+
407
+ Returns:
408
+ Sanitized text safe for logging
409
+ """
410
+ if not contains_pii:
411
+ return text
412
+
413
+ # Mask middle portion if PII detected
414
+ if len(text) <= 20:
415
+ return "[PII_DETECTED]"
416
+
417
+ # Keep first 10 and last 10 chars, mask middle
418
+ return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
419
+
420
+
421
+ # ============================================================
422
+ # AZURE INTEGRATION (Placeholder for future)
423
+ # ============================================================
424
+
425
def _send_to_azure(log_entry: PennyLogEntry) -> None:
    """Forward a log entry to Azure (stub — not yet implemented).

    Intended destinations once wired up:
      1. Azure Application Insights (custom events)
      2. Azure Blob Storage (long-term archival)
      3. Azure Table Storage (queryable logs)

    Failures are swallowed deliberately: telemetry must never crash the app.
    """
    try:
        # Future implementation sketch (Application Insights):
        #   from applicationinsights import TelemetryClient
        #   tc = TelemetryClient(os.getenv("APPINSIGHTS_INSTRUMENTATION_KEY"))
        #   tc.track_event("PennyInteraction", properties=log_entry.to_dict())
        #   tc.flush()
        logger.debug("Azure logging not yet implemented")

    except Exception as e:
        logger.error(f"Failed to send log to Azure: {e}")
        # Intentionally not re-raised — logging must never take the app down.
451
+
452
+
453
+ # ============================================================
454
+ # LOG ANALYSIS UTILITIES
455
+ # ============================================================
456
+
457
def get_logs_for_date(date: str) -> List[Dict[str, Any]]:
    """Load every log entry recorded on *date*.

    Args:
        date: Day to load, in ``YYYY-MM-DD`` format (matches the daily
            rotation filenames).

    Returns:
        List of parsed log-entry dicts; empty when no file exists or a read
        error occurs (errors are logged, never raised).

    Example:
        logs = get_logs_for_date("2025-02-01")
    """
    log_file = LOGS_BASE_DIR / f"{date}.jsonl"

    if not log_file.exists():
        logger.warning(f"No logs found for date: {date}")
        return []

    entries: List[Dict[str, Any]] = []
    try:
        with open(log_file, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                # Skip blank lines (e.g. trailing newline at EOF).
                if raw_line.strip():
                    entries.append(json.loads(raw_line))
    except Exception as e:
        logger.error(f"Error reading logs for {date}: {e}")

    return entries
486
+
487
+
488
def get_interaction_stats(date: str) -> Dict[str, Any]:
    """Summarize one day of Penny usage into an analytics dictionary.

    Computed metrics: total interaction count, successful count, success
    rate, average response time, a breakdown by interaction type, and the
    five most common intents.

    Args:
        date: Day to summarize, in ``YYYY-MM-DD`` format.

    Returns:
        Statistics dict, or an ``{"error": ..., "date": ...}`` dict when no
        logs exist for *date*.
    """
    entries = get_logs_for_date(date)

    if not entries:
        return {"error": "No logs found for date", "date": date}

    total = len(entries)
    successful = sum(1 for entry in entries if entry.get("success", False))

    # Average response time over entries that actually recorded a timing.
    timings = [
        entry["response_time_ms"]
        for entry in entries
        if entry.get("response_time_ms") is not None
    ]
    avg_response_time = sum(timings) / len(timings) if timings else 0

    # Tally interaction types and intents in a single pass.
    by_type: Dict[str, int] = {}
    by_intent: Dict[str, int] = {}
    for entry in entries:
        type_key = entry.get("interaction_type", "unknown")
        by_type[type_key] = by_type.get(type_key, 0) + 1
        intent_key = entry.get("intent", "unknown")
        by_intent[intent_key] = by_intent.get(intent_key, 0) + 1

    ranked_intents = sorted(by_intent.items(), key=lambda item: item[1], reverse=True)

    return {
        "date": date,
        "total_interactions": total,
        "successful_interactions": successful,
        "success_rate": f"{(successful/total*100):.1f}%",
        "avg_response_time_ms": round(avg_response_time, 2),
        "interactions_by_type": by_type,
        "top_intents": dict(ranked_intents[:5]),
    }
547
+
548
+
549
+ # ============================================================
550
+ # LOG CLEANUP (For maintenance)
551
+ # ============================================================
552
+
553
def cleanup_old_logs(days_to_keep: int = 90) -> int:
    """Delete daily log files older than *days_to_keep* days.

    Only files named ``YYYY-MM-DD.jsonl`` are considered; anything else in
    the log directory is left untouched.

    Args:
        days_to_keep: Retention window in days (default 90).

    Returns:
        Number of files removed.

    Example:
        deleted = cleanup_old_logs(90)  # drop logs older than 90 days
    """
    from datetime import timedelta

    cutoff = datetime.now(timezone.utc) - timedelta(days=days_to_keep)
    removed = 0

    try:
        for candidate in LOGS_BASE_DIR.glob("*.jsonl"):
            # Filename stem encodes the log date (YYYY-MM-DD).
            try:
                stamped = datetime.strptime(candidate.stem, "%Y-%m-%d").replace(tzinfo=timezone.utc)
            except ValueError:
                continue  # not a daily log file; skip it

            if stamped < cutoff:
                candidate.unlink()
                removed += 1
                logger.info(f"Deleted old log file: {candidate.name}")

    except Exception as e:
        logger.error(f"Error during log cleanup: {e}")

    logger.info(f"Log cleanup complete: {removed} files deleted")
    return removed
593
+
594
+
595
+ # ============================================================
596
+ # PUBLIC API FUNCTIONS (Used by other modules)
597
+ # ============================================================
598
+
599
def log_interaction(
    tenant_id: Optional[str] = None,
    interaction_type: Optional[str] = None,
    intent: Optional[str] = None,
    response_time_ms: Optional[float] = None,
    success: Optional[bool] = None,
    metadata: Optional[Dict[str, Any]] = None,
    **kwargs
) -> None:
    """
    📝 Write a lightweight interaction record to today's log file.

    Convenience entry point used by the orchestrator, router, agents, and
    model utilities. Only fields actually supplied are written; string
    values are passed through PII sanitization first.

    Args:
        tenant_id: City/location identifier.
        interaction_type: Category of interaction (e.g. "weather", "events",
            "orchestration").
        intent: Detected intent (e.g. "weather", "emergency").
        response_time_ms: Handling time in milliseconds (rounded to 2 dp).
        success: Whether the operation succeeded.
        metadata: Extra key/value context; string values are sanitized.
        **kwargs: Arbitrary additional fields (e.g. error, details,
            fallback_used); they never overwrite the standard fields above.

    Example:
        log_interaction(
            tenant_id="atlanta_ga",
            interaction_type="weather",
            intent="weather",
            response_time_ms=150.5,
            success=True,
            metadata={"temperature": 72, "condition": "sunny"}
        )

        # Or with keyword arguments only:
        log_interaction(
            intent="translation_initialization",
            success=False,
            error="model_loader unavailable"
        )
    """
    try:
        entry: Dict[str, Any] = {
            "timestamp": datetime.now(timezone.utc).isoformat()
        }

        # Standard fields — include only what the caller actually provided.
        if tenant_id is not None:
            entry["tenant_id"] = sanitize_for_logging(tenant_id)
        if interaction_type is not None:
            entry["interaction_type"] = interaction_type
        if intent is not None:
            entry["intent"] = intent
        if response_time_ms is not None:
            entry["response_time_ms"] = round(response_time_ms, 2)
        if success is not None:
            entry["success"] = success

        # Optional metadata, with string values scrubbed for PII.
        if metadata:
            entry["metadata"] = {
                key: sanitize_for_logging(value) if isinstance(value, str) else value
                for key, value in metadata.items()
            }

        # Free-form extras (backward compatibility with model utils);
        # standard fields always win over kwargs of the same name.
        for key, value in kwargs.items():
            if key not in entry:
                entry[key] = sanitize_for_logging(value) if isinstance(value, str) else value

        _write_log_entry_dict(get_daily_log_path(), entry)

    except Exception as e:
        # Failsafe: logging must never take the application down.
        logger.error(f"Failed to log interaction: {e}", exc_info=True)
        _emergency_log_to_console_dict(entry if 'entry' in locals() else {})
685
+
686
+
687
def sanitize_for_logging(text: str) -> str:
    """Mask *text* for logging when it appears to contain PII.

    Used throughout Penny to keep sensitive data out of the logs. Non-string
    or falsy inputs are handled defensively rather than raising.

    Args:
        text: Text to sanitize.

    Returns:
        Text that is safe to persist in logs.

    Example:
        safe_text = sanitize_for_logging("My email is user@example.com")
        # Returns a masked version of the input
    """
    # Defensive handling of falsy / non-string input.
    if not text or not isinstance(text, str):
        return str(text) if text else ""

    if not _check_for_pii(text):
        return text

    # PII present: fully redact short inputs, partially mask longer ones.
    if len(text) <= 20:
        return "[PII_DETECTED]"
    return f"{text[:10]}...[PII_MASKED]...{text[-10:]}"
717
+
718
+
719
+ def _write_log_entry_dict(log_path: Path, log_entry_dict: Dict[str, Any]) -> None:
720
+ """
721
+ ๐Ÿ“ Writes log entry dictionary to JSONL file.
722
+ Helper function for simplified logging.
723
+ """
724
+ try:
725
+ log_path.parent.mkdir(parents=True, exist_ok=True)
726
+ with open(log_path, "a", encoding="utf-8") as f:
727
+ json_str = json.dumps(log_entry_dict, ensure_ascii=False)
728
+ f.write(json_str + "\n")
729
+ except Exception as e:
730
+ logger.error(f"Failed to write log entry: {e}")
731
+ _emergency_log_to_console_dict(log_entry_dict)
732
+
733
+
734
+ def _emergency_log_to_console_dict(log_entry_dict: Dict[str, Any]) -> None:
735
+ """
736
+ ๐Ÿšจ Emergency fallback: Print log to console if file writing fails.
737
+ """
738
+ print(f"[EMERGENCY LOG] {json.dumps(log_entry_dict)}")
739
+
740
+
741
+ # ============================================================
742
+ # INITIALIZATION
743
+ # ============================================================
744
+
745
def initialize_logging_system() -> bool:
    """
    🚀 Prepare the logging system at app startup.

    Creates the log directory, verifies it is writable with a throwaway
    probe file, and reports the Azure logging status.

    Returns:
        True when initialization succeeded, False otherwise.
    """
    logger.info("📊 Initializing Penny's logging system...")

    try:
        LOGS_BASE_DIR.mkdir(parents=True, exist_ok=True)

        # Probe write permissions so failures surface at startup, not later.
        probe = LOGS_BASE_DIR / ".write_test"
        probe.write_text("test")
        probe.unlink()

        logger.info(f"✅ Logging system initialized")
        logger.info(f"📁 Log directory: {LOGS_BASE_DIR}")
        logger.info(f"🔄 Daily rotation: Enabled")

        # Report whether Azure mirroring is active for this deployment.
        if os.getenv("AZURE_LOGS_ENABLED") == "true":
            logger.info("☁️ Azure logging: Enabled")
        else:
            logger.info("💾 Azure logging: Disabled (local only)")

        return True

    except Exception as e:
        logger.error(f"❌ Failed to initialize logging system: {e}")
        return False
779
+
780
+
781
def setup_logger(
    name: str,
    level: int = logging.INFO
) -> logging.Logger:
    """Return a logger configured with Penny's standard console format.

    Idempotent with respect to handlers: if the named logger already has
    one, none is added, so repeated calls from the same module are safe.

    Args:
        name: Logger name, conventionally ``__name__`` of the caller.
        level: Minimum severity to emit (default ``logging.INFO``).

    Returns:
        The configured ``logging.Logger`` instance.

    Example:
        logger = setup_logger(__name__)
        logger.info("Module initialized")
    """
    log = logging.getLogger(name)
    log.setLevel(level)

    # Guard against duplicate handlers on repeated setup calls.
    if log.handlers:
        return log

    console = logging.StreamHandler()
    console.setLevel(level)

    # Format: timestamp - module - level - message
    console.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    ))
    log.addHandler(console)

    return log