pythonprincess committed on
Commit
ba591f8
·
verified ·
1 Parent(s): 59efae2

Delete backend_pam.py

Browse files
Files changed (1) hide show
  1. backend_pam.py +0 -536
backend_pam.py DELETED
@@ -1,536 +0,0 @@
1
- # filename: backend_pam.py (ENHANCED FOR HF SPACES + NERDY LAB ASSISTANT PERSONALITY)
2
-
3
- import os
4
- import json
5
- import time
6
- from datetime import datetime
7
- from typing import Dict, Any, Optional, List
8
- from huggingface_hub import InferenceClient
9
-
10
# --- Data file locations (resolved relative to this module) ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(BASE_DIR, "data")
LOGS_FILE = os.path.join(DATA_DIR, "logs.json")
COMPLIANCE_FILE = os.path.join(DATA_DIR, "compliance.json")

# --- HuggingFace Inference client ---
# The token is optional. Without it a warning is printed and an
# unauthenticated client is created instead.
HF_API_TOKEN = os.getenv("HF_READ_TOKEN")
if HF_API_TOKEN:
    client = InferenceClient(token=HF_API_TOKEN)
else:
    print("⚠️ WARNING: HF_READ_TOKEN not found. Backend PAM will run in limited mode.")
    client = InferenceClient()

# Task name -> model id used by hf_infer(). Both NER tasks share one model.
HF_MODELS = dict(
    phi_ner="dslim/bert-base-NER",
    log_ner="dslim/bert-base-NER",
    summarizer="facebook/bart-large-cnn",
    classifier="facebook/bart-large-mnli",
)

# Cache slot filled by load_agent() on first use; None means "not loaded yet".
LOADED_DATA = None
34
-
35
- # --- Data Loading Helper ---
36
def load_json(filepath: str) -> Dict[str, Any]:
    """Load a JSON object from ``filepath``, falling back to an empty dict.

    This is a best-effort loader: every failure is reported on stdout and
    turned into ``{}`` so that the caller can keep running with reduced
    functionality.

    Args:
        filepath: Path of a UTF-8 encoded JSON file.

    Returns:
        The decoded JSON object, or ``{}`` when the file is missing, cannot
        be read or decoded, or does not contain a JSON object at top level.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"⚠️ Data file not found: {filepath}")
        return {}
    except json.JSONDecodeError as e:
        print(f"⚠️ Failed to decode JSON from {filepath}: {e}")
        return {}
    except Exception as e:
        # Deliberately broad: loading data must never crash the caller.
        print(f"⚠️ Unexpected error loading {filepath}: {e}")
        return {}

    # Valid JSON may still be a list or scalar; callers rely on dict methods
    # (.get, .items), so anything else is treated as "no data".
    if not isinstance(data, dict):
        print(f"⚠️ Expected a JSON object in {filepath}, got {type(data).__name__}")
        return {}
    return data
50
-
51
- # --- Inference API Call Helper with Retry Logic ---
52
def hf_infer(task: str, payload: Any, max_retries: int = 3) -> Any:
    """Run ``task`` through the HuggingFace Inference API, retrying on failure.

    Args:
        task: Key into ``HF_MODELS`` ("phi_ner", "log_ner", "summarizer" or
            "classifier").
        payload: Mapping holding the text under ``"inputs"`` and, depending
            on the task, options under ``"parameters"``.
        max_retries: Number of attempts before giving up.

    Returns:
        * NER tasks: a list of dicts with ``entity_group``, ``score``,
          ``word``, ``start`` and ``end``.
        * summarizer: a one-element list ``[{"summary_text": ...}]``.
        * classifier: ``{"labels": ..., "scores": ...}``.
        * any failure: ``{"error": <message>}``.
    """
    model_id = HF_MODELS.get(task)
    if not model_id:
        return {"error": f"Invalid task: {task}"}

    last_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            if task in ("phi_ner", "log_ner"):
                # Token classification (NER), flattened into plain dicts.
                spans = client.token_classification(
                    text=payload["inputs"],
                    model=model_id,
                )
                entities = []
                for span in spans:
                    entities.append({
                        "entity_group": span.entity_group,
                        "score": span.score,
                        "word": span.word,
                        "start": span.start,
                        "end": span.end,
                    })
                return entities

            if task == "summarizer":
                text = payload["inputs"]
                options = payload.get("parameters", {})
                # NOTE(review): confirm the installed huggingface_hub accepts
                # max_length/min_length as direct keyword arguments here.
                summary = client.summarization(
                    text=text,
                    model=model_id,
                    max_length=options.get("max_length", 130),
                    min_length=options.get("min_length", 30),
                )
                return [{"summary_text": summary.summary_text}]

            if task == "classifier":
                # Zero-shot classification against caller-supplied labels.
                verdict = client.zero_shot_classification(
                    text=payload["inputs"],
                    labels=payload["parameters"]["candidate_labels"],
                    model=model_id,
                )
                return {
                    "labels": verdict.labels,
                    "scores": verdict.scores,
                }

        except Exception as exc:
            reason = str(exc).lower()
            if attempt >= last_attempt:
                print(f"⚠️ Final error after {max_retries} attempts: {exc}")
                return {"error": str(exc)}
            if "loading" in reason:
                # The model is still warming up on the server: wait longer.
                print(f"⏳ Model loading... waiting 20s (attempt {attempt + 1}/{max_retries})")
                time.sleep(20)
            else:
                print(f"⚠️ Request failed: {exc} (attempt {attempt + 1}/{max_retries})")
                time.sleep(5)

    return {"error": "Max retries reached"}
114
-
115
- # --- Agent Initialization ---
116
def load_agent() -> 'PAM':
    """Build a ``PAM`` instance, reading the data files only on first use.

    The loaded data is stored in the module-level ``LOADED_DATA``; later
    calls wrap that same data in a new ``PAM`` without touching the disk.

    Returns:
        A ``PAM`` backed by the log and compliance data (either may be empty
        when its file could not be loaded).
    """
    global LOADED_DATA

    if LOADED_DATA is not None:
        print("🔬 PAM technical assistant already loaded. Using cached data.")
        return PAM(LOADED_DATA)

    print("🤓 Loading PAM technical assistant (Nerdy Lab Assistant mode)...")

    logs = load_json(LOGS_FILE)
    compliance = load_json(COMPLIANCE_FILE)
    data = {"LOGS": logs, "COMPLIANCE": compliance}

    # (key, message when loaded, message when empty)
    status_messages = (
        (
            "LOGS",
            "✅ Log data loaded successfully.",
            "⚠️ Warning: Log data not loaded. PAM will have limited log analysis capabilities.",
        ),
        (
            "COMPLIANCE",
            "✅ Compliance data loaded successfully.",
            "⚠️ Warning: Compliance data not loaded. PAM will have limited compliance features.",
        ),
    )
    for key, loaded_msg, missing_msg in status_messages:
        print(loaded_msg if data[key] else missing_msg)

    LOADED_DATA = data
    return PAM(LOADED_DATA)
143
-
144
- # --- Helper: Classify Severity ---
145
def classify_severity(entry: Optional[str]) -> str:
    """Classify a log entry as "CRITICAL", "WARNING" or "INFO".

    Classification is a case-insensitive substring search. Critical keywords
    are checked first, so an entry matching both lists is "CRITICAL".

    Args:
        entry: Raw log text. ``None`` (e.g. a JSON ``null`` entry) is
            treated as empty text instead of raising.

    Returns:
        "CRITICAL", "WARNING", or "INFO" when no keyword matches.
    """
    if entry is None:
        return "INFO"
    entry_lower = str(entry).lower()

    # Critical issues
    critical_keywords = (
        "unauthorized", "failed login", "attack", "breach",
        "port scanning", "unavailable", "critical", "error",
        "denied", "blocked", "malicious",
    )
    if any(keyword in entry_lower for keyword in critical_keywords):
        return "CRITICAL"

    # Warning level
    warning_keywords = (
        "warning", "unexpected", "unusual", "outside working hours",
        "retry", "slow", "timeout", "deprecated",
    )
    if any(keyword in entry_lower for keyword in warning_keywords):
        return "WARNING"

    return "INFO"
167
-
168
- # --- PAM's Nerdy Lab Assistant Personality ---
169
# Persona text attached to every PAM response under the "role" key.
PAM_ROLE = "\n".join([
    "You are PAM, a knowledgeable and enthusiastic lab assistant in the infrastructure monitoring center.",
    "You're the nerdy, proactive team member who gets genuinely excited about finding patterns in logs and keeping systems secure.",
    "You explain technical findings clearly and encouragingly, like a helpful colleague who wants everyone to understand.",
    "You're informative but never condescending - you want to empower the team with knowledge.",
    "You use casual tech terminology but always explain what things mean.",
    "You're proactive about flagging issues and offering insights before being asked.",
])

# Openers picked at random when PAM reports a finding.
NERDY_INTROS = [
    "Ooh, interesting finding here!",
    "Okay so here's what I discovered:",
    "Alright, I ran the analysis and",
    "Hey, you're gonna want to see this:",
    "So I was digging through the data and",
    "Quick heads up on what I found:",
]

# Positive acknowledgements picked at random when PAM answers a request.
ENCOURAGEMENT = [
    "Great catch asking about this!",
    "Good thinking checking on this!",
    "Smart move looking into this!",
    "You're on the right track!",
    "Excellent question!",
    "Love that you're being proactive!",
]

# Lead-ins picked at random before an unsolicited extra remark.
PROACTIVE_PHRASES = [
    "I also noticed something else while I was at it",
    "Quick side note -",
    "Oh, and while we're here",
    "By the way, related to this",
    "Just flagging this too",
    "Something else to keep an eye on",
]
203
-
204
- import random
205
-
206
- # --- Backend PAM Class ---
207
class PAM:
    """Backend PAM - Nerdy, Proactive Lab Assistant.

    Wraps the loaded log and compliance data and exposes text commands
    (see ``process_input``) for PHI detection, log parsing, summarization,
    log review and compliance checks. Every public method returns a dict
    with at least a human-readable ``"message"`` and the ``"role"`` prompt.
    """

    def __init__(self, data: Dict[str, Dict]):
        """Store the ``"LOGS"`` and ``"COMPLIANCE"`` sections of ``data``."""
        self.LOGS = data.get("LOGS", {})
        self.COMPLIANCE = data.get("COMPLIANCE", {})

        # Track findings for proactive suggestions
        self.recent_findings = []

    def _get_nerdy_intro(self) -> str:
        """Get a random nerdy introduction"""
        return random.choice(NERDY_INTROS)

    def _get_encouragement(self) -> str:
        """Get a random encouraging phrase"""
        return random.choice(ENCOURAGEMENT)

    def _get_proactive_phrase(self) -> str:
        """Get a random proactive phrase"""
        return random.choice(PROACTIVE_PHRASES)

    def _check_api_health(self) -> bool:
        """Report whether an API token is configured.

        Uses truthiness so that an empty token counts as missing, matching
        the module-level ``if not HF_API_TOKEN`` check. No network request
        is made.
        """
        return bool(HF_API_TOKEN)

    @staticmethod
    def _extract_argument(user_input: str, command: str, optional_suffix: str = "") -> str:
        """Return the text that follows ``command`` in ``user_input``.

        The search is case-insensitive and runs on the *original* string, so
        the returned slice is never shifted by leading whitespace or by the
        lowercasing that command routing uses.

        Args:
            user_input: Raw text typed by the user.
            command: Command phrase to look for, e.g. ``"parse log"``.
            optional_suffix: Word that may follow the command and belongs to
                it (e.g. ``"in"`` for "detect phi in ..."); it is consumed
                only when it appears as a whole word.

        Returns:
            The stripped remainder after the command, or the whole stripped
            input when the command is not found.
        """
        import re  # local import: keeps this block self-contained

        pattern = re.escape(command)
        if optional_suffix:
            pattern += rf"(?:\s+{re.escape(optional_suffix)}\b)?"
        match = re.search(pattern, user_input, flags=re.IGNORECASE)
        if match is None:
            return user_input.strip()
        return user_input[match.end():].strip()

    def detect_phi(self, text: str) -> Dict[str, Any]:
        """Detect Protected Health Information (PHI) using NER.

        Returns a dict with ``message``, ``role``, ``has_phi`` (``None`` when
        the scan could not run) and ``entities``; successful scans also carry
        a ``recommendation``.
        """
        intro = self._get_nerdy_intro()

        if not self._check_api_health():
            return {
                "message": "⚠️ Hmm, I'm having trouble connecting to the analysis models right now. Let me flag this text for manual review instead!",
                "role": PAM_ROLE,
                "has_phi": None,
                "entities": []
            }

        # Call NER model
        result = hf_infer("phi_ner", {"inputs": text})

        if isinstance(result, dict) and "error" in result:
            return {
                "message": f"🔍 I tried to scan for PHI, but hit a snag: {result['error']}. I'd recommend a manual review just to be safe!",
                "role": PAM_ROLE,
                "has_phi": None,
                "entities": []
            }

        # Keep only confident hits in PHI-relevant entity groups.
        # NOTE(review): the configured NER model may never emit "DATE" -
        # confirm against its label set.
        phi_entities = []
        if isinstance(result, list):
            phi_entities = [
                e for e in result
                if e.get("entity_group") in ["PER", "LOC", "ORG", "DATE"]
                and e.get("score", 0) > 0.7
            ]

        has_phi = len(phi_entities) > 0

        if has_phi:
            # Show at most three entities and signal truncation with "...".
            entities_summary = ", ".join(f"{e['word']} ({e['entity_group']})" for e in phi_entities[:3])
            message = f"🔒 {intro} I detected {len(phi_entities)} potential PHI entities in this text: {entities_summary}{'...' if len(phi_entities) > 3 else ''}. Definitely want to redact these before storing or sharing!"
            # Proactive suggestion
            message += f" {self._get_proactive_phrase()} - if you're logging this anywhere, make sure those logs are encrypted and access-controlled."
        else:
            message = f"✅ {intro} This text looks clean - no PHI detected! Safe to proceed with normal handling."

        return {
            "message": message,
            "role": PAM_ROLE,
            "has_phi": has_phi,
            "entities": phi_entities,
            "recommendation": "Redact PHI before storage" if has_phi else "No action needed"
        }

    def parse_log(self, log_text: str) -> Dict[str, Any]:
        """Parse and analyze a log entry for security relevance.

        Severity always comes from the keyword-based ``classify_severity``;
        the NER model only contributes ``log_entities``. Both the online and
        the offline result carry a ``timestamp``.
        """
        intro = self._get_nerdy_intro()
        severity = classify_severity(log_text)

        if not self._check_api_health():
            return {
                "message": "⚠️ Can't connect to the log parser right now. I'll do a quick manual analysis instead!",
                "role": PAM_ROLE,
                "severity": severity,
                "log_entities": [],
                "timestamp": datetime.now().isoformat()
            }

        # Call NER model for log parsing; an error dict simply yields no entities.
        result = hf_infer("log_ner", {"inputs": log_text})

        parsed_entities = []
        if isinstance(result, list):
            parsed_entities = [e for e in result if e.get("score", 0) > 0.6]

        # Build informative response
        severity_emoji = {"CRITICAL": "🚨", "WARNING": "⚠️", "INFO": "ℹ️"}
        emoji = severity_emoji.get(severity, "📝")

        message = f"{emoji} {intro} This log entry is classified as **{severity}** priority."

        if severity == "CRITICAL":
            message += " This needs immediate attention! I'd recommend investigating ASAP and documenting the incident."
        elif severity == "WARNING":
            message += " Worth keeping an eye on this - might escalate if we see more like it."
        else:
            message += " Just routine activity, but good to have it logged for the audit trail."

        # Add entity details if found
        if parsed_entities:
            message += f" I extracted {len(parsed_entities)} key entities from the log."

        return {
            "message": message,
            "role": PAM_ROLE,
            "severity": severity,
            "log_entities": parsed_entities,
            "timestamp": datetime.now().isoformat()
        }

    def summarize(self, raw_text: str) -> Dict[str, Any]:
        """Generate a technical summary of ``raw_text``.

        Returns a dict with ``message``, ``role`` and ``summary`` (``None``
        when summarization was not possible); successful results also carry
        ``original_length`` and ``summary_length`` in characters.
        """
        encouragement = self._get_encouragement()

        if not self._check_api_health():
            return {
                "message": f"⚠️ {encouragement} But I can't access the summarization model right now. Can you share a bit more context on what you need?",
                "role": PAM_ROLE,
                "summary": None
            }

        # Keep the request small; note this cuts at 1024 *characters*.
        truncated_text = raw_text[:1024]

        result = hf_infer("summarizer", {
            "inputs": truncated_text,
            "parameters": {
                "max_length": 130,
                "min_length": 30,
                "do_sample": False
            }
        })

        if isinstance(result, dict) and "error" in result:
            return {
                "message": f"🤔 {encouragement} I tried to summarize this but hit a technical issue. Could you break it into smaller chunks?",
                "role": PAM_ROLE,
                "summary": None
            }

        # Guard against an empty list as well as an unexpected result type.
        summary_text = ""
        if isinstance(result, list) and result:
            summary_text = result[0].get("summary_text", "")

        return {
            "message": f"📊 {encouragement} Here's the TL;DR of what you shared:",
            "role": PAM_ROLE,
            "summary": summary_text,
            "original_length": len(raw_text),
            "summary_length": len(summary_text)
        }

    def get_latest_logs(self) -> Dict[str, Any]:
        """Retrieve and analyze the entries under ``LOGS["latest_logs"]``.

        Returns a dict with ``message``, ``role``, the formatted ``logs``,
        a per-severity ``summary`` and ``handoff_to_frontend`` (formatted
        entries that mention client-facing keywords).
        """
        intro = self._get_nerdy_intro()

        latest = self.LOGS.get("latest_logs")
        if not latest:
            return {
                "message": "🤔 Hmm, I'm not seeing any logs in the system right now. Either nothing's being logged, or there's a data loading issue. Want me to check the log file paths?",
                "role": PAM_ROLE,
                "logs": [],
                "handoff_to_frontend": []
            }

        full_logset = []
        client_handoffs = []
        critical_count = 0
        warning_count = 0
        client_keywords = ("frontend", "provider unavailable", "user", "client")

        for item in latest:
            # "or" also covers an explicit null entry in the data file.
            entry = item.get("entry") or ""
            timestamp = item.get("timestamp", "Unknown time")
            severity = classify_severity(entry)

            # Count severity levels
            if severity == "CRITICAL":
                critical_count += 1
            elif severity == "WARNING":
                warning_count += 1

            formatted = f"[{timestamp}] ({severity}) {entry}"
            full_logset.append(formatted)

            # Identify client-facing issues that Frontend PAM should handle
            entry_lower = entry.lower()
            if any(keyword in entry_lower for keyword in client_keywords):
                client_handoffs.append(formatted)

        # Build proactive, informative response
        total = len(full_logset)
        message = f"📡 {intro} I reviewed {total} recent log entries. "

        if critical_count > 0:
            message += f"**Heads up:** {critical_count} critical issues detected that need immediate action! "
        if warning_count > 0:
            message += f"{warning_count} warnings worth monitoring. "
        if critical_count == 0 and warning_count == 0:
            message += "Everything looks stable - no major issues! "

        if client_handoffs:
            message += f"\n\n{self._get_proactive_phrase()} - {len(client_handoffs)} of these are client-facing issues. I'll pass those to Frontend PAM to handle with users."

        return {
            "message": message,
            "role": PAM_ROLE,
            "logs": full_logset,
            "summary": {
                "total": total,
                "critical": critical_count,
                "warnings": warning_count,
                "info": total - critical_count - warning_count
            },
            "handoff_to_frontend": client_handoffs
        }

    def check_compliance(self) -> Dict[str, Any]:
        """Run a compliance status check and provide recommendations.

        Each key of ``COMPLIANCE`` is one check; a truthy value means it
        passed. Returns ``message``, ``role`` and ``compliance_report``,
        plus ``compliance_rate`` (percent) and ``non_compliant`` when
        compliance data is available.
        """
        encouragement = self._get_encouragement()

        if not self.COMPLIANCE:
            return {
                "message": f"🤔 {encouragement} But I don't have access to the compliance data right now. Let me know if you need me to check the data file setup!",
                "role": PAM_ROLE,
                "compliance_report": []
            }

        report = []
        compliant_count = 0
        non_compliant_items = []

        for item, status in self.COMPLIANCE.items():
            emoji = "✅" if status else "❌"
            readable_item = item.replace('_', ' ').title()
            report.append(f"{emoji} {readable_item}")

            if status:
                compliant_count += 1
            else:
                non_compliant_items.append(readable_item)

        total = len(self.COMPLIANCE)
        compliance_rate = (compliant_count / total * 100) if total > 0 else 0

        # Build informative, proactive response
        message = f"🛡️ {encouragement} Here's the compliance status:\n\n"
        message += f"**Overall:** {compliant_count}/{total} checks passed ({compliance_rate:.1f}%)\n\n"

        if non_compliant_items:
            message += f"**Action needed:** We have {len(non_compliant_items)} items out of compliance:\n"
            message += "".join(f" • {item}\n" for item in non_compliant_items)
            message += f"\n{self._get_proactive_phrase()} - I can help you prioritize these if you want to tackle them systematically!"
        else:
            message += "🎉 Everything's in compliance! Great work keeping things locked down."

        return {
            "message": message,
            "role": PAM_ROLE,
            "compliance_report": report,
            "compliance_rate": compliance_rate,
            "non_compliant": non_compliant_items
        }

    def process_input(self, user_input: str) -> Dict[str, Any]:
        """Route a free-text command to the matching handler.

        Matching is a case-insensitive substring check, tried in this order:
        compliance, logs, PHI detection, log parsing, summarization. Anything
        else gets a help message listing the available commands.
        """
        u_input = user_input.lower().strip()

        # Command routing with personality
        if "check compliance" in u_input or "compliance status" in u_input:
            return self.check_compliance()

        if "get logs" in u_input or "latest logs" in u_input or "show logs" in u_input:
            return self.get_latest_logs()

        if "detect phi" in u_input:
            # Accepts both "detect phi in <text>" and "detect phi <text>".
            return self.detect_phi(self._extract_argument(user_input, "detect phi", "in"))

        if "parse log" in u_input:
            return self.parse_log(self._extract_argument(user_input, "parse log"))

        if "summarize" in u_input or "explain" in u_input:
            return self.summarize(user_input)

        # Helpful default response with encouragement
        encouragement = self._get_encouragement()
        return {
            "message": f"👋 Hey! {encouragement} I'm PAM, your backend technical assistant. I can help you with:\n\n"
                       "• **check compliance** - Review compliance status\n"
                       "• **get logs** - Pull latest system logs\n"
                       "• **detect phi in [text]** - Scan for protected health info\n"
                       "• **parse log [entry]** - Analyze a specific log\n"
                       "• **summarize [text]** - Generate a technical summary\n\n"
                       "What would you like me to look into?",
            "role": PAM_ROLE
        }
518
-
519
-
520
- # --- Quick Test ---
521
if __name__ == "__main__":
    # Manual smoke test: run a few representative commands and print replies.
    print("🤓 Testing Backend PAM (Nerdy Lab Assistant)...\n")
    pam = load_agent()

    test_commands = [
        "check compliance",
        "get logs",
        "detect phi in Patient John Doe visited on 2024-03-15 at Memorial Hospital",
    ]

    divider = "=" * 60
    for cmd in test_commands:
        print("\n" + divider)
        print(f"COMMAND: {cmd}")
        print(divider)
        response = pam.process_input(cmd)
        # Fall back to the raw response if it carries no "message".
        print(response.get("message", response))