| import re |
|
|
# Category labels shared by the rule table below.
_USER_ACTION = "User Action"
_SYSTEM_NOTIFICATION = "System Notification"

# Ordered mapping of regex source -> category label.
# Insertion order is significant: classify_with_regex walks the table from
# top to bottom and returns the label of the first pattern that matches.
REGEX_PATTERNS = {
    # Account / session events.
    **dict.fromkeys(
        (
            # NOTE: "\w+\d+" only matches user names that end in a digit.
            r"User\s+\w+\d+\s+logged\s+(in|out)",
            r"Account\s+(?:with\s+)?ID\s+\S+\s+created\s+by",
        ),
        _USER_ACTION,
    ),
    # Routine system / maintenance events.
    **dict.fromkeys(
        (
            r"Backup\s+(started|ended|completed\s+successfully)",
            r"System\s+updated\s+to\s+version",
            r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user",
            r"Disk\s+cleanup\s+completed\s+successfully",
            r"System\s+reboot\s+initiated\s+by\s+user",
            r"Scheduled\s+maintenance\s+(started|completed)",
            r"Service\s+\w+\s+restarted\s+successfully",
        ),
        _SYSTEM_NOTIFICATION,
    ),
}
|
|
def classify_with_regex(log_message: str) -> str | None:
    """Classify a log message with the Tier 1 rule-based regex table.

    Each pattern in ``REGEX_PATTERNS`` is searched for anywhere in
    ``log_message`` (case-insensitively), in the table's insertion order.

    Args:
        log_message: Raw log line to classify.

    Returns:
        The label of the first pattern that matches, or ``None`` when no
        pattern matches.
    """
    # Lazy generator: patterns after the first hit are never evaluated.
    matching_labels = (
        category
        for regex, category in REGEX_PATTERNS.items()
        if re.search(regex, log_message, flags=re.IGNORECASE)
    )
    return next(matching_labels, None)
|
|
|
|
def get_regex_coverage(log_messages: list[str]) -> dict:
    """Measure how many log messages the regex tier can classify.

    Args:
        log_messages: Log lines to run through ``classify_with_regex``.

    Returns:
        A dict with three keys:
        ``"total"`` - number of messages supplied,
        ``"matched"`` - number for which ``classify_with_regex`` returned a
        label (anything other than ``None``),
        ``"coverage_pct"`` - ``matched / total`` as a percentage rounded to
        two decimals; ``0.0`` when ``log_messages`` is empty.
    """
    total = len(log_messages)
    matched = sum(
        1 for msg in log_messages if classify_with_regex(msg) is not None
    )
    # Guard the division: an empty batch has no coverage rather than
    # raising ZeroDivisionError.
    coverage_pct = round(matched / total * 100, 2) if total else 0.0
    return {
        "total": total,
        "matched": matched,
        "coverage_pct": coverage_pct,
    }
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: print one line per sample showing whether the regex
    # tier produced a label, the label itself, and the (truncated) message.
    test_logs = [
        "User User123 logged in.",
        "Backup completed successfully.",
        "Account with ID 456 created by Admin.",
        "GET /api/v2/resource HTTP/1.1 status: 200",
        "Hey bro chill ya!",
    ]
    # map() is lazy, so each message is classified right before it is printed.
    for log, result in zip(test_logs, map(classify_with_regex, test_logs)):
        print(f"[{'✓' if result else '✗'}] {result or 'None':25s} | {log[:60]}")
|
|