log-classification-system / processor_regex.py
NOT-OMEGA's picture
Upload 9 files
abc86a6 verified
import re
# Category labels shared by the pattern table below.
_USER_ACTION = "User Action"
_SYSTEM_NOTIFICATION = "System Notification"

# Maps a regex source string to the category label it implies.
# classify_with_regex walks this table in insertion order and returns the
# label of the first pattern that matches, so order matters if two patterns
# could ever match the same message.
REGEX_PATTERNS = {
    # --- user activity ---
    r"User\s+\w+\d+\s+logged\s+(in|out)": _USER_ACTION,
    r"Account\s+(?:with\s+)?ID\s+\S+\s+created\s+by": _USER_ACTION,
    # --- system notifications ---
    r"Backup\s+(started|ended|completed\s+successfully)": _SYSTEM_NOTIFICATION,
    r"System\s+updated\s+to\s+version": _SYSTEM_NOTIFICATION,
    r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user": _SYSTEM_NOTIFICATION,
    r"Disk\s+cleanup\s+completed\s+successfully": _SYSTEM_NOTIFICATION,
    r"System\s+reboot\s+initiated\s+by\s+user": _SYSTEM_NOTIFICATION,
    r"Scheduled\s+maintenance\s+(started|completed)": _SYSTEM_NOTIFICATION,
    r"Service\s+\w+\s+restarted\s+successfully": _SYSTEM_NOTIFICATION,
}
def classify_with_regex(log_message: str) -> str | None:
    """
    Tier 1: rule-based classification via the REGEX_PATTERNS table.

    Each pattern is searched anywhere in ``log_message`` (case-insensitive),
    in the table's insertion order; the label of the first pattern that hits
    is returned. Returns None when no pattern matches.

    NOTE(review): this tier was described as sub-millisecond; that figure is
    not measured anywhere in this module.
    """
    # Lazy generator: evaluation stops at the first matching pattern.
    hits = (
        category
        for regex, category in REGEX_PATTERNS.items()
        if re.search(regex, log_message, re.IGNORECASE)
    )
    return next(hits, None)
def get_regex_coverage(log_messages: list[str]) -> dict:
    """Measure regex tier coverage on a list of log messages.

    Args:
        log_messages: Log lines to run through ``classify_with_regex``.

    Returns:
        A dict with three keys:
        ``total`` - number of messages given,
        ``matched`` - how many of them ``classify_with_regex`` labelled
        (i.e. returned something other than None),
        ``coverage_pct`` - ``matched / total`` as a percentage rounded to
        two decimals, or ``0.0`` when ``log_messages`` is empty.
    """
    total = len(log_messages)
    matched = sum(1 for msg in log_messages if classify_with_regex(msg) is not None)
    # An empty batch has no meaningful ratio; report 0.0 rather than raising
    # ZeroDivisionError.
    coverage_pct = round(matched / total * 100, 2) if total else 0.0
    return {
        "total": total,
        "matched": matched,
        "coverage_pct": coverage_pct,
    }
if __name__ == "__main__":
    # Manual smoke test: print one line per sample showing whether the regex
    # tier labelled it, the label (or 'None'), and the first 60 characters.
    sample_logs = [
        "User User123 logged in.",
        "Backup completed successfully.",
        "Account with ID 456 created by Admin.",
        "GET /api/v2/resource HTTP/1.1 status: 200",  # should be None
        "Hey bro chill ya!",  # should be None
    ]
    for line in sample_logs:
        label = classify_with_regex(line)
        mark = "✓" if label else "✗"
        shown = label or "None"
        print(f"[{mark}] {shown:25s} | {line[:60]}")