File size: 1,881 Bytes
abc86a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import re

# Maps a regex (matched case-insensitively by classify_with_regex) to the
# category label assigned to any log message the regex is found in.
# Insertion order matters: patterns are tried top to bottom.
REGEX_PATTERNS: dict[str, str] = {
    # Messages about user sessions and account creation.
    **dict.fromkeys(
        (
            r"User\s+\w+\d+\s+logged\s+(in|out)",
            r"Account\s+(?:with\s+)?ID\s+\S+\s+created\s+by",
        ),
        "User Action",
    ),
    # Messages about backups, updates, uploads, cleanup, reboots,
    # maintenance windows and service restarts.
    **dict.fromkeys(
        (
            r"Backup\s+(started|ended|completed\s+successfully)",
            r"System\s+updated\s+to\s+version",
            r"File\s+\S+\s+uploaded\s+successfully\s+by\s+user",
            r"Disk\s+cleanup\s+completed\s+successfully",
            r"System\s+reboot\s+initiated\s+by\s+user",
            r"Scheduled\s+maintenance\s+(started|completed)",
            r"Service\s+\w+\s+restarted\s+successfully",
        ),
        "System Notification",
    ),
}

def classify_with_regex(log_message: str) -> str | None:
    """
    Tier 1: classify a log message with the rule-based regex table.

    Each pattern in REGEX_PATTERNS is searched for anywhere in
    ``log_message``, ignoring case, in the table's insertion order.

    Returns the label of the first pattern that matches, or None when
    no pattern matches.
    """
    # Lazy generator: evaluation stops at the first pattern that hits.
    matching_labels = (
        category
        for regex, category in REGEX_PATTERNS.items()
        if re.search(regex, log_message, flags=re.IGNORECASE)
    )
    return next(matching_labels, None)


def get_regex_coverage(log_messages: list[str]) -> dict:
    """Measure regex tier coverage on a list of log messages.

    Args:
        log_messages: Log messages to run through classify_with_regex.

    Returns:
        A dict with three keys:
        ``total``        - number of messages supplied,
        ``matched``      - how many received a non-None label,
        ``coverage_pct`` - matched / total as a percentage rounded to
                           two decimals; 0.0 when ``log_messages`` is empty.
    """
    total = len(log_messages)
    matched = sum(1 for msg in log_messages if classify_with_regex(msg) is not None)
    # An empty batch has no meaningful ratio; report 0.0 instead of
    # raising ZeroDivisionError.
    coverage_pct = round(matched / total * 100, 2) if total else 0.0
    return {
        "total": total,
        "matched": matched,
        "coverage_pct": coverage_pct,
    }


if __name__ == "__main__":
    # Manual smoke test: classify a few sample messages and print one
    # row per message with a hit/miss marker, the label, and the message.
    test_logs = [
        "User User123 logged in.",
        "Backup completed successfully.",
        "Account with ID 456 created by Admin.",
        # The two messages below are expected to yield None.
        "GET /api/v2/resource HTTP/1.1 status: 200",
        "Hey bro chill ya!",
    ]
    for message in test_logs:
        label = classify_with_regex(message)
        marker = "✓" if label else "✗"
        shown_label = label or "None"
        # Label is padded to 25 columns; message is truncated to 60 chars.
        print(f"[{marker}] {shown_label:25s} | {message[:60]}")