import os import sys # Ensure local logbert_processor and logparser are first in sys.path for all imports sys.path.insert(0, os.path.abspath(os.path.dirname(__file__))) sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), 'logparser'))) from bert_pytorch.model.log_model import BERTLog from bert_pytorch.model.bert import BERT from bert_pytorch.dataset import LogDataset, WordVocab import Drain from torch.utils.data import DataLoader from collections import defaultdict from tqdm import tqdm import numpy as np import pandas as pd import torch import time import json import ast import re # === Constants === TOP_EVENTS = 5 MAX_RCA_TOKENS = 200 # === Log Parsing === def parse_log_with_drain(log_file, input_dir, output_dir): regex = [ r"appattempt_\d+_\d+_\d+", r"job_\d+_\d+", r"task_\d+_\d+_[a-z]+_\d+", r"container_\d+", r"\b(?:\d{1,3}\.){3}\d{1,3}\b", r"(?]