from dataclasses import dataclass
from enum import Enum
from datetime import datetime
import re

# Leading timestamp of a log line, e.g. "2025-05-08 16:17:28,468".
_TIMESTAMP_RE = re.compile(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}")
# Everything after the first "]:" on the line (the bracketed tag's payload).
_CONTENT_RE = re.compile(r".*?]:\s*(.*)")
_TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S,%f"


class LogTag(Enum):
    """Known pipeline log events.

    Each member's value is a text fragment; presumably callers look for it
    inside a log line to classify that line (verify against the caller).
    """

    load_start = "start initial TranslatePipe"
    load_end = "Pipeline is ready."
    audio_end = "Audio buffer length"
    transcribe_end = "transcribe output"
    transcribe_cost = "transcribe cost"
    translate_start = "Translation input"
    translate_end = "Translation out"
    translate_cost = "Translate cost"
    translate_large_end = "Translation large model output"
    translate_large_cost = "Translate-large cost"


@dataclass
class LogItem:
    """One parsed log line: when it was logged, its tag, and its payload."""

    timestamp: datetime
    tag: LogTag
    content: str = ""

    @classmethod
    def from_log(cls, log_tag: LogTag, log_line: str) -> "LogItem":
        """Build a ``LogItem`` from a raw log line.

        The line must start with a ``YYYY-MM-DD HH:MM:SS,mmm`` timestamp.
        The content is whatever follows the first ``]:`` on the line, with
        leading whitespace removed; it is ``""`` when the line has no ``]:``.

        Args:
            log_tag: Tag to attach to the resulting item.
            log_line: The raw log line to parse.

        Returns:
            The parsed ``LogItem``.

        Raises:
            ValueError: If the line does not start with a valid timestamp.
        """
        stamp_match = _TIMESTAMP_RE.match(log_line)
        if stamp_match is None:
            raise ValueError(
                f"log line does not start with a timestamp: {log_line!r}"
            )

        try:
            timestamp = datetime.strptime(stamp_match.group(0), _TIMESTAMP_FORMAT)
        except ValueError as err:
            # The digits matched the pattern but are not a real date/time
            # (e.g. month 13); report the offending line, keep the cause.
            raise ValueError(
                f"log line has an invalid timestamp: {log_line!r}"
            ) from err

        content_match = _CONTENT_RE.match(log_line)
        content = content_match.group(1) if content_match else ""
        return cls(timestamp=timestamp, tag=log_tag, content=content)


@dataclass
class WebItem:
    """A timestamped pair of source and destination text."""

    timestamp: datetime
    src_text: str  # presumably the original (pre-translation) text -- confirm
    dst_text: str  # presumably the translated text -- confirm


if __name__ == '__main__':
    a = LogItem.from_log(
        LogTag.transcribe_end,
        "2025-05-08 16:17:28,468 - INFO - [ 📝 transcribe output ]: Today is Friday.",
    )
    print(a)