import json
import logging
from pathlib import Path
from typing import Dict, Iterable, List


def ensure_dirs(paths: Iterable[Path]) -> None:
    """Create every directory in *paths*, including missing parents.

    Directories that already exist are left untouched.
    """
    for path in paths:
        path.mkdir(parents=True, exist_ok=True)


def setup_logger(name: str, log_file: Path, level: int = logging.INFO) -> logging.Logger:
    """Return the logger called *name*, writing to *log_file* and a stream.

    The parent directory of *log_file* is created if needed and the logger's
    level is set to *level* on every call. Handlers are attached only when
    the logger has none yet, so repeated calls with the same *name* do not
    duplicate output; in that case the existing handlers are kept and
    *log_file* is not attached again.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Destination file for the file handler (UTF-8).
        level: Logging level applied to the logger.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    log_file.parent.mkdir(parents=True, exist_ok=True)

    logger = logging.getLogger(name)
    logger.setLevel(level)

    # Already configured by an earlier call: do not stack duplicate handlers.
    if logger.handlers:
        return logger

    formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    file_handler = logging.FileHandler(log_file, encoding="utf-8")
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(formatter)
    logger.addHandler(stream_handler)

    return logger


def write_jsonl(path: Path, rows: Iterable[Dict[str, str]]) -> None:
    """Write *rows* to *path* as JSON Lines, one object per line.

    The parent directory is created if needed and any existing file is
    overwritten. Non-ASCII characters are written as-is (UTF-8) rather than
    escaped. *rows* may be any iterable, including a generator.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as f:
        for row in rows:
            f.write(json.dumps(row, ensure_ascii=False) + "\n")


def read_jsonl(path: Path) -> List[Dict[str, str]]:
    """Read a JSON Lines file and return its records in file order.

    Blank (whitespace-only) lines are skipped. A leading UTF-8 byte-order
    mark, if present, is ignored.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with ``<path>:<line number>`` so the bad
            record can be located.
    """
    rows: List[Dict[str, str]] = []
    # "utf-8-sig" decodes plain UTF-8 identically but drops a leading BOM,
    # which str.strip() would otherwise leave in front of the first record.
    with path.open("r", encoding="utf-8-sig") as f:
        for line_no, raw_line in enumerate(f, start=1):
            text = raw_line.strip()
            if not text:
                continue
            try:
                rows.append(json.loads(text))
            except json.JSONDecodeError as err:
                # Same exception type, so existing handlers keep working;
                # only the message gains file/line context.
                raise json.JSONDecodeError(
                    f"{path}:{line_no}: {err.msg}", err.doc, err.pos
                ) from err
    return rows