| import json |
| import logging |
| from pathlib import Path |
| from typing import Dict, Iterable, List |
|
|
|
|
def ensure_dirs(paths: Iterable[Path]) -> None:
    """Create every directory in *paths*, including missing parents.

    Directories that already exist are left untouched (``exist_ok=True``).

    Args:
        paths: Directory paths to create.
    """
    for directory in paths:
        directory.mkdir(parents=True, exist_ok=True)
|
|
|
|
def setup_logger(name: str, log_file: Path, level: int = logging.INFO) -> logging.Logger:
    """Return the logger called *name*, writing to *log_file* and to a stream.

    The parent directory of *log_file* is created if needed and the logger's
    level is set to *level* on every call. Handlers are attached only when
    the logger has none yet; a logger that already has any handler is
    returned with its existing handlers unchanged.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Destination file for the file handler (opened as UTF-8).
        level: Logging level applied to the logger.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    log_file.parent.mkdir(parents=True, exist_ok=True)

    log = logging.getLogger(name)
    log.setLevel(level)

    # Only wire up handlers on first configuration, so repeated calls do not
    # attach duplicates to the same logger.
    if not log.handlers:
        line_format = logging.Formatter(
            fmt="%(asctime)s | %(levelname)s | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
        # File handler first, then the stream handler, sharing one formatter.
        for handler in (
            logging.FileHandler(log_file, encoding="utf-8"),
            logging.StreamHandler(),
        ):
            handler.setFormatter(line_format)
            log.addHandler(handler)

    return log
|
|
|
|
def write_jsonl(path: Path, rows: List[Dict[str, str]]) -> None:
    """Write *rows* to *path* as JSON Lines, one object per line.

    The parent directory is created if missing and any existing file is
    overwritten. Non-ASCII characters are written as-is (UTF-8), not escaped.

    Args:
        path: Output file path.
        rows: Records to serialize, one per output line.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8") as out:
        out.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in rows
        )
|
|
|
|
def read_jsonl(path: Path) -> List[Dict[str, str]]:
    """Load a JSON Lines file into a list of parsed records.

    Each line is stripped of surrounding whitespace; lines that are empty
    after stripping are skipped, and the rest are parsed with ``json.loads``.

    Args:
        path: File to read (decoded as UTF-8).

    Returns:
        The parsed records in file order.
    """
    with path.open("r", encoding="utf-8") as src:
        stripped_lines = (raw.strip() for raw in src)
        return [json.loads(text) for text in stripped_lines if text]
|
|
|
|