File size: 1,518 Bytes
c6e5251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from __future__ import annotations
import os, json, math, random, time, logging, sys
from dataclasses import dataclass
from typing import Iterable, List, Dict, Any

from . import config as CFG

# Module-level cache for the logger built by get_logger(); None until first call.
_LOGGER = None


def get_logger():
    """Return the shared ``'bpe'`` logger, configuring it on first use.

    On the first call the logger is set to INFO level and, unless it already
    has handlers, ``CFG.OUTPUT_DIR`` is created if missing and two handlers
    are attached: a UTF-8 file handler writing to ``train.log`` inside that
    directory and a stream handler on ``sys.stdout``. Both use the format
    ``'%(asctime)s %(levelname)s %(message)s'``. Later calls return the
    cached logger without touching it.

    Returns:
        The configured ``logging.Logger`` named ``'bpe'``.
    """
    global _LOGGER
    if _LOGGER is not None:
        return _LOGGER
    logger = logging.getLogger('bpe')
    logger.setLevel(logging.INFO)
    # logging.getLogger() hands back the same object for the same name, so the
    # logger may already carry handlers even though the module cache is empty
    # (for example after this module is reloaded). Attaching a second pair
    # would emit every record twice, so only configure a bare logger.
    if not logger.handlers:
        os.makedirs(CFG.OUTPUT_DIR, exist_ok=True)
        log_path = os.path.join(CFG.OUTPUT_DIR, 'train.log')
        formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
        fh = logging.FileHandler(log_path, encoding='utf-8')
        fh.setFormatter(formatter)
        sh = logging.StreamHandler(sys.stdout)
        sh.setFormatter(formatter)
        logger.addHandler(fh)
        logger.addHandler(sh)
    _LOGGER = logger
    return logger


def save_json(path: str, obj: Any):
    """Serialize *obj* as JSON into the file at *path*.

    The file is opened for writing as UTF-8 text. Output is indented by two
    spaces and non-ASCII characters are written as-is rather than escaped.
    """
    with open(path, mode='w', encoding='utf-8') as handle:
        json.dump(obj, handle, indent=2, ensure_ascii=False)


def load_json(path: str):
    """Read the UTF-8 text file at *path* and return its parsed JSON content."""
    with open(path, mode='r', encoding='utf-8') as handle:
        text = handle.read()
    return json.loads(text)


def set_seed(seed: int):
    """Seed the standard-library ``random`` module's shared generator.

    Only ``random`` is seeded here; no other source of randomness is touched.
    """
    random.seed(a=seed)


def chunks(iterable: Iterable, size: int) -> Iterable[List[Any]]:
    """Yield consecutive lists of at most *size* items taken from *iterable*.

    Items keep their original order. Every yielded list has exactly *size*
    items except possibly the last, which holds whatever remains. An empty
    *iterable* yields nothing. The input is consumed lazily, one item at a
    time, so it may be any iterable, including a generator.

    Args:
        iterable: Source of items to group.
        size: Maximum number of items per yielded list; must be at least 1.

    Raises:
        ValueError: If *size* is less than 1. Because this is a generator
            function, the error surfaces when iteration starts, not when
            ``chunks`` is called.
    """
    # Without this check a non-positive size makes the "bucket is full" test
    # below true after every single item, silently producing one-element
    # chunks instead of exposing the caller's mistake.
    if size < 1:
        raise ValueError(f'size must be at least 1, got {size!r}')
    bucket = []
    for x in iterable:
        bucket.append(x)
        if len(bucket) >= size:
            yield bucket
            # Start a fresh list so the one just yielded is never mutated.
            bucket = []
    if bucket:
        # Trailing partial chunk.
        yield bucket

class Timer:
    """Minimal stopwatch that starts counting when it is constructed.

    ``start`` holds the ``time.time()`` reading taken at construction.
    """

    def __init__(self):
        # Wall-clock reading from time.time(), captured at creation.
        self.start = time.time()

    def elapsed(self):
        """Return the seconds between ``start`` and the current ``time.time()``."""
        now = time.time()
        return now - self.start