File size: 1,386 Bytes
7feac49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Copyright (c) Alibaba, Inc. and its affiliates.
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pandas as pd


def transform_jsonl_to_df(dict_list: List[Dict[str, Any]]) -> pd.DataFrame:
    """Convert a list of (possibly heterogeneous) dicts into a DataFrame.

    Each dict becomes one row and each distinct key becomes one column;
    columns appear in the order their keys are first encountered. A row
    that lacks a given key gets ``None`` in that column.

    Relevant function: `io_utils.read_from_jsonl()`
    """
    columns: Dict[str, List[Any]] = {}
    for row_idx, record in enumerate(dict_list):
        # Pad every already-known column that this record does not provide.
        for name, values in columns.items():
            if name not in record:
                values.append(None)
        for name, value in record.items():
            values = columns.get(name)
            if values is None:
                # First time this key is seen: back-fill the earlier rows.
                values = [None] * row_idx
                columns[name] = values
            values.append(value)
    return pd.DataFrame.from_dict(columns)


def get_seed(random_state: Optional[np.random.RandomState] = None) -> int:
    """Draw a random integer seed from ``random_state``.

    Args:
        random_state: Generator to draw from. When ``None``, a fresh
            ``np.random.RandomState()`` is created for this call.

    Returns:
        The value of ``randint(0, int32_max)`` on the generator, where
        ``int32_max`` is ``np.iinfo(np.int32).max``.
    """
    rng = np.random.RandomState() if random_state is None else random_state
    upper_bound = np.iinfo(np.int32).max
    return rng.randint(0, upper_bound)


def stat_array(
        array: Union[np.ndarray, List[int], Tuple[int, ...], 'torch.Tensor']) -> Tuple[Dict[str, float], str]:
    """Summarize an array with its mean, std, min, max and size.

    Args:
        array: The values to summarize. A ``list`` or ``tuple`` is converted
            to a NumPy array first; any other object is used as-is and must
            provide ``mean()``, ``std()``, ``min()``, ``max()`` (each
            returning something with ``.item()``) and ``shape``.

    Returns:
        A tuple ``(stats, string)`` where ``stats`` is a dict with the keys
        ``'mean'``, ``'std'``, ``'min'``, ``'max'`` and ``'size'``, and
        ``string`` is a one-line human-readable rendering of the same values.

    Note:
        ``size`` is ``array.shape[0]``, i.e. the length of the first axis,
        not the total number of elements. The statistics are computed by
        the input object's own methods, so they follow that object's
        conventions.
    """
    # Plain Python sequences have no .mean()/.std(); wrap them in an ndarray.
    if isinstance(array, (list, tuple)):
        array = np.array(array)
    mean = array.mean().item()
    std = array.std().item()
    min_ = array.min().item()
    max_ = array.max().item()
    size = array.shape[0]
    string = f'{mean:.6f}±{std:.6f}, min={min_:.6f}, max={max_:.6f}, size={size}'
    return {'mean': mean, 'std': std, 'min': min_, 'max': max_, 'size': size}, string