| import logging |
| from pathlib import Path |
| from typing import Dict |
| from typing import List |
| from typing import Union |
|
|
| from typeguard import check_argument_types |
|
|
|
|
def read_2column_text(path: Union[Path, str]) -> Dict[str, str]:
    """Read a text file having 2 columns as a dict object.

    Each line is split on the first run of whitespace: the first field is
    the key and the remainder of the line (trailing whitespace removed) is
    the value. A line that contains only a key maps that key to "".

    Examples:
        wav.scp:
            key1 /some/path/a.wav
            key2 /some/path/b.wav

        >>> read_2column_text('wav.scp')
        {'key1': '/some/path/a.wav', 'key2': '/some/path/b.wav'}

    Args:
        path: Path of the text file, read as UTF-8.

    Returns:
        A dict mapping each key to its value string.

    Raises:
        ValueError: If a line is empty or contains only whitespace.
        RuntimeError: If the same key appears on more than one line.
    """
    assert check_argument_types()

    data: Dict[str, str] = {}
    with Path(path).open("r", encoding="utf-8") as f:
        for linenum, line in enumerate(f, 1):
            fields = line.rstrip().split(maxsplit=1)
            if not fields:
                # A blank line used to fail with an opaque tuple-unpacking
                # ValueError; keep the exception type but say where it is.
                raise ValueError(f"Empty line found ({path}:{linenum})")
            key = fields[0]
            # A key without a value is allowed and maps to "".
            value = fields[1] if len(fields) == 2 else ""
            if key in data:
                raise RuntimeError(f"{key} is duplicated ({path}:{linenum})")
            data[key] = value
    return data
|
|
|
|
def load_num_sequence_text(
    path: Union[Path, str], loader_type: str = "csv_int"
) -> Dict[str, List[Union[float, int]]]:
    """Read a text file in which each key is followed by a number sequence.

    The file is read with ``read_2column_text``; the value part of every
    line is then split on the delimiter selected by ``loader_type`` and
    each piece is converted to a number.

    Supported ``loader_type`` values:
        "text_int":   pieces separated by a single space, parsed with int
        "text_float": pieces separated by a single space, parsed with float
        "csv_int":    pieces separated by ",", parsed with int (default)
        "csv_float":  pieces separated by ",", parsed with float

    Examples:
        text (space separated, loader_type="text_int"):
            key1 1 2 3
            key2 34 5 6

        >>> d = load_num_sequence_text('text', loader_type='text_int')
        >>> d["key1"]
        [1, 2, 3]

        text (comma separated, default loader_type="csv_int"):
            key1 1,2,3
            key2 34,5,6

        >>> d = load_num_sequence_text('text')
        >>> d["key2"]
        [34, 5, 6]

    Args:
        path: Path of the text file.
        loader_type: One of the loader types listed above.

    Returns:
        A dict mapping each key to a list of int or float values.

    Raises:
        ValueError: If ``loader_type`` is not supported, or if a piece of a
            value cannot be parsed as a number (the offending entry is
            logged first, then the original exception is re-raised).
    """
    assert check_argument_types()
    if loader_type == "text_int":
        delimiter = " "
        dtype = int
    elif loader_type == "text_float":
        delimiter = " "
        dtype = float
    elif loader_type == "csv_int":
        delimiter = ","
        dtype = int
    elif loader_type == "csv_float":
        delimiter = ","
        dtype = float
    else:
        raise ValueError(f"Not supported loader_type={loader_type}")

    d = read_2column_text(path)

    retval = {}
    for k, v in d.items():
        try:
            retval[k] = [dtype(i) for i in v.split(delimiter)]
        except (TypeError, ValueError):
            # int()/float() signal malformed text with ValueError, not
            # TypeError, so both are caught here; otherwise the location
            # of the bad entry would never be logged.
            logging.error(f'Error happened with path="{path}", id="{k}", value="{v}"')
            raise
    return retval
|
|