"""
Utilities for input-output loading/saving.
"""
from typing import Any, List
import yaml
import pickle
import json
import pandas as pd
class PrettySafeLoader(yaml.SafeLoader):
    """YAML loader built on ``yaml.SafeLoader`` with a tuple constructor."""

    def construct_python_tuple(self, node):
        """Turn a YAML sequence node into a Python tuple."""
        items = self.construct_sequence(node)
        return tuple(items)
# Register the tuple constructor for the `python/tuple` YAML tag on the custom loader.
PrettySafeLoader.add_constructor(
    'tag:yaml.org,2002:python/tuple',
    PrettySafeLoader.construct_python_tuple,
)
def load_yml(path: str, loader_type: str = 'default'):
    """Read params from a yml file.

    Args:
        path (str): path to the .yml file
        loader_type (str, optional): 'default' selects ``yaml.Loader``,
            'safe' selects ``PrettySafeLoader``. Defaults to 'default'.

    Returns:
        Any: object (typically dict) loaded from .yml file

    Raises:
        AssertionError: if ``loader_type`` is neither 'default' nor 'safe'.
    """
    # Explicit raise instead of a bare `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so callers see the same type.
    if loader_type not in ('default', 'safe'):
        raise AssertionError(
            f"loader_type must be 'default' or 'safe', got {loader_type!r}"
        )

    # NOTE(security): yaml.Loader can construct arbitrary Python objects.
    # Only use the 'default' loader on trusted files; prefer 'safe' otherwise.
    loader = yaml.Loader if loader_type == 'default' else PrettySafeLoader
    with open(path, 'r') as f:
        data = yaml.load(f, Loader=loader)
    return data
def save_yml(data: dict, path: str):
    """Write ``data`` to a yml file in block (non-flow) style.

    Args:
        data (dict): data object to save
        path (str): destination path of the .yml file
    """
    with open(path, 'w') as out_file:
        yaml.dump(data, stream=out_file, default_flow_style=False)
def load_pkl(path: str, encoding: str = "ascii"):
    """Loads a .pkl file.

    Args:
        path (str): path to the .pkl file
        encoding (str, optional): encoding passed to ``pickle.load``.
            Defaults to "ascii".

    Returns:
        Any: unpickled object
    """
    # NOTE(security): unpickling can execute arbitrary code; only load
    # files that come from a trusted source.
    # The context manager closes the handle even when unpickling fails
    # (the previous bare `open(...)` left it open).
    with open(path, "rb") as f:
        return pickle.load(f, encoding=encoding)
def save_pkl(data: Any, path: str) -> None:
    """Pickle ``data`` and write it to ``path``.

    Args:
        data (Any): object to be saved
        path (str): location of the .pkl file to write
    """
    with open(path, 'wb') as sink:
        pickle.Pickler(sink).dump(data)
def load_json(path: str) -> dict:
    """Read the file at ``path`` (opened in binary mode) and parse it as JSON."""
    with open(path, 'rb') as handle:
        raw = handle.read()
    return json.loads(raw)
def save_json(data: dict, path: str):
    """Serialize ``data`` as JSON with a 2-space indent and write it to ``path``."""
    with open(path, 'w') as out_file:
        json.dump(data, fp=out_file, indent=2)
def load_txt(path: str):
    """Read a text file and return its content split into lines.

    Args:
        path (str): path to the .txt file

    Returns:
        List: lines of the file, without line terminators
    """
    with open(path) as handle:
        return handle.read().splitlines()
def save_txt(data: List[str], path: str):
    """Writes data (lines) to a txt file.

    The lines are joined with a newline; no trailing newline is added.

    Args:
        data (List[str]): list of strings, one per output line
        path (str): path to .txt file

    Raises:
        AssertionError: if ``data`` is not a list.
        TypeError: if an element of ``data`` is not a string.
    """
    # Explicit raise instead of a bare `assert` so the check is not stripped
    # under `python -O`; AssertionError is kept so callers see the same type.
    if not isinstance(data, list):
        raise AssertionError(
            f"data must be a list of strings, got {type(data).__name__}"
        )
    text = "\n".join(data)
    with open(path, "w") as f:
        f.write(text)
def read_spreadsheet(sheet_id, gid, url=None, drop_na=True, **kwargs):
    """Read a Google Sheets tab (or any CSV location) into a DataFrame.

    NOTE(review): a later definition with the same name in this file
    replaces this one when the module is imported.

    Args:
        sheet_id (str): spreadsheet id, used only when ``url`` is None
        gid: tab id placed in the export URL, used only when ``url`` is None
        url (str, optional): CSV location to read directly. Defaults to None.
        drop_na (bool, optional): drop every row containing a NaN. Defaults to True.
        **kwargs: forwarded to ``pd.read_csv``

    Returns:
        pd.DataFrame: the loaded (and optionally NaN-filtered) table
    """
    if url is None:
        url = "".join((
            'https://docs.google.com/spreadsheets/d/',
            sheet_id,
            f'/export?gid={gid}&format=csv',
        ))
    table = pd.read_csv(url, **kwargs)
    # Rows with at least one NaN are removed only when requested.
    return table.dropna(axis=0) if drop_na else table
def load_midi(file, rate=16000):
    """Synthesize a .mid file with ``pretty_midi`` at the given sampling rate.

    Args:
        file (str): path to the MIDI file; must end with '.mid'
        rate (int, optional): value passed as ``fs`` to ``synthesize``. Defaults to 16000.

    Returns:
        tuple: (synthesized output, rate)
    """
    import pretty_midi

    assert file.endswith('.mid')
    midi = pretty_midi.PrettyMIDI(file)
    return midi.synthesize(fs=rate), rate
def load_ptz(path):
    """Load a gzip-compressed ``torch.save`` file.

    Args:
        path (str): path to the gzip-compressed file

    Returns:
        Any: object returned by ``torch.load``
    """
    import gzip
    import torch

    with gzip.open(path, 'rb') as compressed:
        return torch.load(compressed)
def save_video(frames, path, fps=30):
    """Write ``frames`` to ``path`` with ``imageio.mimwrite`` at ``fps`` frames per second."""
    from imageio import mimwrite

    mimwrite(path, frames, fps=fps)
def read_spreadsheet(sheet_id, gid, gid_key="granularity", url=None, drop_na=False, **kwargs):
    """Read one tab of a Google Sheet (or any CSV location) into a DataFrame.

    This definition overrides an earlier ``read_spreadsheet`` in this module.
    The ``url`` and ``drop_na`` options of that earlier version are accepted
    here as well, so callers written against either signature work.

    Args:
        sheet_id (str): spreadsheet id, used only when ``url`` is None
        gid: tab id placed in the export URL, used only when ``url`` is None
        gid_key (str, optional): unused; kept for backward compatibility.
        url (str, optional): CSV location to read instead of the export URL.
            Defaults to None.
        drop_na (bool, optional): drop every row containing a NaN. Defaults
            to False, which returns the table unfiltered as before.
        **kwargs: forwarded to ``pd.read_csv``

    Returns:
        pd.DataFrame: the loaded table
    """
    if url is None:
        BASE_URL = 'https://docs.google.com/spreadsheets/d/'
        url = BASE_URL + sheet_id + f'/export?gid={gid}&format=csv'
    df = pd.read_csv(url, **kwargs)
    if drop_na:
        # drop all rows which have at least 1 NaN value
        df = df.dropna(axis=0)
    return df
def load_jsonl(file_path: str) -> list:
    """Parse a JSONL file into a list of objects.

    Lines that are empty or whitespace-only are skipped.

    Args:
        file_path (str): Path to the JSONL file

    Returns:
        list: One parsed JSON value per non-blank line, in file order

    Example:
        >>> data = load_jsonl("path/to/file.jsonl")
        >>> print(data[0])  # Print first JSON object
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        return [json.loads(row) for row in handle if row.strip()]
def save_jsonl(data: list, file_path: str) -> None:
    """Write each item of ``data`` as one JSON document per line.

    Args:
        data (list): Items to serialize, one per output line
        file_path (str): Path where to save the JSONL file

    Example:
        >>> data = [{"text": "hello"}, {"text": "world"}]
        >>> save_jsonl(data, "output.jsonl")
    """
    with open(file_path, 'w', encoding='utf-8') as sink:
        sink.writelines(json.dumps(record) + '\n' for record in data)
|