""" Utilities for input-output loading/saving. """ from typing import Any, List import yaml import pickle import json import pandas as pd class PrettySafeLoader(yaml.SafeLoader): """Custom loader for reading YAML files""" def construct_python_tuple(self, node): return tuple(self.construct_sequence(node)) PrettySafeLoader.add_constructor( u'tag:yaml.org,2002:python/tuple', PrettySafeLoader.construct_python_tuple ) def load_yml(path: str, loader_type: str = 'default'): """Read params from a yml file. Args: path (str): path to the .yml file loader_type (str, optional): type of loader used to load yml files. Defaults to 'default'. Returns: Any: object (typically dict) loaded from .yml file """ assert loader_type in ['default', 'safe'] loader = yaml.Loader if (loader_type == "default") else PrettySafeLoader with open(path, 'r') as f: data = yaml.load(f, Loader=loader) return data def save_yml(data: dict, path: str): """Save params in the given yml file path. Args: data (dict): data object to save path (str): path to .yml file to be saved """ with open(path, 'w') as f: yaml.dump(data, f, default_flow_style=False) def load_pkl(path: str, encoding: str = "ascii"): """Loads a .pkl file. Args: path (str): path to the .pkl file encoding (str, optional): encoding to use for loading. Defaults to "ascii". Returns: Any: unpickled object """ return pickle.load(open(path, "rb"), encoding=encoding) def save_pkl(data: Any, path: str) -> None: """Saves given object into .pkl file Args: data (Any): object to be saved path (str): path to the location to be saved at """ with open(path, 'wb') as f: pickle.dump(data, f) def load_json(path: str) -> dict: """Helper to load json file""" with open(path, 'rb') as f: data = json.load(f) return data def save_json(data: dict, path: str): """Helper to save `dict` as .json file.""" with open(path, 'w') as f: json.dump(data, f, indent=2) def load_txt(path: str): """Loads lines of a .txt file. Args: path (str): path to the .txt file Returns: List: lines of .txt file """ with open(path) as f: lines = f.read().splitlines() return lines def save_txt(data: dict, path: str): """Writes data (lines) to a txt file. Args: data (dict): List of strings path (str): path to .txt file """ assert isinstance(data, list) lines = "\n".join(data) with open(path, "w") as f: f.write(str(lines)) def read_spreadsheet(sheet_id, gid, url=None, drop_na=True, **kwargs): if url is None: BASE_URL = 'https://docs.google.com/spreadsheets/d/' url = BASE_URL + sheet_id + f'/export?gid={gid}&format=csv' df = pd.read_csv(url, **kwargs) if drop_na: # drop all rows which have atleast 1 NaN value df = df.dropna(axis=0) return df def load_midi(file, rate=16000): import pretty_midi assert file.endswith('.mid') pm = pretty_midi.PrettyMIDI(file) y = pm.synthesize(fs=rate) return y, rate def load_ptz(path): import gzip import torch with gzip.open(path, 'rb') as f: data = torch.load(f) return data def save_video(frames, path, fps=30): import imageio imageio.mimwrite(path, frames, fps=fps) def read_spreadsheet(sheet_id, gid, gid_key="granularity", **kwargs): BASE_URL = 'https://docs.google.com/spreadsheets/d/' df = df = pd.read_csv(BASE_URL + sheet_id + f'/export?gid={gid}&format=csv', **kwargs) return df def load_jsonl(file_path: str) -> list: """Load data from a JSONL file. Args: file_path (str): Path to the JSONL file Returns: list: List of dictionaries, where each dictionary is a JSON object from the file Example: >>> data = load_jsonl("path/to/file.jsonl") >>> print(data[0]) # Print first JSON object """ data = [] with open(file_path, 'r', encoding='utf-8') as f: for line in f: if line.strip(): # Skip empty lines data.append(json.loads(line)) return data def save_jsonl(data: list, file_path: str) -> None: """Save data to a JSONL file. Args: data (list): List of dictionaries to save file_path (str): Path where to save the JSONL file Example: >>> data = [{"text": "hello"}, {"text": "world"}] >>> save_jsonl(data, "output.jsonl") """ with open(file_path, 'w', encoding='utf-8') as f: for item in data: f.write(json.dumps(item) + '\n')