| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| import gzip |
| import json |
| import pickle |
| from pathlib import Path |
| from typing import Any, Union |
|
|
| import pandas as pd |
|
|
| from protenix.utils.torch_utils import map_values_to_list |
|
|
# Strings that read_indices_csv() asks pandas to parse as missing values.
# The list is passed as `na_values` together with `keep_default_na=False`,
# so it fully replaces pandas' built-in NA marker set instead of extending it.
PANDAS_NA_VALUES = [
    "",
    "#N/A",
    "#N/A N/A",
    "#NA",
    "-1.#IND",
    "-1.#QNAN",
    "-NaN",
    "-nan",
    "1.#IND",
    "1.#QNAN",
    "<NA>",
    "N/A",
    # NOTE(review): "NA" is part of pandas' default NA markers but is not
    # listed here -- presumably so a literal "NA" cell is kept as a string;
    # confirm before adding it.
    "NULL",
    "NaN",
    "n/a",
    "nan",
    "null",
]
|
|
|
|
def read_indices_csv(csv: Union[str, Path]) -> pd.DataFrame:
    """
    Load a csv file into a DataFrame while keeping cell contents as written.

    Every column is read as ``str`` and pandas' built-in NA markers are
    switched off, so only the strings listed in ``PANDAS_NA_VALUES`` are
    turned into missing values.

    Args:
        csv (Union[str, Path]): Path of the csv file to read.

    Returns:
        pd.DataFrame: The parsed table.
    """
    read_options = {
        "na_values": PANDAS_NA_VALUES,
        "keep_default_na": False,
        "dtype": str,
    }
    return pd.read_csv(csv, **read_options)
|
|
|
|
def load_gzip_pickle(pkl: Union[str, Path]) -> Any:
    """
    Read back an object stored as a gzip-compressed pickle.

    Note: unpickling can execute arbitrary code encoded in the file, so
    only use this on files that come from a trusted source.

    Args:
        pkl (Union[str, Path]): Path of the gzip pickle file.

    Returns:
        Any: The unpickled object.
    """
    with gzip.open(pkl, mode="rb") as stream:
        return pickle.load(stream)
|
|
|
|
def dump_gzip_pickle(data: Any, pkl: Union[str, Path]):
    """
    Pickle an object and write it to disk gzip-compressed.

    An existing file at the target path is overwritten.

    Args:
        data (Any): The object to serialize.
        pkl (Union[str, Path]): Destination path of the gzip pickle file.
    """
    with gzip.open(pkl, mode="wb") as stream:
        pickle.dump(data, stream)
|
|
|
|
def save_json(data: dict, output_fpath: Union[str, Path], indent: int = 4):
    """
    Write a dictionary to a JSON file.

    A shallow copy of ``data`` is passed through ``map_values_to_list``
    before serialization (presumably to turn non-JSON values into plain
    lists -- see ``protenix.utils.torch_utils`` for the exact conversion).

    Args:
        data (dict): The dictionary to be saved.
        output_fpath (Union[str, Path]): The output file path.
        indent (int, optional): The indentation level for the JSON file.
            ``None`` writes the compact single-line form. Defaults to 4.
    """
    serializable = map_values_to_list(data.copy())
    # json.dump's own default is indent=None, so forwarding the argument
    # unchanged covers both the indented and the compact case.
    with open(output_fpath, "w") as out_file:
        json.dump(serializable, out_file, indent=indent)
|
|