File size: 2,262 Bytes
66242b8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | from __future__ import annotations
from typing import Any
from pathlib import Path
import json
import pickle
import numpy as np
import pandas as pd
from scipy import sparse
# fallback serializer passed as `default=` to json.dump
def _json_default(obj):
if isinstance(obj, (np.integer, np.int64, np.int32)):
return int(obj)
if isinstance(obj, (np.floating, np.float64, np.float32)):
return float(obj)
if isinstance(obj, np.ndarray):
return obj.tolist()
if isinstance(obj, Path):
return str(obj)
return str(obj)
# for config
def save_json(obj, path: str | Path) -> None:
    """Write *obj* to *path* as indented UTF-8 JSON.

    Missing parent directories are created. Values the encoder cannot handle
    natively are converted by ``_json_default``. Non-ASCII characters are
    written verbatim (``ensure_ascii=False``).

    Args:
        obj: The object to serialize.
        path: Destination file; a ``str`` or ``Path``.

    Raises:
        TypeError / ValueError: If *obj* cannot be encoded (for example
            unsupported dict keys or a circular reference). In that case
            nothing is written and an existing file at *path* is untouched.
    """
    path = Path(path)
    # Encode completely before opening the file: opening with "w" truncates,
    # so a failure halfway through json.dump would leave a broken file behind.
    text = json.dumps(obj, indent=2, ensure_ascii=False, default=_json_default)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(text)
# for summary dataframe
def save_dataframe(df: pd.DataFrame, path: Path) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(path, index=False)
# save the dataframe of each split separately
def save_split_frames(split_dict: dict[str, pd.DataFrame], out_dir: Path, suffix: str) -> None:
out_dir.mkdir(parents=True, exist_ok=True)
for split, df in split_dict.items():
df.to_parquet(out_dir / f"{split}_{suffix}.parquet", index=False)
def save_pickle(obj, path: Path) -> None:
path.parent.mkdir(parents=True, exist_ok=True)
with open(path, "wb") as f:
pickle.dump(obj, f)
def save_array_dict(array_dict: dict[str, np.ndarray], out_dir: Path, suffix: str) -> None:
out_dir.mkdir(parents=True, exist_ok=True)
for name, arr in array_dict.items():
np.save(out_dir / f"{name}_{suffix}.npy", np.asarray(arr))
def save_matrix_dict(matrix_dict: dict[str, object], out_dir: Path, suffix: str) -> None:
out_dir.mkdir(parents=True, exist_ok=True)
for split, matrix in matrix_dict.items():
if sparse.issparse(matrix):
sparse.save_npz(out_dir / f"{split}_{suffix}.npz", matrix)
else:
np.save(out_dir / f"{split}_{suffix}.npy", np.asarray(matrix))
def load_json(path: Path) -> Any:
with path.open("r", encoding="utf-8") as handle:
return json.load(handle)
def load_pickle(path: Path) -> Any:
with path.open("rb") as handle:
return pickle.load(handle) |