| from __future__ import annotations |
|
|
| from typing import Any |
| from pathlib import Path |
| import json |
| import pickle |
|
|
| import numpy as np |
| import pandas as pd |
| from scipy import sparse |
|
|
| |
| def _json_default(obj): |
| if isinstance(obj, (np.integer, np.int64, np.int32)): |
| return int(obj) |
| if isinstance(obj, (np.floating, np.float64, np.float32)): |
| return float(obj) |
| if isinstance(obj, np.ndarray): |
| return obj.tolist() |
| if isinstance(obj, Path): |
| return str(obj) |
| return str(obj) |
|
|
| |
| def save_json(obj, path: Path) -> None: |
| path.parent.mkdir(parents=True, exist_ok=True) |
| with open(path, "w", encoding="utf-8") as f: |
| json.dump(obj, f, indent=2, ensure_ascii=False, default=_json_default) |
| |
|
|
| |
| def save_dataframe(df: pd.DataFrame, path: Path) -> None: |
| path.parent.mkdir(parents=True, exist_ok=True) |
| df.to_csv(path, index=False) |
|
|
| |
| def save_split_frames(split_dict: dict[str, pd.DataFrame], out_dir: Path, suffix: str) -> None: |
| out_dir.mkdir(parents=True, exist_ok=True) |
| for split, df in split_dict.items(): |
| df.to_parquet(out_dir / f"{split}_{suffix}.parquet", index=False) |
|
|
|
|
|
|
| def save_pickle(obj, path: Path) -> None: |
| path.parent.mkdir(parents=True, exist_ok=True) |
| with open(path, "wb") as f: |
| pickle.dump(obj, f) |
|
|
| def save_array_dict(array_dict: dict[str, np.ndarray], out_dir: Path, suffix: str) -> None: |
| out_dir.mkdir(parents=True, exist_ok=True) |
| for name, arr in array_dict.items(): |
| np.save(out_dir / f"{name}_{suffix}.npy", np.asarray(arr)) |
|
|
| def save_matrix_dict(matrix_dict: dict[str, object], out_dir: Path, suffix: str) -> None: |
| out_dir.mkdir(parents=True, exist_ok=True) |
| for split, matrix in matrix_dict.items(): |
| if sparse.issparse(matrix): |
| sparse.save_npz(out_dir / f"{split}_{suffix}.npz", matrix) |
| else: |
| np.save(out_dir / f"{split}_{suffix}.npy", np.asarray(matrix)) |
|
|
|
|
|
|
| def load_json(path: Path) -> Any: |
| with path.open("r", encoding="utf-8") as handle: |
| return json.load(handle) |
| |
| def load_pickle(path: Path) -> Any: |
| with path.open("rb") as handle: |
| return pickle.load(handle) |