SoulX-Singer / soulxsinger /utils /file_utils.py
Xinsheng-Wang's picture
Upload folder using huggingface_hub
c7f3ffb verified
"""
Description:
This script contains a collection of functions designed to handle various
file reading and writing operations. It provides utilities to read from files,
write data to files, and perform file manipulation tasks.
"""
import os
import json
from tqdm import tqdm
from typing import List, Dict
from pathlib import Path
from omegaconf import OmegaConf, DictConfig
def write_jsonl(metadata: List[dict], file_path: Path):
"""Writes a list of dictionaries to a JSONL file.
Args:
metadata : List[dict]
A list of dictionaries, each representing a piece of meta.
file_path : Path
The file path to save the JSONL file
This function writes each dictionary in the list to a new line in the specified file.
"""
with open(file_path, "w", encoding="utf-8") as f:
for meta in tqdm(metadata, desc="writing jsonl"):
# Convert dictionary to JSON string and write it to the file with a newline
json_str = json.dumps(meta, ensure_ascii=False) + "\n"
f.write(json_str)
print(f"jsonl saved to {file_path}")
def read_jsonl(file_path: Path) -> List[dict]:
"""
Reads a JSONL file and returns a list of dictionaries.
Args:
file_path : Path
The path to the JSONL file to be read.
Returns:
List[dict]
A list of dictionaries parsed from each line of the JSONL file.
"""
metadata = []
# Open the file for reading
with open(file_path, "r", encoding="utf-8") as f:
# Split the file into lines
lines = f.read().splitlines()
# Process each line
for line in lines:
# Convert JSON string back to dictionary and append to list
meta = json.loads(line)
metadata.append(meta)
# Return the list of metadata
return metadata
def load_config(config_path: Path) -> DictConfig:
"""Loads a configuration file and optionally merges it with a base configuration.
Args:
config_path (Path): Path to the configuration file.
"""
# Load the initial configuration from the given path
config = OmegaConf.load(config_path)
# Check if there is a base configuration specified and merge if necessary
if config.get("base_config", None) is not None:
base_config = OmegaConf.load(config["base_config"])
config = OmegaConf.merge(base_config, config)
return config