"""Unified HuggingFace dataset operations""" import json from pathlib import Path from datasets import Dataset, load_dataset class HFDataManager: """Handles all HuggingFace dataset loading and saving operations""" @staticmethod def load_from_hf(hf_repo): """Load data from HuggingFace dataset repository""" try: dataset = load_dataset(hf_repo, split="train") return dataset.to_list() except Exception: # Return empty list if dataset doesn't exist or can't be loaded return [] @staticmethod def push_to_hf(data, repo_id, private=True): """Push data to HuggingFace dataset repository""" dataset = Dataset.from_list(data) dataset.push_to_hub(repo_id, private=private) @staticmethod def save_to_jsonl(data, file_path): """Save data to local JSONL file""" file_path = Path(file_path) file_path.parent.mkdir(parents=True, exist_ok=True) with open(file_path, "w") as f: for item in data: f.write(json.dumps(item) + "\n") @staticmethod def load_from_jsonl(file_path): """Load data from local JSONL file""" data = [] with open(file_path, "r") as f: for line in f: data.append(json.loads(line.strip())) return data