Spaces:
Sleeping
Sleeping
File size: 1,387 Bytes
57be184 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 |
"""Unified HuggingFace dataset operations"""
import json
from pathlib import Path
from datasets import Dataset, load_dataset
class HFDataManager:
    """Handles all HuggingFace dataset loading and saving operations.

    Every method is a ``@staticmethod``; the class is used purely as a
    namespace and holds no state.
    """

    @staticmethod
    def load_from_hf(hf_repo):
        """Load the ``train`` split of a HuggingFace dataset repository.

        Args:
            hf_repo: Repository identifier handed to ``load_dataset``.

        Returns:
            The result of ``dataset.to_list()`` on the loaded split, or an
            empty list when the dataset does not exist or cannot be loaded.
        """
        try:
            dataset = load_dataset(hf_repo, split="train")
            return dataset.to_list()
        except Exception:
            # Deliberate best-effort: callers rely on getting [] back when the
            # dataset is missing or unreachable. Log the failure so it is not
            # completely invisible when something other than "missing" went
            # wrong (auth, network, schema, ...).
            import logging

            logging.getLogger(__name__).warning(
                "Could not load dataset %r; returning an empty list",
                hf_repo,
                exc_info=True,
            )
            return []

    @staticmethod
    def push_to_hf(data, repo_id, private=True):
        """Push data to a HuggingFace dataset repository.

        Args:
            data: Records passed to ``Dataset.from_list``.
            repo_id: Target repository identifier for ``push_to_hub``.
            private: Forwarded to ``push_to_hub`` as ``private``.
        """
        dataset = Dataset.from_list(data)
        dataset.push_to_hub(repo_id, private=private)

    @staticmethod
    def save_to_jsonl(data, file_path):
        """Save data to a local JSONL file, one JSON document per line.

        Missing parent directories are created. An existing file at
        ``file_path`` is overwritten.

        Args:
            data: Iterable of JSON-serializable items.
            file_path: Destination path (``str`` or ``Path``).
        """
        file_path = Path(file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding keeps the file independent of the platform locale.
        with open(file_path, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(item) + "\n" for item in data)

    @staticmethod
    def load_from_jsonl(file_path):
        """Load data from a local JSONL file.

        Blank and whitespace-only lines (for example a trailing newline at
        the end of the file) are skipped instead of raising a decode error.

        Args:
            file_path: Path of the JSONL file to read (``str`` or ``Path``).

        Returns:
            A list with one decoded JSON value per non-blank line, in file
            order.

        Raises:
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        # Explicit encoding so UTF-8 files decode the same on every platform.
        with open(file_path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            return [json.loads(text) for text in stripped_lines if text]