Spaces:
Sleeping
Sleeping
| """Unified HuggingFace dataset operations""" | |
| import json | |
| from pathlib import Path | |
| from datasets import Dataset, load_dataset | |
class HFDataManager:
    """Handles all HuggingFace dataset loading and saving operations.

    Every method is a ``staticmethod``: none of them reads instance or class
    state, so they can be called either as ``HFDataManager.method(...)`` or
    on an instance.
    """

    @staticmethod
    def load_from_hf(hf_repo: str) -> list:
        """Load the ``train`` split of a HuggingFace dataset as a list.

        This is a best-effort loader: any exception raised while loading or
        converting the dataset is logged as a warning and an empty list is
        returned instead of propagating the error.

        Args:
            hf_repo: Repository identifier passed to ``load_dataset``.

        Returns:
            The result of ``dataset.to_list()``, or ``[]`` when the dataset
            does not exist or cannot be loaded.
        """
        try:
            dataset = load_dataset(hf_repo, split="train")
            return dataset.to_list()
        except Exception as exc:
            # Deliberate best-effort fallback, but leave a trace so that a
            # network/auth failure is distinguishable from an empty dataset.
            import logging

            logging.getLogger(__name__).warning(
                "Could not load dataset %r; returning empty list: %s",
                hf_repo,
                exc,
            )
            return []

    @staticmethod
    def push_to_hf(data, repo_id: str, private: bool = True) -> None:
        """Push data to a HuggingFace dataset repository.

        Args:
            data: Records handed to ``Dataset.from_list``.
            repo_id: Target repository identifier for ``push_to_hub``.
            private: Forwarded to ``push_to_hub`` as its ``private`` flag.
        """
        dataset = Dataset.from_list(data)
        dataset.push_to_hub(repo_id, private=private)

    @staticmethod
    def save_to_jsonl(data, file_path) -> None:
        """Save data to a local JSONL file, one JSON document per line.

        Missing parent directories are created. An existing file at
        ``file_path`` is overwritten.

        Args:
            data: Iterable of JSON-serialisable items.
            file_path: Destination path (anything ``Path`` accepts).
        """
        file_path = Path(file_path)
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, "w", encoding="utf-8") as f:
            f.writelines(json.dumps(item) + "\n" for item in data)

    @staticmethod
    def load_from_jsonl(file_path) -> list:
        """Load data from a local JSONL file.

        Blank or whitespace-only lines are skipped so that trailing newlines
        or hand-edited files do not raise a JSON decode error.

        Args:
            file_path: Path of the JSONL file to read.

        Returns:
            A list with one decoded JSON value per non-blank line.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If a non-blank line is not valid JSON.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            return [json.loads(line) for line in stripped_lines if line]