DialogueExtractor / exporters.py
Mfischthal's picture
Upload 7 files
f432fa9 verified
raw
history blame contribute delete
755 Bytes
import os, json
from typing import List, Dict, Any, Optional
from datasets import Dataset
def to_jsonl(records: List[Dict[str, Any]], path: str) -> None:
    """Write *records* to *path* in JSON Lines format.

    Each record is serialized with ``json.dumps(..., ensure_ascii=False)`` and
    written on its own line, UTF-8 encoded. An existing file at *path* is
    overwritten. Missing parent directories are created.

    Args:
        records: Dictionaries to serialize, one per output line.
        path: Destination file path. May be a bare filename, in which case
            the file is written to the current working directory.
    """
    parent_dir = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError -- only create the parent when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(
            json.dumps(record, ensure_ascii=False) + "\n" for record in records
        )
def to_hf_dataset(
    records: List[Dict[str, Any]],
    save_to: Optional[str] = None,
    push_repo: Optional[str] = None,
    private: bool = True,
    token: Optional[str] = None,
):
    """Build a ``Dataset`` from *records*, optionally saving and/or pushing it.

    Args:
        records: Row dictionaries passed to ``Dataset.from_list``.
        save_to: If truthy, the directory is created (when missing) and the
            dataset is written there via ``save_to_disk``.
        push_repo: If truthy, the dataset is pushed via ``push_to_hub`` under
            this repository id.
        private: Forwarded to ``push_to_hub`` as ``private``.
        token: Forwarded to ``push_to_hub`` as ``token``.

    Returns:
        The dataset built from *records*.
    """
    dataset = Dataset.from_list(records)

    # Local export: make sure the target directory exists before saving.
    if save_to:
        os.makedirs(save_to, exist_ok=True)
        dataset.save_to_disk(save_to)

    # Remote export: only attempted when a repository id was supplied.
    if push_repo:
        hub_options = {"private": private, "token": token}
        dataset.push_to_hub(push_repo, **hub_options)

    return dataset