File size: 755 Bytes
f432fa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import os, json
from typing import List, Dict, Any, Optional
from datasets import Dataset

def to_jsonl(records: List[Dict[str, Any]], path: str) -> None:
    """Write *records* to *path* in JSON Lines format.

    Each record is serialized with ``json.dumps(..., ensure_ascii=False)`` and
    written on its own line, UTF-8 encoded. An existing file at *path* is
    overwritten. An empty *records* list produces an empty file.

    Args:
        records: Dictionaries to serialize, one per output line.
        path: Destination file. Missing parent directories are created; a
            bare filename (no directory component) writes into the current
            working directory.

    Raises:
        TypeError: If a record contains a value ``json.dumps`` cannot encode.
        OSError: If the directory or file cannot be created or written.
    """
    parent = os.path.dirname(path)
    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the parent when there is one.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(r, ensure_ascii=False) + "\n" for r in records)

def to_hf_dataset(records: List[Dict[str, Any]], save_to: Optional[str] = None,
                  push_repo: Optional[str] = None, private: bool = True, token: Optional[str] = None):
    """Build a ``Dataset`` from *records*, optionally saving and/or pushing it.

    Args:
        records: Row dictionaries passed to ``Dataset.from_list``.
        save_to: If truthy, this directory is created (if missing) and the
            dataset is written there via ``save_to_disk``.
        push_repo: If truthy, the dataset is pushed via ``push_to_hub`` under
            this repository identifier.
        private: Forwarded to ``push_to_hub`` as ``private``.
        token: Forwarded to ``push_to_hub`` as ``token``.

    Returns:
        The dataset object created from *records*.
    """
    dataset = Dataset.from_list(records)

    # Local persistence and the hub upload are independent; either, both,
    # or neither may be requested.
    if save_to:
        os.makedirs(save_to, exist_ok=True)
        dataset.save_to_disk(save_to)

    if push_repo:
        hub_options = {"private": private, "token": token}
        dataset.push_to_hub(push_repo, **hub_options)

    return dataset