73aa68f
1
2
3
4
5
6
7
8
9
import datasets

# Read the txt file: every line of the novel becomes one dataset row.
# readlines() keeps each line's trailing newline, so the rows retain it too.
with open('data/novel.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()

# Wrap the lines in a single-column ("text") dataset.
data = {'text': lines}
dataset = datasets.Dataset.from_dict(data)

# Write the dataset to disk under models/processed_dataset.
dataset.save_to_disk('models/processed_dataset')