jljiu committed on
Commit
73aa68f
·
verified ·
1 Parent(s): e974fae

Create preprocess_data.py

Browse files
Files changed (1) hide show
  1. preprocess_data.py +9 -0
preprocess_data.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
import datasets

# Read the raw text file. Iterating the file object yields one string per
# line with its line terminator kept, which is what readlines() returns.
with open('data/novel.txt', encoding='utf-8') as f:
    lines = list(f)

# Wrap the lines in a single "text" column, build the dataset from that
# mapping, and write it to disk.
data = {'text': lines}
dataset = datasets.Dataset.from_dict(data)
dataset.save_to_disk('models/processed_dataset')