Create preprocess_data.py
Browse files- preprocess_data.py +9 -0
preprocess_data.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import datasets
|
| 2 |
+
|
| 3 |
+
# Read the raw novel text, one list entry per line of the file.
# readlines() keeps each line's trailing newline, so every stored
# text entry keeps it as well.
with open('data/novel.txt', 'r', encoding='utf-8') as f:
    lines = f.readlines()

# Wrap the lines in a single-column ("text") dataset and write it to disk.
data = {'text': lines}
dataset = datasets.Dataset.from_dict(data)
dataset.save_to_disk('models/processed_dataset')
|