edwjin committed on
Commit
eb20058
·
verified ·
1 Parent(s): 6457a8e

Delete load_texts.py

Browse files
Files changed (1) hide show
  1. load_texts.py +0 -17
load_texts.py DELETED
@@ -1,17 +0,0 @@
1
def load_texts(filename):
    """Lazily yield the lines of a UTF-8 text file, stripped of whitespace.

    The yielded text is intended as the corpus for "training" the tokenizer.
    This function reads exactly one file and does no filtering by name, so
    the caller is responsible for not passing test data.

    Args:
        filename: Path of the text file to read.

    Yields:
        Each line of the file in order, with leading and trailing whitespace
        (including the newline) removed. Blank lines are yielded as empty
        strings rather than skipped.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    # This is a generator: the file is opened on the first iteration, not at
    # call time, and stays open until the generator is exhausted or closed.
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            yield line.strip()