Spaces:
Sleeping
Sleeping
Delete load_texts.py
Browse files- load_texts.py +0 -17
load_texts.py
DELETED
|
@@ -1,17 +0,0 @@
|
|
| 1 |
-
def load_texts(filename):
    """Lazily yield the lines of a single UTF-8 text file.

    The file is opened only when iteration starts and is closed when the
    generator is exhausted or closed. Each line is yielded with leading and
    trailing whitespace (including the newline) removed; blank lines are
    yielded as empty strings.

    This function reads exactly one file and does no filtering by file
    name. Callers that need to keep held-out/test data away from tokenizer
    "training" must choose which files to pass in.

    Args:
        filename: Path of the text file to read.

    Yields:
        Each line of the file, stripped of surrounding whitespace.

    Raises:
        OSError: If the file cannot be opened (raised on first iteration).
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(filename, 'r', encoding='utf-8') as file:
        for line in file:
            yield line.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|