# Read the whole training corpus into memory as one UTF-8 decoded string.
# The path is relative, so it resolves against the current working directory.
# The context manager closes the file handle as soon as the read finishes.
with open("training_data/corpus.txt", mode="r", encoding="utf-8") as file:
    corpus = file.read()