# Read the entire contents of data.txt into memory as one string, bound to
# the module-level name `corpus`.
# The encoding is passed explicitly so decoding does not depend on the
# platform's locale default, which differs between operating systems.
# NOTE(review): assumes data.txt is UTF-8 encoded — confirm against the
# process that produces the file.
with open('data.txt', 'r', encoding='utf-8') as file:
    corpus = file.read()