| import pickle | |
| import gzip | |
def get_unique_words(corpus_filename):
    """
    Get a sorted list of the unique words in a corpus file.

    The file is read as UTF-8 text, one line at a time, and each line is
    split on runs of whitespace. Every resulting token counts as a word;
    no case folding or punctuation stripping is applied.

    Args:
        corpus_filename: Path of the text file to read.

    Returns:
        A list holding each distinct token exactly once, in ascending
        order, so the result is identical on every run (a bare set would
        iterate in an order that can change between interpreter runs).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    unique_words = set()
    with open(corpus_filename, 'r', encoding='utf-8') as file:
        for line in file:
            # split() without a separator already ignores leading and
            # trailing whitespace, so no separate strip() is needed.
            unique_words.update(line.split())
    return sorted(unique_words)
def save_compressed_word_list(words, filename):
    """
    Pickle a collection of words into a gzip-compressed file.

    Args:
        words: The object to serialize (normally a list of words).
        filename: Destination path; opened in binary write mode, so any
            existing file at that path is overwritten.
    """
    with gzip.open(filename, mode='wb') as compressed_out:
        pickler = pickle.Pickler(compressed_out)
        pickler.dump(words)
def load_compressed_word_list(filename):
    """
    Read back a pickled word collection from a gzip-compressed file.

    NOTE: unpickling can execute arbitrary code embedded in the file, so
    only load files that come from a trusted source.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        Whatever object was pickled into the file.
    """
    with gzip.open(filename, mode='rb') as compressed_in:
        unpickler = pickle.Unpickler(compressed_in)
        restored = unpickler.load()
    return restored
def get_autocomplete(input_word=" ", all_words=" "):
    """
    Collect the candidates that begin with a given prefix.

    Matching uses str.startswith, so it is case-sensitive, and an empty
    prefix matches every candidate.

    Args:
        input_word: Prefix to look for. Defaults to a single space.
        all_words: Iterable of candidate strings. Defaults to the string
            " ", which iterates as the single character " ".

    Returns:
        A list of the matching candidates, in the order they were supplied.
    """
    def has_prefix(candidate):
        return candidate.startswith(input_word)

    return list(filter(has_prefix, all_words))