File size: 1,360 Bytes
74e28e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
def file_preprocess(file_name: str):
    """
    Rewrite a word-list file in place, one cleaned word per line.

    The file is read as one word per line and then overwritten with the
    words that survive the following steps:
    1. removing duplicates
    2. sorting the words (plain string order, so uppercase sorts first)
    3. removing non-alphabetic words (any word with a character that,
       once lowercased, is not one of a-z), as well as blank lines

    The surviving words are joined with newlines; no trailing newline is
    written. Nothing is returned.
    """
    alphabets = set("abcdefghijklmnopqrstuvwxyz")
    # Context managers guarantee the handle is closed even if the read or
    # the write raises.
    with open(file_name, "r") as file:
        words = file.read().splitlines()
    # The `word and` guard drops blank lines: all() over an empty string is
    # True, so they would otherwise be kept as an empty "word" and end up
    # as a leading empty line in the output.
    cleaned = sorted(
        {
            word
            for word in words
            if word and all(char.lower() in alphabets for char in word)
        }
    )
    # The output is fully computed before the file is reopened for writing,
    # so the file is only truncated once there is something to write.
    with open(file_name, "w") as file:
        file.write("\n".join(cleaned))
def find_duplicates_in_files(file_names: list[str]):
    """
    Print the words that occur more than once across the given files.

    Every file is read as one word per line and all lines are pooled, so a
    word counts as a duplicate whether it repeats inside a single file or
    appears in several of them. The duplicates are printed as a sorted
    list; nothing is returned.
    """
    # Imported locally so the function carries its own dependency.
    from collections import Counter

    occurrences = Counter()
    for file_name in file_names:
        # The context manager closes the handle even if reading fails.
        with open(file_name, "r") as file:
            occurrences.update(file.read().splitlines())
    # A single counting pass replaces calling list.count() for every word,
    # which was quadratic in the total number of lines.
    print(sorted(word for word, count in occurrences.items() if count > 1))
# Script body: clean up the adjectives word list in place.
# NOTE: this is a module-level call, so it also runs whenever the file is
# imported, not only when it is executed directly.
file_preprocess(file_name="adjectives.txt")
# Optional cross-file duplicate check, left disabled:
# find_duplicates_in_files(["conjunctions.txt", "adverbs.txt", "adjectives.txt"])
|