File size: 1,360 Bytes
74e28e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def file_preprocess(file_name: str):
    """
    Clean up a word-list file in place.

    The file is read as one word per line and then rewritten with:
    1. duplicates removed
    2. the remaining words sorted (plain string order, so uppercase
       letters sort before lowercase ones)
    3. every word dropped that is empty or contains a character whose
       lowercase form is not one of a-z

    Words are written back joined by newlines, with no trailing newline.

    Args:
        file_name: Path of the file to rewrite.

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    alphabets = set("abcdefghijklmnopqrstuvwxyz")

    with open(file_name, "r") as source:
        words = source.read().splitlines()

    # `all()` is True for an empty string, so blank lines need their own
    # check or they would survive as an empty "word".
    cleaned = sorted(
        {
            word
            for word in words
            if word and all(char.lower() in alphabets for char in word)
        }
    )

    # Build the output before reopening the file: mode "w" truncates it,
    # so nothing should be able to fail between truncation and the write.
    content = "\n".join(cleaned)
    with open(file_name, "w") as target:
        target.write(content)


def find_duplicates_in_files(file_names: list[str]):
    """
    Print the words that occur more than once across the given files.

    Each file is read as one word per line and all lines are pooled, so a
    word counts as a duplicate whether it repeats inside one file or
    appears in several. The duplicates are printed as a single sorted list.

    Args:
        file_names: Paths of the files to scan.

    Raises:
        OSError: If any of the files cannot be opened.
    """
    from collections import Counter

    # One counting pass instead of calling list.count() per word, which
    # rescans the whole pooled list every time.
    occurrences = Counter()
    for file_name in file_names:
        with open(file_name, "r") as source:
            occurrences.update(source.read().splitlines())

    print(sorted(word for word, count in occurrences.items() if count > 1))


# Script entry: clean adjectives.txt in place. This call sits at module top
# level, so it also runs whenever this module is imported.
file_preprocess("adjectives.txt")
# Optional cross-file duplicate check, currently disabled:
# find_duplicates_in_files(["conjunctions.txt", "adverbs.txt", "adjectives.txt"])