File size: 5,028 Bytes
c20b69e
 
 
fc5e51e
 
 
 
c20b69e
fc5e51e
 
c20b69e
 
fc5e51e
c20b69e
fc5e51e
c20b69e
 
7ef0785
 
 
c20b69e
 
 
fc5e51e
c20b69e
fc5e51e
c20b69e
 
fc5e51e
 
c20b69e
 
fc5e51e
c20b69e
fc5e51e
c20b69e
15e4f2e
c20b69e
fc5e51e
 
 
c20b69e
 
fc5e51e
 
c20b69e
fc5e51e
 
c20b69e
 
 
fc5e51e
 
 
 
 
 
c20b69e
 
 
 
 
 
 
 
 
 
 
fc5e51e
 
 
c20b69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc5e51e
c20b69e
 
fc5e51e
 
 
 
c20b69e
 
fc5e51e
 
 
 
 
 
 
 
 
c20b69e
fc5e51e
 
 
 
 
 
 
 
 
 
c20b69e
fc5e51e
 
 
 
 
c20b69e
7ef0785
 
 
 
 
 
 
 
 
 
 
fc5e51e
7ef0785
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
    # 5-tuple init
    def __init__(
        self, alphabets, states, transition_functions, initial_state, final_states
    ):
        self.alphabets = alphabets
        self.states = states
        self.transitions = transition_functions
        self.initial_state = initial_state
        self.final_states = final_states

    # Processes the input word and returns True if the word is accepted by the DFA
    def is_accepting(self, input_string: str):
        current_state = self.initial_state

        for char in input_string:
            current_state = self.transitions.get(current_state, {}).get(
                char, len(self.states) - 1
            )

        return current_state in self.final_states

    # Processes the input paragraph and returns a dictionary of accepted words and their positions
    def check(self, paragraph: str):
        # Reject empty input
        if paragraph.strip() == "":
            raise ValueError("Empty string provided")

        # Normalize the input
        paragraph = paragraph.lower().strip()

        # Convert the paragraph to a list of characters
        chars = list(paragraph)

        current_word = ""
        accepted_words: dict[str : list[tuple[int, int]]] = (
            {}
        )  # returns: {word: [(start_index, end_index)]}

        # Start and end index of the current word
        word_start = 0
        word_end = 0

        # Traverse the characters in the paragraph
        for i, char in enumerate(chars):
            # If the character is in the alphabet, then it is part of the word
            if char in self.alphabets:
                current_word += char
                word_end = i

            # If the character is not in the alphabet or it is the last character in the paragraph
            # then the current word is complete
            # Check if the current word is accepted by the DFA
            # If it is accepted, add it to the accepted_words dictionary
            # Reset the current_word to an empty string
            if char not in self.alphabets or i == len(chars) - 1:
                if current_word != "":
                    if self.is_accepting(current_word):
                        accepted_words[current_word] = accepted_words.get(
                            current_word, []
                        ) + [(word_start, word_end)]
                    current_word = ""
                word_start = i + 1

        return accepted_words


# Generate a DFA from a list of words
def generate_dfa(input_strings: list[str]) -> DFA:
    alphabets = {
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
    }
    states = set([0])
    transition_functions = {}
    final_states = set()

    for input_string in [
        input_string.lower().strip() for input_string in input_strings
    ]:

        current_state = 0

        for i in input_string:

            if i not in alphabets:
                raise ValueError(f"Invalid character '{i}' in string '{input_string}'")

            # Find the upcoming state based on the current state and the input character to reach the next state
            upcoming_state = transition_functions.get(current_state)
            if upcoming_state is not None:
                upcoming_state = upcoming_state.get(i)

            # Create a new state if the upcoming state is not found
            if upcoming_state is None:
                # The new state is the next integer after the maximum state in the set of states
                upcoming_state = len(states)
                # Add the new state to the set of states
                states.add(upcoming_state)
                # Add the transition into the transition functions dictionary
                if current_state not in transition_functions:
                    transition_functions[current_state] = {}
                transition_functions[current_state][i] = upcoming_state

            # Move to the upcoming state
            current_state = upcoming_state

        # Add the final state to the set of final states
        final_states.add(current_state)

    # Add the trap state to the set of states
    states.add(len(states))

    # Add the trap state to the transition functions
    for state in states:
        if state not in transition_functions:
            transition_functions[state] = {}
        for alphabet in alphabets:
            if alphabet not in transition_functions[state]:
                transition_functions[state][alphabet] = len(states) - 1

    return DFA(alphabets, states, transition_functions, 0, final_states)


dfa = generate_dfa(["and", "but", "or", "because"])
print(dfa.check("me and you."))