Spaces:

DarylT01
/

dfa-ui

Sleeping

File size: 5,028 Bytes

c20b69e
 
 
fc5e51e
 
 
 
c20b69e
fc5e51e
 
c20b69e
 
fc5e51e
c20b69e
fc5e51e
c20b69e
 
7ef0785
 
 
c20b69e
 
 
fc5e51e
c20b69e
fc5e51e
c20b69e
 
fc5e51e
 
c20b69e
 
fc5e51e
c20b69e
fc5e51e
c20b69e
15e4f2e
c20b69e
fc5e51e
 
 
c20b69e
 
fc5e51e
 
c20b69e
fc5e51e
 
c20b69e
 
 
fc5e51e
 
 
 
 
 
c20b69e
 
 
 
 
 
 
 
 
 
 
fc5e51e
 
 
c20b69e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc5e51e
c20b69e
 
fc5e51e
 
 
 
c20b69e
 
fc5e51e
 
 
 
 
 
 
 
 
c20b69e
fc5e51e
 
 
 
 
 
 
 
 
 
c20b69e
fc5e51e
 
 
 
 
c20b69e
7ef0785
 
 
 
 
 
 
 
 
 
 
fc5e51e
7ef0785

# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
    # 5-tuple init
    def __init__(
        self, alphabets, states, transition_functions, initial_state, final_states
    ):
        self.alphabets = alphabets
        self.states = states
        self.transitions = transition_functions
        self.initial_state = initial_state
        self.final_states = final_states

    # Processes the input word and returns True if the word is accepted by the DFA
    def is_accepting(self, input_string: str):
        current_state = self.initial_state

        for char in input_string:
            current_state = self.transitions.get(current_state, {}).get(
                char, len(self.states) - 1
            )

        return current_state in self.final_states

    # Processes the input paragraph and returns a dictionary of accepted words and their positions
    def check(self, paragraph: str):
        # Reject empty input
        if paragraph.strip() == "":
            raise ValueError("Empty string provided")

        # Normalize the input
        paragraph = paragraph.lower().strip()

        # Convert the paragraph to a list of characters
        chars = list(paragraph)

        current_word = ""
        accepted_words: dict[str : list[tuple[int, int]]] = (
            {}
        )  # returns: {word: [(start_index, end_index)]}

        # Start and end index of the current word
        word_start = 0
        word_end = 0

        # Traverse the characters in the paragraph
        for i, char in enumerate(chars):
            # If the character is in the alphabet, then it is part of the word
            if char in self.alphabets:
                current_word += char
                word_end = i

            # If the character is not in the alphabet or it is the last character in the paragraph
            # then the current word is complete
            # Check if the current word is accepted by the DFA
            # If it is accepted, add it to the accepted_words dictionary
            # Reset the current_word to an empty string
            if char not in self.alphabets or i == len(chars) - 1:
                if current_word != "":
                    if self.is_accepting(current_word):
                        accepted_words[current_word] = accepted_words.get(
                            current_word, []
                        ) + [(word_start, word_end)]
                    current_word = ""
                word_start = i + 1

        return accepted_words


# Generate a DFA from a list of words
def generate_dfa(input_strings: list[str]) -> DFA:
    alphabets = {
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
    }
    states = set([0])
    transition_functions = {}
    final_states = set()

    for input_string in [
        input_string.lower().strip() for input_string in input_strings
    ]:

        current_state = 0

        for i in input_string:

            if i not in alphabets:
                raise ValueError(f"Invalid character '{i}' in string '{input_string}'")

            # Find the upcoming state based on the current state and the input character to reach the next state
            upcoming_state = transition_functions.get(current_state)
            if upcoming_state is not None:
                upcoming_state = upcoming_state.get(i)

            # Create a new state if the upcoming state is not found
            if upcoming_state is None:
                # The new state is the next integer after the maximum state in the set of states
                upcoming_state = len(states)
                # Add the new state to the set of states
                states.add(upcoming_state)
                # Add the transition into the transition functions dictionary
                if current_state not in transition_functions:
                    transition_functions[current_state] = {}
                transition_functions[current_state][i] = upcoming_state

            # Move to the upcoming state
            current_state = upcoming_state

        # Add the final state to the set of final states
        final_states.add(current_state)

    # Add the trap state to the set of states
    states.add(len(states))

    # Add the trap state to the transition functions
    for state in states:
        if state not in transition_functions:
            transition_functions[state] = {}
        for alphabet in alphabets:
            if alphabet not in transition_functions[state]:
                transition_functions[state][alphabet] = len(states) - 1

    return DFA(alphabets, states, transition_functions, 0, final_states)


dfa = generate_dfa(["and", "but", "or", "because"])
print(dfa.check("me and you."))