# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
    # 5-tuple init
    def __init__(
        self, alphabets, states, transition_functions, initial_state, final_states
    ):
        self.alphabets = alphabets
        self.states = states
        self.transitions = transition_functions
        self.initial_state = initial_state
        self.final_states = final_states

    # Processes the input word and returns True if the word is accepted by the DFA
    def is_accepting(self, input_string: str):
        current_state = self.initial_state

        for char in input_string:
            current_state = self.transitions.get(current_state, {}).get(
                char, len(self.states) - 1
            )

        return current_state in self.final_states

    # Processes the input paragraph and returns a dictionary of accepted words and their positions
    def check(self, paragraph: str):
        # Reject empty input
        if paragraph.strip() == "":
            raise ValueError("Empty string provided")

        # Normalize the input
        paragraph = paragraph.lower().strip()

        # Convert the paragraph to a list of characters
        chars = list(paragraph)

        current_word = ""
        accepted_words: dict[str : list[tuple[int, int]]] = (
            {}
        )  # returns: {word: [(start_index, end_index)]}

        # Start and end index of the current word
        word_start = 0
        word_end = 0

        # Traverse the characters in the paragraph
        for i, char in enumerate(chars):
            # If the character is in the alphabet, then it is part of the word
            if char in self.alphabets:
                current_word += char
                word_end = i

            # If the character is not in the alphabet or it is the last character in the paragraph
            # then the current word is complete
            # Check if the current word is accepted by the DFA
            # If it is accepted, add it to the accepted_words dictionary
            # Reset the current_word to an empty string
            if char not in self.alphabets or i == len(chars) - 1:
                if current_word != "":
                    if self.is_accepting(current_word):
                        accepted_words[current_word] = accepted_words.get(
                            current_word, []
                        ) + [(word_start, word_end)]
                    current_word = ""
                word_start = i + 1

        return accepted_words


# Generate a DFA from a list of words
def generate_dfa(input_strings: list[str]) -> DFA:
    alphabets = {
        "a",
        "b",
        "c",
        "d",
        "e",
        "f",
        "g",
        "h",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "o",
        "p",
        "q",
        "r",
        "s",
        "t",
        "u",
        "v",
        "w",
        "x",
        "y",
        "z",
    }
    states = set([0])
    transition_functions = {}
    final_states = set()

    for input_string in [
        input_string.lower().strip() for input_string in input_strings
    ]:

        current_state = 0

        for i in input_string:

            if i not in alphabets:
                raise ValueError(f"Invalid character '{i}' in string '{input_string}'")

            # Find the upcoming state based on the current state and the input character to reach the next state
            upcoming_state = transition_functions.get(current_state)
            if upcoming_state is not None:
                upcoming_state = upcoming_state.get(i)

            # Create a new state if the upcoming state is not found
            if upcoming_state is None:
                # The new state is the next integer after the maximum state in the set of states
                upcoming_state = len(states)
                # Add the new state to the set of states
                states.add(upcoming_state)
                # Add the transition into the transition functions dictionary
                if current_state not in transition_functions:
                    transition_functions[current_state] = {}
                transition_functions[current_state][i] = upcoming_state

            # Move to the upcoming state
            current_state = upcoming_state

        # Add the final state to the set of final states
        final_states.add(current_state)

    # Add the trap state to the set of states
    states.add(len(states))

    # Add the trap state to the transition functions
    for state in states:
        if state not in transition_functions:
            transition_functions[state] = {}
        for alphabet in alphabets:
            if alphabet not in transition_functions[state]:
                transition_functions[state][alphabet] = len(states) - 1

    return DFA(alphabets, states, transition_functions, 0, final_states)


dfa = generate_dfa(["and", "but", "or", "because"])
print(dfa.check("me and you."))