File size: 5,028 Bytes
c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e 7ef0785 c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e 15e4f2e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e fc5e51e c20b69e 7ef0785 fc5e51e 7ef0785 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
# 5-tuple init
def __init__(
self, alphabets, states, transition_functions, initial_state, final_states
):
self.alphabets = alphabets
self.states = states
self.transitions = transition_functions
self.initial_state = initial_state
self.final_states = final_states
# Processes the input word and returns True if the word is accepted by the DFA
def is_accepting(self, input_string: str):
current_state = self.initial_state
for char in input_string:
current_state = self.transitions.get(current_state, {}).get(
char, len(self.states) - 1
)
return current_state in self.final_states
# Processes the input paragraph and returns a dictionary of accepted words and their positions
def check(self, paragraph: str):
# Reject empty input
if paragraph.strip() == "":
raise ValueError("Empty string provided")
# Normalize the input
paragraph = paragraph.lower().strip()
# Convert the paragraph to a list of characters
chars = list(paragraph)
current_word = ""
accepted_words: dict[str : list[tuple[int, int]]] = (
{}
) # returns: {word: [(start_index, end_index)]}
# Start and end index of the current word
word_start = 0
word_end = 0
# Traverse the characters in the paragraph
for i, char in enumerate(chars):
# If the character is in the alphabet, then it is part of the word
if char in self.alphabets:
current_word += char
word_end = i
# If the character is not in the alphabet or it is the last character in the paragraph
# then the current word is complete
# Check if the current word is accepted by the DFA
# If it is accepted, add it to the accepted_words dictionary
# Reset the current_word to an empty string
if char not in self.alphabets or i == len(chars) - 1:
if current_word != "":
if self.is_accepting(current_word):
accepted_words[current_word] = accepted_words.get(
current_word, []
) + [(word_start, word_end)]
current_word = ""
word_start = i + 1
return accepted_words
# Generate a DFA from a list of words
def generate_dfa(input_strings: list[str]) -> DFA:
alphabets = {
"a",
"b",
"c",
"d",
"e",
"f",
"g",
"h",
"i",
"j",
"k",
"l",
"m",
"n",
"o",
"p",
"q",
"r",
"s",
"t",
"u",
"v",
"w",
"x",
"y",
"z",
}
states = set([0])
transition_functions = {}
final_states = set()
for input_string in [
input_string.lower().strip() for input_string in input_strings
]:
current_state = 0
for i in input_string:
if i not in alphabets:
raise ValueError(f"Invalid character '{i}' in string '{input_string}'")
# Find the upcoming state based on the current state and the input character to reach the next state
upcoming_state = transition_functions.get(current_state)
if upcoming_state is not None:
upcoming_state = upcoming_state.get(i)
# Create a new state if the upcoming state is not found
if upcoming_state is None:
# The new state is the next integer after the maximum state in the set of states
upcoming_state = len(states)
# Add the new state to the set of states
states.add(upcoming_state)
# Add the transition into the transition functions dictionary
if current_state not in transition_functions:
transition_functions[current_state] = {}
transition_functions[current_state][i] = upcoming_state
# Move to the upcoming state
current_state = upcoming_state
# Add the final state to the set of final states
final_states.add(current_state)
# Add the trap state to the set of states
states.add(len(states))
# Add the trap state to the transition functions
for state in states:
if state not in transition_functions:
transition_functions[state] = {}
for alphabet in alphabets:
if alphabet not in transition_functions[state]:
transition_functions[state][alphabet] = len(states) - 1
return DFA(alphabets, states, transition_functions, 0, final_states)
dfa = generate_dfa(["and", "but", "or", "because"])
print(dfa.check("me and you."))
|