docs(dfa): add dfa comments and minor refactor
Browse files
main.py
CHANGED
|
@@ -1,18 +1,21 @@
|
|
| 1 |
# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
|
| 2 |
class DFA:
|
| 3 |
# 5-tuple init
|
| 4 |
-
def __init__(
|
| 5 |
-
self
|
|
|
|
|
|
|
| 6 |
self.states = states
|
| 7 |
-
self.transitions =
|
| 8 |
-
self.
|
| 9 |
self.final_states = final_states
|
| 10 |
|
|
|
|
| 11 |
def is_accepting(self, input_string: str):
|
| 12 |
-
current_state = self.
|
| 13 |
|
| 14 |
for char in input_string:
|
| 15 |
-
if char not in self.
|
| 16 |
return False
|
| 17 |
|
| 18 |
current_state = self.transitions.get(current_state, {}).get(char, None)
|
|
@@ -21,24 +24,40 @@ class DFA:
|
|
| 21 |
|
| 22 |
return current_state in self.final_states
|
| 23 |
|
|
|
|
| 24 |
def check(self, paragraph: str):
|
|
|
|
| 25 |
if paragraph.strip() == "":
|
| 26 |
raise ValueError("Empty string provided")
|
|
|
|
|
|
|
| 27 |
paragraph = paragraph.lower().strip()
|
| 28 |
|
|
|
|
| 29 |
chars = list(paragraph)
|
|
|
|
| 30 |
current_word = ""
|
| 31 |
accepted_words: dict[list[tuple[int, int]]] = (
|
| 32 |
{}
|
| 33 |
-
) # {word: [(start_index, end_index)]}
|
|
|
|
|
|
|
| 34 |
word_start = 0
|
| 35 |
word_end = 0
|
|
|
|
|
|
|
| 36 |
for i, char in enumerate(chars):
|
| 37 |
-
|
|
|
|
| 38 |
current_word += char
|
| 39 |
word_end = i
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
if current_word != "":
|
| 43 |
if self.is_accepting(current_word):
|
| 44 |
accepted_words[current_word] = accepted_words.get(
|
|
@@ -50,8 +69,9 @@ class DFA:
|
|
| 50 |
return accepted_words
|
| 51 |
|
| 52 |
|
| 53 |
-
|
| 54 |
-
|
|
|
|
| 55 |
"a",
|
| 56 |
"b",
|
| 57 |
"c",
|
|
@@ -80,31 +100,40 @@ def generate_dfa(words: list[str]) -> DFA:
|
|
| 80 |
"z",
|
| 81 |
}
|
| 82 |
states = set([0])
|
| 83 |
-
|
| 84 |
-
start_state = 0
|
| 85 |
final_states = set()
|
| 86 |
|
| 87 |
-
for
|
|
|
|
|
|
|
|
|
|
| 88 |
current_state = 0
|
| 89 |
-
for char in word:
|
| 90 |
-
# Get the next state in the DFA of the current state based on the character available for transition
|
| 91 |
-
next_state = transitions.get(current_state, {}).get(char, None)
|
| 92 |
-
|
| 93 |
-
# If the next state is not in the DFA, then create one for it.
|
| 94 |
-
if next_state is None:
|
| 95 |
-
next_state = len(states)
|
| 96 |
-
transitions.setdefault(current_state, {})[char] = next_state
|
| 97 |
-
states.add(next_state)
|
| 98 |
-
current_state = next_state
|
| 99 |
-
final_states.add(current_state)
|
| 100 |
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
|
|
|
| 108 |
|
| 109 |
-
|
| 110 |
-
print(dfa.check("...and the and the and ... and"))
|
|
|
|
| 1 |
# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
|
| 2 |
class DFA:
|
| 3 |
# 5-tuple init
|
| 4 |
+
def __init__(
|
| 5 |
+
self, alphabets, states, transition_functions, initial_state, final_states
|
| 6 |
+
):
|
| 7 |
+
self.alphabets = alphabets
|
| 8 |
self.states = states
|
| 9 |
+
self.transitions = transition_functions
|
| 10 |
+
self.initial_state = initial_state
|
| 11 |
self.final_states = final_states
|
| 12 |
|
| 13 |
+
# Processes the input word and returns True if the word is accepted by the DFA
|
| 14 |
def is_accepting(self, input_string: str):
|
| 15 |
+
current_state = self.initial_state
|
| 16 |
|
| 17 |
for char in input_string:
|
| 18 |
+
if char not in self.alphabets:
|
| 19 |
return False
|
| 20 |
|
| 21 |
current_state = self.transitions.get(current_state, {}).get(char, None)
|
|
|
|
| 24 |
|
| 25 |
return current_state in self.final_states
|
| 26 |
|
| 27 |
+
# Processes the input paragraph and returns a dictionary of accepted words and their positions
|
| 28 |
def check(self, paragraph: str):
|
| 29 |
+
# Reject empty input
|
| 30 |
if paragraph.strip() == "":
|
| 31 |
raise ValueError("Empty string provided")
|
| 32 |
+
|
| 33 |
+
# Normalize the input
|
| 34 |
paragraph = paragraph.lower().strip()
|
| 35 |
|
| 36 |
+
# Convert the paragraph to a list of characters
|
| 37 |
chars = list(paragraph)
|
| 38 |
+
|
| 39 |
current_word = ""
|
| 40 |
accepted_words: dict[list[tuple[int, int]]] = (
|
| 41 |
{}
|
| 42 |
+
) # returns: {word: [(start_index, end_index)]}
|
| 43 |
+
|
| 44 |
+
# Start and end index of the current word
|
| 45 |
word_start = 0
|
| 46 |
word_end = 0
|
| 47 |
+
|
| 48 |
+
# Traverse the characters in the paragraph
|
| 49 |
for i, char in enumerate(chars):
|
| 50 |
+
# If the character is in the alphabet, then it is part of the word
|
| 51 |
+
if char in self.alphabets:
|
| 52 |
current_word += char
|
| 53 |
word_end = i
|
| 54 |
|
| 55 |
+
# If the character is not in the alphabet or it is the last character in the paragraph
|
| 56 |
+
# then the current word is complete
|
| 57 |
+
# Check if the current word is accepted by the DFA
|
| 58 |
+
# If it is accepted, add it to the accepted_words dictionary
|
| 59 |
+
# Reset the current_word to an empty string
|
| 60 |
+
if char not in self.alphabets or i == len(chars) - 1:
|
| 61 |
if current_word != "":
|
| 62 |
if self.is_accepting(current_word):
|
| 63 |
accepted_words[current_word] = accepted_words.get(
|
|
|
|
| 69 |
return accepted_words
|
| 70 |
|
| 71 |
|
| 72 |
+
# Generate a DFA from a list of words
|
| 73 |
+
def generate_dfa(input_strings: list[str]) -> DFA:
|
| 74 |
+
alphabets = {
|
| 75 |
"a",
|
| 76 |
"b",
|
| 77 |
"c",
|
|
|
|
| 100 |
"z",
|
| 101 |
}
|
| 102 |
states = set([0])
|
| 103 |
+
transition_functions = {}
|
|
|
|
| 104 |
final_states = set()
|
| 105 |
|
| 106 |
+
for input_string in [
|
| 107 |
+
input_string.lower().strip() for input_string in input_strings
|
| 108 |
+
]:
|
| 109 |
+
|
| 110 |
current_state = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
+
for i in input_string:
|
| 113 |
+
|
| 114 |
+
if i not in alphabets:
|
| 115 |
+
raise ValueError(f"Invalid character '{i}' in string '{input_string}'")
|
| 116 |
+
|
| 117 |
+
# Look up the existing transition out of the current state on this character (None if there is none yet)
|
| 118 |
+
upcoming_state = transition_functions.get(current_state)
|
| 119 |
+
if upcoming_state is not None:
|
| 120 |
+
upcoming_state = upcoming_state.get(i)
|
| 121 |
|
| 122 |
+
# Create a new state if the upcoming state is not found
|
| 123 |
+
if upcoming_state is None:
|
| 124 |
+
# State ids are assigned consecutively starting from 0, so len(states) is the next unused id
|
| 125 |
+
upcoming_state = len(states)
|
| 126 |
+
# Add the new state to the set of states
|
| 127 |
+
states.add(upcoming_state)
|
| 128 |
+
# Add the transition into the transition functions dictionary
|
| 129 |
+
if current_state not in transition_functions:
|
| 130 |
+
transition_functions[current_state] = {}
|
| 131 |
+
transition_functions[current_state][i] = upcoming_state
|
| 132 |
|
| 133 |
+
# Move to the upcoming state
|
| 134 |
+
current_state = upcoming_state
|
| 135 |
+
|
| 136 |
+
# The state reached after consuming the whole string becomes a final (accepting) state
|
| 137 |
+
final_states.add(current_state)
|
| 138 |
|
| 139 |
+
return DFA(alphabets, states, transition_functions, 0, final_states)
|
|
|