DarylT01 committed on
Commit
fc5e51e
·
1 Parent(s): 73cf5f3

docs(dfa): add dfa comments and minor refactor

Browse files
Files changed (1) hide show
  1. main.py +61 -32
main.py CHANGED
@@ -1,18 +1,21 @@
1
  # References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
2
  class DFA:
3
  # 5-tuple init
4
- def __init__(self, alphabet, states, transitions, start_state, final_states):
5
- self.alphabet = alphabet
 
 
6
  self.states = states
7
- self.transitions = transitions
8
- self.start_state = start_state
9
  self.final_states = final_states
10
 
 
11
  def is_accepting(self, input_string: str):
12
- current_state = self.start_state
13
 
14
  for char in input_string:
15
- if char not in self.alphabet:
16
  return False
17
 
18
  current_state = self.transitions.get(current_state, {}).get(char, None)
@@ -21,24 +24,40 @@ class DFA:
21
 
22
  return current_state in self.final_states
23
 
 
24
  def check(self, paragraph: str):
 
25
  if paragraph.strip() == "":
26
  raise ValueError("Empty string provided")
 
 
27
  paragraph = paragraph.lower().strip()
28
 
 
29
  chars = list(paragraph)
 
30
  current_word = ""
31
  accepted_words: dict[list[tuple[int, int]]] = (
32
  {}
33
- ) # {word: [(start_index, end_index)]}
 
 
34
  word_start = 0
35
  word_end = 0
 
 
36
  for i, char in enumerate(chars):
37
- if char in self.alphabet:
 
38
  current_word += char
39
  word_end = i
40
 
41
- if char not in self.alphabet or i == len(chars) - 1:
 
 
 
 
 
42
  if current_word != "":
43
  if self.is_accepting(current_word):
44
  accepted_words[current_word] = accepted_words.get(
@@ -50,8 +69,9 @@ class DFA:
50
  return accepted_words
51
 
52
 
53
- def generate_dfa(words: list[str]) -> DFA:
54
- alphabet = {
 
55
  "a",
56
  "b",
57
  "c",
@@ -80,31 +100,40 @@ def generate_dfa(words: list[str]) -> DFA:
80
  "z",
81
  }
82
  states = set([0])
83
- transitions = {}
84
- start_state = 0
85
  final_states = set()
86
 
87
- for i, word in enumerate(words):
 
 
 
88
  current_state = 0
89
- for char in word:
90
- # Get the next state in the DFA of the current state based on the character available for transition
91
- next_state = transitions.get(current_state, {}).get(char, None)
92
-
93
- # If the next state is not in the DFA, then create one for it.
94
- if next_state is None:
95
- next_state = len(states)
96
- transitions.setdefault(current_state, {})[char] = next_state
97
- states.add(next_state)
98
- current_state = next_state
99
- final_states.add(current_state)
100
 
101
- return DFA(alphabet, states, transitions, start_state, final_states)
 
 
 
 
 
 
 
 
102
 
 
 
 
 
 
 
 
 
 
 
103
 
104
- words = []
105
- with open("words.txt") as file:
106
- words = file.readlines()
107
- words = [word.strip() for word in words]
 
108
 
109
- dfa = generate_dfa(words)
110
- print(dfa.check("...and the and the and ... and"))
 
1
  # References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
2
  class DFA:
3
  # 5-tuple init
4
+ def __init__(
5
+ self, alphabets, states, transition_functions, initial_state, final_states
6
+ ):
7
+ self.alphabets = alphabets
8
  self.states = states
9
+ self.transitions = transition_functions
10
+ self.initial_state = initial_state
11
  self.final_states = final_states
12
 
13
+ # Processes the input word and returns True if the word is accepted by the DFA
14
  def is_accepting(self, input_string: str):
15
+ current_state = self.initial_state
16
 
17
  for char in input_string:
18
+ if char not in self.alphabets:
19
  return False
20
 
21
  current_state = self.transitions.get(current_state, {}).get(char, None)
 
24
 
25
  return current_state in self.final_states
26
 
27
+ # Processes the input paragraph and returns a dictionary of accepted words and their positions
28
  def check(self, paragraph: str):
29
+ # Reject empty input
30
  if paragraph.strip() == "":
31
  raise ValueError("Empty string provided")
32
+
33
+ # Normalize the input
34
  paragraph = paragraph.lower().strip()
35
 
36
+ # Convert the paragraph to a list of characters
37
  chars = list(paragraph)
38
+
39
  current_word = ""
40
  accepted_words: dict[list[tuple[int, int]]] = (
41
  {}
42
+ ) # returns: {word: [(start_index, end_index)]}
43
+
44
+ # Start and end index of the current word
45
  word_start = 0
46
  word_end = 0
47
+
48
+ # Traverse the characters in the paragraph
49
  for i, char in enumerate(chars):
50
+ # If the character is in the alphabet, then it is part of the word
51
+ if char in self.alphabets:
52
  current_word += char
53
  word_end = i
54
 
55
+ # If the character is not in the alphabet or it is the last character in the paragraph
56
+ # then the current word is complete
57
+ # Check if the current word is accepted by the DFA
58
+ # If it is accepted, add it to the accepted_words dictionary
59
+ # Reset the current_word to an empty string
60
+ if char not in self.alphabets or i == len(chars) - 1:
61
  if current_word != "":
62
  if self.is_accepting(current_word):
63
  accepted_words[current_word] = accepted_words.get(
 
69
  return accepted_words
70
 
71
 
72
+ # Generate a DFA from a list of words
73
+ def generate_dfa(input_strings: list[str]) -> DFA:
74
+ alphabets = {
75
  "a",
76
  "b",
77
  "c",
 
100
  "z",
101
  }
102
  states = set([0])
103
+ transition_functions = {}
 
104
  final_states = set()
105
 
106
+ for input_string in [
107
+ input_string.lower().strip() for input_string in input_strings
108
+ ]:
109
+
110
  current_state = 0
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ for i in input_string:
113
+
114
+ if i not in alphabets:
115
+ raise ValueError(f"Invalid character '{i}' in string '{input_string}'")
116
+
117
+ # Find the upcoming state based on the current state and the input character to reach the next state
118
+ upcoming_state = transition_functions.get(current_state)
119
+ if upcoming_state is not None:
120
+ upcoming_state = upcoming_state.get(i)
121
 
122
+ # Create a new state if the upcoming state is not found
123
+ if upcoming_state is None:
124
+ # The new state is the next integer after the maximum state in the set of states
125
+ upcoming_state = len(states)
126
+ # Add the new state to the set of states
127
+ states.add(upcoming_state)
128
+ # Add the transition into the transition functions dictionary
129
+ if current_state not in transition_functions:
130
+ transition_functions[current_state] = {}
131
+ transition_functions[current_state][i] = upcoming_state
132
 
133
+ # Move to the upcoming state
134
+ current_state = upcoming_state
135
+
136
+ # Add the final state to the set of final states
137
+ final_states.add(current_state)
138
 
139
+ return DFA(alphabets, states, transition_functions, 0, final_states)