DarylT01 committed on
Commit
15e4f2e
·
1 Parent(s): fc5e51e

feat(type): identify word type

Browse files
Files changed (6) hide show
  1. adjectives.txt +3 -0
  2. adverbs.txt +3 -0
  3. app.py +76 -33
  4. conjunctions.txt +3 -0
  5. main.py +1 -1
  6. words.txt +0 -42
adjectives.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ and
2
+ but
3
+ or
adverbs.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ nor
2
+ for
3
+ yet
app.py CHANGED
@@ -6,19 +6,29 @@ from essential_generators import DocumentGenerator
6
  from main import generate_dfa
7
 
8
  # DataFrame
9
- words = []
10
- with open("words.txt") as file:
11
- words = file.read().splitlines()
12
- words = [word.strip() for word in words]
13
-
14
- df = pd.DataFrame(
15
- {
16
- "Words": words,
17
- }
18
- )
 
 
 
 
 
 
 
 
19
 
20
  # DFA function call
21
- dfa = generate_dfa(words)
 
 
22
 
23
 
24
  # Generate examples
@@ -36,9 +46,11 @@ def color_match(text: gr.Textbox):
36
  pointer = 0
37
 
38
  # Get the result of the DFA check on the input text
39
- match_dict = dfa.check(text)
 
 
40
 
41
- if not match_dict:
42
  return (
43
  '<div style="background-color: #dc2626; color: #fff; text-align: center; width: 100%; padding: 10px; font-weight:800; font-size:1.5rem">Rejected</div>',
44
  None,
@@ -47,9 +59,21 @@ def color_match(text: gr.Textbox):
47
 
48
  # Flatten the match_dict into a list of tuples and sort by the start index
49
  matches = sorted(
50
- (start, end, word)
51
- for word, indices in match_dict.items()
52
- for start, end in indices
 
 
 
 
 
 
 
 
 
 
 
 
53
  )
54
 
55
  for start, end, word in matches:
@@ -64,23 +88,40 @@ def color_match(text: gr.Textbox):
64
  # Combine the strings
65
  colored_text = "".join(colored_text)
66
 
67
- # Call getPositions function and get the DataFrame
68
- positions_df = getPositions(text)
69
-
70
- return colored_text, positions_df
71
-
72
-
73
- # Get positions function
74
- def getPositions(text):
75
- match_dict = dfa.check(text)
76
- positions_df = pd.DataFrame(columns=["Words", "Positions", "Occurences"])
77
- for word, positions in match_dict.items():
78
  # Convert the list of tuples to a string
79
  positions_str = ", ".join(map(str, positions))
80
  # Append a row for this word (with its positions string) to positions_df
81
- positions_df.loc[len(positions_df)] = [word, positions_str, len(positions)]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
- return positions_df
84
 
85
 
86
  # Search and display function
@@ -132,13 +173,15 @@ with gr.Blocks() as demo:
132
  label="Search",
133
  placeholder="Search accepted words here",
134
  lines=1,
135
- info="List of accpetable words in DFA",
136
  show_copy_button=True,
137
  )
138
  with gr.Row():
139
  cancel_btn = gr.ClearButton(search, variant="stop", interactive=False)
140
  search_btn = gr.Button(value="Search", variant="primary")
141
- resultSearch = gr.Dataframe(df, height=300, col_count=1, headers=["Words"])
 
 
142
 
143
  search.change(
144
  text_change_search,
@@ -187,8 +230,8 @@ with gr.Blocks() as demo:
187
  # positionTitle = gr.HTML("<h2 style='color: gold;'>Position</h2>")
188
  position = gr.Dataframe(
189
  show_label=True,
190
- col_count=3,
191
- headers=["Words", "Positions", "Occurences"],
192
  interactive=False,
193
  )
194
 
 
6
  from main import generate_dfa
7
 
8
  # DataFrame
9
+ conjunctions = []
10
+ with open("conjunctions.txt") as file:
11
+ conjunctions = file.read().splitlines()
12
+ conjunctions = [word.strip() for word in conjunctions]
13
+ with open("adverbs.txt") as file:
14
+ adverbs = file.read().splitlines()
15
+ adverbs = [word.strip() for word in adverbs]
16
+ with open("adjectives.txt") as file:
17
+ adjectives = file.read().splitlines()
18
+ adjectives = [word.strip() for word in adjectives]
19
+
20
+ df = pd.DataFrame(columns=["Words", "Type"])
21
+ for i in conjunctions:
22
+ df.loc[len(df)] = [i, "Conjunction"]
23
+ for i in adverbs:
24
+ df.loc[len(df)] = [i, "Adverb"]
25
+ for i in adjectives:
26
+ df.loc[len(df)] = [i, "Adjective"]
27
 
28
  # DFA function call
29
+ dfa_conjunctions = generate_dfa(conjunctions)
30
+ dfa_adverbs = generate_dfa(adverbs)
31
+ dfa_adjectives = generate_dfa(adjectives)
32
 
33
 
34
  # Generate examples
 
46
  pointer = 0
47
 
48
  # Get the result of the DFA check on the input text
49
+ match_dict_conj = dfa_conjunctions.check(text)
50
+ match_dict_adv = dfa_adverbs.check(text)
51
+ match_dict_adj = dfa_adjectives.check(text)
52
 
53
+ if not match_dict_conj and not match_dict_adv and not match_dict_adj:
54
  return (
55
  '<div style="background-color: #dc2626; color: #fff; text-align: center; width: 100%; padding: 10px; font-weight:800; font-size:1.5rem">Rejected</div>',
56
  None,
 
59
 
60
  # Flatten the three match dicts into one list of tuples and sort by the start index
61
  matches = sorted(
62
+ [
63
+ (start, end, word)
64
+ for word, indices in match_dict_conj.items()
65
+ for start, end in indices
66
+ ]
67
+ + [
68
+ (start, end, word)
69
+ for word, indices in match_dict_adv.items()
70
+ for start, end in indices
71
+ ]
72
+ + [
73
+ (start, end, word)
74
+ for word, indices in match_dict_adj.items()
75
+ for start, end in indices
76
+ ]
77
  )
78
 
79
  for start, end, word in matches:
 
88
  # Combine the strings
89
  colored_text = "".join(colored_text)
90
 
91
+ # Create the DataFrame
92
+ positions_df = pd.DataFrame(columns=["Words", "Type", "Positions", "Occurences"])
93
+ for word, positions in match_dict_conj.items():
 
 
 
 
 
 
 
 
94
  # Convert the list of tuples to a string
95
  positions_str = ", ".join(map(str, positions))
96
  # Append a row for this word (with its positions string) to positions_df
97
+ positions_df.loc[len(positions_df)] = [
98
+ word,
99
+ "Conjunction",
100
+ positions_str,
101
+ len(positions),
102
+ ]
103
+ for word, positions in match_dict_adv.items():
104
+ # Convert the list of tuples to a string
105
+ positions_str = ", ".join(map(str, positions))
106
+ # Append a row for this word (with its positions string) to positions_df
107
+ positions_df.loc[len(positions_df)] = [
108
+ word,
109
+ "Adverb",
110
+ positions_str,
111
+ len(positions),
112
+ ]
113
+ for word, positions in match_dict_adj.items():
114
+ # Convert the list of tuples to a string
115
+ positions_str = ", ".join(map(str, positions))
116
+ # Append a row for this word (with its positions string) to positions_df
117
+ positions_df.loc[len(positions_df)] = [
118
+ word,
119
+ "Adjective",
120
+ positions_str,
121
+ len(positions),
122
+ ]
123
 
124
+ return colored_text, positions_df
125
 
126
 
127
  # Search and display function
 
173
  label="Search",
174
  placeholder="Search accepted words here",
175
  lines=1,
176
+ info="List of acceptable words in DFA",
177
  show_copy_button=True,
178
  )
179
  with gr.Row():
180
  cancel_btn = gr.ClearButton(search, variant="stop", interactive=False)
181
  search_btn = gr.Button(value="Search", variant="primary")
182
+ resultSearch = gr.Dataframe(
183
+ df, height=300, col_count=2, headers=["Words", "Type"]
184
+ )
185
 
186
  search.change(
187
  text_change_search,
 
230
  # positionTitle = gr.HTML("<h2 style='color: gold;'>Position</h2>")
231
  position = gr.Dataframe(
232
  show_label=True,
233
+ col_count=4,
234
+ headers=["Words", "Type", "Positions", "Occurences"],
235
  interactive=False,
236
  )
237
 
conjunctions.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ subsequently
2
+ meanwhile
3
+ afterwards
main.py CHANGED
@@ -37,7 +37,7 @@ class DFA:
37
  chars = list(paragraph)
38
 
39
  current_word = ""
40
- accepted_words: dict[list[tuple[int, int]]] = (
41
  {}
42
  ) # returns: {word: [(start_index, end_index)]}
43
 
 
37
  chars = list(paragraph)
38
 
39
  current_word = ""
40
+ accepted_words: dict[str : list[tuple[int, int]]] = (
41
  {}
42
  ) # returns: {word: [(start_index, end_index)]}
43
 
words.txt DELETED
@@ -1,42 +0,0 @@
1
- and
2
- but
3
- or
4
- nor
5
- for
6
- yet
7
- so
8
- although
9
- though
10
- because
11
- since
12
- until
13
- after
14
- before
15
- as
16
- if
17
- once
18
- provided
19
- that
20
- unless
21
- whereas
22
- while
23
- lest
24
- whether
25
- however
26
- moreover
27
- nevertheless
28
- nonetheless
29
- consequently
30
- therefore
31
- thus
32
- hence
33
- besides
34
- furthermore
35
- otherwise
36
- instead
37
- similarly
38
- likewise
39
- accordingly
40
- subsequently
41
- meanwhile
42
- afterwards