DarylT01 committed on
Commit
c20b69e
·
1 Parent(s): dde04d8

feat(new): first commit to huggingface spaces

Browse files
Files changed (5) hide show
  1. .gitignore +3 -0
  2. app.py +168 -0
  3. main.py +110 -0
  4. requirements.txt +0 -0
  5. words.txt +42 -0
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .venv
2
+ env/
3
+ __pycache__/
app.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import libraries and modules
2
+ # Run pip install gradio and wonderwords in terminal
3
+ import gradio as gr
4
+ import pandas as pd
5
+ from wonderwords import RandomSentence
6
+ from main import generate_dfa
7
+
8
+ # DataFrame
9
# Load the accepted words, one per line, trimming surrounding whitespace.
with open("words.txt") as file:
    words = [line.strip() for line in file.read().splitlines()]

# Single-column table of the accepted words, shown in the search panel.
df = pd.DataFrame({'Words': words})

# Automaton built once at start-up from the word list.
dfa = generate_dfa(words)
20
+
21
+ # Generate examples || RandomSentence is not the best way to generate examples || Should be replaced with self-generated examples
22
def generateExamples():
    """Return three randomly generated sentences to use as example inputs."""
    generator = RandomSentence()
    return [generator.sentence() for _ in range(3)]
28
+
29
+ # Color match function
30
def color_match(text):
    """Highlight every DFA-accepted word of ``text`` in red.

    Args:
        text: Raw user input from the text box.

    Returns:
        A 3-tuple ``(html, occurrences_df, positions_df)``: the input rendered
        as HTML with each accepted word wrapped in a red ``<span>``, followed
        by the DataFrames produced by ``getOccurrences`` and ``getPositions``.
        Blank input yields an empty string and two empty DataFrames.
    """
    # Local import so this function is self-contained.
    import html

    # The DFA rejects blank input with ValueError; return empty results
    # instead of surfacing an error for an empty submission.
    if not text or not text.strip():
        return (
            "",
            pd.DataFrame(columns=['Words', 'Occurrences']),
            pd.DataFrame(columns=['Words', 'Positions']),
        )

    match_dict = dfa.check(text)

    # Flatten {word: [(start, end), ...]} into tuples ordered by start index.
    matches = sorted(
        (start, end, word)
        for word, indices in match_dict.items()
        for start, end in indices
    )

    pieces = []
    pointer = 0
    for start, end, _word in matches:
        # The text is user-supplied and ends up inside an HTML component, so
        # every slice is escaped separately; escaping the whole string first
        # would shift the match indices.
        pieces.append(html.escape(text[pointer:start]))
        # ``end`` is inclusive, hence the +1 when slicing.
        pieces.append(
            f'<span style="color:red;">{html.escape(text[start:end + 1])}</span>'
        )
        pointer = end + 1

    # Whatever follows the last match.
    pieces.append(html.escape(text[pointer:]))
    colored_text = ''.join(pieces)

    occurrences_df = getOccurrences(text)
    positions_df = getPositions(text)

    return colored_text, occurrences_df, positions_df
65
+
66
+ # Get occurrences function
67
def getOccurrences(text):
    """Count how many times each accepted word appears in ``text``.

    Returns a DataFrame with columns ``Words`` and ``Occurrences``, one row
    per distinct accepted word.
    """
    found = dfa.check(text)
    # One (word, count) row per accepted word; the count is the number of
    # recorded spans for that word.
    rows = [(word, len(spans)) for word, spans in found.items()]
    return pd.DataFrame(rows, columns=['Words', 'Occurrences'])
81
+
82
+ # Get positions function
83
def getPositions(text):
    """List the positions of each accepted word found in ``text``.

    Returns a DataFrame with columns ``Words`` and ``Positions``; the
    positions of a word are rendered as one comma-separated string of
    ``(start, end)`` tuples.
    """
    match_dict = dfa.check(text)
    # Debug prints were removed: they echoed user input to stdout on every
    # request, unlike the sibling functions where they are commented out.
    rows = [
        (word, ', '.join(map(str, positions)))
        for word, positions in match_dict.items()
    ]
    return pd.DataFrame(rows, columns=['Words', 'Positions'])
99
+
100
+ # Search and display function
101
def search_and_display(search_query):
    """Return the rows of the word table whose word contains the query.

    Args:
        search_query: Text typed into the search box.

    Returns:
        The filtered DataFrame, or the full table when the query is blank.
    """
    query = (search_query or "").strip()
    if not query:
        return df
    # regex=False: the query is user input, and characters such as "(" or "["
    # would otherwise be parsed as a regular expression and raise.
    # case=False: the word list is lower-case, so "And" should still match.
    mask = df['Words'].str.contains(query, case=False, regex=False)
    return df[mask]
106
+
107
+ # CSS styling
108
+ # css = """
109
+ #warning {background-color: #FFCCCB}
110
+ # .feedback textarea {font-size: 24px !important}
111
+ # """
112
+
113
+ # Example to apply CSS styling
114
+ # with gr.Blocks(css=css) as demo:
115
+ # box1 = gr.Textbox(value="Good Job", elem_classes="feedback")
116
+ # box2 = gr.Textbox(value="Failure", elem_id="warning", elem_classes="feedback")
117
+
118
+ # Gradio UI
119
with gr.Blocks() as demo:

    # Title block. The inline style needs a ';' between declarations;
    # without it neither margin-bottom nor font-weight is applied.
    title = gr.HTML("<h1 style='color: gold; margin-bottom: 0px; font-weight: bold'>English Conjunction Finder</h1>")
    description = gr.HTML("<p style='color: #fef9c3;'>Enter a text and see the words that are accepted by the DFA highlighted in red.</p>")

    # Search block: filter the table of accepted words.
    search = gr.Textbox(label="Search", placeholder="Search accepted words here", lines=1, info="List of acceptable words in DFA")
    search_btn = gr.Button(value="Search")
    resultSearch = gr.Dataframe(df, height=300)

    search_btn.click(
        search_and_display, inputs=[search], outputs=[resultSearch], api_name=False
    )

    # Visual spacing between the two sections.
    line_break = gr.HTML("<br>")

    # Input block for the DFA check.
    textTitle = gr.HTML("<h2>Try it here!</h2>")
    text = gr.Textbox(label="Text", placeholder="Enter text here", info="Enter text to check for DFA match")
    submit_btn = gr.Button(value="Submit")

    # Clickable example sentences that fill the text box.
    examples_data = generateExamples()
    examples = gr.Examples(
        examples=examples_data,
        inputs=[text],
    )

    # Highlighted result.
    resultTitle = gr.HTML("<h2 style='color: gold; margin-bottom: 5px'>Result</h2>")
    result = gr.HTML("<p></p>")

    # Per-word occurrence counts.
    occurrencesTitle = gr.HTML("<h2 style='color: gold; margin-bottom: 5px'>Occurrences</h2>")
    occurrences = gr.Dataframe()

    # Per-word positions.
    positionTitle = gr.HTML("<h2 style='color: gold;'>Position</h2>")
    position = gr.Dataframe()

    submit_btn.click(
        color_match, inputs=[text], outputs=[result, occurrences, position], api_name=False
    )

# Launch the app
demo.launch(share=True)
168
+
main.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
2
class DFA:
    """Deterministic finite automaton described by the usual 5-tuple."""

    def __init__(self, alphabet, states, transitions, start_state, final_states):
        """Store the 5-tuple.

        Args:
            alphabet: Collection of symbols the automaton understands.
            states: Collection of state identifiers.
            transitions: Nested mapping ``{state: {symbol: next_state}}``.
            start_state: State in which every run begins.
            final_states: Collection of accepting states.
        """
        self.alphabet = alphabet
        self.states = states
        self.transitions = transitions
        self.start_state = start_state
        self.final_states = final_states

    def is_accepting(self, input_string: str):
        """Return True when ``input_string`` drives the DFA to a final state.

        A symbol outside the alphabet, or a missing transition, rejects the
        string immediately.
        """
        current_state = self.start_state

        for char in input_string:
            if char not in self.alphabet:
                return False

            current_state = self.transitions.get(current_state, {}).get(char, None)
            if current_state is None:
                return False

        return current_state in self.final_states

    def check(self, paragraph: str):
        """Find every accepted word in ``paragraph``.

        The paragraph is split into maximal runs of alphabet symbols, compared
        case-insensitively, and each run is tested with ``is_accepting``.

        Args:
            paragraph: Text to scan.

        Returns:
            ``{word: [(start_index, end_index), ...]}`` with lower-cased words
            and inclusive indices into ``paragraph`` exactly as passed in.

        Raises:
            ValueError: If ``paragraph`` is empty or only whitespace.
        """
        if paragraph.strip() == "":
            raise ValueError("Empty string provided")

        accepted_words: dict[str, list[tuple[int, int]]] = {}
        current_word = ""
        word_start = 0
        word_end = 0
        last_index = len(paragraph) - 1

        # Work on the caller's string without stripping it, and lower-case one
        # character at a time, so the reported indices line up with the text
        # the caller slices. A character whose lower-case form is not a single
        # alphabet symbol simply acts as a separator.
        for i, raw_char in enumerate(paragraph):
            char = raw_char.lower()
            in_alphabet = char in self.alphabet
            if in_alphabet:
                current_word += char
                word_end = i

            # A word ends at the first non-alphabet character or at the end
            # of the paragraph.
            if not in_alphabet or i == last_index:
                if current_word != "" and self.is_accepting(current_word):
                    accepted_words.setdefault(current_word, []).append(
                        (word_start, word_end)
                    )
                current_word = ""
                word_start = i + 1

        return accepted_words
51
+
52
+
53
def generate_dfa(words: list[str]) -> DFA:
    """Build a DFA that accepts exactly the given words.

    The words are inserted into a prefix tree: state 0 is the start state,
    each new prefix gets the next unused integer as its state, and the state
    reached at the end of a word is marked final.
    """
    alphabet = set("abcdefghijklmnopqrstuvwxyz")
    start_state = 0
    states = {start_state}
    transitions = {}
    final_states = set()

    for word in words:
        state = start_state
        for symbol in word:
            # Outgoing edges of the current state, created on first use.
            outgoing = transitions.setdefault(state, {})
            if symbol not in outgoing:
                # Unseen prefix: allocate the next state number for it.
                outgoing[symbol] = len(states)
                states.add(outgoing[symbol])
            state = outgoing[symbol]
        final_states.add(state)

    return DFA(alphabet, states, transitions, start_state, final_states)
102
+
103
+
104
# Read the word list, one word per line, trimming newlines and whitespace.
with open("words.txt") as file:
    words = [line.strip() for line in file.readlines()]

# Build the automaton and print the matches for a sample sentence.
dfa = generate_dfa(words)
print(dfa.check("...and the and the and ... and"))
requirements.txt ADDED
Binary file (2.38 kB). View file
 
words.txt ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ and
2
+ but
3
+ or
4
+ nor
5
+ for
6
+ yet
7
+ so
8
+ although
9
+ though
10
+ because
11
+ since
12
+ until
13
+ after
14
+ before
15
+ as
16
+ if
17
+ once
18
+ provided
19
+ that
20
+ unless
21
+ whereas
22
+ while
23
+ lest
24
+ whether
25
+ however
26
+ moreover
27
+ nevertheless
28
+ nonetheless
29
+ consequently
30
+ therefore
31
+ thus
32
+ hence
33
+ besides
34
+ furthermore
35
+ otherwise
36
+ instead
37
+ similarly
38
+ likewise
39
+ accordingly
40
+ subsequently
41
+ meanwhile
42
+ afterwards