Spaces:

DarylT01
/

dfa-ui

Sleeping

App Files Files Community

DarylT01 committed on Apr 30, 2024

Commit

c20b69e

1 Parent(s): dde04d8

feat(new): first commit to huggingface spaces

Browse files

Files changed (5) hide show

.gitignore +3 -0
app.py +168 -0
main.py +110 -0
requirements.txt +0 -0
words.txt +42 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@

+.venv
+env/
+__pycache__/

app.py ADDED Viewed

	@@ -0,0 +1,168 @@

+# Import libraries and modules
+# Run pip install gradio and wonderwords in terminal
+import gradio as gr
+import pandas as pd
+from wonderwords import RandomSentence
+from main import generate_dfa
+# DataFrame
# Accepted words: one per line in words.txt, surrounding whitespace removed.
with open("words.txt") as file:
    words = [line.strip() for line in file.read().splitlines()]
# Single-column table of the accepted words, shown in the search panel.
df = pd.DataFrame({'Words': words})
# Automaton built from the word list; used by every matching helper below.
dfa = generate_dfa(words)
+# Generate examples || RandomSentence is not the best way to generate examples || Should be replaced with self-generated examples
def generateExamples():
    """Return a list of three random sentences to offer as example inputs."""
    sentence_source = RandomSentence()
    return [sentence_source.sentence() for _ in range(3)]
+# Color match function
def color_match(text):
    """Highlight every DFA-accepted word in *text* and summarise the matches.

    Args:
        text: Raw user input from the text box.

    Returns:
        A 3-tuple ``(html, occurrences_df, positions_df)``: the input rendered
        as HTML with accepted words wrapped in red ``<span>`` tags, the
        per-word occurrence table from ``getOccurrences`` and the per-word
        position table from ``getPositions``. Blank input yields an empty
        string and two empty tables instead of an error.
    """
    import html  # local import: only this handler needs HTML escaping

    # dfa.check raises ValueError on blank input; show an empty result instead
    # of surfacing an error when the user submits an empty box.
    if not text or not text.strip():
        return (
            "",
            pd.DataFrame(columns=['Words', 'Occurrences']),
            pd.DataFrame(columns=['Words', 'Positions']),
        )

    # Strip once up front so the indices returned by dfa.check are guaranteed
    # to line up with the string sliced below, however check itself treats
    # surrounding whitespace.
    text = text.strip()

    match_dict = dfa.check(text)
    # Flatten {word: [(start, end), ...]} into tuples ordered by start index.
    matches = sorted(
        (start, end, word)
        for word, indices in match_dict.items()
        for start, end in indices
    )

    pieces = []
    pointer = 0
    for start, end, _word in matches:
        # The text is user-supplied and rendered as HTML, so every fragment is
        # escaped before being placed in the markup.
        pieces.append(html.escape(text[pointer:start]))
        # `end` is inclusive, hence the + 1 when slicing.
        pieces.append(
            f'<span style="color:red;">{html.escape(text[start:end + 1])}</span>'
        )
        pointer = end + 1
    # Whatever follows the last match.
    pieces.append(html.escape(text[pointer:]))
    colored_text = ''.join(pieces)

    occurrences_df = getOccurrences(text)
    positions_df = getPositions(text)
    return colored_text, occurrences_df, positions_df
+# Get occurrences function
def getOccurrences(text):
    """Count how many times each accepted word appears in *text*.

    Returns a DataFrame with columns ``Words`` and ``Occurrences``, one row
    per accepted word reported by ``dfa.check``.
    """
    found = dfa.check(text)
    # One (word, count) row per accepted word; the count is the number of
    # position tuples recorded for that word.
    rows = [(word, len(spans)) for word, spans in found.items()]
    return pd.DataFrame(rows, columns=['Words', 'Occurrences'])
+# Get positions function
def getPositions(text):
    """List where each accepted word occurs in *text*.

    Returns a DataFrame with columns ``Words`` and ``Positions``; each
    ``Positions`` cell is the word's ``(start, end)`` index tuples from
    ``dfa.check`` joined with ``", "``.
    """
    match_dict = dfa.check(text)
    wordPositions = {}
    for word, positions in match_dict.items():
        # Render the list of (start, end) tuples as one display string.
        # (Leftover debug prints removed: they dumped the whole dict to stdout
        # on every loop iteration of every request.)
        wordPositions[word] = ', '.join(map(str, positions))
    # Convert the wordPositions dictionary to a DataFrame
    positions_df = pd.DataFrame(list(wordPositions.items()), columns=['Words', 'Positions'])
    return positions_df
+# Search and display function
def search_and_display(search_query):
    """Return the rows of the word table whose word contains *search_query*.

    The query is matched as a plain, case-insensitive substring. An empty or
    missing query returns the full table.
    """
    if not search_query:
        return df
    # regex=False: the query is user-typed text, so characters such as "(" or
    # "*" must be matched literally rather than compiled as a pattern (which
    # would raise on unbalanced input). case=False: the word list is lowercase
    # but users may type capitals.
    mask = df['Words'].str.contains(search_query, case=False, regex=False)
    filtered_df = df[mask]
    return filtered_df
+# CSS styling
+# css = """
+#warning {background-color: #FFCCCB}
+# .feedback textarea {font-size: 24px !important}
+# """
+# Example to apply CSS styling
+# with gr.Blocks(css=css) as demo:
+#     box1 = gr.Textbox(value="Good Job", elem_classes="feedback")
+#     box2 = gr.Textbox(value="Failure", elem_id="warning", elem_classes="feedback")
+# Gradio UI
with gr.Blocks() as demo:
    # Title block
    # Inline CSS: declarations must be separated by ';' or the browser drops
    # both margin-bottom and font-weight.
    title = gr.HTML("<h1 style='color: gold; margin-bottom: 0px; font-weight: bold'>English Conjunction Finder</h1>")
    description = gr.HTML("<p style='color: #fef9c3;'>Enter a text and see the words that are accepted by the DFA highlighted in red.</p>")
    # Search block: filter the table of accepted words by substring
    search = gr.Textbox(label="Search", placeholder="Search accepted words here", lines=1, info="List of acceptable words in DFA")
    search_btn = gr.Button(value="Search")
    resultSearch = gr.Dataframe(df, height=300)
    search_btn.click(
        search_and_display, inputs=[search], outputs=[resultSearch], api_name=False
    )
    # Adding a line break
    line_break = gr.HTML("<br>")
    # Text block for DFA and color match
    textTitle = gr.HTML("<h2>Try it here!</h2>")
    text = gr.Textbox(label="Text", placeholder="Enter text here", info="Enter text to check for DFA match")
    submit_btn = gr.Button(value="Submit")
    # Examples block: random sentences generated once, when the UI is built
    examples_data = generateExamples()
    examples = gr.Examples(
        examples=examples_data,
        inputs=[text],
    )
    # Result block: highlighted HTML produced by color_match
    resultTitle = gr.HTML("<h2 style='color: gold; margin-bottom: 5px'>Result</h2>")
    result = gr.HTML("<p></p>")
    # Occurrences block
    occurrencesTitle = gr.HTML("<h2 style='color: gold; margin-bottom: 5px'>Occurrences</h2>")
    occurrences = gr.Dataframe()
    # Position block
    positionTitle = gr.HTML("<h2 style='color: gold;'>Position</h2>")
    position = gr.Dataframe()
    # color_match returns (html, occurrences table, positions table), in the
    # same order as the outputs listed here.
    submit_btn.click(
        color_match, inputs=[text], outputs=[result, occurrences, position], api_name=False
    )
# Launch the app
demo.launch(share=True)

main.py ADDED Viewed

	@@ -0,0 +1,110 @@

+# References https://blog.devgenius.io/finite-automata-implement-a-dfa-in-python-64dc3d7005d9
class DFA:
    """Deterministic finite automaton over single-character symbols.

    The automaton is described by the usual 5-tuple. ``transitions`` is a
    nested mapping ``{state: {symbol: next_state}}``; a missing entry means
    there is no transition and the input is rejected.
    """

    # 5-tuple init
    def __init__(self, alphabet, states, transitions, start_state, final_states):
        self.alphabet = alphabet
        self.states = states
        self.transitions = transitions
        self.start_state = start_state
        self.final_states = final_states

    def is_accepting(self, input_string: str) -> bool:
        """Return True if the automaton ends in a final state on *input_string*.

        Any symbol outside the alphabet, or any missing transition, rejects
        the input immediately.
        """
        current_state = self.start_state
        for char in input_string:
            if char not in self.alphabet:
                return False
            current_state = self.transitions.get(current_state, {}).get(char)
            if current_state is None:
                return False
        return current_state in self.final_states

    def check(self, paragraph: str) -> dict[str, list[tuple[int, int]]]:
        """Find every accepted word in *paragraph* and where it occurs.

        The paragraph is split into maximal runs of alphabet characters
        (compared case-insensitively); each run is tested with
        ``is_accepting``.

        Args:
            paragraph: Text to scan.

        Returns:
            ``{word: [(start_index, end_index), ...]}`` where ``word`` is the
            lowercased match and both indices are inclusive offsets into
            *paragraph* exactly as it was passed in, so
            ``paragraph[start:end + 1]`` is the matched text.

        Raises:
            ValueError: If *paragraph* is empty or only whitespace.
        """
        if paragraph.strip() == "":
            raise ValueError("Empty string provided")

        accepted_words: dict[str, list[tuple[int, int]]] = {}
        current_word: list[str] = []
        word_start = 0
        last_index = len(paragraph) - 1

        # Iterate over the paragraph as given (no strip(), no whole-string
        # lower()) so that the reported indices stay valid for the caller's
        # string; case is folded one character at a time instead.
        for i, raw_char in enumerate(paragraph):
            char = raw_char.lower()
            in_alphabet = char in self.alphabet
            if in_alphabet:
                if not current_word:
                    word_start = i
                current_word.append(char)
            # A word ends at the first non-alphabet character or at the end
            # of the paragraph.
            if (not in_alphabet or i == last_index) and current_word:
                word = "".join(current_word)
                word_end = i if in_alphabet else i - 1
                if self.is_accepting(word):
                    accepted_words.setdefault(word, []).append(
                        (word_start, word_end)
                    )
                current_word = []
        return accepted_words
def generate_dfa(words: list[str]) -> DFA:
    """Build a DFA that accepts exactly the given words.

    The automaton is a trie: state 0 is the start state, each new prefix gets
    the next unused integer as its state, and the state reached at the end of
    each word is marked final.

    Args:
        words: Words to accept. Each is stripped and lowercased first; entries
            that are empty after stripping (e.g. blank lines in a word file)
            are ignored so the start state is never marked as accepting.

    Returns:
        A ``DFA`` over the lowercase ASCII letters ``a``-``z``.
    """
    alphabet = set("abcdefghijklmnopqrstuvwxyz")
    states = {0}
    transitions = {}
    start_state = 0
    final_states = set()
    for raw_word in words:
        word = raw_word.strip().lower()
        if not word:
            continue
        current_state = start_state
        for char in word:
            # Outgoing edges of the current state (created on first use).
            edges = transitions.setdefault(current_state, {})
            next_state = edges.get(char)
            # No edge for this character yet: allocate a fresh state for it.
            if next_state is None:
                next_state = len(states)
                edges[char] = next_state
                states.add(next_state)
            current_state = next_state
        final_states.add(current_state)
    return DFA(alphabet, states, transitions, start_state, final_states)
# Module-level word list and automaton built from words.txt (one word per line).
with open("words.txt", encoding="utf-8") as file:
    words = [line.strip() for line in file]
dfa = generate_dfa(words)

# Demo output only when run as a script, so importing this module for
# generate_dfa does not print to stdout.
if __name__ == "__main__":
    print(dfa.check("...and the and the and ... and"))

requirements.txt ADDED Viewed

Binary file (2.38 kB). View file

words.txt ADDED Viewed

	@@ -0,0 +1,42 @@

+and
+but
+or
+nor
+for
+yet
+so
+although
+though
+because
+since
+until
+after
+before
+as
+if
+once
+provided
+that
+unless
+whereas
+while
+lest
+whether
+however
+moreover
+nevertheless
+nonetheless
+consequently
+therefore
+thus
+hence
+besides
+furthermore
+otherwise
+instead
+similarly
+likewise
+accordingly
+subsequently
+meanwhile
+afterwards