huamnifierWithSimpleGrammer

Running

App Files Files

sashtech committed on Sep 10, 2024

Commit

3b5e5a8

verified ·

1 Parent(s): 463d2eb

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -30,7 +30,7 @@ def get_synonyms_nltk(word, pos):
     synsets = wordnet.synsets(word, pos=pos)
     if synsets:
         lemmas = synsets[0].lemmas()
-        return [lemma.name() for lemma in lemmas]
     return []
 # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
@@ -55,14 +55,14 @@ def capitalize_sentences_and_nouns(text):
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
-        # Check for tense correction based on modal verbs
-        if token.pos_ == "VERB" and token.dep_ in {"aux", "auxpass"}:
-            # Replace with appropriate verb form
-            lemma = wordnet.morphy(token.text, wordnet.VERB) or token.text
-            corrected_text.append(lemma)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Function to correct singular/plural errors (Singular/Plural Correction)
@@ -72,17 +72,15 @@ def correct_singular_plural_errors(text):
     for token in doc:
         if token.pos_ == "NOUN":
-            # Check if the noun is singular or plural
             if token.tag_ == "NN":  # Singular noun
-                # Look for determiners like "many" to correct to plural
-                if any(child.text.lower() in ['many', 'several', 'few'] for child in token.head.children):
-                    corrected_text.append(token.lemma_ + 's')
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
-                # Look for determiners like "a", "one" to correct to singular
-                if any(child.text.lower() in ['a', 'one'] for child in token.head.children):
-                    corrected_text.append(token.lemma_)
                 else:
                     corrected_text.append(token.text)
             else:
@@ -96,13 +94,16 @@ def correct_singular_plural_errors(text):
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
-    for token in doc:
-        if token.text in ['a', 'an']:
-            next_token = token.nbor(1)
-            if token.text == "a" and next_token.text[0].lower() in "aeiou":
-                corrected_text.append("an")
-            elif token.text == "an" and next_token.text[0].lower() not in "aeiou":
-                corrected_text.append("a")
             else:
                 corrected_text.append(token.text)
         else:
@@ -115,28 +116,30 @@ def paraphrase_with_spacy_nltk(text):
     paraphrased_words = []
     for token in doc:
-        # Map SpaCy POS tags to WordNet POS tags
         pos = None
-        if token.pos_ in {"NOUN"}:
             pos = wordnet.NOUN
-        elif token.pos_ in {"VERB"}:
             pos = wordnet.VERB
-        elif token.pos_ in {"ADJ"}:
             pos = wordnet.ADJ
-        elif token.pos_ in {"ADV"}:
             pos = wordnet.ADV
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
-        # Replace with a synonym only if it makes sense
-        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
-            paraphrased_words.append(synonyms[0])
         else:
             paraphrased_words.append(token.text)
-    # Join the words back into a sentence
     paraphrased_sentence = ' '.join(paraphrased_words)
     return paraphrased_sentence
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
@@ -146,15 +149,11 @@ def paraphrase_and_correct(text):
     # Step 2: Apply grammatical corrections on the paraphrased text
     corrected_text = correct_article_errors(paraphrased_text)
     corrected_text = capitalize_sentences_and_nouns(corrected_text)
     corrected_text = correct_singular_plural_errors(corrected_text)
-    # Step 3: Capitalize sentences and proper nouns (final correction step)
-    final_text = correct_tense_errors(corrected_text)
-    return final_text
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
@@ -162,18 +161,18 @@ with gr.Blocks() as demo:
         t1 = gr.Textbox(lines=5, label='Text')
         button1 = gr.Button("🤖 Predict!")
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
-        score1 = gr.Textbox(lines=1, label='Prob')
         # Connect the prediction function to the button
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
     with gr.Tab("Humanifier"):
-        text_input = gr.Textbox(lines=5, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
         output_text = gr.Textbox(label="Paraphrased Text")
         # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 # Launch the app with the remaining functionalities
-demo.launch()

     synsets = wordnet.synsets(word, pos=pos)
     if synsets:
         lemmas = synsets[0].lemmas()
+        return [lemma.name().replace('_', ' ') for lemma in lemmas]
     return []
 # Function to capitalize the first letter of sentences and proper nouns (Humanifier)
 def correct_tense_errors(text):
     doc = nlp(text)
     corrected_text = []
     for token in doc:
+        if token.tag_ in {"VBD", "VBN"} and token.lemma_:
+            # Convert past tense verbs to their base form
+            corrected_text.append(token.lemma_)
         else:
             corrected_text.append(token.text)
     return ' '.join(corrected_text)
 # Function to correct singular/plural errors (Singular/Plural Correction)
     for token in doc:
         if token.pos_ == "NOUN":
             if token.tag_ == "NN":  # Singular noun
+                if any(child.text.lower() in {'many', 'several', 'few', 'a', 'one'} for child in token.head.children):
+                    corrected_text.append(token.text if token.text.endswith('s') else token.text + 's')
                 else:
                     corrected_text.append(token.text)
             elif token.tag_ == "NNS":  # Plural noun
+                if any(child.text.lower() in {'a', 'one'} for child in token.head.children):
+                    singular = token.lemma_
+                    corrected_text.append(singular)
                 else:
                     corrected_text.append(token.text)
             else:
 def correct_article_errors(text):
     doc = nlp(text)
     corrected_text = []
+    tokens = list(doc)
+    for i, token in enumerate(tokens):
+        if token.text.lower() in {'a', 'an'}:
+            if i + 1 < len(tokens):
+                next_token = tokens[i + 1]
+                if next_token.text[0].lower() in 'aeiou':
+                    corrected_text.append('an')
+                else:
+                    corrected_text.append('a')
             else:
                 corrected_text.append(token.text)
         else:
     paraphrased_words = []
     for token in doc:
         pos = None
+        if token.pos_ == "NOUN":
             pos = wordnet.NOUN
+        elif token.pos_ == "VERB":
             pos = wordnet.VERB
+        elif token.pos_ == "ADJ":
             pos = wordnet.ADJ
+        elif token.pos_ == "ADV":
             pos = wordnet.ADV
         synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
+        # Replace with a synonym only if it's more common and fits the context
+        if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"}:
+            # Avoid replacing with the same word or rare synonyms
+            synonym = synonyms[0]
+            if synonym != token.text.lower() and len(synonym.split()) == 1:
+                paraphrased_words.append(synonym)
+            else:
+                paraphrased_words.append(token.text)
         else:
             paraphrased_words.append(token.text)
     paraphrased_sentence = ' '.join(paraphrased_words)
     return paraphrased_sentence
 # Combined function: Paraphrase -> Grammar Correction -> Capitalization (Humanifier)
     # Step 2: Apply grammatical corrections on the paraphrased text
     corrected_text = correct_article_errors(paraphrased_text)
     corrected_text = capitalize_sentences_and_nouns(corrected_text)
     corrected_text = correct_singular_plural_errors(corrected_text)
+    corrected_text = correct_tense_errors(corrected_text)
+    return corrected_text
 # Gradio app setup with two tabs
 with gr.Blocks() as demo:
         t1 = gr.Textbox(lines=5, label='Text')
         button1 = gr.Button("🤖 Predict!")
         label1 = gr.Textbox(lines=1, label='Predicted Label 🎃')
+        score1 = gr.Textbox(lines=1, label='Probability')
         # Connect the prediction function to the button
         button1.click(predict_en, inputs=[t1], outputs=[label1, score1], api_name='predict_en')
     with gr.Tab("Humanifier"):
+        text_input = gr.Textbox(lines=10, label="Input Text")
         paraphrase_button = gr.Button("Paraphrase & Correct")
         output_text = gr.Textbox(label="Paraphrased Text")
         # Connect the paraphrasing function to the button
         paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
 # Launch the app with the remaining functionalities
+demo.launch()