Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,8 @@ import spacy
|
|
| 5 |
import subprocess
|
| 6 |
import nltk
|
| 7 |
from nltk.corpus import wordnet
|
|
|
|
|
|
|
| 8 |
|
| 9 |
# Initialize the English text classification pipeline for AI detection
|
| 10 |
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")
|
|
@@ -51,53 +53,47 @@ def capitalize_sentences_and_nouns(text):
|
|
| 51 |
|
| 52 |
return ' '.join(corrected_text)
|
| 53 |
|
| 54 |
-
#
|
| 55 |
def correct_tense_errors(text):
|
| 56 |
doc = nlp(text)
|
| 57 |
corrected_text = []
|
|
|
|
| 58 |
for token in doc:
|
| 59 |
if token.pos_ == "VERB":
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
elif token.tag_ in {"VBD", "VBN"}: # Past tense correction
|
| 64 |
-
corrected_text.append(lemma + "ed")
|
| 65 |
-
else:
|
| 66 |
-
corrected_text.append(token.text)
|
| 67 |
else:
|
| 68 |
corrected_text.append(token.text)
|
|
|
|
| 69 |
return ' '.join(corrected_text)
|
| 70 |
|
| 71 |
-
#
|
| 72 |
def correct_singular_plural_errors(text):
|
| 73 |
doc = nlp(text)
|
| 74 |
corrected_text = []
|
| 75 |
-
|
| 76 |
for token in doc:
|
| 77 |
if token.pos_ == "NOUN":
|
| 78 |
-
if token.tag_ == "NN"
|
| 79 |
-
corrected_text.append(token.text
|
| 80 |
-
elif token.tag_ == "NNS"
|
| 81 |
-
corrected_text.append(token.
|
| 82 |
-
else:
|
| 83 |
-
corrected_text.append(token.text)
|
| 84 |
else:
|
| 85 |
corrected_text.append(token.text)
|
| 86 |
-
|
| 87 |
return ' '.join(corrected_text)
|
| 88 |
|
| 89 |
-
#
|
| 90 |
def correct_article_errors(text):
|
| 91 |
doc = nlp(text)
|
| 92 |
corrected_text = []
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
if token.text.lower() in ['a', 'an']:
|
| 96 |
next_token = token.nbor(1)
|
| 97 |
-
|
| 98 |
-
if token.text == "a" and next_word_lemma[0].lower() in vowels:
|
| 99 |
corrected_text.append("an")
|
| 100 |
-
elif token.text == "an" and
|
| 101 |
corrected_text.append("a")
|
| 102 |
else:
|
| 103 |
corrected_text.append(token.text)
|
|
@@ -105,12 +101,19 @@ def correct_article_errors(text):
|
|
| 105 |
corrected_text.append(token.text)
|
| 106 |
return ' '.join(corrected_text)
|
| 107 |
|
| 108 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
def paraphrase_with_spacy_nltk(text):
|
| 110 |
doc = nlp(text)
|
| 111 |
paraphrased_words = []
|
| 112 |
|
| 113 |
for token in doc:
|
|
|
|
| 114 |
pos = None
|
| 115 |
if token.pos_ in {"NOUN"}:
|
| 116 |
pos = wordnet.NOUN
|
|
@@ -123,12 +126,13 @@ def paraphrase_with_spacy_nltk(text):
|
|
| 123 |
|
| 124 |
synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
|
| 125 |
|
| 126 |
-
# Replace with a synonym only if it makes sense
|
| 127 |
if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
|
| 128 |
paraphrased_words.append(synonyms[0])
|
| 129 |
else:
|
| 130 |
paraphrased_words.append(token.text)
|
| 131 |
|
|
|
|
| 132 |
paraphrased_sentence = ' '.join(paraphrased_words)
|
| 133 |
|
| 134 |
return paraphrased_sentence
|
|
@@ -143,8 +147,11 @@ def paraphrase_and_correct(text):
|
|
| 143 |
corrected_text = capitalize_sentences_and_nouns(corrected_text)
|
| 144 |
corrected_text = correct_singular_plural_errors(corrected_text)
|
| 145 |
|
| 146 |
-
# Step 3:
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
return final_text
|
| 150 |
|
|
@@ -167,5 +174,5 @@ with gr.Blocks() as demo:
|
|
| 167 |
# Connect the paraphrasing function to the button
|
| 168 |
paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
|
| 169 |
|
| 170 |
-
# Launch the app
|
| 171 |
demo.launch()
|
|
|
|
| 5 |
import subprocess
|
| 6 |
import nltk
|
| 7 |
from nltk.corpus import wordnet
|
| 8 |
+
from gingerit.gingerit import GingerIt
|
| 9 |
+
from pattern.en import conjugate, lemma, pluralize, singularize
|
| 10 |
|
| 11 |
# Initialize the English text classification pipeline for AI detection
|
| 12 |
pipeline_en = pipeline(
    task="text-classification",
    model="Hello-SimpleAI/chatgpt-detector-roberta",
)
|
|
|
|
| 53 |
|
| 54 |
return ' '.join(corrected_text)
|
| 55 |
|
| 56 |
+
# Function to correct tense errors using Pattern
|
| 57 |
def correct_tense_errors(text):
    """Rebuild past-tense verb forms in *text* with Pattern.

    The text is parsed with the module-level spaCy pipeline ``nlp``. A token
    whose coarse POS is ``VERB`` and whose fine-grained tag is ``VBD``
    (simple past) or ``VBN`` (past participle) is reduced to its lemma and
    conjugated again in that same form. All other tokens, including verbs in
    any other form, are copied through unchanged.

    Args:
        text: The string to process.

    Returns:
        The processed string, with the whitespace that followed each token
        in the input preserved.
    """
    # Penn Treebank tag -> Pattern tense alias. Only past forms are rebuilt:
    # conjugating every verb to the past would corrupt infinitives and
    # present-tense verbs ("to run" -> "to ran").
    tense_for_tag = {"VBD": "past", "VBN": "ppart"}

    pieces = []
    for token in nlp(text):
        word = token.text
        tense = tense_for_tag.get(token.tag_) if token.pos_ == "VERB" else None
        if tense is not None:
            try:
                inflected = conjugate(lemma(token.text), tense=tense)
            except RuntimeError:
                # NOTE(review): some Pattern releases are reported to raise
                # "generator raised StopIteration" (PEP 479) while loading
                # their verb lexicon; keep the original word instead of
                # failing the whole request.
                inflected = None
            if inflected:
                # Pattern may hand back a lower-cased form; restore a leading
                # capital so sentence-initial verbs keep their casing.
                if token.text[0].isupper():
                    inflected = inflected[0].upper() + inflected[1:]
                word = inflected
        # whitespace_ is the spacing spaCy saw after this token, so
        # punctuation stays attached instead of being split off by ' '.join.
        pieces.append(word + token.whitespace_)
    return "".join(pieces)
|
| 70 |
|
| 71 |
+
# Function to correct singular/plural errors using Pattern
|
| 72 |
def correct_singular_plural_errors(text):
    """Normalise noun number in *text* to match each noun's spaCy tag.

    The text is parsed with the module-level spaCy pipeline ``nlp``. For
    tokens whose coarse POS is ``NOUN``:

    * tag ``NN``  -> the token is passed through Pattern's ``singularize``;
    * tag ``NNS`` -> the token is singularised and then pluralised again.

    Every other token (including nouns carrying any other tag) is copied
    through unchanged, so no token is ever dropped from the output.

    Args:
        text: The string to process.

    Returns:
        The processed string, with the whitespace that followed each token
        in the input preserved.
    """
    pieces = []
    for token in nlp(text):
        # Default to the original surface form so that a token which matches
        # none of the branches below is still emitted.
        word = token.text
        if token.pos_ == "NOUN":
            if token.tag_ == "NN":
                word = singularize(token.text)
            elif token.tag_ == "NNS":
                # Go through the singular first: calling pluralize() directly
                # on a form that is already plural would inflect it twice.
                word = pluralize(singularize(token.text))
        # whitespace_ is the spacing spaCy saw after this token, so
        # punctuation stays attached instead of being split off by ' '.join.
        pieces.append(word + token.whitespace_)
    return "".join(pieces)
|
| 86 |
|
| 87 |
+
# Function to check and correct article errors
|
| 88 |
def correct_article_errors(text):
    """Swap "a"/"an" so the article agrees with the following token.

    The text is parsed with the module-level spaCy pipeline ``nlp``. A token
    that is exactly ``"a"`` is replaced by ``"an"`` when the next token
    starts with one of the letters a, e, i, o, u; a token that is exactly
    ``"an"`` is replaced by ``"a"`` when it does not. An article that is the
    last token of the text has nothing to agree with and is left as is.

    The check looks at spelling only, not pronunciation, so words such as
    "hour" or "university" are handled by their first letter.

    Args:
        text: The string to process.

    Returns:
        The processed string, with the whitespace that followed each token
        in the input preserved.
    """
    vowels = "aeiou"
    doc = nlp(text)
    last_index = len(doc) - 1

    pieces = []
    for token in doc:
        word = token.text
        # Token.nbor(1) raises IndexError on the final token, so only look
        # ahead when a following token actually exists.
        if word in ("a", "an") and token.i < last_index:
            follower = doc[token.i + 1]
            wanted = "an" if follower.text[0].lower() in vowels else "a"
            word = wanted
        # whitespace_ is the spacing spaCy saw after this token, so
        # punctuation stays attached instead of being split off by ' '.join.
        pieces.append(word + token.whitespace_)
    return "".join(pieces)
|
| 103 |
|
| 104 |
+
# Function to correct overall grammar using GingerIt
|
| 105 |
+
def correct_grammar(text):
    """Return *text* after a grammar pass through GingerIt.

    A fresh ``GingerIt`` parser is created on every call and asked to parse
    *text*; the value stored under the ``'result'`` key of what ``parse``
    returns is handed back to the caller.
    """
    ginger_response = GingerIt().parse(text)
    corrected = ginger_response['result']
    return corrected
|
| 109 |
+
|
| 110 |
+
# Paraphrasing function using SpaCy and NLTK (Humanifier)
|
| 111 |
def paraphrase_with_spacy_nltk(text):
|
| 112 |
doc = nlp(text)
|
| 113 |
paraphrased_words = []
|
| 114 |
|
| 115 |
for token in doc:
|
| 116 |
+
# Map SpaCy POS tags to WordNet POS tags
|
| 117 |
pos = None
|
| 118 |
if token.pos_ in {"NOUN"}:
|
| 119 |
pos = wordnet.NOUN
|
|
|
|
| 126 |
|
| 127 |
synonyms = get_synonyms_nltk(token.text.lower(), pos) if pos else []
|
| 128 |
|
| 129 |
+
# Replace with a synonym only if it makes sense
|
| 130 |
if synonyms and token.pos_ in {"NOUN", "VERB", "ADJ", "ADV"} and synonyms[0] != token.text.lower():
|
| 131 |
paraphrased_words.append(synonyms[0])
|
| 132 |
else:
|
| 133 |
paraphrased_words.append(token.text)
|
| 134 |
|
| 135 |
+
# Join the words back into a sentence
|
| 136 |
paraphrased_sentence = ' '.join(paraphrased_words)
|
| 137 |
|
| 138 |
return paraphrased_sentence
|
|
|
|
| 147 |
corrected_text = capitalize_sentences_and_nouns(corrected_text)
|
| 148 |
corrected_text = correct_singular_plural_errors(corrected_text)
|
| 149 |
|
| 150 |
+
# Step 3: Correct tense errors
|
| 151 |
+
corrected_text = correct_tense_errors(corrected_text)
|
| 152 |
+
|
| 153 |
+
# Step 4: Correct overall grammar using GingerIt
|
| 154 |
+
final_text = correct_grammar(corrected_text)
|
| 155 |
|
| 156 |
return final_text
|
| 157 |
|
|
|
|
| 174 |
# Connect the paraphrasing function to the button
|
| 175 |
paraphrase_button.click(paraphrase_and_correct, inputs=text_input, outputs=output_text)
|
| 176 |
|
| 177 |
+
# Launch the app
|
| 178 |
demo.launch()
|