Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,81 +1,84 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
-
from
|
| 4 |
-
import nltk
|
| 5 |
-
from nltk.tokenize import sent_tokenize
|
| 6 |
import re
|
| 7 |
|
| 8 |
-
|
|
|
|
|
|
|
| 9 |
|
| 10 |
-
# Load grammar correction
|
| 11 |
grammar_corrector = pipeline("text2text-generation", model="vennify/t5-base-grammar-correction")
|
| 12 |
-
toxicity_classifier = pipeline("text-classification", model="unitary/toxic-bert")
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
article.parse()
|
| 21 |
-
return article.text
|
| 22 |
-
return text_input
|
| 23 |
-
|
| 24 |
-
def check_grammar(text):
|
| 25 |
-
result = grammar_corrector(text, max_length=512, do_sample=False)
|
| 26 |
-
return result[0]['generated_text']
|
| 27 |
|
| 28 |
def detect_sensitive_content(text):
|
|
|
|
| 29 |
sentences = sent_tokenize(text)
|
| 30 |
-
|
| 31 |
-
for
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
highlighted = highlighted.replace(sent, f"<span style='background-color: red'>{sent}</span>")
|
| 42 |
-
diff_words = [(o, c) for o, c in zip(original.split(), corrected.split()) if o != c]
|
| 43 |
-
for o, c in diff_words:
|
| 44 |
-
highlighted = highlighted.replace(c, f"<span style='background-color: yellow'>{c}</span>")
|
| 45 |
-
return highlighted
|
| 46 |
-
|
| 47 |
-
def review_blog(input_type, text_input, url_input):
|
| 48 |
-
if not text_input and not url_input:
|
| 49 |
-
return "Please provide input text or a URL.", ""
|
| 50 |
-
raw_text = extract_text(input_type, text_input, url_input)
|
| 51 |
-
corrected = check_grammar(raw_text)
|
| 52 |
-
sensitive = detect_sensitive_content(corrected)
|
| 53 |
-
highlighted = highlight_text(raw_text, corrected, sensitive)
|
| 54 |
-
return highlighted, corrected
|
| 55 |
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
- <span style='background-color: yellow'>**Yellow:** Grammar corrections</span><br>
|
| 61 |
-
- <span style='background-color: red'>**Red:** Sensitive or toxic content</span>""", elem_id="legend")
|
| 62 |
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
text_input: gr.update(visible=choice == "Text"),
|
| 70 |
-
url_input: gr.update(visible=choice == "URL")
|
| 71 |
-
}
|
| 72 |
|
| 73 |
-
|
|
|
|
| 74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
review_btn = gr.Button("Review Blog")
|
| 76 |
-
html_output = gr.HTML(label="Highlighted Output")
|
| 77 |
-
final_output = gr.Textbox(label="Corrected Blog", lines=10)
|
| 78 |
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from transformers import pipeline
|
| 3 |
+
from nltk import download, sent_tokenize
|
|
|
|
|
|
|
| 4 |
import re
|
| 5 |
|
| 6 |
+
# Fetch the NLTK tokenizer data used by sent_tokenize. 'punkt_tab' is
# requested in addition to 'punkt'; per the original note it fixes a recent
# error raised by PunktTokenizer.
for _nltk_resource in ('punkt', 'punkt_tab'):
    download(_nltk_resource)

# T5-based text2text-generation pipeline used for grammar correction.
grammar_corrector = pipeline(
    "text2text-generation",
    model="vennify/t5-base-grammar-correction",
)
|
|
|
|
| 12 |
|
| 13 |
+
# Regular-expression patterns that mark a sentence as sensitive/toxic.
# Each pattern is anchored on word boundaries (\b) so it matches whole
# words or phrases only. Extend this list to flag additional terms.
sensitive_keywords = [
    r"\bhate\b",
    r"\bstupid\b",
    r"\bidiot\b",
    r"\btrash\b",
    r"\bkill\b",
    r"\bnot allowed\b",
    r"\bnobody cares\b",
    r"\bterrorist\b",
    r"\bgo back\b",
    r"\bimmigrants\b",
    r"\bslur\b",
    r"\bdisgusting\b",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
def detect_sensitive_content(text):
    """Return *text* as HTML with sensitive sentences coloured red.

    The text is split into sentences with ``sent_tokenize``. A sentence is
    flagged when any pattern in ``sensitive_keywords`` matches it
    (case-insensitively). Every sentence is HTML-escaped before being
    emitted, so markup characters in the input are displayed literally
    instead of being interpreted by the HTML output component.

    Args:
        text: Plain text to scan. ``None`` or an empty string yields ``""``.

    Returns:
        The sentences joined by single spaces, with each flagged sentence
        wrapped in ``<span style='color: red'>...</span>``.
    """
    from html import escape

    if not text:
        return ""

    rendered = []
    for sentence in sent_tokenize(text):
        # Match against the raw sentence; escaping happens afterwards so the
        # patterns never see HTML entities.
        is_flagged = any(
            re.search(pattern, sentence, re.IGNORECASE)
            for pattern in sensitive_keywords
        )
        safe_sentence = escape(sentence)
        if is_flagged:
            safe_sentence = f"<span style='color: red'>{safe_sentence}</span>"
        rendered.append(safe_sentence)
    return " ".join(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
|
| 35 |
+
def highlight_grammar(original, corrected):
    """Return *corrected* as HTML with the words that changed marked yellow.

    Both texts are split on whitespace and aligned with
    ``difflib.SequenceMatcher``. Aligning (rather than comparing words
    position by position) keeps a single inserted or removed word from
    causing every following word to be reported as changed.

    Words of *corrected* that replace or are inserted relative to
    *original* are wrapped in ``<span style='color: yellow'>...</span>``.
    Words that were only removed from *original* have no counterpart in
    *corrected* and therefore produce no output. Every word is HTML-escaped.

    Args:
        original: The text as entered by the user (``None`` is treated as
            empty).
        corrected: The grammar-corrected text (``None`` is treated as empty).

    Returns:
        The corrected words joined by single spaces, with changed words
        highlighted.
    """
    from difflib import SequenceMatcher
    from html import escape

    original_words = (original or "").split()
    corrected_words = (corrected or "").split()

    # autojunk=False: the "popular element" heuristic would otherwise treat
    # frequent words in long texts as junk and degrade the alignment.
    matcher = SequenceMatcher(None, original_words, corrected_words, autojunk=False)

    highlighted = []
    for tag, _a_start, _a_end, b_start, b_end in matcher.get_opcodes():
        # 'delete' opcodes have an empty corrected range, so they add nothing.
        for word in corrected_words[b_start:b_end]:
            safe_word = escape(word)
            if tag == "equal":
                highlighted.append(safe_word)
            else:
                highlighted.append(
                    f"<span style='color: yellow'>{safe_word}</span>"
                )
    return " ".join(highlighted)
|
| 52 |
+
|
| 53 |
+
def review_blog(input_text, input_type):
    """Review a blog post and return the two HTML panels for the UI.

    The click handler that calls this function is wired to two outputs
    (grammar panel, sensitive-content panel), so every path returns a pair.

    Args:
        input_text: The blog text entered by the user.
        input_type: ``"Text"`` or ``"URL"``. URL input is not implemented
            yet and produces an informational message.

    Returns:
        A 2-tuple ``(grammar_panel, sensitive_panel)``. On the normal path
        both items are ``gr.update(value=...)`` objects carrying HTML; on
        the early-exit paths they are plain strings (a message and ``""``).
    """
    if input_type == "URL":
        return "URL support is under development.", ""

    # Skip the model call when there is nothing to review.
    if not input_text or not input_text.strip():
        return "Please provide some blog text to review.", ""

    # Step 1: Grammar correction.
    # The model card for vennify/t5-base-grammar-correction shows inputs
    # prefixed with "grammar: ", so the same prefix is applied here.
    # NOTE(review): max_length=512 caps the generated output, so very long
    # posts may come back truncated -- consider correcting per sentence.
    generated = grammar_corrector(
        "grammar: " + input_text, max_length=512, do_sample=False
    )
    corrected_output = generated[0]["generated_text"]

    # Step 2: Highlight grammar issues (user text vs. corrected text).
    grammar_highlighted = highlight_grammar(input_text, corrected_output)

    # Step 3: Highlight sensitive content in the corrected text.
    sensitive_highlighted = detect_sensitive_content(corrected_output)

    return gr.update(value=grammar_highlighted), gr.update(value=sensitive_highlighted)
|
| 67 |
+
|
| 68 |
+
# Gradio UI: an input-type selector and a text box feed review_blog, whose
# two results are shown in separate HTML panels.
with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
    gr.Markdown("📝 **AI Blog Reviewer**")
    # Legend explaining the two highlight colours.
    gr.Markdown(
        "#### Highlights:\n"
        "- <span style='color: yellow'>Yellow</span>: Grammar corrections\n"
        "- <span style='color: red'>Red</span>: Sensitive or toxic content"
    )

    input_type = gr.Radio(["Text", "URL"], label="Input Type", value="Text")
    blog_input = gr.Textbox(
        lines=8,
        label="Blog Text",
        placeholder="Paste your blog content here...",
    )
    review_btn = gr.Button("Review Blog")

    gr.Markdown("#### 🔧 Grammar Corrections:")
    grammar_output = gr.HTML()
    gr.Markdown("#### 🚨 Sensitive/Toxic Content:")
    sensitive_output = gr.HTML()

    # review_blog receives (blog_input, input_type) and fills both panels.
    review_btn.click(
        review_blog,
        inputs=[blog_input, input_type],
        outputs=[grammar_output, sensitive_output],
    )

demo.launch()
|