Update app.py
Browse files
app.py
CHANGED
|
@@ -1,28 +1,55 @@
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
|
| 4 |
-
# Load
|
| 5 |
-
|
|
|
|
| 6 |
|
| 7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
"""
|
| 9 |
-
Returns
|
| 10 |
"""
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
return {
|
| 14 |
-
"
|
| 15 |
-
"
|
| 16 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
}
|
| 18 |
|
| 19 |
-
# Gradio
|
| 20 |
demo = gr.Interface(
|
| 21 |
-
fn=
|
| 22 |
-
inputs=gr.Textbox(label="Enter text"),
|
| 23 |
-
outputs=gr.JSON(label="Result"),
|
| 24 |
-
title="Spam Detector
|
| 25 |
-
description="
|
| 26 |
)
|
| 27 |
|
| 28 |
if __name__ == "__main__":
|
|
|
|
| 1 |
from transformers import pipeline
|
| 2 |
import gradio as gr
|
| 3 |
+
import re
|
| 4 |
|
| 5 |
+
# ---------- Load models once (faster) ----------
# Both pipelines are built at import time and kept as module-level objects,
# so each call to the detection function reuses them instead of reloading.
spam_pipe = pipeline(
    task="text-classification",
    model="Titeiiko/OTIS-Official-Spam-Model",
)
zero_shot = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
|
| 8 |
|
| 9 |
+
# ---------- Helper ----------
|
| 10 |
+
def is_gibberish(text: str) -> bool:
    """Return True when *text* looks like gibberish.

    Heuristic: among the non-whitespace characters of *text*, fewer than
    60% being letters marks the text as gibberish. Empty or
    whitespace-only text is always treated as gibberish.

    Letters are recognised with ``str.isalpha`` rather than an ASCII-only
    pattern, so accented, Cyrillic, CJK, etc. characters count as letters
    and non-English text is not flagged merely for its alphabet.
    Whitespace is left out of the ratio so that sentences made of short
    words are not penalised for the spaces between them.
    """
    visible = [ch for ch in text if not ch.isspace()]
    if not visible:
        # Nothing but whitespace (or nothing at all): no meaningful content.
        return True
    letter_count = sum(ch.isalpha() for ch in visible)
    return letter_count / len(visible) < 0.6  # more than 40% non-letters → gibberish
|
| 16 |
+
|
| 17 |
+
# ---------- Core Detection ----------
|
| 18 |
+
def detect(text: str) -> dict:
    """Run the spam, relevance and gibberish checks on *text*.

    Returns a JSON-like dictionary with the individual flags and the
    final decision:

    - ``input``: the text that was classified.
    - ``spam_flag``: True when the spam model's label is not ``"LABEL_0"``
      (presumably the model's non-spam class -- confirm against the model
      card).
    - ``irrelevant_flag``: True when the zero-shot model ranks
      ``"irrelevant"`` above ``"relevant"``.
    - ``gibberish_flag``: result of ``is_gibberish(text)``.
    - ``overall_flag``: True when any of the three flags is True.
    - ``spam_model_confidence``: score of the spam model's predicted label.
    - ``zero_shot_top_label`` / ``zero_shot_confidence``: the top-ranked
      zero-shot label and its score.
    """
    # Ad/spam detection.
    # truncation=True keeps inputs longer than the model's maximum sequence
    # length from raising; the text is cut to fit instead.
    spam_result = spam_pipe(text, truncation=True)[0]
    spam_flag = spam_result["label"] != "LABEL_0"

    # Off-topic / irrelevant detection.
    zero_result = zero_shot(text, candidate_labels=["relevant", "irrelevant"])
    # zero-shot gives labels and scores sorted by confidence, best first.
    top_label = zero_result["labels"][0]
    irrelevant_flag = top_label == "irrelevant"

    # Gibberish detection.
    gibberish_flag = is_gibberish(text)

    return {
        "input": text,
        "spam_flag": spam_flag,
        "irrelevant_flag": irrelevant_flag,
        "gibberish_flag": gibberish_flag,
        "overall_flag": spam_flag or irrelevant_flag or gibberish_flag,
        "spam_model_confidence": float(spam_result["score"]),
        "zero_shot_top_label": top_label,
        "zero_shot_confidence": float(zero_result["scores"][0]),
    }
|
| 45 |
|
| 46 |
+
# ---------- Gradio Interface ----------
|
| 47 |
# One text box in, the detection dictionary rendered as JSON out.
_text_input = gr.Textbox(label="Enter text to classify")
_json_output = gr.JSON(label="Detection Result")

demo = gr.Interface(
    fn=detect,
    inputs=_text_input,
    outputs=_json_output,
    title="Spam / Irrelevance / Gibberish Detector",
    description="Combines spam detection, off-topic classification, and gibberish check.",
)
|
| 54 |
|
| 55 |
if __name__ == "__main__":
|