Update app.py
Browse files
app.py
CHANGED
|
@@ -2,54 +2,38 @@ from transformers import pipeline
|
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
| 4 |
|
| 5 |
-
#
|
| 6 |
spam_pipe = pipeline("text-classification", model="Titeiiko/OTIS-Official-Spam-Model")
|
| 7 |
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 8 |
|
| 9 |
-
# ---------- Helper ----------
|
| 10 |
def is_gibberish(text: str) -> bool:
|
| 11 |
letters = len(re.findall(r"[a-zA-Z]", text))
|
| 12 |
-
|
| 13 |
-
if total == 0:
|
| 14 |
-
return True
|
| 15 |
-
return letters / total < 0.6 # more than 40% non-letters → gibberish
|
| 16 |
|
| 17 |
-
# ---------- Core Detection ----------
|
| 18 |
def detect(text: str) -> dict:
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# Off-topic / irrelevant detection
|
| 27 |
-
zero_result = zero_shot(text, candidate_labels=["relevant", "irrelevant"])
|
| 28 |
-
# zero-shot gives scores sorted by confidence
|
| 29 |
-
top_label = zero_result["labels"][0]
|
| 30 |
-
irrelevant_flag = top_label == "irrelevant"
|
| 31 |
-
|
| 32 |
-
# Gibberish detection
|
| 33 |
gibberish_flag = is_gibberish(text)
|
| 34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
return {
|
| 36 |
"input": text,
|
| 37 |
-
"
|
| 38 |
-
"irrelevant_flag": irrelevant_flag,
|
| 39 |
-
"gibberish_flag": gibberish_flag,
|
| 40 |
-
"overall_flag": spam_flag or irrelevant_flag or gibberish_flag,
|
| 41 |
-
"spam_model_confidence": float(spam_result["score"]),
|
| 42 |
-
"zero_shot_top_label": top_label,
|
| 43 |
-
"zero_shot_confidence": float(zero_result["scores"][0])
|
| 44 |
}
|
| 45 |
|
| 46 |
-
# ---------- Gradio Interface ----------
|
| 47 |
demo = gr.Interface(
|
| 48 |
fn=detect,
|
| 49 |
-
inputs=gr.Textbox(label="Enter text
|
| 50 |
-
outputs=gr.JSON(label="
|
| 51 |
-
title="Spam
|
| 52 |
-
description="
|
| 53 |
)
|
| 54 |
|
| 55 |
if __name__ == "__main__":
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
| 4 |
|
| 5 |
+
# Load both models once at import time so every request reuses the same pipelines
|
| 6 |
spam_pipe = pipeline("text-classification", model="Titeiiko/OTIS-Official-Spam-Model")
|
| 7 |
zero_shot = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
|
| 8 |
|
|
|
|
| 9 |
def is_gibberish(text: str) -> bool:
    """Return True when fewer than 60% of the characters in *text* are letters.

    Letters are counted with ``str.isalpha`` so that non-ASCII alphabets
    (accented Latin, Cyrillic, CJK, ...) count as letters instead of being
    treated as noise. Whitespace, digits and punctuation all count against
    the ratio, exactly as before.

    Args:
        text: The raw user input to inspect.

    Returns:
        True for empty input or when the letter ratio is below 0.6,
        False otherwise.
    """
    # Empty input carries no information; treat it as gibberish and avoid
    # dividing by zero below.
    if not text:
        return True
    letters = sum(ch.isalpha() for ch in text)
    return letters / len(text) < 0.6
|
|
|
|
|
|
|
|
|
|
| 12 |
|
|
|
|
| 13 |
def detect(text: str) -> dict:
    """Decide whether *text* should be treated as spam.

    The input is flagged when any one of three checks fires:

    1. ``is_gibberish`` reports the text as gibberish,
    2. the spam classifier returns any label other than ``"LABEL_0"``
       (presumably the model's "not spam" class - confirm against the
       model card),
    3. the zero-shot classifier ranks ``"irrelevant"`` above ``"relevant"``.

    Because only the combined verdict is returned, the checks run from
    cheapest to most expensive and stop at the first one that fires, so the
    transformer models are skipped whenever an earlier check already decides.

    Args:
        text: The raw user input.

    Returns:
        A dict with the original ``"input"`` and a boolean ``"spam"`` verdict.
    """
    if is_gibberish(text):
        # Pure-Python heuristic; no model inference needed.
        spam = True
    elif spam_pipe(text, truncation=True)[0]["label"] != "LABEL_0":
        # truncation=True keeps over-length inputs within the model's
        # maximum sequence length instead of failing inside the model.
        spam = True
    else:
        # Zero-shot results list the labels ordered by descending score,
        # so the first entry is the predicted label.
        ranked_labels = zero_shot(
            text, candidate_labels=["relevant", "irrelevant"]
        )["labels"]
        spam = ranked_labels[0] == "irrelevant"

    # Minimal output: only the input and the final decision.
    return {
        "input": text,
        "spam": spam,
    }
|
| 30 |
|
|
|
|
| 31 |
# Gradio front end: one text box in, the JSON verdict from `detect` out.
demo = gr.Interface(
    title="Spam Detector",
    description="Returns only whether the input is spam or not.",
    fn=detect,
    inputs=gr.Textbox(label="Enter complaint text"),
    outputs=gr.JSON(label="Result"),
)
|
| 38 |
|
| 39 |
if __name__ == "__main__":
|