Amii2410 committed on
Commit
e5e31f2
·
verified ·
1 Parent(s): 5e92b10

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -15
app.py CHANGED
@@ -1,28 +1,55 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
3
 
4
# Build the spam classifier a single time at import so every call reuses it.
pipe = pipeline(
    task="text-classification",
    model="Titeiiko/OTIS-Official-Spam-Model",
)
 
6
 
7
def is_spam(input_text: str) -> dict:
    """
    Classify *input_text* with the module-level ``pipe`` classifier.

    Returns {"spam": bool, "label": str, "score": float}
    """
    # The pipeline returns a list; the first entry is used.
    prediction = pipe(input_text)[0]
    predicted_label = prediction["label"]
    # Any label other than "LABEL_0" is reported as spam.
    return {
        "spam": predicted_label != "LABEL_0",
        "label": predicted_label,
        "score": float(prediction["score"]),
    }
18
 
19
# Single Gradio interface wrapping is_spam (serves both the API and the UI).
demo = gr.Interface(
    title="Spam Detector API",
    description="Detect spam using Hugging Face Transformers.",
    fn=is_spam,
    inputs=gr.Textbox(label="Enter text"),
    outputs=gr.JSON(label="Result"),
)
27
 
28
  if __name__ == "__main__":
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import re
4
 
5
# ---------- Models are built once at import and shared by every request ----------
spam_pipe = pipeline(
    task="text-classification",
    model="Titeiiko/OTIS-Official-Spam-Model",
)
zero_shot = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
8
 
9
+ # ---------- Helper ----------
10
def is_gibberish(text: str, min_letter_ratio: float = 0.6) -> bool:
    """Return True when too small a share of *text* consists of letters.

    This is a cheap character-ratio heuristic, not a language model: every
    character (including spaces, digits and punctuation) counts towards the
    total, and the text is flagged when the fraction of letters falls below
    ``min_letter_ratio``.

    Letters are detected with ``str.isalpha()`` so that non-ASCII alphabets
    (accented Latin, Cyrillic, Greek, ...) are not misclassified as
    gibberish merely because they fall outside ``a-z``/``A-Z``.

    Args:
        text: The string to inspect.
        min_letter_ratio: Minimum fraction of characters that must be
            letters for the text to be considered meaningful. The default
            of 0.6 flags text with more than 40% non-letter characters.

    Returns:
        True if the text is empty (or missing) or its letter ratio is below
        ``min_letter_ratio``; False otherwise.
    """
    # Empty input carries no information, so treat it as gibberish and
    # avoid dividing by zero below.
    if not text:
        return True
    letter_count = sum(ch.isalpha() for ch in text)
    return letter_count / len(text) < min_letter_ratio
16
+
17
+ # ---------- Core Detection ----------
18
def detect(text: str) -> dict:
    """
    Run the spam, off-topic and gibberish checks on *text*.

    Returns a JSON-like dictionary with individual flags and final decision:
      - "input": the text that was analysed
      - "spam_flag": True when the spam model's label is not "LABEL_0"
      - "irrelevant_flag": True when the zero-shot top label is "irrelevant"
      - "gibberish_flag": result of is_gibberish(text)
      - "overall_flag": True when any of the three flags is set
      - "spam_model_confidence": score reported by the spam model
      - "zero_shot_top_label": highest-scoring zero-shot label
      - "zero_shot_confidence": score of that top label
    """
    # Ad/spam detection. truncation=True keeps inputs longer than the
    # model's maximum sequence length from raising inside the classifier.
    spam_result = spam_pipe(text, truncation=True)[0]
    spam_flag = spam_result["label"] != "LABEL_0"

    # Off-topic / irrelevant detection.
    zero_result = zero_shot(text, candidate_labels=["relevant", "irrelevant"])
    # Zero-shot results are sorted by confidence, so index 0 is the best
    # label and its score.
    top_label = zero_result["labels"][0]
    irrelevant_flag = top_label == "irrelevant"

    # Gibberish detection (character-ratio heuristic).
    gibberish_flag = is_gibberish(text)

    return {
        "input": text,
        "spam_flag": spam_flag,
        "irrelevant_flag": irrelevant_flag,
        "gibberish_flag": gibberish_flag,
        "overall_flag": spam_flag or irrelevant_flag or gibberish_flag,
        "spam_model_confidence": float(spam_result["score"]),
        "zero_shot_top_label": top_label,
        "zero_shot_confidence": float(zero_result["scores"][0]),
    }
45
 
46
# ---------- Gradio front end: one text box in, one JSON panel out ----------
demo = gr.Interface(
    title="Spam / Irrelevance / Gibberish Detector",
    description="Combines spam detection, off-topic classification, and gibberish check.",
    fn=detect,
    inputs=gr.Textbox(label="Enter text to classify"),
    outputs=gr.JSON(label="Detection Result"),
)
54
 
55
  if __name__ == "__main__":