ai-assist-sh committed on
Commit
8547a14
·
verified ·
1 Parent(s): d4166be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -5
app.py CHANGED
@@ -1,8 +1,68 @@
1
- import gradio as gr, sys
2
- def ping(name):
3
- import gradio as g
4
- return f"Hello, {name or 'world'}! (gradio {g.__version__}, py {sys.version.split()[0]})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
- demo = gr.Interface(ping, "text", "text", title="Ping")
7
  if __name__ == "__main__":
8
  demo.launch()
 
 
1
+ import os, re
2
+ import gradio as gr
3
+
4
# Default TOKENIZERS_PARALLELISM to "false" without overriding a value that
# is already present in the environment.
os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")

# Model identifier passed to from_pretrained() the first time analyze() runs.
URL_MODEL_ID = "CrabInHoney/urlbert-tiny-v4-malicious-url-classifier"

# Fallback names for generic "LABEL_n" classifier outputs.
URL_LABEL_MAP = {
    "LABEL_0": "benign",
    "LABEL_1": "defacement",
    "LABEL_2": "malware",
    "LABEL_3": "phishing",
}

# Finds http(s):// and bare "www." links inside free text.
#
# The authority part accepts optional userinfo and an optional numeric port,
# and the tail may start with "/", "?" or "#". Without these, a link such as
# http://brand.com@evil.example/login would be cut down to http://brand.com
# and the classifier would be shown a harmless-looking prefix instead of the
# real target.
URL_RE = re.compile(
    r"""
    \b(?:https?://|www\.)       # scheme, or a bare www. prefix
    (?:[a-z0-9\-._~%:]+@)?      # optional userinfo (user or user:password)
    [a-z0-9\-._~%]+             # host
    (?::\d+)?                   # optional port
    (?:[/?\#][^\s<>"']*)?       # optional path, query and/or fragment
    """,
    re.IGNORECASE | re.VERBOSE,
)

_pipe = None  # created on first analyze()
11
+
12
def _extract_urls(t: str):
    """Return the distinct URLs found in *t*, sorted in ascending order.

    ``None`` or an empty string yields an empty list. Sentence punctuation
    that directly follows a link (``.``, ``,``, ``;``, ``:``, ``!``, ``?``)
    is removed from the end of each match, so "see http://a.com/x." and
    "http://a.com/x" collapse into a single entry instead of two.
    """
    found = set()
    for match in URL_RE.finditer(t or ""):
        # The pattern stops at whitespace, so a link that ends a sentence
        # drags the closing punctuation along; drop it before de-duplicating.
        found.add(match.group(0).rstrip(".,;:!?"))
    return sorted(found)
14
+
15
def _pretty(raw, id2label):
    """Translate a raw classifier label into a display name.

    Lookup order:
      1. ``raw`` itself as a key of ``id2label``;
      2. ``raw`` with every "LABEL_" occurrence removed as a key of
         ``id2label``;
      3. ``URL_LABEL_MAP``, falling back to ``raw`` unchanged.

    Steps 1 and 2 are skipped when ``id2label`` is empty or ``None``.
    """
    if not id2label:
        return URL_LABEL_MAP.get(raw, raw)

    if raw in id2label:
        return id2label[raw]

    # Keys here are compared as strings, e.g. "LABEL_2" -> "2".
    stripped = raw.replace("LABEL_", "")
    if stripped in id2label:
        return id2label[stripped]

    return URL_LABEL_MAP.get(raw, raw)
21
+
22
def analyze(text: str):
    """Classify every URL found in *text* and summarise the outcome.

    The input is either a single URL or free text (for example an email
    body) from which links are extracted with ``_extract_urls``. The
    classification pipeline is built on the first call that needs it and
    cached in the module-level ``_pipe``.

    Args:
        text: A URL or a block of text that may contain URLs. ``None`` is
            treated like an empty string.

    Returns:
        A 4-tuple ``(verdict, top_label, top_conf, rows)``:

        * ``verdict`` - a status message; "UNSAFE" when at least one link is
          labelled phishing, malware or defacement, otherwise "SAFE".
        * ``top_label`` - predicted label of the first URL processed.
        * ``top_conf`` - that prediction's confidence, formatted "NN.NN%".
        * ``rows`` - one ``[url, label, confidence_percent]`` list per URL.

        When the input is empty or contains no URL, ``verdict`` carries an
        explanatory message and the remaining items are ``""``, ``""``, ``[]``.
    """
    global _pipe

    text = (text or "").strip()
    if not text:
        return "Paste an email body or a URL.", "", "", []

    # Take the single-URL shortcut only when the input contains no whitespace
    # at all. Checking for a space character alone would treat a newline- or
    # tab-separated list of links as one long "URL".
    is_single_url = (
        text.lower().startswith(("http://", "https://", "www."))
        and len(text.split()) == 1
    )
    urls = [text] if is_single_url else _extract_urls(text)
    if not urls:
        return "No URLs detected in the text.", "", "", []

    if _pipe is None:
        # Imported here so that loading the module does not import transformers.
        from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
        tok = AutoTokenizer.from_pretrained(URL_MODEL_ID)
        mdl = AutoModelForSequenceClassification.from_pretrained(URL_MODEL_ID)
        _pipe = pipeline("text-classification", model=mdl, tokenizer=tok, device=-1, top_k=None)

    id2label = getattr(_pipe.model.config, "id2label", None)
    unsafe_labels = {"phishing", "malware", "defacement"}
    rows, unsafe, top_label, top_conf = [], False, "", ""
    for i, u in enumerate(urls, 1):
        # _pipe(u)[0] is the list of per-label scores for this URL; keep the
        # highest-scoring entry.
        top = max(_pipe(u)[0], key=lambda s: s["score"])
        lbl = _pretty(top["label"], id2label)
        conf = round(100 * float(top["score"]), 2)
        rows.append([u, lbl, conf])
        if i == 1:
            # The two summary fields always describe the first URL in the list.
            top_label, top_conf = lbl, f"{conf:.2f}%"
        if lbl.lower() in unsafe_labels:
            unsafe = True

    verdict = "🔴 UNSAFE (links flagged)" if unsafe else "🟢 SAFE (all links benign)"
    return verdict, top_label, top_conf, rows
51
+
52
# Gradio front end: one multi-line text input wired to analyze(), whose four
# return values fill the verdict banner, the two summary fields and the
# per-link table, in that order.
demo = gr.Interface(
    title="🛡️ Phishing Detector (via Link Analysis)",
    description="We extract links and classify each with a compact malicious-URL model.",
    fn=analyze,
    inputs=gr.Textbox(lines=6, label="Email or URL"),
    outputs=[
        gr.Markdown(label="Verdict"),
        gr.Textbox(label="Prediction", interactive=False),
        gr.Textbox(label="Confidence", interactive=False),
        gr.Dataframe(
            headers=["URL", "Prediction", "Confidence (%)"],
            datatype=["str", "str", "number"],
            row_count=(0, "dynamic"),
            col_count=(3, "fixed"),
            interactive=False,
            label="Per-link results",
        ),
    ],
)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
68
+