DungSon committed on
Commit
21e929b
·
verified ·
1 Parent(s): 06f454b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -2
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import os
2
-
3
  # Đặt biến cache phòng khi runtime override (khớp Dockerfile)
4
  os.environ.setdefault("HF_HOME", "/data/hf")
5
  os.environ.setdefault("HF_HUB_CACHE", "/data/hf/hub")
@@ -43,7 +43,29 @@ def softmax_logs(d):
43
  ex = {k: math.exp(v - m) for k, v in d.items()}
44
  Z = sum(ex.values())
45
  return {k: ex[k]/Z for k in ex}
46
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  @app.get("/health")
48
  def health():
49
  return {"status": "ok", "device": str(device)}
@@ -61,7 +83,15 @@ def predict(item: Item):
61
  hsd_probs = softmax_logs(score_labels(enc, hsd_labels))
62
  hsd_label = max(hsd_probs, key=hsd_probs.get)
63
 
 
 
 
 
64
  return {
65
  "toxic-speech-detection": {"label": tox_label, "probs": tox_probs},
66
  "hate-speech-detection": {"label": hsd_label, "probs": hsd_probs},
 
 
 
 
67
  }
 
1
  import os
2
+ import re
3
  # Đặt biến cache phòng khi runtime override (khớp Dockerfile)
4
  os.environ.setdefault("HF_HOME", "/data/hf")
5
  os.environ.setdefault("HF_HUB_CACHE", "/data/hf/hub")
 
43
  ex = {k: math.exp(v - m) for k, v in d.items()}
44
  Z = sum(ex.values())
45
  return {k: ex[k]/Z for k in ex}
46
def generate_text(prompt: str, max_new_tokens: int = 64):
    """Generate text for *prompt* with deterministic beam search and return the decoded string.

    Args:
        prompt: Raw input text. If the model requires a task prefix, add it here,
            e.g. ``prompt = f"hate-spans-detection: {prompt}"``.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The decoded model output with special tokens stripped.
    """
    # Tokenize, cap input length, and move tensors to the active device.
    inputs = tok(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=False,          # deterministic decoding
            num_beams=4,
            early_stopping=True,
        )
    return tok.decode(generated[0], skip_special_tokens=True)
59
def extract_hate_spans(output_text: str):
    """Extract hate-span substrings from the model's generated text.

    Supports both tagging conventions the model may emit:
    ``[hate]...[/hate]`` (explicit closing tag) and ``[hate]...[hate]``
    (same tag used as open and close).

    Args:
        output_text: Raw decoded generation output.

    Returns:
        A list of stripped, non-empty span strings (possibly empty).
    """
    # Prefer the explicit closing-tag form: it is unambiguous.
    spans = re.findall(r"\[hate\](.*?)\[/hate\]", output_text,
                       flags=re.IGNORECASE | re.DOTALL)
    if not spans:
        # Only fall back to same-tag pairing when no closing tags exist.
        # Running both patterns unconditionally is a bug: on
        # "[hate]x[/hate]" the lazy same-tag pattern matches "x[/hate] ..."
        # and every real span would also be returned twice.
        spans = re.findall(r"\[hate\](.*?)\[hate\]", output_text,
                           flags=re.IGNORECASE | re.DOTALL)
    # Clean up: strip whitespace and drop empty captures.
    return [s.strip() for s in spans if s.strip()]
69
@app.get("/health")
def health():
    """Liveness probe: report service status and the active compute device."""
    payload = {"status": "ok", "device": str(device)}
    return payload
 
83
  hsd_probs = softmax_logs(score_labels(enc, hsd_labels))
84
  hsd_label = max(hsd_probs, key=hsd_probs.get)
85
 
86
+ span_prompt = text
87
+ gen = generate_text(span_prompt, max_new_tokens=64)
88
+ spans = extract_hate_spans(gen)
89
+
90
  return {
91
  "toxic-speech-detection": {"label": tox_label, "probs": tox_probs},
92
  "hate-speech-detection": {"label": hsd_label, "probs": hsd_probs},
93
+ "hate-spans-detection": {
94
+ "spans": spans if spans else [],
95
+ "raw": gen # giữ nguyên đầu ra để bạn debug định dạng
96
+ }
97
  }