Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
import os
|
| 2 |
-
|
| 3 |
# Đặt biến cache phòng khi runtime override (khớp Dockerfile)
|
| 4 |
os.environ.setdefault("HF_HOME", "/data/hf")
|
| 5 |
os.environ.setdefault("HF_HUB_CACHE", "/data/hf/hub")
|
|
@@ -43,7 +43,29 @@ def softmax_logs(d):
|
|
| 43 |
ex = {k: math.exp(v - m) for k, v in d.items()}
|
| 44 |
Z = sum(ex.values())
|
| 45 |
return {k: ex[k]/Z for k in ex}
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
@app.get("/health")
|
| 48 |
def health():
|
| 49 |
return {"status": "ok", "device": str(device)}
|
|
@@ -61,7 +83,15 @@ def predict(item: Item):
|
|
| 61 |
hsd_probs = softmax_logs(score_labels(enc, hsd_labels))
|
| 62 |
hsd_label = max(hsd_probs, key=hsd_probs.get)
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
return {
|
| 65 |
"toxic-speech-detection": {"label": tox_label, "probs": tox_probs},
|
| 66 |
"hate-speech-detection": {"label": hsd_label, "probs": hsd_probs},
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
}
|
|
|
|
| 1 |
import os
|
| 2 |
+
import re
|
| 3 |
# Đặt biến cache phòng khi runtime override (khớp Dockerfile)
|
| 4 |
os.environ.setdefault("HF_HOME", "/data/hf")
|
| 5 |
os.environ.setdefault("HF_HUB_CACHE", "/data/hf/hub")
|
|
|
|
| 43 |
ex = {k: math.exp(v - m) for k, v in d.items()}
|
| 44 |
Z = sum(ex.values())
|
| 45 |
return {k: ex[k]/Z for k in ex}
|
| 46 |
+
def generate_text(prompt: str, max_new_tokens: int = 64):
    """Run deterministic beam-search generation on *prompt* and return decoded text.

    Relies on the module-level tokenizer (``tok``), ``model`` and ``device``.

    Args:
        prompt: Input text (truncated to 512 tokens).
        max_new_tokens: Upper bound on generated tokens.

    Returns:
        The decoded generation with special tokens stripped.
    """
    # If the model needs a task prefix, add it here, e.g.:
    # prompt = f"hate-spans-detection: {prompt}"
    inputs = tok(
        prompt, return_tensors="pt", truncation=True, max_length=512
    ).to(device)
    generation_kwargs = dict(
        max_new_tokens=max_new_tokens,
        num_beams=4,          # beam search, no sampling -> deterministic output
        do_sample=False,
        early_stopping=True,
    )
    with torch.no_grad():
        output_ids = model.generate(**inputs, **generation_kwargs)
    return tok.decode(output_ids[0], skip_special_tokens=True)
|
| 59 |
+
def extract_hate_spans(output_text: str):
    """Extract hate spans tagged either ``[hate]...[hate]`` or ``[hate]...[/hate]``.

    Args:
        output_text: Raw model generation possibly containing tagged spans.

    Returns:
        List of non-empty, whitespace-stripped span strings, in order of
        appearance. Empty list when no tagged span is present.
    """
    # Bug fix: the previous version ran two separate findall passes
    # ([hate]...[hate] and [hate]...[/hate]). With several [hate]x[/hate]
    # spans in one text, the first lazy pattern matched across a [/hate]
    # up to the NEXT opener, producing garbage spans plus duplicates.
    # A single pattern with an optional "/" in the closing tag scans the
    # text once and captures each span exactly once.
    spans = re.findall(
        r"\[hate\](.*?)\[/?hate\]",
        output_text,
        flags=re.IGNORECASE | re.DOTALL,
    )
    # Clean up: drop empty spans, trim surrounding whitespace.
    return [s.strip() for s in spans if s.strip()]
|
| 69 |
@app.get("/health")
def health():
    """Liveness probe: report service status and the active compute device."""
    payload = {"status": "ok", "device": str(device)}
    return payload
|
|
|
|
| 83 |
hsd_probs = softmax_logs(score_labels(enc, hsd_labels))
|
| 84 |
hsd_label = max(hsd_probs, key=hsd_probs.get)
|
| 85 |
|
| 86 |
+
span_prompt = text
|
| 87 |
+
gen = generate_text(span_prompt, max_new_tokens=64)
|
| 88 |
+
spans = extract_hate_spans(gen)
|
| 89 |
+
|
| 90 |
return {
|
| 91 |
"toxic-speech-detection": {"label": tox_label, "probs": tox_probs},
|
| 92 |
"hate-speech-detection": {"label": hsd_label, "probs": hsd_probs},
|
| 93 |
+
"hate-spans-detection": {
|
| 94 |
+
"spans": spans if spans else [],
|
| 95 |
+
"raw": gen # giữ nguyên đầu ra để bạn debug định dạng
|
| 96 |
+
}
|
| 97 |
}
|