from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
import math
app = FastAPI(title="MODERATION_API_V2")
# Initialize the models
# Model A: multilingual general-purpose toxicity detection
pipe_a = pipeline("text-classification", model="textdetox/bert-multilingual-toxicity-classifier")
# Model B: Chinese-specific offensive-language detection
pipe_b = pipeline("text-classification", model="thu-coai/roberta-base-cold")

class CheckRequest(BaseModel):
    text: str
@app.post("/analyze")
async def analyze(request: CheckRequest):
    text = request.text
    if not text.strip():
        return {"STATUS": "ERROR", "REASON": "EMPTY_TEXT"}

    # Run inference and extract a risk probability (0.0 - 1.0) from each model
    # For textdetox: LABEL_1 means toxic
    res_a = pipe_a(text)[0]
    risk_a = res_a['score'] if res_a['label'] == 'LABEL_1' else 1 - res_a['score']
    # For thu-coai: LABEL_1 means offensive
    res_b = pipe_b(text)[0]
    risk_b = res_b['score'] if res_b['label'] == 'LABEL_1' else 1 - res_b['score']
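    # Illustrative example (not from the original code): each pipeline call returns
    # [{"label": ..., "score": ...}], so a result of {"label": "LABEL_0", "score": 0.95}
    # is converted to a risk of 1 - 0.95 = 0.05 by the lines above.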

    # Combined risk score (weighted):
    # 70% weight on the max of the two model scores + 30% weight on their average
    combined_risk = (max(risk_a, risk_b) * 0.7) + (((risk_a + risk_b) / 2) * 0.3)
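    # Worked example (illustrative, not from the original code): with risk_a = 0.9 and
    # risk_b = 0.3, combined_risk = 0.9 * 0.7 + 0.6 * 0.3 = 0.63 + 0.18 = 0.81.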

    # Compute the numeric risk level (1-5)
    # Level 1: [0.0-0.2] SAFE
    # Level 5: [0.8-1.0] BLOCKED
    risk_level = math.ceil(combined_risk * 5)
    risk_level = max(1, min(5, risk_level))
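    # Illustrative mapping (not from the original code): ceil(combined_risk * 5) yields
    # (0.0, 0.2] -> 1, (0.2, 0.4] -> 2, (0.4, 0.6] -> 3, (0.6, 0.8] -> 4, (0.8, 1.0] -> 5,
    # and the clamp above also forces combined_risk == 0.0 up to level 1.
    # For the worked example above, ceil(0.81 * 5) = ceil(4.05) = 5 -> "BLOCKED".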

    # Status mapping
    if risk_level >= 4:
        status = "BLOCKED"
    elif risk_level == 3:
        status = "REVIEW"
    else:
        status = "PASSED"

    return {
        "TEXT": text,
        "STATUS": status,
        "RISK_LEVEL": risk_level,
        "CONFIDENCE_SCORE": round(combined_risk, 4),
        "RAW_DATA": {
            "GENERAL_MODEL": round(risk_a, 4),
            "SPECIALIZED_MODEL": round(risk_b, 4)
        }
    }

@app.get("/health")
async def health():
    return {"STATUS": "UP"}