from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline
import math
app = FastAPI(title="MODERATION_API_V2")
# Initialize the models
# Model A: multilingual general-purpose toxicity detection
pipe_a = pipeline("text-classification", model="textdetox/bert-multilingual-toxicity-classifier")
# Model B: Chinese-specific offensive-language detection
pipe_b = pipeline("text-classification", model="thu-coai/roberta-base-cold")

class CheckRequest(BaseModel):
    text: str
@app.post("/analyze")
async def analyze(request: CheckRequest):
    text = request.text
    if not text.strip():
        return {"STATUS": "ERROR", "REASON": "EMPTY_TEXT"}

    # Run inference and extract a risk probability (0.0 - 1.0) from each model
    # For textdetox: LABEL_1 means toxic
    res_a = pipe_a(text)[0]
    risk_a = res_a['score'] if res_a['label'] == 'LABEL_1' else 1 - res_a['score']
    # For thu-coai: LABEL_1 means offensive
    res_b = pipe_b(text)[0]
    risk_b = res_b['score'] if res_b['label'] == 'LABEL_1' else 1 - res_b['score']
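    # Illustrative example (not from the original code): each pipeline call returns
    # [{"label": ..., "score": ...}], so a result of {"label": "LABEL_0", "score": 0.95}
    # is converted to a risk of 1 - 0.95 = 0.05 by the lines above.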

    # Combined risk score (weighted):
    # 70% weight on the max of the two model scores + 30% weight on their average
    combined_risk = (max(risk_a, risk_b) * 0.7) + (((risk_a + risk_b) / 2) * 0.3)
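    # Worked example (illustrative, not from the original code): with risk_a = 0.9 and
    # risk_b = 0.3, combined_risk = 0.9 * 0.7 + 0.6 * 0.3 = 0.63 + 0.18 = 0.81.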

    # Compute the numeric risk level (1-5)
    # Level 1: [0.0-0.2] SAFE
    # Level 5: [0.8-1.0] BLOCKED
    risk_level = math.ceil(combined_risk * 5)
    risk_level = max(1, min(5, risk_level))
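    # Illustrative mapping (not from the original code): ceil(combined_risk * 5) yields
    # (0.0, 0.2] -> 1, (0.2, 0.4] -> 2, (0.4, 0.6] -> 3, (0.6, 0.8] -> 4, (0.8, 1.0] -> 5,
    # and the clamp above also forces combined_risk == 0.0 up to level 1.
    # For the worked example above, ceil(0.81 * 5) = ceil(4.05) = 5 -> "BLOCKED".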

    # Status mapping
    if risk_level >= 4:
        status = "BLOCKED"
    elif risk_level == 3:
        status = "REVIEW"
    else:
        status = "PASSED"

    return {
        "TEXT": text,
        "STATUS": status,
        "RISK_LEVEL": risk_level,
        "CONFIDENCE_SCORE": round(combined_risk, 4),
        "RAW_DATA": {
            "GENERAL_MODEL": round(risk_a, 4),
            "SPECIALIZED_MODEL": round(risk_b, 4)
        }
    }

@app.get("/health")
async def health():
    return {"STATUS": "UP"}