File size: 1,891 Bytes
f527ad4
eeb36ab
24db96b
c5c7261
24db96b
7e7b068
24db96b
7e7b068
24db96b
c5c7261
24db96b
 
 
 
 
 
 
c5c7261
24db96b
 
c5c7261
24db96b
 
 
 
 
 
7e7b068
24db96b
 
 
 
 
 
 
 
 
 
 
 
 
 
eeb36ab
24db96b
eeb36ab
24db96b
 
 
 
 
 
c5c7261
24db96b
c5c7261
24db96b
 
eeb36ab
24db96b
 
eeb36ab
24db96b
 
 
 
 
 
 
 
c5c7261
24db96b
 
c5c7261
24db96b
eeb36ab
 
24db96b
 
 
 
 
eeb36ab
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import re

app = FastAPI()

# Small instruct model used as a zero-shot moderation scorer.
MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"

# Loaded once at import time (blocking: downloads weights on first run).
# CPU-only, full float32 — NOTE(review): fine for a 0.5B model, but every
# request below shares this single model instance; transformers generate()
# is not guaranteed thread-safe under concurrent requests — confirm.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True
)

class InputText(BaseModel):
    """Request body for POST /moderate."""

    # The chat message to be scored (English, Hindi, or Hinglish per the prompt).
    message: str

def clean_score(text: str) -> int:
    """Extract the first run of digits in *text* as an int, clamped to [0, 100].

    Returns 0 when *text* contains no digits. Signs are not part of the
    match, so "-5" yields 5.
    """
    found = re.search(r"\d+", text)
    if found is None:
        return 0
    return min(100, max(0, int(found.group())))

def score_to_action(score: int):
    """Map a 0-100 severity score to a moderation decision.

    Returns a tuple ``(delete, warn, timeout, reason)`` chosen from fixed
    severity bands. NOTE(review): the band edges here (<=20 / <=40 / <=70)
    do not exactly mirror the 20-point rubric in the LLM prompt — confirm
    this is intentional.
    """
    bands = (
        (20, (False, False, False, "normal_or_criticism")),
        (40, (False, True, False, "rude_language")),
        (70, (True, True, False, "harassment_or_hate")),
    )
    for upper, action in bands:
        if score <= upper:
            return action
    # Anything above 70 gets the harshest response.
    return True, True, True, "sexual_or_severe_abuse"

@app.post("/moderate")
def moderate_text(data: InputText):
    """Score a chat message for abuse with the local LLM and map it to actions.

    Prompts the model for a 0-100 severity number, parses it with
    clean_score, and converts it to moderation flags via score_to_action.

    Returns a dict with ``delete``/``warn``/``timeout`` booleans, the
    numeric ``score``, and a ``reason`` label.
    """
    prompt = f"""
You are a strict chat moderation AI.

Score the message from 0 to 100.

Rules:
0–20: normal talk or criticism
21–40: rude language
41–60: harassment
61–80: violent or hateful
81–100: sexual, extreme abuse, threats

The message may be in English, Hindi, or Hinglish.

Message:
"{data.message}"

Return ONLY the number.
"""

    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        # temperature is omitted: it is ignored (and warned about) when
        # do_sample=False; greedy decoding is already deterministic.
        output = model.generate(
            **inputs,
            max_new_tokens=6,
            do_sample=False
        )

    # BUG FIX: generate() returns prompt + completion. Decoding output[0]
    # whole made clean_score's first-\d+ regex match a digit inside the
    # prompt text ("0 to 100"), never the model's answer. Slice off the
    # prompt tokens so only the newly generated text is parsed.
    prompt_len = inputs["input_ids"].shape[1]
    result = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
    score = clean_score(result)

    delete, warn, timeout, reason = score_to_action(score)

    return {
        "delete": delete,
        "warn": warn,
        "timeout": timeout,
        "score": score,
        "reason": reason
    }