Spaces:

HoangDaoAI
/

toxic-api

Sleeping

File size: 5,515 Bytes

ae4e2a6

"""
Response Schemas
================
DTOs for API responses
"""

from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from enum import Enum


class SentimentLabel(str, Enum):
    """Classification outcome attached to a sentence or to a whole text.

    The ``str`` mix-in makes every member an actual string, so a member
    compares equal to its raw value (``SentimentLabel.TOXIC == "toxic"``).
    """

    # Text judged to be toxic.
    TOXIC = "toxic"
    # Text judged to be non-toxic.
    CLEAN = "clean"


class WordScore(BaseModel):
    """Toxicity details for a single word.

    Every field is required. ``score`` is validated to lie in [0, 1];
    ``position`` is a string-to-int mapping described as ``{start, end}``.
    """

    word: str = Field(
        default=...,
        description="The word",
    )
    score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Toxicity score (0-1)",
    )
    position: Dict[str, int] = Field(
        default=...,
        description="Position in text {start, end}",
    )
    is_toxic: bool = Field(
        default=...,
        description="Whether word is toxic",
    )
    is_stop_word: bool = Field(
        default=...,
        description="Whether word is a stop word",
    )


class SentenceResult(BaseModel):
    """Analysis result for one sentence.

    All fields except ``word_scores`` are required. ``confidence`` and
    ``threshold`` are validated to lie in [0, 1]; ``sentence_number`` must
    be at least 1 (it is 1-based) and ``word_count`` cannot be negative.
    ``word_scores`` defaults to ``None`` when no word-level breakdown is
    attached.
    """

    # 1-based index, so 0 and negative values are rejected.
    sentence_number: int = Field(..., ge=1, description="Sentence index (1-based)")
    text: str = Field(..., description="Sentence text")
    label: SentimentLabel = Field(..., description="Toxic or clean")
    confidence: float = Field(..., ge=0.0, le=1.0, description="Confidence score")
    threshold: float = Field(..., ge=0.0, le=1.0, description="Threshold used")
    # A count can be zero but never negative.
    word_count: int = Field(..., ge=0, description="Number of words")
    word_scores: Optional[List[WordScore]] = Field(None, description="Word-level scores")


class ToxicWordSummary(BaseModel):
    """Aggregated information about one toxic word.

    All fields are required. ``score`` is validated to lie in [0, 1] and
    ``occurrences`` cannot be negative. ``sentences`` lists the sentence
    numbers in which the word appears.
    """

    word: str = Field(..., description="Toxic word")
    score: float = Field(..., ge=0.0, le=1.0, description="Maximum score")
    # A count can never be negative; reject such values at validation time.
    occurrences: int = Field(..., ge=0, description="Number of occurrences")
    sentences: List[int] = Field(..., description="Sentence numbers containing this word")


class Statistics(BaseModel):
    """Aggregate word-level statistics for an analysed text.

    All fields are required. The two word counters cannot be negative and
    every score field is validated to lie in [0, 1].
    """

    # Counters: zero is valid, negative values are rejected.
    total_words: int = Field(..., ge=0, description="Total number of words")
    toxic_words: int = Field(..., ge=0, description="Number of toxic words")

    # Score aggregates, each constrained to the closed interval [0, 1].
    mean_score: float = Field(..., ge=0.0, le=1.0, description="Mean toxicity score")
    median_score: float = Field(..., ge=0.0, le=1.0, description="Median toxicity score")
    max_score: float = Field(..., ge=0.0, le=1.0, description="Maximum toxicity score")
    min_score: float = Field(..., ge=0.0, le=1.0, description="Minimum toxicity score")


class AnalysisResponse(BaseModel):
    """Complete analysis response.

    Bundles the original text, the overall label, per-sentence results, a
    summary of toxic words and aggregate statistics. ``success`` defaults to
    ``True`` and ``html_highlighted`` defaults to ``None``; every other
    field is required. The three sentence counters cannot be negative.

    The example payload is attached to the generated JSON schema through
    ``model_config``; Pydantic v2 deprecates the class-based ``Config`` form
    and emits a deprecation warning for it.
    """

    # A plain dict is accepted for ``model_config`` (``ConfigDict`` is a
    # TypedDict), so no additional import is required.
    model_config = {
        "json_schema_extra": {
            "example": {
                "success": True,
                "text": "Đồ ngu ngốc!",
                "overall_label": "toxic",
                "toxic_sentence_count": 1,
                "clean_sentence_count": 0,
                "total_sentences": 1,
                "sentences": [
                    {
                        "sentence_number": 1,
                        "text": "Đồ ngu ngốc!",
                        "label": "toxic",
                        "confidence": 0.998,
                        "threshold": 0.62,
                        "word_count": 3,
                        "word_scores": [
                            {
                                "word": "Đồ",
                                "score": 0.902,
                                "position": {"start": 0, "end": 2},
                                "is_toxic": True,
                                "is_stop_word": False
                            }
                        ]
                    }
                ],
                "toxic_words_summary": [
                    {
                        "word": "ngu",
                        "score": 0.924,
                        "occurrences": 1,
                        "sentences": [1]
                    }
                ],
                "statistics": {
                    "total_words": 3,
                    "toxic_words": 3,
                    "mean_score": 0.856,
                    "median_score": 0.865,
                    "max_score": 0.924,
                    "min_score": 0.756
                },
                "html_highlighted": "<div>...</div>"
            }
        }
    }

    success: bool = Field(True, description="Whether analysis succeeded")
    text: str = Field(..., description="Original input text")
    overall_label: SentimentLabel = Field(..., description="Overall text sentiment")
    # Counters: zero is valid, negative values are rejected.
    toxic_sentence_count: int = Field(..., ge=0, description="Number of toxic sentences")
    clean_sentence_count: int = Field(..., ge=0, description="Number of clean sentences")
    total_sentences: int = Field(..., ge=0, description="Total number of sentences")
    sentences: List[SentenceResult] = Field(..., description="Sentence-level results")
    toxic_words_summary: List[ToxicWordSummary] = Field(..., description="Summary of toxic words")
    statistics: Statistics = Field(..., description="Overall statistics")
    html_highlighted: Optional[str] = Field(None, description="HTML with highlighting")


class HealthResponse(BaseModel):
    """Health check response.

    Reports the service status, whether the model is loaded, the device in
    use, the model name and the API version. All fields are required.
    """

    # ``model_loaded`` and ``model_name`` start with ``model_``, which
    # Pydantic v2 (2.0-2.9) treats as a protected namespace and warns about
    # at class creation. Clearing the protected namespaces keeps the public
    # field names unchanged while silencing that warning.
    model_config = {"protected_namespaces": ()}

    status: str = Field(..., description="Service status")
    model_loaded: bool = Field(..., description="Whether model is loaded")
    device: str = Field(..., description="Device being used (cpu/cuda)")
    model_name: str = Field(..., description="Model name")
    version: str = Field(..., description="API version")


class ErrorResponse(BaseModel):
    """Payload describing a failed request.

    ``success`` defaults to ``False`` and ``detail`` defaults to ``None``;
    only ``error`` has to be supplied.
    """

    success: bool = Field(
        default=False,
        description="Always false for errors",
    )
    error: str = Field(
        default=...,
        description="Error message",
    )
    detail: Optional[str] = Field(
        default=None,
        description="Detailed error information",
    )