# File size: 5,515 Bytes
# ae4e2a6
"""
Response Schemas
================
DTOs for API responses
"""

from pydantic import BaseModel, Field
from typing import List, Optional, Dict
from enum import Enum


class SentimentLabel(str, Enum):
    """Sentiment labels"""
    # Closed set of labels used for the `label` field of SentenceResult and
    # the `overall_label` field of AnalysisResponse in this module.
    # The `str` mixin makes every member a real string equal to its value,
    # e.g. SentimentLabel.TOXIC == "toxic".
    TOXIC = "toxic"  # text judged toxic
    CLEAN = "clean"  # text judged not toxic


class WordScore(BaseModel):
    """Word-level score information"""

    # One scored word of the analysed text; all five fields are required.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    # The word exactly as reported by the analyser.
    word: str = Field(
        default=...,
        description="The word",
    )
    # Toxicity score, validated to lie within [0.0, 1.0].
    score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Toxicity score (0-1)",
    )
    # Mapping of offset names to integers; per the description the keys are
    # "start" and "end". NOTE(review): whether "end" is inclusive is not
    # stated here - confirm against the producer.
    position: Dict[str, int] = Field(
        default=...,
        description="Position in text {start, end}",
    )
    # Flag set by the producer; this model does not derive it from `score`.
    is_toxic: bool = Field(
        default=...,
        description="Whether word is toxic",
    )
    is_stop_word: bool = Field(
        default=...,
        description="Whether word is a stop word",
    )


class SentenceResult(BaseModel):
    """Sentence-level analysis result"""

    # Result for a single sentence. Everything is required except
    # `word_scores`, which defaults to None.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    # 1-based index of the sentence (documented, not validated here).
    sentence_number: int = Field(
        default=...,
        description="Sentence index (1-based)",
    )
    text: str = Field(
        default=...,
        description="Sentence text",
    )
    # One of the SentimentLabel members ("toxic" / "clean").
    label: SentimentLabel = Field(
        default=...,
        description="Toxic or clean",
    )
    # Both values below are validated to lie within [0.0, 1.0].
    confidence: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Confidence score",
    )
    threshold: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Threshold used",
    )
    word_count: int = Field(
        default=...,
        description="Number of words",
    )
    # Optional per-word breakdown; None when the producer omits it.
    word_scores: Optional[List[WordScore]] = Field(
        default=None,
        description="Word-level scores",
    )


class ToxicWordSummary(BaseModel):
    """Summary of toxic words"""

    # Aggregated entry for one toxic word; all fields are required.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    word: str = Field(
        default=...,
        description="Toxic word",
    )
    # Highest score reported for this word, validated to [0.0, 1.0].
    score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Maximum score",
    )
    occurrences: int = Field(
        default=...,
        description="Number of occurrences",
    )
    # Sentence numbers in which the word appears.
    # NOTE(review): presumably the same 1-based numbering as
    # SentenceResult.sentence_number - confirm with the producer.
    sentences: List[int] = Field(
        default=...,
        description="Sentence numbers containing this word",
    )


class Statistics(BaseModel):
    """Overall statistics"""

    # Aggregate numbers for an analysed text; all fields are required.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    # Word counts (no range validation is applied to these).
    total_words: int = Field(
        default=...,
        description="Total number of words",
    )
    toxic_words: int = Field(
        default=...,
        description="Number of toxic words",
    )

    # Score aggregates; each one is validated to lie within [0.0, 1.0].
    mean_score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Mean toxicity score",
    )
    median_score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Median toxicity score",
    )
    max_score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Maximum toxicity score",
    )
    min_score: float = Field(
        default=...,
        ge=0.0,
        le=1.0,
        description="Minimum toxicity score",
    )


class AnalysisResponse(BaseModel):
    """Complete analysis response"""

    # Top-level payload of a text analysis. Every field is required except
    # `success` (defaults to True) and `html_highlighted` (defaults to None).
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    success: bool = Field(True, description="Whether analysis succeeded")
    text: str = Field(..., description="Original input text")
    overall_label: SentimentLabel = Field(..., description="Overall text sentiment")

    # Sentence counters. NOTE(review): toxic + clean presumably adds up to
    # total_sentences, but this model does not enforce that.
    toxic_sentence_count: int = Field(..., description="Number of toxic sentences")
    clean_sentence_count: int = Field(..., description="Number of clean sentences")
    total_sentences: int = Field(..., description="Total number of sentences")

    # Nested results: per-sentence details, per-word summary, aggregates.
    sentences: List[SentenceResult] = Field(..., description="Sentence-level results")
    toxic_words_summary: List[ToxicWordSummary] = Field(..., description="Summary of toxic words")
    statistics: Statistics = Field(..., description="Overall statistics")

    # Optional HTML rendering of the text; None when not provided.
    html_highlighted: Optional[str] = Field(None, description="HTML with highlighting")

    # Schema example, declared through `model_config` instead of a nested
    # `class Config`. `json_schema_extra` is a pydantic v2 setting, and v2
    # deprecates class-based config (it warns when the class is created).
    # A plain dict is accepted for `model_config`, so no new import is needed.
    model_config = {
        "json_schema_extra": {
            "example": {
                "success": True,
                "text": "Đồ ngu ngốc!",
                "overall_label": "toxic",
                "toxic_sentence_count": 1,
                "clean_sentence_count": 0,
                "total_sentences": 1,
                "sentences": [
                    {
                        "sentence_number": 1,
                        "text": "Đồ ngu ngốc!",
                        "label": "toxic",
                        "confidence": 0.998,
                        "threshold": 0.62,
                        "word_count": 3,
                        "word_scores": [
                            {
                                "word": "Đồ",
                                "score": 0.902,
                                "position": {"start": 0, "end": 2},
                                "is_toxic": True,
                                "is_stop_word": False
                            }
                        ]
                    }
                ],
                "toxic_words_summary": [
                    {
                        "word": "ngu",
                        "score": 0.924,
                        "occurrences": 1,
                        "sentences": [1]
                    }
                ],
                "statistics": {
                    "total_words": 3,
                    "toxic_words": 3,
                    "mean_score": 0.856,
                    "median_score": 0.865,
                    "max_score": 0.924,
                    "min_score": 0.756
                },
                "html_highlighted": "<div>...</div>"
            }
        }
    }


class HealthResponse(BaseModel):
    """Health check response"""

    # Payload of the health-check endpoint; all fields are required.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    # `model_loaded` and `model_name` start with "model_", a prefix that
    # pydantic v2 releases treating it as a protected namespace warn about
    # when the class is defined. Clearing the protected namespaces removes
    # that warning without renaming the public fields.
    model_config = {"protected_namespaces": ()}

    status: str = Field(..., description="Service status")
    model_loaded: bool = Field(..., description="Whether model is loaded")
    # Free-form string; the description gives "cpu" and "cuda" as examples.
    device: str = Field(..., description="Device being used (cpu/cuda)")
    model_name: str = Field(..., description="Model name")
    version: str = Field(..., description="API version")


class ErrorResponse(BaseModel):
    """Error response"""

    # Payload returned when a request fails. Only `error` is required.
    # The class docstring is kept verbatim because pydantic publishes it as
    # the model description in the generated JSON schema.

    # Defaults to False; nothing here prevents a caller from passing True.
    success: bool = Field(
        default=False,
        description="Always false for errors",
    )
    # Short error message (required).
    error: str = Field(
        default=...,
        description="Error message",
    )
    # Optional extra information; None when there is nothing to add.
    detail: Optional[str] = Field(
        default=None,
        description="Detailed error information",
    )