File size: 4,396 Bytes
ae4e2a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
HTML Generator
==============
Generate HTML highlighting (Single Responsibility)
"""

from html import escape
from typing import Any, Dict, List

from app.services.text_processor import TextProcessor


class HTMLGenerator:
    """
    HTML generation service

    Responsibilities:
    - Generate HTML with highlighting
    - Format toxic/clean sentences differently

    All text taken from the analysed input (sentence text, gaps between
    sentences, individual words) is HTML-escaped before being placed in
    the markup, so input such as ``<script>`` is rendered as literal text.
    """

    @staticmethod
    def _escape(fragment: str) -> str:
        """
        Escape a piece of input text for use as HTML element content

        Only ``&``, ``<`` and ``>`` are replaced. Quotes are left alone
        because the escaped text is never placed inside an attribute value.

        Args:
            fragment: Raw text taken from the analysed input

        Returns:
            Text that is safe to embed between HTML tags
        """
        return escape(fragment, quote=False)

    @staticmethod
    def generate_highlighted_html(
        text: str,
        sentence_results: List[Dict[str, Any]]
    ) -> str:
        """
        Generate HTML with highlighting

        Each entry of ``sentence_results`` must provide ``sent_start``,
        ``sent_end`` (character offsets into ``text``) and ``is_toxic``.
        Toxic entries must also provide ``words``, ``scores`` and
        ``threshold``, which are forwarded to the per-word highlighter.
        Text lying between sentences, and after the last one, is emitted
        as escaped plain text.

        Args:
            text: Original text
            sentence_results: List of sentence analysis results

        Returns:
            HTML string with highlighting, wrapped in a single ``<div>``
        """
        # Collect fragments and join once instead of repeated string +=.
        parts: List[str] = [
            '<div style="line-height: 2.2; font-size: 16px; font-family: Arial; max-width: 900px;">'
        ]

        last_end = 0

        for sent_data in sentence_results:
            sent_start = sent_data['sent_start']
            sent_end = sent_data['sent_end']

            # Text between the previous sentence and this one (usually whitespace)
            if sent_start > last_end:
                parts.append(HTMLGenerator._escape(text[last_end:sent_start]))

            sent_text = text[sent_start:sent_end]

            if sent_data['is_toxic']:
                # Toxic sentence - highlight words (helper escapes its own output)
                sent_html = HTMLGenerator._generate_toxic_sentence_html(
                    sent_text,
                    sent_start,
                    sent_data['words'],
                    sent_data['scores'],
                    sent_data['threshold'],
                )
                parts.append(
                    '<span style="border-left: 3px solid #ff6b6b; padding-left: 8px; '
                    f'display: inline-block; margin: 4px 0;">{sent_html}</span>'
                )
            else:
                # Clean sentence - plain (escaped) text
                parts.append(
                    f'<span style="color: #444;">{HTMLGenerator._escape(sent_text)}</span>'
                )

            last_end = sent_end

        # Add remaining text after the last sentence
        if last_end < len(text):
            parts.append(HTMLGenerator._escape(text[last_end:]))

        parts.append('</div>')
        return ''.join(parts)

    @staticmethod
    def _generate_toxic_sentence_html(
        sent_text: str,
        sent_start: int,
        words: List[Dict[str, Any]],
        scores: List[float],
        threshold: float
    ) -> str:
        """
        Generate HTML for toxic sentence

        Walks ``words`` in order. Text between words (spaces, punctuation)
        is copied through escaped; each word is wrapped in a ``<span>``
        whose style depends on its score and on
        ``TextProcessor.is_stop_word``. A word is marked toxic when its
        score exceeds ``threshold``, it is not a stop word, and it is
        longer than one character.

        Args:
            sent_text: Sentence text
            sent_start: Sentence start position in full text
            words: List of words; each has ``word``, ``start`` and ``end``
                (``start``/``end`` are offsets into the full text)
            scores: Word scores, indexed in parallel with ``words``
            threshold: Toxicity threshold

        Returns:
            HTML string for sentence
        """
        parts: List[str] = []
        sent_len = len(sent_text)
        char_idx = 0  # cursor into sent_text: everything before it is already emitted

        for word_idx, word_info in enumerate(words):
            if char_idx >= sent_len:
                break

            word_start_rel = word_info['start'] - sent_start
            word_end_rel = word_info['end'] - sent_start

            # A word that starts behind the cursor (overlap / bad offset) or
            # beyond the sentence cannot be placed. Skip it so the words
            # that follow are still highlighted instead of being blocked.
            if word_start_rel < char_idx or word_start_rel >= sent_len:
                continue

            # Not at word - copy the gap (punctuation, space, etc)
            parts.append(HTMLGenerator._escape(sent_text[char_idx:word_start_rel]))

            word = word_info['word']
            score = scores[word_idx]
            safe_word = HTMLGenerator._escape(word)
            is_stop = TextProcessor.is_stop_word(word)

            if score > threshold and not is_stop and len(word) > 1:
                # Toxic word - red background, more saturated for higher scores.
                # Clamp so a score outside [0, 1] still gives a valid rgb() value.
                color = max(0, min(255, int(255 * (1 - score))))
                parts.append(
                    f'<span style="background-color: rgb(255, {color}, {color}); '
                    f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
                    f'font-weight: bold;">{safe_word}</span>'
                )
            elif is_stop:
                # Non-toxic stop word
                parts.append(
                    f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
                )
            else:
                # Non-toxic word
                parts.append(f'<span style="color: #333;">{safe_word}</span>')

            # Never move the cursor backwards, even if 'end' < 'start'.
            char_idx = max(word_end_rel, word_start_rel)

        # Trailing text after the last placed word
        parts.append(HTMLGenerator._escape(sent_text[char_idx:]))
        return ''.join(parts)