Spaces:

HoangDaoAI
/

toxic-api

Running

File size: 4,396 Bytes

ae4e2a6

"""
HTML Generator
==============
Generate HTML highlighting (Single Responsibility)
"""

from html import escape
from typing import Any, Dict, List

from app.services.text_processor import TextProcessor


class HTMLGenerator:
    """
    HTML generation service

    Responsibilities:
    - Generate HTML with highlighting
    - Format toxic/clean sentences differently

    Every piece of text taken from the analysed input (sentence text, the
    gaps between sentences, and individual words) is HTML-escaped before it
    is embedded in the markup, so characters such as ``<``, ``>`` and ``&``
    are displayed literally instead of being interpreted as HTML.
    """

    @staticmethod
    def generate_highlighted_html(
        text: str,
        sentence_results: List[Dict[str, Any]]
    ) -> str:
        """
        Generate HTML with highlighting

        Args:
            text: Original text
            sentence_results: List of sentence analysis results. Each entry
                is read for the keys ``sent_start``, ``sent_end``,
                ``is_toxic``, ``words``, ``scores`` and ``threshold``;
                ``sent_start``/``sent_end`` are used as slice offsets
                into ``text``.

        Returns:
            HTML string with highlighting: a single ``<div>`` holding one
            ``<span>`` per sentence, with the text between and after the
            sentences copied through (escaped).
        """
        parts = [
            '<div style="line-height: 2.2; font-size: 16px; '
            'font-family: Arial; max-width: 900px;">'
        ]
        last_end = 0

        for sent_data in sentence_results:
            sent_start = sent_data['sent_start']
            sent_end = sent_data['sent_end']

            # Copy through whatever sits between the previous sentence and
            # this one (whitespace, unsegmented text).
            if sent_start > last_end:
                parts.append(escape(text[last_end:sent_start]))

            sent_text = text[sent_start:sent_end]

            if sent_data['is_toxic']:
                # Toxic sentence - highlight words
                sent_html = HTMLGenerator._generate_toxic_sentence_html(
                    sent_text,
                    sent_start,
                    sent_data['words'],
                    sent_data['scores'],
                    sent_data['threshold'],
                )
                parts.append(
                    '<span style="border-left: 3px solid #ff6b6b; '
                    'padding-left: 8px; display: inline-block; '
                    f'margin: 4px 0;">{sent_html}</span>'
                )
            else:
                # Clean sentence - plain text
                parts.append(
                    f'<span style="color: #444;">{escape(sent_text)}</span>'
                )

            last_end = sent_end

        # Add remaining text
        if last_end < len(text):
            parts.append(escape(text[last_end:]))

        parts.append('</div>')
        return "".join(parts)

    @staticmethod
    def _generate_toxic_sentence_html(
        sent_text: str,
        sent_start: int,
        words: List[Dict[str, Any]],
        scores: List[float],
        threshold: float
    ) -> str:
        """
        Generate HTML for toxic sentence

        Args:
            sent_text: Sentence text
            sent_start: Sentence start position in full text
            words: List of words; each entry is read for ``word``,
                ``start`` and ``end`` (``start``/``end`` are converted to
                sentence-relative offsets by subtracting ``sent_start``)
            scores: Word scores, indexed in parallel with ``words``
            threshold: Toxicity threshold

        Returns:
            HTML string for sentence: each word wrapped in a styled
            ``<span>``, with the characters between words (punctuation,
            spaces, etc) copied through escaped.

        Raises:
            IndexError: If a rendered word has no matching entry in
                ``scores``.
        """
        parts: List[str] = []
        sent_len = len(sent_text)
        cursor = 0  # sentence-relative offset of the next unwritten character

        for idx, word_info in enumerate(words):
            word_start = word_info['start'] - sent_start
            word_end = word_info['end'] - sent_start

            # A word that starts behind the cursor (overlapping or
            # out-of-order offsets) or past the sentence end cannot be
            # placed. Skip it so the remaining words are still highlighted.
            if word_start < cursor or word_start >= sent_len:
                continue

            # Not at word - add the run of characters (punctuation, space, etc)
            parts.append(escape(sent_text[cursor:word_start]))
            parts.append(
                HTMLGenerator._render_word(
                    word_info['word'], scores[idx], threshold
                )
            )
            # Never move backwards, even if 'end' precedes 'start'.
            cursor = max(word_start, word_end)

        parts.append(escape(sent_text[cursor:]))
        return "".join(parts)

    @staticmethod
    def _render_word(word: str, score: float, threshold: float) -> str:
        """
        Render a single word as a styled, HTML-escaped ``<span>``

        Args:
            word: Word text
            score: Word score
            threshold: Toxicity threshold

        Returns:
            A bold span with a red background when ``score`` exceeds
            ``threshold`` and the word is neither a stop word nor a single
            character; otherwise a grey italic span for stop words, or a
            dark-grey span for every other word.
        """
        safe_word = escape(word)
        is_stop_word = TextProcessor.is_stop_word(word)

        if score > threshold and not is_stop_word and len(word) > 1:
            # Toxic word - red background. A higher score lowers the
            # green/blue channels (deeper red); clamp so an out-of-range
            # score still produces a valid colour value.
            color = max(0, min(255, int(255 * (1 - score))))
            return (
                f'<span style="background-color: rgb(255, {color}, {color}); '
                f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
                f'font-weight: bold;">{safe_word}</span>'
            )

        # Non-toxic word
        if is_stop_word:
            return f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
        return f'<span style="color: #333;">{safe_word}</span>'