toxic-api / app /services /html_generator.py
handrix
Initial deployment - Toxic Detection API
ae4e2a6
"""
HTML Generator
==============
Render analyzed text as HTML with toxic sentences and words highlighted (Single Responsibility)
"""
from html import escape
from typing import Any, Dict, List

from app.services.text_processor import TextProcessor
class HTMLGenerator:
    """
    HTML generation service

    Responsibilities:
    - Generate HTML with highlighting
    - Format toxic/clean sentences differently

    Every piece of text taken from the analyzed input (sentence text, gap
    text, word text) is HTML-escaped before being embedded in the markup, so
    markup characters in the input are rendered literally instead of being
    interpreted by the browser.
    """

    @staticmethod
    def generate_highlighted_html(
        text: str,
        sentence_results: List[Dict[str, Any]]
    ) -> str:
        """
        Generate HTML with highlighting

        Args:
            text: Original text
            sentence_results: List of sentence analysis results. Each entry
                must provide the keys 'sent_start', 'sent_end', 'is_toxic',
                'words', 'scores' and 'threshold'; the start/end values are
                character offsets into ``text``.

        Returns:
            HTML string with highlighting: one wrapping <div>, one <span> per
            sentence, and the (escaped) text between/after sentences copied
            through unchanged.

        Raises:
            KeyError: If a sentence entry lacks one of the required keys.
        """
        parts = [
            '<div style="line-height: 2.2; font-size: 16px; font-family: Arial; max-width: 900px;">'
        ]
        last_end = 0

        for sent_data in sentence_results:
            sent_start = sent_data['sent_start']
            sent_end = sent_data['sent_end']
            is_toxic = sent_data['is_toxic']
            words = sent_data['words']
            scores = sent_data['scores']
            threshold = sent_data['threshold']

            # Copy through whatever lies between the previous sentence and
            # this one (whitespace, stray punctuation, ...).
            if sent_start > last_end:
                parts.append(escape(text[last_end:sent_start]))

            sent_text = text[sent_start:sent_end]
            if is_toxic:
                # Toxic sentence - highlight words (helper escapes its output)
                sent_html = HTMLGenerator._generate_toxic_sentence_html(
                    sent_text, sent_start, words, scores, threshold
                )
                parts.append(
                    f'<span style="border-left: 3px solid #ff6b6b; padding-left: 8px; display: inline-block; margin: 4px 0;">{sent_html}</span>'
                )
            else:
                # Clean sentence - plain (escaped) text
                parts.append(f'<span style="color: #444;">{escape(sent_text)}</span>')

            last_end = sent_end

        # Add remaining text after the last sentence
        if last_end < len(text):
            parts.append(escape(text[last_end:]))

        parts.append('</div>')
        return "".join(parts)

    @staticmethod
    def _generate_toxic_sentence_html(
        sent_text: str,
        sent_start: int,
        words: List[Dict[str, Any]],
        scores: List[float],
        threshold: float
    ) -> str:
        """
        Generate HTML for toxic sentence

        Args:
            sent_text: Sentence text
            sent_start: Sentence start position in full text
            words: List of words; each entry provides 'word', 'start' and
                'end', where start/end are offsets into the full text
            scores: Word scores, indexed in parallel with ``words``
            threshold: Toxicity threshold

        Returns:
            HTML string for sentence: each word wrapped in a styled <span>,
            with the (escaped) characters between words copied through.

        Raises:
            IndexError: If a rendered word has no corresponding score.
        """
        pieces = []
        cursor = 0  # position in sent_text up to which output has been produced
        sent_len = len(sent_text)

        for idx, word_info in enumerate(words):
            start = word_info['start'] - sent_start
            end = word_info['end'] - sent_start

            # Skip words that cannot be placed: behind the cursor (overlapping
            # or out of order), starting outside the sentence, or with an
            # inverted span. Skipping keeps one bad offset from preventing
            # all following words from being highlighted.
            if start < cursor or start >= sent_len or end < start:
                continue

            # Characters between the previous word and this one
            # (punctuation, spaces, etc.)
            if start > cursor:
                pieces.append(escape(sent_text[cursor:start]))

            pieces.append(
                HTMLGenerator._render_word_html(
                    word_info['word'], scores[idx], threshold
                )
            )
            cursor = end

        # Trailing characters after the last word
        if cursor < sent_len:
            pieces.append(escape(sent_text[cursor:]))

        return "".join(pieces)

    @staticmethod
    def _render_word_html(word: str, score: float, threshold: float) -> str:
        """
        Render a single word as a styled <span>

        Args:
            word: Word text (escaped here before embedding)
            score: Word score
            threshold: Toxicity threshold

        Returns:
            A red-background span when the score exceeds the threshold and
            the word is longer than one character and not reported as a stop
            word by TextProcessor.is_stop_word; otherwise a grey italic span
            for stop words or a dark-grey span for all other words.
        """
        safe_word = escape(word)
        is_stop_word = TextProcessor.is_stop_word(word)

        if score > threshold and not is_stop_word and len(word) > 1:
            # Toxic word - red background; higher score gives a deeper red.
            # Clamp so an out-of-range score cannot yield an invalid rgb().
            color = max(0, min(255, int(255 * (1 - score))))
            return (
                f'<span style="background-color: rgb(255, {color}, {color}); '
                f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
                f'font-weight: bold;">{safe_word}</span>'
            )

        # Non-toxic word
        if is_stop_word:
            return f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
        return f'<span style="color: #333;">{safe_word}</span>'