Spaces:
Running
Running
File size: 4,396 Bytes
ae4e2a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
"""
HTML Generator
==============
Generate HTML highlighting (Single Responsibility)
"""
from html import escape
from typing import Any, Dict, List

from app.services.text_processor import TextProcessor
class HTMLGenerator:
    """
    HTML generation service

    Responsibilities:
    - Generate HTML with highlighting
    - Format toxic/clean sentences differently

    Every piece of text taken from the analysed input (sentence text, the
    text between sentences, word tokens) is HTML-escaped before it is placed
    in the markup, so characters such as ``<`` and ``&`` in the input are
    displayed literally instead of being interpreted as HTML.
    """

    @staticmethod
    def generate_highlighted_html(
        text: str,
        sentence_results: List[Dict[str, Any]]
    ) -> str:
        """
        Generate HTML with highlighting

        Args:
            text: Original text
            sentence_results: List of sentence analysis results. Each entry
                is read for the keys 'sent_start', 'sent_end', 'is_toxic',
                'words', 'scores' and 'threshold'; the start/end values are
                used as slice offsets into ``text``.

        Returns:
            HTML string with highlighting: one wrapping ``<div>`` containing
            a ``<span>`` per sentence, with any text outside the sentence
            spans emitted (escaped) in between.

        Raises:
            KeyError: If a sentence entry lacks one of the keys listed above.
        """
        parts = [
            '<div style="line-height: 2.2; font-size: 16px; font-family: Arial; max-width: 900px;">'
        ]
        last_end = 0

        for sent_data in sentence_results:
            sent_start = sent_data['sent_start']
            sent_end = sent_data['sent_end']
            is_toxic = sent_data['is_toxic']
            words = sent_data['words']
            scores = sent_data['scores']
            threshold = sent_data['threshold']

            # Text between the previous sentence and this one (usually
            # whitespace) is input text too, so it is escaped as well.
            if sent_start > last_end:
                parts.append(escape(text[last_end:sent_start], quote=False))

            sent_text = text[sent_start:sent_end]

            if is_toxic:
                # Toxic sentence - highlight words. The helper returns
                # already-escaped markup, so it must not be escaped again.
                sent_html = HTMLGenerator._generate_toxic_sentence_html(
                    sent_text, sent_start, words, scores, threshold
                )
                parts.append(
                    f'<span style="border-left: 3px solid #ff6b6b; padding-left: 8px; display: inline-block; margin: 4px 0;">{sent_html}</span>'
                )
            else:
                # Clean sentence - plain (escaped) text
                parts.append(
                    f'<span style="color: #444;">{escape(sent_text, quote=False)}</span>'
                )

            last_end = sent_end

        # Add remaining text
        if last_end < len(text):
            parts.append(escape(text[last_end:], quote=False))

        parts.append('</div>')
        return ''.join(parts)

    @staticmethod
    def _generate_toxic_sentence_html(
        sent_text: str,
        sent_start: int,
        words: List[Dict[str, Any]],
        scores: List[float],
        threshold: float
    ) -> str:
        """
        Generate HTML for toxic sentence

        Args:
            sent_text: Sentence text
            sent_start: Sentence start position in full text
            words: List of words; each entry is read for 'word', 'start' and
                'end', where start/end are offsets in the full text
            scores: Word scores, indexed in parallel with ``words``
            threshold: Toxicity threshold

        Returns:
            HTML string for sentence. Words become styled ``<span>`` elements;
            everything between words (punctuation, spaces, etc) is emitted as
            escaped text.

        Raises:
            IndexError: If ``scores`` has no entry for a word that is rendered.
        """
        parts = []
        sent_len = len(sent_text)
        pos = 0  # offset in sent_text up to which output has been produced

        for word_idx, word_info in enumerate(words):
            word_start_rel = word_info['start'] - sent_start
            word_end_rel = word_info['end'] - sent_start

            # Skip words that cannot be placed: spans starting before the
            # current position (overlapping or preceding the sentence) or at
            # or beyond the sentence end. Skipping instead of stalling keeps
            # the highlighting of all later words intact.
            if word_start_rel < pos or word_start_rel >= sent_len:
                continue

            # Not at word - add characters (punctuation, space, etc)
            parts.append(escape(sent_text[pos:word_start_rel], quote=False))

            word = word_info['word']
            score = scores[word_idx]
            safe_word = escape(word, quote=False)
            # NOTE(review): is_stop_word is defined outside this file; it is
            # assumed to be a side-effect-free predicate, so it is evaluated
            # once per word here.
            is_stop_word = TextProcessor.is_stop_word(word)

            if score > threshold and not is_stop_word and len(word) > 1:
                # Toxic word - red background. Higher score gives a deeper
                # red; the channel is clamped so that a score outside [0, 1]
                # cannot yield an invalid rgb() value.
                color = max(0, min(255, int(255 * (1 - score))))
                parts.append(
                    f'<span style="background-color: rgb(255, {color}, {color}); '
                    f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
                    f'font-weight: bold;">{safe_word}</span>'
                )
            elif is_stop_word:
                # Non-toxic stop word
                parts.append(
                    f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
                )
            else:
                # Non-toxic word
                parts.append(f'<span style="color: #333;">{safe_word}</span>')

            # Never move backwards, even if a span reports end < start.
            pos = max(word_end_rel, word_start_rel)

        # Trailing text after the last rendered word
        parts.append(escape(sent_text[pos:], quote=False))
        return ''.join(parts)
|