Spaces:
Sleeping
Sleeping
| """ | |
| HTML Generator | |
| ============== | |
| Generate HTML highlighting (Single Responsibility) | |
| """ | |
from html import escape
from typing import Any, Dict, List

from app.services.text_processor import TextProcessor
class HTMLGenerator:
    """
    HTML generation service

    Responsibilities:
    - Generate HTML with highlighting
    - Format toxic/clean sentences differently

    The class holds no state; both methods are static and can be called on the
    class itself or on an instance. Every fragment taken from the analysed
    text is HTML-escaped before being embedded in the generated markup.
    """

    @staticmethod
    def generate_highlighted_html(
        text: str,
        sentence_results: List[Dict[str, Any]]
    ) -> str:
        """
        Generate HTML with highlighting

        Args:
            text: Original text
            sentence_results: List of sentence analysis results. Each entry
                must provide 'sent_start', 'sent_end' (character offsets into
                ``text``) and 'is_toxic'. Entries flagged as toxic must also
                provide 'words', 'scores' and 'threshold'.

        Returns:
            HTML string with highlighting

        Raises:
            KeyError: If a sentence entry lacks one of the required keys.
        """
        # Collect fragments and join once instead of repeated `+=`.
        parts = ['<div style="line-height: 2.2; font-size: 16px; font-family: Arial; max-width: 900px;">']
        last_end = 0

        for sent_data in sentence_results:
            sent_start = sent_data['sent_start']
            sent_end = sent_data['sent_end']

            # Text between the previous sentence and this one (whitespace etc.).
            # quote=False: the text lands in element content, where only
            # '&', '<' and '>' are significant.
            if sent_start > last_end:
                parts.append(escape(text[last_end:sent_start], quote=False))

            sent_text = text[sent_start:sent_end]

            if sent_data['is_toxic']:
                # Toxic sentence - highlight words (the helper escapes its own output)
                sent_html = HTMLGenerator._generate_toxic_sentence_html(
                    sent_text,
                    sent_start,
                    sent_data['words'],
                    sent_data['scores'],
                    sent_data['threshold'],
                )
                parts.append(
                    f'<span style="border-left: 3px solid #ff6b6b; padding-left: 8px; display: inline-block; margin: 4px 0;">{sent_html}</span>'
                )
            else:
                # Clean sentence - plain text
                parts.append(
                    f'<span style="color: #444;">{escape(sent_text, quote=False)}</span>'
                )

            last_end = sent_end

        # Add remaining text after the last analysed sentence
        if last_end < len(text):
            parts.append(escape(text[last_end:], quote=False))

        parts.append('</div>')
        return ''.join(parts)

    @staticmethod
    def _generate_toxic_sentence_html(
        sent_text: str,
        sent_start: int,
        words: List[Dict[str, Any]],
        scores: List[float],
        threshold: float
    ) -> str:
        """
        Generate HTML for toxic sentence

        Walks the sentence character by character. When the current position
        matches the start offset of the next word, the word is emitted inside
        a styled span and the position jumps to the word's end offset; any
        other character (punctuation, space, etc.) is emitted as escaped text.

        Args:
            sent_text: Sentence text
            sent_start: Sentence start position in full text
            words: List of words; each entry provides 'word', 'start' and
                'end', where the offsets are relative to the full text
            scores: Word scores, indexed in parallel with ``words``
            threshold: Toxicity threshold

        Returns:
            HTML string for sentence
        """
        pieces = []
        char_idx = 0
        word_idx = 0

        while char_idx < len(sent_text):
            if word_idx < len(words):
                word_info = words[word_idx]
                # Word offsets refer to the full text; make them sentence-relative.
                word_start_rel = word_info['start'] - sent_start
                word_end_rel = word_info['end'] - sent_start

                if char_idx == word_start_rel:
                    word = word_info['word']
                    score = scores[word_idx]
                    # Looked up once; it drives both the highlight decision and
                    # the styling of non-highlighted words.
                    is_stop = TextProcessor.is_stop_word(word)
                    safe_word = escape(word, quote=False)

                    if score > threshold and not is_stop and len(word) > 1:
                        # Toxic word - red background, more saturated for higher
                        # scores. Clamp so an out-of-range score cannot produce
                        # an invalid rgb() channel value.
                        color = max(0, min(255, int(255 * (1 - score))))
                        pieces.append(
                            f'<span style="background-color: rgb(255, {color}, {color}); '
                            f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
                            f'font-weight: bold;">{safe_word}</span>'
                        )
                    elif is_stop:
                        # Non-toxic stop word
                        pieces.append(
                            f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
                        )
                    else:
                        # Non-toxic regular word
                        pieces.append(f'<span style="color: #333;">{safe_word}</span>')

                    char_idx = word_end_rel
                    word_idx += 1
                    continue

            # Not at word - add character (punctuation, space, etc)
            pieces.append(escape(sent_text[char_idx], quote=False))
            char_idx += 1

        return ''.join(pieces)