Spaces:

HoangDaoAI
/

toxic-api

Sleeping

toxic-api / app /services /html_generator.py

handrix

Initial deployment - Toxic Detection API

ae4e2a6 12 days ago

4.4 kB

	"""
	HTML Generator
	==============
	Generate HTML highlighting (Single Responsibility)
	"""

	from html import escape
	from typing import Any, Dict, List

	from app.services.text_processor import TextProcessor


	class HTMLGenerator:
	    """
	    HTML generation service

	    Responsibilities:
	    - Generate HTML with highlighting
	    - Format toxic/clean sentences differently

	    Every piece of text copied from the input (sentence text, gaps between
	    sentences, individual words) is HTML-escaped before it is embedded in
	    the markup, so characters such as ``<``, ``>`` and ``&`` are rendered
	    literally instead of being interpreted as HTML.
	    """

	    @staticmethod
	    def generate_highlighted_html(
	        text: str,
	        sentence_results: List[Dict[str, Any]]
	    ) -> str:
	        """
	        Generate HTML with highlighting

	        Args:
	            text: Original text
	            sentence_results: List of sentence analysis results. Each entry
	                must provide 'sent_start', 'sent_end' and 'is_toxic';
	                entries flagged as toxic must also provide 'words',
	                'scores' and 'threshold', which are forwarded to
	                ``_generate_toxic_sentence_html``.

	        Returns:
	            HTML string with highlighting: a single ``<div>`` containing one
	            ``<span>`` per sentence, with the text between and after the
	            sentences copied through (escaped).
	        """
	        parts: List[str] = [
	            '<div style="line-height: 2.2; font-size: 16px; font-family: Arial; max-width: 900px;">'
	        ]

	        last_end = 0

	        for sent_data in sentence_results:
	            sent_start = sent_data['sent_start']
	            sent_end = sent_data['sent_end']

	            # Copy through whatever sits between the previous sentence and
	            # this one (typically whitespace).
	            if sent_start > last_end:
	                parts.append(escape(text[last_end:sent_start], quote=False))

	            sent_text = text[sent_start:sent_end]

	            if sent_data['is_toxic']:
	                # Toxic sentence - highlight words. The helper returns
	                # already-escaped markup, so it is embedded as-is.
	                sent_html = HTMLGenerator._generate_toxic_sentence_html(
	                    sent_text,
	                    sent_start,
	                    sent_data['words'],
	                    sent_data['scores'],
	                    sent_data['threshold'],
	                )
	                parts.append(
	                    '<span style="border-left: 3px solid #ff6b6b; padding-left: 8px; '
	                    f'display: inline-block; margin: 4px 0;">{sent_html}</span>'
	                )
	            else:
	                # Clean sentence - plain (escaped) text
	                parts.append(
	                    f'<span style="color: #444;">{escape(sent_text, quote=False)}</span>'
	                )

	            last_end = sent_end

	        # Add remaining text
	        if last_end < len(text):
	            parts.append(escape(text[last_end:], quote=False))

	        parts.append('</div>')
	        return ''.join(parts)

	    @staticmethod
	    def _generate_toxic_sentence_html(
	        sent_text: str,
	        sent_start: int,
	        words: List[Dict[str, Any]],
	        scores: List[float],
	        threshold: float
	    ) -> str:
	        """
	        Generate HTML for toxic sentence

	        Walks the sentence character by character. When the cursor reaches
	        the start offset of the next word, the word is emitted inside a
	        styled ``<span>`` and the cursor jumps to the word's end offset;
	        all other characters (punctuation, spaces, ...) are copied through.

	        Args:
	            sent_text: Sentence text
	            sent_start: Sentence start position in full text; subtracted
	                from each word's 'start'/'end' to get sentence-relative
	                offsets
	            words: List of words; each entry provides 'word', 'start' and
	                'end'
	            scores: Word scores, indexed in parallel with ``words``
	            threshold: Toxicity threshold; a word is highlighted when its
	                score is strictly greater than this value, it is not a stop
	                word and it is longer than one character

	        Returns:
	            HTML string for sentence (all input text escaped)
	        """
	        pieces: List[str] = []
	        char_idx = 0
	        word_idx = 0
	        sent_len = len(sent_text)
	        word_count = len(words)

	        while char_idx < sent_len:
	            # Skip words that start before the cursor (overlapping or
	            # out-of-range offsets). Such a word can never match, and
	            # leaving it at the head of the queue would prevent every
	            # later word in the sentence from being highlighted.
	            while (
	                word_idx < word_count
	                and words[word_idx]['start'] - sent_start < char_idx
	            ):
	                word_idx += 1

	            if (
	                word_idx < word_count
	                and words[word_idx]['start'] - sent_start == char_idx
	            ):
	                word_info = words[word_idx]
	                word = word_info['word']
	                score = scores[word_idx]
	                safe_word = escape(word, quote=False)
	                is_stop_word = TextProcessor.is_stop_word(word)

	                if score > threshold and not is_stop_word and len(word) > 1:
	                    # Toxic word - red background; higher score => deeper
	                    # red. Clamp so an out-of-range score cannot produce an
	                    # invalid rgb() component.
	                    color = max(0, min(255, int(255 * (1 - score))))
	                    pieces.append(
	                        f'<span style="background-color: rgb(255, {color}, {color}); '
	                        f'padding: 2px 4px; margin: 0 1px; border-radius: 3px; '
	                        f'font-weight: bold;">{safe_word}</span>'
	                    )
	                elif is_stop_word:
	                    # Non-toxic stop word - greyed out
	                    pieces.append(
	                        f'<span style="color: #aaa; font-style: italic;">{safe_word}</span>'
	                    )
	                else:
	                    # Non-toxic word
	                    pieces.append(f'<span style="color: #333;">{safe_word}</span>')

	                # Jump past the word, but never move the cursor backwards
	                # if the word's end offset is inconsistent.
	                char_idx = max(char_idx, word_info['end'] - sent_start)
	                word_idx += 1
	                continue

	            # Not at word - add character (punctuation, space, etc)
	            pieces.append(escape(sent_text[char_idx], quote=False))
	            char_idx += 1

	        return ''.join(pieces)