Spaces:

Y-Research-Group
/

PosterGen

Running

App Files Files Community

PosterGen / src /agents /font_agent.py

Hadlay

First commit

46a8a46 6 months ago

raw

history blame contribute delete

15 kB

	"""
	font styling and keyword highlighting
	"""

	import json
	import re
	from pathlib import Path
	from typing import Dict, Any, List

	from src.state.poster_state import PosterState
	from utils.langgraph_utils import LangGraphAgent, extract_json, load_prompt
	from utils.src.logging_utils import log_agent_info, log_agent_success, log_agent_error, log_agent_warning
	from src.config.poster_config import load_config
	from jinja2 import Template


	class FontAgent:
	    """handles text styling and keyword highlighting

	    Reads ``design_layout``, ``color_scheme`` and ``story_board`` from the
	    poster state, asks the text model for keywords to emphasise, and writes
	    ``styled_layout`` and ``keywords`` back to the state.

	    Inline markup written into element content (the same markers that
	    ``get_styling_interfaces`` reports): ``**bold**``, ``*italic*`` and
	    ``<color:HEX>text</color>``.
	    """

	    # a complete, properly paired **bold** run with no asterisks inside
	    _BOLD_SPAN_RE = re.compile(r"\*\*([^*]+)\*\*")

	    def __init__(self):
	        self.name = "font_agent"
	        self.keyword_extraction_prompt = load_prompt("config/prompts/extract_keywords.txt")

	    def __call__(self, state: "PosterState") -> "PosterState":
	        """run keyword identification and styling, updating ``state`` in place

	        Any exception is logged and appended to ``state["errors"]`` instead of
	        being raised; in that case ``styled_layout`` may not have been set.

	        Returns:
	            the same ``state`` object that was passed in.
	        """
	        log_agent_info(self.name, "starting font styling")

	        try:
	            design_layout = state.get("design_layout", [])
	            color_scheme = state.get("color_scheme", {})
	            story_board = state.get("story_board", {})

	            if not design_layout:
	                raise ValueError("missing design_layout from layout agent")
	            if not color_scheme:
	                raise ValueError("missing color_scheme from color agent")
	            if not story_board:
	                raise ValueError("missing story_board from story board curator")

	            # identify keywords to highlight
	            keywords = self._identify_keywords(story_board, state)

	            # apply styling to layout
	            styled_layout = self._apply_styling(design_layout, color_scheme, keywords, state)

	            state["styled_layout"] = styled_layout
	            state["keywords"] = keywords
	            state["current_agent"] = self.name

	            self._save_styled_layout(state)

	            # each section maps style category -> keyword list, so the lists
	            # have to be counted, not the per-section dict itself
	            section_keywords = keywords.get("section_keywords") or {}
	            total_keywords = sum(
	                self._count_keywords(per_section) for per_section in section_keywords.values()
	            )

	            log_agent_success(self.name, f"applied enhanced styling to {len(styled_layout)} elements")
	            log_agent_success(self.name, f"identified {total_keywords} keywords for highlighting")

	        except Exception as e:
	            log_agent_error(self.name, f"failed: {e}")
	            # setdefault so a missing "errors" key cannot raise from inside the handler
	            state.setdefault("errors", []).append(f"{self.name}: {e}")

	        return state

	    @staticmethod
	    def _count_keywords(keywords_for_section: Any) -> int:
	        """count keywords across the style-category lists of one section

	        Non-dict input and non-list values contribute 0.
	        """
	        if not isinstance(keywords_for_section, dict):
	            return 0
	        return sum(len(kw_list) for kw_list in keywords_for_section.values() if isinstance(kw_list, list))

	    def _identify_keywords(self, story_board: Dict, state: "PosterState") -> Dict[str, Any]:
	        """identify keywords using story board content and enhanced narrative

	        Renders the keyword-extraction prompt with the JSON-serialised
	        narrative and story board, sends it to the text model and parses the
	        reply with ``extract_json``.

	        Returns:
	            the parsed dict, or ``{}`` when the reply did not parse to a dict
	            (styling then proceeds without highlighting).
	        """
	        enhanced_narrative = state.get("enhanced_narrative", {})

	        # extract keywords using LLM with external prompt
	        log_agent_info(self.name, "identifying keywords for highlighting")

	        agent = LangGraphAgent("expert at identifying key terms for visual highlighting", state["text_model"])

	        prompt = Template(self.keyword_extraction_prompt).render(
	            enhanced_narrative=json.dumps(enhanced_narrative, indent=2),
	            curated_content=json.dumps(story_board, indent=2),
	        )
	        response = agent.step(prompt)

	        # record token usage before parsing so it is kept even if parsing raises
	        state["tokens"].add_text(response.input_tokens, response.output_tokens)

	        result = extract_json(response.content)
	        if not isinstance(result, dict):
	            # NOTE(review): extract_json's failure behaviour is not visible here;
	            # anything that is not a dict is treated as "no keywords"
	            log_agent_warning(self.name, "keyword extraction did not return a JSON object; skipping highlighting")
	            return {}

	        return result

	    def _apply_styling(self, layout: List[Dict], colors: Dict, keywords: Dict, state: "PosterState") -> List[Dict]:
	        """apply styling with proper bullet point and bold formatting

	        Each element is shallow-copied before styling, so the input dicts do
	        not gain new top-level keys.

	        Returns:
	            the styled copies, sorted ascending by ``priority`` (default 0.5).
	        """
	        styled_layout = []
	        section_keywords = keywords.get("section_keywords") or {}

	        for element in layout:
	            styled_element = element.copy()
	            element_type = element.get("type")

	            if element_type == "title":
	                self._apply_title_styling(styled_element, colors)
	            elif element_type == "section_container":
	                self._apply_section_container_styling(styled_element, colors)
	            elif element_type in ("text", "visual", "mixed"):
	                self._apply_content_styling(styled_element, colors, section_keywords)
	            # section_title / title_accent_block / title_accent_line are handled
	            # by the section title designer; conf_logo / aff_logo need no text
	            # styling; every other type is passed through unchanged

	            styled_layout.append(styled_element)

	        # sort by priority for proper rendering order
	        styled_layout.sort(key=lambda x: x.get("priority", 0.5))

	        return styled_layout

	    def _apply_title_styling(self, element: Dict, colors: Dict):
	        """apply styling to title elements (mutates ``element``)"""
	        element["font_family"] = "Helvetica Neue"
	        element["font_color"] = colors.get("text_on_theme", "#FFFFFF")
	        element["font_size"] = 100
	        element["author_font_size"] = 72
	        element["font_weight"] = "bold"

	    def _apply_section_container_styling(self, element: Dict, colors: Dict):
	        """apply styling to section container elements (mutates ``element``)"""
	        element["border_color"] = colors.get("mono_light", "#CCCCCC")
	        element["border_width"] = 1
	        element["fill_color"] = "#FFFFFF"  # white background

	    def _apply_content_styling(self, element: Dict, colors: Dict, section_keywords: Dict):
	        """apply styling to content elements with keyword highlighting

	        Normalises bullet points first, then highlights the keywords
	        registered for the element's parent section, then sets the base font.
	        Mutates ``element``.
	        """
	        # determine parent section for keyword lookup
	        parent_section = self._extract_parent_section(element)
	        keywords_for_section = section_keywords.get(parent_section) or {}
	        if not isinstance(keywords_for_section, dict):
	            keywords_for_section = {}

	        # bullet formatting goes first so highlighting sees the final line layout
	        if element.get("content"):
	            element["content"] = self._format_bullet_points(element["content"])

	        original_content = element.get("content")
	        if keywords_for_section and original_content:
	            content = self._apply_keyword_highlighting(original_content, keywords_for_section, colors)
	            element["content"] = content

	            # debug logging
	            total_keywords = self._count_keywords(keywords_for_section)
	            if content != original_content:
	                log_agent_info(self.name, f"Applied highlighting to {parent_section}: found {total_keywords} keywords")
	            else:
	                log_agent_warning(self.name, f"Keywords found for {parent_section} ({total_keywords} total) but no highlighting applied")

	        # apply base text styling
	        element["font_family"] = "Arial"
	        element["font_color"] = colors.get("text", "#000000")
	        element["font_size"] = 44

	    def _extract_parent_section(self, element: Dict) -> str:
	        """extract parent section id from element

	        The section id is the element id with its last ``_``-separated part
	        removed (``"intro_text"`` -> ``"intro"``, ``"sec_1_visual"`` ->
	        ``"sec_1"``). Returns ``""`` when the id is missing or has no ``_``.
	        """
	        element_id = element.get("id") or ""
	        # rpartition yields "" as the head when there is no underscore
	        return element_id.rpartition("_")[0]

	    def _apply_keyword_highlighting(self, content: str, keywords: Dict, colors: Dict) -> str:
	        """apply semantic-based keyword highlighting with three distinct styles

	        Args:
	            content: text to highlight.
	            keywords: maps ``"bold_contrast"`` / ``"bold"`` / ``"italic"`` to
	                lists of keyword strings; other keys are ignored.
	            colors: color scheme; ``contrast`` (falling back to ``theme``) is
	                the highlight color.

	        Returns:
	            ``content`` with at most one occurrence of each keyword wrapped.
	        """
	        # use contrast color for highlighting
	        highlight_color = colors.get("contrast", colors.get("theme", "#1E3A8A"))

	        # markers match the tags reported by get_styling_interfaces
	        style_functions = {
	            "bold_contrast": lambda text: f"<color:{highlight_color}>{text}</color>",  # bold applied automatically in renderer
	            "bold": lambda text: f"**{text}**",
	            "italic": lambda text: f"*{text}*",
	        }

	        for style_type, style_func in style_functions.items():
	            for keyword in keywords.get(style_type) or []:
	                # keyword lists come from model output; skip blanks and non-strings
	                if not isinstance(keyword, str) or not keyword.strip():
	                    continue
	                content = self._highlight_keyword_in_content(content, keyword, style_func)

	        return content

	    def _highlight_keyword_in_content(self, content: str, keyword: str, style_func) -> str:
	        """highlight a specific keyword in content

	        Matching is case-insensitive and whole-word. The first hit wins, tried
	        in this order: inside an existing ``**bold**`` span, an existing
	        ``*italic*`` keyword, then plain text. The matched text, not the
	        caller's spelling, is what gets wrapped, so the content's casing is
	        preserved.

	        Args:
	            content: text to search.
	            keyword: keyword to highlight; surrounding whitespace is ignored.
	            style_func: callable mapping the matched text to its styled form.

	        Returns:
	            the updated content, or ``content`` unchanged when nothing matched.
	        """
	        keyword = keyword.strip()
	        if not keyword:
	            return content

	        # NOTE(review): coarse guard kept from the original. Once any color tag
	        # exists, every keyword that occurs anywhere in the content is skipped,
	        # which avoids nested tags but also blocks further highlights.
	        if "<color:" in content and keyword.lower() in content.lower():
	            return content

	        escaped_keyword = re.escape(keyword)
	        # lookarounds instead of \b so keywords with non-word edges ("C++") can match
	        whole_word = re.compile(rf"(?<!\w){escaped_keyword}(?!\w)", re.IGNORECASE)

	        # 1) keyword inside an existing bold span; iterating complete spans keeps
	        #    the closing ** of one span from pairing with the opening ** of the next
	        for bold_match in self._BOLD_SPAN_RE.finditer(content):
	            full_bold_text = bold_match.group(1)
	            keyword_in_bold = whole_word.search(full_bold_text)
	            if not keyword_in_bold:
	                continue

	            styled = style_func(keyword_in_bold.group(0))
	            if "<color:" in styled:
	                # remove the outer ** since color already implies bold
	                wrapper = ""
	            else:
	                # for regular bold/italic, keep the ** wrapper and drop any
	                # extra markers the style added
	                styled = styled.replace("**", "").replace("*", "")
	                wrapper = "**"

	            new_span = (
	                wrapper
	                + full_bold_text[:keyword_in_bold.start()]
	                + styled
	                + full_bold_text[keyword_in_bold.end():]
	                + wrapper
	            )
	            return content[:bold_match.start()] + new_span + content[bold_match.end():]

	        # 2) keyword already wrapped in single-asterisk italics
	        italic_match = re.search(rf"(?<!\*)\*({escaped_keyword})\*(?!\*)", content, re.IGNORECASE)
	        if italic_match:
	            return (
	                content[:italic_match.start()]
	                + style_func(italic_match.group(1))
	                + content[italic_match.end():]
	            )

	        # 3) plain text: splice at the match position, because str.replace
	        #    would rewrite the first substring occurrence instead
	        plain_match = whole_word.search(content)
	        if plain_match:
	            return (
	                content[:plain_match.start()]
	                + style_func(plain_match.group(0))
	                + content[plain_match.end():]
	            )

	        return content

	    def _format_bullet_points(self, content: str) -> str:
	        """ensure proper bullet point formatting

	        Strips each line, drops blank lines, turns leading ``- `` / ``* ``
	        into ``• ``, and adds a bullet to unbulleted lines that start with one
	        of a few lead-in words. Other lines are kept as they are.
	        """
	        if not content:
	            return content

	        # lead-ins treated as "content that should be bulleted"
	        lead_ins = ('the ', 'this ', 'our ', 'we ', 'new ', 'key ', 'main ')

	        formatted_lines = []
	        for raw_line in content.split('\n'):
	            line = raw_line.strip()
	            if not line:
	                continue

	            if line.startswith(('- ', '* ')):
	                # dash / asterisk -> bullet
	                line = '• ' + line[2:]
	            elif not line.startswith('•') and line.lower().startswith(lead_ins):
	                # add bullet if missing
	                line = '• ' + line

	            formatted_lines.append(line)

	        return '\n'.join(formatted_lines)

	    def get_styling_interfaces(self) -> Dict[str, Any]:
	        """return interfaces for renderer to properly handle styled content

	        Combines the fixed inline markup tags with spacing and font sizes read
	        from the ``typography`` section of the loaded config.
	        """
	        config = load_config()
	        font_params = config["typography"]
	        sizes = font_params["sizes"]

	        return {
	            "bullet_point_marker": "•",
	            "bold_start_tag": "**",
	            "bold_end_tag": "**",
	            "italic_start_tag": "*",
	            "italic_end_tag": "*",
	            "color_start_tag": "<color:",
	            "color_end_tag": "</color>",
	            "line_spacing": font_params["line_spacing"],  # from config
	            "paragraph_spacing": font_params["paragraph_spacing"],
	            "font_sizes": {
	                "title": sizes["title"],
	                "authors": sizes["authors"],
	                "section_title": sizes["section_title"],
	                "body_text": sizes["body_text"],
	            },
	        }

	    def _save_styled_layout(self, state: "PosterState"):
	        """save styled layout, keywords and styling interfaces as JSON

	        Files are written to ``<output_dir>/content``, which is created if it
	        does not exist.
	        """
	        output_dir = Path(state["output_dir"]) / "content"
	        output_dir.mkdir(parents=True, exist_ok=True)

	        # styled layout
	        with open(output_dir / "styled_layout.json", "w", encoding='utf-8') as f:
	            json.dump(state.get("styled_layout", []), f, indent=2)

	        # keywords
	        with open(output_dir / "keywords.json", "w", encoding='utf-8') as f:
	            json.dump(state.get("keywords", {}), f, indent=2)

	        # styling interfaces: built before opening the file so a config error
	        # does not leave an empty JSON file behind
	        interfaces = self.get_styling_interfaces()
	        with open(output_dir / "styling_interfaces.json", "w", encoding='utf-8') as f:
	            json.dump(interfaces, f, indent=2)


	def font_agent_node(state: PosterState) -> Dict[str, Any]:
	    """run FontAgent on ``state`` and return the updated state as a new dict

	    FontAgent records failures in ``state["errors"]`` instead of raising, so
	    after a failed run ``styled_layout`` may be absent. Result keys are read
	    with ``.get`` so that the recorded error reaches the caller instead of
	    being masked by a ``KeyError`` here.
	    """
	    result = FontAgent()(state)
	    return {
	        **state,
	        "styled_layout": result.get("styled_layout"),
	        "keywords": result.get("keywords"),
	        "tokens": result.get("tokens"),
	        "current_agent": result.get("current_agent"),
	        "errors": result.get("errors", []),
	    }