Spaces:

Y-Research-Group
/

PosterGen

Running

File size: 15,010 Bytes

46a8a46

"""
font styling and keyword highlighting
"""

import json
import re
from pathlib import Path
from typing import Dict, Any, List

from src.state.poster_state import PosterState
from utils.langgraph_utils import LangGraphAgent, extract_json, load_prompt
from utils.src.logging_utils import log_agent_info, log_agent_success, log_agent_error, log_agent_warning
from src.config.poster_config import load_config
from jinja2 import Template


class FontAgent:
    """handles text styling and keyword highlighting"""
    
    def __init__(self):
        self.name = "font_agent"
        self.keyword_extraction_prompt = load_prompt("config/prompts/extract_keywords.txt")

    def __call__(self, state: PosterState) -> PosterState:
        log_agent_info(self.name, "starting font styling")
        
        try:
            design_layout = state.get("design_layout", [])
            color_scheme = state.get("color_scheme", {})
            story_board = state.get("story_board", {})
            
            if not design_layout:
                raise ValueError("missing design_layout from layout agent")
            if not color_scheme:
                raise ValueError("missing color_scheme from color agent")
            if not story_board:
                raise ValueError("missing story_board from story board curator")
            
            # identify keywords to highlight
            keywords = self._identify_keywords(story_board, state)
            
            # apply styling to layout
            styled_layout = self._apply_styling(design_layout, color_scheme, keywords, state)
            
            state["styled_layout"] = styled_layout
            state["keywords"] = keywords
            state["current_agent"] = self.name
            
            self._save_styled_layout(state)
            
            # count total keywords across all sections
            total_keywords = sum(len(kw_list) for kw_list in keywords.get("section_keywords", {}).values())
            
            log_agent_success(self.name, f"applied enhanced styling to {len(styled_layout)} elements")
            log_agent_success(self.name, f"identified {total_keywords} keywords for highlighting")

        except Exception as e:
            log_agent_error(self.name, f"failed: {e}")
            state["errors"].append(f"{self.name}: {e}")
            
        return state

    def _identify_keywords(self, story_board: Dict, state: PosterState) -> Dict[str, Any]:
        """identify keywords using story board content and enhanced narrative"""
        
        enhanced_narrative = state.get("enhanced_narrative", {})
        
        # extract keywords using LLM with external prompt
        log_agent_info(self.name, "identifying keywords for highlighting")
        
        agent = LangGraphAgent("expert at identifying key terms for visual highlighting", state["text_model"])
        
        template_data = {
            "enhanced_narrative": json.dumps(enhanced_narrative, indent=2),
            "curated_content": json.dumps(story_board, indent=2)
        }
        
        prompt = Template(self.keyword_extraction_prompt).render(**template_data)
        response = agent.step(prompt)
        result = extract_json(response.content)
        
        # add token usage
        state["tokens"].add_text(response.input_tokens, response.output_tokens)
        
        return result

    def _apply_styling(self, layout: List[Dict], colors: Dict, keywords: Dict, state: PosterState) -> List[Dict]:
        """apply styling with proper bullet point and bold formatting"""
        styled_layout = []
        section_keywords = keywords.get("section_keywords", {})
        
        # process all elements with enhanced styling
        for element in layout:
            styled_element = element.copy()
            
            # apply element-specific styling
            if element.get("type") == "title":
                self._apply_title_styling(styled_element, colors)
            
            elif element.get("type") in ["section_title", "title_accent_block", "title_accent_line"]:
                # these are handled by the section title designer
                pass
            
            elif element.get("type") == "section_container":
                self._apply_section_container_styling(styled_element, colors)
                
            elif element.get("type") in ["text", "visual", "mixed"]:
                self._apply_content_styling(styled_element, colors, section_keywords)
            
            elif element.get("type") in ["conf_logo", "aff_logo"]:
                # logos don't need text styling
                pass
            
            styled_layout.append(styled_element)
        
        # sort by priority for proper rendering order
        styled_layout.sort(key=lambda x: x.get("priority", 0.5))
        
        return styled_layout

    def _apply_title_styling(self, element: Dict, colors: Dict):
        """apply styling to title elements"""
        element["font_family"] = "Helvetica Neue"
        element["font_color"] = colors.get("text_on_theme", "#FFFFFF")
        element["font_size"] = 100
        element["author_font_size"] = 72
        element["font_weight"] = "bold"

    def _apply_section_container_styling(self, element: Dict, colors: Dict):
        """apply styling to section container elements"""
        element["border_color"] = colors.get("mono_light", "#CCCCCC")
        element["border_width"] = 1
        element["fill_color"] = "#FFFFFF"  # white background

    def _apply_content_styling(self, element: Dict, colors: Dict, section_keywords: Dict):
        """apply styling to content elements with keyword highlighting"""
        # determine parent section for keyword lookup
        parent_section = self._extract_parent_section(element)
        keywords_for_section = section_keywords.get(parent_section, {})
        
        # ensure proper bullet point formatting first (before keyword highlighting to preserve formatting)
        if element.get("content"):
            element["content"] = self._format_bullet_points(element["content"])
        
        # apply keyword highlighting to content (after bullet formatting)
        if keywords_for_section and element.get("content"):
            content = element["content"]
            original_content = content
            content = self._apply_keyword_highlighting(content, keywords_for_section, colors)
            element["content"] = content
            
            # debug logging
            if content != original_content:
                total_keywords = sum(len(kw_list) for kw_list in keywords_for_section.values() if isinstance(kw_list, list))
                log_agent_info(self.name, f"Applied highlighting to {parent_section}: found {total_keywords} keywords")
            elif keywords_for_section:
                total_keywords = sum(len(kw_list) for kw_list in keywords_for_section.values() if isinstance(kw_list, list))
                log_agent_warning(self.name, f"Keywords found for {parent_section} ({total_keywords} total) but no highlighting applied")
        
        # apply base text styling
        element["font_family"] = "Arial"
        element["font_color"] = colors.get("text", "#000000")
        element["font_size"] = 44

    def _extract_parent_section(self, element: Dict) -> str:
        """extract parent section id from element"""
        element_id = element.get("id", "")
        
        # extract section id from element id
        if "_" in element_id and element_id.endswith("_text"):
            # remove the "_text" suffix to get the section ID
            return element_id[:-5]  # remove last 5 characters ("_text")
        elif "_" in element_id:
            # fallback: remove last part after underscore
            parts = element_id.split("_")
            if len(parts) > 1:
                return "_".join(parts[:-1])
        
        return ""

    def _apply_keyword_highlighting(self, content: str, keywords: Dict, colors: Dict) -> str:
        """apply semantic-based keyword highlighting with three distinct styles"""
        # use contrast color for highlighting
        highlight_color = colors.get("contrast", colors.get("theme", "#1E3A8A"))
        
        # define highlighting styles based on semantic categories
        style_functions = {
            "bold_contrast": lambda text: f"<color:{highlight_color}>{text}</color>",  # contrast color (bold applied automatically in renderer)
            "bold": lambda text: f"**{text}**",  # just bold
            "italic": lambda text: f"*{text}*"  # italic
        }
        
        # apply each style category
        for style_type, style_func in style_functions.items():
            keyword_list = keywords.get(style_type, [])
            for keyword in keyword_list:
                if not keyword.strip():
                    continue
                content = self._highlight_keyword_in_content(content, keyword, style_func)
        
        return content

    def _highlight_keyword_in_content(self, content: str, keyword: str, style_func) -> str:
        """highlight a specific keyword in content"""
        if f"<color:" in content and keyword.lower() in content.lower():
            return content
        
        escaped_keyword = re.escape(keyword.strip())
        
        # first try to match keyword with existing bold formatting
        bold_pattern = rf'\*\*([^*]*?{escaped_keyword}[^*]*?)\*\*'
        bold_match = re.search(bold_pattern, content, re.IGNORECASE)
        
        if bold_match:
            # extract the full bold text, replace only the keyword part
            full_bold_text = bold_match.group(1)
            keyword_in_bold = re.search(escaped_keyword, full_bold_text, re.IGNORECASE)
            if keyword_in_bold:
                # replace just the keyword within the bold text
                original_keyword = keyword_in_bold.group(0)
                new_keyword_formatted = style_func(original_keyword)
                
                # check if style_func returns color format
                if '<color:' in new_keyword_formatted:
                    # remove the outer ** since color already implies bold
                    new_bold_text = full_bold_text.replace(original_keyword, new_keyword_formatted, 1)
                    old_full_bold = bold_match.group(0)
                    return content.replace(old_full_bold, new_bold_text, 1)
                else:
                    # for regular bold/italic, keep the ** wrapper
                    new_keyword = new_keyword_formatted.replace('**', '').replace('**', '')  # remove any extra bold markers
                    new_bold_text = full_bold_text.replace(original_keyword, new_keyword, 1)
                    old_full_bold = bold_match.group(0)
                    new_full_bold = f'**{new_bold_text}**'
                    return content.replace(old_full_bold, new_full_bold, 1)
        
        # then match keyword with existing italic formatting  
        italic_pattern = rf'\*({escaped_keyword})\*'
        italic_match = re.search(italic_pattern, content, re.IGNORECASE)
        
        if italic_match:
            old_formatted = italic_match.group(0)
            new_formatted = style_func(keyword)
            return content.replace(old_formatted, new_formatted, 1)
        
        plain_pattern = rf'\b{escaped_keyword}\b'
        plain_match = re.search(plain_pattern, content, re.IGNORECASE)
        
        if plain_match:
            matched_text = plain_match.group(0)
            new_formatted = style_func(matched_text)
            return content.replace(matched_text, new_formatted, 1)
        
        return content

    def _format_bullet_points(self, content: str) -> str:
        """ensure proper bullet point formatting"""
        if not content:
            return content
        
        lines = content.split('\n')
        formatted_lines = []
        
        for line in lines:
            line = line.strip()
            if not line:
                continue
            
            # ensure start with '•' or preserve existing '•'
            if line.startswith('• '):
                formatted_lines.append(line)
            elif line.startswith('- '):
                # dash -> bullet
                formatted_lines.append('• ' + line[2:])
            elif line.startswith('* '):
                # asterisk -> bullet
                formatted_lines.append('• ' + line[2:])
            elif not line.startswith('•'):
                # add bullet if missing (for content that should be bulleted)
                if any(line.lower().startswith(word) for word in ['the ', 'this ', 'our ', 'we ', 'new ', 'key ', 'main ']):
                    formatted_lines.append('• ' + line)
                else:
                    formatted_lines.append(line)
            else:
                formatted_lines.append(line)
        
        return '\n'.join(formatted_lines)

    def get_styling_interfaces(self) -> Dict[str, Any]:
        """return interfaces for renderer to properly handle styled content"""
        config = load_config()
        font_params = config["typography"]
        
        return {
            "bullet_point_marker": "•",
            "bold_start_tag": "**",
            "bold_end_tag": "**",
            "italic_start_tag": "*",
            "italic_end_tag": "*",
            "color_start_tag": "<color:",
            "color_end_tag": "</color>",
            "line_spacing": font_params["line_spacing"],  # from config
            "paragraph_spacing": font_params["paragraph_spacing"],
            "font_sizes": {
                "title": font_params["sizes"]["title"],
                "authors": font_params["sizes"]["authors"],
                "section_title": font_params["sizes"]["section_title"],
                "body_text": font_params["sizes"]["body_text"]
            }
        }

    def _save_styled_layout(self, state: PosterState):
        """save styled layout and keywords"""
        output_dir = Path(state["output_dir"]) / "content"
        output_dir.mkdir(parents=True, exist_ok=True)
        
        # styled layout
        with open(output_dir / "styled_layout.json", "w", encoding='utf-8') as f:
            json.dump(state.get("styled_layout", []), f, indent=2)
        
        # keywords
        with open(output_dir / "keywords.json", "w", encoding='utf-8') as f:
            json.dump(state.get("keywords", {}), f, indent=2)
        
        # styling interfaces
        with open(output_dir / "styling_interfaces.json", "w", encoding='utf-8') as f:
            json.dump(self.get_styling_interfaces(), f, indent=2)


def font_agent_node(state: PosterState) -> Dict[str, Any]:
    result = FontAgent()(state)
    return {
        **state,
        "styled_layout": result["styled_layout"],
        "keywords": result.get("keywords"),
        "tokens": result["tokens"],
        "current_agent": result["current_agent"],
        "errors": result["errors"]
    }