File size: 5,980 Bytes
b482b16
 
 
 
 
 
 
 
 
 
 
268baab
 
 
 
 
 
b482b16
 
268baab
b482b16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268baab
 
 
b482b16
 
 
268baab
b482b16
 
 
268baab
 
b482b16
 
 
 
268baab
b482b16
 
268baab
 
b482b16
268baab
 
 
 
 
 
 
 
b482b16
268baab
 
 
 
 
 
 
 
 
 
b482b16
 
 
 
 
 
268baab
 
b482b16
 
 
268baab
b482b16
 
 
 
268baab
 
b482b16
 
 
 
 
268baab
b482b16
 
268baab
 
b482b16
 
268baab
b482b16
 
268baab
 
 
 
b482b16
268baab
b482b16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268baab
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
"""
Response formatter for handling long responses and table formatting.
Ensures responses are mobile-friendly and appropriately sized.
"""
import re
from config import MAX_RESPONSE_WORDS_LEAD, MAX_RESPONSE_WORDS_SUBAGENT
from src.utils.logging import get_logger

# Module-level logger, obtained from the project's get_logger helper under
# the name "response_formatter".
logger = get_logger("response_formatter")


# Prompt appended to a truncated response, keyed by language code.
# ResponseFormatter.chunk_response looks the language up here and falls back
# to the 'en' entry for any code that is not a key of this dict.
CONTINUATION_PROMPT = {
    'en': "*Would you like me to continue with more details?*",
    'de': "*Möchten Sie, dass ich mit weiteren Details fortfahre?*"
}


class ResponseFormatter:
    """Formats agent responses for optimal display.

    Every method is a ``@staticmethod``; the class is only a namespace and
    holds no instance state.
    """

    # A markdown table: header row, separator row, one or more data rows.
    # The final row may end at end-of-text instead of a newline, and rows may
    # carry trailing spaces/tabs after the closing pipe.
    _TABLE_PATTERN = re.compile(
        r'\|[^\n]+\|[ \t]*\n'
        r'\|[-:| \t]+\|[ \t]*\n'
        r'(?:\|[^\n]+\|[ \t]*(?:\n|\Z))+'
    )

    @staticmethod
    def count_words(text: str) -> int:
        """Return the number of whitespace-separated words in ``text``."""
        return len(text.split())

    @staticmethod
    def _split_table_row(line: str) -> list[str]:
        """Split one table row into stripped cells, keeping empty cells."""
        inner = line.strip().removeprefix('|').removesuffix('|')
        return [cell.strip() for cell in inner.split('|')]

    @staticmethod
    def remove_tables(text: str) -> str:
        """
        Convert markdown tables to bullet point lists.
        Tables don't display well on mobile devices.

        Each data row becomes one bullet of the form
        ``• **Header**: value, **Header**: value``. Empty cells are skipped
        (the rest of the row is kept), cells without a matching header are
        emitted as bare values, and the line break that followed the table
        is preserved so the next paragraph is not glued to the last bullet.

        Args:
            text: Response text potentially containing tables

        Returns:
            Text with tables converted to bullet points
        """
        def table_to_bullets(match):
            table_text = match.group(0)
            # The pattern guarantees header + separator + at least one row.
            lines = [line.strip() for line in table_text.split('\n') if line.strip()]
            headers = ResponseFormatter._split_table_row(lines[0])

            bullet_points = []
            for line in lines[2:]:  # lines[1] is the separator row
                parts = []
                for index, cell in enumerate(ResponseFormatter._split_table_row(line)):
                    if not cell:
                        continue
                    header = headers[index] if index < len(headers) else ""
                    parts.append(f"**{header}**: {cell}" if header else cell)
                if parts:
                    bullet_points.append("• " + ", ".join(parts))

            if not bullet_points:
                # Nothing convertible: keep the table rather than deleting it.
                return table_text

            # The match swallowed the newline after the last row; restore it.
            trailing = "\n" if table_text.endswith("\n") else ""
            return "\n".join(bullet_points) + trailing

        # Replace tables with bullet points
        formatted = ResponseFormatter._TABLE_PATTERN.sub(table_to_bullets, text)

        if formatted != text:
            logger.info("Converted table to bullet points for mobile-friendly display")

        return formatted

    @staticmethod
    def chunk_response(
        text: str,
        max_words: int = MAX_RESPONSE_WORDS_LEAD,
        language: str = 'en'
    ) -> tuple[str, str | None]:
        """
        Split long response into current response and continuation.

        The split happens on line boundaries so markdown structure survives.
        If the first line that contains words is by itself longer than the
        budget, that line is cut after ``max_words`` words instead, so the
        current chunk never exceeds the budget and the continuation is never
        empty.

        Args:
            text: Full response text
            max_words: Maximum words for current response. Values below 1
                are treated as 1 so every chunk makes progress.
            language: Language code ('en' or 'de') for continuation prompt;
                unknown codes fall back to English

        Returns:
            Tuple of (current_response, continuation_or_none). The
            continuation is None when the text already fits.
        """
        budget = max(max_words, 1)
        word_count = ResponseFormatter.count_words(text)

        if word_count <= budget:
            return text, None

        # Need to chunk — preserve line structure (markdown formatting)
        logger.info(f"Response has {word_count} words, chunking to {max_words} words")

        lines = text.split('\n')
        kept_count = 0
        kept_words = 0
        # word_count > budget guarantees this loop hits the break.
        for line in lines:
            line_words = len(line.split())
            if kept_words + line_words > budget:
                break
            kept_count += 1
            kept_words += line_words

        kept = lines[:kept_count]
        remaining = lines[kept_count:]

        if kept_words == 0:
            # Only blank lines (or nothing) fit: the next line alone exceeds
            # the budget, so cut it right after its budget-th word.
            overlong = remaining[0]
            cut = [m.end() for m in re.finditer(r'\S+', overlong)][budget - 1]
            kept.append(overlong[:cut])
            remaining[0] = overlong[cut:].lstrip()

        current = '\n'.join(kept)
        continuation = '\n'.join(remaining)

        # Add continuation prompt in the correct language
        continuation_msg = CONTINUATION_PROMPT.get(language, CONTINUATION_PROMPT['en'])
        current += f"\n\n{continuation_msg}"

        return current, continuation

    @staticmethod
    def format_response(
        text: str,
        agent_type: str = 'lead',
        enable_chunking: bool = True,
        language: str = 'en'
    ) -> str:
        """
        Format response: remove tables and handle length.

        Args:
            text: Raw response text
            agent_type: 'lead' selects MAX_RESPONSE_WORDS_LEAD; any other
                value selects MAX_RESPONSE_WORDS_SUBAGENT
            enable_chunking: Whether to chunk long responses
            language: Language code ('en' or 'de') for any generated text

        Returns:
            Formatted response text. When chunking truncates the text, only
            the first chunk is returned; the continuation is discarded here.
        """
        # Remove tables
        formatted = ResponseFormatter.remove_tables(text)

        # Determine max words
        max_words = (
            MAX_RESPONSE_WORDS_LEAD
            if agent_type == 'lead'
            else MAX_RESPONSE_WORDS_SUBAGENT
        )

        # Handle chunking if enabled
        if enable_chunking:
            formatted, _continuation = ResponseFormatter.chunk_response(
                formatted,
                max_words,
                language
            )

        return formatted

    @staticmethod
    def clean_response(text: str) -> str:
        """
        Clean up response text (remove extra whitespace, etc.)

        Runs of three or more newlines are collapsed to two, then leading
        and trailing whitespace is stripped.

        Args:
            text: Response text

        Returns:
            Cleaned text
        """
        # Remove multiple consecutive newlines
        cleaned = re.sub(r'\n{3,}', '\n\n', text)

        # Remove leading/trailing whitespace
        return cleaned.strip()

    @staticmethod
    def format_name_of_university(formatted_response: str, language: str) -> str:
        """
        Use the English university name in English responses.

        Args:
            formatted_response: Response text
            language: Language code; only 'en' triggers a replacement

        Returns:
            For 'en', the text with every "Universität St.Gallen" replaced
            by "University of St.Gallen"; otherwise the text unchanged
        """
        if language == "en":
            formatted_response = formatted_response.replace(
                "Universität St.Gallen", "University of St.Gallen"
            )

        return formatted_response