Spaces:
Sleeping
Sleeping
File size: 5,980 Bytes
b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab b482b16 268baab | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 | """
Response formatter for handling long responses and table formatting.
Ensures responses are mobile-friendly and appropriately sized.
"""
import re
from config import MAX_RESPONSE_WORDS_LEAD, MAX_RESPONSE_WORDS_SUBAGENT
from src.utils.logging import get_logger
# Module-level logger, obtained from the project's logging helper under the
# name "response_formatter".
logger = get_logger("response_formatter")
# Prompt appended to a truncated response, keyed by language code.
# The markdown asterisks render the prompt in italics.
CONTINUATION_PROMPT = {
    "en": "*Would you like me to continue with more details?*",  # English
    "de": "*Möchten Sie, dass ich mit weiteren Details fortfahre?*",  # German
}
class ResponseFormatter:
    """Formats agent responses for optimal display.

    Every method is a static helper; the class only acts as a namespace.
    """

    # Markdown table: a header row, a separator row, then one or more data
    # rows. Each data row is matched together with the newline *preceding* it,
    # so the newline that follows the table is left in the text and the last
    # row may end the input without a trailing newline.
    _TABLE_PATTERN = re.compile(
        r'\|[^\n]+\|[ \t]*\n'                # header row
        r'\|[-:| \t]+\|[ \t]*'               # separator row
        r'(?:\n\|[^\n]+\|[ \t]*(?=\n|\Z))+'  # data rows
    )

    @staticmethod
    def count_words(text: str) -> int:
        """Return the number of whitespace-separated words in ``text``."""
        return len(text.split())

    @staticmethod
    def _split_row(row: str) -> list[str]:
        """Split one table row into stripped cells, keeping empty cells."""
        # Rows handed over by _TABLE_PATTERN start and end with a pipe once
        # surrounding whitespace is stripped; drop exactly those two pipes so
        # that empty cells in between keep their column position.
        return [cell.strip() for cell in row.strip()[1:-1].split('|')]

    @staticmethod
    def _table_to_bullets(match: re.Match) -> str:
        """Render one matched markdown table as a bullet list."""
        table_text = match.group(0)
        lines = [line for line in table_text.split('\n') if line.strip()]
        if len(lines) < 3:  # Not a valid table
            return table_text

        headers = ResponseFormatter._split_row(lines[0])

        bullet_points = []
        # lines[1] is the separator row and carries no content.
        for line in lines[2:]:
            parts = []
            for index, cell in enumerate(ResponseFormatter._split_row(line)):
                if not cell:
                    continue  # empty cell: nothing to show for this column
                # Cells beyond the header width (or under an empty header)
                # are kept as plain values instead of being dropped.
                header = headers[index] if index < len(headers) else ""
                parts.append(f"**{header}**: {cell}" if header else cell)
            if parts:
                bullet_points.append(f"• {', '.join(parts)}")

        # A table without any non-empty data cell is left untouched rather
        # than being replaced by an empty string.
        return "\n".join(bullet_points) if bullet_points else table_text

    @staticmethod
    def remove_tables(text: str) -> str:
        """
        Convert markdown tables to bullet point lists.

        Tables don't display well on mobile devices. Each data row becomes
        one bullet of the form ``• **Header**: value, **Header**: value``;
        empty cells are omitted. Text before and after the table, including
        the newline that follows the last row, is preserved.

        Args:
            text: Response text potentially containing tables

        Returns:
            Text with tables converted to bullet points
        """
        formatted = ResponseFormatter._TABLE_PATTERN.sub(
            ResponseFormatter._table_to_bullets, text
        )
        if formatted != text:
            logger.info("Converted table to bullet points for mobile-friendly display")
        return formatted

    @staticmethod
    def _split_line(line: str, limit: int) -> tuple[str, str]:
        """Split ``line`` after its first ``limit`` words.

        Whitespace inside the kept part (including leading indentation) is
        preserved; whitespace at the cut is dropped from the remainder.
        """
        cut = len(line)
        for count, word in enumerate(re.finditer(r'\S+', line), start=1):
            if count == limit:
                cut = word.end()
                break
        return line[:cut], line[cut:].lstrip()

    @staticmethod
    def chunk_response(
        text: str,
        max_words: int = MAX_RESPONSE_WORDS_LEAD,
        language: str = 'en'
    ) -> tuple[str, str | None]:
        """
        Split long response into current response and continuation.

        The split happens on line boundaries so markdown structure stays
        intact. Only when the first line that contains words is by itself
        longer than the limit is that line split at a word boundary.

        Args:
            text: Full response text
            max_words: Maximum words for current response
            language: Language code ('en' or 'de') for continuation prompt;
                unknown codes fall back to English

        Returns:
            Tuple of (current_response, continuation_or_none). When the text
            fits, or nothing would be left over, ``(text, None)`` is
            returned and no continuation prompt is appended.
        """
        word_count = ResponseFormatter.count_words(text)
        if word_count <= max_words:
            return text, None

        # Always keep at least one word so the first chunk is never empty.
        limit = max(max_words, 1)

        lines = text.split('\n')
        kept_lines: list[str] = []
        remaining_lines: list[str] = []
        kept_words = 0
        for index, line in enumerate(lines):
            line_words = len(line.split())
            if kept_words + line_words > limit:
                if kept_words == 0:
                    # Nothing but blank lines collected so far: this single
                    # line exceeds the limit, so cut inside it.
                    head, tail = ResponseFormatter._split_line(line, limit)
                    kept_lines.append(head)
                    remaining_lines = [tail, *lines[index + 1:]]
                else:
                    remaining_lines = lines[index:]
                break
            kept_lines.append(line)
            kept_words += line_words

        continuation = '\n'.join(remaining_lines)
        if not continuation.strip():
            # Nothing meaningful left to continue with.
            return text, None

        logger.info(f"Response has {word_count} words, chunking to {max_words} words")

        # Add continuation prompt in the correct language
        continuation_msg = CONTINUATION_PROMPT.get(language, CONTINUATION_PROMPT['en'])
        current = '\n'.join(kept_lines) + f"\n\n{continuation_msg}"
        return current, continuation

    @staticmethod
    def format_response(
        text: str,
        agent_type: str = 'lead',
        enable_chunking: bool = True,
        language: str = 'en'
    ) -> str:
        """
        Format response: remove tables and handle length.

        Args:
            text: Raw response text
            agent_type: 'lead' or 'subagent' (determines max length); any
                value other than 'lead' uses the subagent limit
            enable_chunking: Whether to chunk long responses
            language: Language code ('en' or 'de') for any generated text

        Returns:
            Formatted response text. When chunking truncates the text, only
            the first chunk (with the continuation prompt) is returned; the
            remainder is discarded here.
        """
        formatted = ResponseFormatter.remove_tables(text)

        max_words = (
            MAX_RESPONSE_WORDS_LEAD
            if agent_type == 'lead'
            else MAX_RESPONSE_WORDS_SUBAGENT
        )

        if enable_chunking:
            formatted, _continuation = ResponseFormatter.chunk_response(
                formatted,
                max_words,
                language
            )
        return formatted

    @staticmethod
    def clean_response(text: str) -> str:
        """
        Clean up response text (remove extra whitespace, etc.)

        Runs of three or more newlines are collapsed to exactly two, and
        leading/trailing whitespace is stripped.

        Args:
            text: Response text

        Returns:
            Cleaned text
        """
        return re.sub(r'\n{3,}', '\n\n', text).strip()

    @staticmethod
    def format_name_of_university(formatted_response: str, language: str) -> str:
        """
        Use the English university name in English responses.

        Args:
            formatted_response: Response text
            language: Language code; only "en" triggers the replacement

        Returns:
            The text with every "Universität St.Gallen" replaced by
            "University of St.Gallen" when ``language`` is "en", otherwise
            the text unchanged.
        """
        if language == "en":
            # Literal replacement; no regular expression is needed.
            return formatted_response.replace(
                "Universität St.Gallen", "University of St.Gallen"
            )
        return formatted_response
|