# Author: Speedofmastery
# Merge Landrun + Browser-Use + Chromium with AI agent support (without binary files)
# Commit: d7b3d84
"""Utility functions for code-use agent."""
import re
def truncate_message_content(content: str, max_length: int = 10000) -> str:
    """Truncate message content to max_length characters for history."""
    overflow = len(content) - max_length
    # Short content passes through untouched.
    if overflow <= 0:
        return content
    # Keep the first max_length characters and record how much was dropped.
    marker = f'\n\n[... truncated {overflow} characters for history]'
    return content[:max_length] + marker
def detect_token_limit_issue(
completion: str,
completion_tokens: int | None,
max_tokens: int | None,
stop_reason: str | None,
) -> tuple[bool, str | None]:
"""
Detect if the LLM response hit token limits or is repetitive garbage.
Returns: (is_problematic, error_message)
"""
# Check 1: Stop reason indicates max_tokens
if stop_reason == 'max_tokens':
return True, f'Response terminated due to max_tokens limit (stop_reason: {stop_reason})'
# Check 2: Used 90%+ of max_tokens (if we have both values)
if completion_tokens is not None and max_tokens is not None and max_tokens > 0:
usage_ratio = completion_tokens / max_tokens
if usage_ratio >= 0.9:
return True, f'Response used {usage_ratio:.1%} of max_tokens ({completion_tokens}/{max_tokens})'
# Check 3: Last 6 characters repeat 40+ times (repetitive garbage)
if len(completion) >= 6:
last_6 = completion[-6:]
repetition_count = completion.count(last_6)
if repetition_count >= 40:
return True, f'Repetitive output detected: last 6 chars "{last_6}" appears {repetition_count} times'
return False, None
def extract_url_from_task(task: str) -> str | None:
"""Extract URL from task string using naive pattern matching."""
# Remove email addresses from task before looking for URLs
task_without_emails = re.sub(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b', '', task)
# Look for common URL patterns
patterns = [
r'https?://[^\s<>"\']+', # Full URLs with http/https
r'(?:www\.)?[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.[a-zA-Z]{2,}(?:/[^\s<>"\']*)?', # Domain names with subdomains and optional paths
]
found_urls = []
for pattern in patterns:
matches = re.finditer(pattern, task_without_emails)
for match in matches:
url = match.group(0)
# Remove trailing punctuation that's not part of URLs
url = re.sub(r'[.,;:!?()\[\]]+$', '', url)
# Add https:// if missing
if not url.startswith(('http://', 'https://')):
url = 'https://' + url
found_urls.append(url)
unique_urls = list(set(found_urls))
# If multiple URLs found, skip auto-navigation to avoid ambiguity
if len(unique_urls) > 1:
return None
# If exactly one URL found, return it
if len(unique_urls) == 1:
return unique_urls[0]
return None
def extract_code_blocks(text: str) -> dict[str, str]:
    """Extract all code blocks from markdown response.
    Supports:
    - ```python, ```js, ```javascript, ```bash, ```markdown, ```md
    - Named blocks: ```js variable_name → saved as 'variable_name' in namespace
    - Nested blocks: Use 4+ backticks for outer block when inner content has 3 backticks
    Returns dict mapping block_name -> content
    Note: Python blocks are NO LONGER COMBINED. Each python block executes separately
    to allow sequential execution with JS/bash blocks in between.
    """
    # Pattern to match code blocks with language identifier and optional variable name
    # Matches: ```lang\n or ```lang varname\n or ````+lang\n (4+ backticks for nested blocks)
    # Uses non-greedy matching and backreferences to match opening/closing backticks
    pattern = r'(`{3,})(\w+)(?:\s+(\w+))?\n(.*?)\1(?:\n|$)'
    matches = re.findall(pattern, text, re.DOTALL)
    # Canonical names for each supported language tag; unknown tags are skipped.
    # Bug fix: 'bash' itself was missing from the normalization chain (only
    # 'sh'/'shell' were mapped), so ```bash blocks were silently dropped even
    # though the docstring promises support for them.
    aliases = {
        'javascript': 'js', 'js': 'js',
        'markdown': 'markdown', 'md': 'markdown',
        'sh': 'bash', 'shell': 'bash', 'bash': 'bash',
        'python': 'python',
    }
    blocks: dict[str, str] = {}
    python_block_counter = 0
    for _backticks, lang, var_name, content in matches:
        lang_normalized = aliases.get(lang.lower())
        if lang_normalized is None:
            # Unknown language, skip
            continue
        content = content.rstrip()  # Only strip trailing whitespace, preserve leading for indentation
        if not content:
            continue
        if var_name:
            # Named block - use the variable name
            blocks[var_name] = content
        elif lang_normalized == 'python':
            # Unnamed Python blocks - give each a unique key to preserve order
            blocks[f'python_{python_block_counter}'] = content
            python_block_counter += 1
        else:
            # Other unnamed blocks (js, bash, markdown) - keep last one only
            blocks[lang_normalized] = content
    if python_block_counter > 0:
        # Mark the first python block as 'python' for backward compat
        blocks['python'] = blocks['python_0']
    elif 'python' not in blocks:
        # Fallback: a bare ``` block with no language tag is treated as python
        generic_pattern = r'```\n(.*?)```'
        generic_matches = re.findall(generic_pattern, text, re.DOTALL)
        if generic_matches:
            combined = '\n\n'.join(m.strip() for m in generic_matches if m.strip())
            if combined:
                blocks['python'] = combined
    return blocks