Spaces:
Sleeping
Sleeping
File size: 4,284 Bytes
def cap_text_length(text: str, max_length: int) -> str:
    """Cap text length for display.

    Args:
        text: The text to shorten.
        max_length: Maximum number of characters of ``text`` to keep.
            Negative values are treated as 0.

    Returns:
        ``text`` unchanged when it has at most ``max_length`` characters;
        otherwise its first ``max_length`` characters followed by ``'...'``.
        The ellipsis is appended after the cut, so a truncated result is
        ``max_length + 3`` characters long.
    """
    # A negative limit would otherwise turn into a "drop the last N chars"
    # slice (text[:-N]) and keep *more* text than a limit of 0 does.
    limit = max(max_length, 0)
    if len(text) <= limit:
        return text
    return text[:limit] + '...'
def generate_css_selector_for_element(enhanced_node) -> str | None:
"""Generate a CSS selector using node properties from version 0.5.0 approach."""
import re
if not enhanced_node or not hasattr(enhanced_node, 'tag_name') or not enhanced_node.tag_name:
return None
# Get base selector from tag name (simplified since we don't have xpath in EnhancedDOMTreeNode)
tag_name = enhanced_node.tag_name.lower().strip()
if not tag_name or not re.match(r'^[a-zA-Z][a-zA-Z0-9-]*$', tag_name):
return None
css_selector = tag_name
# Add ID if available (most specific)
if enhanced_node.attributes and 'id' in enhanced_node.attributes:
element_id = enhanced_node.attributes['id']
if element_id and element_id.strip():
element_id = element_id.strip()
# Validate ID contains only valid characters for # selector
if re.match(r'^[a-zA-Z][a-zA-Z0-9_-]*$', element_id):
return f'#{element_id}'
else:
# For IDs with special characters ($, ., :, etc.), use attribute selector
# Escape quotes in the ID value
escaped_id = element_id.replace('"', '\\"')
return f'{tag_name}[id="{escaped_id}"]'
# Handle class attributes (from version 0.5.0 approach)
if enhanced_node.attributes and 'class' in enhanced_node.attributes and enhanced_node.attributes['class']:
# Define a regex pattern for valid class names in CSS
valid_class_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_-]*$')
# Iterate through the class attribute values
classes = enhanced_node.attributes['class'].split()
for class_name in classes:
# Skip empty class names
if not class_name.strip():
continue
# Check if the class name is valid
if valid_class_name_pattern.match(class_name):
# Append the valid class name to the CSS selector
css_selector += f'.{class_name}'
# Expanded set of safe attributes that are stable and useful for selection (from v0.5.0)
SAFE_ATTRIBUTES = {
# Data attributes (if they're stable in your application)
'id',
# Standard HTML attributes
'name',
'type',
'placeholder',
# Accessibility attributes
'aria-label',
'aria-labelledby',
'aria-describedby',
'role',
# Common form attributes
'for',
'autocomplete',
'required',
'readonly',
# Media attributes
'alt',
'title',
'src',
# Custom stable attributes (add any application-specific ones)
'href',
'target',
}
# Always include dynamic attributes (include_dynamic_attributes=True equivalent)
include_dynamic_attributes = True
if include_dynamic_attributes:
dynamic_attributes = {
'data-id',
'data-qa',
'data-cy',
'data-testid',
}
SAFE_ATTRIBUTES.update(dynamic_attributes)
# Handle other attributes (from version 0.5.0 approach)
if enhanced_node.attributes:
for attribute, value in enhanced_node.attributes.items():
if attribute == 'class':
continue
# Skip invalid attribute names
if not attribute.strip():
continue
if attribute not in SAFE_ATTRIBUTES:
continue
# Escape special characters in attribute names
safe_attribute = attribute.replace(':', r'\:')
# Handle different value cases
if value == '':
css_selector += f'[{safe_attribute}]'
elif any(char in value for char in '"\'<>`\n\r\t'):
# Use contains for values with special characters
# For newline-containing text, only use the part before the newline
if '\n' in value:
value = value.split('\n')[0]
# Regex-substitute *any* whitespace with a single space, then strip.
collapsed_value = re.sub(r'\s+', ' ', value).strip()
# Escape embedded double-quotes.
safe_value = collapsed_value.replace('"', '\\"')
css_selector += f'[{safe_attribute}*="{safe_value}"]'
else:
css_selector += f'[{safe_attribute}="{value}"]'
# Final validation: ensure the selector is safe and doesn't contain problematic characters
# Note: quotes are allowed in attribute selectors like [name="value"]
if css_selector and not any(char in css_selector for char in ['\n', '\r', '\t']):
return css_selector
# If we get here, the selector was problematic, return just the tag name as fallback
return tag_name
|