Spaces:
Sleeping
Sleeping
def cap_text_length(text: str, max_length: int) -> str:
    """Cap text length for display.

    Args:
        text: The text to shorten.
        max_length: Maximum number of characters of ``text`` to keep.
            Negative values are treated as ``0``.

    Returns:
        ``text`` unchanged when it already fits within ``max_length``;
        otherwise its first ``max_length`` characters followed by ``'...'``.
        The ellipsis is appended on top of the kept characters, so a
        truncated result is up to three characters longer than ``max_length``.
    """
    # A negative limit would otherwise be read as a "from the end" slice
    # bound (e.g. text[:-2]) and keep most of the text instead of none.
    limit = max(max_length, 0)
    if len(text) <= limit:
        return text
    return text[:limit] + '...'
| def generate_css_selector_for_element(enhanced_node) -> str | None: | |
| """Generate a CSS selector using node properties from version 0.5.0 approach.""" | |
| import re | |
| if not enhanced_node or not hasattr(enhanced_node, 'tag_name') or not enhanced_node.tag_name: | |
| return None | |
| # Get base selector from tag name (simplified since we don't have xpath in EnhancedDOMTreeNode) | |
| tag_name = enhanced_node.tag_name.lower().strip() | |
| if not tag_name or not re.match(r'^[a-zA-Z][a-zA-Z0-9-]*$', tag_name): | |
| return None | |
| css_selector = tag_name | |
| # Add ID if available (most specific) | |
| if enhanced_node.attributes and 'id' in enhanced_node.attributes: | |
| element_id = enhanced_node.attributes['id'] | |
| if element_id and element_id.strip(): | |
| element_id = element_id.strip() | |
| # Validate ID contains only valid characters for # selector | |
| if re.match(r'^[a-zA-Z][a-zA-Z0-9_-]*$', element_id): | |
| return f'#{element_id}' | |
| else: | |
| # For IDs with special characters ($, ., :, etc.), use attribute selector | |
| # Escape quotes in the ID value | |
| escaped_id = element_id.replace('"', '\\"') | |
| return f'{tag_name}[id="{escaped_id}"]' | |
| # Handle class attributes (from version 0.5.0 approach) | |
| if enhanced_node.attributes and 'class' in enhanced_node.attributes and enhanced_node.attributes['class']: | |
| # Define a regex pattern for valid class names in CSS | |
| valid_class_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_-]*$') | |
| # Iterate through the class attribute values | |
| classes = enhanced_node.attributes['class'].split() | |
| for class_name in classes: | |
| # Skip empty class names | |
| if not class_name.strip(): | |
| continue | |
| # Check if the class name is valid | |
| if valid_class_name_pattern.match(class_name): | |
| # Append the valid class name to the CSS selector | |
| css_selector += f'.{class_name}' | |
| # Expanded set of safe attributes that are stable and useful for selection (from v0.5.0) | |
| SAFE_ATTRIBUTES = { | |
| # Data attributes (if they're stable in your application) | |
| 'id', | |
| # Standard HTML attributes | |
| 'name', | |
| 'type', | |
| 'placeholder', | |
| # Accessibility attributes | |
| 'aria-label', | |
| 'aria-labelledby', | |
| 'aria-describedby', | |
| 'role', | |
| # Common form attributes | |
| 'for', | |
| 'autocomplete', | |
| 'required', | |
| 'readonly', | |
| # Media attributes | |
| 'alt', | |
| 'title', | |
| 'src', | |
| # Custom stable attributes (add any application-specific ones) | |
| 'href', | |
| 'target', | |
| } | |
| # Always include dynamic attributes (include_dynamic_attributes=True equivalent) | |
| include_dynamic_attributes = True | |
| if include_dynamic_attributes: | |
| dynamic_attributes = { | |
| 'data-id', | |
| 'data-qa', | |
| 'data-cy', | |
| 'data-testid', | |
| } | |
| SAFE_ATTRIBUTES.update(dynamic_attributes) | |
| # Handle other attributes (from version 0.5.0 approach) | |
| if enhanced_node.attributes: | |
| for attribute, value in enhanced_node.attributes.items(): | |
| if attribute == 'class': | |
| continue | |
| # Skip invalid attribute names | |
| if not attribute.strip(): | |
| continue | |
| if attribute not in SAFE_ATTRIBUTES: | |
| continue | |
| # Escape special characters in attribute names | |
| safe_attribute = attribute.replace(':', r'\:') | |
| # Handle different value cases | |
| if value == '': | |
| css_selector += f'[{safe_attribute}]' | |
| elif any(char in value for char in '"\'<>`\n\r\t'): | |
| # Use contains for values with special characters | |
| # For newline-containing text, only use the part before the newline | |
| if '\n' in value: | |
| value = value.split('\n')[0] | |
| # Regex-substitute *any* whitespace with a single space, then strip. | |
| collapsed_value = re.sub(r'\s+', ' ', value).strip() | |
| # Escape embedded double-quotes. | |
| safe_value = collapsed_value.replace('"', '\\"') | |
| css_selector += f'[{safe_attribute}*="{safe_value}"]' | |
| else: | |
| css_selector += f'[{safe_attribute}="{value}"]' | |
| # Final validation: ensure the selector is safe and doesn't contain problematic characters | |
| # Note: quotes are allowed in attribute selectors like [name="value"] | |
| if css_selector and not any(char in css_selector for char in ['\n', '\r', '\t']): | |
| return css_selector | |
| # If we get here, the selector was problematic, return just the tag name as fallback | |
| return tag_name | |