def cap_text_length(text: str, max_length: int) -> str: """Cap text length for display.""" if len(text) <= max_length: return text return text[:max_length] + '...' def generate_css_selector_for_element(enhanced_node) -> str | None: """Generate a CSS selector using node properties from version 0.5.0 approach.""" import re if not enhanced_node or not hasattr(enhanced_node, 'tag_name') or not enhanced_node.tag_name: return None # Get base selector from tag name (simplified since we don't have xpath in EnhancedDOMTreeNode) tag_name = enhanced_node.tag_name.lower().strip() if not tag_name or not re.match(r'^[a-zA-Z][a-zA-Z0-9-]*$', tag_name): return None css_selector = tag_name # Add ID if available (most specific) if enhanced_node.attributes and 'id' in enhanced_node.attributes: element_id = enhanced_node.attributes['id'] if element_id and element_id.strip(): element_id = element_id.strip() # Validate ID contains only valid characters for # selector if re.match(r'^[a-zA-Z][a-zA-Z0-9_-]*$', element_id): return f'#{element_id}' else: # For IDs with special characters ($, ., :, etc.), use attribute selector # Escape quotes in the ID value escaped_id = element_id.replace('"', '\\"') return f'{tag_name}[id="{escaped_id}"]' # Handle class attributes (from version 0.5.0 approach) if enhanced_node.attributes and 'class' in enhanced_node.attributes and enhanced_node.attributes['class']: # Define a regex pattern for valid class names in CSS valid_class_name_pattern = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_-]*$') # Iterate through the class attribute values classes = enhanced_node.attributes['class'].split() for class_name in classes: # Skip empty class names if not class_name.strip(): continue # Check if the class name is valid if valid_class_name_pattern.match(class_name): # Append the valid class name to the CSS selector css_selector += f'.{class_name}' # Expanded set of safe attributes that are stable and useful for selection (from v0.5.0) SAFE_ATTRIBUTES = { # Data attributes (if they're stable in your application) 'id', # Standard HTML attributes 'name', 'type', 'placeholder', # Accessibility attributes 'aria-label', 'aria-labelledby', 'aria-describedby', 'role', # Common form attributes 'for', 'autocomplete', 'required', 'readonly', # Media attributes 'alt', 'title', 'src', # Custom stable attributes (add any application-specific ones) 'href', 'target', } # Always include dynamic attributes (include_dynamic_attributes=True equivalent) include_dynamic_attributes = True if include_dynamic_attributes: dynamic_attributes = { 'data-id', 'data-qa', 'data-cy', 'data-testid', } SAFE_ATTRIBUTES.update(dynamic_attributes) # Handle other attributes (from version 0.5.0 approach) if enhanced_node.attributes: for attribute, value in enhanced_node.attributes.items(): if attribute == 'class': continue # Skip invalid attribute names if not attribute.strip(): continue if attribute not in SAFE_ATTRIBUTES: continue # Escape special characters in attribute names safe_attribute = attribute.replace(':', r'\:') # Handle different value cases if value == '': css_selector += f'[{safe_attribute}]' elif any(char in value for char in '"\'<>`\n\r\t'): # Use contains for values with special characters # For newline-containing text, only use the part before the newline if '\n' in value: value = value.split('\n')[0] # Regex-substitute *any* whitespace with a single space, then strip. collapsed_value = re.sub(r'\s+', ' ', value).strip() # Escape embedded double-quotes. safe_value = collapsed_value.replace('"', '\\"') css_selector += f'[{safe_attribute}*="{safe_value}"]' else: css_selector += f'[{safe_attribute}="{value}"]' # Final validation: ensure the selector is safe and doesn't contain problematic characters # Note: quotes are allowed in attribute selectors like [name="value"] if css_selector and not any(char in css_selector for char in ['\n', '\r', '\t']): return css_selector # If we get here, the selector was problematic, return just the tag name as fallback return tag_name