"""
Identifier Utilities for Schema Generation

This module provides centralized functions for generating consistent identifiers
and validating schema configurations across all annotation schema types.
"""

import html
import logging
from collections import Counter
from collections.abc import Mapping
from typing import Any, Callable, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


def validate_schema_config(annotation_scheme: dict) -> bool:
    """
    Validate schema configuration before generating HTML.

    Checks that ``name`` and ``description`` are present and non-blank and,
    when ``labels`` is present, that it is non-empty and that every label is
    either a string or a dict with a string ``name``, with no blank or
    duplicate names (names are compared after stripping whitespace).

    Args:
        annotation_scheme: Schema configuration dictionary

    Returns:
        bool: True if valid, raises exception if invalid

    Raises:
        ValueError: If configuration is invalid
    """
    # Check required fields
    for field in ("name", "description"):
        if field not in annotation_scheme:
            raise ValueError(f"Missing required field: {field}")

    # Validate schema name
    schema_name = annotation_scheme["name"]
    if not schema_name or not str(schema_name).strip():
        raise ValueError("Schema name cannot be empty")

    # Validate description
    description = annotation_scheme["description"]
    if not description or not str(description).strip():
        raise ValueError("Schema description cannot be empty")

    # Validate labels if present
    if "labels" in annotation_scheme:
        labels = annotation_scheme["labels"]
        if not labels:
            raise ValueError("Labels list cannot be empty")

        label_names = []
        for label in labels:
            if isinstance(label, str):
                label_names.append(label.strip())
            elif isinstance(label, dict) and isinstance(label.get("name"), str):
                # A non-string name (e.g. an int or None) is rejected here with
                # the documented ValueError rather than failing on .strip().
                label_names.append(label["name"].strip())
            else:
                raise ValueError(f"Invalid label format: {label}")

        # Check for empty labels
        if any(not name for name in label_names):
            raise ValueError("Label names cannot be empty")

        # Check for duplicates (single pass; reported in first-seen order)
        duplicates = [
            name for name, count in Counter(label_names).items() if count > 1
        ]
        if duplicates:
            raise ValueError(f"Duplicate labels found: {duplicates}")

    logger.debug("Schema configuration validation passed for: %s", schema_name)
    return True


def generate_element_identifier(
    schema_name: str, label_name: str, element_type: str = "default"
) -> Dict[str, str]:
    """
    Generate consistent identifiers for form elements.

    Args:
        schema_name: Name of the annotation schema
        label_name: Name of the specific label/option
        element_type: Type of element (radio, checkbox, text, etc.)

    Returns:
        dict: Contains id, name, schema, and label_name attributes
    """
    # Sanitize inputs
    safe_schema = escape_html_content(schema_name.strip())
    safe_label = escape_html_content(label_name.strip())

    # Generate unique identifier (using underscore to avoid conflicts with CSS selectors)
    element_id = f"{safe_schema}_{safe_label}_{element_type}".replace(":::", "_")

    # For radio buttons, use schema name as the group name to ensure mutual exclusivity
    if element_type == "radio":
        element_name = safe_schema
    else:
        element_name = f"{safe_schema}:::{safe_label}"

    return {
        "id": element_id,
        "name": element_name,
        "schema": safe_schema,
        "label_name": safe_label,
    }


def generate_element_value(label_data: Any, index: int, annotation_scheme: dict) -> str:
    """
    Generate consistent value attributes for form elements.

    Precedence: an explicit ``key_value`` on a dict label, then the last digit
    of ``index`` when ``sequential_key_binding`` is enabled, then the label
    name, and finally the index itself.

    Args:
        label_data: Label configuration (string or dict)
        index: Index of the label in the list
        annotation_scheme: Full schema configuration

    Returns:
        str: Value to use for the element
    """
    # Handle custom key_value first
    if isinstance(label_data, dict) and "key_value" in label_data:
        return str(label_data["key_value"])

    # Handle sequential key binding
    if annotation_scheme.get("sequential_key_binding"):
        return str(index % 10)

    # Default to label name
    if isinstance(label_data, str):
        return label_data
    if isinstance(label_data, dict) and "name" in label_data:
        return label_data["name"]

    # Fallback to index
    return str(index)


def escape_html_content(content: str) -> str:
    """
    Escape HTML content to prevent injection.

    Args:
        content: Content to escape

    Returns:
        str: Escaped content; an empty string for any falsy input
    """
    if not content:
        return ""
    return html.escape(str(content))


def humanize_label(text: str) -> str:
    """Turn a machine label (``agent_a_much_better``) into readable text
    (``Agent A Much Better``) for display only -- the stored annotation
    value is always the original label name, never this.

    Tokens that are already mixed/upper case or contain digits+letters
    (acronyms, ``GPT4``, ``v2``) are preserved as-is so we don't mangle
    them; purely lowercase tokens are capitalized.
    """
    if not text:
        return ""
    raw = str(text)
    # Never mangle Jinja/template expressions (e.g. dynamic_labels:
    # "{{instance_obj.labels[0]}}"). Humanizing would rewrite `instance_obj`
    # to `instance Obj`, producing invalid Jinja and a 500 at render time.
    if "{{" in raw or "{%" in raw:
        return raw

    spaced = raw.replace("_", " ").replace("-", " ")
    spaced = " ".join(spaced.split())  # collapse whitespace

    out = []
    for tok in spaced.split(" "):
        # str.islower() ignores digits, so "v2".islower() is True; exclude
        # digit-bearing tokens explicitly to keep them as written.
        if tok.islower() and not any(ch.isdigit() for ch in tok):
            out.append(tok[:1].upper() + tok[1:])
        else:
            out.append(tok)  # preserve ACRONYMs, GPT4, v2, MixedCase
    return " ".join(out)


def display_label_text(label_data: Any, annotation_scheme: dict) -> str:
    """Resolve the *visible* text for a label.

    Precedence: explicit ``displayed_label`` on a dict label > humanized name
    (default, when ``humanize_labels`` is not disabled) > raw name. Stored
    value is unaffected by this function.
    """
    if isinstance(label_data, Mapping):
        if label_data.get("displayed_label"):
            return str(label_data["displayed_label"])
        name = label_data.get("name", "")
    else:
        name = label_data

    if annotation_scheme.get("humanize_labels", True):
        return humanize_label(name)
    return str(name)


def safe_generate_layout(
    annotation_scheme: dict,
    layout_function: Callable[..., Tuple[str, List[Tuple[str, str]]]],
    *args,
    **kwargs,
) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Safely generate layout with proper error handling.

    The configuration is validated with ``validate_schema_config`` and then
    passed to ``layout_function``. Any exception from either step is logged
    and converted into an escaped error message with no key bindings.

    Args:
        annotation_scheme: Schema configuration
        layout_function: Function to generate layout
        *args, **kwargs: Additional arguments for the layout function

    Returns:
        tuple: (html_string, key_bindings)
    """
    try:
        # Validate configuration
        validate_schema_config(annotation_scheme)

        # Generate layout
        return layout_function(annotation_scheme, *args, **kwargs)
    except Exception as e:
        # The config itself may be what is broken (e.g. None), so do not
        # assume it supports .get() while building the error report.
        if isinstance(annotation_scheme, Mapping):
            schema_name = annotation_scheme.get("name", "unknown")
        else:
            schema_name = "unknown"
        logger.error("Failed to generate layout for schema '%s': %s", schema_name, e)

        # Return error HTML instead of crashing
        error_html = (
            "\n\nError Generating Annotation Form\n\n"
            f"Schema: {escape_html_content(schema_name)}\n\n"
            f"{escape_html_content(str(e))}\n\n"
        )
        return error_html, []


def generate_validation_attribute(
    annotation_scheme: dict, label_name: Optional[str] = None
) -> str:
    """
    Generate validation attribute for form elements.

    Args:
        annotation_scheme: Schema configuration
        label_name: Specific label name for required_label validation

    Returns:
        str: Validation attribute value: ``"required_label"``, ``"required"``
            or an empty string when no requirement applies
    """
    label_requirement = annotation_scheme.get("label_requirement", {})

    # Normalize: label_requirement: true (bool) → {"required": true}
    if isinstance(label_requirement, bool):
        label_requirement = {"required": True} if label_requirement else {}
    elif not isinstance(label_requirement, Mapping):
        # An empty YAML key yields None; treat that (and any other
        # non-mapping value) as "no requirement" instead of failing on .get().
        if label_requirement is not None:
            logger.warning(
                "Ignoring non-mapping label_requirement: %r", label_requirement
            )
        label_requirement = {}

    # Support top-level required: true as shorthand for label_requirement.required
    if not label_requirement and annotation_scheme.get("required") is True:
        label_requirement = {"required": True}

    # Debug logging
    logger.debug(
        "generate_validation_attribute called with label_requirement: %s",
        label_requirement,
    )
    logger.debug("label_name: %s", label_name)

    # Check for required_label validation
    if label_name and label_requirement.get("required_label"):
        required_labels = label_requirement["required_label"]
        is_single_match = (
            isinstance(required_labels, str) and label_name == required_labels
        )
        is_list_match = (
            isinstance(required_labels, list) and label_name in required_labels
        )
        if is_single_match or is_list_match:
            logger.debug("Returning 'required_label' for label: %s", label_name)
            return "required_label"

    # Check for general required validation
    if label_requirement.get("required"):
        logger.debug("Returning 'required' for general requirement")
        return "required"

    logger.debug("Returning empty string - no validation requirements met")
    return ""


def generate_layout_attributes(annotation_scheme: dict) -> str:
    """
    Generate layout-related HTML attributes for grid positioning.

    Args:
        annotation_scheme: Schema configuration that may contain:
            - layout: dict with layout options
                - columns: Number of grid columns to span (1-6, default: 1)
                - rows: Number of grid rows to span (1-4, default: 1)
                - order: Explicit ordering integer for grid placement
                - min_width: Minimum width CSS value (e.g., "200px")
                - max_width: Maximum width CSS value (e.g., "400px")
                - align_self: Alignment override (start, center, end, stretch)

    Returns:
        str: HTML attribute string for layout
            (e.g., 'data-grid-columns="2" data-grid-rows="1"')

    Example config:
        annotation_schemes:
          - name: preference
            description: "Which is better?"
            layout:
              columns: 2      # Span 2 columns in the grid
              rows: 1         # Span 1 row (default)
              order: 1        # Explicit ordering
              min_width: "200px"
              max_width: "400px"
              align_self: "start"
    """
    layout_config = annotation_scheme.get("layout")
    if not isinstance(layout_config, Mapping):
        # Missing, empty (None) or malformed layout: fall back to defaults.
        layout_config = {}

    def _is_int(value: Any) -> bool:
        # bool is a subclass of int; `columns: true` must not count as 1.
        return isinstance(value, int) and not isinstance(value, bool)

    attrs = []

    # Column span (1-6, default: 1); always emitted
    columns = layout_config.get("columns", 1)
    if not _is_int(columns) or columns < 1:
        columns = 1
    elif columns > 6:
        columns = 6
    attrs.append(f'data-grid-columns="{columns}"')

    # Row span (1-4, default: 1); only emitted when spanning more than one row
    rows = layout_config.get("rows", 1)
    if _is_int(rows) and rows > 1:
        attrs.append(f'data-grid-rows="{min(rows, 4)}"')

    # Explicit order (integer)
    order = layout_config.get("order")
    if _is_int(order):
        attrs.append(f'data-grid-order="{order}"')

    # Min/max width via CSS custom properties in style attribute
    style_parts = []
    min_width = layout_config.get("min_width")
    if min_width and isinstance(min_width, str):
        style_parts.append(f"--form-min-width: {html.escape(min_width)}")
    max_width = layout_config.get("max_width")
    if max_width and isinstance(max_width, str):
        style_parts.append(f"--form-max-width: {html.escape(max_width)}")
    if style_parts:
        style_value = "; ".join(style_parts)
        attrs.append(f'style="{style_value}"')

    # Align self override
    align_self = layout_config.get("align_self")
    valid_alignments = ("start", "center", "end", "stretch")
    if align_self and align_self in valid_alignments:
        attrs.append(f'data-align-self="{align_self}"')

    return " ".join(attrs)


def generate_tooltip_html(label_data: Dict[str, Any]) -> str:
    """
    Generate tooltip HTML attribute from label data.

    This function provides centralized tooltip generation for all schema types.
    It checks for tooltip text in the label configuration, either directly
    or from an external file. A direct ``tooltip`` takes precedence over
    ``tooltip_file``. The text is HTML-escaped before being placed in the
    ``title`` attribute.

    Args:
        label_data: Label configuration dictionary that may contain:
            - tooltip: Direct tooltip text string
            - tooltip_file: Path to file containing tooltip text

    Returns:
        str: Tooltip HTML attribute string (e.g., 'data-toggle="tooltip" ...')
             or empty string if no tooltip is configured or the file cannot
             be read

    Example:
        >>> label_data = {"name": "Option 1", "tooltip": "Select this option"}
        >>> generate_tooltip_html(label_data)
        'data-toggle="tooltip" data-html="true" data-placement="top" title="Select this option"'
    """
    if not isinstance(label_data, dict):
        return ""

    tooltip_text = ""

    # Check for direct tooltip text
    if "tooltip" in label_data:
        tooltip_text = label_data["tooltip"]
        logger.debug("Found direct tooltip text for label")
    # Check for tooltip file
    elif "tooltip_file" in label_data:
        tooltip_path = label_data["tooltip_file"]
        try:
            with open(tooltip_path, "rt", encoding="utf-8") as f:
                tooltip_text = f.read()
            logger.debug("Read tooltip from file: %s", tooltip_path)
        except FileNotFoundError:
            logger.error("Tooltip file not found: %s", tooltip_path)
            return ""
        except PermissionError:
            logger.error("Permission denied reading tooltip file: %s", tooltip_path)
            return ""
        except Exception as e:
            logger.error("Failed to read tooltip file '%s': %s", tooltip_path, e)
            return ""

    if tooltip_text:
        escaped_tooltip = escape_html_content(tooltip_text)
        return (
            'data-toggle="tooltip" data-html="true" data-placement="top" '
            f'title="{escaped_tooltip}"'
        )

    return ""