"""
Identifier Utilities for Schema Generation

This module provides centralized functions for generating consistent
identifiers and validating schema configurations across all annotation
schema types.
"""

import html
import logging
from collections import Counter
from collections.abc import Mapping
from typing import Dict, Any, Tuple, List

logger = logging.getLogger(__name__)


def _stripped_text(value: Any) -> str:
    """Return ``value`` as a whitespace-stripped string (``None`` -> ``""``)."""
    return "" if value is None else str(value).strip()


def _label_name(label: Any) -> str:
    """Extract the stripped name from a string label or a ``{"name": ...}`` label.

    Raises:
        ValueError: If the label is neither a string nor a mapping with a
            ``"name"`` key.
    """
    if isinstance(label, str):
        return label.strip()
    if isinstance(label, Mapping) and "name" in label:
        # Names parsed from YAML/JSON may be ints (e.g. Likert points), so
        # coerce to text instead of calling .strip() on the raw value.
        return _stripped_text(label["name"])
    raise ValueError(f"Invalid label format: {label}")


def validate_schema_config(annotation_scheme: dict) -> bool:
    """
    Validate schema configuration before generating HTML.

    Args:
        annotation_scheme: Schema configuration dictionary

    Returns:
        bool: True if valid, raises exception if invalid

    Raises:
        ValueError: If a required field is missing or empty, if ``labels`` is
            present but empty, or if a label is malformed, empty or duplicated
    """
    for field in ("name", "description"):
        if field not in annotation_scheme:
            raise ValueError(f"Missing required field: {field}")

    schema_name = annotation_scheme["name"]
    if not schema_name or not str(schema_name).strip():
        raise ValueError("Schema name cannot be empty")

    description = annotation_scheme["description"]
    if not description or not str(description).strip():
        raise ValueError("Schema description cannot be empty")

    # Labels are optional, but when the key is present it must be usable.
    if "labels" in annotation_scheme:
        labels = annotation_scheme["labels"]
        if not labels:
            raise ValueError("Labels list cannot be empty")

        label_names = [_label_name(label) for label in labels]

        if any(not name for name in label_names):
            raise ValueError("Label names cannot be empty")

        # Counter keeps first-seen order, so the reported duplicates are
        # deterministic and computed in a single pass.
        duplicates = [
            name for name, count in Counter(label_names).items() if count > 1
        ]
        if duplicates:
            raise ValueError(f"Duplicate labels found: {duplicates}")

    logger.debug("Schema configuration validation passed for: %s", schema_name)
    return True


def generate_element_identifier(schema_name: str, label_name: str, element_type: str = "default") -> Dict[str, str]:
    """
    Generate consistent identifiers for form elements.

    Args:
        schema_name: Name of the annotation schema
        label_name: Name of the specific label/option
        element_type: Type of element (radio, checkbox, text, etc.)

    Returns:
        dict: Contains id, name, schema, and label_name attributes
    """
    # Coerce before stripping so non-string names (e.g. integer labels) work.
    safe_schema = escape_html_content(_stripped_text(schema_name))
    safe_label = escape_html_content(_stripped_text(label_name))

    # Underscores are used in the id; any ":::" separator that leaked in from
    # the inputs is flattened to "_" as well.
    element_id = f"{safe_schema}_{safe_label}_{element_type}".replace(":::", "_")

    # Radio buttons share the schema name as their group name so that the
    # options are mutually exclusive; everything else gets a per-label name.
    if element_type == "radio":
        element_name = safe_schema
    else:
        element_name = f"{safe_schema}:::{safe_label}"

    return {
        "id": element_id,
        "name": element_name,
        "schema": safe_schema,
        "label_name": safe_label,
    }


def generate_element_value(label_data: Any, index: int, annotation_scheme: dict) -> str:
    """
    Generate consistent value attributes for form elements.

    Precedence: explicit ``key_value`` on a mapping label, then sequential
    key binding (``index % 10``), then the label name, then the index.

    Args:
        label_data: Label configuration (string or dict)
        index: Index of the label in the list
        annotation_scheme: Full schema configuration

    Returns:
        str: Value to use for the element
    """
    is_mapping = isinstance(label_data, Mapping)

    if is_mapping and "key_value" in label_data:
        return str(label_data["key_value"])

    if annotation_scheme.get("sequential_key_binding"):
        return str(index % 10)

    if isinstance(label_data, str):
        return label_data
    if is_mapping and "name" in label_data:
        # Always hand back text, matching the declared return type.
        return str(label_data["name"])

    return str(index)


def escape_html_content(content: str) -> str:
    """
    Escape HTML content to prevent injection.

    Args:
        content: Content to escape; non-string values are converted with
            ``str()`` first

    Returns:
        str: Escaped content (``""`` for ``None``)
    """
    # Only None maps to the empty string; falsy-but-meaningful values such as
    # 0 must still be rendered.
    if content is None:
        return ""
    return html.escape(str(content))


def humanize_label(text: str) -> str:
    """Turn a machine label (``agent_a_much_better``) into readable text
    (``Agent A Much Better``) for display only -- the stored annotation value
    is always the original label name, never this.

    Tokens that are already mixed/upper case or contain digits (acronyms,
    ``GPT4``, ``v2``) are preserved as-is so we don't mangle them; purely
    lowercase alphabetic tokens are capitalized.
    """
    if text is None or text == "":
        return ""

    raw = str(text)

    # Never mangle Jinja/template expressions (e.g. dynamic_labels:
    # "{{instance_obj.labels[0]}}"). Humanizing would rewrite `instance_obj`
    # to `instance Obj`, producing invalid Jinja and a 500 at render time.
    if "{{" in raw or "{%" in raw:
        return raw

    # split() with no argument also collapses runs of whitespace.
    tokens = raw.replace("_", " ").replace("-", " ").split()

    words = []
    for tok in tokens:
        has_digit = any(ch.isdigit() for ch in tok)
        if tok.islower() and not has_digit:
            words.append(tok[:1].upper() + tok[1:])
        else:
            # Preserve ACRONYMs, GPT4, v2, MixedCase. Note that "v2".islower()
            # is True, hence the explicit digit check above.
            words.append(tok)
    return " ".join(words)


def display_label_text(label_data: Any, annotation_scheme: dict) -> str:
    """Resolve the *visible* text for a label.

    Precedence: explicit ``displayed_label`` on a dict label > humanized name
    (default, when ``humanize_labels`` is not disabled) > raw name. Stored
    value is unaffected by this function.
    """
    if isinstance(label_data, Mapping):
        explicit = label_data.get("displayed_label")
        if explicit:
            return str(explicit)
        name = label_data.get("name", "")
    else:
        name = label_data

    if not annotation_scheme.get("humanize_labels", True):
        return str(name)
    return humanize_label(name)
Args: annotation_scheme: Schema configuration layout_function: Function to generate layout *args, **kwargs: Additional arguments for the layout function Returns: tuple: (html_string, key_bindings) """ try: # Validate configuration validate_schema_config(annotation_scheme) # Generate layout return layout_function(annotation_scheme, *args, **kwargs) except Exception as e: schema_name = annotation_scheme.get('name', 'unknown') logger.error(f"Failed to generate layout for schema '{schema_name}': {e}") # Return error HTML instead of crashing error_html = f"""
Schema: {escape_html_content(schema_name)}
{escape_html_content(str(e))}