codebook / potato /server_utils /schemas /identifier_utils.py
davidjurgens's picture
Deploy: Potato — Codebook Annotation
aceb1b2 verified
Raw
History Blame Contribute Delete
14.3 kB
"""
Identifier Utilities for Schema Generation
This module provides centralized functions for generating consistent identifiers
and validating schema configurations across all annotation schema types.
"""
import html
import logging
from collections.abc import Mapping
from typing import Dict, Any, Tuple, List
logger = logging.getLogger(__name__)
def validate_schema_config(annotation_scheme: dict) -> bool:
    """
    Validate schema configuration before generating HTML.

    Checks that ``name`` and ``description`` are present and non-blank and,
    when a ``labels`` entry exists, that it is non-empty and that every label
    (a string, or a dict with a ``name`` key) has a non-blank name that is
    unique after surrounding whitespace is stripped.

    Args:
        annotation_scheme: Schema configuration dictionary

    Returns:
        bool: True if valid, raises exception if invalid

    Raises:
        ValueError: If configuration is invalid
    """
    # Check required fields
    for field in ("name", "description"):
        if field not in annotation_scheme:
            raise ValueError(f"Missing required field: {field}")

    # Validate schema name
    schema_name = annotation_scheme["name"]
    if not schema_name or not str(schema_name).strip():
        raise ValueError("Schema name cannot be empty")

    # Validate description
    description = annotation_scheme["description"]
    if not description or not str(description).strip():
        raise ValueError("Schema description cannot be empty")

    # Validate labels if present
    if "labels" in annotation_scheme:
        labels = annotation_scheme["labels"]
        if not labels:
            raise ValueError("Labels list cannot be empty")

        label_names = []
        for label in labels:
            if isinstance(label, str):
                label_names.append(label.strip())
            elif isinstance(label, dict) and "name" in label:
                # Names may be non-strings (e.g. numbers parsed from YAML);
                # coerce them the same way the schema name is coerced above so
                # they cannot raise AttributeError on .strip().
                raw_name = label["name"]
                label_names.append("" if raw_name is None else str(raw_name).strip())
            else:
                raise ValueError(f"Invalid label format: {label}")

        # Check for empty labels
        if any(not name for name in label_names):
            raise ValueError("Label names cannot be empty")

        # Check for duplicates in a single pass; dict insertion order makes
        # the reported list follow first appearance in the configuration.
        counts = {}
        for name in label_names:
            counts[name] = counts.get(name, 0) + 1
        duplicates = [name for name, count in counts.items() if count > 1]
        if duplicates:
            raise ValueError(f"Duplicate labels found: {duplicates}")

    logger.debug("Schema configuration validation passed for: %s", schema_name)
    return True
def generate_element_identifier(schema_name: str, label_name: str, element_type: str = "default") -> Dict[str, str]:
    """
    Build the id/name attribute set for one form element.

    Both names are stripped of surrounding whitespace and HTML-escaped before
    being combined.

    Args:
        schema_name: Name of the annotation schema
        label_name: Name of the specific label/option
        element_type: Type of element (radio, checkbox, text, etc.)

    Returns:
        dict: Keys ``id``, ``name``, ``schema`` and ``label_name``
    """
    schema_attr = escape_html_content(schema_name.strip())
    label_attr = escape_html_content(label_name.strip())

    # Radio inputs share the schema name as their group name so that the
    # options are mutually exclusive; everything else gets a per-label name.
    if element_type == "radio":
        group_name = schema_attr
    else:
        group_name = f"{schema_attr}:::{label_attr}"

    # The id is underscore-joined, and any ":::" separator that came in with
    # the inputs is flattened to an underscore as well.
    raw_id = f"{schema_attr}_{label_attr}_{element_type}"

    return {
        "id": raw_id.replace(":::", "_"),
        "name": group_name,
        "schema": schema_attr,
        "label_name": label_attr,
    }
def generate_element_value(label_data: Any, index: int, annotation_scheme: dict) -> str:
    """
    Generate consistent value attributes for form elements.

    Precedence: an explicit ``key_value`` on a dict label, then the
    sequential key binding (``index % 10``) when the schema enables
    ``sequential_key_binding``, then the label name, then the index.

    Args:
        label_data: Label configuration (string or dict)
        index: Index of the label in the list
        annotation_scheme: Full schema configuration

    Returns:
        str: Value to use for the element
    """
    is_dict_label = isinstance(label_data, dict)

    # Handle custom key_value first
    if is_dict_label and "key_value" in label_data:
        return str(label_data["key_value"])

    # Handle sequential key binding
    if annotation_scheme.get("sequential_key_binding"):
        return str(index % 10)

    # Default to label name
    if isinstance(label_data, str):
        return label_data
    if is_dict_label and "name" in label_data:
        # Coerce so a non-string name (e.g. a number parsed from YAML) still
        # honours the declared ``str`` return type, like every other branch.
        return str(label_data["name"])

    # Fallback to index
    return str(index)
def escape_html_content(content: str) -> str:
    """
    Escape HTML content to prevent injection.

    ``None`` becomes an empty string. Any other value is converted with
    ``str()`` and escaped, so falsy-but-meaningful values such as ``0`` or
    ``False`` are rendered as ``"0"`` / ``"False"`` rather than dropped.

    Args:
        content: Content to escape

    Returns:
        str: Escaped content
    """
    if content is None:
        return ""
    return html.escape(str(content))
def humanize_label(text: str) -> str:
    """Turn a machine label (``agent_a_much_better``) into readable text
    (``Agent A Much Better``) for display only -- the stored annotation
    value is always the original label name, never this.

    Underscores and hyphens become spaces and runs of whitespace collapse.
    Tokens that are already mixed/upper case or contain digits+letters
    (acronyms, ``GPT4``, ``v2``) are preserved as-is so we don't mangle
    them; purely lowercase tokens are capitalized.

    Args:
        text: Label text to humanize

    Returns:
        str: Display text, or an empty string for falsy input
    """
    if not text:
        return ""
    raw = str(text)
    # Never mangle Jinja/template expressions (e.g. dynamic_labels:
    # "{{instance_obj.labels[0]}}"). Humanizing would rewrite `instance_obj`
    # to `instance Obj`, producing invalid Jinja and a 500 at render time.
    if "{{" in raw or "{%" in raw:
        return raw

    out = []
    # split() with no argument also collapses repeated whitespace.
    for tok in raw.replace("_", " ").replace("-", " ").split():
        # str.islower() is True for tokens like "v2" (digits are uncased), so
        # digit-bearing tokens must be excluded explicitly to stay as-is.
        if tok.islower() and not any(ch.isdigit() for ch in tok):
            out.append(tok[:1].upper() + tok[1:])
        else:
            out.append(tok)  # preserve ACRONYMs, GPT4, v2, MixedCase
    return " ".join(out)
def display_label_text(label_data: Any, annotation_scheme: dict) -> str:
    """Return the text shown to the annotator for a label.

    A truthy ``displayed_label`` on a mapping label wins outright. Otherwise
    the label's name (the ``name`` entry of a mapping, or the label itself)
    is humanized unless the schema sets ``humanize_labels`` to a falsy value,
    in which case the name is returned as a plain string. Only the visible
    text is decided here; nothing about the stored value is touched.
    """
    if isinstance(label_data, Mapping):
        override = label_data.get("displayed_label")
        if override:
            return str(override)
        raw_name = label_data.get("name", "")
    else:
        raw_name = label_data

    # Humanizing is on by default; a schema opts out explicitly.
    if not annotation_scheme.get("humanize_labels", True):
        return str(raw_name)
    return humanize_label(raw_name)
def safe_generate_layout(annotation_scheme: dict, layout_function: callable, *args, **kwargs) -> Tuple[str, List[Tuple[str, str]]]:
    """
    Safely generate layout with proper error handling.

    The schema is validated and then handed to ``layout_function``. Any
    exception raised by either step is logged and converted into an error
    panel so that one broken schema does not crash the caller.

    Args:
        annotation_scheme: Schema configuration
        layout_function: Function to generate layout
        *args, **kwargs: Additional arguments for the layout function

    Returns:
        tuple: (html_string, key_bindings); on failure the HTML is an error
            panel and the key bindings list is empty
    """
    try:
        # Validate configuration
        validate_schema_config(annotation_scheme)
        # Generate layout
        return layout_function(annotation_scheme, *args, **kwargs)
    except Exception as e:
        # The scheme itself may be the problem (e.g. None or a list), so the
        # name lookup must not assume a mapping -- otherwise this handler
        # would raise and defeat its own purpose.
        if isinstance(annotation_scheme, Mapping):
            schema_name = annotation_scheme.get('name', 'unknown')
        else:
            schema_name = 'unknown'
        # exc_info keeps the traceback in the log; the message is unchanged.
        logger.error(f"Failed to generate layout for schema '{schema_name}': {e}", exc_info=True)

        # Return error HTML instead of crashing
        error_html = (
            "\n"
            '<div class="annotation-error" style="border: 2px solid #ff0000; padding: 10px; margin: 10px 0; background-color: #fff5f5;">\n'
            '<h4 style="color: #ff0000; margin: 0 0 10px 0;">Error Generating Annotation Form</h4>\n'
            f'<p style="margin: 0; color: #666;">Schema: {escape_html_content(schema_name)}</p>\n'
            f'<p style="margin: 5px 0 0 0; color: #333;">{escape_html_content(str(e))}</p>\n'
            "</div>\n"
        )
        return error_html, []
def generate_validation_attribute(annotation_scheme: dict, label_name: str = None) -> str:
    """
    Generate validation attribute for form elements.

    ``label_requirement`` may be a mapping, a bool shorthand
    (``true`` means ``{"required": true}``), or absent/null. A top-level
    ``required: true`` is honoured when no label requirement is configured.

    Args:
        annotation_scheme: Schema configuration
        label_name: Specific label name for required_label validation

    Returns:
        str: ``"required_label"`` when ``label_name`` is one of the configured
            required labels, ``"required"`` when the schema is required in
            general, otherwise an empty string
    """
    label_requirement = annotation_scheme.get("label_requirement")

    # Normalize: label_requirement: true (bool) -> {"required": true}
    if isinstance(label_requirement, bool):
        label_requirement = {"required": True} if label_requirement else {}
    elif not isinstance(label_requirement, Mapping):
        # A null value (YAML "label_requirement:") or any other non-mapping
        # carries no requirements; treating it as empty avoids calling .get
        # on it below.
        label_requirement = {}

    # Support top-level required: true as shorthand for label_requirement.required
    if not label_requirement and annotation_scheme.get("required") is True:
        label_requirement = {"required": True}

    logger.debug("generate_validation_attribute called with label_requirement: %s", label_requirement)
    logger.debug("label_name: %s", label_name)

    # Check for required_label validation
    required_labels = label_requirement.get("required_label")
    if label_name and required_labels:
        if isinstance(required_labels, str):
            is_required_label = label_name == required_labels
        elif isinstance(required_labels, (list, tuple, set)):
            is_required_label = label_name in required_labels
        else:
            is_required_label = False
        if is_required_label:
            logger.debug("Returning 'required_label' for label: %s", label_name)
            return "required_label"

    # Check for general required validation
    if label_requirement.get("required"):
        logger.debug("Returning 'required' for general requirement")
        return "required"

    logger.debug("Returning empty string - no validation requirements met")
    return ""
def _is_plain_int(value: Any) -> bool:
    """Return True for real integers, excluding bool (a subclass of int)."""
    return isinstance(value, int) and not isinstance(value, bool)


def generate_layout_attributes(annotation_scheme: dict) -> str:
    """
    Generate layout-related HTML attributes for grid positioning.

    Args:
        annotation_scheme: Schema configuration that may contain:
            - layout: dict with layout options
                - columns: Number of grid columns to span (1-6, default: 1)
                - rows: Number of grid rows to span (1-4, default: 1)
                - order: Explicit ordering integer for grid placement
                - min_width: Minimum width CSS value (e.g., "200px")
                - max_width: Maximum width CSS value (e.g., "400px")
                - align_self: Alignment override (start, center, end, stretch)

    Returns:
        str: HTML attribute string for layout (e.g., 'data-grid-columns="2" data-grid-rows="1"').
            ``data-grid-columns`` is always present; the other attributes are
            emitted only when configured with a usable value.

    Example config:
        annotation_schemes:
          - name: preference
            description: "Which is better?"
            layout:
              columns: 2  # Span 2 columns in the grid
              rows: 1  # Span 1 row (default)
              order: 1  # Explicit ordering
              min_width: "200px"
              max_width: "400px"
              align_self: "start"
    """
    # A null or otherwise non-mapping "layout" entry means "no layout options".
    layout_config = annotation_scheme.get("layout")
    if not isinstance(layout_config, Mapping):
        layout_config = {}
    attrs = []

    # Column span (1-6, default: 1). Booleans are rejected explicitly because
    # bool is an int subclass and would otherwise render as "True"/"False".
    columns = layout_config.get("columns", 1)
    if not _is_plain_int(columns) or columns < 1:
        columns = 1
    else:
        columns = min(columns, 6)
    attrs.append(f'data-grid-columns="{columns}"')

    # Row span (1-4, default: 1); only emitted when spanning more than one row
    rows = layout_config.get("rows", 1)
    if _is_plain_int(rows) and rows > 1:
        attrs.append(f'data-grid-rows="{min(rows, 4)}"')

    # Explicit order (integer)
    order = layout_config.get("order")
    if _is_plain_int(order):
        attrs.append(f'data-grid-order="{order}"')

    # Min/max width via CSS custom properties in style attribute
    style_parts = []
    for css_property, option in (("--form-min-width", "min_width"), ("--form-max-width", "max_width")):
        width = layout_config.get(option)
        if width and isinstance(width, str):
            style_parts.append(f"{css_property}: {html.escape(width)}")
    if style_parts:
        style_value = "; ".join(style_parts)
        attrs.append(f'style="{style_value}"')

    # Align self override
    align_self = layout_config.get("align_self")
    if isinstance(align_self, str) and align_self in ("start", "center", "end", "stretch"):
        attrs.append(f'data-align-self="{align_self}"')

    return " ".join(attrs)
def generate_tooltip_html(label_data: Dict[str, Any]) -> str:
    """
    Build the tooltip attribute string for a label, if one is configured.

    The text comes from the label's ``tooltip`` entry when that key is
    present; otherwise from the file named by ``tooltip_file``. A failure to
    read the file is logged and yields no tooltip.

    Args:
        label_data: Label configuration dictionary that may contain:
            - tooltip: Direct tooltip text string
            - tooltip_file: Path to file containing tooltip text

    Returns:
        str: Tooltip HTML attribute string (e.g., 'data-toggle="tooltip" ...')
            or empty string if no tooltip is configured

    Example:
        >>> label_data = {"name": "Option 1", "tooltip": "Select this option"}
        >>> generate_tooltip_html(label_data)
        'data-toggle="tooltip" data-html="true" data-placement="top" title="Select this option"'
    """
    if not isinstance(label_data, dict):
        return ""

    text = ""
    if "tooltip" in label_data:
        # Inline text takes priority; the file is not consulted in this case.
        text = label_data["tooltip"]
        logger.debug(f"Found direct tooltip text for label")
    elif "tooltip_file" in label_data:
        try:
            with open(label_data["tooltip_file"], "rt", encoding="utf-8") as handle:
                text = handle.read()
            logger.debug(f"Read tooltip from file: {label_data['tooltip_file']}")
        except FileNotFoundError:
            logger.error(f"Tooltip file not found: {label_data['tooltip_file']}")
            return ""
        except PermissionError:
            logger.error(f"Permission denied reading tooltip file: {label_data['tooltip_file']}")
            return ""
        except Exception as e:
            logger.error(f"Failed to read tooltip file '{label_data['tooltip_file']}': {e}")
            return ""

    if not text:
        return ""

    # Escape before embedding so the text cannot break out of the attribute.
    title = escape_html_content(text)
    return f'data-toggle="tooltip" data-html="true" data-placement="top" title="{title}"'