""" Instance Display Renderer Provides the main InstanceDisplayRenderer class that handles rendering instance content for display, separate from annotation collection. This module enables the new `instance_display` configuration section that explicitly defines what content to show annotators. Usage: from potato.server_utils.instance_display import InstanceDisplayRenderer renderer = InstanceDisplayRenderer(config) html = renderer.render(instance_data) template_vars = renderer.get_template_variables(instance_data) """ import html as html_module import logging from typing import Dict, Any, List, Optional from .displays import display_registry logger = logging.getLogger(__name__) class InstanceDisplayError(Exception): """Exception raised when instance display rendering fails.""" pass class InstanceDisplayRenderer: """ Renders instance content for display based on configuration. This class separates content display from annotation collection, allowing any combination of display types with any annotation schemes. """ def __init__(self, config: Dict[str, Any]): """ Initialize the renderer. Args: config: The full configuration dictionary """ self.config = config self.display_config = config.get("instance_display", {}) self.fields = self.display_config.get("fields", []) self.layout = self.display_config.get("layout", {}) # Extract span targets — query the registry instead of a hardcoded list self.span_targets = [ f["key"] for f in self.fields if f.get("span_target") and display_registry.type_supports_span_target(f.get("type", "")) ] # Warn about span_target on unsupported types for f in self.fields: if f.get("span_target") and not display_registry.type_supports_span_target(f.get("type", "")): logger.warning( f"Field '{f.get('key')}' has span_target=true but display type " f"'{f.get('type')}' does not support span annotation. " f"Span annotation will not work on this field." ) # Track if we have instance_display configured self.has_instance_display = bool(self.fields) logger.debug( f"InstanceDisplayRenderer initialized: " f"has_instance_display={self.has_instance_display}, " f"span_targets={self.span_targets}" ) def render(self, instance_data: Dict[str, Any]) -> str: """ Render all display fields for an instance. Args: instance_data: The instance data dictionary Returns: HTML string containing all rendered display fields Raises: InstanceDisplayError: If a required field is missing from instance data """ if not self.has_instance_display: # No instance_display configured, return empty # (legacy behavior will be handled by the template) return "" # Validate all required fields exist self._validate_fields(instance_data) # Get layout configuration direction = self.layout.get("direction", "vertical") gap = self.layout.get("gap", "20px") # Build container classes and styles container_classes = ["instance-display-container", f"layout-{direction}"] container_style = f"gap: {gap};" # Render each field rendered_fields = [] for field in self.fields: field_html = self._render_field(field, instance_data) rendered_fields.append(field_html) # Combine into container fields_html = "\n".join(rendered_fields) # Build data attributes for raw field access by annotation schemas # Include all string/URL fields from instance data for source_field lookups import json raw_data = {} for key, value in instance_data.items(): if isinstance(value, (str, int, float, bool)) or value is None: raw_data[key] = value raw_data_json = html_module.escape(json.dumps(raw_data)) return f'''
{fields_html}
''' def _validate_fields(self, instance_data: Dict[str, Any]) -> None: """ Validate that all configured fields exist in the instance data. Fields whose display type is marked ``lazy_populated`` in the display registry (``interactive_chat``, ``live_agent``, ``live_coding_agent``) are exempt -- their data key is expected to be written after initial render (by a live agent session). Args: instance_data: The instance data dictionary Raises: InstanceDisplayError: If any non-lazy field is missing """ non_lazy = [ f for f in self.fields if not display_registry.is_lazy_populated(f.get("type", "")) ] missing_non_lazy = [ f["key"] for f in non_lazy if f["key"] not in instance_data ] # Every non-lazy field missing is almost always a config/data # key mismatch (e.g. fields reference task_description but the # data uses task), not a transient lazy state -- make it loud so # it isn't silently rendered as a blank page. if non_lazy and len(missing_non_lazy) == len(non_lazy): logger.error( "instance_display: ALL %d non-lazy field(s) %s are absent " "from the instance data (available keys: %s). This is " "almost certainly a config/data key mismatch.", len(non_lazy), missing_non_lazy, list(instance_data.keys()), ) for field in self.fields: key = field["key"] if key in instance_data: continue field_type = field.get("type", "") if display_registry.is_lazy_populated(field_type): logger.debug( "Skipping validation for lazy-populated field '%s' (type=%s); " "data is written after initial render.", key, field_type, ) continue available = list(instance_data.keys()) raise InstanceDisplayError( f"Display field '{key}' not found in instance data. " f"Available fields: {available}" ) def _render_field(self, field: Dict[str, Any], instance_data: Dict[str, Any]) -> str: """ Render a single display field. Args: field: The field configuration instance_data: The instance data dictionary Returns: HTML string for the field """ key = field["key"] field_type = field["type"] data = instance_data.get(key) # For format-based display types, process the file if data is a file path format_display_types = ["pdf", "document", "spreadsheet", "code"] if field_type in format_display_types and isinstance(data, str): data = self._process_format_file(data, field_type, field) try: rendered = display_registry.render(field_type, field, data) # Check if resizable is enabled (global setting or per-field override) global_resizable = self.display_config.get("resizable", True) field_resizable = field.get("display_options", {}).get("resizable", global_resizable) # Wrap with resizable container if enabled if field_resizable: rendered = self._wrap_resizable(rendered, field) return rendered except ValueError as e: logger.error(f"Error rendering field '{key}': {e}") return f'
Error rendering field "{key}": {e}
' def _wrap_resizable(self, inner_html: str, field: Dict[str, Any]) -> str: """ Wrap rendered content in a resizable container. Args: inner_html: The rendered field HTML field: The field configuration Returns: HTML wrapped in resizable container """ display_options = field.get("display_options", {}) max_height = display_options.get("max_height", 500) min_height = display_options.get("min_height", 100) style = f"max-height: {max_height}px; min-height: {min_height}px; position: relative;" return f'''
{inner_html}
''' def _process_format_file( self, file_path: str, display_type: str, field: Dict[str, Any] ) -> Any: """ Process a file using the format handler system. If the data is a file path and a format handler is available, extract the content and return FormatOutput data. Args: file_path: Path to the file to process display_type: The display type (pdf, document, etc.) field: The field configuration Returns: Either the original file_path (for client-side rendering like PDF.js) or extracted content dict for server-side rendering """ try: from potato.format_handlers import format_handler_registry except ImportError: # Format handlers not available, return original data logger.debug("Format handlers not available, using raw file path") return file_path # Check if the file path should be processed # For PDFs, we typically use client-side rendering with PDF.js # unless explicitly configured for server-side extraction display_options = field.get("display_options", {}) if display_type == "pdf": # By default, PDFs use client-side rendering (return path as-is) # If server_extract is set, use the format handler if not display_options.get("server_extract", False): return file_path # Check if format handler can handle this file if not format_handler_registry.can_handle(file_path): logger.debug(f"No format handler for {file_path}, using raw data") return file_path try: # Extract content using format handler extraction_options = display_options.get("extraction_options", {}) output = format_handler_registry.extract(file_path, options=extraction_options) # Return as dict for the display renderer return { "text": output.text, "rendered_html": output.rendered_html, "coordinate_map": output.coordinate_map, "metadata": output.metadata, "format_name": output.format_name, "source_path": output.source_path, } except Exception as e: logger.warning(f"Format handler extraction failed for {file_path}: {e}") return file_path def get_template_variables(self, instance_data: Dict[str, Any]) -> Dict[str, Any]: """ Get template variables for Jinja access. Returns a dictionary with: - display_html: The complete rendered display HTML - display_fields: Dictionary of field key -> rendered HTML - display_raw: Dictionary of field key -> raw data value - span_targets: List of field keys that are span targets - multi_span_mode: Boolean indicating if multiple span targets exist - has_instance_display: Boolean indicating if instance_display is configured Args: instance_data: The instance data dictionary Returns: Dictionary of template variables """ result = { "display_html": "", "display_fields": {}, "display_raw": {}, "span_targets": self.span_targets, "multi_span_mode": len(self.span_targets) > 1, "has_instance_display": self.has_instance_display, } if not self.has_instance_display: return result # Validate fields. A missing field here is a real config problem # (lazy-populated types like interactive_chat are already filtered # out by _validate_fields), but the renderer surfaces it inline # via ``display_error`` so the page still loads -- WARN is the # right severity, not ERROR. try: self._validate_fields(instance_data) except InstanceDisplayError as e: logger.warning(f"Field validation failed: {e}") result["display_error"] = str(e) return result # Render complete display result["display_html"] = self.render(instance_data) # Render individual fields and collect raw data for field in self.fields: key = field["key"] field_type = field["type"] data = instance_data.get(key) result["display_raw"][key] = data try: result["display_fields"][key] = display_registry.render(field_type, field, data) except ValueError as e: logger.error(f"Error rendering field '{key}': {e}") result["display_fields"][key] = f'
Error: {e}
' return result def get_span_target_fields(self) -> List[Dict[str, Any]]: """ Get the list of fields configured as span targets. Returns: List of field configuration dictionaries for span targets """ return [f for f in self.fields if f.get("span_target")] def get_primary_text_field(self) -> Optional[str]: """ Get the primary text field key for legacy compatibility. Returns the first span target if any, otherwise the first text field, otherwise None. Returns: Field key string or None """ # First, check span targets if self.span_targets: return self.span_targets[0] # Then look for any text field for field in self.fields: if field.get("type") == "text": return field["key"] return None def should_use_legacy_display(self) -> bool: """ Check if legacy display mode should be used. Returns True if no instance_display is configured, meaning the template should fall back to displaying text_key. Returns: True if legacy mode should be used """ return not self.has_instance_display def get_instance_display_renderer(config: Dict[str, Any]) -> InstanceDisplayRenderer: """ Get or create an InstanceDisplayRenderer for the given config. This is a convenience function that creates a renderer. In the future, this could cache renderers per config hash. Args: config: The configuration dictionary Returns: InstanceDisplayRenderer instance """ return InstanceDisplayRenderer(config)