# Source: codebook/potato/server_utils/instance_display.py
# Deploy: Potato — Codebook Annotation (commit aceb1b2, 15.4 kB)
"""
Instance Display Renderer
Provides the main InstanceDisplayRenderer class that handles rendering
instance content for display, separate from annotation collection.
This module enables the new `instance_display` configuration section
that explicitly defines what content to show annotators.
Usage:
from potato.server_utils.instance_display import InstanceDisplayRenderer
renderer = InstanceDisplayRenderer(config)
html = renderer.render(instance_data)
template_vars = renderer.get_template_variables(instance_data)
"""
import html as html_module
import logging
from typing import Dict, Any, List, Optional
from .displays import display_registry
# Module-level logger, named after this module so its output can be
# filtered or configured separately from the rest of the package.
logger = logging.getLogger(__name__)
class InstanceDisplayError(Exception):
    """Signals that instance display rendering could not be completed.

    Raised by field validation when a configured, non-lazy display field
    is absent from the instance data.
    """
class InstanceDisplayRenderer:
    """
    Render instance content for display based on the ``instance_display``
    configuration section.

    Content display is kept separate from annotation collection, so any
    combination of display types can be paired with any annotation schemes.

    Attributes set by ``__init__``:
        config: The full configuration dictionary that was passed in.
        display_config: The ``instance_display`` section (``{}`` if absent).
        fields: The list of field configuration dicts from ``display_config``.
        layout: Layout options (``direction``, ``gap``) from ``display_config``.
        span_targets: Keys of fields flagged ``span_target`` whose display
            type supports span annotation according to the display registry.
        has_instance_display: True when at least one field is configured.
    """

    # Display types whose *string* data is treated as a file path and routed
    # through the format handler system before rendering.
    _FORMAT_DISPLAY_TYPES = ("pdf", "document", "spreadsheet", "code")

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize the renderer.

        Args:
            config: The full configuration dictionary
        """
        self.config = config
        # ``or {}`` / ``or []`` also covers sections explicitly set to null
        # (e.g. an empty YAML key): ``.get(key, default)`` would hand back
        # None for those and the next ``.get`` would raise AttributeError.
        self.display_config = config.get("instance_display") or {}
        self.fields = self.display_config.get("fields") or []
        self.layout = self.display_config.get("layout") or {}

        # Collect span targets by asking the registry (no hardcoded type
        # list), and warn about span_target flags on unsupported types in the
        # same pass so the registry is queried once per flagged field.
        self.span_targets: List[str] = []
        for field in self.fields:
            if not field.get("span_target"):
                continue
            if display_registry.type_supports_span_target(field.get("type", "")):
                self.span_targets.append(field["key"])
            else:
                logger.warning(
                    "Field '%s' has span_target=true but display type "
                    "'%s' does not support span annotation. "
                    "Span annotation will not work on this field.",
                    field.get("key"),
                    field.get("type"),
                )

        # Track if we have instance_display configured
        self.has_instance_display = bool(self.fields)

        logger.debug(
            "InstanceDisplayRenderer initialized: "
            "has_instance_display=%s, "
            "span_targets=%s",
            self.has_instance_display,
            self.span_targets,
        )

    @staticmethod
    def _error_html(message: str) -> str:
        """Return an inline error ``<div>`` with *message* HTML-escaped."""
        # The message may embed field keys or exception text derived from
        # config/instance data, so it must never reach the page unescaped.
        safe_message = html_module.escape(str(message), quote=False)
        return f'<div class="display-error">{safe_message}</div>'

    @staticmethod
    def _css_length(value: Any) -> str:
        """Return *value* as a CSS length, appending ``px`` to bare numbers.

        Values that are not plain numbers (e.g. ``"60vh"``, ``"500px"``) are
        returned unchanged so a unit supplied in the config is not doubled.
        """
        text = str(value).strip()
        try:
            float(text)
        except ValueError:
            return text
        return f"{text}px"

    def render(self, instance_data: Dict[str, Any]) -> str:
        """
        Render all display fields for an instance.

        Args:
            instance_data: The instance data dictionary

        Returns:
            HTML string containing all rendered display fields, or an empty
            string when no ``instance_display`` fields are configured
            (legacy display is then left to the template).

        Raises:
            InstanceDisplayError: If a required field is missing from instance data
        """
        if not self.has_instance_display:
            return ""

        # Imported locally, as in the original; the module-level imports
        # visible for this file do not include json.
        import json

        # Validate all required fields exist
        self._validate_fields(instance_data)

        # Layout values come from configuration and are written into HTML
        # attributes, so escape them to keep the markup well-formed.
        direction = self.layout.get("direction", "vertical")
        gap = self.layout.get("gap", "20px")
        container_classes = html_module.escape(
            " ".join(["instance-display-container", f"layout-{direction}"])
        )
        container_style = html_module.escape(f"gap: {gap};")

        fields_html = "\n".join(
            self._render_field(field, instance_data) for field in self.fields
        )

        # Expose scalar instance values (str/int/float/bool/None) as a JSON
        # data attribute for source_field lookups by annotation schemas.
        # Containers (lists, dicts, ...) are deliberately left out.
        raw_data = {
            key: value
            for key, value in instance_data.items()
            if value is None or isinstance(value, (str, int, float, bool))
        }
        raw_data_json = html_module.escape(json.dumps(raw_data))

        return (
            f'\n<div class="{container_classes}" style="{container_style}" '
            f'data-instance-fields="{raw_data_json}">\n'
            f"{fields_html}\n"
            "</div>\n"
        )

    def _validate_fields(self, instance_data: Dict[str, Any]) -> None:
        """
        Validate that all configured fields exist in the instance data.

        Fields whose display type is marked ``lazy_populated`` in the
        display registry (``interactive_chat``, ``live_agent``,
        ``live_coding_agent``) are exempt -- their data key is expected
        to be written after initial render (by a live agent session).

        Args:
            instance_data: The instance data dictionary

        Raises:
            InstanceDisplayError: If any non-lazy field is missing; the
                message names the first missing field in configuration order.
        """
        required_count = 0
        missing_required: List[str] = []

        # Single pass: one registry lookup per field.
        for field in self.fields:
            key = field["key"]
            field_type = field.get("type", "")
            is_lazy = display_registry.is_lazy_populated(field_type)
            if not is_lazy:
                required_count += 1
            if key in instance_data:
                continue
            if is_lazy:
                logger.debug(
                    "Skipping validation for lazy-populated field '%s' (type=%s); "
                    "data is written after initial render.",
                    key, field_type,
                )
                continue
            missing_required.append(key)

        if not missing_required:
            return

        available = list(instance_data.keys())

        # Every non-lazy field missing is almost always a config/data
        # key mismatch (e.g. fields reference task_description but the
        # data uses task), not a transient lazy state -- make it loud so
        # it isn't silently rendered as a blank page.
        if len(missing_required) == required_count:
            logger.error(
                "instance_display: ALL %d non-lazy field(s) %s are absent "
                "from the instance data (available keys: %s). This is "
                "almost certainly a config/data key mismatch.",
                required_count, missing_required, available,
            )

        raise InstanceDisplayError(
            f"Display field '{missing_required[0]}' not found in instance data. "
            f"Available fields: {available}"
        )

    def _render_field(self, field: Dict[str, Any], instance_data: Dict[str, Any]) -> str:
        """
        Render a single display field.

        Args:
            field: The field configuration
            instance_data: The instance data dictionary

        Returns:
            HTML string for the field. If the registry raises ``ValueError``
            an escaped inline error ``<div>`` is returned instead.
        """
        key = field["key"]
        # ``.get`` keeps this consistent with __init__/_validate_fields, which
        # tolerate a missing ``type``. NOTE(review): presumably the registry
        # rejects the empty type with ValueError, which is handled below --
        # confirm against display_registry.render.
        field_type = field.get("type", "")
        data = instance_data.get(key)

        # For format-based display types, process the file if data is a file path
        if field_type in self._FORMAT_DISPLAY_TYPES and isinstance(data, str):
            data = self._process_format_file(data, field_type, field)

        # Keep the try body to the one call expected to raise ValueError.
        try:
            rendered = display_registry.render(field_type, field, data)
        except ValueError as e:
            logger.error(f"Error rendering field '{key}': {e}")
            return self._error_html(f'Error rendering field "{key}": {e}')

        # Resizable wrapping: the per-field option overrides the global
        # setting, which defaults to enabled.
        global_resizable = self.display_config.get("resizable", True)
        field_options = field.get("display_options") or {}
        if field_options.get("resizable", global_resizable):
            rendered = self._wrap_resizable(rendered, field)
        return rendered

    def _wrap_resizable(self, inner_html: str, field: Dict[str, Any]) -> str:
        """
        Wrap rendered content in a resizable container.

        ``max_height`` / ``min_height`` are read from the field's
        ``display_options`` (defaults 500 and 100). Bare numbers are treated
        as pixels; values that already carry a CSS unit are used as given.

        Args:
            inner_html: The rendered field HTML
            field: The field configuration

        Returns:
            HTML wrapped in resizable container
        """
        display_options = field.get("display_options") or {}
        max_height = self._css_length(display_options.get("max_height", 500))
        min_height = self._css_length(display_options.get("min_height", 100))
        # Escaped because the values originate in configuration and end up
        # inside an HTML attribute.
        style = html_module.escape(
            f"max-height: {max_height}; min-height: {min_height}; position: relative;"
        )
        return (
            f'<div class="display-field-resizable" style="{style}">\n'
            f"{inner_html}\n"
            "</div>"
        )

    def _process_format_file(
        self,
        file_path: str,
        display_type: str,
        field: Dict[str, Any]
    ) -> Any:
        """
        Process a file using the format handler system.

        If the data is a file path and a format handler is available,
        extract the content and return FormatOutput data. Every failure mode
        (handlers not importable, no handler for the path, extraction error)
        falls back to returning ``file_path`` unchanged.

        Args:
            file_path: Path to the file to process
            display_type: The display type (pdf, document, etc.)
            field: The field configuration

        Returns:
            Either the original file_path (for client-side rendering like PDF.js)
            or extracted content dict for server-side rendering
        """
        try:
            from potato.format_handlers import format_handler_registry
        except ImportError:
            # Format handlers not available, return original data
            logger.debug("Format handlers not available, using raw file path")
            return file_path

        display_options = field.get("display_options") or {}

        # PDFs default to client-side rendering (path returned as-is); the
        # format handler is used only when server_extract is set.
        if display_type == "pdf" and not display_options.get("server_extract", False):
            return file_path

        # Check if format handler can handle this file
        if not format_handler_registry.can_handle(file_path):
            logger.debug(f"No format handler for {file_path}, using raw data")
            return file_path

        try:
            extraction_options = display_options.get("extraction_options", {})
            output = format_handler_registry.extract(file_path, options=extraction_options)
        except Exception as e:
            # Deliberate best-effort: any extraction failure degrades to
            # rendering from the raw path rather than failing the page.
            logger.warning(f"Format handler extraction failed for {file_path}: {e}")
            return file_path

        # Return as dict for the display renderer
        return {
            "text": output.text,
            "rendered_html": output.rendered_html,
            "coordinate_map": output.coordinate_map,
            "metadata": output.metadata,
            "format_name": output.format_name,
            "source_path": output.source_path,
        }

    def get_template_variables(self, instance_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get template variables for Jinja access.

        Returns a dictionary with:
        - display_html: The complete rendered display HTML
        - display_fields: Dictionary of field key -> rendered HTML
        - display_raw: Dictionary of field key -> raw data value
        - span_targets: List of field keys that are span targets
        - multi_span_mode: Boolean indicating if multiple span targets exist
        - has_instance_display: Boolean indicating if instance_display is configured
        - display_error: Only present when field validation failed; the
          validation message. The other entries keep their empty defaults.

        Args:
            instance_data: The instance data dictionary

        Returns:
            Dictionary of template variables
        """
        result: Dict[str, Any] = {
            "display_html": "",
            "display_fields": {},
            "display_raw": {},
            "span_targets": self.span_targets,
            "multi_span_mode": len(self.span_targets) > 1,
            "has_instance_display": self.has_instance_display,
        }
        if not self.has_instance_display:
            return result

        # Validate fields. A missing field here is a real config problem
        # (lazy-populated types like interactive_chat are already filtered
        # out by _validate_fields), but it is surfaced inline via
        # ``display_error`` so the page still loads -- WARN is the
        # right severity, not ERROR.
        try:
            self._validate_fields(instance_data)
        except InstanceDisplayError as e:
            logger.warning(f"Field validation failed: {e}")
            result["display_error"] = str(e)
            return result

        # Render complete display
        result["display_html"] = self.render(instance_data)

        # Render individual fields and collect raw data.
        # NOTE(review): unlike render(), the per-field HTML here is produced
        # from the raw value -- no format-handler processing and no resizable
        # wrapper. Confirm that difference is intended.
        for field in self.fields:
            key = field["key"]
            data = instance_data.get(key)
            result["display_raw"][key] = data
            try:
                result["display_fields"][key] = display_registry.render(
                    field.get("type", ""), field, data
                )
            except ValueError as e:
                logger.error(f"Error rendering field '{key}': {e}")
                result["display_fields"][key] = self._error_html(f"Error: {e}")
        return result

    def get_span_target_fields(self) -> List[Dict[str, Any]]:
        """
        Get the list of fields configured as span targets.

        Note that this returns every field flagged ``span_target``; it does
        not apply the type-support filter used to build ``span_targets``.

        Returns:
            List of field configuration dictionaries for span targets
        """
        return [field for field in self.fields if field.get("span_target")]

    def get_primary_text_field(self) -> Optional[str]:
        """
        Get the primary text field key for legacy compatibility.

        Returns the first span target if any, otherwise the first text field,
        otherwise None.

        Returns:
            Field key string or None
        """
        if self.span_targets:
            return self.span_targets[0]
        return next(
            (field["key"] for field in self.fields if field.get("type") == "text"),
            None,
        )

    def should_use_legacy_display(self) -> bool:
        """
        Check if legacy display mode should be used.

        Returns True if no instance_display is configured, meaning
        the template should fall back to displaying text_key.

        Returns:
            True if legacy mode should be used
        """
        return not self.has_instance_display
def get_instance_display_renderer(config: Dict[str, Any]) -> InstanceDisplayRenderer:
    """
    Build an InstanceDisplayRenderer for ``config``.

    Convenience factory: each call constructs a fresh renderer. Nothing is
    cached at present, although renderers could later be cached per config
    hash.

    Args:
        config: The configuration dictionary

    Returns:
        InstanceDisplayRenderer instance
    """
    renderer = InstanceDisplayRenderer(config)
    return renderer