"""
Instance Display Renderer

Provides the main InstanceDisplayRenderer class that handles rendering
instance content for display, separate from annotation collection.

This module enables the new `instance_display` configuration section
that explicitly defines what content to show annotators.

Usage:
    from potato.server_utils.instance_display import InstanceDisplayRenderer

    renderer = InstanceDisplayRenderer(config)
    html = renderer.render(instance_data)
    template_vars = renderer.get_template_variables(instance_data)
"""
import html as html_module
import logging
from typing import Dict, Any, List, Optional
from .displays import display_registry
logger = logging.getLogger(__name__)
class InstanceDisplayError(Exception):
    """Exception raised when instance display rendering fails."""
class InstanceDisplayRenderer:
    """
    Renders instance content for display based on configuration.

    This class separates content display from annotation collection,
    allowing any combination of display types with any annotation schemes.

    Configuration is read from the ``instance_display`` section of the
    config dictionary: ``fields`` (list of field dicts, each with a
    ``key`` and a ``type``), ``layout`` (``direction`` / ``gap``) and an
    optional global ``resizable`` flag.
    """

    # Display types for which string data is handed to
    # ``_process_format_file`` before rendering.
    _FORMAT_DISPLAY_TYPES = ("pdf", "document", "spreadsheet", "code")

    def __init__(self, config: Dict[str, Any]):
        """
        Initialize the renderer.

        Args:
            config: The full configuration dictionary
        """
        self.config = config
        # ``or {}`` / ``or []`` rather than a .get() default: a key that is
        # present but null (e.g. an empty YAML section) would otherwise
        # yield None and crash on the next attribute access or iteration.
        self.display_config = config.get("instance_display") or {}
        self.fields = self.display_config.get("fields") or []
        self.layout = self.display_config.get("layout") or {}

        # Extract span targets -- query the registry instead of a hardcoded
        # list -- and warn about span_target on unsupported types.
        self.span_targets = []
        for f in self.fields:
            if not f.get("span_target"):
                continue
            if display_registry.type_supports_span_target(f.get("type", "")):
                self.span_targets.append(f["key"])
            else:
                logger.warning(
                    "Field '%s' has span_target=true but display type "
                    "'%s' does not support span annotation. "
                    "Span annotation will not work on this field.",
                    f.get("key"), f.get("type"),
                )

        # Track if we have instance_display configured
        self.has_instance_display = bool(self.fields)

        logger.debug(
            "InstanceDisplayRenderer initialized: "
            "has_instance_display=%s, span_targets=%s",
            self.has_instance_display, self.span_targets,
        )

    def render(self, instance_data: Dict[str, Any]) -> str:
        """
        Render all display fields for an instance.

        The rendered fields are wrapped in a container ``<div>`` whose
        ``data-instance-fields`` attribute carries the scalar values
        (str/int/float/bool/None) of ``instance_data`` as HTML-escaped JSON.

        Args:
            instance_data: The instance data dictionary

        Returns:
            HTML string containing all rendered display fields, or an
            empty string when no instance_display is configured

        Raises:
            InstanceDisplayError: If a required field is missing from instance data
        """
        if not self.has_instance_display:
            # No instance_display configured, return empty
            # (legacy behavior will be handled by the template)
            return ""

        # Validate all required fields exist
        self._validate_fields(instance_data)

        # Layout values come from configuration and are placed inside HTML
        # attributes, so escape them to keep the markup well-formed.
        direction = html_module.escape(str(self.layout.get("direction", "vertical")))
        gap = html_module.escape(str(self.layout.get("gap", "20px")))
        class_attr = " ".join(["instance-display-container", f"layout-{direction}"])
        container_style = f"gap: {gap};"

        # Render each field and combine
        fields_html = "\n".join(
            self._render_field(field, instance_data) for field in self.fields
        )

        # Build data attributes for raw field access by annotation schemas.
        # Only scalar values are included; containers are skipped.
        import json
        raw_data = {
            key: value
            for key, value in instance_data.items()
            if isinstance(value, (str, int, float, bool)) or value is None
        }
        raw_data_json = html_module.escape(json.dumps(raw_data))

        return (
            f'\n<div class="{class_attr}" style="{container_style}" '
            f'data-instance-fields="{raw_data_json}">\n'
            f'{fields_html}\n'
            '</div>\n'
        )

    def _validate_fields(self, instance_data: Dict[str, Any]) -> None:
        """
        Validate that all configured fields exist in the instance data.

        Fields whose display type is marked ``lazy_populated`` in the
        display registry (``interactive_chat``, ``live_agent``,
        ``live_coding_agent``) are exempt -- their data key is expected
        to be written after initial render (by a live agent session).

        Args:
            instance_data: The instance data dictionary

        Raises:
            InstanceDisplayError: If any non-lazy field is missing
        """
        non_lazy = [
            f for f in self.fields
            if not display_registry.is_lazy_populated(f.get("type", ""))
        ]
        missing_non_lazy = [
            f["key"] for f in non_lazy if f["key"] not in instance_data
        ]

        # Every non-lazy field missing is almost always a config/data
        # key mismatch (e.g. fields reference task_description but the
        # data uses task), not a transient lazy state -- make it loud so
        # it isn't silently rendered as a blank page.
        if non_lazy and len(missing_non_lazy) == len(non_lazy):
            logger.error(
                "instance_display: ALL %d non-lazy field(s) %s are absent "
                "from the instance data (available keys: %s). This is "
                "almost certainly a config/data key mismatch.",
                len(non_lazy), missing_non_lazy,
                list(instance_data.keys()),
            )

        for field in self.fields:
            key = field["key"]
            if key in instance_data:
                continue

            field_type = field.get("type", "")
            if display_registry.is_lazy_populated(field_type):
                logger.debug(
                    "Skipping validation for lazy-populated field '%s' (type=%s); "
                    "data is written after initial render.",
                    key, field_type,
                )
                continue

            # First missing non-lazy field aborts validation.
            available = list(instance_data.keys())
            raise InstanceDisplayError(
                f"Display field '{key}' not found in instance data. "
                f"Available fields: {available}"
            )

    def _render_field(self, field: Dict[str, Any], instance_data: Dict[str, Any]) -> str:
        """
        Render a single display field.

        Args:
            field: The field configuration
            instance_data: The instance data dictionary

        Returns:
            HTML string for the field. If the registry raises ValueError,
            an inline ``display-error`` element is returned instead.
        """
        key = field["key"]
        field_type = field["type"]
        data = instance_data.get(key)

        # For format-based display types, process the file if data is a file path
        if field_type in self._FORMAT_DISPLAY_TYPES and isinstance(data, str):
            data = self._process_format_file(data, field_type, field)

        try:
            rendered = display_registry.render(field_type, field, data)
        except ValueError as e:
            logger.error("Error rendering field '%s': %s", key, e)
            # The key and the exception text may contain markup characters;
            # escape them before embedding in the page.
            safe_key = html_module.escape(str(key))
            safe_msg = html_module.escape(str(e))
            return (
                f'<div class="display-error">Error rendering field '
                f'"{safe_key}": {safe_msg}</div>'
            )

        # Check if resizable is enabled (global setting or per-field override)
        global_resizable = self.display_config.get("resizable", True)
        display_options = field.get("display_options") or {}
        field_resizable = display_options.get("resizable", global_resizable)

        # Wrap with resizable container if enabled
        if field_resizable:
            rendered = self._wrap_resizable(rendered, field)
        return rendered

    def _wrap_resizable(self, inner_html: str, field: Dict[str, Any]) -> str:
        """
        Wrap rendered content in a resizable container.

        Height limits are taken from the field's ``display_options``
        (``max_height`` default 500, ``min_height`` default 100), in px.

        Args:
            inner_html: The rendered field HTML
            field: The field configuration

        Returns:
            HTML wrapped in resizable container
        """
        # Tolerate ``display_options: null`` in the configuration.
        display_options = field.get("display_options") or {}
        max_height = display_options.get("max_height", 500)
        min_height = display_options.get("min_height", 100)
        style = (
            f"max-height: {max_height}px; min-height: {min_height}px; "
            "position: relative;"
        )
        return (
            f'<div class="display-field-resizable" style="{style}">\n'
            f'{inner_html}\n'
            '</div>'
        )

    def _process_format_file(
        self,
        file_path: str,
        display_type: str,
        field: Dict[str, Any]
    ) -> Any:
        """
        Process a file using the format handler system.

        If the data is a file path and a format handler is available,
        extract the content and return FormatOutput data.

        Args:
            file_path: Path to the file to process
            display_type: The display type (pdf, document, etc.)
            field: The field configuration

        Returns:
            Either the original file_path (for client-side rendering like PDF.js)
            or extracted content dict for server-side rendering
        """
        try:
            from potato.format_handlers import format_handler_registry
        except ImportError:
            # Format handlers not available, return original data
            logger.debug("Format handlers not available, using raw file path")
            return file_path

        display_options = field.get("display_options") or {}

        # By default, PDFs use client-side rendering (return path as-is);
        # the format handler is used only when server_extract is set.
        if display_type == "pdf" and not display_options.get("server_extract", False):
            return file_path

        # Check if format handler can handle this file
        if not format_handler_registry.can_handle(file_path):
            logger.debug("No format handler for %s, using raw data", file_path)
            return file_path

        try:
            # Extract content using format handler
            extraction_options = display_options.get("extraction_options") or {}
            output = format_handler_registry.extract(file_path, options=extraction_options)
            # Return as dict for the display renderer
            return {
                "text": output.text,
                "rendered_html": output.rendered_html,
                "coordinate_map": output.coordinate_map,
                "metadata": output.metadata,
                "format_name": output.format_name,
                "source_path": output.source_path,
            }
        except Exception as e:
            # Deliberate best-effort: any extraction failure falls back to
            # the raw path so the field still renders.
            logger.warning("Format handler extraction failed for %s: %s", file_path, e)
            return file_path

    def get_template_variables(self, instance_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Get template variables for Jinja access.

        Returns a dictionary with:
        - display_html: The complete rendered display HTML
        - display_fields: Dictionary of field key -> rendered HTML
        - display_raw: Dictionary of field key -> raw data value
        - span_targets: List of field keys that are span targets
        - multi_span_mode: Boolean indicating if multiple span targets exist
        - has_instance_display: Boolean indicating if instance_display is configured
        - display_error: Present only when field validation failed; the
          error message (the other display entries are left empty)

        Entries in ``display_fields`` are rendered directly by the display
        registry from the raw value: no format-file processing and no
        resizable wrapper is applied to them.

        Args:
            instance_data: The instance data dictionary

        Returns:
            Dictionary of template variables
        """
        result = {
            "display_html": "",
            "display_fields": {},
            "display_raw": {},
            "span_targets": self.span_targets,
            "multi_span_mode": len(self.span_targets) > 1,
            "has_instance_display": self.has_instance_display,
        }
        if not self.has_instance_display:
            return result

        # Validate fields. A missing field here is a real config problem
        # (lazy-populated types like interactive_chat are already filtered
        # out by _validate_fields), but the renderer surfaces it inline
        # via ``display_error`` so the page still loads -- WARN is the
        # right severity, not ERROR.
        try:
            self._validate_fields(instance_data)
        except InstanceDisplayError as e:
            logger.warning("Field validation failed: %s", e)
            result["display_error"] = str(e)
            return result

        # Render complete display
        result["display_html"] = self.render(instance_data)

        # Render individual fields and collect raw data
        for field in self.fields:
            key = field["key"]
            field_type = field["type"]
            data = instance_data.get(key)
            result["display_raw"][key] = data
            try:
                result["display_fields"][key] = display_registry.render(field_type, field, data)
            except ValueError as e:
                logger.error("Error rendering field '%s': %s", key, e)
                # Escape the exception text before embedding it in HTML.
                safe_msg = html_module.escape(str(e))
                result["display_fields"][key] = (
                    f'<div class="display-error">Error: {safe_msg}</div>'
                )
        return result

    def get_span_target_fields(self) -> List[Dict[str, Any]]:
        """
        Get the list of fields configured as span targets.

        This filters on the ``span_target`` flag only; unlike
        ``self.span_targets`` it does not consult the display registry.

        Returns:
            List of field configuration dictionaries for span targets
        """
        return [f for f in self.fields if f.get("span_target")]

    def get_primary_text_field(self) -> Optional[str]:
        """
        Get the primary text field key for legacy compatibility.

        Returns the first span target if any, otherwise the first text field,
        otherwise None.

        Returns:
            Field key string or None
        """
        # First, check span targets
        if self.span_targets:
            return self.span_targets[0]

        # Then look for any text field
        for field in self.fields:
            if field.get("type") == "text":
                return field["key"]
        return None

    def should_use_legacy_display(self) -> bool:
        """
        Check if legacy display mode should be used.

        Returns True if no instance_display is configured, meaning
        the template should fall back to displaying text_key.

        Returns:
            True if legacy mode should be used
        """
        return not self.has_instance_display
def get_instance_display_renderer(config: Dict[str, Any]) -> InstanceDisplayRenderer:
    """
    Get or create an InstanceDisplayRenderer for the given config.

    This is a convenience function that creates a renderer.
    In the future, this could cache renderers per config hash.

    Args:
        config: The configuration dictionary

    Returns:
        InstanceDisplayRenderer instance (a new one on every call)
    """
    return InstanceDisplayRenderer(config)
|