""" Span Layout """ import logging from collections.abc import Mapping from collections import defaultdict from potato.ai.ai_help_wrapper import get_ai_wrapper, get_dynamic_ai_help from potato.server_utils.config_module import config from .identifier_utils import ( safe_generate_layout, generate_element_identifier, generate_validation_attribute, escape_html_content, generate_layout_attributes ) from item_state_management import SpanAnnotation logger = logging.getLogger(__name__) SPAN_COLOR_PALETTE = [ "(110, 86, 207)", # Primary purple #6E56CF "(239, 68, 68)", # Destructive red #EF4444 "(113, 113, 122)", # Gray #71717A "(245, 158, 11)", # Amber #F59E0B "(16, 185, 129)", # Success green #10B981 "(59, 130, 246)", # Blue #3B82F6 "(220, 38, 38)", # Red #DC2626 "(139, 92, 246)", # Purple #8B5CF6 "(156, 163, 175)", # Light gray #9CA3AF "(107, 114, 128)", # Medium gray #6B7280 "(55, 65, 81)", # Dark gray #374151 "(249, 115, 22)", # Orange #F97316 "(6, 182, 212)", # Cyan #06B6D4 "(236, 72, 153)", # Pink #EC4899 "(5, 150, 105)", # Dark green #059669 "(124, 58, 237)", # Violet #7C3AED "(22, 163, 74)", # Green #16A34A "(234, 88, 12)", # Dark orange #EA580C "(37, 99, 235)", # Blue #2563EB "(127, 29, 29)", # Dark red #7F1D1D "(168, 85, 247)", # Purple #A855F7 "(34, 197, 94)", # Green #22C55E ] span_counter = 0 SPAN_COLOR_PALETTE_LENGTH = len(SPAN_COLOR_PALETTE) def reset_span_counter(): """Reset the span color counter to 0. Used for test isolation.""" global span_counter span_counter = 0 def get_span_color(schema, span_label): """ Returns the color of a span with this label as a string with an RGB triple in parentheses, or None if the span is unmapped. 
""" if "ui" not in config or "spans" not in config["ui"]: return None span_ui = config["ui"]["spans"] if "span_colors" not in span_ui: return None if schema in span_ui["span_colors"]: schema_colors = span_ui["span_colors"][schema] if span_label in schema_colors: return schema_colors[span_label] return None def set_span_color(schema, span_label, color): """ Sets the color of a span with this label as a string with an RGB triple in parentheses. :color: a string containing an RGB triple in parentheses """ if "ui" not in config: ui = {} config["ui"] = ui else: ui = config["ui"] if "spans" not in ui: span_ui = {} ui["spans"] = span_ui else: span_ui = ui["spans"] if "span_colors" not in span_ui: span_colors = defaultdict(dict) span_ui["span_colors"] = span_colors else: span_colors = span_ui["span_colors"] # Ensure the schema key exists (span_colors may be a regular dict, not defaultdict) if schema not in span_colors: span_colors[schema] = {} span_colors[schema][span_label] = color def _generate_span_layout_internal(annotation_scheme, horizontal=False): """ Internal function to generate span layout after validation. Configuration options: allow_discontinuous (bool): Enable discontinuous span selection via Ctrl/Cmd+click. When enabled, users can hold Ctrl (Windows/Linux) or Cmd (Mac) and click to add additional non-contiguous text ranges to an existing span annotation. Default: false entity_linking (dict): Configuration for knowledge base entity linking. When enabled, users can link annotated spans to external knowledge bases like Wikidata or UMLS. Configuration options: - enabled (bool): Whether entity linking is enabled. Default: false - knowledge_bases (list): List of KB configurations, each with: - name (str): Display name for the KB - type (str): KB type ("wikidata", "umls", "rest") - api_key (str): Optional API key for authenticated services - language (str): Language code for results. Default: "en" - auto_search (bool): Automatically search when span is created. 
Default: true - required (bool): Require entity link before saving span. Default: false Example: entity_linking: enabled: true knowledge_bases: - name: wikidata type: wikidata language: en - name: umls type: umls api_key: ${UMLS_API_KEY} auto_search: true required: false """ import json as json_module # Initialize form wrapper scheme_name = annotation_scheme["name"] # Get target_field for multi-span support (optional) target_field = annotation_scheme.get("target_field", "") target_field_attr = f' data-target-field="{escape_html_content(target_field)}"' if target_field else "" # Check for discontinuous span support allow_discontinuous = annotation_scheme.get("allow_discontinuous", False) discontinuous_attr = ' data-allow-discontinuous="true"' if allow_discontinuous else "" # Check for entity linking support entity_linking = annotation_scheme.get("entity_linking", {}) entity_linking_enabled = entity_linking.get("enabled", False) entity_linking_attr = "" if entity_linking_enabled: # Serialize entity_linking config to JSON for frontend el_config = { "enabled": True, "knowledge_bases": entity_linking.get("knowledge_bases", []), "auto_search": entity_linking.get("auto_search", True), "required": entity_linking.get("required", False), "multi_select": entity_linking.get("multi_select", False) } el_json = json_module.dumps(el_config) entity_linking_attr = f' data-entity-linking=\'{escape_html_content(el_json)}\'' # Check for show_span_labels option (default: true) show_span_labels = annotation_scheme.get("show_span_labels", True) show_labels_attr = '' if show_span_labels else ' data-show-span-labels="false"' # Get layout attributes for grid positioning layout_attrs = generate_layout_attributes(annotation_scheme) schematic = f"""
" return schematic, key_bindings def _generate_tooltip(label_data): """ Generate tooltip HTML attribute from label data. Args: label_data (dict): Label configuration containing tooltip information Returns: str: Tooltip HTML attribute or empty string if no tooltip """ tooltip_text = "" if "tooltip" in label_data: tooltip_text = label_data["tooltip"] elif "tooltip_file" in label_data: try: with open(label_data["tooltip_file"], "rt", encoding="utf-8") as f: tooltip_text = "".join(f.readlines()) except Exception as e: logger.error(f"Failed to read tooltip file: {e}") return "" if tooltip_text: escaped_tooltip = escape_html_content(tooltip_text) return f'data-toggle="tooltip" data-html="true" data-placement="top" title="{escaped_tooltip}"' return "" def generate_span_layout(annotation_scheme, horizontal=False): """ Generate span layout HTML for the given annotation scheme. Args: annotation_scheme (dict): The annotation scheme configuration horizontal (bool): Whether to display horizontally Returns: tuple: (HTML string, key bindings list) """ return safe_generate_layout(annotation_scheme, _generate_span_layout_internal, horizontal) def render_span_annotations(text, span_annotations, target_field=None): """ Render span annotations into HTML with boundary-based algorithm. Supports discontinuous spans with additional_parts. 
def render_span_annotations(text, span_annotations, target_field=None):
    """
    Render span annotations into HTML with boundary-based algorithm.
    Supports discontinuous spans with additional_parts.

    The spans are normalized to ``(span_id, span_data)`` pairs, turned into
    start/end boundary events, sorted by character offset, and the text is
    rebuilt by walking those events in order.

    Args:
        text (str): The original text to annotate
        span_annotations: One of
            - dict of span_id -> span data (ordered by ``start``; this form is
              not filtered by ``target_field``),
            - list of SpanAnnotation objects (anything exposing ``get_id``)
              and/or span dicts; items of any other kind are ignored,
            - field-keyed dict ``{field_key: [span_list]}``, recognized by its
              first value being a list.
            Span data must provide ``start``, ``end``, ``schema`` and ``name``.
        target_field (str, optional): Filter spans to only those targeting this
            field. Spans that carry no target field of their own are kept.

    Returns:
        str: The rebuilt text; ``text`` itself when ``span_annotations`` is empty
    """
    if not span_annotations:
        return text

    # Handle field-keyed format for multi-span mode: {field_key: [spans]}
    if isinstance(span_annotations, dict):
        # Check if this is a field-keyed dict (values are lists).
        # Only the first value is inspected to make that decision.
        first_value = next(iter(span_annotations.values()), None)
        if isinstance(first_value, list):
            # Field-keyed format - extract spans for target_field
            if target_field:
                field_spans = span_annotations.get(target_field, [])
                # target_field=None: the list was already selected by key, so
                # the recursive call must not filter it a second time.
                return render_span_annotations(text, field_spans, target_field=None)
            else:
                # No target field specified, flatten all spans
                all_spans = []
                for field_spans in span_annotations.values():
                    all_spans.extend(field_spans)
                return render_span_annotations(text, all_spans, target_field=None)

        # Regular dict format: span_id -> span_data
        sorted_spans = sorted(
            span_annotations.items(),
            key=lambda x: x[1].get('start', 0)
        )
    else:
        # Convert list of SpanAnnotation objects to list of tuples
        spans_as_tuples = []
        for span in span_annotations:
            if hasattr(span, 'get_id'):
                # SpanAnnotation object with methods
                # Filter by target_field if specified
                span_target = span.get_target_field() if hasattr(span, 'get_target_field') else None
                if target_field and span_target and span_target != target_field:
                    continue  # Skip spans not targeting this field

                span_id = span.get_id()

                # Get additional_parts for discontinuous spans
                # (getter preferred, plain attribute as fallback; None becomes [])
                additional_parts = []
                if hasattr(span, 'get_additional_parts'):
                    additional_parts = span.get_additional_parts() or []
                elif hasattr(span, 'additional_parts'):
                    additional_parts = getattr(span, 'additional_parts', []) or []

                # Get KB entity linking data
                kb_id = None
                kb_source = None
                kb_label = None
                if hasattr(span, 'get_kb_id'):
                    kb_id = span.get_kb_id()
                    kb_source = span.get_kb_source() if hasattr(span, 'get_kb_source') else None
                    kb_label = span.get_kb_label() if hasattr(span, 'get_kb_label') else None
                elif hasattr(span, 'kb_id'):
                    kb_id = getattr(span, 'kb_id', None)
                    kb_source = getattr(span, 'kb_source', None)
                    kb_label = getattr(span, 'kb_label', None)

                # Flatten the object into the same dict shape used for dict spans
                span_data = {
                    'schema': span.get_schema() if hasattr(span, 'get_schema') else getattr(span, 'schema', ''),
                    'name': span.get_name() if hasattr(span, 'get_name') else getattr(span, 'name', ''),
                    'title': span.get_title() if hasattr(span, 'get_title') else getattr(span, 'title', ''),
                    'start': span.get_start() if hasattr(span, 'get_start') else getattr(span, 'start', 0),
                    'end': span.get_end() if hasattr(span, 'get_end') else getattr(span, 'end', 0),
                    'target_field': span_target,
                    'additional_parts': additional_parts,
                    'kb_id': kb_id,
                    'kb_source': kb_source,
                    'kb_label': kb_label,
                }
            elif isinstance(span, dict):
                # Filter by target_field if specified
                span_target = span.get('target_field')
                if target_field and span_target and span_target != target_field:
                    continue  # Skip spans not targeting this field

                # Dict spans without an explicit id get one derived from their offsets
                span_id = span.get('id', f"span_{span.get('start', 0)}_{span.get('end', 0)}")
                span_data = span
            else:
                # Neither a SpanAnnotation-like object nor a dict: ignore it
                continue

            spans_as_tuples.append((span_id, span_data))

        sorted_spans = sorted(spans_as_tuples, key=lambda x: x[1].get('start', 0))

    # Create boundary points (including additional_parts for discontinuous spans)
    # Each entry is (position, 'start' | 'end', span_id, span_data).
    boundaries = []
    for span_id, span_data in sorted_spans:
        # Add primary span boundaries
        # NOTE: 'start'/'end' are indexed directly here, so a span dict lacking
        # them raises KeyError even though the sort above defaults them to 0.
        boundaries.append((span_data['start'], 'start', span_id, span_data))
        boundaries.append((span_data['end'], 'end', span_id, span_data))

        # Add boundaries for additional parts (discontinuous spans)
        additional_parts = span_data.get('additional_parts', [])
        for part in additional_parts:
            # Create a modified span_data for this part that includes discontinuous marker
            # (shallow copy, so the caller's span data is not modified)
            part_data = span_data.copy()
            part_data['_is_discontinuous_part'] = True
            boundaries.append((part['start'], 'start', span_id, part_data))
            boundaries.append((part['end'], 'end', span_id, part_data))

    # Sort boundaries by position
    # The key is the position only; list.sort is stable, so boundaries that
    # share a position keep the order in which they were appended above.
    boundaries.sort(key=lambda x: x[0])

    # Build the rendered text
    result = ""
    current_pos = 0
    # NOTE(review): active_spans is appended to and filtered below but never
    # read anywhere in this function.
    active_spans = []

    for pos, boundary_type, span_id, span_data in boundaries:
        # Add text before this boundary
        if pos > current_pos:
            result += text[current_pos:pos]

        if boundary_type == 'start':
            # Start a new span
            active_spans.append(span_id)

            # Get color for this span
            color = get_span_color(span_data['schema'], span_data['name'])
            if not color:
                color = "(128, 128, 128)"  # Default gray

            # Convert RGB to hex with alpha
            # Expects exactly the "(r, g, b)" form with ", " separators; any
            # other spelling makes int() or the indexing below raise.
            color_parts = color.strip("()").split(", ")
            r, g, b = int(color_parts[0]), int(color_parts[1]), int(color_parts[2])
            hex_color = f"#{r:02x}{g:02x}{b:02x}66"  # 66 = 40% alpha to match label background

            # Add target_field attribute if present
            # NOTE(review): unlike the kb_* values below, target_field is
            # interpolated without escape_html_content.
            target_attr = f' data-target-field="{span_data.get("target_field", "")}"' if span_data.get("target_field") else ""

            # Check if this is a discontinuous span part
            # (true for the additional parts and for the primary range of any
            # span that has additional parts)
            is_discontinuous = span_data.get('_is_discontinuous_part', False) or len(span_data.get('additional_parts', [])) > 0
            discontinuous_class = ' discontinuous-part' if is_discontinuous else ''
            discontinuous_attr = ' data-discontinuous="true"' if is_discontinuous else ""

            # Add KB entity linking attributes
            kb_id = span_data.get('kb_id', '')
            kb_source = span_data.get('kb_source', '')
            kb_label = span_data.get('kb_label', '')
            kb_attr = ""
            kb_class = ""
            if kb_id:
                kb_attr = f' data-kb-id="{escape_html_content(kb_id)}" data-kb-source="{escape_html_content(kb_source)}"'
                if kb_label:
                    kb_attr += f' data-kb-label="{escape_html_content(kb_label)}"'
                kb_class = ' has-entity-link'

            # NOTE(review): the literal appended here is empty, so hex_color,
            # target_attr, the discontinuous_* values and the kb_* values
            # computed above are never emitted. Presumably this was the opening
            # tag of the highlight element - confirm the markup was not lost.
            result += f''
        elif boundary_type == 'end':
            # End the span
            # NOTE(review): this literal is empty as well; presumably the
            # matching closing tag - confirm.
            result += ""
            # Remove from active spans
            active_spans = [s for s in active_spans if s != span_id]

        current_pos = pos

    # Add remaining text
    if current_pos < len(text):
        result += text[current_pos:]

    return result
def _span_targets_field(span, target_field):
    """Return True if ``span`` (object with getter, or dict) targets ``target_field``."""
    if hasattr(span, 'get_target_field'):
        return span.get_target_field() == target_field
    if isinstance(span, dict):
        return span.get('target_field') == target_field
    # Unknown span representation: never a match, even for target_field=None
    return False


def get_spans_for_field(span_annotations, target_field):
    """
    Extract spans for a specific target field from span annotations.

    Args:
        span_annotations: Span annotations in any of these formats:
            - field-keyed dict ``{field_key: [span_list]}`` (recognized by its
              first value being a list): the list stored under
              ``target_field`` is returned as-is,
            - dict of span_id -> span: the values are filtered,
            - list or tuple of spans: the items are filtered.
            A span is either an object exposing ``get_target_field()`` or a
            dict with a ``target_field`` key; anything else is skipped.
        target_field: The field key to filter by

    Returns:
        List of spans targeting the specified field (empty for empty or
        unsupported input)
    """
    if not span_annotations:
        return []

    if isinstance(span_annotations, dict):
        first_value = next(iter(span_annotations.values()), None)
        if isinstance(first_value, list):
            # Handle field-keyed format
            return span_annotations.get(target_field, [])
        # Plain span_id -> span mapping: previously this shape always
        # produced [], now its values are filtered like a list of spans.
        candidates = span_annotations.values()
    elif isinstance(span_annotations, (list, tuple)):
        candidates = span_annotations
    else:
        return []

    return [span for span in candidates if _span_targets_field(span, target_field)]