""" Handle all front-end related functionalities. """ import base64 import os import logging import json import re import hashlib from collections import OrderedDict #add local module from pathlib import Path import sys path_root = Path(__file__).parents[2] sys.path.append(str(path_root)) from potato.server_utils.config_module import config from potato.server_utils.schemas.registry import schema_registry from potato.server_utils.schemas.keybinding_allocator import allocate_keybindings logger = logging.getLogger(__name__) # TODO: Move this to config.yaml files # Items which will be displayed in the popup statistics sidebar STATS_KEYS = { "Annotated instances": "Annotated instances", "Total working time": "Total working time", "Average time on each instance": "Average time on each instance", "Agreement": "Agreement", } # Default name for the generated annotation layout file DEFAULT_ANNOTATION_LAYOUT_SUBDIR = "layouts" DEFAULT_ANNOTATION_LAYOUT_FILENAME = "task_layout.html" SUPPORTED_HEADER_LOGO_EXTENSIONS = {'.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.webp'} EXTENSION_TO_MIME = { '.png': 'image/png', '.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.gif': 'image/gif', '.svg': 'image/svg+xml', '.ico': 'image/x-icon', '.webp': 'image/webp', } def resolve_header_logo_src(config: dict) -> str: """ Resolve the ``header_logo`` config value into a src URL for an ```` tag. - If not configured, returns ``""``. - If the value is an HTTP(S) URL, returns it directly. - Otherwise, reads the local file, base64-encodes it, and returns a data URL. Returns: A URL string suitable for ````, or ``""`` if not configured. """ logo_path = config.get("header_logo") if not logo_path: return "" # Pass through external URLs if logo_path.startswith(("http://", "https://")): return logo_path try: resolved = resolve_project_asset_path(config, logo_path) except FileNotFoundError: logger.warning("header_logo file not found: %s", logo_path) return "" ext = os.path.splitext(resolved)[1].lower() if ext not in SUPPORTED_HEADER_LOGO_EXTENSIONS: logger.warning("header_logo has unsupported extension '%s' (supported: %s)", ext, ', '.join(sorted(SUPPORTED_HEADER_LOGO_EXTENSIONS))) return "" mime = EXTENSION_TO_MIME[ext] with open(resolved, "rb") as f: encoded = base64.b64encode(f.read()).decode("ascii") return f"data:{mime};base64,{encoded}" def resolve_project_asset_path(config: dict, relative_path: str) -> str: """ Resolve a project-relative asset path using the config file directory as base. Args: config: The configuration dict (must contain ``__config_file__``) relative_path: The path as specified in the config (absolute or relative) Returns: Absolute path to the resolved file Raises: FileNotFoundError: If the file does not exist at the resolved path """ if os.path.isabs(relative_path) and os.path.exists(relative_path): return relative_path if os.path.exists(relative_path): return os.path.abspath(relative_path) # Resolve relative to the config file's directory config_file = config.get("__config_file__", "") if config_file: real_path = os.path.realpath(config_file) dir_path = os.path.dirname(real_path) abs_path = os.path.join(dir_path, relative_path) if os.path.exists(abs_path): return abs_path raise FileNotFoundError(f"Project asset file not found: {relative_path}") def load_project_base_css_html(config: dict) -> str: """ Load the project-level ``base_css`` file and return it wrapped in a ``' def _stringify_dict_keys(value): """ Recursively convert all dict keys to strings so the structure can be JSON-serialized with ``sort_keys=True``. YAML 1.1 parses unquoted ``yes``/``no``/``true``/``false`` as booleans, so a config dict can end up with mixed key types (e.g. ``str`` and ``bool``). ``json.dumps(..., sort_keys=True)`` then raises ``TypeError`` because it cannot order keys of different types. Coercing keys to strings up front makes hashing robust regardless of how the config was authored or merged. """ if isinstance(value, dict): return {str(k): _stringify_dict_keys(v) for k, v in value.items()} if isinstance(value, list): return [_stringify_dict_keys(v) for v in value] if isinstance(value, tuple): return tuple(_stringify_dict_keys(v) for v in value) return value def compute_config_md5(config): """ Compute MD5 hash of the config dict for template invalidation. """ # Remove unserializable fields if needed config_copy = {k: v for k, v in config.items() if k not in ['__config_file__', 'site_file']} normalized = _stringify_dict_keys(config_copy) config_str = json.dumps(normalized, sort_keys=True, default=str) return hashlib.md5(config_str.encode('utf-8')).hexdigest() def generate_annotation_layout_file(config: dict, annotation_schemes: list[dict], layout_name: str = None) -> str: """ Generate a dedicated annotation layout file in the task directory under layouts/task_layout.html. If layout_name is provided, uses task_layout_{layout_name}.html instead. """ task_dir = config.get("task_dir") if not task_dir: raise ValueError("task_dir is required in config to generate annotation layout file") # Ensure task directory and layouts subdirectory exist layout_dir = os.path.join(task_dir, DEFAULT_ANNOTATION_LAYOUT_SUBDIR) if not os.path.exists(layout_dir): os.makedirs(layout_dir) # Generate the layout file path filename = f"task_layout_{layout_name}.html" if layout_name else DEFAULT_ANNOTATION_LAYOUT_FILENAME layout_file_path = os.path.join(layout_dir, filename) # Generate the HTML layout content schema_layouts = "" all_keybindings = [] for annotation_scheme in annotation_schemes: schema_layout, keybindings = generate_schematic(annotation_scheme) schema_layouts += schema_layout + "\n" all_keybindings.extend(keybindings) # Compute combined hash (config + schema content) for cache invalidation config_hash = compute_config_md5(config) schema_content_hash = hashlib.md5() schema_content_hash.update(schema_layouts.encode('utf-8')) combined_hash = f"{config_hash}_{schema_content_hash.hexdigest()}" # Create the layout HTML content with combined hash at the top layout_content = f"""
{schema_layouts}
""" # Write the layout file with open(layout_file_path, "wt", encoding="utf-8") as outf: outf.write(layout_content) logger.info(f"Generated annotation layout file: {layout_file_path}") return layout_file_path def get_or_generate_annotation_layout(config: dict, annotation_schemes: list[dict], layout_name: str = None) -> str: """ Get the annotation layout file path, generating it if it doesn't exist or if the config hash has changed. If layout_name is provided, uses task_layout_{layout_name}.html instead. """ task_dir = config.get("task_dir") if not task_dir: raise ValueError("task_dir is required in config") layout_dir = os.path.join(task_dir, DEFAULT_ANNOTATION_LAYOUT_SUBDIR) filename = f"task_layout_{layout_name}.html" if layout_name else DEFAULT_ANNOTATION_LAYOUT_FILENAME layout_file_path = os.path.join(layout_dir, filename) config_hash = compute_config_md5(config) # Also hash the actual generated schema content to detect code changes # (e.g., if bws.py changes separators, the config hash won't change but the output will) # NOTE: must match the concatenation format used in generate_annotation_layout_file # (each layout followed by "\n") so hashes are consistent schema_content_hash = hashlib.md5() schema_layouts = "" for annotation_scheme in annotation_schemes: layout_html, _ = generate_schematic(annotation_scheme) schema_layouts += layout_html + "\n" schema_content_hash.update(schema_layouts.encode('utf-8')) combined_hash = f"{config_hash}_{schema_content_hash.hexdigest()}" # Check if the layout file already exists and if the hash matches if os.path.exists(layout_file_path): with open(layout_file_path, "rt", encoding="utf-8") as f: for _ in range(2): # Only need to check the first two lines line = f.readline() if line.startswith("', '').strip() if file_hash == combined_hash: logger.info(f"Using existing annotation layout file: {layout_file_path} (hash match)") return layout_file_path else: logger.info(f"Hash mismatch (config or schema code changed), regenerating: {layout_file_path}") break # Generate the layout file if it doesn't exist or hash mismatches logger.info(f"Annotation layout file not found or hash mismatch, generating: {layout_file_path}") return generate_annotation_layout_file(config, annotation_schemes, layout_name=layout_name) def generate_schematic(annotation_scheme): """ Based on the task's yaml configuration, generate the full HTML site needed to annotate the tasks's data. Uses the schema registry to look up the generator function for the annotation type. """ # Ensure annotation_id is set before any schema generator runs. # This is the single bottleneck before all generators, so it serves as a # safety net for any caller that doesn't pre-assign annotation_id. if "annotation_id" not in annotation_scheme: annotation_scheme["annotation_id"] = 0 # Figure out which kind of tasks we're doing and build the input frame annotation_type = annotation_scheme["annotation_type"] # Use the schema registry to get the generator return schema_registry.generate(annotation_scheme) def generate_keybindings_sidebar(config, keybindings, horizontal=False): """ Generate an HTML layout for the end-user of the keybindings for the current task. The layout is intended to be displayed in a side bar or on the annotation page if fixed_keybinding_layout.html is used as the layout """ if config.get("horizontal_key_bindings"): horizontal = True if not keybindings: return "" if horizontal: keybindings = [[it[0], it[1].split(":")[-1]] for it in keybindings] lines = list(zip(*keybindings)) layout = '' for line in lines: layout += ( "" + "".join(["" % it for it in line]) + "" ) layout += "
  %s  
" else: layout = "" for key, desc in keybindings: layout += '' % (key, desc) layout += "
KeyDescription
%s%s
" return layout def generate_statistics_sidebar(statistics): """ Generate an HTML layout for the end-user of the statistics for the current task. The layout is intended to be displayed in a side bar """ layout = "" for key in statistics: desc = "{{statistics_nav['%s']}}" % statistics[key] layout += '' % (key, desc) layout += "
%s%s
" return layout def generate_annotation_html_template(config: dict) -> str: """ Generates the full HTML file in site/ for annotating this tasks data, combining the various templates with the annotation specification in the yaml file and returns the path to the HTML template for this annotation task. """ logger.info("Generating anntotation site at %s" % config["site_dir"]) # # Stage 1: Construct the core HTML file devoid the annotation-specific content # # Use hardcoded template paths - no longer configurable cur_program_dir = os.path.dirname(os.path.abspath(__file__)) html_template_file = os.path.join(cur_program_dir, '..', 'templates', 'base_template_v2.html') header_file = os.path.join(cur_program_dir, '..', 'templates', 'header.html') logger.debug(f"Reading html annotation template: {html_template_file}") if not os.path.exists(html_template_file): raise FileNotFoundError("html_template_file not found: %s" % html_template_file) with open(html_template_file, "rt", encoding="utf-8") as file_p: html_template = "".join(file_p.readlines()) # Load the header content we'll stuff in the template, which has scripts # and assets we'll need logger.debug("Reading html header %s" % header_file) if not os.path.exists(header_file): raise FileNotFoundError("header_file not found: %s" % header_file) with open(header_file, "rt", encoding="utf-8") as file_p: header = "".join(file_p.readlines()) html_template = html_template.replace("{{ HEADER }}", header) if config.get("hide_navbar"): html_template = html_template.replace( '