codebook / potato /server_utils /schemas /text_edit.py
davidjurgens's picture
Deploy: Potato — Codebook Annotation
aceb1b2 verified
Raw
History Blame Contribute Delete
11.1 kB
"""
Inline Text Editing / Post-Edit Layout
Annotators directly edit displayed text, with the system tracking insertions,
deletions, and substitutions as a structured diff. Used for MT post-editing,
grammar correction, text simplification, and paraphrase generation.
Research: WMT post-editing shared tasks; MQM-APE (COLING 2025).
"""
import logging
from .identifier_utils import (
safe_generate_layout,
generate_element_identifier,
generate_validation_attribute,
escape_html_content,
generate_layout_attributes
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def generate_text_edit_layout(annotation_scheme):
    """
    Public entry point for rendering the Inline Text Editing interface.

    The actual markup is produced by ``_generate_text_edit_layout_internal``;
    this function only hands that builder, together with the scheme, to
    ``safe_generate_layout`` and passes the result back unchanged.

    Args:
        annotation_scheme (dict): Schema configuration. Recognised keys:
            - name: Schema identifier
            - description: Display description
            - source_field: Field in data containing text to edit
            - show_diff: Whether to show real-time diff highlighting
            - show_edit_distance: Whether to show edit distance counter
            - allow_reset: Whether to show "Reset to original" button

    Returns:
        tuple: (html_string, key_bindings)
    """
    layout = safe_generate_layout(
        annotation_scheme,
        _generate_text_edit_layout_internal,
    )
    return layout
# Client-side behaviour emitted after every text_edit form. Nothing is
# interpolated into it, so it lives in a plain raw string: the JavaScript
# braces and the /\s+/ regex are written literally instead of being doubled
# for an f-string.
_TEXT_EDIT_SCRIPT = r"""
    <script>
    (function() {
        window.textEditOnInput = function(schemaName) {
            var editor = document.getElementById(schemaName + '-editor');
            var sourceText = document.getElementById(schemaName + '-source-text').textContent || '';
            var editedText = editor.value;
            // Compute word-level edit distance
            var srcWords = sourceText.trim().split(/\s+/).filter(function(w) { return w; });
            var editWords = editedText.trim().split(/\s+/).filter(function(w) { return w; });
            var wordDist = levenshtein(srcWords, editWords);
            var charDist = levenshteinStr(sourceText, editedText);
            // Update stats
            var wordEl = document.getElementById(schemaName + '-word-dist');
            var charEl = document.getElementById(schemaName + '-char-dist');
            if (wordEl) wordEl.textContent = wordDist;
            if (charEl) charEl.textContent = charDist;
            // Update diff display
            var diffContent = document.getElementById(schemaName + '-diff-content');
            if (diffContent) {
                diffContent.innerHTML = computeWordDiff(srcWords, editWords);
            }
            // Store result in hidden input
            var data = JSON.stringify({
                edited_text: editedText,
                original_text: sourceText,
                edit_distance_chars: charDist,
                edit_distance_words: wordDist
            });
            var hiddenInput = document.getElementById(schemaName).querySelector('.text-edit-data-input');
            hiddenInput.value = data;
            hiddenInput.setAttribute('data-modified', 'true');
            hiddenInput.dispatchEvent(new Event('change', { bubbles: true }));
        };
        window.textEditReset = function(schemaName) {
            var sourceText = document.getElementById(schemaName + '-source-text').textContent || '';
            document.getElementById(schemaName + '-editor').value = sourceText;
            textEditOnInput(schemaName);
        };
        function levenshtein(a, b) {
            var m = a.length, n = b.length;
            var dp = Array.from({length: m + 1}, function() { return new Array(n + 1).fill(0); });
            for (var i = 0; i <= m; i++) dp[i][0] = i;
            for (var j = 0; j <= n; j++) dp[0][j] = j;
            for (var i = 1; i <= m; i++)
                for (var j = 1; j <= n; j++)
                    dp[i][j] = a[i-1] === b[j-1] ? dp[i-1][j-1] :
                        1 + Math.min(dp[i-1][j], dp[i][j-1], dp[i-1][j-1]);
            return dp[m][n];
        }
        function levenshteinStr(a, b) {
            // Character-level Levenshtein with optimization for long strings
            var m = a.length, n = b.length;
            if (m === 0) return n;
            if (n === 0) return m;
            // Use two-row optimization
            var prev = new Array(n + 1);
            var curr = new Array(n + 1);
            for (var j = 0; j <= n; j++) prev[j] = j;
            for (var i = 1; i <= m; i++) {
                curr[0] = i;
                for (var j = 1; j <= n; j++)
                    curr[j] = a[i-1] === b[j-1] ? prev[j-1] :
                        1 + Math.min(prev[j], curr[j-1], prev[j-1]);
                var tmp = prev; prev = curr; curr = tmp;
            }
            return prev[n];
        }
        function computeWordDiff(srcWords, editWords) {
            // Simple word diff visualization
            var html = '';
            var si = 0, ei = 0;
            // Use LCS-based approach
            var lcs = lcsWords(srcWords, editWords);
            var li = 0;
            si = 0; ei = 0;
            while (si < srcWords.length || ei < editWords.length) {
                if (li < lcs.length && si < srcWords.length && ei < editWords.length &&
                    srcWords[si] === lcs[li] && editWords[ei] === lcs[li]) {
                    html += '<span class="text-edit-diff-same">' + escapeHtml(lcs[li]) + '</span> ';
                    si++; ei++; li++;
                } else if (li < lcs.length && ei < editWords.length && editWords[ei] === lcs[li]) {
                    html += '<span class="text-edit-diff-del">' + escapeHtml(srcWords[si]) + '</span> ';
                    si++;
                } else if (li < lcs.length && si < srcWords.length && srcWords[si] === lcs[li]) {
                    html += '<span class="text-edit-diff-ins">' + escapeHtml(editWords[ei]) + '</span> ';
                    ei++;
                } else {
                    if (si < srcWords.length) {
                        html += '<span class="text-edit-diff-del">' + escapeHtml(srcWords[si]) + '</span> ';
                        si++;
                    }
                    if (ei < editWords.length) {
                        html += '<span class="text-edit-diff-ins">' + escapeHtml(editWords[ei]) + '</span> ';
                        ei++;
                    }
                }
            }
            return html;
        }
        function lcsWords(a, b) {
            var m = a.length, n = b.length;
            var dp = Array.from({length: m + 1}, function() { return new Array(n + 1).fill(0); });
            for (var i = 1; i <= m; i++)
                for (var j = 1; j <= n; j++)
                    dp[i][j] = a[i-1] === b[j-1] ? dp[i-1][j-1] + 1 : Math.max(dp[i-1][j], dp[i][j-1]);
            // Backtrack
            var result = [];
            var i = m, j = n;
            while (i > 0 && j > 0) {
                if (a[i-1] === b[j-1]) { result.unshift(a[i-1]); i--; j--; }
                else if (dp[i-1][j] > dp[i][j-1]) i--;
                else j--;
            }
            return result;
        }
        function escapeHtml(str) {
            var div = document.createElement('div');
            div.textContent = str;
            return div.innerHTML;
        }
    })();
    </script>
"""


def _generate_text_edit_layout_internal(annotation_scheme):
    """
    Build the HTML form and client-side script for one text_edit schema.

    The markup contains a read-only "Original" block, a textarea for the
    edited text, optional statistics / diff / reset widgets, and a hidden
    input into which the script writes a JSON summary of the edit.

    Args:
        annotation_scheme (dict): Schema configuration. ``name`` and
            ``description`` are required (they are looked up by index, so a
            missing key raises ``KeyError``). Optional keys and their
            defaults: ``source_field`` (``''``), ``show_diff`` (``True``),
            ``show_edit_distance`` (``True``), ``allow_reset`` (``True``)
            and ``annotation_id`` (``''``).

    Returns:
        tuple: ``(html, key_bindings)``; ``key_bindings`` is always an empty
        list for this layout.
    """
    schema_name = annotation_scheme['name']
    description = annotation_scheme['description']
    source_field = annotation_scheme.get('source_field', '')
    show_diff = annotation_scheme.get('show_diff', True)
    show_edit_distance = annotation_scheme.get('show_edit_distance', True)
    allow_reset = annotation_scheme.get('allow_reset', True)

    layout_attrs = generate_layout_attributes(annotation_scheme)
    validation = generate_validation_attribute(annotation_scheme)
    identifiers = generate_element_identifier(schema_name, schema_name, "hidden")

    # Escape the schema name once and use the same value in every attribute.
    # Previously the derived ids ("<name>-editor", "<name>-diff", ...) used the
    # raw name while the form id and data attributes used the escaped one, so a
    # name containing markup characters produced inconsistent/broken HTML.
    # (escape_html_content comes from .identifier_utils; presumably it performs
    # standard HTML entity escaping -- confirm there.)
    safe_name = escape_html_content(schema_name)
    safe_annotation_id = escape_html_content(str(annotation_scheme.get('annotation_id', '')))
    safe_source_field = escape_html_content(source_field)
    safe_description = escape_html_content(description)

    # NOTE(review): the inline handlers below embed the name inside a
    # JavaScript string literal. HTML escaping alone does not protect that
    # context (the browser decodes character references before running the
    # handler), so schema names are assumed to be plain identifiers that come
    # from trusted configuration.
    reset_btn = ""
    if allow_reset:
        reset_btn = f"""
                <button type="button" class="text-edit-reset-btn"
                        onclick="textEditReset('{safe_name}')">
                    Reset to Original
                </button>
        """

    diff_display = ""
    if show_diff:
        diff_display = f"""
            <div class="text-edit-diff-display" id="{safe_name}-diff">
                <div class="text-edit-diff-label">Changes:</div>
                <div class="text-edit-diff-content" id="{safe_name}-diff-content"></div>
            </div>
        """

    edit_distance_display = ""
    if show_edit_distance:
        edit_distance_display = f"""
            <div class="text-edit-stats" id="{safe_name}-stats">
                <span class="text-edit-stat">Words changed: <strong id="{safe_name}-word-dist">0</strong></span>
                <span class="text-edit-stat">Chars changed: <strong id="{safe_name}-char-dist">0</strong></span>
            </div>
        """

    # The textarea's closing tag stays on the same line as its opening tag so
    # that the editor starts out empty rather than containing whitespace.
    html = f"""
    <form id="{safe_name}" class="annotation-form shadcn-text-edit-container"
          action="javascript:void(0)"
          data-annotation-id="{safe_annotation_id}"
          data-annotation-type="text_edit"
          data-schema-name="{safe_name}"
          data-source-field="{safe_source_field}"
          {layout_attrs}>
        <fieldset schema_name="{safe_name}">
            <legend class="shadcn-text-edit-title">{safe_description}</legend>
            <div class="text-edit-source-block" id="{safe_name}-source">
                <div class="text-edit-source-label">Original:</div>
                <div class="text-edit-source-text" id="{safe_name}-source-text"></div>
            </div>
            <div class="text-edit-editor-block">
                <div class="text-edit-editor-label">Edit below:</div>
                <textarea class="text-edit-textarea annotation-input"
                          id="{safe_name}-editor"
                          name="{identifiers['name']}"
                          schema="{identifiers['schema']}"
                          label_name="{identifiers['label_name']}"
                          validation="{validation}"
                          rows="5"
                          oninput="textEditOnInput('{safe_name}')"></textarea>
            </div>
            {edit_distance_display}
            {diff_display}
            <div class="text-edit-controls">
                {reset_btn}
            </div>
            <input type="hidden"
                   class="annotation-input text-edit-data-input"
                   id="{identifiers['id']}"
                   name="{identifiers['name']}-data"
                   schema="{identifiers['schema']}"
                   label_name="{identifiers['label_name']}"
                   validation="{validation}"
                   value="">
        </fieldset>
    </form>
    {_TEXT_EDIT_SCRIPT}
    """

    # Lazy %-style arguments: the message is only formatted if INFO is enabled.
    logger.info("Generated text edit layout for %s", schema_name)
    return html, []