<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Polyscriptor HTR</title>
<link rel="stylesheet" href="/static/app.css">
</head>
<body>
<!-- Header -->
<header id="header">
  <div class="header-left">
    <!-- Logo glyph is decorative; the <h1> next to it carries the product name. -->
    <span class="header-logo" aria-hidden="true">⬑</span>
    <h1>Polyscriptor <span class="header-sub">HTR</span></h1>
  </div>
  <div class="header-right">
    <div id="gpu-status" class="gpu-widget"></div>
    <!-- The visible "?" is not a usable name on its own and `title` is not reliably
         exposed to assistive tech, so aria-label provides the accessible name.
         The inline script at the end of the document opens #help-modal from this button. -->
    <button id="btn-help" class="btn-icon" type="button" title="Help" aria-label="Help">?</button>
  </div>
</header>
<!-- Main 3-column layout -->
<main id="app">
<!-- Left: Engine + Image controls -->
<aside id="engine-panel" class="panel" data-panel="settings">
<!-- HTR engine picker: engine dropdown, description, engine config form,
     optional Kraken preset row, and the Load Model button with its status badge. -->
<section class="panel-section">
  <h2>HTR Engine</h2>
  <label for="engine-select">Engine</label>
  <!-- Starts disabled with a single placeholder option. -->
  <select id="engine-select" disabled>
    <option>Loading engines…</option>
  </select>
  <p id="engine-description" class="muted"></p>
  <div id="config-form"></div>
  <!-- Hidden by default via the `hidden` class. -->
  <div id="kraken-preset-row" class="hidden" style="margin-top:8px">
    <label for="kraken-preset-select" style="display:block;font-size:0.78rem;margin-bottom:3px">Kraken Model Preset</label>
    <select id="kraken-preset-select" style="width:100%">
      <option value="">Loading presets…</option>
    </select>
    <span id="kraken-preset-status" class="muted" style="font-size:0.72rem;display:block;margin-top:3px"></span>
  </div>
  <button id="btn-load-model" class="btn btn-primary" type="button" disabled>Load Model</button>
  <div id="engine-status" class="status-badge hidden"></div>
</section>
<hr>
<!-- Image input: drop/browse upload area, batch queue with options and exports,
     and the optional PAGE XML upload row. -->
<section class="panel-section">
  <h2>Image</h2>
  <div id="upload-area" class="upload-area">
    <!-- Decorative icon; the paragraph below carries the instruction text. -->
    <svg class="upload-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" aria-hidden="true">
      <path stroke-linecap="round" stroke-linejoin="round"
            d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5m-13.5-9L12 3m0 0l4.5 4.5M12 3v13.5"/>
    </svg>
    <p>Drop image or PDF, or click to browse</p>
    <input type="file" id="file-input" accept="image/*,.pdf" multiple hidden>
  </div>
  <p id="image-info" class="muted"></p>
  <!-- Batch queue; hidden by default via the `hidden` class. -->
  <div id="batch-queue-section" class="hidden">
    <div class="batch-queue-header">
      <span class="section-label">Queue</span>
      <span id="batch-overall-progress" class="batch-overall-progress hidden"></span>
    </div>
    <div id="batch-list"></div>
    <div class="batch-options-row">
      <label class="checkbox-label" title="Use PAGE XML segmentation if a matching .xml file was uploaded for this image">
        <input type="checkbox" id="batch-use-pagexml" checked>
        Use PAGE XML
      </label>
      <label class="checkbox-label" title="Skip images that have already been transcribed in this session">
        <input type="checkbox" id="batch-resume">
        Resume
      </label>
    </div>
    <div class="btn-row" style="margin-top:6px">
      <button id="btn-process-batch" class="btn btn-primary btn-small" type="button">Process All</button>
      <button id="btn-clear-batch" class="btn btn-small btn-outline" type="button">Clear</button>
    </div>
    <div id="batch-export-row" class="btn-row hidden" style="margin-top:6px">
      <button id="btn-export-batch-txt" class="btn btn-small" type="button">All TXT</button>
      <button id="btn-export-batch-csv" class="btn btn-small" type="button">All CSV</button>
      <button id="btn-export-batch-txt-zip" class="btn btn-small btn-primary" type="button">Download ZIP (TXT)</button>
      <button id="btn-export-batch-thinking-zip" class="btn btn-small btn-primary" type="button">Download ZIP (Thinking)</button>
      <button id="btn-export-batch-xml" class="btn btn-small btn-primary" type="button">Download ZIP (XML)</button>
    </div>
  </div>
  <!-- PAGE XML upload row; the label acts as the visible trigger for the hidden file input. -->
  <div id="xml-upload-row" class="xml-row hidden">
    <span id="xml-status" class="muted">No PAGE XML</span>
    <label class="btn btn-small btn-outline" for="xml-input">
      Upload XML
      <input type="file" id="xml-input" accept=".xml" hidden multiple>
    </label>
  </div>
</section>
<hr>
<!-- Segmentation settings: method, device, and the blla-specific options. -->
<section class="panel-section" id="seg-controls">
  <h2>Segmentation</h2>
  <label for="seg-method">Method</label>
  <select id="seg-method">
    <option value="kraken" selected>Kraken Classical</option>
    <option value="hpp">HPP / projection profile fallback</option>
    <option value="kraken-blla" disabled>Kraken Neural / blla (server only)</option>
  </select>
  <label for="seg-device">Device</label>
  <select id="seg-device">
    <option value="cpu">CPU</option>
    <option value="cuda:0">GPU 0</option>
    <option value="cuda:1">GPU 1</option>
  </select>
  <!-- Hidden by default through the inline display:none.
       NOTE(review): presumably revealed by app.js when the blla method is selected; confirm. -->
  <div id="blla-options" style="display:none">
    <div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap">
      <div style="display:flex;flex-direction:column;gap:3px">
        <label for="seg-max-columns">Max columns</label>
        <input type="number" id="seg-max-columns" min="1" max="12" value="6" style="width:60px">
      </div>
      <div style="display:flex;flex-direction:column;gap:3px">
        <label for="seg-split-width">Split width %</label>
        <input type="number" id="seg-split-width" min="5" max="80" value="40" step="5" style="width:60px" title="Min region width (% of page) to trigger sub-column splitting. Lower = split narrower regions. Double pages: try 20.">
      </div>
    </div>
    <div style="margin-top:6px">
      <label for="seg-text-direction">Reading direction</label>
      <select id="seg-text-direction" title="Controls column reading order. Use horizontal-rl for Arabic, Ottoman, Hebrew manuscripts.">
        <option value="horizontal-lr">LTR (Latin, Cyrillic, …)</option>
        <option value="horizontal-rl">RTL (Arabic, Ottoman, Hebrew, …)</option>
        <option value="vertical-lr">Vertical LTR</option>
        <option value="vertical-rl">Vertical RTL</option>
      </select>
    </div>
  </div>
</section>
<div id="seg-regions-list" class="hidden"></div>
<!-- Primary actions for the settings panel; both buttons start disabled. -->
<div class="panel-footer">
  <div class="btn-row footer-btn-row">
    <button id="btn-segment" class="btn btn-outline" type="button" disabled title="Preview line segmentation without transcribing">Segment</button>
    <button id="btn-transcribe" class="btn btn-accent" type="button" disabled>Transcribe</button>
  </div>
</div>
</aside>
<div class="panel-resize-handle" id="resize-left" title="Drag to resize"></div>
<!-- Center: Image viewer -->
<section id="viewer-panel" class="panel" data-panel="image">
  <!-- Zoom toolbar — only visible when image is loaded -->
  <!-- NOTE(review): the icon glyphs and arrow characters in this toolbar were restored
       from mis-encoded text; confirm them against the original file. The aria-labels
       give each icon-only button a name that does not depend on the glyph. -->
  <div id="zoom-toolbar" class="zoom-toolbar hidden">
    <button class="zoom-btn" id="btn-zoom-out" type="button" title="Zoom out" aria-label="Zoom out">−</button>
    <span id="zoom-level" class="zoom-level">100%</span>
    <button class="zoom-btn" id="btn-zoom-in" type="button" title="Zoom in" aria-label="Zoom in">+</button>
    <button class="zoom-btn zoom-fit" id="btn-zoom-fit" type="button" title="Fit to view" aria-label="Fit to view">⊡</button>
    <span class="zoom-toolbar-sep"></span>
    <button class="btn btn-small btn-outline nav-btn" id="btn-nav-prev" type="button" title="Previous image (←)" disabled>‹ Prev</button>
    <span id="batch-nav-label" class="batch-nav-label-toolbar"></span>
    <button class="btn btn-small btn-outline nav-btn" id="btn-nav-next" type="button" title="Next image (→)" disabled>Next ›</button>
  </div>
  <!-- Scroll area fills remaining height -->
  <div id="viewer-scroll">
    <div id="viewer-placeholder" class="viewer-placeholder">
      <!-- Decorative placeholder icon; the paragraph below carries the message. -->
      <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1" aria-hidden="true">
        <rect x="3" y="3" width="18" height="18" rx="2"/>
        <circle cx="8.5" cy="8.5" r="1.5"/>
        <path stroke-linecap="round" stroke-linejoin="round" d="M21 15l-5-5L5 21"/>
      </svg>
      <p>Upload an image to begin</p>
    </div>
    <!-- Image plus overlay canvas; hidden by default via the `hidden` class. -->
    <div id="image-container" class="hidden">
      <img id="page-image" alt="Uploaded page">
      <canvas id="overlay-canvas"></canvas>
    </div>
  </div>
</section>
<div class="panel-resize-handle" id="resize-right" title="Drag to resize"></div>
<!-- Right: Transcription results -->
<section id="results-panel" class="panel" data-panel="results">
  <div class="results-header">
    <div class="results-header-row">
      <h2>Transcription</h2>
      <div class="results-header-controls">
        <!-- `title` alone is not a reliable label, so aria-label repeats it. -->
        <select id="font-select" class="font-select" title="Transcription font" aria-label="Transcription font"></select>
        <!-- NOTE(review): the original icon glyph was lost to an encoding error;
             ⊞ is a best guess and should be confirmed against the original file. -->
        <button id="btn-col-layout" class="btn-icon hidden" type="button" title="Toggle column layout" aria-label="Toggle column layout">⊞</button>
      </div>
    </div>
    <div id="results-search-row" class="results-search-row hidden">
      <!-- A placeholder is not a label; aria-label gives the field its accessible name. -->
      <input type="search" id="results-search" placeholder="Search lines…" aria-label="Search lines" autocomplete="off">
      <span id="results-search-count" class="muted"></span>
    </div>
    <div id="conf-filter-row" class="conf-filter-row hidden">
      <!-- `for` ties the label to the range input that follows it. -->
      <label for="conf-threshold">Min conf: <strong id="conf-threshold-val">0%</strong></label>
      <input type="range" id="conf-threshold" min="0" max="100" value="0" step="5">
    </div>
    <div id="progress-container" class="hidden">
      <div id="progress-bar"><div id="progress-fill"></div></div>
      <div class="progress-row">
        <p id="progress-text" class="muted">0 / 0 lines</p>
        <button id="btn-cancel" class="btn btn-small hidden" type="button">Cancel</button>
      </div>
    </div>
  </div>
  <div id="transcription-lines"></div>
  <!-- Summary and per-page export actions; hidden by default via the `hidden` class. -->
  <div id="results-footer" class="hidden">
    <p id="results-summary" class="muted"></p>
    <div class="btn-row">
      <button id="btn-copy-text" class="btn btn-small" type="button">Copy Text</button>
      <button id="btn-export-txt" class="btn btn-small" type="button">TXT</button>
      <button id="btn-export-csv" class="btn btn-small" type="button">CSV</button>
      <button id="btn-export-xml" class="btn btn-small hidden" type="button">XML</button>
    </div>
  </div>
</section>
</main>
<!-- Mobile tab bar (visible < 700px) -->
<!-- Each button's data-target names the data-panel value of the panel it shows.
     The icons are decorative because every button also has a text label. -->
<nav id="mobile-tabs" aria-label="Panels">
  <button class="tab-btn active" type="button" data-target="settings">
    <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M11.49 3.17c-.38-1.56-2.6-1.56-2.98 0a1.532 1.532 0 01-2.286.948c-1.372-.836-2.942.734-2.106 2.106.54.886.061 2.042-.947 2.287-1.561.379-1.561 2.6 0 2.978a1.532 1.532 0 01.947 2.287c-.836 1.372.734 2.942 2.106 2.106a1.532 1.532 0 012.287.947c.379 1.561 2.6 1.561 2.978 0a1.533 1.533 0 012.287-.947c1.372.836 2.942-.734 2.106-2.106a1.533 1.533 0 01.947-2.287c1.561-.379 1.561-2.6 0-2.978a1.532 1.532 0 01-.947-2.287c.836-1.372-.734-2.942-2.106-2.106a1.532 1.532 0 01-2.287-.947zM10 13a3 3 0 100-6 3 3 0 000 6z" clip-rule="evenodd"/></svg>
    Settings
  </button>
  <button class="tab-btn" type="button" data-target="image">
    <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M4 3a2 2 0 00-2 2v10a2 2 0 002 2h12a2 2 0 002-2V5a2 2 0 00-2-2H4zm12 12H4l4-8 3 6 2-4 3 6z" clip-rule="evenodd"/></svg>
    Image
  </button>
  <button class="tab-btn" type="button" data-target="results">
    <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M4 4a2 2 0 012-2h4.586A2 2 0 0112 2.586L15.414 6A2 2 0 0116 7.414V16a2 2 0 01-2 2H6a2 2 0 01-2-2V4zm2 6a1 1 0 011-1h6a1 1 0 110 2H7a1 1 0 01-1-1zm1 3a1 1 0 100 2h6a1 1 0 100-2H7z" clip-rule="evenodd"/></svg>
    Results
  </button>
</nav>
<!-- Help modal -->
<!-- Quick-guide dialog, named by its heading through aria-labelledby.
     NOTE(review): dashes and the close glyph were restored from mis-encoded text;
     confirm against the original file. -->
<dialog id="help-modal" aria-labelledby="help-modal-title">
  <div class="modal-header">
    <h2 id="help-modal-title">Polyscriptor HTR — Quick Guide</h2>
    <button id="btn-help-close" class="btn-icon" type="button" aria-label="Close">✕</button>
  </div>
  <div class="modal-body">
    <h3>Quick Start</h3>
    <ol>
      <li><strong>Select an engine</strong> from the dropdown and configure it (model path, API key, etc.).</li>
      <li>Click <strong>Load Model</strong> and wait for the green status badge.</li>
      <li><strong>Upload an image</strong> by dragging it onto the upload area or clicking to browse.</li>
      <li>Optionally click <strong>Segment</strong> to preview line detection before transcribing.</li>
      <li>Click <strong>Transcribe</strong>. Lines appear one by one as they are processed.</li>
      <li><strong>Export</strong> the result as TXT, CSV, or PAGE XML.</li>
    </ol>
    <h3>Source Code</h3>
    <p>
      The public Polyscriptor source code is available on
      <a href="https://github.com/achimrabus/polyscriptor" target="_blank" rel="noopener noreferrer">GitHub</a>.
      This Hugging Face Space runs a curated hosted demo configuration.
    </p>
    <h3>Engines</h3>
    <table>
      <tr><th scope="col">Engine</th><th scope="col">Best for</th></tr>
      <tr><td>CRNN-CTC</td><td>Fastest; works well on Church Slavonic, Glagolitic, Ukrainian with trained models</td></tr>
      <tr><td>TrOCR</td><td>HuggingFace Transformer OCR; good general-purpose accuracy</td></tr>
      <tr><td>Qwen3-VL</td><td>Large vision-language model; best quality but slow, needs GPU</td></tr>
      <tr><td>Kraken</td><td>Classical HTR; good for Latin scripts</td></tr>
      <tr><td>Party</td><td>Whole-page transformer; requires PAGE XML with line segmentation</td></tr>
      <tr><td>Commercial APIs</td><td>OpenAI / Gemini / Claude — cloud inference, no local GPU needed</td></tr>
      <tr><td>OpenWebUI</td><td>Locally hosted models via OpenWebUI/Ollama</td></tr>
    </table>
    <h3>Segmentation</h3>
    <ul>
      <li><strong>Kraken Classical</strong> — default line segmentation in this Hugging Face CPU demo.</li>
      <li><strong>HPP</strong> — horizontal projection profile fallback.</li>
      <li><strong>Kraken Neural / blla</strong> — available on the full server setup, but not enabled in this Space.</li>
      <li><strong>PAGE XML upload</strong> — skip segmentation entirely by uploading an existing PAGE XML annotation (e.g. from Transkribus).</li>
    </ul>
    <h3>Tips</h3>
    <ul>
      <li>Click a transcription line to highlight the corresponding bounding box in the image.</li>
      <!-- The literal "<" is written as &lt; so it can never be parsed as markup. -->
      <li>Confidence badges: <span class="conf-high demo-badge">high ≥90%</span> <span class="conf-mid demo-badge">mid ≥75%</span> <span class="conf-low demo-badge">low &lt;75%</span></li>
      <li>Line-segmenting engines (CRNN-CTC, TrOCR, Kraken) use the segmentation method above. Page-level engines (Party, Qwen3-VL, Commercial APIs) do their own segmentation.</li>
      <li>API keys can be saved on the server — enter the key once, check <em>Save key on server</em>.</li>
      <li>Uploads are kept for 24 hours, then cleaned up automatically.</li>
    </ul>
    <h3>Keyboard</h3>
    <ul>
      <li><kbd>Esc</kbd> — close this dialog</li>
    </ul>
  </div>
</dialog>
<!-- Toast notification container -->
<div id="toast-container"></div>
<script type="module" src="/static/app.js"></script>
<script>
  // Inline page glue: wires the help dialog and the mobile tab bar.
  // `modal`, `tabBtns` and `panels` stay top-level `const`s of this classic script,
  // so the global names it declares are unchanged.

  // Help modal
  const modal = document.getElementById('help-modal');
  document.getElementById('btn-help').addEventListener('click', () => modal.showModal());
  document.getElementById('btn-help-close').addEventListener('click', () => modal.close());
  // Close when the click target is the <dialog> element itself rather than
  // something inside it (e.g. a click on the backdrop).
  modal.addEventListener('click', e => { if (e.target === modal) modal.close(); });

  // Mobile tab bar
  const tabBtns = document.querySelectorAll('.tab-btn');
  const panels = document.querySelectorAll('[data-panel]');
  tabBtns.forEach(btn => {
    btn.addEventListener('click', () => {
      const target = btn.dataset.target;
      // Exactly one button is active: the clicked one. aria-pressed mirrors the
      // `active` class so the selection is not conveyed by styling alone.
      tabBtns.forEach(b => {
        const isActive = b === btn;
        b.classList.toggle('active', isActive);
        b.setAttribute('aria-pressed', String(isActive));
      });
      // Show the panel whose data-panel matches the button's data-target.
      panels.forEach(p => {
        p.classList.toggle('panel-active', p.dataset.panel === target);
      });
    });
  });
  // Default: settings active on mobile
  document.querySelector('[data-panel="settings"]').classList.add('panel-active');
  // Expose the initial selection too, derived from whichever button carries `active`.
  tabBtns.forEach(b => b.setAttribute('aria-pressed', String(b.classList.contains('active'))));
</script>
</body>
</html>