| <!DOCTYPE html> |
| <html lang="en"> |
| <head> <!-- Document metadata: character encoding, responsive viewport, page title, and the app stylesheet. --> |
| <meta charset="UTF-8"> <!-- Kept as the first child of head so the encoding is known before any text is parsed. --> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>Polyscriptor HTR</title> |
| <link rel="stylesheet" href="/static/app.css"> <!-- Root-relative URL; assumes the server exposes /static — confirm. --> |
| </head> |
| <body> |
| |
| <!-- Application header: logo and title on the left, GPU status slot and help trigger on the right. --> |
| <header id="header"> |
| <div class="header-left"> |
| <!-- Decorative glyph; aria-hidden keeps it from being read out ahead of the title. --> |
| <span class="header-logo" aria-hidden="true">⬑</span> |
| <h1>Polyscriptor <span class="header-sub">HTR</span></h1> |
| </div> |
| <div class="header-right"> |
| <!-- Empty in the static markup; presumably filled by /static/app.js — confirm. --> |
| <div id="gpu-status" class="gpu-widget"></div> |
| <!-- The visible text is just "?", so aria-label supplies the accessible name. The inline script at the end of body opens #help-modal on click. --> |
| <button id="btn-help" class="btn-icon" type="button" title="Help" aria-label="Help" aria-haspopup="dialog">?</button> |
| </div> |
| </header> |
|
|
| |
| <main id="app"> |
| |
| <aside id="engine-panel" class="panel" data-panel="settings"> |
|
|
|
|
| <!-- Engine selection: pick an HTR engine, configure it, and load its model. --> |
| <section class="panel-section"> |
| <h2>HTR Engine</h2> |
| <label for="engine-select">Engine</label> |
| <!-- Disabled with a placeholder option in the static markup; presumably populated and enabled by /static/app.js — confirm. --> |
| <select id="engine-select" disabled> |
| <option>Loading engines…</option> |
| </select> |
| <p id="engine-description" class="muted"></p> |
| <div id="config-form"></div> |
| <!-- Kraken preset picker; carries the "hidden" class in the static markup. --> |
| <div id="kraken-preset-row" class="hidden" style="margin-top:8px"> |
| <label for="kraken-preset-select" style="display:block;font-size:0.78rem;margin-bottom:3px">Kraken Model Preset</label> |
| <select id="kraken-preset-select" style="width:100%"> |
| <option value="">Loading presets…</option> |
| </select> |
| <span id="kraken-preset-status" class="muted" style="font-size:0.72rem;display:block;margin-top:3px"></span> |
| </div> |
| <!-- Explicit type keeps this an action button even if it is ever placed inside a form. --> |
| <button id="btn-load-model" class="btn btn-primary" type="button" disabled>Load Model</button> |
| <div id="engine-status" class="status-badge hidden"></div> |
| </section> |
|
|
| <hr> |
|
|
| <!-- Image input: file upload, batch queue controls with exports, and optional PAGE XML upload. --> |
| <section class="panel-section"> |
| <h2>Image</h2> |
| <!-- NOTE(review): the drop zone is a plain div and the file input is hidden; keyboard access depends on handlers outside this file — confirm in /static/app.js. --> |
| <div id="upload-area" class="upload-area"> |
| <!-- Decorative icon; the paragraph below carries the instruction text. --> |
| <svg class="upload-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5" aria-hidden="true" focusable="false"> |
| <path stroke-linecap="round" stroke-linejoin="round" |
| d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5m-13.5-9L12 3m0 0l4.5 4.5M12 3v13.5"/> |
| </svg> |
| <p>Drop image or PDF, or click to browse</p> |
| <input type="file" id="file-input" accept="image/*,.pdf" multiple hidden> |
| </div> |
| <p id="image-info" class="muted"></p> |
| <!-- Batch queue; carries the "hidden" class in the static markup. --> |
| <div id="batch-queue-section" class="hidden"> |
| <div class="batch-queue-header"> |
| <span class="section-label">Queue</span> |
| <span id="batch-overall-progress" class="batch-overall-progress hidden"></span> |
| </div> |
| <div id="batch-list"></div> |
| <div class="batch-options-row"> |
| <label class="checkbox-label" title="Use PAGE XML segmentation if a matching .xml file was uploaded for this image"> |
| <input type="checkbox" id="batch-use-pagexml" checked> |
| Use PAGE XML |
| </label> |
| <label class="checkbox-label" title="Skip images that have already been transcribed in this session"> |
| <input type="checkbox" id="batch-resume"> |
| Resume |
| </label> |
| </div> |
| <!-- All buttons below declare type="button" so none of them can act as a submit button. --> |
| <div class="btn-row" style="margin-top:6px"> |
| <button id="btn-process-batch" class="btn btn-primary btn-small" type="button">Process All</button> |
| <button id="btn-clear-batch" class="btn btn-small btn-outline" type="button">Clear</button> |
| </div> |
| <div id="batch-export-row" class="btn-row hidden" style="margin-top:6px"> |
| <button id="btn-export-batch-txt" class="btn btn-small" type="button">All TXT</button> |
| <button id="btn-export-batch-csv" class="btn btn-small" type="button">All CSV</button> |
| <button id="btn-export-batch-txt-zip" class="btn btn-small btn-primary" type="button">Download ZIP (TXT)</button> |
| <button id="btn-export-batch-thinking-zip" class="btn btn-small btn-primary" type="button">Download ZIP (Thinking)</button> |
| <button id="btn-export-batch-xml" class="btn btn-small btn-primary" type="button">Download ZIP (XML)</button> |
| </div> |
| </div> |
| <!-- PAGE XML upload row; the label is styled as a button and wraps the hidden file input it targets. --> |
| <div id="xml-upload-row" class="xml-row hidden"> |
| <span id="xml-status" class="muted">No PAGE XML</span> |
| <label class="btn btn-small btn-outline" for="xml-input"> |
| Upload XML |
| <input type="file" id="xml-input" accept=".xml" hidden multiple> |
| </label> |
| </div> |
| </section> |
|
|
| <hr> |
|
|
| <!-- Segmentation settings: method, compute device, and extra options for the neural (blla) segmenter. --> |
| <section class="panel-section" id="seg-controls"> |
| <h2>Segmentation</h2> |
| <label for="seg-method">Method</label> |
| <select id="seg-method"> |
| <option value="kraken" selected>Kraken Classical</option> |
| <option value="hpp">HPP / projection profile fallback</option> |
| <option value="kraken-blla" disabled>Kraken Neural / blla (server only)</option> |
| </select> |


| <label for="seg-device">Device</label> |
| <select id="seg-device"> |
| <option value="cpu">CPU</option> |
| <option value="cuda:0">GPU 0</option> |
| <option value="cuda:1">GPU 1</option> |
| </select> |


| <!-- Hidden via inline display:none in the static markup; presumably revealed by /static/app.js for the blla method — confirm. --> |
| <div id="blla-options" style="display:none"> |
| <div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap"> |
| <div style="display:flex;flex-direction:column;gap:3px"> |
| <label for="seg-max-columns">Max columns</label> |
| <input type="number" id="seg-max-columns" min="1" max="12" value="6" style="width:60px"> |
| </div> |
| <div style="display:flex;flex-direction:column;gap:3px"> |
| <label for="seg-split-width">Split width %</label> |
| <input type="number" id="seg-split-width" min="5" max="80" value="40" step="5" style="width:60px" title="Min region width (% of page) to trigger sub-column splitting. Lower = split narrower regions. Double pages: try 20."> |
| </div> |
| </div> |
| <div style="margin-top:6px"> |
| <label for="seg-text-direction">Reading direction</label> |
| <select id="seg-text-direction" title="Controls column reading order. Use horizontal-rl for Arabic, Ottoman, Hebrew manuscripts."> |
| <option value="horizontal-lr">LTR (Latin, Cyrillic, …)</option> |
| <option value="horizontal-rl">RTL (Arabic, Ottoman, Hebrew, …)</option> |
| <option value="vertical-lr">Vertical LTR</option> |
| <option value="vertical-rl">Vertical RTL</option> |
| </select> |
| </div> |
| </div> |
| </section> |
|
|
| <div id="seg-regions-list" class="hidden"></div> <!-- Empty container carrying the "hidden" class; presumably filled by /static/app.js — confirm. --> |


| <div class="panel-footer"> <!-- Primary actions of the settings panel: preview segmentation, or transcribe. --> |
| <div class="btn-row footer-btn-row"> |
| <button id="btn-segment" class="btn btn-outline" disabled title="Preview line segmentation without transcribing">Segment</button> <!-- Disabled in the static markup. --> |
| <button id="btn-transcribe" class="btn btn-accent" disabled>Transcribe</button> <!-- Disabled in the static markup. --> |
| </div> |
| </div> |
| </aside> |
| <div class="panel-resize-handle" id="resize-left" title="Drag to resize"></div> |
|
|
| |
| <!-- Centre column: zoom and navigation toolbar above the scrollable page viewer. --> |
| <section id="viewer-panel" class="panel" data-panel="image"> |
| |
| <div id="zoom-toolbar" class="zoom-toolbar hidden"> |
| <!-- Symbol-only buttons carry aria-label so they are announced by function rather than by glyph. --> |
| <button class="zoom-btn" id="btn-zoom-out" type="button" title="Zoom out" aria-label="Zoom out">−</button> |
| <span id="zoom-level" class="zoom-level">100%</span> |
| <button class="zoom-btn" id="btn-zoom-in" type="button" title="Zoom in" aria-label="Zoom in">+</button> |
| <!-- NOTE(review): this glyph was restored from a mis-decoded byte sequence; ⊡ is the best match — confirm against the original file. --> |
| <button class="zoom-btn zoom-fit" id="btn-zoom-fit" type="button" title="Fit to view" aria-label="Fit to view">⊡</button> |
| <span class="zoom-toolbar-sep"></span> |
| <!-- Both navigation buttons are disabled in the static markup. --> |
| <button class="btn btn-small btn-outline nav-btn" id="btn-nav-prev" type="button" title="Previous image (←)" disabled>‹ Prev</button> |
| <span id="batch-nav-label" class="batch-nav-label-toolbar"></span> |
| <button class="btn btn-small btn-outline nav-btn" id="btn-nav-next" type="button" title="Next image (→)" disabled>Next ›</button> |
| </div> |
| |
| <div id="viewer-scroll"> |
| <div id="viewer-placeholder" class="viewer-placeholder"> |
| <!-- Decorative placeholder icon; the paragraph below carries the message. --> |
| <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1" aria-hidden="true" focusable="false"> |
| <rect x="3" y="3" width="18" height="18" rx="2"/> |
| <circle cx="8.5" cy="8.5" r="1.5"/> |
| <path stroke-linecap="round" stroke-linejoin="round" d="M21 15l-5-5L5 21"/> |
| </svg> |
| <p>Upload an image to begin</p> |
| </div> |
| <div id="image-container" class="hidden"> |
| <!-- No src in the static markup; presumably assigned by /static/app.js — confirm. alt gives the image a fallback name. --> |
| <img id="page-image" alt="Uploaded page"> |
| <canvas id="overlay-canvas"></canvas> |
| </div> |
| </div> |
| </section> |
| <div class="panel-resize-handle" id="resize-right" title="Drag to resize"></div> |
|
|
| |
| <!-- Results column: header controls (font, search, confidence filter, progress), the line list, and export actions. --> |
| <section id="results-panel" class="panel" data-panel="results"> |
| <div class="results-header"> |
| <div class="results-header-row"> |
| <h2>Transcription</h2> |
| <div class="results-header-controls"> |
| <!-- No visible label exists for this select, so aria-label names it explicitly. Options are absent in the static markup. --> |
| <select id="font-select" class="font-select" title="Transcription font" aria-label="Transcription font"></select> |
| <!-- NOTE(review): the glyph below arrived mis-decoded and the intended icon cannot be recovered from this file — restore it from the original source. aria-label keeps the control named meanwhile. --> |
| <button id="btn-col-layout" class="btn-icon hidden" type="button" title="Toggle column layout" aria-label="Toggle column layout">β</button> |
| </div> |
| </div> |
| <div id="results-search-row" class="results-search-row hidden"> |
| <!-- A placeholder is not a label; aria-label provides the accessible name. --> |
| <input type="search" id="results-search" placeholder="Search lines…" autocomplete="off" aria-label="Search lines"> |
| <span id="results-search-count" class="muted"></span> |
| </div> |
| <div id="conf-filter-row" class="conf-filter-row hidden"> |
| <!-- "for" ties the label to the range input so the slider is named and the label is clickable. --> |
| <label for="conf-threshold">Min conf: <strong id="conf-threshold-val">0%</strong></label> |
| <input type="range" id="conf-threshold" min="0" max="100" value="0" step="5"> |
| </div> |
| <div id="progress-container" class="hidden"> |
| <div id="progress-bar"><div id="progress-fill"></div></div> |
| <div class="progress-row"> |
| <p id="progress-text" class="muted">0 / 0 lines</p> |
| <button id="btn-cancel" class="btn btn-small hidden" type="button">Cancel</button> |
| </div> |
| </div> |
| </div> |
| <!-- Empty in the static markup; presumably receives one entry per transcribed line from /static/app.js — confirm. --> |
| <div id="transcription-lines"></div> |
| <div id="results-footer" class="hidden"> |
| <p id="results-summary" class="muted"></p> |
| <div class="btn-row"> |
| <button id="btn-copy-text" class="btn btn-small" type="button">Copy Text</button> |
| <button id="btn-export-txt" class="btn btn-small" type="button">TXT</button> |
| <button id="btn-export-csv" class="btn btn-small" type="button">CSV</button> |
| <button id="btn-export-xml" class="btn btn-small hidden" type="button">XML</button> |
| </div> |
| </div> |
| </section> |
| </main> |
|
|
| |
| <!-- Mobile tab bar: each button's data-target names the data-panel value that the inline script marks with "panel-active". --> |
| <!-- NOTE(review): the selected tab is conveyed only through the "active" class; exposing it to assistive technology would need a matching change in the inline script. --> |
| <nav id="mobile-tabs" aria-label="Panels"> |
| <button class="tab-btn active" type="button" data-target="settings"> |
| <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true" focusable="false"><path fill-rule="evenodd" d="M11.49 3.17c-.38-1.56-2.6-1.56-2.98 0a1.532 1.532 0 01-2.286.948c-1.372-.836-2.942.734-2.106 2.106.54.886.061 2.042-.947 2.287-1.561.379-1.561 2.6 0 2.978a1.532 1.532 0 01.947 2.287c-.836 1.372.734 2.942 2.106 2.106a1.532 1.532 0 012.287.947c.379 1.561 2.6 1.561 2.978 0a1.533 1.533 0 012.287-.947c1.372.836 2.942-.734 2.106-2.106a1.533 1.533 0 01.947-2.287c1.561-.379 1.561-2.6 0-2.978a1.532 1.532 0 01-.947-2.287c.836-1.372-.734-2.942-2.106-2.106a1.532 1.532 0 01-2.287-.947zM10 13a3 3 0 100-6 3 3 0 000 6z" clip-rule="evenodd"/></svg> |
| Settings |
| </button> |
| <button class="tab-btn" type="button" data-target="image"> |
| <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true" focusable="false"><path fill-rule="evenodd" d="M4 3a2 2 0 00-2 2v10a2 2 0 002 2h12a2 2 0 002-2V5a2 2 0 00-2-2H4zm12 12H4l4-8 3 6 2-4 3 6z" clip-rule="evenodd"/></svg> |
| Image |
| </button> |
| <button class="tab-btn" type="button" data-target="results"> |
| <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true" focusable="false"><path fill-rule="evenodd" d="M4 4a2 2 0 012-2h4.586A2 2 0 0112 2.586L15.414 6A2 2 0 0116 7.414V16a2 2 0 01-2 2H6a2 2 0 01-2-2V4zm2 6a1 1 0 011-1h6a1 1 0 110 2H7a1 1 0 01-1-1zm1 3a1 1 0 100 2h6a1 1 0 100-2H7z" clip-rule="evenodd"/></svg> |
| Results |
| </button> |
| </nav> |
|
|
| |
| <!-- Help dialog. The inline script at the end of body opens it with showModal() and closes it from the close button or a click on the dialog element itself. --> |
| <!-- aria-labelledby points at the heading so the dialog is announced with its title. --> |
| <dialog id="help-modal" aria-labelledby="help-modal-title"> |
| <div class="modal-header"> |
| <h2 id="help-modal-title">Polyscriptor HTR — Quick Guide</h2> |
| <!-- Symbol-only button; aria-label supplies the accessible name. --> |
| <button id="btn-help-close" class="btn-icon" type="button" aria-label="Close">✕</button> |
| </div> |
| <div class="modal-body"> |
| <h3>Quick Start</h3> |
| <ol> |
| <li><strong>Select an engine</strong> from the dropdown and configure it (model path, API key, etc.).</li> |
| <li>Click <strong>Load Model</strong> and wait for the green status badge.</li> |
| <li><strong>Upload an image</strong> by dragging it onto the upload area or clicking to browse.</li> |
| <li>Optionally click <strong>Segment</strong> to preview line detection before transcribing.</li> |
| <li>Click <strong>Transcribe</strong>. Lines appear one by one as they are processed.</li> |
| <li><strong>Export</strong> the result as TXT, CSV, or PAGE XML.</li> |
| </ol> |


| <h3>Source Code</h3> |
| <p> |
| The public Polyscriptor source code is available on |
| <a href="https://github.com/achimrabus/polyscriptor" target="_blank" rel="noopener noreferrer">GitHub</a>. |
| This Hugging Face Space runs a curated hosted demo configuration. |
| </p> |


| <h3>Engines</h3> |
| <table> |
| <!-- scope="col" marks these cells as column headers for screen readers. --> |
| <tr><th scope="col">Engine</th><th scope="col">Best for</th></tr> |
| <tr><td>CRNN-CTC</td><td>Fastest; works well on Church Slavonic, Glagolitic, Ukrainian with trained models</td></tr> |
| <tr><td>TrOCR</td><td>HuggingFace Transformer OCR; good general-purpose accuracy</td></tr> |
| <tr><td>Qwen3-VL</td><td>Large vision-language model; best quality but slow, needs GPU</td></tr> |
| <tr><td>Kraken</td><td>Classical HTR; good for Latin scripts</td></tr> |
| <tr><td>Party</td><td>Whole-page transformer; requires PAGE XML with line segmentation</td></tr> |
| <tr><td>Commercial APIs</td><td>OpenAI / Gemini / Claude — cloud inference, no local GPU needed</td></tr> |
| <tr><td>OpenWebUI</td><td>Locally hosted models via OpenWebUI/Ollama</td></tr> |
| </table> |


| <h3>Segmentation</h3> |
| <ul> |
| <li><strong>Kraken Classical</strong> — default line segmentation in this Hugging Face CPU demo.</li> |
| <li><strong>HPP</strong> — horizontal projection profile fallback.</li> |
| <li><strong>Kraken Neural / blla</strong> — available on the full server setup, but not enabled in this Space.</li> |
| <li><strong>PAGE XML upload</strong> — skip segmentation entirely by uploading an existing PAGE XML annotation (e.g. from Transkribus).</li> |
| </ul> |


| <h3>Tips</h3> |
| <ul> |
| <li>Click a transcription line to highlight the corresponding bounding box in the image.</li> |
| <!-- The "less than" sign is written as &lt; so the parser cannot mistake it for the start of a tag. --> |
| <li>Confidence badges: <span class="conf-high demo-badge">high ≥90%</span> <span class="conf-mid demo-badge">mid ≥75%</span> <span class="conf-low demo-badge">low &lt;75%</span></li> |
| <li>Line-segmenting engines (CRNN-CTC, TrOCR, Kraken) use the segmentation method above. Page-level engines (Party, Qwen3-VL, Commercial APIs) do their own segmentation.</li> |
| <li>API keys can be saved on the server — enter the key once, check <em>Save key on server</em>.</li> |
| <li>Uploads are kept for 24 hours, then cleaned up automatically.</li> |
| </ul> |


| <h3>Keyboard</h3> |
| <ul> |
| <li><kbd>Esc</kbd> — close this dialog</li> |
| </ul> |
| </div> |
| </dialog> |
|
|
| |
| <div id="toast-container"></div> <!-- Empty mount point in the static markup; presumably where /static/app.js renders toast messages — confirm. --> |


| <script type="module" src="/static/app.js"></script> <!-- Module scripts are deferred by default, so this runs after the document has been parsed. --> |
| <script> |
| |
| const modal = document.getElementById('help-modal'); |
| document.getElementById('btn-help').addEventListener('click', () => modal.showModal()); |
| document.getElementById('btn-help-close').addEventListener('click', () => modal.close()); |
| modal.addEventListener('click', e => { if (e.target === modal) modal.close(); }); |
| |
| |
| const tabBtns = document.querySelectorAll('.tab-btn'); |
| const panels = document.querySelectorAll('[data-panel]'); |
| tabBtns.forEach(btn => { |
| btn.addEventListener('click', () => { |
| const target = btn.dataset.target; |
| tabBtns.forEach(b => b.classList.remove('active')); |
| btn.classList.add('active'); |
| panels.forEach(p => { |
| p.classList.toggle('panel-active', p.dataset.panel === target); |
| }); |
| }); |
| }); |
| |
| document.querySelector('[data-panel="settings"]').classList.add('panel-active'); |
| </script> |
| </body> |
| </html> |
|
|