File size: 18,750 Bytes
78431ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Polyscriptor HTR</title>
    <link rel="stylesheet" href="/static/app.css">
</head>
<body>
    <!-- Header -->
    <!-- Page banner: logo glyph and app title on the left, GPU widget and help button on the right. -->
    <header id="header">
        <div class="header-left">
            <!-- Purely decorative glyph, so it is hidden from assistive technology. -->
            <span class="header-logo" aria-hidden="true">⬑</span>
            <h1>Polyscriptor <span class="header-sub">HTR</span></h1>
        </div>
        <div class="header-right">
            <!-- Empty in the static markup; presumably filled in by /static/app.js (TODO confirm). -->
            <div id="gpu-status" class="gpu-widget"></div>
            <!-- Opens the help dialog (click handler is attached by the inline script at the end of the body).
                 The visible text is only "?", so aria-label supplies the accessible name. -->
            <button type="button" id="btn-help" class="btn-icon" title="Help" aria-label="Help">?</button>
        </div>
    </header>

    <!-- Main 3-column layout -->
    <main id="app">
        <!-- Left: Engine + Image controls -->
        <aside id="engine-panel" class="panel" data-panel="settings">


            <!-- Engine picker: choose an HTR engine, fill in its configuration, then load the model. -->
            <section class="panel-section">
                <h2>HTR Engine</h2>
                <label for="engine-select">Engine</label>
                <!-- Ships disabled with a single placeholder option; presumably populated and enabled by /static/app.js (TODO confirm). -->
                <select id="engine-select" disabled>
                    <option>Loading engines…</option>
                </select>
                <!-- Both elements below are empty in the static markup; they are mount points with no content of their own. -->
                <p id="engine-description" class="muted"></p>
                <div id="config-form"></div>
                <!-- Kraken preset chooser. Starts with the "hidden" class.
                     NOTE(review): looks like it is only revealed when the Kraken engine is selected; confirm in app.js. -->
                <div id="kraken-preset-row" class="hidden" style="margin-top:8px">
                    <label for="kraken-preset-select" style="display:block;font-size:0.78rem;margin-bottom:3px">Kraken Model Preset</label>
                    <select id="kraken-preset-select" style="width:100%">
                        <option value="">Loading presets…</option>
                    </select>
                    <span id="kraken-preset-status" class="muted" style="font-size:0.72rem;display:block;margin-top:3px"></span>
                </div>
                <!-- The load button starts disabled and the status badge starts hidden. -->
                <button id="btn-load-model" class="btn btn-primary" disabled>Load Model</button>
                <div id="engine-status" class="status-badge hidden"></div>
            </section>

            <hr>

            <!-- Image input: upload area, batch queue with its export buttons, and optional PAGE XML upload. -->
            <section class="panel-section">
                <h2>Image</h2>
                <!-- Drop/click target. The real file input inside it carries the "hidden" attribute and accepts
                     images and PDFs, multiple files at once.
                     NOTE(review): this div is not focusable, so opening the file picker presumably needs a pointer; confirm in app.js. -->
                <div id="upload-area" class="upload-area">
                    <svg class="upload-icon" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">
                        <path stroke-linecap="round" stroke-linejoin="round"
                              d="M3 16.5v2.25A2.25 2.25 0 005.25 21h13.5A2.25 2.25 0 0021 18.75V16.5m-13.5-9L12 3m0 0l4.5 4.5M12 3v13.5"/>
                    </svg>
                    <p>Drop image or PDF, or click to browse</p>
                    <input type="file" id="file-input" accept="image/*,.pdf" multiple hidden>
                </div>
                <!-- Empty in the static markup. -->
                <p id="image-info" class="muted"></p>
                <!-- Batch queue; the whole section starts with the "hidden" class. -->
                <div id="batch-queue-section" class="hidden">
                    <div class="batch-queue-header">
                        <span class="section-label">Queue</span>
                        <span id="batch-overall-progress" class="batch-overall-progress hidden"></span>
                    </div>
                    <!-- Empty container for the queue entries. -->
                    <div id="batch-list"></div>
                    <!-- Batch options: "Use PAGE XML" is checked by default, "Resume" is unchecked. -->
                    <div class="batch-options-row">
                        <label class="checkbox-label" title="Use PAGE XML segmentation if a matching .xml file was uploaded for this image">
                            <input type="checkbox" id="batch-use-pagexml" checked>
                            Use PAGE XML
                        </label>
                        <label class="checkbox-label" title="Skip images that have already been transcribed in this session">
                            <input type="checkbox" id="batch-resume">
                            Resume
                        </label>
                    </div>
                    <div class="btn-row" style="margin-top:6px">
                        <button id="btn-process-batch" class="btn btn-primary btn-small">Process All</button>
                        <button id="btn-clear-batch" class="btn btn-small btn-outline">Clear</button>
                    </div>
                    <!-- Batch export buttons; this row starts hidden. -->
                    <div id="batch-export-row" class="btn-row hidden" style="margin-top:6px">
                        <button id="btn-export-batch-txt" class="btn btn-small">All TXT</button>
                        <button id="btn-export-batch-csv" class="btn btn-small">All CSV</button>
                        <button id="btn-export-batch-txt-zip" class="btn btn-small btn-primary">Download ZIP (TXT)</button>
                        <button id="btn-export-batch-thinking-zip" class="btn btn-small btn-primary">Download ZIP (Thinking)</button>
                        <button id="btn-export-batch-xml" class="btn btn-small btn-primary">Download ZIP (XML)</button>
                    </div>
                </div>
                <!-- PAGE XML upload row; starts hidden. The label is styled as a button and wraps the hidden file input,
                     which accepts one or more .xml files. -->
                <div id="xml-upload-row" class="xml-row hidden">
                    <span id="xml-status" class="muted">No PAGE XML</span>
                    <label class="btn btn-small btn-outline" for="xml-input">
                        Upload XML
                        <input type="file" id="xml-input" accept=".xml" hidden multiple>
                    </label>
                </div>
            </section>

            <hr>

            <!-- Line segmentation settings: method, compute device, and extra options for the neural (blla) segmenter. -->
            <section class="panel-section" id="seg-controls">
                <h2>Segmentation</h2>
                <label for="seg-method">Method</label>
                <!-- "kraken" is preselected; the "kraken-blla" option is present but disabled in this build. -->
                <select id="seg-method">
                    <option value="kraken" selected>Kraken Classical</option>
                    <option value="hpp">HPP / projection profile fallback</option>
                    <option value="kraken-blla" disabled>Kraken Neural / blla (server only)</option>
                </select>

                <label for="seg-device">Device</label>
                <!-- Static device list: CPU plus two CUDA device ids. -->
                <select id="seg-device">
                    <option value="cpu">CPU</option>
                    <option value="cuda:0">GPU 0</option>
                    <option value="cuda:1">GPU 1</option>
                </select>

                <!-- Extra options, hidden via inline display:none.
                     NOTE(review): presumably shown by app.js only when the blla method is selected; confirm. -->
                <div id="blla-options" style="display:none">
                    <div style="display:flex;gap:12px;align-items:center;flex-wrap:wrap">
                        <div style="display:flex;flex-direction:column;gap:3px">
                            <label for="seg-max-columns">Max columns</label>
                            <input type="number" id="seg-max-columns" min="1" max="12" value="6" style="width:60px">
                        </div>
                        <div style="display:flex;flex-direction:column;gap:3px">
                            <label for="seg-split-width">Split width %</label>
                            <input type="number" id="seg-split-width" min="5" max="80" value="40" step="5" style="width:60px" title="Min region width (% of page) to trigger sub-column splitting. Lower = split narrower regions. Double pages: try 20.">
                        </div>
                    </div>
                    <div style="margin-top:6px">
                        <label for="seg-text-direction">Reading direction</label>
                        <!-- First option (horizontal-lr) is the default because no option is marked selected. -->
                        <select id="seg-text-direction" title="Controls column reading order. Use horizontal-rl for Arabic, Ottoman, Hebrew manuscripts.">
                            <option value="horizontal-lr">LTR (Latin, Cyrillic, …)</option>
                            <option value="horizontal-rl">RTL (Arabic, Ottoman, Hebrew, …)</option>
                            <option value="vertical-lr">Vertical LTR</option>
                            <option value="vertical-rl">Vertical RTL</option>
                        </select>
                    </div>
                </div>
            </section>

            <!-- Empty, initially hidden container; presumably receives the detected regions after segmentation (TODO confirm in app.js). -->
            <div id="seg-regions-list" class="hidden"></div>

            <!-- Primary actions for the settings panel. Both buttons start disabled. -->
            <div class="panel-footer">
                <div class="btn-row footer-btn-row">
                    <button id="btn-segment" class="btn btn-outline" disabled title="Preview line segmentation without transcribing">Segment</button>
                    <button id="btn-transcribe" class="btn btn-accent" disabled>Transcribe</button>
                </div>
            </div>
        </aside>
        <div class="panel-resize-handle" id="resize-left" title="Drag to resize"></div>

        <!-- Center: Image viewer -->
        <!-- Image viewer column: zoom/navigation toolbar on top, scrollable image area below. -->
        <section id="viewer-panel" class="panel" data-panel="image">
            <!-- Zoom toolbar — only visible when image is loaded -->
            <div id="zoom-toolbar" class="zoom-toolbar hidden">
                <!-- Symbol-only buttons carry an aria-label because the glyph alone is not a usable name. -->
                <button type="button" class="zoom-btn" id="btn-zoom-out" title="Zoom out" aria-label="Zoom out">−</button>
                <span id="zoom-level" class="zoom-level">100%</span>
                <button type="button" class="zoom-btn" id="btn-zoom-in" title="Zoom in" aria-label="Zoom in">+</button>
                <button type="button" class="zoom-btn zoom-fit" id="btn-zoom-fit" title="Fit to view" aria-label="Fit to view">⊑</button>
                <span class="zoom-toolbar-sep"></span>
                <!-- Previous/next navigation; both buttons start disabled. -->
                <button type="button" class="btn btn-small btn-outline nav-btn" id="btn-nav-prev" title="Previous image (←)" disabled>‹ Prev</button>
                <span id="batch-nav-label" class="batch-nav-label-toolbar"></span>
                <button type="button" class="btn btn-small btn-outline nav-btn" id="btn-nav-next" title="Next image (→)" disabled>Next ›</button>
            </div>
            <!-- Scroll area fills remaining height -->
            <div id="viewer-scroll">
                <!-- Shown until an image is available; the icon is decorative, the paragraph carries the message. -->
                <div id="viewer-placeholder" class="viewer-placeholder">
                    <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1" aria-hidden="true">
                        <rect x="3" y="3" width="18" height="18" rx="2"/>
                        <circle cx="8.5" cy="8.5" r="1.5"/>
                        <path stroke-linecap="round" stroke-linejoin="round" d="M21 15l-5-5L5 21"/>
                    </svg>
                    <p>Upload an image to begin</p>
                </div>
                <!-- Starts hidden. The image has no src in the static markup (presumably assigned by app.js);
                     the canvas is a sibling of the image inside the same container. -->
                <div id="image-container" class="hidden">
                    <img id="page-image" alt="Uploaded page image">
                    <canvas id="overlay-canvas"></canvas>
                </div>
            </div>
        </section>
        <div class="panel-resize-handle" id="resize-right" title="Drag to resize"></div>

        <!-- Right: Transcription results -->
        <!-- Transcription column: header controls (font, layout, search, confidence filter, progress),
             the list of transcribed lines, and a footer with copy/export actions. -->
        <section id="results-panel" class="panel" data-panel="results">
            <div class="results-header">
                <div class="results-header-row">
                    <h2>Transcription</h2>
                    <div class="results-header-controls">
                        <!-- No visible label, so aria-label provides the accessible name. The option list is empty in the static markup. -->
                        <select id="font-select" class="font-select" title="Transcription font" aria-label="Transcription font"></select>
                        <!-- Icon-only button; starts hidden. -->
                        <button type="button" id="btn-col-layout" class="btn-icon hidden" title="Toggle column layout" aria-label="Toggle column layout">⊞</button>
                    </div>
                </div>
                <!-- Search row; starts hidden. The placeholder is not a label, hence the aria-label. -->
                <div id="results-search-row" class="results-search-row hidden">
                    <input type="search" id="results-search" placeholder="Search lines…" autocomplete="off" aria-label="Search lines">
                    <span id="results-search-count" class="muted"></span>
                </div>
                <!-- Confidence filter; starts hidden. The label is tied to the slider through for/id. -->
                <div id="conf-filter-row" class="conf-filter-row hidden">
                    <label for="conf-threshold">Min conf: <strong id="conf-threshold-val">0%</strong></label>
                    <input type="range" id="conf-threshold" min="0" max="100" value="0" step="5">
                </div>
                <!-- Progress display; starts hidden, as does the cancel button inside it. -->
                <div id="progress-container" class="hidden">
                    <div id="progress-bar"><div id="progress-fill"></div></div>
                    <div class="progress-row">
                        <p id="progress-text" class="muted">0 / 0 lines</p>
                        <button type="button" id="btn-cancel" class="btn btn-small hidden">Cancel</button>
                    </div>
                </div>
            </div>
            <!-- Empty container for the transcribed lines. -->
            <div id="transcription-lines"></div>
            <!-- Summary and export actions; the footer starts hidden, and the XML button is additionally hidden on its own. -->
            <div id="results-footer" class="hidden">
                <p id="results-summary" class="muted"></p>
                <div class="btn-row">
                    <button type="button" id="btn-copy-text" class="btn btn-small">Copy Text</button>
                    <button type="button" id="btn-export-txt" class="btn btn-small">TXT</button>
                    <button type="button" id="btn-export-csv" class="btn btn-small">CSV</button>
                    <button type="button" id="btn-export-xml" class="btn btn-small hidden">XML</button>
                </div>
            </div>
        </section>
    </main>

    <!-- Mobile tab bar (visible < 700px) -->
    <!-- Tab bar that switches between the three panels. Each button's data-target matches a panel's
         data-panel value; the inline script at the end of the body toggles the classes.
         Icons are decorative (the text label names each tab), so they are hidden from assistive technology. -->
    <nav id="mobile-tabs" aria-label="Panels">
        <button type="button" class="tab-btn active" data-target="settings">
            <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M11.49 3.17c-.38-1.56-2.6-1.56-2.98 0a1.532 1.532 0 01-2.286.948c-1.372-.836-2.942.734-2.106 2.106.54.886.061 2.042-.947 2.287-1.561.379-1.561 2.6 0 2.978a1.532 1.532 0 01.947 2.287c-.836 1.372.734 2.942 2.106 2.106a1.532 1.532 0 012.287.947c.379 1.561 2.6 1.561 2.978 0a1.533 1.533 0 012.287-.947c1.372.836 2.942-.734 2.106-2.106a1.533 1.533 0 01.947-2.287c1.561-.379 1.561-2.6 0-2.978a1.532 1.532 0 01-.947-2.287c.836-1.372-.734-2.942-2.106-2.106a1.532 1.532 0 01-2.287-.947zM10 13a3 3 0 100-6 3 3 0 000 6z" clip-rule="evenodd"/></svg>
            Settings
        </button>
        <button type="button" class="tab-btn" data-target="image">
            <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M4 3a2 2 0 00-2 2v10a2 2 0 002 2h12a2 2 0 002-2V5a2 2 0 00-2-2H4zm12 12H4l4-8 3 6 2-4 3 6z" clip-rule="evenodd"/></svg>
            Image
        </button>
        <button type="button" class="tab-btn" data-target="results">
            <svg viewBox="0 0 20 20" fill="currentColor" aria-hidden="true"><path fill-rule="evenodd" d="M4 4a2 2 0 012-2h4.586A2 2 0 0112 2.586L15.414 6A2 2 0 0116 7.414V16a2 2 0 01-2 2H6a2 2 0 01-2-2V4zm2 6a1 1 0 011-1h6a1 1 0 110 2H7a1 1 0 01-1-1zm1 3a1 1 0 100 2h6a1 1 0 100-2H7z" clip-rule="evenodd"/></svg>
            Results
        </button>
    </nav>

    <!-- Help modal -->
    <!-- Help dialog. Opened and closed by the inline script at the end of the body; the heading
         supplies the dialog's accessible name through aria-labelledby. -->
    <dialog id="help-modal" aria-labelledby="help-modal-title">
        <div class="modal-header">
            <h2 id="help-modal-title">Polyscriptor HTR — Quick Guide</h2>
            <!-- Symbol-only close button, so aria-label provides the name. -->
            <button type="button" id="btn-help-close" class="btn-icon" aria-label="Close">✕</button>
        </div>
        <div class="modal-body">
            <h3>Quick Start</h3>
            <ol>
                <li><strong>Select an engine</strong> from the dropdown and configure it (model path, API key, etc.).</li>
                <li>Click <strong>Load Model</strong> and wait for the green status badge.</li>
                <li><strong>Upload an image</strong> by dragging it onto the upload area or clicking to browse.</li>
                <li>Optionally click <strong>Segment</strong> to preview line detection before transcribing.</li>
                <li>Click <strong>Transcribe</strong>. Lines appear one by one as they are processed.</li>
                <li><strong>Export</strong> the result as TXT, CSV, or PAGE XML.</li>
            </ol>

            <h3>Source Code</h3>
            <p>
                The public Polyscriptor source code is available on
                <a href="https://github.com/achimrabus/polyscriptor" target="_blank" rel="noopener noreferrer">GitHub</a>.
                This Hugging Face Space runs a curated hosted demo configuration.
            </p>

            <h3>Engines</h3>
            <!-- Two-column reference table; the header cells are marked as column headers. -->
            <table>
                <tr><th scope="col">Engine</th><th scope="col">Best for</th></tr>
                <tr><td>CRNN-CTC</td><td>Fastest; works well on Church Slavonic, Glagolitic, Ukrainian with trained models</td></tr>
                <tr><td>TrOCR</td><td>HuggingFace Transformer OCR; good general-purpose accuracy</td></tr>
                <tr><td>Qwen3-VL</td><td>Large vision-language model; best quality but slow, needs GPU</td></tr>
                <tr><td>Kraken</td><td>Classical HTR; good for Latin scripts</td></tr>
                <tr><td>Party</td><td>Whole-page transformer; requires PAGE XML with line segmentation</td></tr>
                <tr><td>Commercial APIs</td><td>OpenAI / Gemini / Claude — cloud inference, no local GPU needed</td></tr>
                <tr><td>OpenWebUI</td><td>Locally hosted models via OpenWebUI/Ollama</td></tr>
            </table>

            <h3>Segmentation</h3>
            <ul>
                <li><strong>Kraken Classical</strong> — default line segmentation in this Hugging Face CPU demo.</li>
                <li><strong>HPP</strong> — horizontal projection profile fallback.</li>
                <li><strong>Kraken Neural / blla</strong> — available on the full server setup, but not enabled in this Space.</li>
                <li><strong>PAGE XML upload</strong> — skip segmentation entirely by uploading an existing PAGE XML annotation (e.g. from Transkribus).</li>
            </ul>

            <h3>Tips</h3>
            <ul>
                <li>Click a transcription line to highlight the corresponding bounding box in the image.</li>
                <li>Confidence badges: <span class="conf-high demo-badge">high ≥90%</span> <span class="conf-mid demo-badge">mid ≥75%</span> <span class="conf-low demo-badge">low &lt;75%</span></li>
                <li>Line-segmenting engines (CRNN-CTC, TrOCR, Kraken) use the segmentation method above. Page-level engines (Party, Qwen3-VL, Commercial APIs) do their own segmentation.</li>
                <li>API keys can be saved on the server — enter the key once, check <em>Save key on server</em>.</li>
                <li>Uploads are kept for 24 hours, then cleaned up automatically.</li>
            </ul>

            <h3>Keyboard</h3>
            <ul>
                <li><kbd>Esc</kbd> — close this dialog</li>
            </ul>
        </div>
    </dialog>

    <!-- Toast notification container -->
    <div id="toast-container"></div>

    <script type="module" src="/static/app.js"></script>
    <script>
        // Help modal
        const modal = document.getElementById('help-modal');
        document.getElementById('btn-help').addEventListener('click', () => modal.showModal());
        document.getElementById('btn-help-close').addEventListener('click', () => modal.close());
        modal.addEventListener('click', e => { if (e.target === modal) modal.close(); });

        // Mobile tab bar
        const tabBtns = document.querySelectorAll('.tab-btn');
        const panels = document.querySelectorAll('[data-panel]');
        tabBtns.forEach(btn => {
            btn.addEventListener('click', () => {
                const target = btn.dataset.target;
                tabBtns.forEach(b => b.classList.remove('active'));
                btn.classList.add('active');
                panels.forEach(p => {
                    p.classList.toggle('panel-active', p.dataset.panel === target);
                });
            });
        });
        // Default: settings active on mobile
        document.querySelector('[data-panel="settings"]').classList.add('panel-active');
    </script>
</body>
</html>