fhueni committed on
Commit
81de2bb
·
1 Parent(s): c193a16

chore: removed unnecessary files

Browse files
Files changed (4) hide show
  1. archive/index.js +0 -124
  2. docling-html-parser.js +0 -442
  3. index.js +0 -510
  4. style.css +0 -62
archive/index.js DELETED
@@ -1,124 +0,0 @@
1
- import {
2
- AutoProcessor,
3
- AutoModelForVision2Seq,
4
- AutoModelForQuestionAnswering,
5
- RawImage,
6
- TextStreamer,
7
- pipeline
8
- } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
9
-
10
- const modelLoaderOverlay = document.getElementById("model-loader-overlay");
11
- const processingIndicator = document.getElementById("processing-indicator");
12
-
13
- const promptInput = document.getElementById("prompt-input");
14
- const generateBtn = document.getElementById("process-btn");
15
- let model, processor;
16
- let currentImage = null;
17
-
18
-
19
- /**
20
- * Loads and initializes the model and processor.
21
- */
22
- async function initializeModel() {
23
- try {
24
- const model_id = "HuggingFaceTB/SmolLM2-1.7B-Instruct";
25
- // processor = await AutoProcessor.from_pretrained(model_id);
26
- const progress = {};
27
-
28
- model ??= pipeline('translation', 'Xenova/nllb-200-distilled-600M', {
29
- progress_callback: (data) => {
30
- if (data.status === "progress" && data.file?.endsWith?.("onnx_data")) {
31
- progress[data.file] = data;
32
- if (Object.keys(progress).length !== 3) return;
33
- let sum = 0;
34
- let total = 0;
35
- for (const [key, val] of Object.entries(progress)) {
36
- sum += val.loaded;
37
- total += val.total;
38
- }
39
- const overallPercent = Math.round((sum / total) * 100);
40
- document.getElementById("model-progress").value = overallPercent;
41
- document.getElementById("progress-text").textContent = overallPercent + "%";
42
- }
43
- }
44
- });
45
-
46
-
47
- /*
48
- model = await AutoModelForQuestionAnswering.from_pretrained(model_id, {
49
- dtype: {
50
- embed_tokens: "fp16",
51
- decoder_model_merged: "fp32",
52
- },
53
- device: "webgpu",
54
- progress_callback: (data) => {
55
- if (data.status === "progress" && data.file?.endsWith?.("onnx_data")) {
56
- progress[data.file] = data;
57
- if (Object.keys(progress).length !== 3) return;
58
- let sum = 0;
59
- let total = 0;
60
- for (const [key, val] of Object.entries(progress)) {
61
- sum += val.loaded;
62
- total += val.total;
63
- }
64
- const overallPercent = Math.round((sum / total) * 100);
65
- document.getElementById("model-progress").value = overallPercent;
66
- document.getElementById("progress-text").textContent = overallPercent + "%";
67
- }
68
- },
69
- });
70
- */
71
- modelLoaderOverlay.style.display = "none";
72
- console.log("Model loaded successfully.");
73
- } catch (error) {
74
- console.error("Failed to load model:", error);
75
- modelLoaderOverlay.innerHTML = `
76
- <h2 class="text-center text-red-500 text-xl font-semibold">Failed to Load Model</h2>
77
- <p class="text-center text-white text-md mt-2">Please refresh the page to try again. Check the console for errors.</p>
78
- `;
79
- }
80
- }
81
-
82
- /**
83
- * Processes an image and generates Docling text.
84
- * @param {ImageBitmap|HTMLImageElement} imageObject An image object to process.
85
- */
86
- async function process(imageObject) {
87
-
88
- }
89
-
90
-
91
- /**
92
- * Manages the visibility of UI components based on the app state.
93
- * @param {'initial'|'processing'|'result'} state The current state.
94
- */
95
- function setUiState(state) {
96
- processingIndicator.classList.add("hidden");
97
- if (state === "initial") {
98
- // Clear previous results when going back to initial
99
- // document.getElementById('detection-stats').innerHTML = '';
100
- // document.getElementById('drug-matches').innerHTML = '';
101
- generateBtn.disabled = true;
102
- } else if (state === "processing") {
103
- // Keep stats visible during processing, but clear matches while streaming
104
- // document.getElementById('drug-matches').innerHTML = '';
105
- processingIndicator.classList.remove("hidden");
106
- generateBtn.disabled = true;
107
- } else if (state === "result") {
108
- // Preserve the populated stats and matches on result
109
- generateBtn.disabled = false;
110
- }
111
- }
112
-
113
-
114
- // Event Listeners
115
- generateBtn.addEventListener("click", () => {
116
- if (currentImage) {
117
- processImage(currentImage);
118
- }
119
- });
120
-
121
- document.addEventListener("DOMContentLoaded", () => {
122
- setUiState("initial");
123
- initializeModel();
124
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
docling-html-parser.js DELETED
@@ -1,442 +0,0 @@
1
- /**
2
- * A converter that transforms Docling-formatted strings into HTML.
3
- */
4
- export class DoclingConverter {
5
- constructor() {
6
- this.simpleTagMap = {
7
- doctag: "div",
8
- document: "div",
9
- ordered_list: "ol",
10
- unordered_list: "ul",
11
- list_item: "li",
12
- caption: "figcaption",
13
- footnote: "sup",
14
- formula: "div",
15
- page_footer: "footer",
16
- page_header: "header",
17
- picture: "figure",
18
- chart: "figure",
19
- table: "table",
20
- otsl: "table",
21
- text: "p",
22
- paragraph: "p",
23
- title: "h1",
24
- document_index: "div",
25
- form: "form",
26
- key_value_region: "dl",
27
- reference: "a",
28
- smiles: "span",
29
- };
30
- this.selfClosingTagMap = {
31
- checkbox_selected: '<input type="checkbox" checked disabled>',
32
- checkbox_unselected: '<input type="checkbox" disabled>',
33
- page_break: '<hr class="page-break">',
34
- };
35
- this.TABLE_TAG_CONFIG = {
36
- "<ched>": { htmlTag: "th" },
37
- "<rhed>": { htmlTag: "th", scope: "row" },
38
- "<srow>": { htmlTag: "th", scope: "row" },
39
- "<fcel>": { htmlTag: "td" },
40
- "<ecel>": { htmlTag: "td" },
41
- "<ucel>": { htmlTag: "td" },
42
- "<lcel>": { htmlTag: "td" },
43
- "<xcel>": { htmlTag: "td" },
44
- };
45
- this.TABLE_TAG_REGEX = new RegExp(`(${Object.keys(this.TABLE_TAG_CONFIG).join("|")})`);
46
- const selfClosingNames = Object.keys(this.selfClosingTagMap).join("|");
47
- this.combinedTagRegex = new RegExp(`(<([a-z_0-9]+)>(.*?)<\\/\\2>)|(<(${selfClosingNames})>)`, "s");
48
- }
49
- escapeHtml(text) {
50
- if (!text) return "";
51
- return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/"/g, "&quot;");
52
- }
53
- convert(docling) {
54
- let html = ` ${docling} `;
55
- html = this.cleanupMetadataTokens(html);
56
- html = this.processTags(html);
57
- return html.trim();
58
- }
59
- processTags(text) {
60
- let remainingText = text;
61
- let result = "";
62
- while (remainingText.length > 0) {
63
- const match = remainingText.match(this.combinedTagRegex);
64
- if (match && typeof match.index === "number") {
65
- const textBefore = remainingText.substring(0, match.index);
66
- result += this.escapeHtml(textBefore);
67
- const fullMatch = match[0];
68
- const pairedTagName = match[2];
69
- const pairedContent = match[3];
70
- const selfClosingTagName = match[5];
71
- if (pairedTagName !== undefined) {
72
- result += this.convertSingleTag(pairedTagName, pairedContent);
73
- } else if (selfClosingTagName !== undefined) {
74
- result += this.selfClosingTagMap[selfClosingTagName] || "";
75
- }
76
- remainingText = remainingText.substring(match.index + fullMatch.length);
77
- } else {
78
- result += this.escapeHtml(remainingText);
79
- break;
80
- }
81
- }
82
- return result;
83
- }
84
- convertSingleTag(tagName, content) {
85
- if (tagName === "list_item") {
86
- content = content.trim().replace(/^[·-]\s*/g, "");
87
- }
88
- switch (tagName) {
89
- case "code":
90
- return this.convertBlockCode(content);
91
- case "otsl":
92
- return this.convertTable(content);
93
- case "picture":
94
- case "chart":
95
- return this.convertPictureOrChart(tagName, content);
96
- case "inline":
97
- return this.convertInlineContent(content);
98
- case "section_header_level_0":
99
- case "section_header_level_1":
100
- case "section_header_level_2":
101
- case "section_header_level_3":
102
- case "section_header_level_4":
103
- case "section_header_level_5":
104
- const level = parseInt(tagName.at(-1), 10) + 1;
105
- return `<h${level}>${this.processTags(content)}</h${level}>`;
106
- default:
107
- const htmlTag = this.simpleTagMap[tagName];
108
- if (htmlTag) {
109
- const processedContent = this.processTags(content);
110
- const startTag = this.getStartTag(tagName, htmlTag);
111
- return `${startTag}${processedContent}</${htmlTag}>`;
112
- }
113
- console.warn(`Unknown tag encountered: ${tagName}, escaping it.`);
114
- return this.escapeHtml(`<${tagName}>${content}</${tagName}>`);
115
- }
116
- }
117
- getStartTag(doclingTag, htmlTag) {
118
- switch (doclingTag) {
119
- case "doctag":
120
- case "document":
121
- return '<div class="docling-document">';
122
- case "formula":
123
- return '<div class="formula">';
124
- case "document_index":
125
- return '<div class="toc">';
126
- case "smiles":
127
- return '<span class="smiles">';
128
- case "reference":
129
- return '<a href="#">';
130
- default:
131
- return `<${htmlTag}>`;
132
- }
133
- }
134
- convertInlineContent(content) {
135
- const inlineTagRegex = /<(code|formula|text|smiles)>(.*?)<\/\1>/s;
136
- let remainingText = content;
137
- let result = "";
138
- while (remainingText.length > 0) {
139
- const match = remainingText.match(inlineTagRegex);
140
- if (match && typeof match.index === "number") {
141
- const textBefore = remainingText.substring(0, match.index);
142
- result += this.escapeHtml(textBefore);
143
- const [fullMatch, tagName, innerContent] = match;
144
- switch (tagName) {
145
- case "code":
146
- const langRegex = /<_(.*?)_>/;
147
- const langMatch = innerContent.match(langRegex);
148
- if (langMatch && langMatch[1]) {
149
- const language = this.sanitizeLanguageName(langMatch[1]);
150
- const codeContent = innerContent.replace(langRegex, "").trim();
151
- const escapedCode = this.escapeHtml(codeContent);
152
- const langClass = language !== "unknown" ? ` class="language-${language}"` : "";
153
- result += `<code${langClass}>${escapedCode}</code>`;
154
- } else {
155
- result += `<code>${this.escapeHtml(innerContent)}</code>`;
156
- }
157
- break;
158
- case "formula":
159
- result += `<span class="formula">${this.escapeHtml(innerContent)}</span>`;
160
- break;
161
- case "smiles":
162
- result += `<span class="smiles">${this.escapeHtml(innerContent)}</span>`;
163
- break;
164
- case "text":
165
- result += this.escapeHtml(innerContent);
166
- break;
167
- }
168
- remainingText = remainingText.substring(match.index + fullMatch.length);
169
- } else {
170
- result += this.escapeHtml(remainingText);
171
- break;
172
- }
173
- }
174
- return result;
175
- }
176
- convertBlockCode(content) {
177
- const langRegex = /<_(.*?)_>/;
178
- const langMatch = content.match(langRegex);
179
- let language = "unknown";
180
- let codeContent = content;
181
- if (langMatch && langMatch[1]) {
182
- language = this.sanitizeLanguageName(langMatch[1]);
183
- codeContent = content.replace(langRegex, "").trim();
184
- }
185
- const escapedCode = this.escapeHtml(codeContent);
186
- const langClass = language !== "unknown" ? ` class="language-${language}"` : "";
187
- return `<pre><code${langClass}>${escapedCode}</code></pre>`;
188
- }
189
- convertTable(content) {
190
- const rows = content
191
- .trim()
192
- .split(/<nl>/)
193
- .filter((row) => row.length > 0);
194
- const cellGrid = [];
195
- rows.forEach((rowStr, rowIndex) => {
196
- var _a;
197
- const parts = rowStr.split(this.TABLE_TAG_REGEX);
198
- const currentRow = [];
199
- let gridColIndex = 0;
200
- for (let i = 1; i < parts.length; i += 2) {
201
- const tag = parts[i];
202
- const cellContent = parts[i + 1] || "";
203
- switch (tag) {
204
- case "<lcel>":
205
- if (currentRow.length > 0) {
206
- currentRow[currentRow.length - 1].colspan++;
207
- }
208
- break;
209
- case "<ucel>":
210
- if (rowIndex > 0 && ((_a = cellGrid[rowIndex - 1]) === null || _a === void 0 ? void 0 : _a[gridColIndex])) {
211
- cellGrid[rowIndex - 1][gridColIndex].rowspan++;
212
- }
213
- gridColIndex++;
214
- break;
215
- case "<xcel>":
216
- if (currentRow.length > 0) {
217
- currentRow[currentRow.length - 1].colspan++;
218
- }
219
- break;
220
- default:
221
- if (this.TABLE_TAG_CONFIG[tag]) {
222
- currentRow.push({
223
- content: cellContent,
224
- tag,
225
- colspan: 1,
226
- rowspan: 1,
227
- });
228
- gridColIndex++;
229
- }
230
- break;
231
- }
232
- }
233
- cellGrid.push(currentRow);
234
- });
235
- const htmlRows = cellGrid
236
- .map((row) => {
237
- const cellsHtml = row
238
- .map((cell) => {
239
- const config = this.TABLE_TAG_CONFIG[cell.tag];
240
- if (!config) return "";
241
- const attrs = [];
242
- if (cell.colspan > 1) attrs.push(`colspan="${cell.colspan}"`);
243
- if (cell.rowspan > 1) attrs.push(`rowspan="${cell.rowspan}"`);
244
- if (config.scope) attrs.push(`scope="${config.scope}"`);
245
- const processedContent = this.processTags(cell.content);
246
- const attrString = attrs.length > 0 ? ` ${attrs.join(" ")}` : "";
247
- return `<${config.htmlTag}${attrString}>${processedContent}</${config.htmlTag}>`;
248
- })
249
- .join("");
250
- return `<tr>${cellsHtml}</tr>`;
251
- })
252
- .join("");
253
- return `<table><tbody>${htmlRows}</tbody></table>`;
254
- }
255
- convertPictureOrChart(tag, content) {
256
- if (/<(fcel|ched|rhed)>/.test(content)) {
257
- const cleanedContent = content.replace(/<[a-z_]+>/g, (match) => {
258
- if (match.startsWith("<fcel") || match.startsWith("<ched") || match.startsWith("<rhed") || match.startsWith("<nl")) {
259
- return match;
260
- }
261
- return "";
262
- });
263
- return this.convertTable(cleanedContent);
264
- }
265
- let captionHtml = "";
266
- const captionRegex = /<caption>(.*?)<\/caption>/s;
267
- const captionMatch = content.match(captionRegex);
268
- if (captionMatch && captionMatch[1]) {
269
- const captionContent = this.processTags(captionMatch[1]);
270
- captionHtml = `<figcaption>${captionContent}</figcaption>`;
271
- }
272
- const contentWithoutCaption = content.replace(captionRegex, "");
273
- const classificationRegex = /<([a-z_]+)>/;
274
- const classMatch = contentWithoutCaption.match(classificationRegex);
275
- let altText = tag;
276
- if (classMatch) {
277
- altText = classMatch[1].replace(/_/g, " ");
278
- }
279
- const imgHtml = `<img alt="${this.escapeHtml(altText)}" src="">`;
280
- const figureTag = this.simpleTagMap[tag] || "figure";
281
- return `<${figureTag}>${imgHtml}${captionHtml}</${figureTag}>`;
282
- }
283
- sanitizeLanguageName(lang) {
284
- const lowerLang = lang.toLowerCase();
285
- const aliasMap = {
286
- "c#": "csharp",
287
- "c++": "cpp",
288
- objectivec: "objective-c",
289
- visualbasic: "vb",
290
- javascript: "js",
291
- typescript: "ts",
292
- python: "py",
293
- ruby: "rb",
294
- dockerfile: "docker",
295
- };
296
- return aliasMap[lowerLang] || lowerLang.replace(/[\s#+]/g, "-");
297
- }
298
- cleanupMetadataTokens(docling) {
299
- return docling.replace(/<loc_[0-9]+>/g, "");
300
- }
301
- }
302
-
303
- export function doclingToHtml(docling) {
304
- const converter = new DoclingConverter();
305
- const body = converter.convert(docling);
306
- return `<!DOCTYPE html>
307
- <html>
308
- <head>
309
- <meta charset="UTF-8">
310
- <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.23/dist/katex.min.css" integrity="sha384-//SZkxyB7axjCAopkAL1E1rve+ZSPKapD89Lo/lLhcsXR+zOYl5z6zJZEFXil+q0" crossorigin="anonymous">
311
- <style>
312
- html {
313
- background-color: #f5f5f5;
314
- font-family: Arial, sans-serif;
315
- line-height: 1.6;
316
- }
317
- header, footer {
318
- text-align: center;
319
- margin-bottom: 1rem;
320
- font-size: 1em;
321
- }
322
- body {
323
- max-width: 800px;
324
- margin: 0 auto;
325
- padding: 2rem;
326
- background-color: white;
327
- box-shadow: 0 0 10px rgba(0,0,0,0.1);
328
- }
329
- h1, h2, h3, h4, h5, h6 {
330
- color: #333;
331
- margin-top: 1.5em;
332
- margin-bottom: 0.5em;
333
- }
334
- h1 {
335
- font-size: 2em;
336
- border-bottom: 1px solid #eee;
337
- padding-bottom: 0.3em;
338
- }
339
- table {
340
- border-collapse: collapse;
341
- margin: 1em 0;
342
- width: 100%;
343
- }
344
- th, td {
345
- border: 1px solid #ddd;
346
- padding: 8px;
347
- text-align: left;
348
- }
349
- th {
350
- background-color: #f2f2f2;
351
- font-weight: bold;
352
- }
353
- figure {
354
- margin: 1.5em 0;
355
- text-align: center;
356
- }
357
- figcaption {
358
- color: #666;
359
- font-style: italic;
360
- margin-top: 0.5em;
361
- }
362
- img {
363
- max-width: 100%;
364
- height: auto;
365
- }
366
- pre {
367
- background-color: #f6f8fa;
368
- border-radius: 3px;
369
- padding: 1em;
370
- overflow: auto;
371
- }
372
- code {
373
- font-family: monospace;
374
- background-color: #f6f8fa;
375
- padding: 0.2em 0.4em;
376
- border-radius: 3px;
377
- }
378
- pre code {
379
- background-color: transparent;
380
- padding: 0;
381
- }
382
- .formula {
383
- text-align: center;
384
- padding: 0.5em;
385
- margin: 1em 0;
386
- }
387
- .formula:not(:has(.katex)) {
388
- color: transparent;
389
- }
390
- .page-break {
391
- page-break-after: always;
392
- border-top: 1px dashed #ccc;
393
- margin: 2em 0;
394
- }
395
- .key-value-region {
396
- background-color: #f9f9f9;
397
- padding: 1em;
398
- border-radius: 4px;
399
- margin: 1em 0;
400
- }
401
- .key-value-region dt {
402
- font-weight: bold;
403
- }
404
- .key-value-region dd {
405
- margin-left: 1em;
406
- margin-bottom: 0.5em;
407
- }
408
- .form-container {
409
- border: 1px solid #ddd;
410
- padding: 1em;
411
- border-radius: 4px;
412
- margin: 1em 0;
413
- }
414
- .form-item {
415
- margin-bottom: 0.5em;
416
- }
417
- </style>
418
- </head>
419
- <body>
420
- ${body}
421
- <script type="module">
422
- import katex from 'https://cdn.jsdelivr.net/npm/katex@0.16.23/dist/katex.mjs';
423
- import renderMathInElement from "https://cdn.jsdelivr.net/npm/katex@0.16.23/dist/contrib/auto-render.mjs";
424
- const mathElements = document.querySelectorAll('.formula');
425
- for (let element of mathElements) {
426
- katex.render(element.textContent, element, {
427
- throwOnError: false,
428
- });
429
- }
430
- renderMathInElement(document.body, {
431
- delimiters: [
432
- {left: "$$", right: "$$", display: true},
433
- {left: "\\\\[", right: "\\\\]", display: true},
434
- {left: "$", right: "$", display: false},
435
- {left: "\\\\(", right: "\\\\)", display: false}
436
- ],
437
- throwOnError : false,
438
- });
439
- </script>
440
- </body>
441
- </html>`;
442
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.js DELETED
@@ -1,510 +0,0 @@
1
- import {
2
- AutoProcessor,
3
- AutoModelForVision2Seq,
4
- RawImage,
5
- TextStreamer,
6
- } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers";
7
- import {doclingToHtml} from "./docling-html-parser.js";
8
-
9
- const modelLoaderOverlay = document.getElementById("model-loader-overlay");
10
- const imageDropArea = document.getElementById("image-drop-area");
11
- const imagePlaceholder = document.getElementById("image-placeholder");
12
- const imagePreviewContainer = document.getElementById("image-preview-container");
13
- const imagePreview = document.getElementById("image-preview");
14
- const removeImageBtn = document.getElementById("remove-image-btn");
15
- const fileInput = document.getElementById("file-input");
16
- const exampleImages = document.querySelectorAll(".example-image");
17
- const examplesContainer = document.getElementById("examples-container");
18
- const examplesTitle = document.getElementById("examples-title");
19
- const processingIndicator = document.getElementById("processing-indicator");
20
- const welcomeMessage = document.getElementById("welcome-message");
21
- const doclingView = document.getElementById("docling-view");
22
- const htmlView = document.getElementById("html-view");
23
- const doclingOutput = document.getElementById("docling-output");
24
- const htmlIframe = document.getElementById("html-iframe");
25
- const viewToggle = document.getElementById("view-toggle");
26
- const hiddenCanvas = document.getElementById("hidden-canvas");
27
- const promptInput = document.getElementById("prompt-input");
28
- const generateBtn = document.getElementById("generate-btn");
29
- let model, processor;
30
- let currentImageWidth, currentImageHeight;
31
- let currentImage = null;
32
-
33
- /**
34
- * Updates the statistics and drug matches display
35
- * @param {Object} stats - Statistics object containing counts
36
- * @param {Array} drugMatches - Array of drug matches found
37
- */
38
- function updateStatsAndMatches(stats, drugMatches) {
39
- // Update the statistics
40
- const statsHtml = `
41
- <div class="divide-y divide-gray-200">
42
- <div class="py-3 flex justify-between items-center">
43
- <span class="text-gray-600">Total Overlays</span>
44
- <span class="font-semibold text-indigo-600">${stats.totalOverlays}</span>
45
- </div>
46
- <div class="py-3 flex justify-between items-center">
47
- <span class="text-gray-600">Tag Types</span>
48
- <span class="font-semibold text-indigo-600">${stats.tagTypes}</span>
49
- </div>
50
- <div class="py-3 flex justify-between items-center">
51
- <span class="text-gray-600">Drug Matches Found</span>
52
- <span class="font-semibold text-indigo-600">${stats.totalDrugMatches}</span>
53
- </div>
54
- </div>
55
- `;
56
- document.getElementById('detection-stats').innerHTML = statsHtml;
57
-
58
- // Update drug matches display
59
- const drugMatchesHtml = drugMatches.length === 0
60
- ? '<div class="text-gray-500 text-center py-4">No medication matches found</div>'
61
- : `
62
- <div class="grid grid-cols-2 gap-4">
63
- ${drugMatches.map(match => `
64
- <div class="bg-white p-3 rounded-lg shadow-sm border border-gray-100">
65
- <div class="font-medium text-indigo-600">${match.drug}</div>
66
- </div>
67
- `).join('')}
68
- </div>
69
- `;
70
- document.getElementById('drug-matches').innerHTML = drugMatchesHtml;
71
- }
72
-
73
- // Helper: escape regex special chars
74
- function escapeRegex(str) {
75
- return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
76
- }
77
-
78
- // Helper: parse CSV text into array of objects (handles quoted fields)
79
- function parseCSV(csvText) {
80
- const lines = csvText.split(/\r?\n/).filter(l => l.trim() !== '');
81
- if (lines.length === 0) return [];
82
- const splitLine = (line) => line.split(/,(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)/).map(s => {
83
- let val = s.trim();
84
- if (val.startsWith('"') && val.endsWith('"')) {
85
- val = val.slice(1, -1).replace(/""/g, '"');
86
- }
87
- return val;
88
- });
89
- const headers = splitLine(lines[0]);
90
- const rows = [];
91
- for (let i = 1; i < lines.length; i++) {
92
- const parts = splitLine(lines[i]);
93
- if (parts.length === 0) continue;
94
- const obj = {};
95
- for (let j = 0; j < headers.length; j++) {
96
- obj[headers[j]] = parts[j] || '';
97
- }
98
- rows.push(obj);
99
- }
100
- return rows;
101
- }
102
-
103
- /**
104
- * Loads and initializes the model and processor.
105
- */
106
- async function initializeModel() {
107
- try {
108
- const model_id = "onnx-community/granite-docling-258M-ONNX";
109
- processor = await AutoProcessor.from_pretrained(model_id);
110
- const progress = {};
111
- model = await AutoModelForVision2Seq.from_pretrained(model_id, {
112
- dtype: {
113
- embed_tokens: "fp16",
114
- vision_encoder: "fp32",
115
- decoder_model_merged: "fp32",
116
- },
117
- device: "webgpu",
118
- progress_callback: (data) => {
119
- if (data.status === "progress" && data.file?.endsWith?.("onnx_data")) {
120
- progress[data.file] = data;
121
- if (Object.keys(progress).length !== 3) return;
122
- let sum = 0;
123
- let total = 0;
124
- for (const [key, val] of Object.entries(progress)) {
125
- sum += val.loaded;
126
- total += val.total;
127
- }
128
- const overallPercent = Math.round((sum / total) * 100);
129
- document.getElementById("model-progress").value = overallPercent;
130
- document.getElementById("progress-text").textContent = overallPercent + "%";
131
- }
132
- },
133
- });
134
- modelLoaderOverlay.style.display = "none";
135
- console.log("Model loaded successfully.");
136
- } catch (error) {
137
- console.error("Failed to load model:", error);
138
- modelLoaderOverlay.innerHTML = `
139
- <h2 class="text-center text-red-500 text-xl font-semibold">Failed to Load Model</h2>
140
- <p class="text-center text-white text-md mt-2">Please refresh the page to try again. Check the console for errors.</p>
141
- `;
142
- }
143
- }
144
-
145
- /**
146
- * Processes an image and generates Docling text.
147
- * @param {ImageBitmap|HTMLImageElement} imageObject An image object to process.
148
- */
149
- async function processImage(imageObject) {
150
- if (!model || !processor) {
151
- alert("Model is not loaded yet. Please wait.");
152
- return;
153
- }
154
- // Reset UI
155
- setUiState("processing");
156
- clearOverlays();
157
- let fullText = "";
158
- doclingOutput.textContent = "";
159
- htmlIframe.srcdoc = "";
160
-
161
- try {
162
- // Draw image to canvas and get RawImage
163
- const ctx = hiddenCanvas.getContext("2d");
164
- hiddenCanvas.width = imageObject.width;
165
- hiddenCanvas.height = imageObject.height;
166
- ctx.drawImage(imageObject, 0, 0);
167
- const image = RawImage.fromCanvas(hiddenCanvas);
168
-
169
- // Create input messages
170
- const messages = [{
171
- role: "user",
172
- content: [{type: "image"}, {type: "text", text: promptInput.value}],
173
- }];
174
-
175
- // Prepare inputs for the model
176
- const text = processor.apply_chat_template(messages, {
177
- add_generation_prompt: true,
178
- });
179
- const inputs = await processor(text, [image], {
180
- do_image_splitting: true,
181
- });
182
-
183
- // Generate output
184
- await model.generate({
185
- ...inputs,
186
- max_new_tokens: 4096,
187
- streamer: new TextStreamer(processor.tokenizer, {
188
- skip_prompt: true,
189
- skip_special_tokens: false,
190
- callback_function: (streamedText) => {
191
- fullText += streamedText;
192
- doclingOutput.textContent += streamedText;
193
- },
194
- }),
195
- });
196
-
197
- // Strip <|end_of_text|> from the end
198
- fullText = fullText.replace(/<\|end_of_text\|>$/, "");
199
- doclingOutput.textContent = fullText;
200
-
201
- // Parse loc tags and create overlays
202
- const tagRegex = /<(\w+)><loc_(\d+)><loc_(\d+)><loc_(\d+)><loc_(\d+)>/g;
203
- const overlays = [];
204
- let match;
205
- while ((match = tagRegex.exec(fullText)) !== null) {
206
- const tagType = match[1];
207
- const locs = [parseInt(match[2]), parseInt(match[3]), parseInt(match[4]), parseInt(match[5])];
208
- overlays.push({tagType, locs});
209
- }
210
-
211
- // Create overlays on the image
212
- const colorMap = {};
213
- function getRandomColor() {
214
- return `rgb(${Math.floor(Math.random() * 256)}, ${Math.floor(Math.random() * 256)}, ${Math.floor(Math.random() * 256)})`;
215
- }
216
-
217
- const imgRect = imagePreview.getBoundingClientRect();
218
- const containerRect = imagePreviewContainer.getBoundingClientRect();
219
- const imageOffsetLeft = imgRect.left - containerRect.left;
220
- const imageOffsetTop = imgRect.top - containerRect.top;
221
- const scaleX = imgRect.width / currentImageWidth;
222
- const scaleY = imgRect.height / currentImageHeight;
223
-
224
- overlays.forEach(({tagType, locs}) => {
225
- const color = colorMap[tagType] || (colorMap[tagType] = getRandomColor());
226
- const [leftLoc, topLoc, rightLoc, bottomLoc] = locs;
227
- const left = imageOffsetLeft + (leftLoc / 500) * currentImageWidth * scaleX;
228
- const top = imageOffsetTop + (topLoc / 500) * currentImageHeight * scaleY;
229
- const width = ((rightLoc - leftLoc) / 500) * currentImageWidth * scaleX;
230
- const height = ((bottomLoc - topLoc) / 500) * currentImageHeight * scaleY;
231
- const overlay = document.createElement("div");
232
- overlay.className = "overlay";
233
- overlay.style.setProperty('--overlay-color', color);
234
- const rgbMatch = color.match(/rgb\((\d+),\s*(\d+),\s*(\d+)\)/);
235
- overlay.style.setProperty('--overlay-color-rgb', `${rgbMatch[1]},${rgbMatch[2]},${rgbMatch[3]}`);
236
- overlay.style.position = "absolute";
237
- overlay.style.left = left + "px";
238
- overlay.style.top = top + "px";
239
- overlay.style.width = width + "px";
240
- overlay.style.height = height + "px";
241
- imagePreviewContainer.appendChild(overlay);
242
- });
243
-
244
- // Initial results and stats
245
- const results = {
246
- doclingText: fullText,
247
- overlays: overlays,
248
- htmlContent: doclingToHtml(fullText)
249
- };
250
-
251
- const stats = {
252
- totalOverlays: overlays.length,
253
- tagTypes: [...new Set(overlays.map(o => o.tagType))].length,
254
- totalDrugMatches: 0
255
- };
256
-
257
- // Update stats with initial values (no drug matches yet)
258
- updateStatsAndMatches(stats, []);
259
- console.log('Initial stats updated:', stats);
260
-
261
- // Load and process drug names from CSV
262
- try {
263
- console.log('Starting to load drugs.csv...');
264
- const response = await fetch('drugs.csv');
265
- const csvText = await response.text();
266
- console.log('CSV loaded, first 100 characters:', csvText.substring(0, 100));
267
-
268
- // Parse CSV robustly
269
- const rows = parseCSV(csvText);
270
- const drugNames = new Set();
271
-
272
- // Helper function to validate a potential drug name
273
- const isValidDrugName = (name) => {
274
- if (!name || name.length < 4) return false; // Must be at least 4 characters
275
- if (/^\d/.test(name)) return false; // Must not start with a number
276
- if (/^\d+\.?\d*$/.test(name)) return false; // Must not be just a number
277
- if (!/[a-zA-Z]/.test(name)) return false; // Must contain at least one letter
278
- return true;
279
- };
280
-
281
- rows.forEach(row => {
282
- // Process brand names
283
- const brand = (row['brand_name'] || row['brand'] || '').trim();
284
- if (brand) {
285
- // Add the full brand name
286
- const cleanBrand = brand
287
- .replace(/\s*\([^)]*\)/g, '') // Remove parenthetical content
288
- .replace(/\d+\s*(?:mg|ml|mcg|g)\b/gi, '') // Remove dosage amounts
289
- .trim();
290
- if (isValidDrugName(cleanBrand)) {
291
- drugNames.add(cleanBrand.toLowerCase());
292
- }
293
-
294
- // For generic names with salts (e.g., "ATORVASTATIN CALCIUM"), add base name too
295
- const baseName = cleanBrand.split(/\s+/)[0]; // Get first word
296
- if (isValidDrugName(baseName)) {
297
- drugNames.add(baseName.toLowerCase());
298
- }
299
- }
300
-
301
- // Process active ingredients
302
- const ai = (row['active_ingredients'] || row['active_ingredient'] || '').trim();
303
- if (ai) {
304
- // Split on commas and semicolons
305
- const parts = ai.split(/[;,]/)
306
- .map(p => {
307
- const cleaned = p.trim()
308
- .replace(/\s*\([^)]*\)/g, '') // Remove parenthetical content
309
- .replace(/\d+\s*(?:mg|ml|mcg|g)\b/gi, '') // Remove dosage amounts
310
- .trim();
311
-
312
- // Add both full name and base name (without salt)
313
- const results = new Set();
314
- if (isValidDrugName(cleaned)) {
315
- results.add(cleaned.toLowerCase());
316
- }
317
-
318
- // Add base name (first word) if it's valid
319
- const baseName = cleaned.split(/\s+/)[0];
320
- if (isValidDrugName(baseName)) {
321
- results.add(baseName.toLowerCase());
322
- }
323
-
324
- return Array.from(results);
325
- })
326
- .flat();
327
-
328
- parts.forEach(p => drugNames.add(p));
329
- }
330
- }); // Look for drug matches using word-boundary regex
331
- const drugMatches = [];
332
- const detectedText = doclingOutput.textContent.toLowerCase();
333
- for (const drugName of drugNames) {
334
- try {
335
- const pattern = new RegExp('\\b' + escapeRegex(drugName) + '\\b', 'i');
336
- const m = detectedText.match(pattern);
337
- if (m) {
338
- const idx = detectedText.search(pattern);
339
- drugMatches.push({ drug: drugName, found: true });
340
- }
341
- } catch (e) {
342
- // fallback to simple includes if regex fails
343
- if (detectedText.includes(drugName)) {
344
- drugMatches.push({ drug: drugName, found: true });
345
- }
346
- }
347
- }
348
-
349
- // Update results and display
350
- results.drugMatches = drugMatches;
351
- stats.totalDrugMatches = drugMatches.length;
352
- console.log('Found drug matches:', drugMatches.length);
353
- console.log('Updated stats:', stats);
354
- updateStatsAndMatches(stats, drugMatches);
355
- console.log('Stats and matches updated in UI');
356
-
357
- // Create HTML iframe
358
- htmlIframe.srcdoc = results.htmlContent;
359
- } catch (error) {
360
- console.error('Error processing drug matches:', error);
361
- updateStatsAndMatches(stats, []);
362
- }
363
- } catch (error) {
364
- console.error("Error during image processing:", error);
365
- doclingOutput.textContent = `An error occurred: ${error.message}`;
366
- } finally {
367
- setUiState("result");
368
- }
369
- }
370
-
371
- /**
372
- * Manages the visibility of UI components based on the app state.
373
- * @param {'initial'|'processing'|'result'} state The current state.
374
- */
375
- function setUiState(state) {
376
- welcomeMessage.style.display = "none";
377
- processingIndicator.classList.add("hidden");
378
- doclingView.classList.add("hidden");
379
- htmlView.classList.add("hidden");
380
- if (state === "initial") {
381
- // Clear previous results when going back to initial
382
- document.getElementById('detection-stats').innerHTML = '';
383
- document.getElementById('drug-matches').innerHTML = '';
384
- welcomeMessage.style.display = "flex";
385
- generateBtn.disabled = true;
386
- } else if (state === "processing") {
387
- // Keep stats visible during processing, but clear matches while streaming
388
- document.getElementById('drug-matches').innerHTML = '';
389
- viewToggle.checked = false;
390
- processingIndicator.classList.remove("hidden");
391
- doclingView.classList.remove("hidden");
392
- generateBtn.disabled = true;
393
- } else if (state === "result") {
394
- // Preserve the populated stats and matches on result
395
- viewToggle.checked = true;
396
- htmlView.classList.remove("hidden");
397
- generateBtn.disabled = false;
398
- }
399
- }
400
-
401
- /**
402
- * Clears all overlay divs from the image preview container.
403
- */
404
- function clearOverlays() {
405
- document.querySelectorAll(".overlay").forEach((el) => el.remove());
406
- }
407
-
408
- /**
409
- * Handles the selection of an image file.
410
- * @param {File|string} source The image file or URL.
411
- */
412
- function handleImageSelection(source) {
413
- const reader = new FileReader();
414
- const img = new Image();
415
- img.onload = () => {
416
- currentImageWidth = img.naturalWidth;
417
- currentImageHeight = img.naturalHeight;
418
- currentImage = img;
419
- imagePreview.src = img.src;
420
- imagePlaceholder.classList.add("hidden");
421
- imagePreviewContainer.classList.remove("hidden");
422
- examplesContainer.classList.add("hidden");
423
- examplesTitle.classList.add("hidden");
424
- processImage(img);
425
- };
426
- img.onerror = () => {
427
- alert("Failed to load image.");
428
- };
429
- if (typeof source === "string") {
430
- fetch(source)
431
- .then((res) => res.blob())
432
- .then((blob) => {
433
- img.src = URL.createObjectURL(blob);
434
- })
435
- .catch((e) => {
436
- console.error("CORS issue likely. Trying proxy or direct load.", e);
437
- img.crossOrigin = "anonymous";
438
- img.src = source;
439
- });
440
- } else {
441
- reader.onload = (e) => {
442
- img.src = e.target.result;
443
- };
444
- reader.readAsDataURL(source);
445
- }
446
- }
447
-
448
- // Event Listeners
449
- imageDropArea.addEventListener("click", () => fileInput.click());
450
- imageDropArea.addEventListener("dragover", (e) => {
451
- e.preventDefault();
452
- imageDropArea.classList.add("border-indigo-500", "bg-indigo-50");
453
- });
454
- imageDropArea.addEventListener("dragleave", () => {
455
- imageDropArea.classList.remove("border-indigo-500", "bg-indigo-50");
456
- });
457
- imageDropArea.addEventListener("drop", (e) => {
458
- e.preventDefault();
459
- imageDropArea.classList.remove("border-indigo-500", "bg-indigo-50");
460
- const files = e.dataTransfer.files;
461
- if (files.length > 0 && files[0].type.startsWith("image/")) {
462
- handleImageSelection(files[0]);
463
- }
464
- });
465
-
466
- fileInput.addEventListener("change", (e) => {
467
- const files = e.target.files;
468
- if (files.length > 0) {
469
- handleImageSelection(files[0]);
470
- }
471
- });
472
-
473
- exampleImages.forEach((img) => {
474
- img.addEventListener("click", () => {
475
- promptInput.value = img.dataset.prompt;
476
- handleImageSelection(img.src);
477
- });
478
- });
479
-
480
- removeImageBtn.addEventListener("click", (e) => {
481
- e.stopPropagation();
482
- currentImage = null;
483
- imagePreview.src = "";
484
- fileInput.value = "";
485
- imagePlaceholder.classList.remove("hidden");
486
- imagePreviewContainer.classList.add("hidden");
487
- examplesContainer.classList.remove("hidden");
488
- examplesTitle.classList.remove("hidden");
489
- setUiState("initial");
490
- doclingOutput.textContent = "";
491
- htmlIframe.srcdoc = "";
492
- clearOverlays();
493
- });
494
-
495
- viewToggle.addEventListener("change", () => {
496
- const isHtmlView = viewToggle.checked;
497
- htmlView.classList.toggle("hidden", !isHtmlView);
498
- doclingView.classList.toggle("hidden", isHtmlView);
499
- });
500
-
501
- generateBtn.addEventListener("click", () => {
502
- if (currentImage) {
503
- processImage(currentImage);
504
- }
505
- });
506
-
507
- document.addEventListener("DOMContentLoaded", () => {
508
- setUiState("initial");
509
- initializeModel();
510
- });
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
style.css DELETED
@@ -1,62 +0,0 @@
1
- body {
2
- font-family: "Inter", sans-serif;
3
- }
4
- .loader {
5
- border-top-color: #3498db;
6
- animation: spin 1s linear infinite;
7
- }
8
- .loader-large {
9
- border: 8px solid #e5e7eb;
10
- border-top: 8px solid #3498db;
11
- animation: spin 1s linear infinite;
12
- }
13
- .loader-small {
14
- border: 4px solid #e5e7eb;
15
- border-top: 4px solid #3498db;
16
- animation: spin 1s linear infinite;
17
- }
18
- @keyframes spin {
19
- 0% {
20
- transform: rotate(0deg);
21
- }
22
- 100% {
23
- transform: rotate(360deg);
24
- }
25
- }
26
- /* Custom toggle switch */
27
- .toggle-checkbox {
28
- /* knob */
29
- position: absolute;
30
- top: 0.25rem; /* aligns inside the track */
31
- left: 0.25rem;
32
- width: 1rem;
33
- height: 1rem;
34
- border-radius: 9999px;
35
- background: #ffffff;
36
- border: 1px solid #d1d5db;
37
- transition: transform 0.2s ease, border-color 0.2s ease;
38
- transform: translateX(0);
39
- }
40
- .toggle-label {
41
- /* track */
42
- display: block;
43
- width: 2.5rem; /* matches wrapper w-10 */
44
- height: 1.5rem; /* matches h-6 */
45
- background: #d1d5db;
46
- border-radius: 9999px;
47
- transition: background-color 0.2s ease;
48
- }
49
- .toggle-checkbox:checked {
50
- transform: translateX(1rem); /* slide knob to the right */
51
- border-color: #4f46e5;
52
- }
53
- .toggle-checkbox:checked + .toggle-label {
54
- background-color: #4f46e5;
55
- }
56
- .overlay {
57
- border: 2px solid var(--overlay-color);
58
- transition: background-color 0.2s;
59
- }
60
- .overlay:hover {
61
- background-color: rgba(var(--overlay-color-rgb), 0.7);
62
- }