Spaces:
Running
Running
Switch default recognition to LightOnOCR; add Try these moments tour
Browse files
Mirror of brave-new-demos commits 03b4c32 + 1a1b995.
- Default recognition is now lightonai/LightOnOCR-2-1B (was Florence-2-base).
Better Markdown on dense layouts; ~4 GB cold download.
- New 'Try these moments' UI section above the main panels with four
concrete prompts visitors can act on without reading the README.
The HF entrypoint still has an empty PRELOAD by default, so the Space still
boots in ~30 s; the first sample click will now trigger a LightOnOCR download
on a cache miss.
- src/config.ts +7 -7
- web/public/index.html +27 -0
- web/public/style.css +23 -0
src/config.ts
CHANGED
|
@@ -8,23 +8,23 @@ export type ModelOption = {
|
|
| 8 |
};
|
| 9 |
|
| 10 |
export const RECOGNITION_MODELS: ModelOption[] = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
{
|
| 12 |
id: "microsoft/Florence-2-base",
|
| 13 |
label: "Florence-2-base (small, fast)",
|
| 14 |
-
description: "Microsoft DaViT + decoder, 270M.
|
| 15 |
options: { task: "<OCR>" },
|
| 16 |
},
|
| 17 |
{
|
| 18 |
id: "microsoft/Florence-2-large",
|
| 19 |
label: "Florence-2-large",
|
| 20 |
-
description: "Larger Florence-2 variant, 770M.
|
| 21 |
options: { task: "<OCR>" },
|
| 22 |
},
|
| 23 |
-
{
|
| 24 |
-
id: "lightonai/LightOnOCR-2-1B",
|
| 25 |
-
label: "LightOnOCR-2-1B (premium, GPU recommended)",
|
| 26 |
-
description: "Pixtral encoder + Qwen3 decoder, 2.1B. Markdown output. Loads on CPU but slow under Rosetta.",
|
| 27 |
-
},
|
| 28 |
{
|
| 29 |
id: "PaddlePaddle/PaddleOCR-VL-1.5",
|
| 30 |
label: "PaddleOCR-VL-1.5 (GPU image)",
|
|
|
|
| 8 |
};
|
| 9 |
|
| 10 |
export const RECOGNITION_MODELS: ModelOption[] = [
|
| 11 |
+
{
|
| 12 |
+
id: "lightonai/LightOnOCR-2-1B",
|
| 13 |
+
label: "LightOnOCR-2-1B (default)",
|
| 14 |
+
description: "Pixtral encoder + Qwen3 decoder, 2.1B. Strong Markdown output across dense layouts. ~4 GB to download on first call.",
|
| 15 |
+
},
|
| 16 |
{
|
| 17 |
id: "microsoft/Florence-2-base",
|
| 18 |
label: "Florence-2-base (small, fast)",
|
| 19 |
+
description: "Microsoft DaViT + decoder, 270M. Fast on CPU but terse on dense layouts; better on multi-column text.",
|
| 20 |
options: { task: "<OCR>" },
|
| 21 |
},
|
| 22 |
{
|
| 23 |
id: "microsoft/Florence-2-large",
|
| 24 |
label: "Florence-2-large",
|
| 25 |
+
description: "Larger Florence-2 variant, 770M. Better than Florence-2-base but still leans terse on receipts.",
|
| 26 |
options: { task: "<OCR>" },
|
| 27 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
{
|
| 29 |
id: "PaddlePaddle/PaddleOCR-VL-1.5",
|
| 30 |
label: "PaddleOCR-VL-1.5 (GPU image)",
|
web/public/index.html
CHANGED
|
@@ -66,6 +66,33 @@
|
|
| 66 |
</p>
|
| 67 |
</section>
|
| 68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
<main>
|
| 70 |
<section class="panel" id="panel-events">
|
| 71 |
<header><h2>Sample documents</h2></header>
|
|
|
|
| 66 |
</p>
|
| 67 |
</section>
|
| 68 |
|
| 69 |
+
<section class="tour">
|
| 70 |
+
<h3>Try these moments</h3>
|
| 71 |
+
<ol class="tour-list">
|
| 72 |
+
<li>
|
| 73 |
+
<strong>Click any sample on the left.</strong> All three models run
|
| 74 |
+
in one pipeline. The footer prints per-stage timings as each one
|
| 75 |
+
lands.
|
| 76 |
+
</li>
|
| 77 |
+
<li>
|
| 78 |
+
<strong>Open "See the SIE call"</strong> in any panel, then swap the
|
| 79 |
+
model dropdown above. The snippet updates with the one parameter
|
| 80 |
+
that changed. That is the swap-a-string pitch in action.
|
| 81 |
+
</li>
|
| 82 |
+
<li>
|
| 83 |
+
<strong>Click the receipt, then the multi-column page.</strong>
|
| 84 |
+
Donut (fine-tuned on receipts) dominates the first; recognition
|
| 85 |
+
dominates the second. Same pipeline, different model wins.
|
| 86 |
+
</li>
|
| 87 |
+
<li>
|
| 88 |
+
<strong>Switch NER from <code>gliner_multi</code> to
|
| 89 |
+
<code>gliner_large</code>.</strong> Same labels, same input text,
|
| 90 |
+
different confidence scores. Model quality is a single dropdown
|
| 91 |
+
away.
|
| 92 |
+
</li>
|
| 93 |
+
</ol>
|
| 94 |
+
</section>
|
| 95 |
+
|
| 96 |
<main>
|
| 97 |
<section class="panel" id="panel-events">
|
| 98 |
<header><h2>Sample documents</h2></header>
|
web/public/style.css
CHANGED
|
@@ -110,6 +110,29 @@ h1 { font-size: 16px; margin: 0; font-weight: 600; }
|
|
| 110 |
}
|
| 111 |
.why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
|
| 112 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
main {
|
| 114 |
flex: 1; display: grid;
|
| 115 |
grid-template-columns: 0.95fr 1.4fr 1.2fr;
|
|
|
|
| 110 |
}
|
| 111 |
.why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
|
| 112 |
|
| 113 |
+
.tour {
|
| 114 |
+
padding: 12px 20px; border-bottom: 1px solid var(--line);
|
| 115 |
+
background: rgba(98,182,255,0.04);
|
| 116 |
+
}
|
| 117 |
+
.tour h3 {
|
| 118 |
+
margin: 0 0 6px 0; font-size: 11px; letter-spacing: 0.6px;
|
| 119 |
+
text-transform: uppercase; color: var(--accent-2); font-weight: 600;
|
| 120 |
+
}
|
| 121 |
+
.tour-list {
|
| 122 |
+
margin: 0; padding-left: 22px;
|
| 123 |
+
color: var(--muted); line-height: 1.55;
|
| 124 |
+
max-width: 1100px;
|
| 125 |
+
display: grid;
|
| 126 |
+
grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
|
| 127 |
+
gap: 4px 24px;
|
| 128 |
+
}
|
| 129 |
+
.tour-list li { padding: 3px 0; }
|
| 130 |
+
.tour-list strong { color: var(--text); font-weight: 600; }
|
| 131 |
+
.tour-list code {
|
| 132 |
+
background: var(--line); padding: 1px 5px; border-radius: 3px;
|
| 133 |
+
color: var(--accent-2); font-size: 11px;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
main {
|
| 137 |
flex: 1; display: grid;
|
| 138 |
grid-template-columns: 0.95fr 1.4fr 1.2fr;
|