Spaces:

superlinked
/

document-ocr

Running

fm1320 committed 6 days ago

Commit

0df0841

1 Parent(s): 3d62adb

Switch default recognition to LightOnOCR; add 'Try these moments' tour

Mirror of brave-new-demos commits 03b4c32 + 1a1b995.

- Default recognition is now lightonai/LightOnOCR-2-1B (was Florence-2-base).
Better Markdown on dense layouts; ~4 GB cold download.
- New 'Try these moments' UI section above the main panels with four
concrete prompts visitors can act on without reading the README.

The HF entrypoint still has an empty PRELOAD by default, so the Space still
boots in ~30 s; the first sample click will now trigger a LightOnOCR download
on a cache miss.

Files changed (3) hide show

src/config.ts +7 -7
web/public/index.html +27 -0
web/public/style.css +23 -0

src/config.ts CHANGED Viewed

@@ -8,23 +8,23 @@ export type ModelOption = {
 };
 export const RECOGNITION_MODELS: ModelOption[] = [
   {
     id: "microsoft/Florence-2-base",
     label: "Florence-2-base (small, fast)",
-    description: "Microsoft DaViT + decoder, 270M. Default OCR with the <OCR> task. Fast on CPU.",
     options: { task: "<OCR>" },
   },
   {
     id: "microsoft/Florence-2-large",
     label: "Florence-2-large",
-    description: "Larger Florence-2 variant, 770M. Higher quality, still CPU-runnable; ~2x latency.",
     options: { task: "<OCR>" },
   },
-  {
-    id: "lightonai/LightOnOCR-2-1B",
-    label: "LightOnOCR-2-1B (premium, GPU recommended)",
-    description: "Pixtral encoder + Qwen3 decoder, 2.1B. Markdown output. Loads on CPU but slow under Rosetta.",
-  },
   {
     id: "PaddlePaddle/PaddleOCR-VL-1.5",
     label: "PaddleOCR-VL-1.5 (GPU image)",

 };
 export const RECOGNITION_MODELS: ModelOption[] = [
+  {
+    id: "lightonai/LightOnOCR-2-1B",
+    label: "LightOnOCR-2-1B (default)",
+    description: "Pixtral encoder + Qwen3 decoder, 2.1B. Strong Markdown output across dense layouts. ~4 GB to download on first call.",
+  },
   {
     id: "microsoft/Florence-2-base",
     label: "Florence-2-base (small, fast)",
+    description: "Microsoft DaViT + decoder, 270M. Fast on CPU but terse on dense layouts; better on multi-column text.",
     options: { task: "<OCR>" },
   },
   {
     id: "microsoft/Florence-2-large",
     label: "Florence-2-large",
+    description: "Larger Florence-2 variant, 770M. Better than Florence-2-base but still leans terse on receipts.",
     options: { task: "<OCR>" },
   },
   {
     id: "PaddlePaddle/PaddleOCR-VL-1.5",
     label: "PaddleOCR-VL-1.5 (GPU image)",

web/public/index.html CHANGED Viewed

@@ -66,6 +66,33 @@
       </p>
     </section>
     <main>
       <section class="panel" id="panel-events">
         <header><h2>Sample documents</h2></header>

       </p>
     </section>
+    <section class="tour">
+      <h3>Try these moments</h3>
+      <ol class="tour-list">
+        <li>
+          <strong>Click any sample on the left.</strong> All three models run
+          in one pipeline. The footer prints per-stage timings as each one
+          lands.
+        </li>
+        <li>
+          <strong>Open "See the SIE call"</strong> in any panel, then swap the
+          model dropdown above. The snippet updates with the one parameter
+          that changed. That is the swap-a-string pitch in action.
+        </li>
+        <li>
+          <strong>Click the receipt, then the multi-column page.</strong>
+          Donut (fine-tuned on receipts) dominates the first; recognition
+          dominates the second. Same pipeline, different model wins.
+        </li>
+        <li>
+          <strong>Switch NER from <code>gliner_multi</code> to
+          <code>gliner_large</code>.</strong> Same labels, same input text,
+          different confidence scores. Model quality is a single dropdown
+          away.
+        </li>
+      </ol>
+    </section>
     <main>
       <section class="panel" id="panel-events">
         <header><h2>Sample documents</h2></header>

web/public/style.css CHANGED Viewed

@@ -110,6 +110,29 @@ h1 { font-size: 16px; margin: 0; font-weight: 600; }
 }
 .why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
 main {
   flex: 1; display: grid;
   grid-template-columns: 0.95fr 1.4fr 1.2fr;

 }
 .why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
+.tour {
+  padding: 12px 20px; border-bottom: 1px solid var(--line);
+  background: rgba(98,182,255,0.04);
+}
+.tour h3 {
+  margin: 0 0 6px 0; font-size: 11px; letter-spacing: 0.6px;
+  text-transform: uppercase; color: var(--accent-2); font-weight: 600;
+}
+.tour-list {
+  margin: 0; padding-left: 22px;
+  color: var(--muted); line-height: 1.55;
+  max-width: 1100px;
+  display: grid;
+  grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
+  gap: 4px 24px;
+}
+.tour-list li { padding: 3px 0; }
+.tour-list strong { color: var(--text); font-weight: 600; }
+.tour-list code {
+  background: var(--line); padding: 1px 5px; border-radius: 3px;
+  color: var(--accent-2); font-size: 11px;
+}
 main {
   flex: 1; display: grid;
   grid-template-columns: 0.95fr 1.4fr 1.2fr;