fm1320 committed on
Commit
0df0841
·
1 Parent(s): 3d62adb

Switch default recognition to LightOnOCR; add Try these moments tour

Browse files

Mirror of brave-new-demos commits 03b4c32 + 1a1b995.

- Default recognition is now lightonai/LightOnOCR-2-1B (was Florence-2-base).
Better Markdown on dense layouts; ~4 GB cold download.
- New 'Try these moments' UI section above the main panels with four
concrete prompts visitors can act on without reading the README.

The HF entrypoint still has an empty PRELOAD by default, so the Space still
boots in ~30 s; the first sample click will now trigger a LightOnOCR download
on a cache miss.

Files changed (3) hide show
  1. src/config.ts +7 -7
  2. web/public/index.html +27 -0
  3. web/public/style.css +23 -0
src/config.ts CHANGED
@@ -8,23 +8,23 @@ export type ModelOption = {
8
  };
9
 
10
  export const RECOGNITION_MODELS: ModelOption[] = [
 
 
 
 
 
11
  {
12
  id: "microsoft/Florence-2-base",
13
  label: "Florence-2-base (small, fast)",
14
- description: "Microsoft DaViT + decoder, 270M. Default OCR with the <OCR> task. Fast on CPU.",
15
  options: { task: "<OCR>" },
16
  },
17
  {
18
  id: "microsoft/Florence-2-large",
19
  label: "Florence-2-large",
20
- description: "Larger Florence-2 variant, 770M. Higher quality, still CPU-runnable; ~2x latency.",
21
  options: { task: "<OCR>" },
22
  },
23
- {
24
- id: "lightonai/LightOnOCR-2-1B",
25
- label: "LightOnOCR-2-1B (premium, GPU recommended)",
26
- description: "Pixtral encoder + Qwen3 decoder, 2.1B. Markdown output. Loads on CPU but slow under Rosetta.",
27
- },
28
  {
29
  id: "PaddlePaddle/PaddleOCR-VL-1.5",
30
  label: "PaddleOCR-VL-1.5 (GPU image)",
 
8
  };
9
 
10
  export const RECOGNITION_MODELS: ModelOption[] = [
11
+ {
12
+ id: "lightonai/LightOnOCR-2-1B",
13
+ label: "LightOnOCR-2-1B (default)",
14
+ description: "Pixtral encoder + Qwen3 decoder, 2.1B. Strong Markdown output across dense layouts. ~4 GB to download on first call.",
15
+ },
16
  {
17
  id: "microsoft/Florence-2-base",
18
  label: "Florence-2-base (small, fast)",
19
+ description: "Microsoft DaViT + decoder, 270M. Fast on CPU but terse on dense layouts; better on multi-column text.",
20
  options: { task: "<OCR>" },
21
  },
22
  {
23
  id: "microsoft/Florence-2-large",
24
  label: "Florence-2-large",
25
+ description: "Larger Florence-2 variant, 770M. Better than Florence-2-base but still leans terse on receipts.",
26
  options: { task: "<OCR>" },
27
  },
 
 
 
 
 
28
  {
29
  id: "PaddlePaddle/PaddleOCR-VL-1.5",
30
  label: "PaddleOCR-VL-1.5 (GPU image)",
web/public/index.html CHANGED
@@ -66,6 +66,33 @@
66
  </p>
67
  </section>
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  <main>
70
  <section class="panel" id="panel-events">
71
  <header><h2>Sample documents</h2></header>
 
66
  </p>
67
  </section>
68
 
69
+ <section class="tour">
70
+ <h3>Try these moments</h3>
71
+ <ol class="tour-list">
72
+ <li>
73
+ <strong>Click any sample on the left.</strong> All three models run
74
+ in one pipeline. The footer prints per-stage timings as each one
75
+ lands.
76
+ </li>
77
+ <li>
78
+ <strong>Open "See the SIE call"</strong> in any panel, then swap the
79
+ model dropdown above. The snippet updates with the one parameter
80
+ that changed. That is the swap-a-string pitch in action.
81
+ </li>
82
+ <li>
83
+ <strong>Click the receipt, then the multi-column page.</strong>
84
+ Donut (fine-tuned on receipts) dominates the first; recognition
85
+ dominates the second. Same pipeline, different model wins.
86
+ </li>
87
+ <li>
88
+ <strong>Switch NER from <code>gliner_multi</code> to
89
+ <code>gliner_large</code>.</strong> Same labels, same input text,
90
+ different confidence scores. Model quality is a single dropdown
91
+ away.
92
+ </li>
93
+ </ol>
94
+ </section>
95
+
96
  <main>
97
  <section class="panel" id="panel-events">
98
  <header><h2>Sample documents</h2></header>
web/public/style.css CHANGED
@@ -110,6 +110,29 @@ h1 { font-size: 16px; margin: 0; font-weight: 600; }
110
  }
111
  .why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  main {
114
  flex: 1; display: grid;
115
  grid-template-columns: 0.95fr 1.4fr 1.2fr;
 
110
  }
111
  .why-sie p { margin: 0; color: var(--muted); line-height: 1.6; max-width: 1100px; }
112
 
113
+ .tour {
114
+ padding: 12px 20px; border-bottom: 1px solid var(--line);
115
+ background: rgba(98,182,255,0.04);
116
+ }
117
+ .tour h3 {
118
+ margin: 0 0 6px 0; font-size: 11px; letter-spacing: 0.6px;
119
+ text-transform: uppercase; color: var(--accent-2); font-weight: 600;
120
+ }
121
+ .tour-list {
122
+ margin: 0; padding-left: 22px;
123
+ color: var(--muted); line-height: 1.55;
124
+ max-width: 1100px;
125
+ display: grid;
126
+ grid-template-columns: repeat(auto-fit, minmax(260px, 1fr));
127
+ gap: 4px 24px;
128
+ }
129
+ .tour-list li { padding: 3px 0; }
130
+ .tour-list strong { color: var(--text); font-weight: 600; }
131
+ .tour-list code {
132
+ background: var(--line); padding: 1px 5px; border-radius: 3px;
133
+ color: var(--accent-2); font-size: 11px;
134
+ }
135
+
136
  main {
137
  flex: 1; display: grid;
138
  grid-template-columns: 0.95fr 1.4fr 1.2fr;