Spaces:
Running on Zero
Running on Zero
Upload folder using huggingface_hub
Browse files
- .gitattributes +3 -0
- app.py +6 -0
- examples.json +18 -6
- images/britannica-vol1-leaf60.jpg +3 -0
- images/britannica-vol1-leaf61.jpg +3 -0
- images/commoner-1901-01-23.jpg +3 -0
- index.html +39 -4
.gitattributes
CHANGED
|
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
images/britannica-vol1-leaf60.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
images/britannica-vol1-leaf61.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
images/commoner-1901-01-23.jpg filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -16,6 +16,7 @@ from pathlib import Path
|
|
| 16 |
import spaces
|
| 17 |
import torch
|
| 18 |
from fastapi.responses import HTMLResponse, JSONResponse
|
|
|
|
| 19 |
from gradio import Server
|
| 20 |
from transformers import (
|
| 21 |
AutoModelForMultimodalLM,
|
|
@@ -258,5 +259,10 @@ async def get_results():
|
|
| 258 |
return JSONResponse({"summary_md": summary, "per_passage": rows})
|
| 259 |
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
if __name__ == "__main__":
|
| 262 |
app.launch(show_error=True)
|
|
|
|
| 16 |
import spaces
|
| 17 |
import torch
|
| 18 |
from fastapi.responses import HTMLResponse, JSONResponse
|
| 19 |
+
from fastapi.staticfiles import StaticFiles
|
| 20 |
from gradio import Server
|
| 21 |
from transformers import (
|
| 22 |
AutoModelForMultimodalLM,
|
|
|
|
| 259 |
return JSONResponse({"summary_md": summary, "per_passage": rows})
|
| 260 |
|
| 261 |
|
| 262 |
+
_images_dir = HERE / "images"
|
| 263 |
+
if _images_dir.is_dir():
|
| 264 |
+
app.mount("/static", StaticFiles(directory=str(_images_dir)), name="static")
|
| 265 |
+
|
| 266 |
+
|
| 267 |
if __name__ == "__main__":
|
| 268 |
app.launch(show_error=True)
|
examples.json
CHANGED
|
@@ -32,31 +32,43 @@
|
|
| 32 |
{
|
| 33 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/0",
|
| 34 |
"label": "The Commoner 1901-01-23: Had tho American consumers 1\u2026",
|
| 35 |
-
"ocr_input": "Had tho American consumers 1 Jl . - 1 -L .. l-l! i.J1 i.L I oeen privuegea vo ouy ac wic cjuotunuus grauieu wio foreign buyers, the Americana would .have saved about 98010,080.97 on their purchase. \"\"\"More 'than eight millions of dollars! \"This measures tho extortion practiced upon the hard ware merchant, but this must bo increased by the merchant's profit, if his profit is estimated upon a percentage basis, before it measures the extor tion practiced upon the consumer."
|
|
|
|
|
|
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/1",
|
| 39 |
"label": "The Commoner 1901-01-23: A government resting on force\u2026",
|
| 40 |
-
"ocr_input": "A government resting on force is, on tho other hand, ever unstable because it excites hatred rather than affection anjd is continually at war with human nature; it is in constant antag onism to that universal sentiment- which is de fined as the love of liberty; ' All history sustains tho self-evident truths which form the fonndafiibif -W1 government deriving its just powers from the consent of the governed. ' All' history condemns a political structure which appeals only to fear and- relief upon bayonets for its support."
|
|
|
|
|
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/2",
|
| 44 |
"label": "The Commoner 1901-01-23: It is riot necessary to\u2026",
|
| 45 |
-
"ocr_input": "It is riot necessary to apologize for the use of a term which distinguishes the great body of the population from the comparatively few, who, for one reason or another, withdraw themselves from sympathetic connection with their fellows. Among the Greeks \"BLoi polloi\" was used to describe tho many, while among the Romans the word \"plebs\" was employed for the same purpose. These appellations, like \"the common people,\" have been assumed with pride by those to whom they were applied, while they have been used as terms of reproach by those who cdunted themselves n.TnrTify t.Tin."
|
|
|
|
|
|
|
| 46 |
},
|
| 47 |
{
|
| 48 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/3",
|
| 49 |
"label": "The Commoner 1901-01-23: This quotation is reproduced because\u2026",
|
| 50 |
-
"ocr_input": "This quotation is reproduced because it fairly represents the views of those who criticize tho expression. It has, however, an eminently , respectable origin. In the same chapter in which Christ condensed man's duty to his fellows into the commandment: Thou shalt love thy neighbor as thyself; in the same chapter in which he de nounced those who devour widows' houses and for a pretense make long prayers in this same chapter it is said of Him: The common people heard Him gladly."
|
|
|
|
|
|
|
| 51 |
},
|
| 52 |
{
|
| 53 |
"id": "britannica1771/vol1/leaf60",
|
| 54 |
"label": "Britannica 1771 (long \u017f): At the thick end\u2026",
|
| 55 |
-
"ocr_input": "At the thick end of the bean,there is a fmall hole vifible to the naked eye, immediatelyover the radicle or future root, that it may have a freepaffage into the foil. Plate IV. fig. I. A. When thefecoats are taken off, the body of the feed appears, whichis divided into two fmooth portions or lobes. Thefmoothnefs of the lobes is owing to a thin'filin or cuticlewith which they are covered.At the bafts of the bean is placed the radicle Qr futureroot, Plate IV. fig. 3. A."
|
|
|
|
|
|
|
| 56 |
},
|
| 57 |
{
|
| 58 |
"id": "britannica1771/vol1/leaf61",
|
| 59 |
"label": "Britannica 1771 (long \u017f): II. Of the Root.I\u2026",
|
| 60 |
-
"ocr_input": "II. Of the Root.I n examining the root of plants, the firft thing thatprefents itfelf is the Ikin, which is of various colours indifferent plants. Every root, after it has arrived at acertain age, has a double fkin. The firfl is coeval withthe other parts, and exifls in the feed: but afterwardsthere is a ring fent off from the bark, and forms a fe-cond fit in; eg. in the root of the dandelion, towardsthe end of May, the original or outer fkin appears ftiri-veled, and is eafily feparated from the new one, whichis frefher, and adheres more firmly to the bark."
|
|
|
|
|
|
|
| 61 |
}
|
| 62 |
]
|
|
|
|
| 32 |
{
|
| 33 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/0",
|
| 34 |
"label": "The Commoner 1901-01-23: Had tho American consumers 1\u2026",
|
| 35 |
+
"ocr_input": "Had tho American consumers 1 Jl . - 1 -L .. l-l! i.J1 i.L I oeen privuegea vo ouy ac wic cjuotunuus grauieu wio foreign buyers, the Americana would .have saved about 98010,080.97 on their purchase. \"\"\"More 'than eight millions of dollars! \"This measures tho extortion practiced upon the hard ware merchant, but this must bo increased by the merchant's profit, if his profit is estimated upon a percentage basis, before it measures the extor tion practiced upon the consumer.",
|
| 36 |
+
"image": "static/commoner-1901-01-23.jpg",
|
| 37 |
+
"image_caption": "The Commoner (Lincoln, Nebraska), 23 Jan 1901, front page \u2014 via Chronicling America, Library of Congress"
|
| 38 |
},
|
| 39 |
{
|
| 40 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/1",
|
| 41 |
"label": "The Commoner 1901-01-23: A government resting on force\u2026",
|
| 42 |
+
"ocr_input": "A government resting on force is, on tho other hand, ever unstable because it excites hatred rather than affection anjd is continually at war with human nature; it is in constant antag onism to that universal sentiment- which is de fined as the love of liberty; ' All history sustains tho self-evident truths which form the fonndafiibif -W1 government deriving its just powers from the consent of the governed. ' All' history condemns a political structure which appeals only to fear and- relief upon bayonets for its support.",
|
| 43 |
+
"image": "static/commoner-1901-01-23.jpg",
|
| 44 |
+
"image_caption": "The Commoner (Lincoln, Nebraska), 23 Jan 1901, front page \u2014 via Chronicling America, Library of Congress"
|
| 45 |
},
|
| 46 |
{
|
| 47 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/2",
|
| 48 |
"label": "The Commoner 1901-01-23: It is riot necessary to\u2026",
|
| 49 |
+
"ocr_input": "It is riot necessary to apologize for the use of a term which distinguishes the great body of the population from the comparatively few, who, for one reason or another, withdraw themselves from sympathetic connection with their fellows. Among the Greeks \"BLoi polloi\" was used to describe tho many, while among the Romans the word \"plebs\" was employed for the same purpose. These appellations, like \"the common people,\" have been assumed with pride by those to whom they were applied, while they have been used as terms of reproach by those who cdunted themselves n.TnrTify t.Tin.",
|
| 50 |
+
"image": "static/commoner-1901-01-23.jpg",
|
| 51 |
+
"image_caption": "The Commoner (Lincoln, Nebraska), 23 Jan 1901, front page \u2014 via Chronicling America, Library of Congress"
|
| 52 |
},
|
| 53 |
{
|
| 54 |
"id": "commoner/ca-46032385-1901-01-23-ed-1-0001/3",
|
| 55 |
"label": "The Commoner 1901-01-23: This quotation is reproduced because\u2026",
|
| 56 |
+
"ocr_input": "This quotation is reproduced because it fairly represents the views of those who criticize tho expression. It has, however, an eminently , respectable origin. In the same chapter in which Christ condensed man's duty to his fellows into the commandment: Thou shalt love thy neighbor as thyself; in the same chapter in which he de nounced those who devour widows' houses and for a pretense make long prayers in this same chapter it is said of Him: The common people heard Him gladly.",
|
| 57 |
+
"image": "static/commoner-1901-01-23.jpg",
|
| 58 |
+
"image_caption": "The Commoner (Lincoln, Nebraska), 23 Jan 1901, front page \u2014 via Chronicling America, Library of Congress"
|
| 59 |
},
|
| 60 |
{
|
| 61 |
"id": "britannica1771/vol1/leaf60",
|
| 62 |
"label": "Britannica 1771 (long \u017f): At the thick end\u2026",
|
| 63 |
+
"ocr_input": "At the thick end of the bean,there is a fmall hole vifible to the naked eye, immediatelyover the radicle or future root, that it may have a freepaffage into the foil. Plate IV. fig. I. A. When thefecoats are taken off, the body of the feed appears, whichis divided into two fmooth portions or lobes. Thefmoothnefs of the lobes is owing to a thin'filin or cuticlewith which they are covered.At the bafts of the bean is placed the radicle Qr futureroot, Plate IV. fig. 3. A.",
|
| 64 |
+
"image": "static/britannica-vol1-leaf60.jpg",
|
| 65 |
+
"image_caption": "Encyclopaedia Britannica, 1st edition (1771), vol. 1 \u2014 National Library of Scotland digitisation"
|
| 66 |
},
|
| 67 |
{
|
| 68 |
"id": "britannica1771/vol1/leaf61",
|
| 69 |
"label": "Britannica 1771 (long \u017f): II. Of the Root.I\u2026",
|
| 70 |
+
"ocr_input": "II. Of the Root.I n examining the root of plants, the firft thing thatprefents itfelf is the Ikin, which is of various colours indifferent plants. Every root, after it has arrived at acertain age, has a double fkin. The firfl is coeval withthe other parts, and exifls in the feed: but afterwardsthere is a ring fent off from the bark, and forms a fe-cond fit in; eg. in the root of the dandelion, towardsthe end of May, the original or outer fkin appears ftiri-veled, and is eafily feparated from the new one, whichis frefher, and adheres more firmly to the bark.",
|
| 71 |
+
"image": "static/britannica-vol1-leaf61.jpg",
|
| 72 |
+
"image_caption": "Encyclopaedia Britannica, 1st edition (1771), vol. 1 \u2014 National Library of Scotland digitisation"
|
| 73 |
}
|
| 74 |
]
|
images/britannica-vol1-leaf60.jpg
ADDED
|
Git LFS Details
|
images/britannica-vol1-leaf61.jpg
ADDED
|
Git LFS Details
|
images/commoner-1901-01-23.jpg
ADDED
|
Git LFS Details
|
index.html
CHANGED
|
@@ -190,6 +190,13 @@
|
|
| 190 |
<summary>Human transcription of this passage</summary>
|
| 191 |
<div class="gold-text" id="gold-text"></div>
|
| 192 |
</details>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
<div class="controls">
|
| 194 |
<button class="run-btn" id="run">Correct the Proofs</button>
|
| 195 |
<label class="toggle">
|
|
@@ -261,6 +268,21 @@
|
|
| 261 |
</section>
|
| 262 |
|
| 263 |
<section class="pane" id="pane-ledger">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 264 |
<div id="results-table"><p style="text-align:center;font-style:italic">Fetching the ledger…</p></div>
|
| 265 |
<div class="footnotes" id="results-notes"></div>
|
| 266 |
</section>
|
|
@@ -325,6 +347,18 @@ function setGoldAvailable(gold) {
|
|
| 325 |
for (const id of ["no-gold-note", "no-gold-note-press"])
|
| 326 |
$(id).classList.toggle("visible", !gold);
|
| 327 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
function currentGold() {
|
| 329 |
return activeExample && $("ocr-input").value === activeExample.ocr_input ? (activeExample.gold || "") : "";
|
| 330 |
}
|
|
@@ -386,6 +420,7 @@ async function loadExamples() {
|
|
| 386 |
activeExample = e;
|
| 387 |
updateCount();
|
| 388 |
setGoldAvailable(e.gold || "");
|
|
|
|
| 389 |
if (e.cached) renderCached(e);
|
| 390 |
};
|
| 391 |
chips.appendChild(b);
|
|
@@ -416,7 +451,7 @@ function updateCount() {
|
|
| 416 |
el.textContent = `${n} / ${MAX_CHARS}`;
|
| 417 |
el.classList.toggle("over", n > MAX_CHARS);
|
| 418 |
}
|
| 419 |
-
$("ocr-input").addEventListener("input", () => { activeExample = null; updateCount(); setGoldAvailable(""); });
|
| 420 |
updateCount();
|
| 421 |
|
| 422 |
/* ---------- run ---------- */
|
|
@@ -505,9 +540,9 @@ async function loadResults() {
|
|
| 505 |
const notes = lines.filter(l => /^(Micro|Mean)/.test(l)).map(l => `<p>${l}</p>`).join("");
|
| 506 |
$("results-notes").innerHTML = notes +
|
| 507 |
`<p><b>Bold</b> marks the better of the two main models on each measure.
|
| 508 |
-
The OCR‑seeded‑canvas row is greyed out: it converges fastest but barely edits anything
|
| 509 |
-
|
| 510 |
-
|
| 511 |
} catch { $("results-table").innerHTML = "<p style='text-align:center;font-style:italic'>The ledger could not be fetched.</p>"; }
|
| 512 |
}
|
| 513 |
|
|
|
|
| 190 |
<summary>Human transcription of this passage</summary>
|
| 191 |
<div class="gold-text" id="gold-text"></div>
|
| 192 |
</details>
|
| 193 |
+
<details class="authority" id="page-image">
|
| 194 |
+
<summary>View the original page (full page — the passage is one excerpt from it)</summary>
|
| 195 |
+
<div class="gold-text" style="text-align:center">
|
| 196 |
+
<img id="page-img" loading="lazy" alt="Scanned source page" style="max-width:100%;border:1px solid var(--rule)" />
|
| 197 |
+
<div id="page-caption" style="font-size:.78rem;font-style:italic;margin-top:.4rem"></div>
|
| 198 |
+
</div>
|
| 199 |
+
</details>
|
| 200 |
<div class="controls">
|
| 201 |
<button class="run-btn" id="run">Correct the Proofs</button>
|
| 202 |
<label class="toggle">
|
|
|
|
| 268 |
</section>
|
| 269 |
|
| 270 |
<section class="pane" id="pane-ledger">
|
| 271 |
+
<div class="footnotes" style="margin-bottom:1.4rem">
|
| 272 |
+
<p><b>The data.</b> 75 passages from <a href="https://doi.org/10.15131/shef.data.25439023">BLN600</a>,
|
| 273 |
+
a corpus of 600 excerpts of 19th‑century London newspapers (largely crime reporting) from the British
|
| 274 |
+
Library's collections, each paired with both the original OCR and a careful <em>human transcription</em>.
|
| 275 |
+
That human transcription is the “right answer” every number below is measured against. Passages longer
|
| 276 |
+
than DiffusionGemma's 256‑token output block were trimmed at a point where OCR and transcription align,
|
| 277 |
+
so the pairs stay parallel. (BLN600 is CC‑BY‑NC, so the passages themselves aren't republished here — only
|
| 278 |
+
these metrics.)</p>
|
| 279 |
+
<p><b>The task.</b> Both models got the identical instruction — fix recognition errors only, don't modernise
|
| 280 |
+
or rephrase — one passage at a time on the same A100 GPU. <b>CER / WER</b>: how far the output remains from
|
| 281 |
+
the human transcription, by character / by word (the “OCR input” row is the damage before any correction).
|
| 282 |
+
<b>Relative CER reduction</b>: how much of that damage the model repaired.
|
| 283 |
+
<b>Over‑correction</b>: how much text that was already right the model needlessly changed.
|
| 284 |
+
<b>Fix rate</b>: how much of what was actually wrong it fixed.</p>
|
| 285 |
+
</div>
|
| 286 |
<div id="results-table"><p style="text-align:center;font-style:italic">Fetching the ledger…</p></div>
|
| 287 |
<div class="footnotes" id="results-notes"></div>
|
| 288 |
</section>
|
|
|
|
| 347 |
for (const id of ["no-gold-note", "no-gold-note-press"])
|
| 348 |
$(id).classList.toggle("visible", !gold);
|
| 349 |
}
|
| 350 |
+
/**
 * Sync the "original page" <details> panel with the given example.
 *
 * If `e` is truthy and has an `image`, the panel gets the `visible` class,
 * is collapsed, the <img id="page-img"> is pointed at `e.image`, and the
 * caption is set to `e.image_caption` (empty string when absent).
 * Otherwise the `visible` class is removed and the image's `src`
 * attribute is dropped.
 *
 * @param {?{image?: string, image_caption?: string}} e  Selected example, or null.
 */
function setPageImage(e) {
  const details = $("page-image");
  const hasImage = Boolean(e && e.image);

  // Nothing to show: hide the panel and detach the image source.
  if (!hasImage) {
    details.classList.remove("visible");
    $("page-img").removeAttribute("src");
    return;
  }

  details.classList.add("visible");
  // Always start collapsed when the example changes.
  details.open = false;
  $("page-img").src = e.image;
  $("page-caption").textContent = e.image_caption || "";
}
|
| 362 |
function currentGold() {
|
| 363 |
return activeExample && $("ocr-input").value === activeExample.ocr_input ? (activeExample.gold || "") : "";
|
| 364 |
}
|
|
|
|
| 420 |
activeExample = e;
|
| 421 |
updateCount();
|
| 422 |
setGoldAvailable(e.gold || "");
|
| 423 |
+
setPageImage(e);
|
| 424 |
if (e.cached) renderCached(e);
|
| 425 |
};
|
| 426 |
chips.appendChild(b);
|
|
|
|
| 451 |
el.textContent = `${n} / ${MAX_CHARS}`;
|
| 452 |
el.classList.toggle("over", n > MAX_CHARS);
|
| 453 |
}
|
| 454 |
+
$("ocr-input").addEventListener("input", () => { activeExample = null; updateCount(); setGoldAvailable(""); setPageImage(null); });
|
| 455 |
updateCount();
|
| 456 |
|
| 457 |
/* ---------- run ---------- */
|
|
|
|
| 540 |
const notes = lines.filter(l => /^(Micro|Mean)/.test(l)).map(l => `<p>${l}</p>`).join("");
|
| 541 |
$("results-notes").innerHTML = notes +
|
| 542 |
`<p><b>Bold</b> marks the better of the two main models on each measure.
|
| 543 |
+
The OCR‑seeded‑canvas row is greyed out: it converges fastest but barely edits anything
|
| 544 |
+
(a negative result — see the repo notes), so highlighting its numbers would mislead.
|
| 545 |
+
Single run, one prompt, no significance testing — a pragmatic day‑one benchmark, not a study.</p>`;
|
| 546 |
} catch { $("results-table").innerHTML = "<p style='text-align:center;font-style:italic'>The ledger could not be fetched.</p>"; }
|
| 547 |
}
|
| 548 |
|