davanstrien HF Staff committed on
Commit
41d7fc8
·
verified ·
1 Parent(s): 90c2600

Upload folder using huggingface_hub

Browse files
app.py CHANGED
@@ -1,18 +1,22 @@
1
  """DiffusionGemma vs Gemma-4 on post-OCR correction — ZeroGPU comparison Space.
2
 
3
- Side-by-side correction of 19th-century English newspaper OCR by an
 
4
  experimental block-diffusion LLM (google/diffusiongemma-26B-A4B-it) and an
5
  autoregressive baseline (google/gemma-4-E4B-it).
6
  """
7
 
 
8
  import json
9
  import os
 
10
  import time
11
  from pathlib import Path
12
 
13
- import gradio as gr
14
  import spaces
15
  import torch
 
 
16
  from transformers import (
17
  AutoModelForMultimodalLM,
18
  AutoProcessor,
@@ -20,7 +24,7 @@ from transformers import (
20
  TextDiffusionStreamer,
21
  )
22
 
23
- from diff_utils import COLOR_MAP, diff_highlight
24
 
25
  # Keep in sync with benchmark.py PROMPT_TEMPLATE — the benchmark numbers in the
26
  # results tab were produced with exactly this prompt.
@@ -35,15 +39,12 @@ OCR text:
35
  {ocr}"""
36
 
37
  MAX_INPUT_CHARS = 1200 # roughly the 220-token benchmark cap
 
38
 
39
 
40
  def model_path(volume_path: str, model_id: str) -> str:
41
- """Prefer a mounted hf:// volume (see `hf spaces volumes`) over a download.
42
-
43
- Volume reads go over FUSE, which is sometimes slower for safetensors loading
44
- than a fresh download to local disk — set USE_VOLUMES=0 (Space variable) to
45
- force from_pretrained downloads for comparison.
46
- """
47
  if os.environ.get("USE_VOLUMES", "1") == "0":
48
  return model_id
49
  return volume_path if os.path.isdir(volume_path) else model_id
@@ -64,7 +65,7 @@ g4_model = AutoModelForMultimodalLM.from_pretrained(G4_PATH, dtype=torch.bfloat1
64
  print(f"Gemma-4 loaded in {time.perf_counter() - t0:.0f}s")
65
 
66
 
67
- STOP_MARKERS = ("<turn|>", "<eos>", "<end_of_turn>", "<pad>")
68
 
69
 
70
  def extract_answer(raw: str) -> str:
@@ -79,6 +80,25 @@ def extract_answer(raw: str) -> str:
79
  return raw.strip()
80
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  class SnapshotStreamer(TextDiffusionStreamer):
83
  """Captures the decoded canvas at each denoising step; suppresses the
84
  parent's ANSI console printing."""
@@ -118,27 +138,66 @@ def _decode_generated(processor, output, input_len) -> str:
118
  return extract_answer(raw)
119
 
120
 
121
- # size="xlarge" (96GB) on both: total module-level CUDA state is ~68GB bf16,
122
- # which exceeds the default 48GB ZeroGPU slice.
123
- @spaces.GPU(duration=120, size="xlarge")
124
- def run_diffusiongemma(ocr_text: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  inputs = _prepare_inputs(dg_processor, dg_model, ocr_text)
126
  streamer = SnapshotStreamer(dg_processor.tokenizer)
 
 
 
 
 
 
 
 
 
 
127
  t0 = time.perf_counter()
128
- output = dg_model.generate(**inputs, max_new_tokens=256, streamer=streamer)
129
  torch.cuda.synchronize()
130
  seconds = time.perf_counter() - t0
131
  text = _decode_generated(dg_processor, output, inputs["input_ids"].shape[-1])
132
  n_tokens = len(dg_processor.tokenizer(text)["input_ids"])
133
- timing = (
134
- f"**{seconds:.1f}s** · ~{n_tokens / seconds:.0f} tok/s · "
135
- f"{len(streamer.snapshots)} denoising steps"
136
- )
137
- return text, diff_highlight(ocr_text, text), timing, streamer.snapshots
138
-
139
-
 
 
 
 
 
 
140
  @spaces.GPU(duration=60, size="xlarge")
141
- def run_gemma4(ocr_text: str):
 
 
 
142
  inputs = _prepare_inputs(g4_processor, g4_model, ocr_text)
143
  t0 = time.perf_counter()
144
  output = g4_model.generate(**inputs, max_new_tokens=256, do_sample=False)
@@ -146,127 +205,51 @@ def run_gemma4(ocr_text: str):
146
  seconds = time.perf_counter() - t0
147
  text = _decode_generated(g4_processor, output, inputs["input_ids"].shape[-1])
148
  n_tokens = len(g4_processor.tokenizer(text)["input_ids"])
149
- timing = f"**{seconds:.1f}s** · ~{n_tokens / seconds:.0f} tok/s (greedy)"
150
- return text, diff_highlight(ocr_text, text), timing
151
-
152
-
153
- # ---------------------------------------------------------------- UI data
154
-
155
- examples: list[dict] = []
156
- examples_path = Path("examples.json")
157
- if examples_path.exists():
158
- examples = json.loads(examples_path.read_text())
159
- example_choices = {e["label"]: e["ocr_input"] for e in examples}
160
-
161
- summary_md = "*Benchmark results pending — see the repo for methodology.*"
162
- if Path("results/summary.md").exists():
163
- summary_md = Path("results/summary.md").read_text()
164
-
165
- per_passage_rows = []
166
- if Path("results/per_passage_metrics.jsonl").exists():
167
- per_passage_rows = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  json.loads(line)
169
- for line in Path("results/per_passage_metrics.jsonl").read_text().splitlines()
170
  if line.strip()
171
  ]
 
172
 
173
 
174
- def load_example(label: str) -> str:
175
- return example_choices.get(label, "")
176
-
177
-
178
- def check_length(text: str):
179
- if len(text) > MAX_INPUT_CHARS:
180
- raise gr.Error(
181
- f"Input too long ({len(text)} chars). DiffusionGemma generates a single "
182
- f"256-token block, so inputs are capped at ~{MAX_INPUT_CHARS} characters."
183
- )
184
- return text
185
-
186
-
187
- def update_snapshot(snapshots: list[str], step: int) -> str:
188
- if not snapshots:
189
- return ""
190
- return snapshots[min(int(step), len(snapshots) - 1)]
191
-
192
-
193
- with gr.Blocks(title="DiffusionGemma vs Gemma-4: post-OCR correction") as demo:
194
- gr.Markdown(
195
- "# DiffusionGemma vs Gemma-4: post-OCR correction\n"
196
- "Compare Google's **experimental diffusion LLM** "
197
- "([google/diffusiongemma-26B-A4B-it](https://huggingface.co/google/diffusiongemma-26B-A4B-it), "
198
- "26B MoE / 3.8B active, released 2026-06-10) against an autoregressive baseline "
199
- "([google/gemma-4-E4B-it](https://huggingface.co/google/gemma-4-E4B-it)) on correcting "
200
- "19th-century English newspaper OCR. Both run in bf16. Highlights show what each model "
201
- "**changed relative to the OCR input** (yellow = changed, green = added, red ⌫ = deleted)."
202
- )
203
-
204
- with gr.Tab("Live comparison"):
205
- with gr.Row():
206
- example_dd = gr.Dropdown(
207
- label="Example passages (ICDAR2019 post-OCR, CC-BY-4.0)",
208
- choices=list(example_choices),
209
- value=None,
210
- scale=2,
211
- )
212
- ocr_box = gr.Textbox(
213
- label="Noisy OCR text",
214
- lines=6,
215
- value=next(iter(example_choices.values()), ""),
216
- max_length=MAX_INPUT_CHARS,
217
- )
218
- run_btn = gr.Button("Run both models", variant="primary")
219
- with gr.Row():
220
- with gr.Column():
221
- gr.Markdown("### DiffusionGemma 26B-A4B (diffusion)")
222
- dg_timing = gr.Markdown("")
223
- dg_diff = gr.HighlightedText(
224
- label="Output (diff vs input)", color_map=COLOR_MAP, combine_adjacent=True
225
- )
226
- with gr.Accordion("Raw output", open=False):
227
- dg_raw = gr.Textbox(lines=6, show_label=False)
228
- with gr.Column():
229
- gr.Markdown("### Gemma-4-E4B (autoregressive)")
230
- g4_timing = gr.Markdown("")
231
- g4_diff = gr.HighlightedText(
232
- label="Output (diff vs input)", color_map=COLOR_MAP, combine_adjacent=True
233
- )
234
- with gr.Accordion("Raw output", open=False):
235
- g4_raw = gr.Textbox(lines=6, show_label=False)
236
-
237
- snapshots_state = gr.State([])
238
- example_dd.change(load_example, example_dd, ocr_box)
239
- run_btn.click(check_length, ocr_box, ocr_box).success(
240
- run_diffusiongemma, ocr_box, [dg_raw, dg_diff, dg_timing, snapshots_state]
241
- ).then(run_gemma4, ocr_box, [g4_raw, g4_diff, g4_timing])
242
-
243
- with gr.Tab("Denoising progression"):
244
- gr.Markdown(
245
- "DiffusionGemma starts from a random 256-token canvas and iteratively denoises it. "
246
- "Run a comparison first, then scrub through the intermediate canvas states."
247
- )
248
- step_slider = gr.Slider(0, 47, step=1, value=0, label="Denoising step")
249
- snapshot_box = gr.Textbox(lines=10, label="Canvas at step", interactive=False)
250
- step_slider.change(update_snapshot, [snapshots_state, step_slider], snapshot_box)
251
- snapshots_state.change(
252
- lambda s: (gr.Slider(0, max(len(s) - 1, 1), step=1, value=0), update_snapshot(s, 0)),
253
- snapshots_state,
254
- [step_slider, snapshot_box],
255
- )
256
-
257
- with gr.Tab("Benchmark results"):
258
- gr.Markdown(summary_md)
259
- if per_passage_rows:
260
- gr.Markdown("### Per-passage metrics (BLN600, n=75)")
261
- gr.DataFrame(
262
- value=[[row.get(k) for k in per_passage_rows[0]] for row in per_passage_rows],
263
- headers=list(per_passage_rows[0]),
264
- interactive=False,
265
- )
266
- gr.Markdown(
267
- "Benchmark texts come from [BLN600](https://doi.org/10.15131/shef.data.25439023) "
268
- "(CC-BY-NC-4.0), so passage texts are not redistributed here — only metrics. "
269
- "See the Space README for methodology and limitations."
270
- )
271
-
272
- demo.launch()
 
1
  """DiffusionGemma vs Gemma-4 on post-OCR correction — ZeroGPU comparison Space.
2
 
3
+ gradio.Server pattern: custom HTML frontend (index.html) + Gradio queuing
4
+ backend. Side-by-side correction of 19th-century English newspaper OCR by an
5
  experimental block-diffusion LLM (google/diffusiongemma-26B-A4B-it) and an
6
  autoregressive baseline (google/gemma-4-E4B-it).
7
  """
8
 
9
+ import difflib
10
  import json
11
  import os
12
+ import re
13
  import time
14
  from pathlib import Path
15
 
 
16
  import spaces
17
  import torch
18
+ from fastapi.responses import HTMLResponse, JSONResponse
19
+ from gradio import Server
20
  from transformers import (
21
  AutoModelForMultimodalLM,
22
  AutoProcessor,
 
24
  TextDiffusionStreamer,
25
  )
26
 
27
+ HERE = Path(__file__).resolve().parent
28
 
29
  # Keep in sync with benchmark.py PROMPT_TEMPLATE — the benchmark numbers in the
30
  # results tab were produced with exactly this prompt.
 
39
  {ocr}"""
40
 
41
  MAX_INPUT_CHARS = 1200 # roughly the 220-token benchmark cap
42
+ STOP_MARKERS = ("<turn|>", "<eos>", "<end_of_turn>", "<pad>")
43
 
44
 
45
  def model_path(volume_path: str, model_id: str) -> str:
46
+ """Prefer a mounted hf:// volume unless USE_VOLUMES=0 (FUSE reads can be
47
+ slower for safetensors loading than a fresh download to local disk)."""
 
 
 
 
48
  if os.environ.get("USE_VOLUMES", "1") == "0":
49
  return model_id
50
  return volume_path if os.path.isdir(volume_path) else model_id
 
65
  print(f"Gemma-4 loaded in {time.perf_counter() - t0:.0f}s")
66
 
67
 
68
+ # ---------------------------------------------------------------- text utils
69
 
70
 
71
  def extract_answer(raw: str) -> str:
 
80
  return raw.strip()
81
 
82
 
83
def diff_segments(input_text: str, output_text: str) -> list[dict]:
    """Diff `output_text` against `input_text` at word/whitespace granularity.

    Both strings are split losslessly into alternating runs of non-whitespace
    and whitespace, then aligned with difflib. Each opcode becomes one
    ``{"text", "op"}`` segment, where ``op`` is one of ``same``, ``changed``,
    ``added`` or ``removed``. Text for ``removed`` segments comes from the
    input; every other segment carries output text. Rendered by the frontend.
    """
    splitter = re.compile(r"\S+|\s+")
    src = splitter.findall(input_text)
    dst = splitter.findall(output_text)
    # difflib tag -> (segment label, True when the text is taken from the input)
    labels = {
        "equal": ("same", False),
        "replace": ("changed", False),
        "insert": ("added", False),
        "delete": ("removed", True),
    }
    matcher = difflib.SequenceMatcher(None, src, dst, autojunk=False)
    result = []
    for tag, a_lo, a_hi, b_lo, b_hi in matcher.get_opcodes():
        label, from_input = labels[tag]
        pieces = src[a_lo:a_hi] if from_input else dst[b_lo:b_hi]
        result.append({"text": "".join(pieces), "op": label})
    return result
100
+
101
+
102
  class SnapshotStreamer(TextDiffusionStreamer):
103
  """Captures the decoded canvas at each denoising step; suppresses the
104
  parent's ANSI console printing."""
 
138
  return extract_answer(raw)
139
 
140
 
141
+ def _validate(ocr_text: str) -> str | None:
142
+ if not ocr_text or not ocr_text.strip():
143
+ return "Empty input."
144
+ if len(ocr_text) > MAX_INPUT_CHARS:
145
+ return (
146
+ f"Input too long ({len(ocr_text)} chars). DiffusionGemma generates a single "
147
+ f"256-token block, so inputs are capped at ~{MAX_INPUT_CHARS} characters."
148
+ )
149
+ return None
150
+
151
+
152
+ # ---------------------------------------------------------------- API
153
+
154
+ app = Server()
155
+
156
+
157
@app.api(name="run_diffusiongemma")
@spaces.GPU(duration=90, size="xlarge")
def run_diffusiongemma(ocr_text: str, canvas_init: bool = False) -> dict:
    """Correct OCR text with DiffusionGemma and report timing plus snapshots.

    With canvas_init=True the first denoising canvas is seeded with the
    tokenized OCR text (experimental — under-corrects; see the results tab)
    and padded with random token ids, instead of starting from random noise.

    Returns ``{"error": message}`` alone when validation fails; otherwise a
    dict with the corrected text, its diff against the stripped input,
    wall-clock seconds, tokens per second, the number of captured denoising
    steps, the per-step snapshots, the canvas_init flag and ``error=None``.
    """
    problem = _validate(ocr_text)
    if problem:
        return {"error": problem}

    inputs = _prepare_inputs(dg_processor, dg_model, ocr_text)
    tokenizer = dg_processor.tokenizer
    streamer = SnapshotStreamer(tokenizer)
    gen_kwargs: dict = {"max_new_tokens": 256, "streamer": streamer}
    if canvas_init:
        # Fall back to 256 when the generation config has no (or a falsy) canvas_length.
        canvas_length = getattr(dg_model.generation_config, "canvas_length", None) or 256
        seed_ids = tokenizer(ocr_text, add_special_tokens=False)["input_ids"][:canvas_length]
        vocab_size = dg_model.config.text_config.vocab_size
        # Fill whatever the OCR tokens leave free with random token ids.
        filler = torch.randint(vocab_size, (canvas_length - len(seed_ids),))
        canvas = torch.cat([torch.tensor(seed_ids, dtype=torch.long), filler])
        gen_kwargs["decoder_input_ids"] = canvas.unsqueeze(0).to(dg_model.device)

    started = time.perf_counter()
    output = dg_model.generate(**inputs, **gen_kwargs)
    torch.cuda.synchronize()  # let queued GPU work finish before stopping the clock
    seconds = time.perf_counter() - started

    prompt_len = inputs["input_ids"].shape[-1]
    text = _decode_generated(dg_processor, output, prompt_len)
    n_tokens = len(tokenizer(text)["input_ids"])
    snapshots = streamer.snapshots
    return {
        "text": text,
        "diff": diff_segments(ocr_text.strip(), text),
        "seconds": round(seconds, 2),
        "tokens_per_second": round(n_tokens / seconds, 1),
        "denoising_steps": len(snapshots),
        "snapshots": [extract_answer(snap) for snap in snapshots],
        "canvas_init": canvas_init,
        "error": None,
    }
193
+
194
+
195
+ @app.api(name="run_gemma4")
196
  @spaces.GPU(duration=60, size="xlarge")
197
+ def run_gemma4(ocr_text: str) -> dict:
198
+ """Correct OCR text with the autoregressive Gemma-4-E4B baseline (greedy)."""
199
+ if err := _validate(ocr_text):
200
+ return {"error": err}
201
  inputs = _prepare_inputs(g4_processor, g4_model, ocr_text)
202
  t0 = time.perf_counter()
203
  output = g4_model.generate(**inputs, max_new_tokens=256, do_sample=False)
 
205
  seconds = time.perf_counter() - t0
206
  text = _decode_generated(g4_processor, output, inputs["input_ids"].shape[-1])
207
  n_tokens = len(g4_processor.tokenizer(text)["input_ids"])
208
+ return {
209
+ "text": text,
210
+ "diff": diff_segments(ocr_text.strip(), text),
211
+ "seconds": round(seconds, 2),
212
+ "tokens_per_second": round(n_tokens / seconds, 1),
213
+ "error": None,
214
+ }
215
+
216
+
217
+ # ---------------------------------------------------------------- static data
218
+
219
+
220
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Serve the custom frontend: the UTF-8 text of index.html under HERE."""
    index_file = HERE / "index.html"
    return index_file.read_text(encoding="utf-8")
223
+
224
+
225
@app.get("/data/examples")
async def get_examples():
    """Return the example passages, each with its precomputed model outputs.

    Every entry of examples.json gains a ``cached`` key: the ``output`` mapping
    of the examples_cached.json entry with the same ``id`` (with ``_raw``
    removed from each model's record and a freshly computed ``diff`` added),
    or None when examples_cached.json is absent or has no entry for that id.
    """
    # Explicit UTF-8, matching how index.html is read, so decoding does not
    # depend on the host locale.
    examples = json.loads((HERE / "examples.json").read_text(encoding="utf-8"))
    cached = {}
    cached_path = HERE / "examples_cached.json"
    if cached_path.exists():
        for entry in json.loads(cached_path.read_text(encoding="utf-8")):
            # Only the per-model records are needed; the model names are not.
            for model_out in entry["output"].values():
                model_out.pop("_raw", None)
            cached[entry["id"]] = entry["output"]
    for example in examples:
        example["cached"] = cached.get(example["id"])
        if example["cached"]:
            # The stripped input is the same for every model, so compute it once.
            ocr_input = example["ocr_input"].strip()
            for model_out in example["cached"].values():
                model_out["diff"] = diff_segments(ocr_input, model_out["text"])
    return JSONResponse(examples)
241
+
242
+
243
@app.get("/data/results")
async def get_results():
    """Return the benchmark summary markdown and the per-passage metric rows.

    Either results file may be missing; the summary then falls back to a
    "pending" note and the per-passage list is empty, instead of the request
    failing with an unhandled FileNotFoundError. The response shape
    (``summary_md``, ``per_passage``) is the same in both cases.
    """
    results_dir = HERE / "results"
    summary_path = results_dir / "summary.md"
    metrics_path = results_dir / "per_passage_metrics.jsonl"

    summary = "*Benchmark results pending — see the repo for methodology.*"
    if summary_path.is_file():
        summary = summary_path.read_text(encoding="utf-8")

    rows = []
    if metrics_path.is_file():
        # One JSON object per non-blank line.
        rows = [
            json.loads(line)
            for line in metrics_path.read_text(encoding="utf-8").splitlines()
            if line.strip()
        ]
    return JSONResponse({"summary_md": summary, "per_passage": rows})
252
 
253
 
254
+ if __name__ == "__main__":
255
+ app.launch(show_error=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
diff_utils.py DELETED
@@ -1,39 +0,0 @@
1
- """Token-level diff between OCR input and model output for gr.HighlightedText."""
2
-
3
- import difflib
4
- import re
5
-
6
- COLOR_MAP = {"changed": "yellow", "added": "green", "removed": "red"}
7
-
8
-
9
- def diff_highlight(input_text: str, output_text: str) -> list[tuple[str, str | None]]:
10
- """Segments of `output_text` labelled by how they differ from `input_text`.
11
-
12
- Word + whitespace tokenization (lossless), so highlights align with what
13
- the reader sees. Deleted input text is marked with a small marker segment.
14
- """
15
- tokens_in = re.findall(r"\S+|\s+", input_text)
16
- tokens_out = re.findall(r"\S+|\s+", output_text)
17
- sm = difflib.SequenceMatcher(None, tokens_in, tokens_out, autojunk=False)
18
- segments: list[tuple[str, str | None]] = []
19
- for op, i1, i2, j1, j2 in sm.get_opcodes():
20
- if op == "equal":
21
- segments.append(("".join(tokens_out[j1:j2]), None))
22
- elif op == "replace":
23
- segments.append(("".join(tokens_out[j1:j2]), "changed"))
24
- elif op == "insert":
25
- segments.append(("".join(tokens_out[j1:j2]), "added"))
26
- elif op == "delete":
27
- segments.append((" ⌫ ", "removed"))
28
- return segments
29
-
30
-
31
- if __name__ == "__main__":
32
- segs = diff_highlight("the qvick brown fox jumps", "the quick brown fox")
33
- print(segs)
34
- assert ("the ", None) in segs or segs[0][1] is None
35
- assert any(label == "changed" for _, label in segs)
36
- assert any(label == "removed" for _, label in segs)
37
- out = "".join(s for s, label in segs if label != "removed")
38
- assert out == "the quick brown fox"
39
- print("diff_utils ok")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
examples.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/107.txt",
4
+ "label": "para Htio HOrastua.foz Da bateffe of the\u2026",
5
+ "ocr_input": "para Htio HOrastua.foz Da bateffe of the npgf> ji to l)is djamtnc.tDitf) manp toufyte Ipgbt Sa (o tbe comtc.w Defer noeD ootbtie ztU bis maptic.stonopngc enuprovtnc iOt these hnpgbceo.baupng full grete ttoimorr 2lno of manboDc.be putte bett? fpist aforniDrr ji)cu; eommaunDpng.lphe a gentpll bpng Co leue ba strpte.anD"
6
+ },
7
+ {
8
+ "id": "example/icdar2019/evaluation_4M_without_Finnish/EN/EN1/2.txt",
9
+ "label": "26 The Academy of Sciences. into two\u2026",
10
+ "ocr_input": "26 The Academy of Sciences. into two unequal parts, as the two Tro\u2022 picks, and the two Polar Circles: Everj circle is divided into sixty parts, which they call first minutes; and each Minute likewise into sixty parts, which we call second Minutes. The Natural Sphere, or the Celestial Globe, befides the foregoing Circles, of fers to our view divers Constellations: The Antients reckoned eight and fort}, comprehending in this number all tk Stars to be seen in Greece, and all tk known Parts of the Wor Id ; 12 of those Constellations are contained in the %$\u2022 diack,i 1 are to be seen toward the Nortb ) and 15 towards the South; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theorical, is that part which by the help of some Engines and Orbs, ojfers to our view those C\u0153le/ltal Motions which are not so obvious to ever] common understanding."
11
+ },
12
+ {
13
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/52.txt",
14
+ "label": "3 Remarty on Des-CartesV Part. I. to\u2026",
15
+ "ocr_input": "3 Remarty on Des-CartesV Part. I. to conceive, That whatsoever thinks does not Exist 4t the Iime of its Thinking: Wherefore he con- eludes, That he is, and that he Thinks, is of aU the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an In vention as this, in order to Improve Know ledge , might not rather proceed from some Person in Bedlam, than from Des-Cartes ? Of whom if Interrogated, how he comes to sup pose, That a Man may point-blank, on the ac count of his Fiction, deny the Being of a God, together with all the visible Particulars already mention\u2019d ? As to the Existence of the Deity, he that considers the necessity of its Concession, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of"
16
+ },
17
+ {
18
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/39.txt",
19
+ "label": "43 2 AN ENGLISH ANTHOLOGY. [BK. IV.\u2026",
20
+ "ocr_input": "43 2 AN ENGLISH ANTHOLOGY. [BK. IV. CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried; Not a soldier discharged his farewell shot O\u2019er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam\u2019s misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. f Few and short were"
21
+ },
22
+ {
23
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/28.txt",
24
+ "label": "144 AN ENGLISH ANTHOLOGY. [BK A hollow\u2026",
25
+ "ocr_input": "144 AN ENGLISH ANTHOLOGY. [BK A hollow wind did seem to answer, No; Go seek elsewhere. I did ; and going did a rainbow note; Surely, thought I, This is the lace of Peace\u2019s coat; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man ; Whom when for Peace I did demand, he thus began : There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived ; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth ; For they that taste it do rehearse That"
26
+ },
27
+ {
28
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/122.txt",
29
+ "label": "t Jeademia Scienthrum. I $ mltiplicare feu\u2026",
30
+ "ocr_input": "t Jeademia Scienthrum. I $ mltiplicare feu in aliumduc\u20acreut4 2 eft toties fiimere multiplicandum quoties sumi potest in multiplicatore unitas: Quare si 4 ducas in 2,fumma ituraest8. Unum numcrum divi- lere per aliura ut 8 per 2 nihil aliud [A quam invenire quoties 2 conti- leantur in 8. Numerus planus a duo- >us numeris in fe invicem ductis pro- lucitur v i2 igitur est-numerus planus juia producitur a numero 6in aducto. Solidus numerus a tribus numeris ultiplicatis oritur : Ejusmodi est 24, ;x tribus enim hisce numeris multipli ers emergit 2, 3, 4 ; si enim 2 duco n 3 habeo 6, & siduxero6 in 4,"
31
+ }
32
+ ]
examples_cached.json ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/107.txt",
4
+ "ocr_input": "para Htio HOrastua.foz Da bateffe of the npgf> ji to l)is djamtnc.tDitf) manp toufyte Ipgbt Sa (o tbe comtc.w Defer noeD ootbtie ztU bis maptic.stonopngc enuprovtnc iOt these hnpgbceo.baupng full grete ttoimorr 2lno of manboDc.be putte bett? fpist aforniDrr ji)cu; eommaunDpng.lphe a gentpll bpng Co leue ba strpte.anD",
5
+ "gold": "Pars And Adrastus.for derkenesse of the nygh Fro his chambre.with many torches lyght In to the courte.is descended downe All his mayne.stondynge enuyprowne Of these knyghtes.hauyng full grete wounder And of manhode.he putte hem fyrst asounder Hem commaundyng.lyke a gentyll kyng To leue her stryte.and",
6
+ "output": {
7
+ "diffusiongemma": {
8
+ "text": "per Horatio Horace. The battle of the night is to distant. Many fought for so the contest de feared not but mastering upon import these hopes. being full great tomorrow no of man be put best first afford for commanding. the gentle being able to spate and",
9
+ "seconds": 2.364,
10
+ "tokens_generated": 53,
11
+ "denoising_steps": 13,
12
+ "tokens_per_forward": 4,
13
+ "thought_chars": 0
14
+ },
15
+ "diffusiongemma_canvas": {
16
+ "text": "para Htio HOrastua.foz Da bateffe of the npgf> ji to l)is djamtnc.tDitf) manp toufyte Ipgbt Sa (o tbe comtc.w Defer noeD ootbtie ztU bis maptic.stonopngc enuprovtnc iOt these hnpgbceo.baupng full grete ttoimorr 2lno of manboDc.be putte bett? fpist aforniDrr ji)cu; eommaunDpng.lphe a gentpll bpng Co leue ba strpte.anD\n\n***Note:*** *The provided OCR text is severely corrupted, containing many unintelligible character strings (e.g., \"npgf>\", \"djamtnc.tDitf)\", \"enuprovtnc\"). Without the source image or more reliable reference, it is impossible to accurately reconstruct the intended 19th-century words without guessing or rephrasing.*",
17
+ "seconds": 2.705,
18
+ "tokens_generated": 219,
19
+ "denoising_steps": 14,
20
+ "tokens_per_forward": 15,
21
+ "thought_chars": 0
22
+ },
23
+ "gemma4": {
24
+ "text": "para Htio Horastua.foz Da bateffe of the npgf> ji to l)is djamtnc.tDitf) manp toufyte Ipgbt Sa (o tbe comtc.w Defer noeD ootbtie ztU bis maptic.stonopngc enuprovtnc iOt these hnpgbceo.baupng full grete ttoimorr 2lno of manboDc.be putte bett? fpist aforniDrr ji)cu; eommaunDpng.lphe a gentpll bpng Co leue ba strpte.anD",
25
+ "seconds": 10.214,
26
+ "tokens_generated": 140,
27
+ "denoising_steps": null,
28
+ "tokens_per_forward": null,
29
+ "thought_chars": 0
30
+ }
31
+ }
32
+ },
33
+ {
34
+ "id": "example/icdar2019/evaluation_4M_without_Finnish/EN/EN1/2.txt",
35
+ "ocr_input": "26 The Academy of Sciences. into two unequal parts, as the two Tro\u2022 picks, and the two Polar Circles: Everj circle is divided into sixty parts, which they call first minutes; and each Minute likewise into sixty parts, which we call second Minutes. The Natural Sphere, or the Celestial Globe, befides the foregoing Circles, of fers to our view divers Constellations: The Antients reckoned eight and fort}, comprehending in this number all tk Stars to be seen in Greece, and all tk known Parts of the Wor Id ; 12 of those Constellations are contained in the %$\u2022 diack,i 1 are to be seen toward the Nortb ) and 15 towards the South; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theorical, is that part which by the help of some Engines and Orbs, ojfers to our view those C\u0153le/ltal Motions which are not so obvious to ever] common understanding.",
36
+ "gold": "into two unequal parts, as the two Tro- picks, and the two Polar Circles : Every circle is divided into fixty parts, which they call first minutes ; and each Minute likewise into ffxty parts, which we call second Minutes. This 26 The Academy of Sciences. The Natural Sphere, or the C\u0153lestial Globe, beffdes the foregoing Circles, of- fers to our view divers Constellations : The Antients reckon\u2019d eight and forty, comprehending in this number all the Stars to be feen in Greece, and all the known Parts of the World ; 12 of those Constellations are contain\u2019d in the Zo- diack,21 are to be seen toward the North, and 15 towards the South ; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theorical, is that part which by the help of some Engines and Orbs, offers to our view those C\u0153lestial Motions which are not so obvious to every common understanding.",
37
+ "output": {
38
+ "diffusiongemma": {
39
+ "text": "26 The Academy of Sciences. into two unequal parts, as the two Tropics, and the two Polar Circles: Every circle is divided into sixty parts, which they call first minutes; and each Minute likewise into sixty parts, which we call second Minutes. The Natural Sphere, or the Celestial Globe, beside the foregoing Circles, offers to our view divers Constellations: The Ancients reckoned eight and forty, comprehending in this number all the Stars to be seen in Greece, and all the known Parts of the World; 12 of those Constellations are contained in the Zodiac, 6 are to be seen toward the North and 15 towards the South; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theoretical, is that part which by the help of some Engines and Orbs, offers to our view those Celestial Motions which are not so obvious to every common understanding.",
40
+ "seconds": 1.927,
41
+ "tokens_generated": 191,
42
+ "denoising_steps": 10,
43
+ "tokens_per_forward": 19,
44
+ "thought_chars": 0
45
+ },
46
+ "diffusiongemma_canvas": {
47
+ "text": "26 The Academy of Sciences. into two unequal parts, as the two Tro\u2022 picks, and the two Polar Circles: Everj circle is divided into sixty parts, which they call first minutes; and each Minute likewise into sixty parts, which we call second Minutes. The Natural Sphere, or the Celestial Globe, befides the foregoing Circles, of fers to our view divers Constellations: The Antients reckoned eight and fort}, comprehending in this number all tk Stars to be seen in Greece, and all tk known Parts of the Wor Id ; 12 of those Constellations are contained in the %$\u2022 diack,i 1 are to be seen toward the Nortb ) and 15 towards the South; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theorical, is that part which by the help of some Engines and Orbs, ojfers to our view those C\u0153le/ltal Motions which are not so obvious to ever] common understanding.",
48
+ "seconds": 1.058,
49
+ "tokens_generated": 211,
50
+ "denoising_steps": 5,
51
+ "tokens_per_forward": 42,
52
+ "thought_chars": 0
53
+ },
54
+ "gemma4": {
55
+ "text": "26 The Academy of Sciences. into two unequal parts, as the two Tropics, and the two Polar Circles: Every circle is divided into sixty parts, which they call first minutes; and each Minute likewise into sixty parts, which we call second Minutes. The Natural Sphere, or the Celestial Globe, besides the foregoing Circles, offers to our view divers Constellations: The Ancients reckoned eight and forty, comprehending in this number all the Stars to be seen in Greece, and all the known Parts of the World; 12 of those Constellations are contained in the Zodiac, 1 are to be seen toward the North and 15 towards the South; but of late there are twelve other Constellations discovered towards the South. The Systematical Astronomy, which others call the Theoretical, is that part which by the help of some Engines and Orbs, offers to our view those Celestial Motions which are not so obvious to every common understanding.",
56
+ "seconds": 14.017,
57
+ "tokens_generated": 192,
58
+ "denoising_steps": null,
59
+ "tokens_per_forward": null,
60
+ "thought_chars": 0
61
+ }
62
+ }
63
+ },
64
+ {
65
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/52.txt",
66
+ "ocr_input": "3 Remarty on Des-CartesV Part. I. to conceive, That whatsoever thinks does not Exist 4t the Iime of its Thinking: Wherefore he con- eludes, That he is, and that he Thinks, is of aU the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an In vention as this, in order to Improve Know ledge , might not rather proceed from some Person in Bedlam, than from Des-Cartes ? Of whom if Interrogated, how he comes to sup pose, That a Man may point-blank, on the ac count of his Fiction, deny the Being of a God, together with all the visible Particulars already mention\u2019d ? As to the Existence of the Deity, he that considers the necessity of its Concession, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of",
67
+ "gold": "8 Remarks on Des-Cartes's Part. I. to conceive, That whatsoever thinks does not Exist at the Time of its Thinking : Wherefore he con\u2011 cludes, That he is, and that he Thinks, is of all the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an In\u2011 vention as this, in order to Improve Know\u2011 ledge , might not rather proceed from some Person in Bedlam , than from Des\u2011Cartes? Of whom if Interrogated, how he comes to sup\u2011 pose, That a Man may point\u2011blank, on the ac\u2011 count of his Fiction, deny the Being of a God, together with all the viffble Particulars already mention\u2019d ? As to the Existence of the Deity, he that conffders the nece\ueba7ty of its Conce\ueba7on, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of",
68
+ "output": {
69
+ "diffusiongemma": {
70
+ "text": "3 Remarks on Descartes. Part. I. to conceive, That whatsoever thinks does not Exist at the Time of its Thinking: Wherefore he concludes,, That he is, and that he Thinks, is of all the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an Invention as this, in order to Improve Knowledge, might not rather proceed from some Person in Bedlam, than from Descartes? Of whom if Interrogated, how he comes to suppose, That a Man may point-blank, on the account of his Fiction, deny the Being of a God, together with all the visible Particulars already mention\u2019d? As to the Existence of the Deity, he that considers the necessity of its Concession, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of",
71
+ "seconds": 3.626,
72
+ "tokens_generated": 188,
73
+ "denoising_steps": 20,
74
+ "tokens_per_forward": 9,
75
+ "thought_chars": 0
76
+ },
77
+ "diffusiongemma_canvas": {
78
+ "text": "3 Remarty on Des-CartesV Part. I. to conceive, That whatsoever thinks does not Exist 4t the Iime of its Thinking: Wherefore he con- eludes, That he is, and that he Thinks, is of aU the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an In vention as this, in order to Improve Know ledge , might not rather proceed from some Person in Bedlam, than from Des-Cartes ? Of whom if Interrogated, how he comes to sup pose, That a Man may point-blank, on the ac count of his Fiction, deny the Being of a God, together with all the visible Particulars already mention\u2019d ? As to the Existence of the Deity, he that considers the necessity of its Concession, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of",
79
+ "seconds": 0.701,
80
+ "tokens_generated": 206,
81
+ "denoising_steps": 3,
82
+ "tokens_per_forward": 69,
83
+ "thought_chars": 0
84
+ },
85
+ "gemma4": {
86
+ "text": "3 Remarty on Des-CartesV Part. I. to conceive, That whatsoever thinks does not Exist at the Time of its Thinking: Wherefore he con- eludes, That he is, and that he Thinks, is of aU the most certain Truth that can be acquired by Philosophical Order. Who would not judge, That so wild an In vention as this, in order to Improve Know ledge , might not rather proceed from some Person in Bedlam, than from Des-Cartes ? Of whom if Interrogated, how he comes to sup pose, That a Man may point-blank, on the ac count of his Fiction, deny the Being of a God, together with all the visible Particulars already mention\u2019d ? As to the Existence of the Deity, he that considers the necessity of its Concession, as he beholds the wonderful Conduct of the Universe, with so many miraculous Objects as are contain\u2019d therein, will as soon give credit to any Forgery of",
87
+ "seconds": 14.885,
88
+ "tokens_generated": 204,
89
+ "denoising_steps": null,
90
+ "tokens_per_forward": null,
91
+ "thought_chars": 0
92
+ }
93
+ }
94
+ },
95
+ {
96
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/39.txt",
97
+ "ocr_input": "43 2 AN ENGLISH ANTHOLOGY. [BK. IV. CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried; Not a soldier discharged his farewell shot O\u2019er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam\u2019s misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. f Few and short were",
98
+ "gold": "432 * First published anonymously in the Newry Telegraph, 1817. \u2020 ' A question arose as to which was the most perfect ode that had been produced. Shelley contended for Coleridge's On Switzer- land; others named some of Moore's Melodies, and Campbell's Hohenlinden. Lord Byron said, \" I will show you an ode I consider little inferior to the best which the present prolific age has produced \". CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried ; Not a soldier discharged his farewell shot O'er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam's misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. \u2020 Few and short were",
99
+ "output": {
100
+ "diffusiongemma": {
101
+ "text": "432 AN ENGLISH ANTHOLOGY. [BK. IV. CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried; Not a soldier discharged his farewell shot O\u2019er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam\u2019s misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. Few and short were",
102
+ "seconds": 1.367,
103
+ "tokens_generated": 139,
104
+ "denoising_steps": 7,
105
+ "tokens_per_forward": 20,
106
+ "thought_chars": 0
107
+ },
108
+ "diffusiongemma_canvas": {
109
+ "text": "43 2 AN ENGLISH ANTHOLOGY. [BK. IV. CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried; Not a soldier discharged his farewell shot O\u2019er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam\u2019s misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. Few and short were",
110
+ "seconds": 0.701,
111
+ "tokens_generated": 140,
112
+ "denoising_steps": 3,
113
+ "tokens_per_forward": 47,
114
+ "thought_chars": 0
115
+ },
116
+ "gemma4": {
117
+ "text": "43 2 AN ENGLISH ANTHOLOGY. [BK. IV. CCCXXXIV. THE BURIAL OF SIR JOHN MOORE.* Not a drum was heard, not a funeral note, As his corpse to the rampart we hurried; Not a soldier discharged his farewell shot O\u2019er the grave where our hero we buried. We buried him darkly at dead of night, The sods with our bayonets turning; By the struggling moonbeam\u2019s misty light, And the lantern dimly burning. No useless coffin enclosed his breast, Not in sheet nor in shroud we wound him; But he lay like a warrior taking his rest, With his martial cloak around him. f Few and short were",
118
+ "seconds": 10.352,
119
+ "tokens_generated": 142,
120
+ "denoising_steps": null,
121
+ "tokens_per_forward": null,
122
+ "thought_chars": 0
123
+ }
124
+ }
125
+ },
126
+ {
127
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/28.txt",
128
+ "ocr_input": "144 AN ENGLISH ANTHOLOGY. [BK A hollow wind did seem to answer, No; Go seek elsewhere. I did ; and going did a rainbow note; Surely, thought I, This is the lace of Peace\u2019s coat; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man ; Whom when for Peace I did demand, he thus began : There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived ; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth ; For they that taste it do rehearse That",
129
+ "gold": "144 A hollow wind did seem to answer, No ; Go seek elsewhere. I did ; and going did a rainbow note ; Surely, thought I, This is the lace of Peace's coat ; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial ; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man ; Whom when for Peace I did demand, he thus began : There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived ; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat ; Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth ; For they that taste it do rehearse That",
130
+ "output": {
131
+ "diffusiongemma": {
132
+ "text": "144 AN ENGLISH ANTHOLOGY. [A hollow wind did seem to answer, No; Go seek elsewhere. I did; and going did a rainbow note; Surely, thought I, This is the lace of Peace\u2019s coat; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man; Whom when for Peace I did demand, he thus began: There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth; For they that taste it do rehearse That",
133
+ "seconds": 0.844,
134
+ "tokens_generated": 220,
135
+ "denoising_steps": 4,
136
+ "tokens_per_forward": 55,
137
+ "thought_chars": 0
138
+ },
139
+ "diffusiongemma_canvas": {
140
+ "text": "144 AN ENGLISH ANTHOLOGY. [BK A hollow wind did seem to answer, No; Go seek elsewhere. I did ; and going did a rainbow note; Surely, thought I, This is the lace of Peace\u2019s coat; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man ; Whom when for Peace I did demand, he thus began : There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived ; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth ; For they that taste it do rehearse That",
141
+ "seconds": 0.528,
142
+ "tokens_generated": 221,
143
+ "denoising_steps": 2,
144
+ "tokens_per_forward": 110,
145
+ "thought_chars": 0
146
+ },
147
+ "gemma4": {
148
+ "text": "144 AN ENGLISH ANTHOLOGY. [A hollow wind did seem to answer, No; Go seek elsewhere. I did; and going did a rainbow note; Surely, thought I, This is the lace of Peace\u2019s coat; I will search out the matter. But while I looked the clouds immediately Did break and scatter. Then went I to a garden and did spy A gallant flower, The crown imperial; sure, said I, Peace at the root must dwell. But when I digged, I saw a worm devour What showed so well. At length I met a reverend good old man; Whom when for Peace I did demand, he thus began: There was a Prince of old At Salem dwelt, who lived with good increase Of flock and fold. He sweetly lived; yet sweetness did not save His life from foes. But after death out of his grave There sprang twelve stalks of wheat Which many wondering at, got some of those To plant and set. It prospered strangely, and did soon disperse Through all the earth; For they that taste it do rehearse That",
149
+ "seconds": 16.111,
150
+ "tokens_generated": 220,
151
+ "denoising_steps": null,
152
+ "tokens_per_forward": null,
153
+ "thought_chars": 0
154
+ }
155
+ }
156
+ },
157
+ {
158
+ "id": "example/icdar2019/full_22M_without_Finnish/EN/EN1/122.txt",
159
+ "ocr_input": "t Jeademia Scienthrum. I $ mltiplicare feu in aliumduc\u20acreut4 2 eft toties fiimere multiplicandum quoties sumi potest in multiplicatore unitas: Quare si 4 ducas in 2,fumma ituraest8. Unum numcrum divi- lere per aliura ut 8 per 2 nihil aliud [A quam invenire quoties 2 conti- leantur in 8. Numerus planus a duo- >us numeris in fe invicem ductis pro- lucitur v i2 igitur est-numerus planus juia producitur a numero 6in aducto. Solidus numerus a tribus numeris ultiplicatis oritur : Ejusmodi est 24, ;x tribus enim hisce numeris multipli ers emergit 2, 3, 4 ; si enim 2 duco n 3 habeo 6, & siduxero6 in 4,",
160
+ "gold": "mltiplicare seu in alium ducere ut 4 2 est toties sumere multiplicandum quoties sumi potest in multiplicatore unitas: Quare ff 4 ducas in 2,summa utura est 8. Unum numerum divi- dere per alium ut 8 per 2 nihil aliud est quam invenire quoties 2 conti- neantur in 8. Numerus planus a duo- us numeris in se invicem ductis pro- ducitur, 12 igitur est numerus planus quia producitur a numero 6 in 2 ducto. Solidus numerus a tribus numeris multiplicatis oritur : Ejusmodi est 24, ex tribus enim hisce numeris multipli- catis emergit 2, 3, 4 ; ff enim 2 duco n 3 habeo 6, & ff duxero 6 in 4,",
161
+ "output": {
162
+ "diffusiongemma": {
163
+ "text": "In Academia Scientorum. Multiplicare feu in aliumducere est toties fines multiplicandum quoties sumi potest in multiplicatore unitas: Quare si duas in 2, summa itura est. Unum numerum dividere per alia ut 8 per 2 nihil aliud quam invenire quoties 2 continentur in 8. Numerus planus a duois numeris in se invicem ductis prolucitur; igitur est numerus planus quia producitur a numero 6 inducto. Solidus numerus a tribus numeris multiplicatis oritur: Ejusmodi est 24, quae ex tribus enim hisce numeris multiplicis emergit 2, 3, 4; si enim 2 duco 3 habeo 6, & si duco in 4,",
164
+ "seconds": 2.015,
165
+ "tokens_generated": 181,
166
+ "denoising_steps": 11,
167
+ "tokens_per_forward": 16,
168
+ "thought_chars": 0
169
+ },
170
+ "diffusiongemma_canvas": {
171
+ "text": "t Jeademia Scienthrum. I $ mltiplicare feu in aliumduc\u20acreut4 2 eft toties fiimere multiplicandum quoties sumi potest in multiplicatore unitas: Quare si 4 ducas in 2,fumma ituraest8. Unum numcrum divi- lere per aliura ut 8 per 2 nihil aliud [A quam invenire quoties 2 conti- leantur in 8. Numerus planus a duo- >us numeris in fe invicem ductis pro- lucitur v i2 igitur est-numerus planus juia producitur a numero 6in aducto. Solidus numerus a tribus numeris ultiplicatis oritur : Ejusmodi est 24, ;x tribus enim hisce numeris multipli ers emergit 2, 3, 4 ; si enim 2 duco n 3 habeo 6, & siduxero6 in 4,",
172
+ "seconds": 0.516,
173
+ "tokens_generated": 221,
174
+ "denoising_steps": 2,
175
+ "tokens_per_forward": 110,
176
+ "thought_chars": 0
177
+ },
178
+ "gemma4": {
179
+ "text": "De Academia Scientiarum. I multiplicare feu in alium ducereut a est toties simere multiplicandum quoties sumi potest in multiplicatore unitas: Quare si 4 ducas in 2, summa itura est. Unum numcrum dividere per aliura ut 8 per 2 nihil aliud [a quam invenire quoties 2 continentur in 8. Numerus planus a duo-us numeris in se invicem ductis pro-lucitur vi2 igitur est-numerus planus quia producitur a numero 6 in aducto. Solidus numerus a tribus numeris ultiplicatis oritur : Ejusmodi est 24, ;x tribus enim hisce numeris multipli er emergit 2, 3, 4 ; si enim 2 duco n 3 habeo 6, & siduxero6 in 4,",
180
+ "seconds": 14.6,
181
+ "tokens_generated": 200,
182
+ "denoising_steps": null,
183
+ "tokens_per_forward": null,
184
+ "thought_chars": 0
185
+ }
186
+ }
187
+ }
188
+ ]
index.html ADDED
@@ -0,0 +1,411 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1" />
6
+ <title>The Post-OCR Gazette — DiffusionGemma vs Gemma-4</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link href="https://fonts.googleapis.com/css2?family=UnifrakturCook:wght@700&family=Old+Standard+TT:ital,wght@0,400;0,700;1,400&family=Special+Elite&display=swap" rel="stylesheet" />
10
+ <style>
11
+ :root {
12
+ --paper: #f4ecd8;
13
+ --paper-deep: #ece1c6;
14
+ --ink: #1d1a14;
15
+ --ink-soft: #4a4339;
16
+ --rule: #2a251c;
17
+ --oxblood: #7a1f1f;
18
+ --ochre-bg: #e8d27a66;
19
+ --ochre-edge: #a8842a;
20
+ --green-ink: #2e5d34;
21
+ --red-ink: #9c2b22;
22
+ --col-gap: 2.5rem;
23
+ }
24
+ * { box-sizing: border-box; }
25
+ html { background: #d9cdaf; }
26
+ body {
27
+ margin: 0 auto;
28
+ max-width: 1180px;
29
+ padding: 2.2rem 2.4rem 4rem;
30
+ background:
31
+ url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='180' height='180'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='2'/%3E%3CfeColorMatrix values='0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0.05 0'/%3E%3C/filter%3E%3Crect width='180' height='180' filter='url(%23n)'/%3E%3C/svg%3E"),
32
+ var(--paper);
33
+ color: var(--ink);
34
+ font-family: "Old Standard TT", "Iowan Old Style", Georgia, serif;
35
+ font-size: 17px;
36
+ line-height: 1.55;
37
+ box-shadow: 0 0 60px rgba(0,0,0,.35);
38
+ min-height: 100vh;
39
+ }
40
+
41
+ /* ---------- masthead ---------- */
42
+ header { text-align: center; animation: settle .7s ease-out both; }
43
+ .gazette-name {
44
+ font-family: "UnifrakturCook", serif;
45
+ font-size: clamp(2.4rem, 5.5vw, 4.2rem);
46
+ margin: 0;
47
+ letter-spacing: .01em;
48
+ }
49
+ .gazette-name .amp { color: var(--oxblood); }
50
+ .dateline {
51
+ display: flex; align-items: center; gap: 1rem;
52
+ margin: .65rem 0 0;
53
+ font-variant: small-caps; letter-spacing: .22em; font-size: .8rem; color: var(--ink-soft);
54
+ }
55
+ .dateline::before, .dateline::after { content: ""; flex: 1; border-top: 1px solid var(--rule); }
56
+ .double-rule { border: 0; border-top: 3px double var(--rule); margin: .8rem 0 0; }
57
+ .standfirst {
58
+ max-width: 62ch; margin: 1.1rem auto 0; font-style: italic; color: var(--ink-soft); font-size: 1.02rem;
59
+ }
60
+ .standfirst a { color: var(--oxblood); }
61
+
62
+ /* ---------- composing desk ---------- */
63
+ .desk { margin-top: 2rem; animation: settle .7s .12s ease-out both; }
64
+ .desk-head {
65
+ font-variant: small-caps; letter-spacing: .3em; font-size: .78rem;
66
+ color: var(--ink-soft); text-align: center; margin-bottom: .8rem;
67
+ }
68
+ .desk-head::before { content: "❧ "; color: var(--oxblood); }
69
+ .desk-head::after { content: " ☙"; color: var(--oxblood); }
70
+ .chips { display: flex; flex-wrap: wrap; gap: .5rem; justify-content: center; }
71
+ .chip {
72
+ font-family: inherit; font-size: .82rem; cursor: pointer;
73
+ background: transparent; color: var(--ink);
74
+ border: 1px solid var(--rule); border-radius: 0; padding: .3rem .7rem;
75
+ transition: background .15s, color .15s;
76
+ }
77
+ .chip:hover { background: var(--ink); color: var(--paper); }
78
+ .chip.active { background: var(--oxblood); color: var(--paper); border-color: var(--oxblood); }
79
+ textarea {
80
+ width: 100%; min-height: 9.5rem; margin-top: 1rem; padding: 1rem 1.1rem;
81
+ font-family: "Special Elite", "Courier New", monospace; font-size: .92rem; line-height: 1.7;
82
+ color: var(--ink); background: var(--paper-deep);
83
+ border: 1px solid var(--rule); outline: none; resize: vertical;
84
+ box-shadow: inset 0 1px 6px rgba(0,0,0,.12);
85
+ }
86
+ textarea:focus { border-color: var(--oxblood); }
87
+ .controls { display: flex; flex-wrap: wrap; align-items: center; gap: 1.2rem; margin-top: .9rem; }
88
+ .run-btn {
89
+ font-family: inherit; font-variant: small-caps; letter-spacing: .18em; font-size: 1rem;
90
+ background: var(--ink); color: var(--paper); border: 1px solid var(--ink);
91
+ padding: .55rem 1.6rem; cursor: pointer; transition: background .15s, transform .05s;
92
+ }
93
+ .run-btn:hover { background: var(--oxblood); border-color: var(--oxblood); }
94
+ .run-btn:active { transform: translateY(1px); }
95
+ .run-btn[disabled] { opacity: .45; cursor: wait; }
96
+ label.toggle {
97
+ display: inline-flex; align-items: center; gap: .45rem; font-size: .88rem; color: var(--ink-soft); cursor: pointer;
98
+ }
99
+ label.toggle input { accent-color: var(--oxblood); }
100
+ .toggle .tag {
101
+ font-variant: small-caps; font-size: .68rem; letter-spacing: .12em;
102
+ border: 1px solid var(--ochre-edge); color: var(--ochre-edge); padding: 0 .35rem;
103
+ }
104
+ .charcount { margin-left: auto; font-size: .78rem; color: var(--ink-soft); font-variant: small-caps; letter-spacing: .1em; }
105
+ .charcount.over { color: var(--red-ink); }
106
+
107
+ /* ---------- columns ---------- */
108
+ .columns {
109
+ display: grid; grid-template-columns: 1fr 1fr; gap: 0 var(--col-gap);
110
+ margin-top: 2.4rem; position: relative; animation: settle .7s .22s ease-out both;
111
+ }
112
+ .columns::before {
113
+ content: ""; position: absolute; top: 0; bottom: 0; left: 50%;
114
+ border-left: 1px solid var(--rule);
115
+ }
116
+ @media (max-width: 760px) {
117
+ .columns { grid-template-columns: 1fr; gap: 2.2rem; }
118
+ .columns::before { display: none; }
119
+ }
120
+ .col-head { border-top: 3px double var(--rule); border-bottom: 1px solid var(--rule); padding: .45rem 0 .4rem; text-align: center; }
121
+ .col-head h2 { margin: 0; font-size: 1.25rem; letter-spacing: .04em; font-weight: 700; }
122
+ .col-head .col-sub { font-variant: small-caps; letter-spacing: .25em; font-size: .7rem; color: var(--ink-soft); }
123
+ .col-head .col-sub a { color: inherit; text-decoration: none; border-bottom: 1px dotted var(--ink-soft); }
124
+ .statline {
125
+ display: flex; justify-content: center; gap: 1.6rem; padding: .55rem 0;
126
+ border-bottom: 1px solid var(--rule); font-size: .8rem; font-variant: small-caps; letter-spacing: .08em;
127
+ color: var(--ink-soft); min-height: 2.2rem; align-items: baseline;
128
+ }
129
+ .statline b { font-size: 1.25rem; color: var(--ink); font-variant: normal; letter-spacing: 0; }
130
+ .statline .cached-tag { color: var(--oxblood); font-style: italic; font-variant: normal; letter-spacing: 0; }
131
+ .proof {
132
+ padding: 1rem .2rem 0; min-height: 8rem; font-size: 1rem; line-height: 1.75; word-wrap: break-word;
133
+ }
134
+ .proof .placeholder { color: var(--ink-soft); font-style: italic; opacity: .7; }
135
+ .proof .spinner { font-style: italic; color: var(--oxblood); }
136
+ .proof .spinner::after { content: ""; animation: dots 1.2s steps(4) infinite; }
137
+ @keyframes dots { 0% { content: ""; } 25% { content: "."; } 50% { content: ".."; } 75% { content: "..."; } }
138
+ .seg-changed { background: var(--ochre-bg); border-bottom: 2px solid var(--ochre-edge); }
139
+ .seg-added { color: var(--green-ink); border-bottom: 2px solid var(--green-ink); font-weight: 700; }
140
+ .seg-removed { color: var(--red-ink); text-decoration: line-through; opacity: .75; }
141
+ .error-box {
142
+ border: 1px solid var(--red-ink); color: var(--red-ink); padding: .6rem .9rem; font-style: italic; margin-top: 1rem;
143
+ }
144
+
145
+ /* ---------- press (denoising scrubber) ---------- */
146
+ .press { margin-top: 1.4rem; border-top: 1px solid var(--rule); padding-top: .8rem; display: none; }
147
+ .press.visible { display: block; }
148
+ .press .press-head { font-variant: small-caps; letter-spacing: .2em; font-size: .72rem; color: var(--ink-soft); display: flex; justify-content: space-between; }
149
+ .press input[type=range] { width: 100%; accent-color: var(--oxblood); margin: .5rem 0; }
150
+ .press .canvas-view {
151
+ font-family: "Special Elite", monospace; font-size: .8rem; line-height: 1.65;
152
+ background: var(--paper-deep); border: 1px dashed var(--rule);
153
+ padding: .8rem .9rem; min-height: 5.5rem; max-height: 14rem; overflow-y: auto; white-space: pre-wrap;
154
+ }
155
+
156
+ /* ---------- legend ---------- */
157
+ .legend { display: flex; gap: 1.6rem; justify-content: center; margin-top: 2rem; font-size: .8rem; color: var(--ink-soft); flex-wrap: wrap; }
158
+ .legend span.swatch { padding: 0 .4rem; }
159
+
160
+ /* ---------- results ---------- */
161
+ .results { margin-top: 3rem; animation: settle .7s .3s ease-out both; }
162
+ .section-head {
163
+ text-align: center; border-top: 3px double var(--rule); padding-top: .6rem; margin-bottom: 1.1rem;
164
+ }
165
+ .section-head h2 { margin: 0; font-size: 1.45rem; letter-spacing: .03em; }
166
+ .section-head .col-sub { font-variant: small-caps; letter-spacing: .25em; font-size: .72rem; color: var(--ink-soft); }
167
+ table { border-collapse: collapse; margin: 0 auto; font-size: .92rem; }
168
+ th, td { padding: .45rem .9rem; border-bottom: 1px solid #00000022; text-align: right; }
169
+ th:first-child, td:first-child { text-align: left; }
170
+ thead th { border-bottom: 2px solid var(--rule); font-variant: small-caps; letter-spacing: .06em; font-weight: 700; }
171
+ tbody tr:first-child td { color: var(--ink-soft); font-style: italic; }
172
+ tbody tr:nth-child(2) td:first-child::before { content: "★ "; color: var(--oxblood); }
173
+ .footnotes { max-width: 75ch; margin: 1.3rem auto 0; font-size: .85rem; color: var(--ink-soft); }
174
+ .footnotes p { margin: .3rem 0; }
175
+
176
+ footer {
177
+ margin-top: 3.5rem; border-top: 3px double var(--rule); padding-top: 1rem;
178
+ text-align: center; font-size: .82rem; color: var(--ink-soft);
179
+ }
180
+ footer a { color: var(--oxblood); }
181
+
182
+ @keyframes settle { from { opacity: 0; transform: translateY(14px); } to { opacity: 1; transform: none; } }
183
+ @media (prefers-reduced-motion: reduce) { * { animation: none !important; transition: none !important; } }
184
+ </style>
185
+ </head>
186
+ <body>
187
+
188
+ <header>
189
+ <h1 class="gazette-name">The Post‑OCR Gazette</h1>
190
+ <div class="dateline"><span>Vol. I — Diffusion <span style="font-variant:normal">vs</span> Autoregression — Price one GPU‑second</span></div>
191
+ <hr class="double-rule" />
192
+ <p class="standfirst">
193
+ In which <a href="https://huggingface.co/google/diffusiongemma-26B-A4B-it">DiffusionGemma‑26B‑A4B</a>,
194
+ an experimental block‑diffusion language model, and
195
+ <a href="https://huggingface.co/google/gemma-4-E4B-it">Gemma‑4‑E4B</a>, its autoregressive cousin,
196
+ are set against one another in the correction of nineteenth‑century newspaper OCR.
197
+ Marks upon the proofs show what each model <em>changed</em> in your text.
198
+ </p>
199
+ </header>
200
+
201
+ <section class="desk">
202
+ <div class="desk-head">The Composing Desk</div>
203
+ <div class="chips" id="chips"></div>
204
+ <textarea id="ocr-input" spellcheck="false" placeholder="Paste noisy OCR text here, or pick a passage above…"></textarea>
205
+ <div class="controls">
206
+ <button class="run-btn" id="run">Correct the Proofs</button>
207
+ <label class="toggle">
208
+ <input type="checkbox" id="canvas-toggle" />
209
+ seed canvas with OCR text <span class="tag">experimental — under‑corrects</span>
210
+ </label>
211
+ <span class="charcount" id="charcount"></span>
212
+ </div>
213
+ </section>
214
+
215
+ <section class="columns">
216
+ <div class="col" id="col-dg">
217
+ <div class="col-head">
218
+ <h2>The Diffusionist</h2>
219
+ <div class="col-sub"><a href="https://huggingface.co/google/diffusiongemma-26B-A4B-it">DiffusionGemma‑26B‑A4B‑it</a> · denoises 256 tokens in parallel</div>
220
+ </div>
221
+ <div class="statline" id="stats-dg"><span class="placeholder">awaiting copy</span></div>
222
+ <div class="proof" id="proof-dg"><span class="placeholder">The diffusion model’s corrected proof will appear here.</span></div>
223
+ <div class="press" id="press">
224
+ <div class="press-head"><span>The Press — denoising, step by step</span><span id="press-step"></span></div>
225
+ <input type="range" id="press-slider" min="0" max="0" value="0" step="1" />
226
+ <div class="canvas-view" id="press-canvas"></div>
227
+ </div>
228
+ </div>
229
+ <div class="col" id="col-g4">
230
+ <div class="col-head">
231
+ <h2>The Autoregressive</h2>
232
+ <div class="col-sub"><a href="https://huggingface.co/google/gemma-4-E4B-it">Gemma‑4‑E4B‑it</a> · one token after another, greedily</div>
233
+ </div>
234
+ <div class="statline" id="stats-g4"><span class="placeholder">awaiting copy</span></div>
235
+ <div class="proof" id="proof-g4"><span class="placeholder">The autoregressive model’s corrected proof will appear here.</span></div>
236
+ </div>
237
+ </section>
238
+
239
+ <div class="legend">
240
+ <span><span class="swatch seg-changed">changed</span> — altered from the input</span>
241
+ <span><span class="swatch seg-added">added</span> — inserted text</span>
242
+ <span><span class="swatch seg-removed">removed</span> — deleted from the input</span>
243
+ </div>
244
+
245
+ <section class="results">
246
+ <div class="section-head">
247
+ <h2>The Ledger</h2>
248
+ <div class="col-sub">75 passages of BLN600 · A100‑80GB · full methodology in the repository</div>
249
+ </div>
250
+ <div id="results-table"><p style="text-align:center;font-style:italic">Fetching the ledger…</p></div>
251
+ <div class="footnotes" id="results-notes"></div>
252
+ </section>
253
+
254
+ <footer>
255
+ <p>
256
+ Benchmark texts: <a href="https://doi.org/10.15131/shef.data.25439023">BLN600</a> (CC‑BY‑NC — metrics only republished here).
257
+ Demo passages: ICDAR2019 post‑OCR (CC‑BY‑4.0). DiffusionGemma is experimental and one day old at press time;
258
+ its sampler has no greedy mode, so the comparison is defaults‑vs‑greedy. Single run, no significance testing.
259
+ </p>
260
+ <p>Set in Old Standard &amp; Special Elite · powered by ZeroGPU · by <a href="https://huggingface.co/davanstrien">davanstrien</a></p>
261
+ </footer>
262
+
263
+ <script type="module">
264
+ import { Client } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
265
+
266
+ const $ = (id) => document.getElementById(id);
267
+ const MAX_CHARS = 1200;
268
+ let client = null, examples = [], activeExample = null, snapshots = [];
269
+
270
+ const connect = (async () => { client = await Client.connect(window.location.origin); })();
271
+
272
+ /* ---------- helpers ---------- */
273
/**
 * Replace the contents of `el` with one <span> per diff segment.
 *
 * Each segment is read as `{ op, text }`. Segments whose op is anything other
 * than "same" get the CSS class `seg-<op>`. A "removed" segment that is only
 * whitespace is drawn as " ⌫ " so the deletion remains visible.
 * Text is written through `textContent`, so it is never parsed as HTML.
 */
function renderDiff(el, segs) {
  el.innerHTML = "";
  for (const seg of segs) {
    const node = document.createElement("span");
    if (seg.op !== "same") {
      node.className = "seg-" + seg.op;
    }
    let shown = seg.text;
    if (seg.op === "removed" && !seg.text.trim()) {
      shown = " ⌫ ";
    }
    node.textContent = shown;
    el.appendChild(node);
  }
}
282
/**
 * Write the timing line for one model run into `el`.
 *
 * Reads `r.seconds` and `r.tokens_per_second`; the "steps" cell is emitted only
 * when `r.denoising_steps` is truthy. `extra` is appended verbatim as HTML.
 */
function stats(el, r, extra = "") {
  const cells = [
    `<span><b>${r.seconds}</b> s</span>`,
    `<span><b>${r.tokens_per_second}</b> tok/s</span>`,
  ];
  if (r.denoising_steps) {
    cells.push(`<span><b>${r.denoising_steps}</b> steps</span>`);
  }
  el.innerHTML = cells.join("") + extra;
}
286
/** Show a "working" indicator in `el`; `msg` is inserted as HTML inside a `.spinner` span. */
function spinner(el, msg) {
  const markup = `<span class="spinner">${msg}</span>`;
  el.innerHTML = markup;
}
287
/**
 * Show an error message in `el` inside an `.error-box` div.
 *
 * `msg` is treated as plain text: callers pass server-supplied error strings
 * and exception messages, so HTML metacharacters are escaped before the
 * message is placed into `innerHTML`.
 */
function showError(el, msg) {
  const ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const safe = String(msg).replace(/[&<>"']/g, (ch) => ENTITIES[ch]);
  el.innerHTML = `<div class="error-box">${safe}</div>`;
}
288
+
289
/**
 * Store the denoising snapshots and show or hide the "press" panel.
 *
 * With no snapshots the panel loses its `visible` class. Otherwise the panel
 * is shown, the slider range is set to cover every snapshot, and the slider
 * is positioned on the last one before the view is refreshed.
 */
function showSnapshots(snaps) {
  snapshots = snaps || [];
  const count = snapshots.length;
  const panel = $("press");
  if (!count) {
    panel.classList.remove("visible");
    return;
  }
  panel.classList.add("visible");
  const range = $("press-slider");
  const lastIndex = count - 1;
  range.max = lastIndex;
  range.value = lastIndex;
  updatePress();
}
299
/** Sync the step label and canvas text with the slider's current position. */
function updatePress() {
  const index = Number($("press-slider").value);
  const label = `step ${index + 1} of ${snapshots.length}`;
  $("press-step").textContent = label;
  $("press-canvas").textContent = snapshots[index] || "";
}
304
+ $("press-slider").addEventListener("input", updatePress);
305
+
306
+ /* ---------- examples ---------- */
307
/**
 * Fetch the demo passages from `data/examples` and build one chip button each.
 *
 * Clicking a chip marks it active, copies the passage into the textarea,
 * records it as the active example, refreshes the character count and, when
 * the passage carries a `cached` result, renders that result. The first chip
 * is clicked automatically so the page opens with a passage loaded.
 *
 * A failed request, a non-2xx response, or a payload that is not an array is
 * logged and leaves the chip row empty instead of surfacing as an unhandled
 * promise rejection; the textarea stays usable for pasted text.
 */
async function loadExamples() {
  try {
    const res = await fetch("data/examples");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const payload = await res.json();
    if (!Array.isArray(payload)) throw new Error("unexpected examples payload");
    examples = payload;
  } catch (err) {
    examples = [];
    console.error("Could not load the example passages:", err);
    return;
  }
  const chips = $("chips");
  examples.forEach((e, i) => {
    const b = document.createElement("button");
    b.className = "chip";
    b.textContent = e.label;
    b.onclick = () => {
      document.querySelectorAll(".chip").forEach(c => c.classList.remove("active"));
      b.classList.add("active");
      $("ocr-input").value = e.ocr_input;
      activeExample = e;
      updateCount();
      if (e.cached) renderCached(e);
    };
    chips.appendChild(b);
    // Pre-select the first passage so the page never opens empty.
    if (i === 0) b.click();
  });
}
326
/**
 * Render the pre-computed results attached to an example passage.
 *
 * Reads `e.cached.diffusiongemma` and `e.cached.gemma4`; each one that is
 * present gets its stat line (tokens per second is derived as
 * tokens_generated / seconds, rounded) and its diff. The snapshot panel is
 * hidden afterwards because cached results are rendered without snapshots.
 */
function renderCached(e) {
  const { diffusiongemma: dg, gemma4: g4 } = e.cached;
  const tag = `<span class="cached-tag">— from the morning edition (pre‑computed)</span>`;
  const rate = (run) => Math.round(run.tokens_generated / run.seconds);
  if (dg) {
    const dgStats = {
      seconds: dg.seconds,
      tokens_per_second: rate(dg),
      denoising_steps: dg.denoising_steps,
    };
    stats($("stats-dg"), dgStats, tag);
    renderDiff($("proof-dg"), dg.diff);
  }
  if (g4) {
    const g4Stats = { seconds: g4.seconds, tokens_per_second: rate(g4) };
    stats($("stats-g4"), g4Stats, tag);
    renderDiff($("proof-g4"), g4.diff);
  }
  showSnapshots([]);
}
339
+
340
+ /* ---------- character count ---------- */
341
/** Refresh the "n / MAX_CHARS characters" counter and flag it with `over` past the cap. */
function updateCount() {
  const length = $("ocr-input").value.length;
  const counter = $("charcount");
  const isOver = length > MAX_CHARS;
  counter.textContent = `${length} / ${MAX_CHARS} characters`;
  counter.classList.toggle("over", isOver);
}
347
+ $("ocr-input").addEventListener("input", () => { activeExample = null; updateCount(); });
348
+ updateCount();
349
+
350
+ /* ---------- run ---------- */
351
/*
 * "Correct the Proofs" click handler.
 *
 * Validates the textarea (non-empty after trimming, at most MAX_CHARS), then
 * calls /run_diffusiongemma followed by /run_gemma4 and renders each result
 * in its own column. The button is disabled for the duration of the run.
 *
 * Everything that can throw, including waiting for the Gradio client, runs
 * inside the try block so the button is always re-enabled. If the initial
 * connection failed, a fresh connection is attempted on each click. An
 * exception is reported in the column of the stage that raised it, so a
 * Gemma-4 failure no longer wipes a successful DiffusionGemma result.
 */
$("run").addEventListener("click", async () => {
  const text = $("ocr-input").value.trim();
  if (!text) return;
  if (text.length > MAX_CHARS) { showError($("proof-dg"), `Input too long (${text.length} chars; the cap is ${MAX_CHARS} — DiffusionGemma writes a single 256‑token block).`); return; }
  const btn = $("run");
  btn.disabled = true;
  spinner($("proof-dg"), "denoising the canvas"); $("stats-dg").innerHTML = "";
  spinner($("proof-g4"), "queued behind the diffusionist"); $("stats-g4").innerHTML = "";
  showSnapshots([]);
  // Tracks which stage is in flight so the catch block reports in the right column.
  let stage = "dg";
  try {
    // Wait for the page-load connection attempt; if it failed, try again now.
    await connect.catch(() => {});
    if (!client) client = await Client.connect(window.location.origin);

    const dg = (await client.predict("/run_diffusiongemma", {
      ocr_text: text, canvas_init: $("canvas-toggle").checked,
    })).data[0];
    if (dg.error) showError($("proof-dg"), dg.error);
    else {
      stats($("stats-dg"), dg, dg.canvas_init ? `<span class="cached-tag">— OCR‑seeded canvas</span>` : "");
      renderDiff($("proof-dg"), dg.diff);
      showSnapshots(dg.snapshots);
    }

    stage = "g4";
    spinner($("proof-g4"), "composing, one token at a time");
    const g4 = (await client.predict("/run_gemma4", { ocr_text: text })).data[0];
    if (g4.error) showError($("proof-g4"), g4.error);
    else { stats($("stats-g4"), g4); renderDiff($("proof-g4"), g4.diff); }
  } catch (err) {
    const detail = (err && err.message) || err;
    if (stage === "dg") {
      showError($("proof-dg"), `The press has jammed: ${detail}`);
      // Gemma-4 never started: clear its "queued" spinner.
      $("proof-g4").innerHTML = "";
    } else {
      showError($("proof-g4"), `The press has jammed: ${detail}`);
    }
  } finally {
    btn.disabled = false;
  }
});
382
+
383
+ /* ---------- results ledger ---------- */
384
/**
 * Fetch the benchmark summary from `data/results` and render "The Ledger".
 *
 * `summary_md` is read as Markdown text: lines starting with "|" form the
 * table (first line is the header, second the separator, the rest are rows)
 * and lines starting with "Micro" or "Mean" become footnotes.
 *
 * Cell and footnote text is HTML-escaped before being placed into
 * `innerHTML`. A failed request, a non-2xx response, a missing summary, or a
 * summary without a table all end in the "could not be fetched" message
 * rather than leaving the loading placeholder on screen.
 */
async function loadResults() {
  const ENTITIES = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
  const esc = (s) => String(s).replace(/[&<>"']/g, (ch) => ENTITIES[ch]);
  const unavailable = "<p style='text-align:center;font-style:italic'>The ledger could not be fetched.</p>";
  try {
    const res = await fetch("data/results");
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    if (typeof data.summary_md !== "string") throw new Error("summary_md missing from results payload");
    const lines = data.summary_md.split(/\r?\n/);
    const tableLines = lines.filter(l => l.trim().startsWith("|"));
    if (tableLines.length > 2) {
      // Strip the outer pipes, then split: "| a | b |" -> ["a", "b"].
      const parse = (l) => l.trim().replace(/^\|/, "").replace(/\|$/, "").split("|").map(c => c.trim());
      const head = parse(tableLines[0]);
      // tableLines[1] is the Markdown separator row ("|---|---|"), so rows start at index 2.
      const rows = tableLines.slice(2).map(parse);
      let html = "<table><thead><tr>" + head.map(h => `<th>${esc(h)}</th>`).join("") + "</tr></thead><tbody>";
      for (const r of rows) html += "<tr>" + r.map(c => `<td>${esc(c)}</td>`).join("") + "</tr>";
      html += "</tbody></table>";
      $("results-table").innerHTML = html;
    } else {
      $("results-table").innerHTML = unavailable;
    }
    const notes = lines.filter(l => /^(Micro|Mean)/.test(l)).map(l => `<p>${esc(l)}</p>`).join("");
    $("results-notes").innerHTML = notes +
      `<p>Over‑correction: of input characters already correct (vs the gold transcription), the share the model changed.
Fix rate: of input characters that were wrong, the share the model changed.
The OCR‑seeded‑canvas condition (toggle above) converges in 2–5 steps but barely edits — a negative result, reported honestly.</p>`;
  } catch (err) {
    console.error("Could not load the results ledger:", err);
    $("results-table").innerHTML = unavailable;
  }
}
406
+
407
+ loadExamples();
408
+ loadResults();
409
+ </script>
410
+ </body>
411
+ </html>
requirements.txt CHANGED
@@ -1,4 +1,6 @@
 
1
  transformers>=5.11,<6
2
  accelerate
3
  pillow
4
  torchvision
 
 
1
+ gradio>=6.16.0
2
  transformers>=5.11,<6
3
  accelerate
4
  pillow
5
  torchvision
6
+ spaces
results/per_passage_metrics.jsonl ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"id": "bln600/3200810915", "n_chars_gold": 980, "cer_input": 0.02040816326530612, "wer_input": 0.09027777777777778, "cer_dg": 0.00510204081632653, "wer_dg": 0.034722222222222224, "rel_cer_red_dg": 0.75, "overcorr_dg": 0.002068252326783868, "fixrate_dg": 1.0, "seconds_dg": 1.051, "tok_s_dg": 194.10085632730733, "denoising_steps_dg": 5, "cer_dgc": 0.02040816326530612, "wer_dgc": 0.09027777777777778, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.069, "tok_s_dgc": 202.99345182413472, "denoising_steps_dgc": 5, "cer_g4": 0.0020408163265306124, "wer_g4": 0.013888888888888888, "rel_cer_red_g4": 0.9, "overcorr_g4": 0.0, "fixrate_g4": 1.0, "seconds_g4": 14.924, "tok_s_g4": 13.669257571696596}
2
+ {"id": "bln600/3200807901", "n_chars_gold": 911, "cer_input": 0.02854006586169045, "wer_input": 0.15441176470588236, "cer_dg": 0.010976948408342482, "wer_dg": 0.03676470588235294, "rel_cer_red_dg": 0.6153846153846154, "overcorr_dg": 0.012168141592920354, "fixrate_dg": 0.7916666666666666, "seconds_dg": 1.057, "tok_s_dg": 188.2686849574267, "denoising_steps_dg": 5, "cer_dgc": 0.02854006586169045, "wer_dgc": 0.15441176470588236, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.537, "tok_s_dgc": 407.8212290502793, "denoising_steps_dgc": 2, "cer_g4": 0.005488474204171241, "wer_g4": 0.029411764705882353, "rel_cer_red_g4": 0.8076923076923076, "overcorr_g4": 0.00331858407079646, "fixrate_g4": 0.7083333333333334, "seconds_g4": 15.093, "tok_s_g4": 13.714967203339297}
3
+ {"id": "bln600/3206205267", "n_chars_gold": 759, "cer_input": 0.11594202898550725, "wer_input": 0.27972027972027974, "cer_dg": 0.08959156785243742, "wer_dg": 0.14685314685314685, "rel_cer_red_dg": 0.22727272727272724, "overcorr_dg": 0.008275862068965517, "fixrate_dg": 0.5555555555555556, "seconds_dg": 3.012, "tok_s_dg": 63.41301460823373, "denoising_steps_dg": 16, "cer_dgc": 0.11594202898550725, "wer_dgc": 0.27972027972027974, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.538, "tok_s_dgc": 407.0631970260223, "denoising_steps_dgc": 2, "cer_g4": 0.10408432147562582, "wer_g4": 0.2097902097902098, "rel_cer_red_g4": 0.10227272727272728, "overcorr_g4": 0.001379310344827586, "fixrate_g4": 0.18055555555555555, "seconds_g4": 15.211, "tok_s_g4": 13.674314640720532}
4
+ {"id": "bln600/3206195717", "n_chars_gold": 903, "cer_input": 0.028792912513842746, "wer_input": 0.11409395973154363, "cer_dg": 0.015503875968992248, "wer_dg": 0.06711409395973154, "rel_cer_red_dg": 0.46153846153846156, "overcorr_dg": 0.001128668171557562, "fixrate_dg": 0.5909090909090909, "seconds_dg": 1.041, "tok_s_dg": 193.0835734870317, "denoising_steps_dg": 5, "cer_dgc": 0.028792912513842746, "wer_dgc": 0.11409395973154363, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.534, "tok_s_dgc": 402.6217228464419, "denoising_steps_dgc": 2, "cer_g4": 0.015503875968992248, "wer_g4": 0.0738255033557047, "rel_cer_red_g4": 0.46153846153846156, "overcorr_g4": 0.0, "fixrate_g4": 0.5454545454545454, "seconds_g4": 14.775, "tok_s_g4": 13.739424703891709}
5
+ {"id": "bln600/3205317819", "n_chars_gold": 879, "cer_input": 0.04664391353811149, "wer_input": 0.1375, "cer_dg": 0.030716723549488054, "wer_dg": 0.08125, "rel_cer_red_dg": 0.3414634146341463, "overcorr_dg": 0.012895662368112544, "fixrate_dg": 0.8648648648648649, "seconds_dg": 1.041, "tok_s_dg": 187.31988472622479, "denoising_steps_dg": 5, "cer_dgc": 0.04664391353811149, "wer_dgc": 0.1375, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 413.0841121495327, "denoising_steps_dgc": 2, "cer_g4": 0.026166097838452786, "wer_g4": 0.05625, "rel_cer_red_g4": 0.43902439024390244, "overcorr_g4": 0.0035169988276670576, "fixrate_g4": 0.7837837837837838, "seconds_g4": 14.724, "tok_s_g4": 13.71909807117631}
6
+ {"id": "bln600/3200811336", "n_chars_gold": 750, "cer_input": 0.07333333333333333, "wer_input": 0.3007518796992481, "cer_dg": 0.014666666666666666, "wer_dg": 0.05263157894736842, "rel_cer_red_dg": 0.7999999999999999, "overcorr_dg": 0.006983240223463687, "fixrate_dg": 1.0, "seconds_dg": 1.765, "tok_s_dg": 103.68271954674222, "denoising_steps_dg": 9, "cer_dgc": 0.27066666666666667, "wer_dgc": 0.44360902255639095, "rel_cer_red_dgc": -2.690909090909091, "overcorr_dgc": 0.0, "fixrate_dgc": 0.02040816326530612, "seconds_dgc": 1.995, "tok_s_dgc": 128.32080200501252, "denoising_steps_dgc": 10, "cer_g4": 0.02666666666666667, "wer_g4": 0.12030075187969924, "rel_cer_red_g4": 0.6363636363636362, "overcorr_g4": 0.0, "fixrate_g4": 0.7551020408163265, "seconds_g4": 14.135, "tok_s_g4": 13.724796604174037}
7
+ {"id": "bln600/3200810789", "n_chars_gold": 720, "cer_input": 0.09444444444444444, "wer_input": 0.25925925925925924, "cer_dg": 0.043055555555555555, "wer_dg": 0.07407407407407407, "rel_cer_red_dg": 0.5441176470588235, "overcorr_dg": 0.019005847953216373, "fixrate_dg": 0.9253731343283582, "seconds_dg": 1.736, "tok_s_dg": 95.6221198156682, "denoising_steps_dg": 9, "cer_dgc": 0.09444444444444444, "wer_dgc": 0.25925925925925924, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.53, "tok_s_dgc": 411.3207547169811, "denoising_steps_dgc": 2, "cer_g4": 0.0625, "wer_g4": 0.1388888888888889, "rel_cer_red_g4": 0.338235294117647, "overcorr_g4": 0.011695906432748537, "fixrate_g4": 0.47761194029850745, "seconds_g4": 13.862, "tok_s_g4": 13.706535853412205}
8
+ {"id": "bln600/3206321007", "n_chars_gold": 938, "cer_input": 0.011727078891257996, "wer_input": 0.0625, "cer_dg": 0.0021321961620469083, "wer_dg": 0.0125, "rel_cer_red_dg": 0.8181818181818182, "overcorr_dg": 0.001072961373390558, "fixrate_dg": 1.0, "seconds_dg": 1.047, "tok_s_dg": 193.8872970391595, "denoising_steps_dg": 5, "cer_dgc": 0.011727078891257996, "wer_dgc": 0.0625, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.704, "tok_s_dgc": 296.875, "denoising_steps_dgc": 3, "cer_g4": 0.0031982942430703624, "wer_g4": 0.025, "rel_cer_red_g4": 0.7272727272727272, "overcorr_g4": 0.0, "fixrate_g4": 0.8, "seconds_g4": 14.837, "tok_s_g4": 13.749410258138438}
9
+ {"id": "bln600/3200810696", "n_chars_gold": 785, "cer_input": 0.08789808917197452, "wer_input": 0.20454545454545456, "cer_dg": 0.02038216560509554, "wer_dg": 0.06060606060606061, "rel_cer_red_dg": 0.7681159420289856, "overcorr_dg": 0.003911342894393742, "fixrate_dg": 0.8260869565217391, "seconds_dg": 2.126, "tok_s_dg": 86.5475070555033, "denoising_steps_dg": 11, "cer_dgc": 0.08789808917197452, "wer_dgc": 0.20454545454545456, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.238, "tok_s_dgc": 177.7059773828756, "denoising_steps_dgc": 6, "cer_g4": 0.06878980891719745, "wer_g4": 0.16666666666666666, "rel_cer_red_g4": 0.21739130434782605, "overcorr_g4": 0.001303780964797914, "fixrate_g4": 0.30434782608695654, "seconds_g4": 15.031, "tok_s_g4": 13.70500964673009}
10
+ {"id": "bln600/3206247303", "n_chars_gold": 737, "cer_input": 0.09905020352781546, "wer_input": 0.28, "cer_dg": 0.06377204884667571, "wer_dg": 0.144, "rel_cer_red_dg": 0.35616438356164376, "overcorr_dg": 0.031294452347083924, "fixrate_dg": 0.9130434782608695, "seconds_dg": 2.99, "tok_s_dg": 58.8628762541806, "denoising_steps_dg": 16, "cer_dgc": 0.09905020352781546, "wer_dgc": 0.28, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.71, "tok_s_dgc": 309.85915492957747, "denoising_steps_dgc": 3, "cer_g4": 0.05970149253731343, "wer_g4": 0.16, "rel_cer_red_g4": 0.3972602739726027, "overcorr_g4": 0.007112375533428165, "fixrate_g4": 0.6956521739130435, "seconds_g4": 14.213, "tok_s_g4": 13.719833954830086}
11
+ {"id": "bln600/3200808401", "n_chars_gold": 724, "cer_input": 0.11602209944751381, "wer_input": 0.31451612903225806, "cer_dg": 0.03867403314917127, "wer_dg": 0.08064516129032258, "rel_cer_red_dg": 0.6666666666666666, "overcorr_dg": 0.012949640287769784, "fixrate_dg": 0.8795180722891566, "seconds_dg": 1.936, "tok_s_dg": 90.39256198347108, "denoising_steps_dg": 10, "cer_dgc": 0.11602209944751381, "wer_dgc": 0.31451612903225806, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.707, "tok_s_dgc": 311.1739745403112, "denoising_steps_dgc": 3, "cer_g4": 0.062154696132596686, "wer_g4": 0.1532258064516129, "rel_cer_red_g4": 0.46428571428571425, "overcorr_g4": 0.0028776978417266188, "fixrate_g4": 0.4819277108433735, "seconds_g4": 14.12, "tok_s_g4": 13.739376770538245}
12
+ {"id": "bln600/3200808398", "n_chars_gold": 756, "cer_input": 0.10449735449735449, "wer_input": 0.37815126050420167, "cer_dg": 0.03968253968253968, "wer_dg": 0.08403361344537816, "rel_cer_red_dg": 0.620253164556962, "overcorr_dg": 0.02631578947368421, "fixrate_dg": 0.9210526315789473, "seconds_dg": 1.562, "tok_s_dg": 103.0729833546735, "denoising_steps_dg": 8, "cer_dgc": 0.10449735449735449, "wer_dgc": 0.37815126050420167, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.532, "tok_s_dgc": 415.4135338345864, "denoising_steps_dgc": 2, "cer_g4": 0.062169312169312166, "wer_g4": 0.17647058823529413, "rel_cer_red_g4": 0.4050632911392405, "overcorr_g4": 0.002770083102493075, "fixrate_g4": 0.4868421052631579, "seconds_g4": 13.707, "tok_s_g4": 13.715619756328882}
13
+ {"id": "bln600/3200810738", "n_chars_gold": 890, "cer_input": 0.06404494382022471, "wer_input": 0.16666666666666666, "cer_dg": 0.04157303370786517, "wer_dg": 0.05555555555555555, "rel_cer_red_dg": 0.35087719298245607, "overcorr_dg": 0.00813953488372093, "fixrate_dg": 0.6590909090909091, "seconds_dg": 1.41, "tok_s_dg": 141.13475177304966, "denoising_steps_dg": 7, "cer_dgc": 0.06404494382022471, "wer_dgc": 0.16666666666666666, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.536, "tok_s_dgc": 412.3134328358209, "denoising_steps_dgc": 2, "cer_g4": 0.03932584269662921, "wer_g4": 0.05555555555555555, "rel_cer_red_g4": 0.38596491228070173, "overcorr_g4": 0.005813953488372093, "fixrate_g4": 0.6136363636363636, "seconds_g4": 14.664, "tok_s_g4": 13.707037643207856}
14
+ {"id": "bln600/3205295659", "n_chars_gold": 863, "cer_input": 0.0776361529548088, "wer_input": 0.2517482517482518, "cer_dg": 0.04055619930475087, "wer_dg": 0.0979020979020979, "rel_cer_red_dg": 0.4776119402985074, "overcorr_dg": 0.019559902200488997, "fixrate_dg": 0.9047619047619048, "seconds_dg": 1.749, "tok_s_dg": 101.20068610634648, "denoising_steps_dg": 9, "cer_dgc": 0.0776361529548088, "wer_dgc": 0.2517482517482518, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 403.7383177570093, "denoising_steps_dgc": 2, "cer_g4": 0.03707995365005794, "wer_g4": 0.0979020979020979, "rel_cer_red_g4": 0.5223880597014925, "overcorr_g4": 0.008557457212713936, "fixrate_g4": 0.6825396825396826, "seconds_g4": 12.979, "tok_s_g4": 13.714461822944758}
15
+ {"id": "bln600/3206191683", "n_chars_gold": 930, "cer_input": 0.021505376344086023, "wer_input": 0.10457516339869281, "cer_dg": 0.012903225806451613, "wer_dg": 0.06535947712418301, "rel_cer_red_dg": 0.4, "overcorr_dg": 0.004347826086956522, "fixrate_dg": 0.8, "seconds_dg": 1.025, "tok_s_dg": 199.02439024390245, "denoising_steps_dg": 5, "cer_dgc": 0.021505376344086023, "wer_dgc": 0.10457516339869281, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.53, "tok_s_dgc": 411.3207547169811, "denoising_steps_dgc": 2, "cer_g4": 0.010752688172043012, "wer_g4": 0.0457516339869281, "rel_cer_red_g4": 0.5, "overcorr_g4": 0.002173913043478261, "fixrate_g4": 0.8, "seconds_g4": 14.743, "tok_s_g4": 13.701417621922268}
16
+ {"id": "bln600/3206277695", "n_chars_gold": 809, "cer_input": 0.08776266996291718, "wer_input": 0.3387096774193548, "cer_dg": 0.049443757725587144, "wer_dg": 0.11290322580645161, "rel_cer_red_dg": 0.43661971830985913, "overcorr_dg": 0.02340702210663199, "fixrate_dg": 0.9056603773584906, "seconds_dg": 7.955, "tok_s_dg": 20.86737900691389, "denoising_steps_dg": 44, "cer_dgc": 0.242274412855377, "wer_dgc": 0.4596774193548387, "rel_cer_red_dgc": -1.7605633802816902, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.808, "tok_s_dgc": 141.5929203539823, "denoising_steps_dgc": 9, "cer_g4": 0.0754017305315204, "wer_g4": 0.2903225806451613, "rel_cer_red_g4": 0.14084507042253516, "overcorr_g4": 0.0039011703511053317, "fixrate_g4": 0.20754716981132076, "seconds_g4": 15.438, "tok_s_g4": 13.667573519885995}
17
+ {"id": "bln600/3200807974", "n_chars_gold": 958, "cer_input": 0.022964509394572025, "wer_input": 0.1258741258741259, "cer_dg": 0.003131524008350731, "wer_dg": 0.02097902097902098, "rel_cer_red_dg": 0.8636363636363636, "overcorr_dg": 0.0020942408376963353, "fixrate_dg": 1.0, "seconds_dg": 1.031, "tok_s_dg": 194.95635305528614, "denoising_steps_dg": 5, "cer_dgc": 0.021920668058455117, "wer_dgc": 0.11888111888111888, "rel_cer_red_dgc": 0.04545454545454539, "overcorr_dgc": 0.0, "fixrate_dgc": 0.047619047619047616, "seconds_dgc": 0.706, "tok_s_dgc": 310.19830028328613, "denoising_steps_dgc": 3, "cer_g4": 0.003131524008350731, "wer_g4": 0.027972027972027972, "rel_cer_red_g4": 0.8636363636363636, "overcorr_g4": 0.0010471204188481676, "fixrate_g4": 1.0, "seconds_g4": 14.642, "tok_s_g4": 13.727632837044121}
18
+ {"id": "bln600/3206328498", "n_chars_gold": 834, "cer_input": 0.06115107913669065, "wer_input": 0.2462686567164179, "cer_dg": 0.03597122302158273, "wer_dg": 0.09701492537313433, "rel_cer_red_dg": 0.411764705882353, "overcorr_dg": 0.01373283395755306, "fixrate_dg": 0.9722222222222222, "seconds_dg": 1.768, "tok_s_dg": 109.72850678733032, "denoising_steps_dg": 9, "cer_dgc": 0.06115107913669065, "wer_dgc": 0.2462686567164179, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.536, "tok_s_dgc": 404.8507462686567, "denoising_steps_dgc": 2, "cer_g4": 0.045563549160671464, "wer_g4": 0.13432835820895522, "rel_cer_red_g4": 0.2549019607843137, "overcorr_g4": 0.0149812734082397, "fixrate_g4": 0.6111111111111112, "seconds_g4": 14.601, "tok_s_g4": 13.697691938908292}
19
+ {"id": "bln600/3200813273", "n_chars_gold": 794, "cer_input": 0.08438287153652393, "wer_input": 0.3492063492063492, "cer_dg": 0.0491183879093199, "wer_dg": 0.11904761904761904, "rel_cer_red_dg": 0.41791044776119407, "overcorr_dg": 0.02230971128608924, "fixrate_dg": 0.9047619047619048, "seconds_dg": 1.947, "tok_s_dg": 88.34103749357986, "denoising_steps_dg": 10, "cer_dgc": 0.08438287153652393, "wer_dgc": 0.3492063492063492, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.537, "tok_s_dgc": 411.5456238361266, "denoising_steps_dgc": 2, "cer_g4": 0.027707808564231738, "wer_g4": 0.09523809523809523, "rel_cer_red_g4": 0.6716417910447762, "overcorr_g4": 0.003937007874015748, "fixrate_g4": 0.7619047619047619, "seconds_g4": 13.188, "tok_s_g4": 13.724598119502577}
20
+ {"id": "bln600/3207167129", "n_chars_gold": 869, "cer_input": 0.04142692750287687, "wer_input": 0.16326530612244897, "cer_dg": 0.00805523590333717, "wer_dg": 0.04081632653061224, "rel_cer_red_dg": 0.8055555555555556, "overcorr_dg": 0.0035671819262782403, "fixrate_dg": 0.9655172413793104, "seconds_dg": 2.292, "tok_s_dg": 86.82373472949389, "denoising_steps_dg": 12, "cer_dgc": 0.04142692750287687, "wer_dgc": 0.16326530612244897, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.533, "tok_s_dgc": 414.6341463414634, "denoising_steps_dgc": 2, "cer_g4": 0.010356731875719217, "wer_g4": 0.047619047619047616, "rel_cer_red_g4": 0.75, "overcorr_g4": 0.0023781212841854932, "fixrate_g4": 0.896551724137931, "seconds_g4": 14.677, "tok_s_g4": 13.694896777270559}
21
+ {"id": "bln600/3206247284", "n_chars_gold": 794, "cer_input": 0.08060453400503778, "wer_input": 0.2619047619047619, "cer_dg": 0.05667506297229219, "wer_dg": 0.1349206349206349, "rel_cer_red_dg": 0.296875, "overcorr_dg": 0.03439153439153439, "fixrate_dg": 0.8703703703703703, "seconds_dg": 3.406, "tok_s_dg": 51.08631826189078, "denoising_steps_dg": 18, "cer_dgc": 0.08060453400503778, "wer_dgc": 0.2619047619047619, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.533, "tok_s_dgc": 412.75797373358347, "denoising_steps_dgc": 2, "cer_g4": 0.04408060453400504, "wer_g4": 0.10317460317460317, "rel_cer_red_g4": 0.453125, "overcorr_g4": 0.013227513227513227, "fixrate_g4": 0.8148148148148148, "seconds_g4": 13.427, "tok_s_g4": 13.703731287703881}
22
+ {"id": "bln600/3205297535", "n_chars_gold": 860, "cer_input": 0.015116279069767442, "wer_input": 0.041379310344827586, "cer_dg": 0.002325581395348837, "wer_dg": 0.020689655172413793, "rel_cer_red_dg": 0.8461538461538461, "overcorr_dg": 0.002328288707799767, "fixrate_dg": 0.9230769230769231, "seconds_dg": 0.866, "tok_s_dg": 235.56581986143186, "denoising_steps_dg": 4, "cer_dgc": 0.015116279069767442, "wer_dgc": 0.041379310344827586, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.534, "tok_s_dgc": 400.74906367041194, "denoising_steps_dgc": 2, "cer_g4": 0.010465116279069767, "wer_g4": 0.027586206896551724, "rel_cer_red_g4": 0.3076923076923077, "overcorr_g4": 0.0011641443538998836, "fixrate_g4": 0.3076923076923077, "seconds_g4": 15.283, "tok_s_g4": 13.740757704639142}
23
+ {"id": "bln600/3206257745", "n_chars_gold": 922, "cer_input": 0.027114967462039046, "wer_input": 0.11320754716981132, "cer_dg": 0.004338394793926247, "wer_dg": 0.031446540880503145, "rel_cer_red_dg": 0.8400000000000001, "overcorr_dg": 0.0, "fixrate_dg": 0.9545454545454546, "seconds_dg": 1.221, "tok_s_dg": 167.07616707616705, "denoising_steps_dg": 6, "cer_dgc": 0.027114967462039046, "wer_dgc": 0.11320754716981132, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.063, "tok_s_dgc": 203.19849482596427, "denoising_steps_dgc": 5, "cer_g4": 0.020607375271149676, "wer_g4": 0.07547169811320754, "rel_cer_red_g4": 0.23999999999999996, "overcorr_g4": 0.0, "fixrate_g4": 0.22727272727272727, "seconds_g4": 15.492, "tok_s_g4": 13.684482313452104}
24
+ {"id": "bln600/3206206317", "n_chars_gold": 894, "cer_input": 0.02796420581655481, "wer_input": 0.10738255033557047, "cer_dg": 0.02348993288590604, "wer_dg": 0.053691275167785234, "rel_cer_red_dg": 0.15999999999999995, "overcorr_dg": 0.011428571428571429, "fixrate_dg": 0.6818181818181818, "seconds_dg": 1.561, "tok_s_dg": 128.76361306854582, "denoising_steps_dg": 8, "cer_dgc": 0.02796420581655481, "wer_dgc": 0.10738255033557047, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 401.86915887850466, "denoising_steps_dgc": 2, "cer_g4": 0.01901565995525727, "wer_g4": 0.053691275167785234, "rel_cer_red_g4": 0.32, "overcorr_g4": 0.0034285714285714284, "fixrate_g4": 0.6363636363636364, "seconds_g4": 14.841, "tok_s_g4": 13.745704467353953}
25
+ {"id": "bln600/3200801615", "n_chars_gold": 656, "cer_input": 0.14786585365853658, "wer_input": 0.40540540540540543, "cer_dg": 0.053353658536585365, "wer_dg": 0.0990990990990991, "rel_cer_red_dg": 0.6391752577319588, "overcorr_dg": 0.014469453376205787, "fixrate_dg": 0.9, "seconds_dg": 2.285, "tok_s_dg": 72.21006564551422, "denoising_steps_dg": 12, "cer_dgc": 0.14786585365853658, "wer_dgc": 0.40540540540540543, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.531, "tok_s_dgc": 412.42937853107344, "denoising_steps_dgc": 2, "cer_g4": 0.12804878048780488, "wer_g4": 0.34234234234234234, "rel_cer_red_g4": 0.13402061855670097, "overcorr_g4": 0.006430868167202572, "fixrate_g4": 0.15555555555555556, "seconds_g4": 14.788, "tok_s_g4": 13.727346497159859}
26
+ {"id": "bln600/3200811798", "n_chars_gold": 751, "cer_input": 0.10119840213049268, "wer_input": 0.3790322580645161, "cer_dg": 0.07856191744340879, "wer_dg": 0.13709677419354838, "rel_cer_red_dg": 0.2236842105263158, "overcorr_dg": 0.05270655270655271, "fixrate_dg": 0.9384615384615385, "seconds_dg": 1.395, "tok_s_dg": 113.97849462365592, "denoising_steps_dg": 7, "cer_dgc": 0.28495339547270304, "wer_dgc": 0.5161290322580645, "rel_cer_red_dgc": -1.8157894736842102, "overcorr_dgc": 0.0014245014245014246, "fixrate_dgc": 0.03076923076923077, "seconds_dgc": 1.641, "tok_s_dgc": 156.00243753808653, "denoising_steps_dgc": 8, "cer_g4": 0.05059920106524634, "wer_g4": 0.12096774193548387, "rel_cer_red_g4": 0.5, "overcorr_g4": 0.019943019943019943, "fixrate_g4": 0.9076923076923077, "seconds_g4": 12.041, "tok_s_g4": 13.703180798936964}
27
+ {"id": "bln600/3207166294", "n_chars_gold": 542, "cer_input": 0.31365313653136534, "wer_input": 0.7865168539325843, "cer_dg": 0.22140221402214022, "wer_dg": 0.3146067415730337, "rel_cer_red_dg": 0.2941176470588236, "overcorr_dg": 0.10983981693363844, "fixrate_dg": 0.7861635220125787, "seconds_dg": 2.109, "tok_s_dg": 56.42484589853011, "denoising_steps_dg": 11, "cer_dgc": 0.31365313653136534, "wer_dgc": 0.7865168539325843, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.872, "tok_s_dgc": 251.14678899082568, "denoising_steps_dgc": 4, "cer_g4": 0.31365313653136534, "wer_g4": 0.7865168539325843, "rel_cer_red_g4": 0.0, "overcorr_g4": 0.0, "fixrate_g4": 0.0, "seconds_g4": 16.109, "tok_s_g4": 13.656961946737846}
28
+ {"id": "bln600/3206227029", "n_chars_gold": 745, "cer_input": 0.07919463087248323, "wer_input": 0.3170731707317073, "cer_dg": 0.028187919463087248, "wer_dg": 0.0975609756097561, "rel_cer_red_dg": 0.6440677966101696, "overcorr_dg": 0.013850415512465374, "fixrate_dg": 0.9230769230769231, "seconds_dg": 1.418, "tok_s_dg": 125.52891396332863, "denoising_steps_dg": 7, "cer_dgc": 0.07919463087248323, "wer_dgc": 0.3170731707317073, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.539, "tok_s_dgc": 402.59740259740255, "denoising_steps_dgc": 2, "cer_g4": 0.026845637583892617, "wer_g4": 0.08130081300813008, "rel_cer_red_g4": 0.6610169491525424, "overcorr_g4": 0.012465373961218837, "fixrate_g4": 0.9038461538461539, "seconds_g4": 13.098, "tok_s_g4": 13.74255611543747}
29
+ {"id": "bln600/3206328810", "n_chars_gold": 778, "cer_input": 0.032133676092544985, "wer_input": 0.14960629921259844, "cer_dg": 0.030848329048843187, "wer_dg": 0.08661417322834646, "rel_cer_red_dg": 0.039999999999999966, "overcorr_dg": 0.015665796344647518, "fixrate_dg": 0.8260869565217391, "seconds_dg": 2.102, "tok_s_dg": 96.0989533777355, "denoising_steps_dg": 11, "cer_dgc": 0.032133676092544985, "wer_dgc": 0.14960629921259844, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.71, "tok_s_dgc": 309.85915492957747, "denoising_steps_dgc": 3, "cer_g4": 0.021850899742930592, "wer_g4": 0.09448818897637795, "rel_cer_red_g4": 0.31999999999999995, "overcorr_g4": 0.0039164490861618795, "fixrate_g4": 0.4782608695652174, "seconds_g4": 15.514, "tok_s_g4": 13.665076704911693}
30
+ {"id": "bln600/3200811642", "n_chars_gold": 864, "cer_input": 0.052083333333333336, "wer_input": 0.2, "cer_dg": 0.02546296296296296, "wer_dg": 0.11428571428571428, "rel_cer_red_dg": 0.5111111111111112, "overcorr_dg": 0.006045949214026602, "fixrate_dg": 0.7714285714285715, "seconds_dg": 3.179, "tok_s_dg": 61.9691726958163, "denoising_steps_dg": 17, "cer_dgc": 0.05092592592592592, "wer_dgc": 0.2, "rel_cer_red_dgc": 0.02222222222222232, "overcorr_dgc": 0.0, "fixrate_dgc": 0.02857142857142857, "seconds_dgc": 1.057, "tok_s_dgc": 203.4058656575213, "denoising_steps_dgc": 5, "cer_g4": 0.03125, "wer_g4": 0.1, "rel_cer_red_g4": 0.4, "overcorr_g4": 0.006045949214026602, "fixrate_g4": 0.6571428571428571, "seconds_g4": 14.206, "tok_s_g4": 13.726594396733775}
31
+ {"id": "bln600/3202982093", "n_chars_gold": 852, "cer_input": 0.03403755868544601, "wer_input": 0.1232876712328767, "cer_dg": 0.01995305164319249, "wer_dg": 0.0547945205479452, "rel_cer_red_dg": 0.41379310344827586, "overcorr_dg": 0.0024009603841536613, "fixrate_dg": 0.625, "seconds_dg": 1.211, "tok_s_dg": 166.804293971924, "denoising_steps_dg": 6, "cer_dgc": 0.03403755868544601, "wer_dgc": 0.1232876712328767, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.532, "tok_s_dgc": 400.37593984962405, "denoising_steps_dgc": 2, "cer_g4": 0.02112676056338028, "wer_g4": 0.07534246575342465, "rel_cer_red_g4": 0.3793103448275862, "overcorr_g4": 0.0012004801920768306, "fixrate_g4": 0.4166666666666667, "seconds_g4": 14.959, "tok_s_g4": 13.704124607259844}
32
+ {"id": "bln600/3206225744", "n_chars_gold": 921, "cer_input": 0.0249728555917481, "wer_input": 0.12578616352201258, "cer_dg": 0.018458197611292075, "wer_dg": 0.0440251572327044, "rel_cer_red_dg": 0.2608695652173913, "overcorr_dg": 0.005512679162072767, "fixrate_dg": 0.65, "seconds_dg": 4.989, "tok_s_dg": 40.28863499699339, "denoising_steps_dg": 27, "cer_dgc": 0.0249728555917481, "wer_dgc": 0.12578616352201258, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.537, "tok_s_dgc": 405.95903165735564, "denoising_steps_dgc": 2, "cer_g4": 0.014115092290988056, "wer_g4": 0.03773584905660377, "rel_cer_red_g4": 0.4347826086956522, "overcorr_g4": 0.004410143329658214, "fixrate_g4": 0.7, "seconds_g4": 14.716, "tok_s_g4": 13.726556129382985}
33
+ {"id": "bln600/3207642541", "n_chars_gold": 781, "cer_input": 0.07554417413572344, "wer_input": 0.23622047244094488, "cer_dg": 0.056338028169014086, "wer_dg": 0.11023622047244094, "rel_cer_red_dg": 0.25423728813559326, "overcorr_dg": 0.021361815754339118, "fixrate_dg": 0.8928571428571429, "seconds_dg": 1.926, "tok_s_dg": 93.45794392523365, "denoising_steps_dg": 10, "cer_dgc": 0.07554417413572344, "wer_dgc": 0.23622047244094488, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.877, "tok_s_dgc": 251.99543899657925, "denoising_steps_dgc": 4, "cer_g4": 0.0499359795134443, "wer_g4": 0.13385826771653545, "rel_cer_red_g4": 0.3389830508474577, "overcorr_g4": 0.014686248331108143, "fixrate_g4": 0.5357142857142857, "seconds_g4": 13.935, "tok_s_g4": 13.706494438464299}
34
+ {"id": "bln600/3200810720", "n_chars_gold": 956, "cer_input": 0.09518828451882845, "wer_input": 0.13043478260869565, "cer_dg": 0.08054393305439331, "wer_dg": 0.08074534161490683, "rel_cer_red_dg": 0.1538461538461538, "overcorr_dg": 0.007625272331154684, "fixrate_dg": 0.4262295081967213, "seconds_dg": 3.336, "tok_s_dg": 61.15107913669065, "denoising_steps_dg": 18, "cer_dgc": 0.09518828451882845, "wer_dgc": 0.13043478260869565, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.538, "tok_s_dgc": 408.92193308550185, "denoising_steps_dgc": 2, "cer_g4": 0.08891213389121339, "wer_g4": 0.11180124223602485, "rel_cer_red_g4": 0.06593406593406596, "overcorr_g4": 0.0, "fixrate_g4": 0.21311475409836064, "seconds_g4": 15.435, "tok_s_g4": 13.67022999676061}
35
+ {"id": "bln600/3206237533", "n_chars_gold": 959, "cer_input": 0.04692387904066736, "wer_input": 0.12101910828025478, "cer_dg": 0.029197080291970802, "wer_dg": 0.01910828025477707, "rel_cer_red_dg": 0.37777777777777777, "overcorr_dg": 0.0010775862068965517, "fixrate_dg": 0.8823529411764706, "seconds_dg": 1.234, "tok_s_dg": 155.59157212317666, "denoising_steps_dg": 6, "cer_dgc": 0.04692387904066736, "wer_dgc": 0.12101910828025478, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.71, "tok_s_dgc": 290.14084507042253, "denoising_steps_dgc": 3, "cer_g4": 0.035453597497393116, "wer_g4": 0.05732484076433121, "rel_cer_red_g4": 0.24444444444444446, "overcorr_g4": 0.0, "fixrate_g4": 0.5882352941176471, "seconds_g4": 14.2, "tok_s_g4": 13.732394366197184}
36
+ {"id": "bln600/3200810766", "n_chars_gold": 913, "cer_input": 0.023001095290251915, "wer_input": 0.12101910828025478, "cer_dg": 0.002190580503833516, "wer_dg": 0.012738853503184714, "rel_cer_red_dg": 0.9047619047619047, "overcorr_dg": 0.0, "fixrate_dg": 0.95, "seconds_dg": 0.868, "tok_s_dg": 228.110599078341, "denoising_steps_dg": 4, "cer_dgc": 0.023001095290251915, "wer_dgc": 0.12101910828025478, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 413.0841121495327, "denoising_steps_dgc": 2, "cer_g4": 0.0032858707557502738, "wer_g4": 0.01910828025477707, "rel_cer_red_g4": 0.8571428571428571, "overcorr_g4": 0.0, "fixrate_g4": 0.9, "seconds_g4": 14.418, "tok_s_g4": 13.73283395755306}
37
+ {"id": "bln600/3206232849", "n_chars_gold": 797, "cer_input": 0.037641154328732745, "wer_input": 0.1310344827586207, "cer_dg": 0.01756587202007528, "wer_dg": 0.020689655172413793, "rel_cer_red_dg": 0.5333333333333333, "overcorr_dg": 0.00903225806451613, "fixrate_dg": 1.0, "seconds_dg": 1.207, "tok_s_dg": 166.52858326429163, "denoising_steps_dg": 6, "cer_dgc": 0.1794228356336261, "wer_dgc": 0.23448275862068965, "rel_cer_red_dgc": -3.766666666666667, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.616, "tok_s_dgc": 158.41584158415841, "denoising_steps_dgc": 8, "cer_g4": 0.026348808030112924, "wer_g4": 0.06896551724137931, "rel_cer_red_g4": 0.29999999999999993, "overcorr_g4": 0.0, "fixrate_g4": 0.5454545454545454, "seconds_g4": 15.383, "tok_s_g4": 13.716440226223755}
38
+ {"id": "bln600/3206227140", "n_chars_gold": 946, "cer_input": 0.02959830866807611, "wer_input": 0.11538461538461539, "cer_dg": 0.017970401691331923, "wer_dg": 0.02564102564102564, "rel_cer_red_dg": 0.39285714285714285, "overcorr_dg": 0.006514657980456026, "fixrate_dg": 0.9444444444444444, "seconds_dg": 2.277, "tok_s_dg": 86.5173473869126, "denoising_steps_dg": 12, "cer_dgc": 0.02959830866807611, "wer_dgc": 0.11538461538461539, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.526, "tok_s_dgc": 393.5361216730038, "denoising_steps_dgc": 2, "cer_g4": 0.014799154334038054, "wer_g4": 0.02564102564102564, "rel_cer_red_g4": 0.5, "overcorr_g4": 0.0, "fixrate_g4": 0.7777777777777778, "seconds_g4": 14.725, "tok_s_g4": 13.718166383701188}
39
+ {"id": "bln600/3206201441", "n_chars_gold": 887, "cer_input": 0.018038331454340473, "wer_input": 0.09395973154362416, "cer_dg": 0.010146561443066516, "wer_dg": 0.040268456375838924, "rel_cer_red_dg": 0.4375, "overcorr_dg": 0.004555808656036446, "fixrate_dg": 0.75, "seconds_dg": 1.577, "tok_s_dg": 128.72542802790107, "denoising_steps_dg": 8, "cer_dgc": 0.018038331454340473, "wer_dgc": 0.09395973154362416, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.705, "tok_s_dgc": 304.96453900709224, "denoising_steps_dgc": 3, "cer_g4": 0.007891770011273957, "wer_g4": 0.040268456375838924, "rel_cer_red_g4": 0.5625, "overcorr_g4": 0.0011389521640091116, "fixrate_g4": 0.625, "seconds_g4": 15.086, "tok_s_g4": 13.721331035397057}
40
+ {"id": "bln600/3200810140", "n_chars_gold": 688, "cer_input": 0.16715116279069767, "wer_input": 0.4112903225806452, "cer_dg": 0.059593023255813955, "wer_dg": 0.11290322580645161, "rel_cer_red_dg": 0.6434782608695653, "overcorr_dg": 0.013740458015267175, "fixrate_dg": 0.8407079646017699, "seconds_dg": 1.748, "tok_s_dg": 92.10526315789474, "denoising_steps_dg": 9, "cer_dgc": 0.32848837209302323, "wer_dgc": 0.532258064516129, "rel_cer_red_dgc": -0.9652173913043478, "overcorr_dgc": 0.0, "fixrate_dgc": 0.008849557522123894, "seconds_dgc": 1.641, "tok_s_dgc": 156.00243753808653, "denoising_steps_dgc": 8, "cer_g4": 0.10901162790697674, "wer_g4": 0.20161290322580644, "rel_cer_red_g4": 0.34782608695652173, "overcorr_g4": 0.004580152671755725, "fixrate_g4": 0.48672566371681414, "seconds_g4": 13.021, "tok_s_g4": 13.670225021119728}
41
+ {"id": "bln600/3206260927", "n_chars_gold": 931, "cer_input": 0.03007518796992481, "wer_input": 0.14814814814814814, "cer_dg": 0.01288936627282492, "wer_dg": 0.037037037037037035, "rel_cer_red_dg": 0.5714285714285713, "overcorr_dg": 0.004395604395604396, "fixrate_dg": 0.8636363636363636, "seconds_dg": 1.601, "tok_s_dg": 126.17114303560275, "denoising_steps_dg": 8, "cer_dgc": 0.03007518796992481, "wer_dgc": 0.14814814814814814, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.045454545454545456, "seconds_dgc": 0.889, "tok_s_dgc": 241.84476940382453, "denoising_steps_dgc": 4, "cer_g4": 0.00966702470461869, "wer_g4": 0.04938271604938271, "rel_cer_red_g4": 0.6785714285714286, "overcorr_g4": 0.001098901098901099, "fixrate_g4": 0.8181818181818182, "seconds_g4": 14.839, "tok_s_g4": 13.680167127164903}
42
+ {"id": "bln600/3206319497", "n_chars_gold": 732, "cer_input": 0.1051912568306011, "wer_input": 0.3492063492063492, "cer_dg": 0.056010928961748634, "wer_dg": 0.11904761904761904, "rel_cer_red_dg": 0.4675324675324676, "overcorr_dg": 0.02180232558139535, "fixrate_dg": 0.9193548387096774, "seconds_dg": 2.111, "tok_s_dg": 79.58313595452391, "denoising_steps_dg": 11, "cer_dgc": 0.32650273224043713, "wer_dgc": 0.5714285714285714, "rel_cer_red_dgc": -2.1038961038961035, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 2.354, "tok_s_dgc": 108.75106202209005, "denoising_steps_dgc": 12, "cer_g4": 0.05874316939890711, "wer_g4": 0.1349206349206349, "rel_cer_red_g4": 0.44155844155844154, "overcorr_g4": 0.00436046511627907, "fixrate_g4": 0.5161290322580645, "seconds_g4": 13.76, "tok_s_g4": 13.73546511627907}
43
+ {"id": "bln600/3200811142", "n_chars_gold": 802, "cer_input": 0.07605985037406483, "wer_input": 0.22556390977443608, "cer_dg": 0.02743142144638404, "wer_dg": 0.05263157894736842, "rel_cer_red_dg": 0.639344262295082, "overcorr_dg": 0.0037831021437578815, "fixrate_dg": 0.8360655737704918, "seconds_dg": 2.271, "tok_s_dg": 83.22324966974901, "denoising_steps_dg": 12, "cer_dgc": 0.07605985037406483, "wer_dgc": 0.22556390977443608, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.879, "tok_s_dgc": 251.42207053469852, "denoising_steps_dgc": 4, "cer_g4": 0.03117206982543641, "wer_g4": 0.06015037593984962, "rel_cer_red_g4": 0.5901639344262295, "overcorr_g4": 0.008827238335435058, "fixrate_g4": 0.7377049180327869, "seconds_g4": 13.801, "tok_s_g4": 13.694659807260344}
44
+ {"id": "bln600/3206237352", "n_chars_gold": 807, "cer_input": 0.07311028500619579, "wer_input": 0.35036496350364965, "cer_dg": 0.022304832713754646, "wer_dg": 0.058394160583941604, "rel_cer_red_dg": 0.6949152542372882, "overcorr_dg": 0.007741935483870968, "fixrate_dg": 0.9464285714285714, "seconds_dg": 2.821, "tok_s_dg": 58.84438142502658, "denoising_steps_dg": 15, "cer_dgc": 0.07187112763320942, "wer_dgc": 0.34306569343065696, "rel_cer_red_dgc": 0.01694915254237287, "overcorr_dgc": 0.0, "fixrate_dgc": 0.017857142857142856, "seconds_dgc": 0.53, "tok_s_dgc": 405.66037735849056, "denoising_steps_dgc": 2, "cer_g4": 0.01858736059479554, "wer_g4": 0.06569343065693431, "rel_cer_red_g4": 0.7457627118644067, "overcorr_g4": 0.003870967741935484, "fixrate_g4": 0.8214285714285714, "seconds_g4": 12.59, "tok_s_g4": 13.741064336775219}
45
+ {"id": "bln600/3200810602", "n_chars_gold": 837, "cer_input": 0.02986857825567503, "wer_input": 0.13380281690140844, "cer_dg": 0.017921146953405017, "wer_dg": 0.04929577464788732, "rel_cer_red_dg": 0.4, "overcorr_dg": 0.012165450121654502, "fixrate_dg": 0.8333333333333334, "seconds_dg": 2.3, "tok_s_dg": 84.78260869565219, "denoising_steps_dg": 12, "cer_dgc": 0.02986857825567503, "wer_dgc": 0.13380281690140844, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 413.0841121495327, "denoising_steps_dgc": 2, "cer_g4": 0.005973715651135006, "wer_g4": 0.04225352112676056, "rel_cer_red_g4": 0.8, "overcorr_g4": 0.0012165450121654502, "fixrate_g4": 0.8333333333333334, "seconds_g4": 14.433, "tok_s_g4": 13.718561629598836}
46
+ {"id": "bln600/3206211444", "n_chars_gold": 811, "cer_input": 0.06165228113440197, "wer_input": 0.2116788321167883, "cer_dg": 0.040690505548705305, "wer_dg": 0.08029197080291971, "rel_cer_red_dg": 0.3399999999999999, "overcorr_dg": 0.021711366538952746, "fixrate_dg": 0.9523809523809523, "seconds_dg": 2.459, "tok_s_dg": 74.82716551443676, "denoising_steps_dg": 13, "cer_dgc": 0.06535141800246609, "wer_dgc": 0.2116788321167883, "rel_cer_red_dgc": -0.06000000000000002, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.045, "tok_s_dgc": 210.5263157894737, "denoising_steps_dgc": 5, "cer_g4": 0.02342786683107275, "wer_g4": 0.08759124087591241, "rel_cer_red_g4": 0.62, "overcorr_g4": 0.0038314176245210726, "fixrate_g4": 0.7380952380952381, "seconds_g4": 14.124, "tok_s_g4": 13.73548569810252}
47
+ {"id": "bln600/3206232857", "n_chars_gold": 840, "cer_input": 0.06666666666666667, "wer_input": 0.2808219178082192, "cer_dg": 0.058333333333333334, "wer_dg": 0.08904109589041095, "rel_cer_red_dg": 0.12499999999999997, "overcorr_dg": 0.04084158415841584, "fixrate_dg": 0.8571428571428571, "seconds_dg": 1.924, "tok_s_dg": 96.15384615384616, "denoising_steps_dg": 10, "cer_dgc": 0.06666666666666667, "wer_dgc": 0.2808219178082192, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.875, "tok_s_dgc": 252.57142857142858, "denoising_steps_dgc": 4, "cer_g4": 0.02857142857142857, "wer_g4": 0.10273972602739725, "rel_cer_red_g4": 0.5714285714285715, "overcorr_g4": 0.0037128712871287127, "fixrate_g4": 0.6428571428571429, "seconds_g4": 14.063, "tok_s_g4": 13.723956481547322}
48
+ {"id": "bln600/3200812731", "n_chars_gold": 949, "cer_input": 0.030558482613277135, "wer_input": 0.15060240963855423, "cer_dg": 0.01791359325605901, "wer_dg": 0.03614457831325301, "rel_cer_red_dg": 0.4137931034482759, "overcorr_dg": 0.006458557588805167, "fixrate_dg": 0.9230769230769231, "seconds_dg": 1.579, "tok_s_dg": 126.6624445851805, "denoising_steps_dg": 8, "cer_dgc": 0.030558482613277135, "wer_dgc": 0.15060240963855423, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.711, "tok_s_dgc": 305.2039381153305, "denoising_steps_dgc": 3, "cer_g4": 0.015806111696522657, "wer_g4": 0.05421686746987952, "rel_cer_red_g4": 0.48275862068965514, "overcorr_g4": 0.004305705059203444, "fixrate_g4": 0.8076923076923077, "seconds_g4": 14.808, "tok_s_g4": 13.70880605078336}
49
+ {"id": "bln600/3200810449", "n_chars_gold": 711, "cer_input": 0.10970464135021098, "wer_input": 0.37168141592920356, "cer_dg": 0.046413502109704644, "wer_dg": 0.11504424778761062, "rel_cer_red_dg": 0.576923076923077, "overcorr_dg": 0.014705882352941176, "fixrate_dg": 0.8571428571428571, "seconds_dg": 2.289, "tok_s_dg": 77.32634338138925, "denoising_steps_dg": 12, "cer_dgc": 0.1111111111111111, "wer_dgc": 0.37168141592920356, "rel_cer_red_dgc": -0.012820512820512706, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.537, "tok_s_dgc": 405.95903165735564, "denoising_steps_dgc": 2, "cer_g4": 0.052039381153305204, "wer_g4": 0.13274336283185842, "rel_cer_red_g4": 0.5256410256410257, "overcorr_g4": 0.010294117647058823, "fixrate_g4": 0.7571428571428571, "seconds_g4": 13.032, "tok_s_g4": 13.735420503376304}
50
+ {"id": "bln600/3200810146", "n_chars_gold": 810, "cer_input": 0.06296296296296296, "wer_input": 0.2265625, "cer_dg": 0.016049382716049384, "wer_dg": 0.078125, "rel_cer_red_dg": 0.7450980392156862, "overcorr_dg": 0.008871989860583017, "fixrate_dg": 0.94, "seconds_dg": 1.761, "tok_s_dg": 101.64679159568428, "denoising_steps_dg": 9, "cer_dgc": 0.06296296296296296, "wer_dgc": 0.2265625, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.062, "tok_s_dgc": 206.21468926553672, "denoising_steps_dgc": 5, "cer_g4": 0.019753086419753086, "wer_g4": 0.09375, "rel_cer_red_g4": 0.6862745098039216, "overcorr_g4": 0.0025348542458808617, "fixrate_g4": 0.76, "seconds_g4": 13.71, "tok_s_g4": 13.712618526622903}
51
+ {"id": "bln600/3206190899", "n_chars_gold": 891, "cer_input": 0.03928170594837262, "wer_input": 0.14383561643835616, "cer_dg": 0.026936026936026935, "wer_dg": 0.0821917808219178, "rel_cer_red_dg": 0.31428571428571433, "overcorr_dg": 0.008101851851851851, "fixrate_dg": 0.8214285714285714, "seconds_dg": 1.226, "tok_s_dg": 161.50081566068516, "denoising_steps_dg": 6, "cer_dgc": 0.03928170594837262, "wer_dgc": 0.14383561643835616, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.884, "tok_s_dgc": 243.21266968325793, "denoising_steps_dgc": 4, "cer_g4": 0.026936026936026935, "wer_g4": 0.08904109589041095, "rel_cer_red_g4": 0.31428571428571433, "overcorr_g4": 0.0023148148148148147, "fixrate_g4": 0.5, "seconds_g4": 14.978, "tok_s_g4": 13.68674055281079}
52
+ {"id": "bln600/3206210894", "n_chars_gold": 945, "cer_input": 0.039153439153439155, "wer_input": 0.16149068322981366, "cer_dg": 0.008465608465608466, "wer_dg": 0.024844720496894408, "rel_cer_red_dg": 0.7837837837837838, "overcorr_dg": 0.003264417845484222, "fixrate_dg": 0.9696969696969697, "seconds_dg": 2.442, "tok_s_dg": 80.26208026208026, "denoising_steps_dg": 13, "cer_dgc": 0.039153439153439155, "wer_dgc": 0.16149068322981366, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.702, "tok_s_dgc": 313.3903133903134, "denoising_steps_dgc": 3, "cer_g4": 0.005291005291005291, "wer_g4": 0.024844720496894408, "rel_cer_red_g4": 0.8648648648648649, "overcorr_g4": 0.0, "fixrate_g4": 0.8484848484848485, "seconds_g4": 14.386, "tok_s_g4": 13.693869039343808}
53
+ {"id": "bln600/3200810641", "n_chars_gold": 667, "cer_input": 0.18440779610194902, "wer_input": 0.375, "cer_dg": 0.029985007496251874, "wer_dg": 0.07142857142857142, "rel_cer_red_dg": 0.8373983739837398, "overcorr_dg": 0.009216589861751152, "fixrate_dg": 0.9274193548387096, "seconds_dg": 4.74, "tok_s_dg": 37.130801687763714, "denoising_steps_dg": 26, "cer_dgc": 0.18440779610194902, "wer_dgc": 0.375, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.704, "tok_s_dgc": 311.0795454545455, "denoising_steps_dgc": 3, "cer_g4": 0.12143928035982009, "wer_g4": 0.16071428571428573, "rel_cer_red_g4": 0.3414634146341463, "overcorr_g4": 0.0030721966205837174, "fixrate_g4": 0.532258064516129, "seconds_g4": 14.058, "tok_s_g4": 13.65770379854887}
54
+ {"id": "bln600/3206329327", "n_chars_gold": 905, "cer_input": 0.01878453038674033, "wer_input": 0.09316770186335403, "cer_dg": 0.011049723756906077, "wer_dg": 0.037267080745341616, "rel_cer_red_dg": 0.411764705882353, "overcorr_dg": 0.004479283314669653, "fixrate_dg": 0.7857142857142857, "seconds_dg": 1.037, "tok_s_dg": 198.6499517839923, "denoising_steps_dg": 5, "cer_dgc": 0.01878453038674033, "wer_dgc": 0.09316770186335403, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.704, "tok_s_dgc": 305.3977272727273, "denoising_steps_dgc": 3, "cer_g4": 0.013259668508287293, "wer_g4": 0.049689440993788817, "rel_cer_red_g4": 0.29411764705882354, "overcorr_g4": 0.0011198208286674132, "fixrate_g4": 0.5714285714285714, "seconds_g4": 15.226, "tok_s_g4": 13.72652042558781}
55
+ {"id": "bln600/3200810779", "n_chars_gold": 868, "cer_input": 0.026497695852534562, "wer_input": 0.10596026490066225, "cer_dg": 0.013824884792626729, "wer_dg": 0.059602649006622516, "rel_cer_red_dg": 0.47826086956521735, "overcorr_dg": 0.005847953216374269, "fixrate_dg": 0.8260869565217391, "seconds_dg": 1.225, "tok_s_dg": 159.18367346938774, "denoising_steps_dg": 6, "cer_dgc": 0.026497695852534562, "wer_dgc": 0.10596026490066225, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.54, "tok_s_dgc": 407.4074074074074, "denoising_steps_dgc": 2, "cer_g4": 0.016129032258064516, "wer_g4": 0.052980132450331126, "rel_cer_red_g4": 0.391304347826087, "overcorr_g4": 0.0035087719298245615, "fixrate_g4": 0.6521739130434783, "seconds_g4": 14.838, "tok_s_g4": 13.68108909556544}
56
+ {"id": "bln600/3206323884", "n_chars_gold": 867, "cer_input": 0.04498269896193772, "wer_input": 0.19727891156462585, "cer_dg": 0.02191464821222607, "wer_dg": 0.06802721088435375, "rel_cer_red_dg": 0.5128205128205128, "overcorr_dg": 0.0059594755661501785, "fixrate_dg": 0.8518518518518519, "seconds_dg": 2.108, "tok_s_dg": 92.50474383301707, "denoising_steps_dg": 11, "cer_dgc": 0.04498269896193772, "wer_dgc": 0.19727891156462585, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.71, "tok_s_dgc": 309.85915492957747, "denoising_steps_dgc": 3, "cer_g4": 0.025374855824682813, "wer_g4": 0.08163265306122448, "rel_cer_red_g4": 0.43589743589743596, "overcorr_g4": 0.0023837902264600714, "fixrate_g4": 0.5925925925925926, "seconds_g4": 14.647, "tok_s_g4": 13.722946678500717}
57
+ {"id": "bln600/3206327837", "n_chars_gold": 831, "cer_input": 0.1552346570397112, "wer_input": 0.3897058823529412, "cer_dg": 0.08904933814681107, "wer_dg": 0.1323529411764706, "rel_cer_red_dg": 0.42635658914728686, "overcorr_dg": 0.010825439783491205, "fixrate_dg": 0.7634408602150538, "seconds_dg": 1.573, "tok_s_dg": 102.98792116973935, "denoising_steps_dg": 8, "cer_dgc": 0.1552346570397112, "wer_dgc": 0.3897058823529412, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.529, "tok_s_dgc": 417.76937618147446, "denoising_steps_dgc": 2, "cer_g4": 0.10469314079422383, "wer_g4": 0.16911764705882354, "rel_cer_red_g4": 0.32558139534883723, "overcorr_g4": 0.005412719891745603, "fixrate_g4": 0.5591397849462365, "seconds_g4": 13.069, "tok_s_g4": 13.696533782232764}
58
+ {"id": "bln600/3206258468", "n_chars_gold": 889, "cer_input": 0.012373453318335208, "wer_input": 0.07051282051282051, "cer_dg": 0.01124859392575928, "wer_dg": 0.038461538461538464, "rel_cer_red_dg": 0.09090909090909098, "overcorr_dg": 0.00340522133938706, "fixrate_dg": 0.4444444444444444, "seconds_dg": 1.383, "tok_s_dg": 146.059291395517, "denoising_steps_dg": 7, "cer_dgc": 0.010123734533183352, "wer_dgc": 0.057692307692307696, "rel_cer_red_dgc": 0.18181818181818182, "overcorr_dgc": 0.0, "fixrate_dgc": 0.2222222222222222, "seconds_dgc": 0.71, "tok_s_dgc": 288.7323943661972, "denoising_steps_dgc": 3, "cer_g4": 0.006749156355455568, "wer_g4": 0.03205128205128205, "rel_cer_red_g4": 0.4545454545454546, "overcorr_g4": 0.0, "fixrate_g4": 0.4444444444444444, "seconds_g4": 14.849, "tok_s_g4": 13.73829887534514}
59
+ {"id": "bln600/3206234047", "n_chars_gold": 919, "cer_input": 0.009793253536452665, "wer_input": 0.05952380952380952, "cer_dg": 0.001088139281828074, "wer_dg": 0.005952380952380952, "rel_cer_red_dg": 0.888888888888889, "overcorr_dg": 0.0, "fixrate_dg": 0.8888888888888888, "seconds_dg": 1.229, "tok_s_dg": 168.42961757526442, "denoising_steps_dg": 6, "cer_dgc": 0.009793253536452665, "wer_dgc": 0.05952380952380952, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.538, "tok_s_dgc": 392.19330855018586, "denoising_steps_dgc": 2, "cer_g4": 0.003264417845484222, "wer_g4": 0.017857142857142856, "rel_cer_red_g4": 0.6666666666666666, "overcorr_g4": 0.0, "fixrate_g4": 0.6666666666666666, "seconds_g4": 15.196, "tok_s_g4": 13.68781258225849}
60
+ {"id": "bln600/3200811894", "n_chars_gold": 907, "cer_input": 0.04630650496141125, "wer_input": 0.125, "cer_dg": 0.022050716648291068, "wer_dg": 0.05625, "rel_cer_red_dg": 0.5238095238095238, "overcorr_dg": 0.010112359550561797, "fixrate_dg": 0.8536585365853658, "seconds_dg": 1.4, "tok_s_dg": 140.0, "denoising_steps_dg": 7, "cer_dgc": 0.04630650496141125, "wer_dgc": 0.125, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.535, "tok_s_dgc": 411.21495327102804, "denoising_steps_dgc": 2, "cer_g4": 0.03858875413450937, "wer_g4": 0.0625, "rel_cer_red_g4": 0.16666666666666669, "overcorr_g4": 0.011235955056179775, "fixrate_g4": 0.43902439024390244, "seconds_g4": 14.813, "tok_s_g4": 13.704178761898332}
61
+ {"id": "bln600/3206235678", "n_chars_gold": 962, "cer_input": 0.02182952182952183, "wer_input": 0.09876543209876543, "cer_dg": 0.015592515592515593, "wer_dg": 0.04938271604938271, "rel_cer_red_dg": 0.2857142857142857, "overcorr_dg": 0.005296610169491525, "fixrate_dg": 0.7058823529411765, "seconds_dg": 1.38, "tok_s_dg": 145.6521739130435, "denoising_steps_dg": 7, "cer_dgc": 0.02182952182952183, "wer_dgc": 0.09876543209876543, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.526, "tok_s_dgc": 408.74524714828897, "denoising_steps_dgc": 2, "cer_g4": 0.011434511434511435, "wer_g4": 0.05555555555555555, "rel_cer_red_g4": 0.47619047619047616, "overcorr_g4": 0.0, "fixrate_g4": 0.6470588235294118, "seconds_g4": 15.092, "tok_s_g4": 13.71587596077392}
62
+ {"id": "bln600/3206232665", "n_chars_gold": 877, "cer_input": 0.027366020524515394, "wer_input": 0.1437908496732026, "cer_dg": 0.005701254275940707, "wer_dg": 0.032679738562091505, "rel_cer_red_dg": 0.7916666666666666, "overcorr_dg": 0.0011614401858304297, "fixrate_dg": 0.9, "seconds_dg": 1.399, "tok_s_dg": 146.53323802716227, "denoising_steps_dg": 7, "cer_dgc": 0.027366020524515394, "wer_dgc": 0.1437908496732026, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.53, "tok_s_dgc": 409.4339622641509, "denoising_steps_dgc": 2, "cer_g4": 0.0034207525655644243, "wer_g4": 0.026143790849673203, "rel_cer_red_g4": 0.875, "overcorr_g4": 0.0011614401858304297, "fixrate_g4": 0.95, "seconds_g4": 15.092, "tok_s_g4": 13.71587596077392}
63
+ {"id": "bln600/3202413243", "n_chars_gold": 815, "cer_input": 0.023312883435582823, "wer_input": 0.11564625850340136, "cer_dg": 0.007361963190184049, "wer_dg": 0.027210884353741496, "rel_cer_red_dg": 0.6842105263157895, "overcorr_dg": 0.0025031289111389237, "fixrate_dg": 0.9230769230769231, "seconds_dg": 1.216, "tok_s_dg": 162.82894736842107, "denoising_steps_dg": 6, "cer_dgc": 0.023312883435582823, "wer_dgc": 0.11564625850340136, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.53, "tok_s_dgc": 394.33962264150944, "denoising_steps_dgc": 2, "cer_g4": 0.011042944785276074, "wer_g4": 0.061224489795918366, "rel_cer_red_g4": 0.5263157894736842, "overcorr_g4": 0.0025031289111389237, "fixrate_g4": 0.5384615384615384, "seconds_g4": 14.542, "tok_s_g4": 13.684500068766333}
64
+ {"id": "bln600/3206201960", "n_chars_gold": 997, "cer_input": 0.01905717151454363, "wer_input": 0.08092485549132948, "cer_dg": 0.004012036108324975, "wer_dg": 0.017341040462427744, "rel_cer_red_dg": 0.7894736842105263, "overcorr_dg": 0.0, "fixrate_dg": 0.8823529411764706, "seconds_dg": 1.027, "tok_s_dg": 198.63680623174295, "denoising_steps_dg": 5, "cer_dgc": 0.01905717151454363, "wer_dgc": 0.08092485549132948, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.531, "tok_s_dgc": 412.42937853107344, "denoising_steps_dgc": 2, "cer_g4": 0.010030090270812437, "wer_g4": 0.03468208092485549, "rel_cer_red_g4": 0.4736842105263158, "overcorr_g4": 0.0, "fixrate_g4": 0.47058823529411764, "seconds_g4": 15.017, "tok_s_g4": 13.717786508623561}
65
+ {"id": "bln600/3200810452", "n_chars_gold": 676, "cer_input": 0.1893491124260355, "wer_input": 0.4444444444444444, "cer_dg": 0.11094674556213018, "wer_dg": 0.1623931623931624, "rel_cer_red_dg": 0.4140625, "overcorr_dg": 0.06570512820512821, "fixrate_dg": 0.9243697478991597, "seconds_dg": 2.112, "tok_s_dg": 67.70833333333333, "denoising_steps_dg": 11, "cer_dgc": 0.1893491124260355, "wer_dgc": 0.4444444444444444, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.711, "tok_s_dgc": 309.42334739803096, "denoising_steps_dgc": 3, "cer_g4": 0.1849112426035503, "wer_g4": 0.4444444444444444, "rel_cer_red_g4": 0.023437499999999972, "overcorr_g4": 0.0016025641025641025, "fixrate_g4": 0.058823529411764705, "seconds_g4": 15.685, "tok_s_g4": 13.57985336308575}
66
+ {"id": "bln600/3200812176", "n_chars_gold": 758, "cer_input": 0.18601583113456466, "wer_input": 0.3888888888888889, "cer_dg": 0.12532981530343007, "wer_dg": 0.19047619047619047, "rel_cer_red_dg": 0.32624113475177313, "overcorr_dg": 0.05597579425113464, "fixrate_dg": 0.8282828282828283, "seconds_dg": 1.925, "tok_s_dg": 74.28571428571428, "denoising_steps_dg": 10, "cer_dgc": 0.18601583113456466, "wer_dgc": 0.3888888888888889, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.538, "tok_s_dgc": 408.92193308550185, "denoising_steps_dgc": 2, "cer_g4": 0.1266490765171504, "wer_g4": 0.2222222222222222, "rel_cer_red_g4": 0.3191489361702128, "overcorr_g4": 0.037821482602118005, "fixrate_g4": 0.8383838383838383, "seconds_g4": 10.814, "tok_s_g4": 13.685962641020899}
67
+ {"id": "bln600/3207644658", "n_chars_gold": 1018, "cer_input": 0.03339882121807466, "wer_input": 0.1696969696969697, "cer_dg": 0.0137524557956778, "wer_dg": 0.03636363636363636, "rel_cer_red_dg": 0.5882352941176471, "overcorr_dg": 0.005045408678102927, "fixrate_dg": 1.0, "seconds_dg": 1.215, "tok_s_dg": 164.60905349794237, "denoising_steps_dg": 6, "cer_dgc": 0.03241650294695481, "wer_dgc": 0.16363636363636364, "rel_cer_red_dgc": 0.029411764705882408, "overcorr_dgc": 0.0, "fixrate_dgc": 0.03571428571428571, "seconds_dgc": 0.883, "tok_s_dgc": 248.01812004530012, "denoising_steps_dgc": 4, "cer_g4": 0.015717092337917484, "wer_g4": 0.06060606060606061, "rel_cer_red_g4": 0.5294117647058824, "overcorr_g4": 0.0030272452068617556, "fixrate_g4": 0.7142857142857143, "seconds_g4": 14.625, "tok_s_g4": 13.675213675213675}
68
+ {"id": "bln600/3200811895", "n_chars_gold": 819, "cer_input": 0.07326007326007326, "wer_input": 0.22962962962962963, "cer_dg": 0.045177045177045176, "wer_dg": 0.06666666666666667, "rel_cer_red_dg": 0.38333333333333336, "overcorr_dg": 0.02746566791510612, "fixrate_dg": 0.9491525423728814, "seconds_dg": 1.761, "tok_s_dg": 100.51107325383306, "denoising_steps_dg": 9, "cer_dgc": 0.07326007326007326, "wer_dgc": 0.22962962962962963, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.714, "tok_s_dgc": 309.5238095238095, "denoising_steps_dgc": 3, "cer_g4": 0.03785103785103785, "wer_g4": 0.08888888888888889, "rel_cer_red_g4": 0.4833333333333334, "overcorr_g4": 0.0, "fixrate_g4": 0.6949152542372882, "seconds_g4": 14.63, "tok_s_g4": 13.670539986329459}
69
+ {"id": "bln600/3206260984", "n_chars_gold": 865, "cer_input": 0.03468208092485549, "wer_input": 0.1510791366906475, "cer_dg": 0.015028901734104046, "wer_dg": 0.05755395683453238, "rel_cer_red_dg": 0.5666666666666667, "overcorr_dg": 0.005868544600938967, "fixrate_dg": 0.8148148148148148, "seconds_dg": 1.394, "tok_s_dg": 144.1893830703013, "denoising_steps_dg": 7, "cer_dgc": 0.03468208092485549, "wer_dgc": 0.1510791366906475, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.706, "tok_s_dgc": 310.19830028328613, "denoising_steps_dgc": 3, "cer_g4": 0.013872832369942197, "wer_g4": 0.05755395683453238, "rel_cer_red_g4": 0.6, "overcorr_g4": 0.0011737089201877935, "fixrate_g4": 0.6666666666666666, "seconds_g4": 15.071, "tok_s_g4": 13.668635127065224}
70
+ {"id": "bln600/3206237358", "n_chars_gold": 864, "cer_input": 0.06018518518518518, "wer_input": 0.24, "cer_dg": 0.017361111111111112, "wer_dg": 0.05333333333333334, "rel_cer_red_dg": 0.7115384615384616, "overcorr_dg": 0.013205282112845138, "fixrate_dg": 0.9777777777777777, "seconds_dg": 1.749, "tok_s_dg": 103.48770726129216, "denoising_steps_dg": 9, "cer_dgc": 0.06018518518518518, "wer_dgc": 0.24, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.698, "tok_s_dgc": 315.18624641833816, "denoising_steps_dgc": 3, "cer_g4": 0.016203703703703703, "wer_g4": 0.03333333333333333, "rel_cer_red_g4": 0.7307692307692308, "overcorr_g4": 0.009603841536614645, "fixrate_g4": 0.9111111111111111, "seconds_g4": 13.466, "tok_s_g4": 13.664042774394773}
71
+ {"id": "bln600/3206203527", "n_chars_gold": 867, "cer_input": 0.03229527104959631, "wer_input": 0.13836477987421383, "cer_dg": 0.02306805074971165, "wer_dg": 0.05660377358490566, "rel_cer_red_dg": 0.28571428571428564, "overcorr_dg": 0.009478672985781991, "fixrate_dg": 0.85, "seconds_dg": 1.224, "tok_s_dg": 164.2156862745098, "denoising_steps_dg": 6, "cer_dgc": 0.03229527104959631, "wer_dgc": 0.13836477987421383, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.706, "tok_s_dgc": 307.36543909348444, "denoising_steps_dgc": 3, "cer_g4": 0.012687427912341407, "wer_g4": 0.05660377358490566, "rel_cer_red_g4": 0.6071428571428572, "overcorr_g4": 0.001184834123222749, "fixrate_g4": 0.75, "seconds_g4": 14.975, "tok_s_g4": 13.689482470784641}
72
+ {"id": "bln600/3205295999", "n_chars_gold": 748, "cer_input": 0.12834224598930483, "wer_input": 0.42276422764227645, "cer_dg": 0.07887700534759358, "wer_dg": 0.17886178861788618, "rel_cer_red_dg": 0.38541666666666674, "overcorr_dg": 0.03654970760233918, "fixrate_dg": 0.8860759493670886, "seconds_dg": 1.753, "tok_s_dg": 88.9903023388477, "denoising_steps_dg": 9, "cer_dgc": 0.12834224598930483, "wer_dgc": 0.42276422764227645, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.702, "tok_s_dgc": 313.3903133903134, "denoising_steps_dgc": 3, "cer_g4": 0.07754010695187166, "wer_g4": 0.16260162601626016, "rel_cer_red_g4": 0.39583333333333337, "overcorr_g4": 0.027777777777777776, "fixrate_g4": 0.8354430379746836, "seconds_g4": 11.884, "tok_s_g4": 13.631773813530797}
73
+ {"id": "bln600/3206223444", "n_chars_gold": 845, "cer_input": 0.13254437869822486, "wer_input": 0.3191489361702128, "cer_dg": 0.1467455621301775, "wer_dg": 0.24113475177304963, "rel_cer_red_dg": -0.10714285714285701, "overcorr_dg": 0.05548037889039242, "fixrate_dg": 0.7878787878787878, "seconds_dg": 5.699, "tok_s_dg": 30.707141603790138, "denoising_steps_dg": 31, "cer_dgc": 0.13254437869822486, "wer_dgc": 0.3191489361702128, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.709, "tok_s_dgc": 310.29619181946407, "denoising_steps_dgc": 3, "cer_g4": 0.12662721893491125, "wer_g4": 0.28368794326241137, "rel_cer_red_g4": 0.04464285714285716, "overcorr_g4": 0.0040595399188092015, "fixrate_g4": 0.19696969696969696, "seconds_g4": 15.486, "tok_s_g4": 13.625209866976624}
74
+ {"id": "bln600/3200810270", "n_chars_gold": 814, "cer_input": 0.038083538083538086, "wer_input": 0.16666666666666666, "cer_dg": 0.01597051597051597, "wer_dg": 0.08333333333333333, "rel_cer_red_dg": 0.5806451612903226, "overcorr_dg": 0.005025125628140704, "fixrate_dg": 0.7857142857142857, "seconds_dg": 1.383, "tok_s_dg": 145.3362255965293, "denoising_steps_dg": 7, "cer_dgc": 0.038083538083538086, "wer_dgc": 0.16666666666666666, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 1.055, "tok_s_dgc": 209.478672985782, "denoising_steps_dgc": 5, "cer_g4": 0.0171990171990172, "wer_g4": 0.06818181818181818, "rel_cer_red_g4": 0.5483870967741936, "overcorr_g4": 0.0, "fixrate_g4": 0.75, "seconds_g4": 14.589, "tok_s_g4": 13.708958804578792}
75
+ {"id": "bln600/3206190914", "n_chars_gold": 879, "cer_input": 0.04664391353811149, "wer_input": 0.12666666666666668, "cer_dg": 0.03981797497155859, "wer_dg": 0.08666666666666667, "rel_cer_red_dg": 0.1463414634146341, "overcorr_dg": 0.010588235294117647, "fixrate_dg": 0.6764705882352942, "seconds_dg": 1.392, "tok_s_dg": 142.24137931034483, "denoising_steps_dg": 7, "cer_dgc": 0.04664391353811149, "wer_dgc": 0.12666666666666668, "rel_cer_red_dgc": 0.0, "overcorr_dgc": 0.0, "fixrate_dgc": 0.0, "seconds_dgc": 0.881, "tok_s_dgc": 240.6356413166856, "denoising_steps_dgc": 4, "cer_g4": 0.04323094425483504, "wer_g4": 0.1, "rel_cer_red_g4": 0.07317073170731697, "overcorr_g4": 0.002352941176470588, "fixrate_g4": 0.17647058823529413, "seconds_g4": 15.216, "tok_s_g4": 13.669821240799159}
results/summary.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Benchmark summary
2
+
3
+ Passages: 75 · macro means over passages (micro CER in footnote)
4
+
5
+ | Model | CER ↓ | WER ↓ | Rel. CER reduction ↑ | Over-correction ↓ | Fix rate ↑ | Median s/passage | tok/s |
6
+ |---|---|---|---|---|---|---|---|
7
+ | OCR input (uncorrected) | 0.066 | 0.215 | — | — | — | — | — |
8
+ | DiffusionGemma 26B-A4B-it | 0.036 | 0.076 | 49.4% | 1.4% | 85.2% | 1.74 | 118.7 |
9
+ | DiffusionGemma (OCR-seeded canvas) | 0.081 | 0.226 | -17.2% | 0.0% | 0.6% | 0.70 | 323.2 |
10
+ | Gemma-4-E4B-it | 0.042 | 0.107 | 45.9% | 0.4% | 61.5% | 14.68 | 13.7 |
11
+
12
+ Micro (corpus-level) CER — input: 0.062, DiffusionGemma 26B-A4B-it: 0.033, DiffusionGemma (OCR-seeded canvas): 0.075, Gemma-4-E4B-it: 0.038.
13
+ Mean denoising steps, DiffusionGemma 26B-A4B-it: 10.1 (max 48).
14
+ Mean denoising steps, DiffusionGemma (OCR-seeded canvas): 3.3 (max 48).
15
+
16
+ ## Config
17
+
18
+ ```json
19
+ {
20
+ "date": "2026-06-10",
21
+ "dataset": "bln600",
22
+ "n": 75,
23
+ "seed": 42,
24
+ "max_passage_tokens": 220,
25
+ "prompt": "Correct the OCR errors in the following text from a 19th-century English newspaper.\nFix only recognition errors (wrong, missing, or extra characters). Do not modernise spelling, do not rephrase, and do not add or remove content. Preserve the original punctuation unless it is clearly an OCR error.\nOutput only the corrected text, with no commentary or preamble.\n\nOCR text:\n{ocr}",
26
+ "transformers": "5.11.0",
27
+ "torch": "2.9.1+cu128",
28
+ "gpu": "NVIDIA A100-SXM4-80GB",
29
+ "generation": {
30
+ "diffusiongemma": "generation_config defaults (entropy sampler), max_new_tokens=256",
31
+ "diffusiongemma_canvas": "as diffusiongemma, but first canvas seeded with the OCR text via decoder_input_ids (random tail padding, seed 0)",
32
+ "gemma4": "do_sample=False (greedy), max_new_tokens=256"
33
+ }
34
+ }
35
+ ```