GOCR-Demo / app.py
Keyven's picture
GOCR — KSVTRv3-de (deutscher Recognizer), ONNX/CPU · Apache-2.0 (built on OpenOCR + PaddleOCR)
30b08a5
Raw
History Blame Contribute Delete
7.35 kB
"""German-OCR · GOCR — Demo (CPU), im german-ocr.de-Look.
Moderner OCR-Viewer: buntes Box-Overlay + Maske + Text-Layout + strukturiertes JSON.
Nutzt das eigene g_ocr-Paket (from_pretrained). Schnell, CPU, kein GPU.
"""
import os
import json
import time
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import gradio as gr
import matplotlib.font_manager as fm
import g_ocr
# Directory containing this script; the "examples" folder is looked up next to it.
_HERE = os.path.dirname(os.path.abspath(__file__))
EXAMPLES_DIR = os.path.join(_HERE, "examples")
# Font file that matplotlib resolves for "DejaVu Sans"; _font() loads it via PIL.
_FONT_PATH = fm.findfont("DejaVu Sans")
# Links interpolated into the HEADER / FOOTER HTML snippets.
WEBSITE = "https://german-ocr.de"
LOGO = "https://german-ocr.de/icon.png"
# The OCR engine is created once at import time and shared by all requests.
# NOTE(review): drop_score is presumably a minimum confidence for keeping a
# region — confirm against the g_ocr documentation.
ocr = g_ocr.from_pretrained(drop_score=0.4)
# One throw-away read on a blank 64x64 image at import time.
ocr.read(np.zeros((64, 64, 3), np.uint8)) # warmup
# Vivid, easily distinguishable RGB colours (one per region, reused cyclically).
PALETTE = [
    (230, 25, 75),
    (60, 180, 75),
    (0, 130, 200),
    (245, 130, 48),
    (145, 30, 180),
    (0, 158, 115),
    (240, 50, 230),
    (0, 114, 178),
    (213, 94, 0),
    (204, 121, 167),
    (86, 180, 233),
    (170, 110, 40),
    (0, 128, 128),
    (128, 0, 0),
    (75, 175, 60),
]
# Maps a clamped integer pixel size to the font object already loaded for it.
_font_cache = {}


def _font(sz):
    """Return a cached PIL font for the requested pixel size.

    The size is clamped to 10..42 and truncated to an int before the lookup.
    Fonts are loaded from ``_FONT_PATH``; if that fails for any reason, PIL's
    built-in default font is cached for that size instead.
    """
    size = int(max(10, min(42, sz)))
    cached = _font_cache.get(size)
    if cached is not None:
        return cached
    try:
        loaded = ImageFont.truetype(_FONT_PATH, size)
    except Exception:
        # Best effort: fall back to the bundled default font rather than fail.
        loaded = ImageFont.load_default()
    _font_cache[size] = loaded
    return loaded
def _col(i):
    """Return the palette colour for region index ``i``, wrapping around the palette."""
    slot = i % len(PALETTE)
    return PALETTE[slot]
def _pts(r):
    """Return the region's ``"quad"`` corners as a list of integer ``(x, y)`` tuples."""
    corners = []
    for x, y in r["quad"]:
        corners.append((int(x), int(y)))
    return corners
def _overlay(rgb, regions):
    """Return the original image with a coloured box overlay as an RGB array.

    Every region gets its own palette colour: its polygon is filled with that
    colour at alpha 46 and outlined with an opaque, 3-pixel-wide closed line.
    The overlay is alpha-composited onto the input image.
    """
    background = Image.fromarray(rgb).convert("RGBA")
    layer = Image.new("RGBA", background.size, (0, 0, 0, 0))
    draw = ImageDraw.Draw(layer)
    for idx, region in enumerate(regions):
        colour = _col(idx)
        corners = _pts(region)
        draw.polygon(corners, fill=(*colour, 46))
        # Repeat the first corner so the outline is closed.
        outline = [*corners, corners[0]]
        draw.line(outline, fill=(*colour, 255), width=3, joint="curve")
    composed = Image.alpha_composite(background, layer)
    return np.array(composed.convert("RGB"))
def _mask(shape, regions):
    """Return the detection mask: region polygons in white on a black RGB canvas.

    ``shape`` supplies the canvas size; only its first two entries
    (height, width) are used.
    """
    height, width = shape[:2]
    canvas = Image.new("RGB", (width, height), (0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    white = (255, 255, 255)
    for region in regions:
        pen.polygon(_pts(region), fill=white)
    return np.array(canvas)
def _textlayout(shape, regions):
    """Return a white RGB sheet with each region's text drawn at its position.

    The text is anchored at the top-left of the region's bounding box
    (shifted one pixel right), truncated to 60 characters, drawn in the
    region's palette colour, and sized at 0.72 of the bounding-box height.
    """
    height, width = shape[:2]
    sheet = Image.new("RGB", (width, height), (255, 255, 255))
    pen = ImageDraw.Draw(sheet)
    for idx, region in enumerate(regions):
        corners = _pts(region)
        xs = [x for x, _ in corners]
        ys = [y for _, y in corners]
        left, top = min(xs), min(ys)
        box_height = max(ys) - top
        label = region["text"][:60]
        pen.text(
            (left + 1, top),
            label,
            fill=_col(idx),
            font=_font(box_height * 0.72),
        )
    return np.array(sheet)
def _pretty(res):
    """Serialise an OCR result as compact but readable JSON text.

    The ``engine``, ``version``, ``image`` and ``n_regions`` entries are
    pretty-printed first, followed by ``text`` and then ``regions`` with one
    region object per line. Non-ASCII characters are emitted unescaped.
    """
    header = {key: res[key] for key in ("engine", "version", "image", "n_regions")}
    # Cut the closing "\n}" so the remaining members can be appended by hand.
    opening = json.dumps(header, ensure_ascii=False, indent=2)[:-2]
    text_member = ',\n "text": ' + json.dumps(res["text"], ensure_ascii=False)
    region_lines = [
        " " + json.dumps(region, ensure_ascii=False) for region in res["regions"]
    ]
    pieces = [
        opening,
        text_member,
        ',\n "regions": [\n',
        ",\n".join(region_lines),
        "\n ]\n}",
    ]
    return "".join(pieces)
def run(image):
    """Run OCR on one image and build every output shown in the viewer.

    Args:
        image: The image as a NumPy array (the Gradio input is configured
            with ``type="numpy"``), or ``None`` when nothing was uploaded.

    Returns:
        A 6-tuple ``(boxes, mask, text_layout, text, json_text, status)``:
        three rendered images, the recognised full text, the formatted JSON
        and a status line. For ``None`` input the images are ``None``, the
        text is empty, the JSON is ``"{}"`` and the status asks for an upload.
    """
    if image is None:
        return None, None, None, "", "{}", "Bitte ein Bild hochladen."
    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump (NTP, manual changes), which would distort the reported duration.
    started = time.perf_counter()
    res = ocr.read(image)
    elapsed_ms = (time.perf_counter() - started) * 1000
    regions = res["regions"]
    boxes = _overlay(image, regions)
    mask = _mask(image.shape, regions)
    tlay = _textlayout(image.shape, regions)
    status = f"✅ {res['n_regions']} Regionen · {elapsed_ms:.0f} ms (CPU) — Output bereit fürs LLM"
    return boxes, mask, tlay, res["text"], _pretty(res), status
# HTML snippet rendered at the top of the page: logo (from LOGO), title,
# tagline and a row of feature badges. All styling is inline CSS.
HEADER = f"""
<div style="text-align:center;padding:24px 16px 4px;">
<img src="{LOGO}" alt="German-OCR" style="height:54px;display:block;margin:0 auto;"/>
<div style="font-size:1.95em;font-weight:800;color:#0f172a;margin-top:8px;letter-spacing:-.5px;">
German-OCR <span style="color:#2563eb;">GOCR</span></div>
<div style="font-size:1.12em;color:#334155;margin-top:2px;">Deutsche OCR-Engine — ganzes Dokument → Text + Position (bbox)</div>
<div style="color:#64748b;margin-top:4px;font-size:.98em;">Schnell · CPU, kein GPU · Output direkt fürs LLM</div>
<div style="margin-top:12px;display:flex;gap:8px;justify-content:center;flex-wrap:wrap;">
<span style="background:#eff6ff;color:#1d4ed8;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">🇩🇪 Deutsch</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">⚡ CPU, kein GPU</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">📦 ~38 MB</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">🎯 bbox + JSON</span>
</div>
</div>
"""
# HTML snippet rendered at the bottom of the page: a promotional banner with
# a link to WEBSITE that opens in a new tab. All styling is inline CSS.
FOOTER = f"""
<div style="text-align:center;margin:20px auto 6px;padding:18px;border-radius:14px;
background:#2563eb;color:#fff;max-width:1080px;">
<div style="font-size:1.2em;font-weight:800;">Das KI-Büro für Ihre Dokumente</div>
<div style="opacity:.92;margin:6px 0 12px;">OCR · Vision · KI-Assistent · REST-API — on-prem, DSGVO (Frankfurt).</div>
<a href="{WEBSITE}" target="_blank" style="background:#fff;color:#2563eb;font-weight:800;
padding:9px 20px;border-radius:8px;text-decoration:none;">→ german-ocr.de</a>
</div>
"""
def build():
    """Assemble the Gradio Blocks UI and wire the button to ``run``.

    Layout: header HTML, then a row with two columns. The left column holds
    the image input, the run button, the status box and (if any exist) the
    example images. The right column holds one tab per output: box overlay,
    mask, text layout, plain text and JSON. The footer HTML follows the row.

    Returns:
        The constructed ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")

    # Offer only regular, non-hidden files as examples. The directory is
    # listed once; dotfiles (.DS_Store, .gitkeep, ...) and sub-directories are
    # skipped because they cannot be loaded into the image input.
    example_files = []
    if os.path.isdir(EXAMPLES_DIR):
        candidates = (
            os.path.join(EXAMPLES_DIR, name)
            for name in sorted(os.listdir(EXAMPLES_DIR))
            if not name.startswith(".")
        )
        example_files = [path for path in candidates if os.path.isfile(path)]

    with gr.Blocks(title="German-OCR · GOCR", theme=theme) as demo:
        gr.HTML(HEADER)
        with gr.Row():
            with gr.Column(scale=1):
                inp = gr.Image(label="Dokument", type="numpy", height=420)
                btn = gr.Button("🚀 Dokument erkennen", variant="primary", size="lg")
                status = gr.Textbox(label="Status", interactive=False)
                if example_files:
                    gr.Examples(examples=example_files, inputs=inp, label="Beispiele")
            with gr.Column(scale=1):
                with gr.Tab("📦 Boxen"):
                    out_boxes = gr.Image(label="Erkannte Regionen (Farbe je Box)",
                                         interactive=False, height=420)
                with gr.Tab("🎭 Maske"):
                    out_mask = gr.Image(label="Detektions-Maske",
                                        interactive=False, height=420)
                with gr.Tab("🔤 Text-Layout"):
                    out_tlay = gr.Image(label="Erkannte Werte an Position",
                                        interactive=False, height=420)
                with gr.Tab("📋 Text"):
                    out_txt = gr.Textbox(label="Volltext (Lesereihenfolge)", lines=15)
                with gr.Tab("🔧 JSON"):
                    out_json = gr.Code(label="Strukturiertes JSON — direkt fürs LLM/Backend",
                                       language="json")
        gr.HTML(FOOTER)
        # The output order must match the tuple returned by run().
        btn.click(run, inputs=inp,
                  outputs=[out_boxes, out_mask, out_tlay, out_txt, out_json, status])
    return demo
# Script entry point: build the UI and serve it on all interfaces, port 7860.
if __name__ == "__main__":
    app = build()
    # NOTE(review): allowed_paths permits Gradio to serve files from these
    # directories. The whole ~/.cache tree is included, which may hold
    # unrelated or sensitive files — confirm it is needed and narrow it.
    served_dirs = [EXAMPLES_DIR, os.path.expanduser("~/.cache")]
    app.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=served_dirs)