| """German-OCR · GOCR — Demo (CPU), im german-ocr.de-Look. |
| |
| Moderner OCR-Viewer: buntes Box-Overlay + Maske + Text-Layout + strukturiertes JSON. |
| Nutzt das eigene g_ocr-Paket (from_pretrained). Schnell, CPU, kein GPU. |
| """ |
| import os |
| import json |
| import time |
|
|
| import numpy as np |
| from PIL import Image, ImageDraw, ImageFont |
| import gradio as gr |
| import matplotlib.font_manager as fm |
|
|
| import g_ocr |
|
|
# Public links used by the header and footer markup.
WEBSITE = "https://german-ocr.de"
LOGO = "https://german-ocr.de/icon.png"

# Example images are looked up in an "examples" folder next to this script.
_HERE = os.path.dirname(os.path.abspath(__file__))
EXAMPLES_DIR = os.path.join(_HERE, "examples")

# Font file for the text-layout view, resolved through matplotlib's font manager.
_FONT_PATH = fm.findfont("DejaVu Sans")

# The engine is created once at import time and immediately run on a blank
# 64x64 image; the result is discarded.
# NOTE(review): presumably a warm-up so the first real request does not pay
# the initialisation cost -- confirm against g_ocr.
ocr = g_ocr.from_pretrained(drop_score=0.4)
ocr.read(np.zeros((64, 64, 3), dtype=np.uint8))
|
|
| |
# RGB colours assigned to regions in order; indices beyond the end wrap around.
PALETTE = [
    (230, 25, 75),
    (60, 180, 75),
    (0, 130, 200),
    (245, 130, 48),
    (145, 30, 180),
    (0, 158, 115),
    (240, 50, 230),
    (0, 114, 178),
    (213, 94, 0),
    (204, 121, 167),
    (86, 180, 233),
    (170, 110, 40),
    (0, 128, 128),
    (128, 0, 0),
    (75, 175, 60),
]
|
|
# Fonts that have already been loaded, keyed by their (clamped) pixel size.
_font_cache = {}


def _font(sz):
    """Return a font object for the requested size, loading it at most once.

    The size is clamped to the range 10..42 and truncated to an integer
    before it is used as the cache key.  If the TrueType file at
    ``_FONT_PATH`` cannot be loaded for any reason, Pillow's built-in
    default font is cached and returned instead.
    """
    size = int(max(10, min(42, sz)))
    try:
        return _font_cache[size]
    except KeyError:
        pass

    try:
        font = ImageFont.truetype(_FONT_PATH, size)
    except Exception:
        # Deliberate best effort: a missing or unreadable font must not
        # break rendering, so fall back to the built-in bitmap font.
        font = ImageFont.load_default()
    _font_cache[size] = font
    return font
|
|
|
|
def _col(i):
    """Return the palette colour for region index ``i``, wrapping around."""
    slot = i % len(PALETTE)
    return PALETTE[slot]
|
|
|
|
def _pts(r):
    """Return the region's ``quad`` as a list of integer ``(x, y)`` tuples."""
    corners = []
    for x, y in r["quad"]:
        corners.append((int(x), int(y)))
    return corners
|
|
|
|
def _overlay(rgb, regions):
    """Return the image with one coloured box per region painted on top.

    Each region gets its own palette colour: the polygon is filled
    semi-transparently and outlined with a fully opaque 3-pixel line.
    The result is returned as an RGB numpy array.
    """
    background = Image.fromarray(rgb).convert("RGBA")
    layer = Image.new("RGBA", background.size, (0, 0, 0, 0))
    pen = ImageDraw.Draw(layer)

    for index, region in enumerate(regions):
        colour = _col(index)
        corners = _pts(region)
        pen.polygon(corners, fill=(*colour, 46))
        # Repeat the first corner so the outline is drawn as a closed path.
        closed_path = [*corners, corners[0]]
        pen.line(closed_path, fill=(*colour, 255), width=3, joint="curve")

    blended = Image.alpha_composite(background, layer)
    return np.array(blended.convert("RGB"))
|
|
|
|
def _mask(shape, regions):
    """Return a detection mask: region polygons in white on a black canvas.

    ``shape`` supplies the canvas height and width through its first two
    entries; the mask is returned as an RGB numpy array.
    """
    height, width = shape[:2]
    canvas = Image.new("RGB", (width, height), (0, 0, 0))
    pen = ImageDraw.Draw(canvas)
    white = (255, 255, 255)
    for region in regions:
        pen.polygon(_pts(region), fill=white)
    return np.array(canvas)
|
|
|
|
def _textlayout(shape, regions):
    """Return a white page with each recognised text drawn at its position.

    The text (at most its first 60 characters) is placed at the top-left
    corner of the region's bounding box, in the region's palette colour,
    with a font size derived from the bounding-box height.
    """
    height, width = shape[:2]
    page = Image.new("RGB", (width, height), (255, 255, 255))
    pen = ImageDraw.Draw(page)

    for index, region in enumerate(regions):
        corners = _pts(region)
        xs = [x for x, _ in corners]
        ys = [y for _, y in corners]
        left = min(xs)
        top = min(ys)
        box_height = max(ys) - top
        pen.text(
            (left + 1, top),
            region["text"][:60],
            fill=_col(index),
            font=_font(box_height * 0.72),
        )

    return np.array(page)
|
|
|
|
def _pretty(res):
    """Render an OCR result as compact, human-readable JSON text.

    The header fields are pretty-printed with a two-space indent; ``text``
    and ``regions`` are then appended by hand so that every region occupies
    exactly one line instead of being expanded over many lines.  All
    hand-written members use the same two-space indent as the header, and an
    empty region list is rendered as ``[]``.

    Args:
        res: Result mapping with the keys ``engine``, ``version``, ``image``,
            ``n_regions``, ``text`` and ``regions`` (an iterable of
            JSON-serialisable region objects).

    Returns:
        A string containing a valid JSON document.

    Raises:
        KeyError: If one of the required keys is missing from ``res``.
    """
    head = {key: res[key] for key in ("engine", "version", "image", "n_regions")}
    # ``head`` always has four members, so the dump always ends with "\n}".
    # Cut that off so further members can be appended to the same object.
    opening = json.dumps(head, ensure_ascii=False, indent=2)[:-2]
    text_member = ',\n  "text": ' + json.dumps(res["text"], ensure_ascii=False)

    rows = ["    " + json.dumps(region, ensure_ascii=False) for region in res["regions"]]
    regions_json = "[\n" + ",\n".join(rows) + "\n  ]" if rows else "[]"
    regions_member = ',\n  "regions": ' + regions_json

    return opening + text_member + regions_member + "\n}"
|
|
|
|
def run(image):
    """Run OCR on one image and build every output shown in the UI.

    Args:
        image: The uploaded document as delivered by the image input
            (configured with ``type="numpy"``), or ``None`` when nothing
            has been uploaded.

    Returns:
        A 6-tuple ``(boxes, mask, text_layout, text, json_text, status)``:
        the three rendered views, the recognised full text, the formatted
        JSON and a status line.  Without an image the views are ``None``,
        the text is empty, the JSON is ``"{}"`` and the status asks the
        user to upload a picture.
    """
    if image is None:
        return None, None, None, "", "{}", "Bitte ein Bild hochladen."

    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    started = time.perf_counter()
    res = ocr.read(image)
    elapsed_ms = (time.perf_counter() - started) * 1000

    regions = res["regions"]
    boxes = _overlay(image, regions)
    mask = _mask(image.shape, regions)
    tlay = _textlayout(image.shape, regions)
    status = f"✅ {res['n_regions']} Regionen · {elapsed_ms:.0f} ms (CPU) — Output bereit fürs LLM"
    return boxes, mask, tlay, res["text"], _pretty(res), status
|
|
|
|
# Page header as inline-styled HTML: logo (from LOGO), product title, tagline
# and a row of feature badges.  Rendered through gr.HTML at the top of the UI.
HEADER = f"""
<div style="text-align:center;padding:24px 16px 4px;">
<img src="{LOGO}" alt="German-OCR" style="height:54px;display:block;margin:0 auto;"/>
<div style="font-size:1.95em;font-weight:800;color:#0f172a;margin-top:8px;letter-spacing:-.5px;">
German-OCR <span style="color:#2563eb;">GOCR</span></div>
<div style="font-size:1.12em;color:#334155;margin-top:2px;">Deutsche OCR-Engine — ganzes Dokument → Text + Position (bbox)</div>
<div style="color:#64748b;margin-top:4px;font-size:.98em;">Schnell · CPU, kein GPU · Output direkt fürs LLM</div>
<div style="margin-top:12px;display:flex;gap:8px;justify-content:center;flex-wrap:wrap;">
<span style="background:#eff6ff;color:#1d4ed8;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">🇩🇪 Deutsch</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">⚡ CPU, kein GPU</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">📦 ~38 MB</span>
<span style="background:#f1f5f9;color:#334155;padding:4px 12px;border-radius:999px;font-size:.85em;font-weight:600;">🎯 bbox + JSON</span>
</div>
</div>
"""
|
|
# Call-to-action banner as inline-styled HTML; its button links to WEBSITE.
# Rendered through gr.HTML below the viewer.
FOOTER = f"""
<div style="text-align:center;margin:20px auto 6px;padding:18px;border-radius:14px;
background:#2563eb;color:#fff;max-width:1080px;">
<div style="font-size:1.2em;font-weight:800;">Das KI-Büro für Ihre Dokumente</div>
<div style="opacity:.92;margin:6px 0 12px;">OCR · Vision · KI-Assistent · REST-API — on-prem, DSGVO (Frankfurt).</div>
<a href="{WEBSITE}" target="_blank" style="background:#fff;color:#2563eb;font-weight:800;
padding:9px 20px;border-radius:8px;text-decoration:none;">→ german-ocr.de</a>
</div>
"""
|
|
|
|
def build():
    """Assemble the Gradio Blocks UI and return it without launching it.

    Layout: header, then a row with two columns.  The left column holds the
    image input, the run button, a status box and (if available) example
    images; the right column holds one tab per output view.  The footer
    follows the row, and the button is wired to ``run``.

    Example images are taken from ``EXAMPLES_DIR``.  Only visible regular
    files are offered: hidden entries (such as ``.gitkeep`` or
    ``.DS_Store``) and sub-directories are skipped because they cannot be
    loaded as images.

    Returns:
        The constructed ``gr.Blocks`` application.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation had been flattened -- confirm it matches the intended layout.
    theme = gr.themes.Soft(primary_hue="blue", neutral_hue="slate")

    # List the directory once and keep only entries usable as examples.
    example_paths = []
    if os.path.isdir(EXAMPLES_DIR):
        for entry in sorted(os.listdir(EXAMPLES_DIR)):
            candidate = os.path.join(EXAMPLES_DIR, entry)
            if not entry.startswith(".") and os.path.isfile(candidate):
                example_paths.append(candidate)

    with gr.Blocks(title="German-OCR · GOCR", theme=theme) as demo:
        gr.HTML(HEADER)
        with gr.Row():
            with gr.Column(scale=1):
                inp = gr.Image(label="Dokument", type="numpy", height=420)
                btn = gr.Button("🚀 Dokument erkennen", variant="primary", size="lg")
                status = gr.Textbox(label="Status", interactive=False)
                if example_paths:
                    gr.Examples(examples=example_paths, inputs=inp, label="Beispiele")
            with gr.Column(scale=1):
                with gr.Tab("📦 Boxen"):
                    out_boxes = gr.Image(label="Erkannte Regionen (Farbe je Box)",
                                         interactive=False, height=420)
                with gr.Tab("🎭 Maske"):
                    out_mask = gr.Image(label="Detektions-Maske", interactive=False, height=420)
                with gr.Tab("🔤 Text-Layout"):
                    out_tlay = gr.Image(label="Erkannte Werte an Position",
                                        interactive=False, height=420)
                with gr.Tab("📋 Text"):
                    out_txt = gr.Textbox(label="Volltext (Lesereihenfolge)", lines=15)
                with gr.Tab("🔧 JSON"):
                    out_json = gr.Code(label="Strukturiertes JSON — direkt fürs LLM/Backend",
                                       language="json")
        gr.HTML(FOOTER)
        # The output order must match the tuple returned by run().
        btn.click(run, inputs=inp,
                  outputs=[out_boxes, out_mask, out_tlay, out_txt, out_json, status])
    return demo
|
|
|
|
if __name__ == "__main__":
    app = build()
    # NOTE(review): allowed_paths lets the server hand out files below these
    # directories.  "~/.cache" is broad and may hold unrelated data (for
    # example credentials cached by other tools) -- confirm it is required
    # and narrow it to the specific sub-directory if possible.
    served_paths = [EXAMPLES_DIR, os.path.expanduser("~/.cache")]
    # Listen on all interfaces on port 7860.
    app.launch(server_name="0.0.0.0", server_port=7860, allowed_paths=served_paths)
|
|