Update app.py
Browse files
app.py
CHANGED
|
@@ -11,12 +11,19 @@ from marker.converters.pdf import PdfConverter
|
|
| 11 |
from marker.models import create_model_dict
|
| 12 |
from marker.output import text_from_rendered
|
| 13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
# Docling
|
| 15 |
accelerator_options = AcceleratorOptions(
|
| 16 |
num_threads=8, device=AcceleratorDevice.CPU
|
| 17 |
)
|
| 18 |
|
| 19 |
-
|
| 20 |
pipeline_options = PdfPipelineOptions()
|
| 21 |
pipeline_options.accelerator_options = accelerator_options
|
| 22 |
pipeline_options.do_ocr = True
|
|
@@ -36,6 +43,41 @@ marker_converter = PdfConverter(
|
|
| 36 |
artifact_dict=create_model_dict(),
|
| 37 |
)
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
def convert_document(file, method):
|
| 40 |
if method == "Docling":
|
| 41 |
result = docling_converter.convert(file.name)
|
|
|
|
| 11 |
from marker.models import create_model_dict
|
| 12 |
from marker.output import text_from_rendered
|
| 13 |
|
| 14 |
+
import tempfile
|
| 15 |
+
import io
|
| 16 |
+
import re
|
| 17 |
+
import os
|
| 18 |
+
import language_tool_python
|
| 19 |
+
import html
|
| 20 |
+
# Module-level LanguageTool checker for German ('de-DE'), constructed once at import time.
# NOTE(review): check_spelling() guards against `tool is None`, but nothing visible here
# ever assigns None -- if construction fails, this line raises instead. Confirm the intent.
tool = language_tool_python.LanguageTool('de-DE')
|
| 21 |
+
|
| 22 |
# Docling
|
| 23 |
accelerator_options = AcceleratorOptions(
|
| 24 |
num_threads=8, device=AcceleratorDevice.CPU
|
| 25 |
)
|
| 26 |
|
|
|
|
| 27 |
pipeline_options = PdfPipelineOptions()
|
| 28 |
pipeline_options.accelerator_options = accelerator_options
|
| 29 |
pipeline_options.do_ocr = True
|
|
|
|
| 43 |
artifact_dict=create_model_dict(),
|
| 44 |
)
|
| 45 |
|
| 46 |
+
def check_spelling(text_input: str) -> str:
    """Check text with LanguageTool and return it as HTML with findings marked.

    Args:
        text_input: Text to check. ``None`` or an empty string counts as
            "no input".

    Returns:
        * a Markdown error note if the module-level ``tool`` is ``None``;
        * a Markdown placeholder if the input is empty or whitespace only;
        * the input unchanged if it starts with "❌" or "Fehler" (it is then
          treated as an error message produced earlier, not as text to check);
        * a green HTML paragraph if LanguageTool reports no matches;
        * otherwise an HTML ``<div>`` holding the number of matches and the
          escaped text, where each flagged span is wrapped in a red,
          underlined ``<span>`` whose ``title`` carries the LanguageTool
          message and up to three suggested replacements.
    """
    if tool is None:
        return "❌ **Fehler:** LanguageTool konnte nicht geladen werden."

    text = text_input or ""
    if not text.strip():
        return "*Bitte lade eine Datei hoch oder füge Text ein...*"
    # NOTE(review): genuine text that happens to begin with "Fehler" is also
    # passed through unchecked by this guard.
    if text.startswith(("❌", "Fehler")):
        return text

    matches = tool.check(text)
    if not matches:
        return "<p style='color: green;'>✅ Keine Fehler gefunden!</p>"

    parts = []
    cursor = 0  # index of the first character of ``text`` not yet emitted
    # Walk the matches left to right in offset order. Sorting makes the result
    # independent of the order in which the checker reports them.
    for match in sorted(matches, key=lambda m: m.offset):
        # Matches may overlap; clip each one to the part that has not been
        # emitted yet so no source text is ever written twice.
        start = max(match.offset, cursor)
        end = match.offset + match.error_length
        if end <= start:
            # Fully covered by an earlier match (or an empty span).
            continue

        parts.append(_escape_with_breaks(text[cursor:start]))

        suggestions = (
            ", ".join(match.replacements[:3])
            if match.replacements
            else "keine Vorschläge"
        )
        # The tooltip is escaped but gets no <br> conversion: it lives inside
        # an attribute value, where a tag would be shown literally.
        parts.append(
            f'<span style="color: red; text-decoration: underline;" '
            f'title="{html.escape(match.message)} (Vorschläge: {html.escape(suggestions)})">'
            f'{_escape_with_breaks(text[start:end])}</span>'
        )
        cursor = end

    parts.append(_escape_with_breaks(text[cursor:]))

    html_text = "".join(parts)
    # The counter reports every match returned by the checker, including ones
    # whose span was absorbed by an overlapping match.
    return f"<div><strong>⚠️ {len(matches)} Fehler gefunden</strong><br><br>{html_text}</div>"


def _escape_with_breaks(fragment: str) -> str:
    """HTML-escape a piece of body text and turn its newlines into ``<br>``."""
    return html.escape(fragment).replace("\n", "<br>")
|
| 80 |
+
|
| 81 |
def convert_document(file, method):
|
| 82 |
if method == "Docling":
|
| 83 |
result = docling_converter.convert(file.name)
|