mgokg committed on
Commit
86cbf76
·
verified ·
1 Parent(s): 3b45f27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -11,12 +11,19 @@ from marker.converters.pdf import PdfConverter
11
  from marker.models import create_model_dict
12
  from marker.output import text_from_rendered
13
 
 
 
 
 
 
 
 
 
14
  # Docling
15
  accelerator_options = AcceleratorOptions(
16
  num_threads=8, device=AcceleratorDevice.CPU
17
  )
18
 
19
-
20
  pipeline_options = PdfPipelineOptions()
21
  pipeline_options.accelerator_options = accelerator_options
22
  pipeline_options.do_ocr = True
@@ -36,6 +43,41 @@ marker_converter = PdfConverter(
36
  artifact_dict=create_model_dict(),
37
  )
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  def convert_document(file, method):
40
  if method == "Docling":
41
  result = docling_converter.convert(file.name)
 
11
  from marker.models import create_model_dict
12
  from marker.output import text_from_rendered
13
 
14
+ import tempfile
15
+ import io
16
+ import re
17
+ import os
18
+ import language_tool_python
19
+ import html
20
+ tool = language_tool_python.LanguageTool('de-DE')
21
+
22
  # Docling
23
  accelerator_options = AcceleratorOptions(
24
  num_threads=8, device=AcceleratorDevice.CPU
25
  )
26
 
 
27
  pipeline_options = PdfPipelineOptions()
28
  pipeline_options.accelerator_options = accelerator_options
29
  pipeline_options.do_ocr = True
 
43
  artifact_dict=create_model_dict(),
44
  )
45
 
46
def check_spelling(text_input: str) -> str:
    """Check text with LanguageTool and return HTML highlighting the findings.

    Args:
        text_input: The text to check. ``None`` or an empty string is treated
            as "no input".

    Returns:
        One of the following strings:

        * an error notice when the module-level ``tool`` is ``None``;
        * a Markdown hint when there is no (non-whitespace) text to check;
        * ``text_input`` unchanged when it already looks like an error
          message (starts with "❌" or "Fehler");
        * a green success paragraph when LanguageTool reports no matches;
        * otherwise a ``<div>`` that starts with the number of matches and
          then contains the HTML-escaped text, with each flagged span wrapped
          in a red, underlined ``<span>`` whose ``title`` holds the
          LanguageTool message and up to three suggestions.
    """
    # NOTE(review): the visible module-level code assigns `tool`
    # unconditionally, so this guard only fires if something else sets it to
    # None (e.g. a try/except around the initialisation) -- confirm.
    if tool is None:
        return "❌ **Fehler:** LanguageTool konnte nicht geladen werden."

    text = text_input or ""
    if not text.strip():
        return "*Bitte lade eine Datei hoch oder füge Text ein...*"
    # Upstream error messages are passed through instead of being checked.
    if text.startswith(("❌", "Fehler")):
        return text

    matches = tool.check(text)
    if not matches:
        return "<p style='color: green;'>✅ Keine Fehler gefunden!</p>"

    def render(segment: str) -> str:
        # Line breaks are converted per text segment (not on the assembled
        # HTML) so that a newline inside a `title` attribute is never turned
        # into a literal "<br>".
        return html.escape(segment).replace("\n", "<br>")

    # NOTE(review): offsets are used directly as Python string indices;
    # confirm they are reported in code points for text with non-BMP
    # characters such as emoji.
    pieces = []
    cursor = 0  # index of the first character not yet emitted
    text_end = len(text)
    # Sort explicitly: correct slicing depends on ascending offsets.
    for match in sorted(matches, key=lambda m: m.offset):
        # Clamp to the part of the text that has not been emitted yet so
        # overlapping matches can never duplicate characters in the output.
        start = max(match.offset, cursor)
        end = min(match.offset + match.error_length, text_end)
        if start >= end:
            # Fully covered by an earlier highlight (or empty): nothing to show.
            continue

        pieces.append(render(text[cursor:start]))

        if match.replacements:
            suggestions = ", ".join(match.replacements[:3])
        else:
            suggestions = "keine Vorschläge"
        pieces.append(
            f'<span style="color: red; text-decoration: underline;" '
            f'title="{html.escape(match.message)} (Vorschläge: {html.escape(suggestions)})">'
            f'{render(text[start:end])}</span>'
        )
        cursor = end

    pieces.append(render(text[cursor:]))
    html_text = "".join(pieces)
    return f"<div><strong>⚠️ {len(matches)} Fehler gefunden</strong><br><br>{html_text}</div>"
80
+
81
  def convert_document(file, method):
82
  if method == "Docling":
83
  result = docling_converter.convert(file.name)