import gradio as gr
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import (
    AcceleratorDevice,
    PdfPipelineOptions,
    AcceleratorOptions
)
import spaces
from docling.datamodel.base_models import InputFormat
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
from wordfreq import word_frequency
import tempfile
import io
import re
import os
import requests
import language_tool_python
import html
import time

# Download the custom word list that is later handed to LanguageTool as
# additional accepted spellings. The request is bounded by a timeout, and any
# network or JSON failure falls back to an empty list so that the app can
# still start when the remote file is unavailable.
try:
    _data = requests.get("https://specialist-it.de/data.json", timeout=10).json()
except (requests.exceptions.RequestException, ValueError):
    _data = {}
meine_orte = _data.get("meine_orte", []) if isinstance(_data, dict) else []


# ASCII-art placeholder text. replace_markdown_images() substitutes it for
# every Markdown image reference found in the converted document.
dummy = """
___|___|___|___|___|___|___|___|___|___|___|___|__
___|__<(-^,^-)=b_|___|___|___|___|___|___|___|___|
_|___|___|___|___|___|___|___|___|___|___|___|___|
___|___|___|___|___|___|___<'(o.o)'>|___|___|___|_
_|___|___|___|___|___|___|___|___|___|___|___|___|
___|___|_(-_-)zzz__|___|___|___|___|___|___|___|__
_|___|___|___|___|___|___|___|___|___|___|___|___|
___|___|___|___|___|___/ (^_^) /|___|___|___|___|_
_|___|___|___|___|___|___|___|___|___|___|___|___|
_|___|___|___|__d-(^_^)z_|___|___|___|___|___|___|
                                     DUMMYIMAGE
"""

# Exact HTML span that check_spelling() produces when the word "page" is
# flagged with these three suggestions. convert_document() deletes every
# occurrence of it from the final output.
bad_string ="""<span style="color: red; text-decoration: underline;" title="Möglicher Tippfehler gefunden. (Vorschläge: Page, Lage, Tage)">page</span>"""

# Custom stylesheet passed to app.launch(): while the element with id
# "spinner_md" carries the "pending" class, a rotating ring is drawn before it
# and a status text after it. The final rule is now closed with its "}" so
# the stylesheet is well-formed.
css = """
#spinner_md.pending::before {
    content: "";
    display: inline-block;
    width: 20px;
    height: 20px;
    border: 3px solid #eee;
    border-top-color: #2563eb; /* Gradio Blau */
    border-radius: 50%;
    animation: spin 1s linear infinite;
    margin-right: 10px;
    vertical-align: middle;
}

@keyframes spin {
    to { transform: rotate(360deg); }
}

#spinner_md.pending::after {
    content: " Generiere Antwort...";
    font-weight: bold;
    color: #2563eb;
    vertical-align: middle;
}
"""

# Shared LanguageTool instance for German, extended with the custom word list.
# check_spelling() already returns a friendly error message when this is None,
# so a failed start-up is recorded here instead of aborting the whole app.
try:
    tool = language_tool_python.LanguageTool('de-DE', new_spellings=meine_orte)
except Exception as exc:  # start-up boundary: any failure disables the checker
    print(f"LanguageTool konnte nicht geladen werden: {exc}")
    tool = None

def replace_markdown_images(text):
    """Replace Markdown image references with the ASCII-art placeholder.

    Only references with empty alt text and a target of the form
    ``_page_<digits>_Picture_<digits>.jpeg`` are matched; each one is
    replaced by the module-level ``dummy`` string.

    Args:
        text: The text to scan.

    Returns:
        ``text`` with every matching image reference replaced.
    """
    pattern = r'!\[\]\(_page_\d+_Picture_\d+\.jpeg\)'
    # A callable replacement inserts ``dummy`` verbatim. A plain string would
    # be treated as a replacement template, so a backslash in the placeholder
    # would be interpreted as an escape or group reference.
    return re.sub(pattern, lambda _match: dummy, text)

def remove_hashes(text):
    """Return ``text`` with every ``#`` character deleted."""
    return text.replace('#', '')

# Docling: PDF pipeline pinned to the CPU with 8 threads, with OCR and
# table-structure recognition (including cell matching) switched on.
accelerator_options = AcceleratorOptions(
    device=AcceleratorDevice.CPU,
    num_threads=8,
)

pipeline_options = PdfPipelineOptions(
    accelerator_options=accelerator_options,
    do_ocr=True,
    do_table_structure=True,
)
pipeline_options.table_structure_options.do_cell_matching = True

# Converter that applies the options above to PDF input.
docling_converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    },
)

# Marker: PDF converter used by convert_document(), built once at start-up
# with the model dictionary returned by create_model_dict().
marker_converter = PdfConverter(artifact_dict=create_model_dict())


def check_spelling(text_input: str) -> str:
    """Check text with LanguageTool and return it as HTML with findings marked.

    Matches of the rule ``DE_CASE`` are ignored, and spelling matches that
    _filter_english_words() recognises as English words are dropped. Each
    remaining match is wrapped in a red, underlined ``<span>`` whose tooltip
    shows the LanguageTool message and up to three suggestions.

    Args:
        text_input: The text to check. ``None`` is treated as empty.

    Returns:
        An HTML fragment with the marked-up text, or a short message when
        the checker is unavailable, the input is empty, the input is itself
        an error message, or no findings remain.
    """
    IGNORE_RULES = {'DE_CASE'}

    def _to_html(segment: str) -> str:
        # Strip the Markdown '#' characters BEFORE escaping: html.escape()
        # emits "&#x27;" for apostrophes, and deleting '#' afterwards would
        # corrupt that entity into the literal text "&x27;".
        return html.escape(remove_hashes(segment)).replace("\n", "<br>")

    if tool is None:
        return "❌ **Fehler:** LanguageTool konnte nicht geladen werden."
    text = text_input or ""
    if not text.strip():
        return "*Bitte lade eine Datei hoch oder füge Text ein...*"
    # Pass upstream error messages through untouched instead of checking them.
    if text.startswith(("❌", "Fehler")):
        return text

    matches = [m for m in tool.check(text) if m.rule_id not in IGNORE_RULES]
    matches = _filter_english_words(text, matches)

    if not matches:
        return "<p style='color: green;'>✅ Keine Fehler gefunden!</p>"

    # Walk the matches left to right. `cursor` is the end of the text already
    # emitted; clamping each start to it guarantees that overlapping matches
    # never output the same characters twice.
    parts = []
    cursor = 0
    for match in sorted(matches, key=lambda m: m.offset):
        start = max(match.offset, cursor)
        end = match.offset + match.error_length
        if end <= start:
            # Fully covered by an earlier match (or zero-length): nothing
            # left to highlight.
            continue
        parts.append(_to_html(text[cursor:start]))
        suggestions = ", ".join(match.replacements[:3]) if match.replacements else "keine Vorschläge"
        parts.append(
            f'<span style="color: red; text-decoration: underline;" '
            f'title="{html.escape(match.message)} (Vorschläge: {html.escape(suggestions)})">'
            f'{_to_html(text[start:end])}</span>'
        )
        cursor = end
    parts.append(_to_html(text[cursor:]))

    html_text = "".join(parts)
    html_result = f"<div><strong>⚠️ {len(matches)} Fehler gefunden</strong><br><br>{html_text}</div>"
    return replace_markdown_images(html_result)


def _filter_english_words(text: str, matches: list) -> list:
    """Drop spelling matches whose flagged word is a known English word.

    Only matches of the rule ``GERMAN_SPELLER_RULE`` are examined; the
    flagged word is lower-cased and discarded when its English frequency
    according to ``word_frequency`` exceeds ``1e-6``. All other matches are
    kept, in their original order.
    """
    def _is_english(match) -> bool:
        # Non-spelling rules are never filtered, so no frequency lookup.
        if match.rule_id != 'GERMAN_SPELLER_RULE':
            return False
        flagged = text[match.offset:match.offset + match.error_length].lower()
        return word_frequency(flagged, 'en') > 1e-6

    return [match for match in matches if not _is_english(match)]

def convert_document(file):
    """Convert an uploaded PDF to text with Marker and spell-check the result.

    Args:
        file: The value delivered by the Gradio file input: either an object
            exposing the path as ``.name`` or a plain path string. Falsy
            (``None``) when nothing was uploaded.

    Returns:
        The HTML produced by check_spelling() with every occurrence of
        ``bad_string`` removed, or a short hint when no file was provided.
    """
    if not file:
        return "⚠️ Bitte zuerst eine PDF-Datei hochladen."

    # NOTE: the former per-call download of data.json was removed. Its result
    # was stored in unused locals, so it never influenced the check, but it
    # could hang (no timeout) or fail the conversion. The word list is read
    # once at module import.
    pdf_path = getattr(file, "name", file)
    rendered = marker_converter(pdf_path)
    text, _, _ = text_from_rendered(rendered)
    return check_spelling(text).replace(bad_string, "")


def add_ort(neuer_ort: str) -> str:
    """Send a new entry for the word list to the PHP backend via POST.

    Args:
        neuer_ort: The term to add. Surrounding whitespace is stripped;
            ``None`` is treated as empty.

    Returns:
        A status message for the UI: success, "already exists" (HTTP 409),
        a backend error, a connection error, or a prompt when the input is
        empty.
    """
    neuer_ort = (neuer_ort or "").strip()
    if not neuer_ort:
        return "⚠️ Bitte einen Ort eingeben."

    try:
        response = requests.post(
            "https://specialist-it.de/wortliste.php",
            json={"ort": neuer_ort},
            timeout=10,
        )
    except requests.exceptions.RequestException as e:
        return f"❌ Verbindungsfehler: {str(e)}"

    # Decide on the status code first: a 409 needs no body, so it must not
    # depend on the response being valid JSON.
    if response.status_code == 409:
        return f"ℹ️ '{neuer_ort}' existiert bereits in der Liste."

    # A non-JSON or non-object body (e.g. an HTML error page) is treated as
    # "no details available" rather than being reported as a connection error.
    try:
        data = response.json()
    except ValueError:
        data = None
    if not isinstance(data, dict):
        data = {}

    if response.status_code == 200 and data.get("success"):
        return f"✅ '{neuer_ort}' erfolgreich hinzugefügt. Gesamt: {data.get('anzahl_orte')} Orte."
    return f"❌ Fehler: {data.get('error', 'Unbekannter Fehler')}"


# Gradio UI: a result pane, a PDF upload with a convert button, and a small
# form for adding a term to the remote word list.
with gr.Blocks() as app:
    gr.Markdown("# Language Tool \n")

    # The elem_id matches the "#spinner_md" selectors in the css string.
    output_text = gr.HTML(label="Ergebnis", elem_id="spinner_md")
    file_input = gr.File(label="PDF hochladen", file_types=[".pdf"])

    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=file_input,
        outputs=output_text,
        show_progress="full",
    )

    gr.Markdown("---")
    gr.Markdown("### Begriff zur Wortliste hinzufügen (Begriff wird nicht korrigiert)")
    with gr.Row():
        ort_input = gr.Textbox(label="Ort", placeholder="z. B. Würzburg", scale=4)
        add_button = gr.Button("Hinzufügen", scale=1)
    add_status = gr.Markdown()

    add_button.click(add_ort, inputs=ort_input, outputs=add_status)

# Start the server with the custom stylesheet applied.
app.launch(debug=True, show_error=True, css=css)