Spaces:

cstr
/

spacy_de

Sleeping

App Files Files Community

cstr committed on Oct 26, 2025

Commit

398cf4f

verified ·

1 Parent(s): c31489f

Update app.py

Browse files

Files changed (1) hide show

app.py +215 -1

app.py CHANGED Viewed

	@@ -1 +1,215 @@
1	- ~~Geben Sie einen Text ein, um die morphologischen Details für jedes Wort zu erhalten. Um dies als API zu verwenden, klicken Sie auf den "View API"-Link unten.~~

+import gradio as gr
+import spacy
+import os
+from spacy import displacy
+# --- 1. UI Translations ---
+# All user-facing UI text, keyed first by lower-case language code and then by UI element; read by update_ui and the layout code
+UI_TEXT = {
+    "de": {  # German strings; also used as the default UI text at startup
+        "title": "# Deutscher NLP-Analysator (mit spaCy)",  # rendered through gr.Markdown
+        "subtitle": "Geben Sie einen Text ein, um die morphologischen Details für jedes Wort zu erhalten.\n**Um dies als API zu verwenden, klicken Sie auf den \"View API\"-Link unten.**",
+        "input_label": "Deutscher Text",
+        "input_placeholder": "Die schnellen braunen Füchse...",
+        "button_text": "Analysieren",
+        "tab_graphic": "Syntaktische Analyse (Grafik)",
+        "tab_table": "Visuelle Tabelle (Tokens)",
+        "tab_json": "Roh-JSON (für API)",
+        "html_label": "Abhängigkeits-Parse",
+        "table_label": "Analyse-Ergebnisse (Tabelle)",
+        "table_headers": ["Wort", "Lemma", "POS", "Tag (detailliert)", "Morphologie", "Abhängigkeit"],  # same column order as the rows built in get_analysis
+        "json_label": "Analyse-Ergebnisse (JSON)"
+    },
+    "en": {  # English strings; must provide the same keys as "de"
+        "title": "# English NLP Analyzer (with spaCy)",  # rendered through gr.Markdown
+        "subtitle": "Enter any text to get the morphological details for each word.\n**To use this as an API, click the \"View API\" link at the bottom.**",
+        "input_label": "English Text",
+        "input_placeholder": "The quick brown foxes...",
+        "button_text": "Analyze",
+        "tab_graphic": "Syntactic Analysis (Graphic)",
+        "tab_table": "Visual Table (Tokens)",
+        "tab_json": "Raw JSON (for API)",
+        "html_label": "Dependency Parse",
+        "table_label": "Analysis Results (Table)",
+        "table_headers": ["Word", "Lemma", "POS", "Tag (detailed)", "Morphology", "Dependency"],  # same column order as the rows built in get_analysis
+        "json_label": "Analysis Results (JSON)"
+    }
+}
+# --- 2. Model Loading ---
+MODEL_NAMES = {  # lower-case language code -> spaCy pipeline package name handed to load_model
+    "de": "de_core_news_sm",
+    "en": "en_core_web_sm"
+}
+def load_model(model_name):
+    """Checks if model is installed and downloads it if not."""
+    try:
+        nlp = spacy.load(model_name)
+        print(f"{model_name} loaded successfully.")
+    except OSError:
+        print(f"{model_name} not found. Downloading...")
+        os.system(f"python -m spacy download {model_name}")
+        nlp = spacy.load(model_name)
+        print(f"{model_name} downloaded and loaded.")
+    return nlp
+# Load all models at startup and store them in a dictionary
+print("Loading models...")
+MODELS = {
+    "de": load_model(MODEL_NAMES["de"]),
+    "en": load_model(MODEL_NAMES["en"])
+}
+print("All models loaded.")
+# --- 3. The Core Processing Function ---
+def get_analysis(lang, text):
+    """
+    Processes text in the selected language and returns THREE formats:
+    1. A list of lists for the visual DataFrame.
+    2. A list of dicts for the JSON API.
+    3. An HTML string for the dependency parse visualization.
+    """
+    if not text:
+        return [], [], ""  # Return empty for all three outputs
+    # Select the correct pre-loaded model
+    lang_code = lang.lower()
+    nlp = MODELS[lang_code]
+    doc = nlp(text)
+    # 1. Data for the visual DataFrame
+    dataframe_output = []
+    # 2. Data for the JSON API
+    json_output = []
+    for token in doc:
+        # Add data for the JSON API
+        json_output.append({
+            "word": token.text,
+            "lemma": token.lemma_,
+            "pos": token.pos_,
+            "tag": token.tag_,
+            "morphology": str(token.morph),
+            "dependency": token.dep_,
+            "is_stopword": token.is_stop
+        })
+        # Add data for the visual DataFrame
+        dataframe_output.append([
+            token.text,
+            token.lemma_,
+            token.pos_,
+            token.tag_,
+            str(token.morph),
+            token.dep_
+        ])
+    # 3. Data for the HTML/DisplaCy visualization
+    options = {"compact": True, "bg": "#ffffff", "color": "#000000", "font": "Source Sans Pro"}
+    html = displacy.render(
+        doc,
+        style="dep",
+        jupyter=False,
+        options=options
+    )
+    styled_html = f"""
+    <div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; line-height: 2.5;">
+    {html}
+    </div>
+    """
+    # Return all three formats
+    return dataframe_output, json_output, styled_html
+# --- 4. UI Update Function ---
+def update_ui(lang):
+    """Updates all UI components when the language is changed."""
+    lang_code = lang.lower()
+    ui_config = UI_TEXT[lang_code]
+    # Return a dictionary mapping components to their new configurations
+    return {
+        markdown_title: gr.Markdown(value=ui_config["title"]),
+        markdown_subtitle: gr.Markdown(value=ui_config["subtitle"]),
+        text_input: gr.Textbox(
+            label=ui_config["input_label"],
+            placeholder=ui_config["input_placeholder"]
+        ),
+        analyze_button: gr.Button(value=ui_config["button_text"]),
+        tab_graphic: gr.Tab(label=ui_config["tab_graphic"]),
+        tab_table: gr.Tab(label=ui_config["tab_table"]),
+        tab_json: gr.Tab(label=ui_config["tab_json"]),
+        html_out: gr.HTML(label=ui_config["html_label"]),
+        df_out: gr.DataFrame(
+            label=ui_config["table_label"],
+            headers=ui_config["table_headers"],
+            interactive=False
+        ),
+        json_out: gr.JSON(label=ui_config["json_label"])
+    }
+# --- 5. Gradio Interface ---
+with gr.Blocks() as demo:
+    # Initial labels come from the German ("de") entry, matching the selector's default value "DE"
+    default_config = UI_TEXT["de"]
+    # Language selector; its upper-case value is lower-cased by get_analysis and update_ui before lookup
+    lang_radio = gr.Radio(
+        ["DE", "EN"],
+        label="Sprache / Language",
+        value="DE"
+    )
+    markdown_title = gr.Markdown(default_config["title"])
+    markdown_subtitle = gr.Markdown(default_config["subtitle"])
+    text_input = gr.Textbox(
+        label=default_config["input_label"],
+        placeholder=default_config["input_placeholder"],
+        lines=5
+    )
+    analyze_button = gr.Button(default_config["button_text"])
+    with gr.Tabs() as tabs:  # one tab per output format returned by get_analysis
+        with gr.Tab(default_config["tab_graphic"]) as tab_graphic:
+            html_out = gr.HTML(label=default_config["html_label"])
+        with gr.Tab(default_config["tab_table"]) as tab_table:
+            df_out = gr.DataFrame(
+                label=default_config["table_label"],
+                headers=default_config["table_headers"],
+                interactive=False
+            )
+        with gr.Tab(default_config["tab_json"]) as tab_json:
+            json_out = gr.JSON(label=default_config["json_label"])
+    # --- 6. Event Listeners ---
+    # Analyze button: run get_analysis; the outputs list follows its return order (table rows, JSON records, HTML)
+    analyze_button.click(
+        fn=get_analysis,
+        inputs=[lang_radio, text_input],
+        outputs=[df_out, json_out, html_out],
+        api_name="get_morphology" # This API will now require 'lang' as the first input
+    )
+    # Language selector: relabel the UI; outputs lists every component that update_ui returns an entry for
+    lang_radio.change(
+        fn=update_ui,
+        inputs=lang_radio,
+        outputs=[
+            markdown_title, markdown_subtitle, text_input, analyze_button,
+            tab_graphic, tab_table, tab_json,
+            html_out, df_out, json_out
+        ]
+    )
+# Launch the app; runs at import time because there is no __main__ guard
+demo.launch()