Spaces:

cstr
/

WiktionaryDE

Sleeping

App Files Files Community

cstr committed about 1 month ago

Commit

55bd9f8

verified ·

1 Parent(s): 0e428c5

pretty output

Browse files

Files changed (1) hide show

app.py +374 -49

app.py CHANGED Viewed

@@ -3278,6 +3278,332 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
     }
 # ============================================================================
 # 8. GRADIO UI CREATION
 # ============================================================================
@@ -3417,47 +3743,46 @@ def create_conceptnet_tab():
 def create_combined_tab():
     """Creates the UI for the CONTEXTUAL Comprehensive Analyzer tab."""
-    gr.Markdown("# 🚀 Comprehensive Analyzer (Contextual)")
-    gr.Markdown("This tool provides a deep, **lemma-based** analysis *in context*. It integrates all tools and uses the **full sentence** to rank semantic senses by relevance.")
     with gr.Column():
         text_input = gr.Textbox(
-            label="German Text",
-            placeholder="e.g., Die schnelle Katze springt über den faulen Hund.",
             lines=5
         )
         top_n_number = gr.Number(
-            label="Limit Semantic Senses per POS (0 for all)",
-            value=0,
-            step=1,
-            minimum=0,
-            interactive=True
         )
-        analyze_button = gr.Button("Run Comprehensive Analysis", variant="primary")
-    # *** ADD STATUS OUTPUT ***
     status_output = gr.Markdown(value="", visible=True)
-    output = gr.JSON(label="Comprehensive Analysis (JSON)")
-    # *** WRAPPER FUNCTION TO FORCE REFRESH ***
-    def run_analysis_with_status(text, top_n):
         try:
-            status = "🔄 Analyzing..."
-            yield status, {}
             result = comprehensive_german_analysis(text, top_n)
-            status = f"✅ Analysis complete! Found {len(result.get('lemma_deep_dive', {}))} lemmas."
-            yield status, result
         except Exception as e:
-            error_status = f"❌ Error: {str(e)}"
-            error_result = {"error": str(e), "traceback": traceback.format_exc()}
-            yield error_status, error_result
     analyze_button.click(
-        fn=run_analysis_with_status,
         inputs=[text_input, top_n_number],
-        outputs=[status_output, output],
         api_name="comprehensive_analysis"
     )
@@ -3465,56 +3790,57 @@ def create_combined_tab():
         [["Die Katze schlafen auf dem Tisch.", 3],
          ["Das ist ein Huas.", 0],
          ["Ich laufe schnell.", 3],
-         ["Der Gärtner pflanzt einen Baum.", 5],
-         ["Ich fahre an den See.", 3]],
         inputs=[text_input, top_n_number],
-        outputs=[status_output, output],
-        fn=run_analysis_with_status,
         cache_examples=False
     )
 def create_word_encyclopedia_tab():
     """--- UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
-    gr.Markdown("# 📖 Word Encyclopedia (Non-Contextual)")
-    gr.Markdown("This tool analyzes a **single word** for *all possible* grammatical and semantic forms. It finds ambiguities (e.g., 'Lauf' as noun and verb) and groups all data by Part-of-Speech.")
     with gr.Column():
         word_input = gr.Textbox(
-            label="Single German Word",
-            placeholder="e.g., Lauf, See, schnell, heute"
         )
         with gr.Row():
             top_n_number = gr.Number(
-                label="Limit Semantic Senses per POS (0 for all)",
-                value=0,
-                step=1,
-                minimum=0,
-                interactive=True
             )
-            # --- ADD DWDSMOR TO THE RADIO BUTTONS ---
             engine_radio = gr.Radio(
-                label="Select Analysis Engine (will auto-fallback)",
                 choices=[
-                    ("Wiktionary (Default)", "wiktionary"),
-                    ("DWDSmor (New)", "dwdsmor"),
                     ("HanTa (Fallback 2)", "hanta"),
                     ("IWNLP (Fallback 3)", "iwnlp")
                 ],
                 value="wiktionary",
                 interactive=True
             )
-            # --- END OF CHANGE ---
-        analyze_button = gr.Button("Analyze Word", variant="primary")
-    output = gr.JSON(label="Word Encyclopedia Analysis (JSON)")
     analyze_button.click(
-        fn=analyze_word_encyclopedia,
         inputs=[word_input, top_n_number, engine_radio],
-        outputs=[output],
         api_name="analyze_word"
     )
@@ -3522,11 +3848,10 @@ def create_word_encyclopedia_tab():
         [["Lauf", 3, "wiktionary"],
          ["See", 0, "wiktionary"],
          ["schnell", 3, "wiktionary"],
-         ["heute", 0, "wiktionary"],
-         ["gebildet", 0, "dwdsmor"]], # Example to show the new engine
         inputs=[word_input, top_n_number, engine_radio],
-        outputs=[output],
-        fn=analyze_word_encyclopedia,
         cache_examples=False
     )

     }
+# ============================================================================
+# 7.5 VISUALIZATION & HTML HELPERS (DE)
+# ============================================================================
+# Inline <style> block shared by the HTML formatters in this section: both
+# _format_word_analysis_html and _format_comprehensive_html start their output
+# with this string. The class names defined here (ling-card, ling-header,
+# pos-*, inflection-table, sense-item, source-badge, rel-chip, kg-details,
+# grammar-alert, ...) are the ones referenced by the markup those functions emit.
+HTML_CSS = """
+<style>
+    /* Card Container - High Contrast */
+    .ling-card {
+        font-family: 'Segoe UI', Roboto, Helvetica, Arial, sans-serif;
+        border: 1px solid #d1d5db;
+        border-radius: 8px;
+        padding: 20px;
+        margin-bottom: 20px;
+        background: #ffffff;
+        box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
+    }
+    /* Header Section */
+    .ling-header {
+        display: flex;
+        align-items: center;
+        margin-bottom: 15px;
+        border-bottom: 2px solid #e5e7eb;
+        padding-bottom: 10px;
+    }
+    .ling-lemma {
+        font-size: 1.8em;
+        font-weight: 800;
+        color: #111827;
+        margin-right: 12px;
+    }
+    .ling-pos {
+        font-size: 0.85em;
+        text-transform: uppercase;
+        font-weight: 700;
+        padding: 4px 10px;
+        border-radius: 6px;
+        color: #fff;
+        letter-spacing: 0.05em;
+    }
+    /* POS Colors */
+    .pos-noun { background-color: #2563eb; }       /* Blue */
+    .pos-verb { background-color: #059669; }       /* Green */
+    .pos-adj { background-color: #d97706; }        /* Amber */
+    .pos-adv { background-color: #7c3aed; }        /* Purple */
+    .pos-name { background-color: #db2777; }       /* Pink */
+    .pos-other { background-color: #4b5563; }      /* Gray */
+    /* Section Headers */
+    .ling-section { margin-top: 15px; }
+    .ling-subtitle {
+        font-size: 0.85em;
+        font-weight: 700;
+        color: #374151;
+        text-transform: uppercase;
+        margin-bottom: 8px;
+        border-left: 4px solid #3b82f6;
+        padding-left: 8px;
+    }
+    /* Tables */
+    .inflection-table { width: 100%; font-size: 0.95em; border-collapse: collapse; margin-bottom: 10px; }
+    .inflection-table td { padding: 6px 10px; border-bottom: 1px solid #e5e7eb; color: #1f2937; }
+    .inflection-label { color: #6b7280; font-weight: 600; width: 35%; background: #f9fafb; }
+    /* Senses */
+    .sense-item { margin-bottom: 8px; line-height: 1.5; font-size: 1em; color: #1f2937; }
+    .source-badge {
+        display: inline-block; font-size: 0.75em; font-weight: bold; padding: 2px 6px;
+        border-radius: 4px; border: 1px solid; margin-right: 8px; vertical-align: middle;
+    }
+    .src-wikt { background: #fff1f2; color: #9f1239; border-color: #fda4af; }
+    .src-oewn { background: #eff6ff; color: #1e40af; border-color: #93c5fd; }
+    /* Relations Chips */
+    .rel-chip {
+        display: inline-block;
+        background: #f3f4f6;
+        color: #1f2937;
+        padding: 4px 10px;
+        border-radius: 15px;
+        font-size: 0.9em;
+        margin: 3px;
+        border: 1px solid #d1d5db;
+        font-weight: 500;
+    }
+    .rel-type { color: #6b7280; font-size: 0.8em; margin-right: 4px; font-weight: 700; text-transform: lowercase;}
+    /* Collapsible */
+    .kg-details > summary {
+        cursor: pointer; color: #2563eb; font-size: 0.9em; font-weight: 600;
+        margin-top: 10px; padding: 6px; border-radius: 4px; width: fit-content;
+    }
+    .kg-details > summary:hover { text-decoration: underline; background: #eff6ff; }
+    .kg-content { margin-top: 10px; padding: 10px; background: #f9fafb; border-radius: 8px; border: 1px solid #e5e7eb; }
+    /* Alert Banners */
+    .grammar-alert { padding: 10px; border-radius: 6px; margin-bottom: 15px; border: 1px solid; }
+    .alert-green { background: #ecfdf5; color: #065f46; border-color: #a7f3d0; }
+    .alert-red { background: #fef2f2; color: #991b1b; border-color: #fecaca; }
+</style>
+"""
+def _format_word_analysis_html(data: Dict[str, Any]) -> str:
+    """Render a single-word analysis result as HTML cards (German labels).
+
+    One card is emitted per part-of-speech key in ``data["analysis"]``, built
+    from the first entry in that key's list. Every value taken from ``data``
+    is HTML-escaped before it is interpolated, because lemmas, definitions and
+    relation targets originate from user input and external dictionaries.
+
+    Args:
+        data: Result dict. ``data["analysis"]`` maps a POS key to a list of
+            entry dicts; each entry may carry ``inflections_wiktionary``,
+            ``inflections_pattern`` and ``semantics_combined``. The optional
+            ``info`` and ``input_word`` keys feed the fallback message and the
+            fallback lemma. ``None`` or a non-dict is tolerated.
+
+    Returns:
+        ``HTML_CSS`` followed by one ``ling-card`` block per non-empty POS, or
+        ``HTML_CSS`` plus a single "Keine Daten verfügbar." card when there is
+        no usable ``analysis`` mapping.
+    """
+    # Imported locally so the block does not rely on a new file-level import.
+    from html import escape as _escape
+
+    def esc(value: Any) -> str:
+        """HTML-escape any value after converting it to text."""
+        return _escape(str(value), quote=True)
+
+    def row(label: str, value: Any) -> str:
+        """Build one label/value row of the inflection table."""
+        return f"<tr><td class='inflection-label'>{label}</td><td>{esc(value)}</td></tr>"
+
+    def bare_form(declension: Dict[str, Any], case_key: str) -> Any:
+        """Return the 'bare' form stored under a case/number key, or '-'."""
+        return (declension.get(case_key) or {}).get("bare", "-")
+
+    def render_rel(rel: Dict[str, Any]) -> str:
+        """Render one knowledge-graph relation as a chip."""
+        rel_name = rel.get("relation", "Rel")
+        target = rel.get("other_node") or "?"
+        if target == "?" and "surface" in rel:
+            # Fall back to the last word of the surface text when it has more than two words.
+            words = str(rel["surface"]).split()
+            if len(words) > 2:
+                target = words[-1]
+        return f"<span class='rel-chip'><span class='rel-type'>{esc(rel_name)}:</span> {esc(target)}</span>"
+
+    analysis = data.get("analysis") if isinstance(data, dict) else None
+    if not isinstance(analysis, dict):
+        # Previously data.get() was called even when data was None.
+        info = (data.get("info") or "") if isinstance(data, dict) else ""
+        return f"{HTML_CSS}<div class='ling-card'>Keine Daten verfügbar. {esc(info)}</div>"
+
+    # POS key -> (CSS class, German display label); unknown keys fall back to gray + upper-cased key.
+    pos_styles = {
+        "noun": ("pos-noun", "SUBSTANTIV"),
+        "verb": ("pos-verb", "VERB"),
+        "adj": ("pos-adj", "ADJEKTIV"),
+        "adjective": ("pos-adj", "ADJEKTIV"),
+        "adv": ("pos-adv", "ADVERB"),
+        "adverb": ("pos-adv", "ADVERB"),
+    }
+    top_n_rels = 6  # relations shown before the collapsible "more" section
+    chunks = [HTML_CSS]
+
+    for pos_key, entries in analysis.items():
+        if not entries:
+            continue
+        entry = entries[0]  # Take best candidate
+        if not isinstance(entry, dict):
+            continue
+        css_class, display_pos = pos_styles.get(pos_key, ("pos-other", str(pos_key).upper()))
+
+        # Data extraction (any of these may be missing or None)
+        inf_wikt = entry.get("inflections_wiktionary") or {}
+        inf_pat = entry.get("inflections_pattern") or {}
+        sem_comb = entry.get("semantics_combined") or {}
+        lemma = (
+            inf_wikt.get("base_form")
+            or inf_pat.get("base_form")
+            or sem_comb.get("lemma")
+            or data.get("input_word")
+            or "?"
+        )
+
+        # --- Card start ---
+        chunks.append(
+            "<div class='ling-card'>"
+            "<div class='ling-header'>"
+            f"<span class='ling-lemma'>{esc(lemma)}</span>"
+            f"<span class='ling-pos {css_class}'>{esc(display_pos)}</span>"
+            "</div>"
+        )
+
+        # --- Inflections section ---
+        chunks.append("<div class='ling-section'><div class='ling-subtitle'>Morphologie & Flexion</div>")
+        chunks.append("<table class='inflection-table'>")
+        if pos_key == "noun":
+            decl = inf_pat.get("declension")
+            by_gender = inf_pat.get("declension_by_gender")
+            if not decl and by_gender:
+                # Declension may be nested per gender; use the first gender listed.
+                decl = next(iter(by_gender.values()))
+            if decl:
+                chunks.append(row("Singular (Nom)", bare_form(decl, "Nominativ Singular")))
+                chunks.append(row("Plural (Nom)", bare_form(decl, "Nominativ Plural")))
+                chunks.append(row("Genitiv (Sg)", bare_form(decl, "Genitiv Singular")))
+                chunks.append(row("Genus", inf_pat.get("gender", "Unknown")))
+            else:
+                chunks.append("<tr><td colspan='2'><i>Keine Flexionsdaten gefunden.</i></td></tr>")
+        elif pos_key == "verb":
+            conjugation = inf_pat.get("conjugation") or {}
+            present = conjugation.get("Präsens") or {}
+            past = conjugation.get("Präteritum") or {}
+            participles = inf_pat.get("participles") or {}
+            chunks.append(row("Infinitiv", inf_pat.get("infinitive", lemma)))
+            chunks.append(row("3. Pers. Sg. (er/sie)", present.get("er/sie/es", "-")))
+            chunks.append(row("Präteritum (ich)", past.get("ich", "-")))
+            chunks.append(row("Partizip II", participles.get("Partizip Perfekt", "-")))
+        elif pos_key in ("adjective", "adj"):
+            chunks.append(row("Positiv", inf_pat.get("predicative", lemma)))
+            chunks.append(row("Komparativ", inf_pat.get("comparative", "-")))
+            chunks.append(row("Superlativ", inf_pat.get("superlative", "-")))
+
+        # Wiktionary forms: look at the first 8 entries and keep the non-empty texts.
+        forms_list = inf_wikt.get("forms_list") or []
+        form_texts = [f.get("form_text") for f in forms_list[:8] if isinstance(f, dict)]
+        form_texts = [esc(t) for t in form_texts if t]
+        if form_texts:
+            chunks.append(
+                "<tr><td class='inflection-label'>Weitere Formen (DB)</td>"
+                f"<td>{', '.join(form_texts)}</td></tr>"
+            )
+        chunks.append("</table></div>")
+
+        # --- Semantics section ---
+        chunks.append("<div class='ling-section'><div class='ling-subtitle'>Bedeutungen & Definitionen</div>")
+        wikt_senses = sem_comb.get("wiktionary_senses") or []
+        ode_senses = sem_comb.get("odenet_senses") or []
+        if not wikt_senses and not ode_senses:
+            chunks.append("<div class='sense-item'><i>Keine Definitionen gefunden.</i></div>")
+        for sense in wikt_senses[:3]:
+            gloss_raw = sense.get("definition") or ""
+            # Split on ';' BEFORE escaping: escaped entities such as '&amp;' contain
+            # semicolons and must not be turned into line breaks.
+            gloss = "<br>".join(esc(piece) for piece in str(gloss_raw).split(";"))
+            if gloss:
+                chunks.append(
+                    f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"
+                )
+        for sense in ode_senses[:3]:
+            definition = sense.get("definition") or ""
+            if definition:
+                chunks.append(
+                    "<div class='sense-item'><span class='source-badge src-oewn'>OdeNet</span> "
+                    f"{esc(definition)}</div>"
+                )
+        chunks.append("</div>")
+
+        # --- Relations section ---
+        rels = sem_comb.get("conceptnet_relations") or []
+        if rels:
+            visible_rels = rels[:top_n_rels]
+            hidden_rels = rels[top_n_rels:]
+            chunks.append("<div class='ling-section'><div class='ling-subtitle'>Wissensgraph (Kontext)</div>")
+            chunks.append("<div>")
+            chunks.extend(render_rel(r) for r in visible_rels)
+            chunks.append("</div>")
+            if hidden_rels:
+                chunks.append(
+                    "<details class='kg-details'>"
+                    f"<summary>Zeige {len(hidden_rels)} weitere Relationen</summary>"
+                    "<div class='kg-content'>"
+                )
+                chunks.extend(render_rel(r) for r in hidden_rels)
+                chunks.append("</div></details>")
+            chunks.append("</div>")
+
+        chunks.append("</div>")  # End card
+    return "".join(chunks)
+def _format_comprehensive_html(data: Dict[str, Any]) -> str:
+    """Render the comprehensive sentence analysis as HTML.
+
+    The output consists of an optional grammar-check banner followed by one
+    collapsible ``<details>`` element per lemma in ``data["lemma_deep_dive"]``.
+    Each lemma's data is reshaped into the structure expected by
+    ``_format_word_analysis_html`` and rendered through it.
+
+    Args:
+        data: Analysis result. Keys read here: ``error``, ``grammar_check``
+            (list of dicts with ``status`` / ``message`` / ``incorrect_text``)
+            and ``lemma_deep_dive`` (lemma -> dict with ``inflection_analysis``
+            and ``semantic_analysis``). ``None`` or a non-dict is treated as an
+            empty result.
+
+    Returns:
+        A red error ``<div>`` when ``data`` contains ``error``; otherwise
+        ``HTML_CSS`` followed by the banner and the per-lemma sections.
+    """
+    # Imported locally so the block does not rely on a new file-level import.
+    from html import escape as _escape
+
+    def esc(value: Any) -> str:
+        """HTML-escape any value after converting it to text."""
+        return _escape(str(value), quote=True)
+
+    if not isinstance(data, dict):
+        data = {}
+    if "error" in data:
+        # Error text can contain arbitrary characters (e.g. exception messages).
+        return f"<div style='color:red'>{esc(data['error'])}</div>"
+
+    chunks = [HTML_CSS]
+
+    # 1. Grammar check banner
+    gc = data.get("grammar_check", [])
+    issues = [item for item in gc if isinstance(item, dict)] if isinstance(gc, list) else []
+    if len(issues) == 1 and issues[0].get("status") == "perfect":
+        chunks.append(
+            "<div class='grammar-alert alert-green'><strong>✓ Grammatikprüfung:</strong> "
+            "Keine offensichtlichen Fehler gefunden.</div>"
+        )
+    elif issues:
+        chunks.append("<div class='grammar-alert alert-red'><strong>⚠ Grammatik-Hinweise:</strong><br>")
+        for issue in issues:
+            msg = issue.get("message", "Fehler")
+            bad = issue.get("incorrect_text", "")
+            chunks.append(f"• {esc(msg)} (in: '<em>{esc(bad)}</em>')<br>")
+        chunks.append("</div>")
+
+    # 2. Lemma deep dive
+    deep_dive = data.get("lemma_deep_dive", {})
+    if not deep_dive or not isinstance(deep_dive, dict):
+        chunks.append("<p>Keine Tiefenanalyse verfügbar.</p>")
+        return "".join(chunks)
+
+    chunks.append("<h3>Wort-für-Wort Analyse</h3>")
+    for lemma, details in deep_dive.items():
+        details = details if isinstance(details, dict) else {}
+        inflections = details.get("inflection_analysis") or {}
+        semantics = details.get("semantic_analysis") or {}
+
+        # POS prefixes come from inflection keys such as "<pos>_wiktionary".
+        # dict.fromkeys de-duplicates while keeping first-seen order, so the
+        # card order is stable between runs (a set is not, for strings).
+        pos_keys = list(dict.fromkeys(str(key).split("_")[0] for key in inflections))
+
+        # Reconstruct the structure the single-word formatter expects.
+        reconstructed_data: Dict[str, Any] = {"analysis": {}}
+        for pos in pos_keys:
+            senses = [s for s in (semantics.get(f"{pos}_senses") or []) if isinstance(s, dict)]
+            reconstructed_data["analysis"][pos] = [
+                {
+                    "inflections_wiktionary": inflections.get(f"{pos}_wiktionary"),
+                    "inflections_pattern": inflections.get(f"{pos}_pattern"),
+                    "semantics_combined": {
+                        # Passed through raw: rendering is the word formatter's job.
+                        "lemma": lemma,
+                        "wiktionary_senses": [s for s in senses if s.get("source") == "wiktionary"],
+                        "odenet_senses": [s for s in senses if s.get("source") == "odenet"],
+                        "conceptnet_relations": semantics.get("conceptnet_relations") or [],
+                    },
+                }
+            ]
+
+        chunks.append(f"<details><summary>{esc(lemma)}</summary>")
+        chunks.append(_format_word_analysis_html(reconstructed_data))
+        chunks.append("</details>")
+    return "".join(chunks)
 # ============================================================================
 # 8. GRADIO UI CREATION
 # ============================================================================
 def create_combined_tab():
     """Creates the UI for the CONTEXTUAL Comprehensive Analyzer tab."""
+    gr.Markdown("# 🚀 Umfassende Analyse (Kontextuell)")
+    gr.Markdown("Dieses Tool bietet eine tiefe, **lemma-basierte** Analyse *im Kontext*. Es integriert alle Tools und nutzt den **ganzen Satz**, um Bedeutungen nach Relevanz zu sortieren.")
     with gr.Column():
         text_input = gr.Textbox(
+            label="Deutscher Text",
+            placeholder="z.B., Die schnelle Katze springt über den faulen Hund.",
             lines=5
         )
         top_n_number = gr.Number(
+            label="Limit semantische Bedeutungen pro POS (0 für alle)",
+            value=0, step=1, minimum=0, interactive=True
         )
+        analyze_button = gr.Button("Umfassende Analyse starten", variant="primary")
     status_output = gr.Markdown(value="", visible=True)
+    # --- NEW: Visual Output ---
+    html_output = gr.HTML(label="Visueller Bericht")
+    json_output = gr.JSON(label="Rohdaten (JSON)")
+    def run_analysis_with_status_visual(text, top_n):
         try:
+            status = "🔄 Analyse läuft..."
+            yield status, "", {}
             result = comprehensive_german_analysis(text, top_n)
+            # Generate HTML
+            html = _format_comprehensive_html(result)
+            status = f"✅ Analyse abgeschlossen! {len(result.get('lemma_deep_dive', {}))} Lemmata analysiert."
+            yield status, html, result
         except Exception as e:
+            error_status = f"❌ Fehler: {str(e)}"
+            yield error_status, f"<div style='color:red'>{str(e)}</div>", {"error": str(e), "traceback": traceback.format_exc()}
     analyze_button.click(
+        fn=run_analysis_with_status_visual,
         inputs=[text_input, top_n_number],
+        outputs=[status_output, html_output, json_output],
         api_name="comprehensive_analysis"
     )
         [["Die Katze schlafen auf dem Tisch.", 3],
          ["Das ist ein Huas.", 0],
          ["Ich laufe schnell.", 3],
+         ["Der Gärtner pflanzt einen Baum.", 5]],
         inputs=[text_input, top_n_number],
+        outputs=[status_output, html_output, json_output],
+        fn=run_analysis_with_status_visual,
         cache_examples=False
     )
 def create_word_encyclopedia_tab():
     """--- UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
+    gr.Markdown("# 📖 Wort-Enzyklopädie (Nicht-Kontextuell)")
+    gr.Markdown("Analysiert ein **einzelnes Wort** auf alle grammatikalischen und semantischen Formen.")
     with gr.Column():
         word_input = gr.Textbox(
+            label="Einzelnes deutsches Wort",
+            placeholder="z.B., Lauf, See, schnell, heute"
         )
         with gr.Row():
             top_n_number = gr.Number(
+                label="Limit semantische Bedeutungen pro POS (0 für alle)",
+                value=0, step=1, minimum=0, interactive=True
             )
             engine_radio = gr.Radio(
+                label="Wähle Analyse-Engine (Automatischer Fallback)",
                 choices=[
+                    ("Wiktionary (Standard)", "wiktionary"),
+                    ("DWDSmor (Neu)", "dwdsmor"),
                     ("HanTa (Fallback 2)", "hanta"),
                     ("IWNLP (Fallback 3)", "iwnlp")
                 ],
                 value="wiktionary",
                 interactive=True
             )
+        analyze_button = gr.Button("Wort analysieren", variant="primary")
+    # --- NEW: Visual Output ---
+    html_output = gr.HTML(label="Visueller Bericht")
+    json_output = gr.JSON(label="Analyse Rohdaten (JSON)")
+    def run_word_visual(word, top_n, engine):
+        data = analyze_word_encyclopedia(word, top_n, engine)
+        html = _format_word_analysis_html(data)
+        return html, data
     analyze_button.click(
+        fn=run_word_visual,
         inputs=[word_input, top_n_number, engine_radio],
+        outputs=[html_output, json_output],
         api_name="analyze_word"
     )
         [["Lauf", 3, "wiktionary"],
          ["See", 0, "wiktionary"],
          ["schnell", 3, "wiktionary"],
+         ["gebildet", 0, "dwdsmor"]],
         inputs=[word_input, top_n_number, engine_radio],
+        outputs=[html_output, json_output],
+        fn=run_word_visual,
         cache_examples=False
     )