Spaces:
Sleeping
Sleeping
fix wiktionary ground truth overrides pattern guesses
Browse files
app.py
CHANGED
|
@@ -951,74 +951,91 @@ def pattern_is_good_analysis(analysis, analysis_type):
|
|
| 951 |
|
| 952 |
|
| 953 |
# --- Inflection Generators ---
|
| 954 |
-
def pattern_analyze_as_noun(word: str, hint_lemma: str = None) -> Dict[str, Any]:
|
| 955 |
-
"""
|
| 956 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 957 |
analysis = {}
|
|
|
|
|
|
|
| 958 |
singular = singularize(word)
|
| 959 |
plural = pluralize(word)
|
| 960 |
-
|
| 961 |
-
log(f" pluralize({word}) = {plural}")
|
| 962 |
if plural != word and singular != word:
|
| 963 |
base = word
|
| 964 |
-
log(f" Word changes when pluralized => base = {base}")
|
| 965 |
elif singular != word:
|
| 966 |
base = singular
|
| 967 |
-
log(f" Word changes when singularized => base = {base}")
|
| 968 |
elif hint_lemma and hint_lemma != word:
|
| 969 |
base = hint_lemma
|
| 970 |
-
log(f" Using hint lemma => base = {base}")
|
| 971 |
else:
|
| 972 |
-
# This is a valid case, e.g. "Lauf" (singular)
|
| 973 |
base = word
|
| 974 |
-
log(f" Word is already base form => base = {base}")
|
| 975 |
|
| 976 |
-
|
| 977 |
-
|
| 978 |
-
|
| 979 |
-
|
| 980 |
-
|
| 981 |
-
genders = list(g)
|
| 982 |
-
log(f" Detected ambiguous gender: {genders}")
|
| 983 |
-
elif g is None:
|
| 984 |
-
genders = [MALE] # Default
|
| 985 |
-
log(f" Gender unknown, defaulting to MALE")
|
| 986 |
else:
|
| 987 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 988 |
|
| 989 |
analysis["base_form"] = base
|
| 990 |
analysis["plural"] = pluralize(base)
|
| 991 |
analysis["singular"] = base
|
| 992 |
analysis["declension_by_gender"] = {}
|
| 993 |
|
|
|
|
| 994 |
for gen in genders:
|
| 995 |
gender_str = {MALE: "Masculine", FEMALE: "Feminine", NEUTRAL: "Neuter"}.get(gen, "Unknown")
|
| 996 |
gen_declension = {}
|
|
|
|
| 997 |
for number, number_name in [(SINGULAR, "Singular"), (PLURAL, "Plural")]:
|
| 998 |
word_form = base if number == SINGULAR else pluralize(base)
|
| 999 |
word_form_cap = word_form.capitalize()
|
| 1000 |
gender_for_article = gen if number == SINGULAR else PLURAL
|
| 1001 |
-
|
|
|
|
| 1002 |
(DATIVE, "Dativ"), (GENITIVE, "Genitiv")]:
|
| 1003 |
try:
|
| 1004 |
def_art = article(word_form, DEFINITE, gender_for_article, case)
|
| 1005 |
indef_art = article(word_form, INDEFINITE, gender_for_article, case)
|
|
|
|
| 1006 |
indef_form = f"{indef_art} {word_form_cap}" if indef_art else word_form_cap
|
| 1007 |
-
if number == PLURAL:
|
| 1008 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1009 |
gen_declension[f"{case_name} {number_name}"] = {
|
| 1010 |
-
"definite": f"{def_art} {
|
| 1011 |
"indefinite": indef_form,
|
| 1012 |
-
"bare":
|
| 1013 |
}
|
| 1014 |
except Exception as e:
|
| 1015 |
log(f" Failed to get article for {gender_str}/{case_name} {number_name}: {e}")
|
|
|
|
| 1016 |
analysis["declension_by_gender"][gender_str] = gen_declension
|
| 1017 |
|
| 1018 |
-
|
| 1019 |
if len(genders) == 1:
|
| 1020 |
-
|
| 1021 |
-
analysis["
|
|
|
|
| 1022 |
|
| 1023 |
return analysis
|
| 1024 |
|
|
@@ -2174,9 +2191,11 @@ def _wiktionary_format_semantics_block(
|
|
| 2174 |
|
| 2175 |
def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
| 2176 |
"""
|
| 2177 |
-
(PRIMARY ENGINE) Analyzes a word using the Wiktionary DB
|
| 2178 |
-
|
| 2179 |
"""
|
|
|
|
|
|
|
| 2180 |
final_result: Dict[str, Any] = {
|
| 2181 |
"input_word": word,
|
| 2182 |
"analysis": {}
|
|
@@ -2184,7 +2203,8 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2184 |
|
| 2185 |
conn = wiktionary_get_connection()
|
| 2186 |
if not conn:
|
| 2187 |
-
|
|
|
|
| 2188 |
|
| 2189 |
# --- 1. GET SPACY/IWNLP HINT FOR PRIORITIZATION ---
|
| 2190 |
spacy_pos_hint = None
|
|
@@ -2204,44 +2224,37 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2204 |
else: spacy_pos_hint = spacy_pos_raw
|
| 2205 |
|
| 2206 |
spacy_lemma_hint = token.lemma_
|
| 2207 |
-
log(f"[DEBUG]
|
| 2208 |
except Exception as e:
|
| 2209 |
-
log(f"[DEBUG]
|
| 2210 |
|
| 2211 |
# --- 2. FIND ALL WIKTIONARY ENTRIES ---
|
| 2212 |
try:
|
| 2213 |
wiktionary_reports = _wiktionary_find_all_entries(word, conn)
|
| 2214 |
except Exception as e:
|
| 2215 |
log(f"[DEBUG] Wiktionary query failed: {e}")
|
| 2216 |
-
return {}
|
| 2217 |
|
| 2218 |
if not wiktionary_reports:
|
| 2219 |
-
|
|
|
|
| 2220 |
|
| 2221 |
# --- 3. PRIORITIZE/SORT THE WIKTIONARY ENTRIES ---
|
| 2222 |
def get_priority_score(report):
|
| 2223 |
wikt_pos = _wiktionary_map_pos_key(report.get("pos"))
|
| 2224 |
wikt_lemma = report.get("lemma")
|
| 2225 |
-
|
| 2226 |
# Priority 1: Exact POS match with spaCy hint
|
| 2227 |
if spacy_pos_hint and wikt_pos == spacy_pos_hint:
|
| 2228 |
-
|
| 2229 |
-
if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint:
|
| 2230 |
-
return 1
|
| 2231 |
return 2
|
| 2232 |
-
|
| 2233 |
-
|
| 2234 |
-
if wikt_lemma.lower() == word.lower():
|
| 2235 |
-
return 3
|
| 2236 |
-
|
| 2237 |
-
# Priority 3: Other inflected forms (e.g. "gehe" -> "gehen")
|
| 2238 |
return 4
|
| 2239 |
|
| 2240 |
wiktionary_reports.sort(key=get_priority_score)
|
| 2241 |
-
log(f"[DEBUG]
|
| 2242 |
|
| 2243 |
-
|
| 2244 |
-
# --- 4. BUILD AND *VALIDATE* THE FINAL REPORT (PATH-PURE) ---
|
| 2245 |
word_lower = word.lower()
|
| 2246 |
|
| 2247 |
for wikt_report in wiktionary_reports:
|
|
@@ -2249,46 +2262,133 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2249 |
lemma = wikt_report.get("lemma", word)
|
| 2250 |
pos_title = wikt_report.get("pos_title", "")
|
| 2251 |
|
| 2252 |
-
|
|
|
|
|
|
|
|
|
|
| 2253 |
inflections_wikt_block = {
|
| 2254 |
"base_form": lemma,
|
| 2255 |
-
"forms_list":
|
| 2256 |
"source": "wiktionary"
|
| 2257 |
}
|
| 2258 |
|
| 2259 |
-
# --- B.
|
|
|
|
| 2260 |
pattern_block = {}
|
|
|
|
| 2261 |
if PATTERN_DE_AVAILABLE:
|
| 2262 |
try:
|
|
|
|
| 2263 |
if pos_key == "noun" or "Substantiv" in pos_title:
|
| 2264 |
-
|
| 2265 |
-
|
| 2266 |
-
|
| 2267 |
-
if
|
| 2268 |
-
|
| 2269 |
else:
|
| 2270 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2271 |
elif pos_key == "adjective" or "Adjektiv" in pos_title or "Deklinierte Form" in pos_title:
|
| 2272 |
-
|
| 2273 |
-
|
| 2274 |
-
|
| 2275 |
-
else:
|
| 2276 |
-
pattern_block = pattern_analyze_as_adjective(lemma)
|
| 2277 |
elif pos_key == "adverb":
|
| 2278 |
pattern_block = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
|
| 2279 |
except Exception as e:
|
| 2280 |
-
|
|
|
|
| 2281 |
|
| 2282 |
-
# --- C.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2283 |
semantics_block = _wiktionary_format_semantics_block(wikt_report, pattern_block, top_n)
|
| 2284 |
|
| 2285 |
-
# ---
|
| 2286 |
pos_entry_report = {
|
| 2287 |
"inflections_wiktionary": inflections_wikt_block,
|
| 2288 |
-
"inflections_pattern": pattern_block,
|
| 2289 |
"semantics_combined": semantics_block,
|
| 2290 |
"wiktionary_metadata": {
|
| 2291 |
-
# --- Original Fields ---
|
| 2292 |
"pos_title": pos_title,
|
| 2293 |
"etymology": wikt_report.get("etymology_text"),
|
| 2294 |
"pronunciation": wikt_report.get("sounds"),
|
|
@@ -2296,8 +2396,7 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2296 |
"examples": [ex for s in wikt_report.get("senses", []) for ex in s.get("examples", [])],
|
| 2297 |
"entry_tags": wikt_report.get("entry_tags"),
|
| 2298 |
"entry_categories": wikt_report.get("entry_categories"),
|
| 2299 |
-
|
| 2300 |
-
# Pass through all new fields from the full DB ---
|
| 2301 |
"entry_notes": wikt_report.get("entry_notes"),
|
| 2302 |
"other_pos": wikt_report.get("other_pos"),
|
| 2303 |
"raw_tags": wikt_report.get("raw_tags"),
|
|
@@ -2307,50 +2406,46 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2307 |
"holonyms": wikt_report.get("holonyms"),
|
| 2308 |
"meronyms": wikt_report.get("meronyms"),
|
| 2309 |
"coordinate_terms": wikt_report.get("coordinate_terms"),
|
| 2310 |
-
# We are now correctly getting the data we queried earlier.
|
| 2311 |
"expressions": wikt_report.get("expressions"),
|
| 2312 |
"proverbs": wikt_report.get("proverbs")
|
| 2313 |
-
|
| 2314 |
}
|
| 2315 |
}
|
| 2316 |
|
| 2317 |
-
# ---
|
| 2318 |
is_valid = False
|
| 2319 |
is_inflected_entry = "Konjugierte Form" in pos_title or "Deklinierte Form" in pos_title
|
| 2320 |
|
| 2321 |
-
# Check 1:
|
| 2322 |
-
# This is true for base form entries (e.g., "Haus" -> "Haus (Substantiv)")
|
| 2323 |
-
# AND for inflected form entries (e.g., "gießt" -> "gießt (Konjugierte Form)")
|
| 2324 |
if lemma.lower() == word_lower:
|
| 2325 |
is_valid = True
|
| 2326 |
-
log(f"[DEBUG]
|
| 2327 |
|
| 2328 |
-
# Check 2:
|
| 2329 |
-
# (This applies to base entries where the input is an inflection, e.g., "gießt" -> "gehen (Verb)")
|
| 2330 |
-
# We only run this if Check 1 failed AND this is not an inflected entry (which have no forms)
|
| 2331 |
if not is_valid and not is_inflected_entry:
|
| 2332 |
-
|
|
|
|
| 2333 |
form_text = form_entry.get("form_text", "")
|
| 2334 |
-
|
| 2335 |
-
|
| 2336 |
-
|
| 2337 |
-
|
| 2338 |
-
if bare_form.lower() == word_lower:
|
| 2339 |
is_valid = True
|
| 2340 |
-
log(f"[DEBUG]
|
| 2341 |
break
|
| 2342 |
-
|
| 2343 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2344 |
if is_valid:
|
| 2345 |
if pos_key not in final_result["analysis"]:
|
| 2346 |
final_result["analysis"][pos_key] = []
|
| 2347 |
final_result["analysis"][pos_key].append(pos_entry_report)
|
| 2348 |
else:
|
| 2349 |
-
log(f"[DEBUG]
|
| 2350 |
-
|
| 2351 |
-
# --- END OF VALIDATION ---
|
| 2352 |
|
| 2353 |
-
final_result["info"] = f"Analysis from Wiktionary (
|
| 2354 |
return final_result
|
| 2355 |
|
| 2356 |
# ============================================================================
|
|
@@ -3383,172 +3478,302 @@ HTML_CSS = """
|
|
| 3383 |
"""
|
| 3384 |
|
| 3385 |
def _format_word_analysis_html(data: Dict[str, Any]) -> str:
|
| 3386 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3387 |
if not data or "analysis" not in data:
|
| 3388 |
return f"{HTML_CSS}<div class='ling-card'>Keine Daten verfügbar. {data.get('info', '')}</div>"
|
| 3389 |
|
| 3390 |
html = HTML_CSS
|
| 3391 |
analysis = data["analysis"]
|
| 3392 |
|
| 3393 |
-
# Iterate over POS
|
| 3394 |
for pos_key, entries in analysis.items():
|
| 3395 |
if not entries: continue
|
| 3396 |
-
entry = entries[0] # Take best candidate
|
| 3397 |
-
|
| 3398 |
-
# --- POS Display Logic ---
|
| 3399 |
-
display_pos = pos_key.upper()
|
| 3400 |
-
css_class = "pos-other"
|
| 3401 |
-
|
| 3402 |
-
if pos_key == 'noun':
|
| 3403 |
-
css_class = "pos-noun"
|
| 3404 |
-
display_pos = "SUBSTANTIV"
|
| 3405 |
-
elif pos_key == 'verb':
|
| 3406 |
-
css_class = "pos-verb"
|
| 3407 |
-
display_pos = "VERB"
|
| 3408 |
-
elif pos_key == 'adj' or pos_key == 'adjective':
|
| 3409 |
-
css_class = "pos-adj"
|
| 3410 |
-
display_pos = "ADJEKTIV"
|
| 3411 |
-
elif pos_key == 'adv' or pos_key == 'adverb':
|
| 3412 |
-
css_class = "pos-adv"
|
| 3413 |
-
display_pos = "ADVERB"
|
| 3414 |
|
| 3415 |
-
#
|
| 3416 |
-
|
| 3417 |
-
|
| 3418 |
-
|
| 3419 |
-
|
| 3420 |
-
|
| 3421 |
-
|
| 3422 |
-
|
| 3423 |
-
|
| 3424 |
-
|
| 3425 |
-
|
| 3426 |
-
html += f"""
|
| 3427 |
-
<div class="ling-card">
|
| 3428 |
-
<div class="ling-header">
|
| 3429 |
-
<span class="ling-lemma">{lemma}</span>
|
| 3430 |
-
<span class="ling-pos {css_class}">{display_pos}</span>
|
| 3431 |
-
</div>
|
| 3432 |
-
"""
|
| 3433 |
-
|
| 3434 |
-
# --- Inflections Section (Pattern.de logic) ---
|
| 3435 |
-
html += "<div class='ling-section'><div class='ling-subtitle'>Morphologie & Flexion</div>"
|
| 3436 |
-
html += "<table class='inflection-table'>"
|
| 3437 |
-
|
| 3438 |
-
has_pattern_data = bool(inf_pat) and "error" not in inf_pat
|
| 3439 |
-
|
| 3440 |
-
if pos_key == 'noun':
|
| 3441 |
-
# Pattern.de returns 'declension' or 'declension_by_gender'
|
| 3442 |
-
decl = inf_pat.get('declension')
|
| 3443 |
-
# Fallback if declension is inside gender key
|
| 3444 |
-
if not decl and inf_pat.get('declension_by_gender'):
|
| 3445 |
-
first_gender = list(inf_pat['declension_by_gender'].keys())[0]
|
| 3446 |
-
decl = inf_pat['declension_by_gender'][first_gender]
|
| 3447 |
|
| 3448 |
-
|
| 3449 |
-
|
| 3450 |
-
|
| 3451 |
-
|
| 3452 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3453 |
|
| 3454 |
-
|
| 3455 |
-
|
| 3456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3457 |
|
| 3458 |
-
|
| 3459 |
-
html += f"<tr><td class='inflection-label'>
|
| 3460 |
-
|
| 3461 |
-
html += f"<tr><td
|
| 3462 |
-
|
| 3463 |
-
|
| 3464 |
-
|
| 3465 |
-
|
| 3466 |
-
|
| 3467 |
-
|
| 3468 |
|
| 3469 |
-
|
| 3470 |
-
|
| 3471 |
-
|
| 3472 |
-
html +=
|
| 3473 |
-
|
| 3474 |
-
|
| 3475 |
-
|
| 3476 |
-
|
| 3477 |
-
|
| 3478 |
-
|
| 3479 |
-
|
| 3480 |
-
|
| 3481 |
-
|
| 3482 |
-
|
| 3483 |
-
|
| 3484 |
-
|
| 3485 |
-
|
| 3486 |
|
| 3487 |
-
|
| 3488 |
-
html += f"<tr><td class='inflection-label'>Weitere Formen (DB)</td><td>{', '.join(forms_str_list)}</td></tr>"
|
| 3489 |
-
|
| 3490 |
-
html += "</table></div>"
|
| 3491 |
-
|
| 3492 |
-
# --- Semantics Section ---
|
| 3493 |
-
html += "<div class='ling-section'><div class='ling-subtitle'>Bedeutungen & Definitionen</div>"
|
| 3494 |
-
|
| 3495 |
-
wikt_senses = sem_comb.get("wiktionary_senses") or []
|
| 3496 |
-
ode_senses = sem_comb.get("odenet_senses") or []
|
| 3497 |
-
|
| 3498 |
-
if not wikt_senses and not ode_senses:
|
| 3499 |
-
html += "<div class='sense-item'><i>Keine Definitionen gefunden.</i></div>"
|
| 3500 |
-
|
| 3501 |
-
for s in wikt_senses[:3]:
|
| 3502 |
-
gloss_raw = s.get("definition") or ""
|
| 3503 |
-
gloss = str(gloss_raw).replace(";", "<br>")
|
| 3504 |
-
if gloss:
|
| 3505 |
-
html += f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"
|
| 3506 |
|
| 3507 |
-
|
| 3508 |
-
|
| 3509 |
-
if defi:
|
| 3510 |
-
html += f"<div class='sense-item'><span class='source-badge src-oewn'>OdeNet</span> {defi}</div>"
|
| 3511 |
|
| 3512 |
-
|
| 3513 |
-
|
| 3514 |
-
# --- Relations Section ---
|
| 3515 |
-
rels = sem_comb.get("conceptnet_relations") or []
|
| 3516 |
-
if rels:
|
| 3517 |
-
html += "<div class='ling-section'><div class='ling-subtitle'>Wissensgraph (Kontext)</div>"
|
| 3518 |
|
| 3519 |
-
|
| 3520 |
-
|
| 3521 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3522 |
|
| 3523 |
-
|
| 3524 |
-
|
| 3525 |
-
|
| 3526 |
-
if
|
| 3527 |
-
|
| 3528 |
-
|
| 3529 |
-
return f"<span class='rel-chip'><span class='rel-type'>{rel_name}:</span> {target}</span>"
|
| 3530 |
-
|
| 3531 |
-
html += "<div>"
|
| 3532 |
-
for r in visible_rels:
|
| 3533 |
-
html += render_rel(r)
|
| 3534 |
html += "</div>"
|
| 3535 |
-
|
| 3536 |
-
if hidden_rels:
|
| 3537 |
-
html += f"""
|
| 3538 |
-
<details class='kg-details'>
|
| 3539 |
-
<summary>Zeige {len(hidden_rels)} weitere Relationen</summary>
|
| 3540 |
-
<div class='kg-content'>
|
| 3541 |
-
"""
|
| 3542 |
-
for r in hidden_rels:
|
| 3543 |
-
html += render_rel(r)
|
| 3544 |
-
html += "</div></details>"
|
| 3545 |
|
| 3546 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3547 |
|
| 3548 |
-
|
| 3549 |
|
| 3550 |
return html
|
| 3551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3552 |
def _format_comprehensive_html(data: Dict[str, Any]) -> str:
|
| 3553 |
""" Generates HTML for the comprehensive sentence analysis. """
|
| 3554 |
if "error" in data:
|
|
|
|
| 951 |
|
| 952 |
|
| 953 |
# --- Inflection Generators ---
|
| 954 |
+
def pattern_analyze_as_noun(word: str, hint_lemma: str = None, fixed_gender: int = None) -> Dict[str, Any]:
|
| 955 |
+
"""
|
| 956 |
+
Comprehensive noun inflection analysis.
|
| 957 |
+
Args:
|
| 958 |
+
hint_lemma: A lemma suggestion to help Pattern.
|
| 959 |
+
fixed_gender: A pattern.de constant (MALE, FEMALE, NEUTRAL) to FORCE a specific gender.
|
| 960 |
+
"""
|
| 961 |
+
log(f" Analyzing as noun (hint_lemma={hint_lemma}, fixed_gender={fixed_gender})")
|
| 962 |
analysis = {}
|
| 963 |
+
|
| 964 |
+
# 1. Determine Base Form
|
| 965 |
singular = singularize(word)
|
| 966 |
plural = pluralize(word)
|
| 967 |
+
|
|
|
|
| 968 |
if plural != word and singular != word:
|
| 969 |
base = word
|
|
|
|
| 970 |
elif singular != word:
|
| 971 |
base = singular
|
|
|
|
| 972 |
elif hint_lemma and hint_lemma != word:
|
| 973 |
base = hint_lemma
|
|
|
|
| 974 |
else:
|
|
|
|
| 975 |
base = word
|
|
|
|
| 976 |
|
| 977 |
+
# 2. Determine Gender
|
| 978 |
+
# If Wiktionary gave us a gender, USE IT. Ignore Pattern's internal dictionary.
|
| 979 |
+
if fixed_gender is not None:
|
| 980 |
+
genders = [fixed_gender]
|
| 981 |
+
log(f" [Pattern] Enforcing gender from DB: {fixed_gender}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 982 |
else:
|
| 983 |
+
# Fallback to auto-detection
|
| 984 |
+
g = gender(base, pos=NOUN)
|
| 985 |
+
if isinstance(g, tuple):
|
| 986 |
+
genders = list(g)
|
| 987 |
+
elif g is None:
|
| 988 |
+
genders = [MALE]
|
| 989 |
+
else:
|
| 990 |
+
genders = [g]
|
| 991 |
|
| 992 |
analysis["base_form"] = base
|
| 993 |
analysis["plural"] = pluralize(base)
|
| 994 |
analysis["singular"] = base
|
| 995 |
analysis["declension_by_gender"] = {}
|
| 996 |
|
| 997 |
+
# 3. Generate Declensions
|
| 998 |
for gen in genders:
|
| 999 |
gender_str = {MALE: "Masculine", FEMALE: "Feminine", NEUTRAL: "Neuter"}.get(gen, "Unknown")
|
| 1000 |
gen_declension = {}
|
| 1001 |
+
|
| 1002 |
for number, number_name in [(SINGULAR, "Singular"), (PLURAL, "Plural")]:
|
| 1003 |
word_form = base if number == SINGULAR else pluralize(base)
|
| 1004 |
word_form_cap = word_form.capitalize()
|
| 1005 |
gender_for_article = gen if number == SINGULAR else PLURAL
|
| 1006 |
+
|
| 1007 |
+
for case, case_name in [(NOMINATIVE, "Nominativ"), (ACCUSATIVE, "Akkusativ"),
|
| 1008 |
(DATIVE, "Dativ"), (GENITIVE, "Genitiv")]:
|
| 1009 |
try:
|
| 1010 |
def_art = article(word_form, DEFINITE, gender_for_article, case)
|
| 1011 |
indef_art = article(word_form, INDEFINITE, gender_for_article, case)
|
| 1012 |
+
|
| 1013 |
indef_form = f"{indef_art} {word_form_cap}" if indef_art else word_form_cap
|
| 1014 |
+
if number == PLURAL: indef_form = "—"
|
| 1015 |
+
|
| 1016 |
+
# Fix for Pattern sometimes missing Genitive 's' suffix on Masculine/Neuter
|
| 1017 |
+
noun_text = word_form_cap
|
| 1018 |
+
if number == SINGULAR and case == GENITIVE and gen in [MALE, NEUTRAL] and not noun_text.endswith("s") and not noun_text.endswith("x") and not noun_text.endswith("z"):
|
| 1019 |
+
# Simple heuristic fix: German Genitive usually adds 's' or 'es'
|
| 1020 |
+
# Pattern handles this usually, but if we force gender on a word Pattern doesn't know, it might miss it.
|
| 1021 |
+
# For safety, we trust Pattern's output, but if you find Pattern fails here, you inject logic here.
|
| 1022 |
+
pass
|
| 1023 |
+
|
| 1024 |
gen_declension[f"{case_name} {number_name}"] = {
|
| 1025 |
+
"definite": f"{def_art} {noun_text}" if def_art else noun_text,
|
| 1026 |
"indefinite": indef_form,
|
| 1027 |
+
"bare": noun_text
|
| 1028 |
}
|
| 1029 |
except Exception as e:
|
| 1030 |
log(f" Failed to get article for {gender_str}/{case_name} {number_name}: {e}")
|
| 1031 |
+
|
| 1032 |
analysis["declension_by_gender"][gender_str] = gen_declension
|
| 1033 |
|
| 1034 |
+
# Flatten for the main keys if only one gender exists
|
| 1035 |
if len(genders) == 1:
|
| 1036 |
+
first_gen_key = list(analysis["declension_by_gender"].keys())[0]
|
| 1037 |
+
analysis["declension"] = analysis["declension_by_gender"][first_gen_key]
|
| 1038 |
+
analysis["gender"] = first_gen_key
|
| 1039 |
|
| 1040 |
return analysis
|
| 1041 |
|
|
|
|
| 2191 |
|
| 2192 |
def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
| 2193 |
"""
|
| 2194 |
+
(PRIMARY ENGINE) Analyzes a word using the Wiktionary DB as Ground Truth,
|
| 2195 |
+
filling in missing gaps with Pattern.de generation.
|
| 2196 |
"""
|
| 2197 |
+
print(f"\n[Wiktionary Engine] Starting analysis for: {word}")
|
| 2198 |
+
|
| 2199 |
final_result: Dict[str, Any] = {
|
| 2200 |
"input_word": word,
|
| 2201 |
"analysis": {}
|
|
|
|
| 2203 |
|
| 2204 |
conn = wiktionary_get_connection()
|
| 2205 |
if not conn:
|
| 2206 |
+
log("[Wiktionary Engine] No DB connection available.")
|
| 2207 |
+
return {}
|
| 2208 |
|
| 2209 |
# --- 1. GET SPACY/IWNLP HINT FOR PRIORITIZATION ---
|
| 2210 |
spacy_pos_hint = None
|
|
|
|
| 2224 |
else: spacy_pos_hint = spacy_pos_raw
|
| 2225 |
|
| 2226 |
spacy_lemma_hint = token.lemma_
|
| 2227 |
+
log(f"[DEBUG] Priority Hint: spaCy POS='{spacy_pos_hint}', Lemma='{spacy_lemma_hint}'")
|
| 2228 |
except Exception as e:
|
| 2229 |
+
log(f"[DEBUG] Priority Hint failed: {e}")
|
| 2230 |
|
| 2231 |
# --- 2. FIND ALL WIKTIONARY ENTRIES ---
|
| 2232 |
try:
|
| 2233 |
wiktionary_reports = _wiktionary_find_all_entries(word, conn)
|
| 2234 |
except Exception as e:
|
| 2235 |
log(f"[DEBUG] Wiktionary query failed: {e}")
|
| 2236 |
+
return {}
|
| 2237 |
|
| 2238 |
if not wiktionary_reports:
|
| 2239 |
+
log(f"[DEBUG] No Wiktionary entries found for '{word}'.")
|
| 2240 |
+
return {}
|
| 2241 |
|
| 2242 |
# --- 3. PRIORITIZE/SORT THE WIKTIONARY ENTRIES ---
|
| 2243 |
def get_priority_score(report):
|
| 2244 |
wikt_pos = _wiktionary_map_pos_key(report.get("pos"))
|
| 2245 |
wikt_lemma = report.get("lemma")
|
|
|
|
| 2246 |
# Priority 1: Exact POS match with spaCy hint
|
| 2247 |
if spacy_pos_hint and wikt_pos == spacy_pos_hint:
|
| 2248 |
+
if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint: return 1
|
|
|
|
|
|
|
| 2249 |
return 2
|
| 2250 |
+
# Priority 2: Input word is the lemma
|
| 2251 |
+
if wikt_lemma and wikt_lemma.lower() == word.lower(): return 3
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2252 |
return 4
|
| 2253 |
|
| 2254 |
wiktionary_reports.sort(key=get_priority_score)
|
| 2255 |
+
log(f"[DEBUG] Sorted {len(wiktionary_reports)} entries: {[r.get('lemma') + ' (' + r.get('pos') + ')' for r in wiktionary_reports]}")
|
| 2256 |
|
| 2257 |
+
# --- 4. PROCESS ENTRIES (HYBRID STRATEGY) ---
|
|
|
|
| 2258 |
word_lower = word.lower()
|
| 2259 |
|
| 2260 |
for wikt_report in wiktionary_reports:
|
|
|
|
| 2262 |
lemma = wikt_report.get("lemma", word)
|
| 2263 |
pos_title = wikt_report.get("pos_title", "")
|
| 2264 |
|
| 2265 |
+
log(f"\n--- Processing Entry: {lemma} ({pos_key}) ---")
|
| 2266 |
+
|
| 2267 |
+
# --- A. Raw Wiktionary Forms (Ground Truth) ---
|
| 2268 |
+
wikt_forms_list = wikt_report.get("forms", [])
|
| 2269 |
inflections_wikt_block = {
|
| 2270 |
"base_form": lemma,
|
| 2271 |
+
"forms_list": wikt_forms_list,
|
| 2272 |
"source": "wiktionary"
|
| 2273 |
}
|
| 2274 |
|
| 2275 |
+
# --- B. Generate Base Pattern Template (The Scaffold) ---
|
| 2276 |
+
# We ALWAYS generate this if Pattern is available, to provide the table structure.
|
| 2277 |
pattern_block = {}
|
| 2278 |
+
|
| 2279 |
if PATTERN_DE_AVAILABLE:
|
| 2280 |
try:
|
| 2281 |
+
log(f"[DEBUG] Generating Pattern.de base template for '{lemma}' ({pos_key})...")
|
| 2282 |
if pos_key == "noun" or "Substantiv" in pos_title:
|
| 2283 |
+
# Gender-Aware Generation
|
| 2284 |
+
wikt_tags = wikt_report.get("entry_tags", [])
|
| 2285 |
+
forced_gender = _map_wikt_gender_to_pattern(wikt_tags)
|
| 2286 |
+
if forced_gender:
|
| 2287 |
+
log(f"[DEBUG] Context: Forcing Pattern gender to {forced_gender} based on Wiktionary tags.")
|
| 2288 |
else:
|
| 2289 |
+
log(f"[DEBUG] Context: No gender tags in Wiktionary. Letting Pattern auto-detect.")
|
| 2290 |
+
|
| 2291 |
+
pattern_block = pattern_analyze_as_noun(lemma, fixed_gender=forced_gender)
|
| 2292 |
+
|
| 2293 |
+
elif pos_key == "verb" or "Verb" in pos_title or "Konjugierte Form" in pos_title:
|
| 2294 |
+
use_word = word if "Konjugierte Form" in pos_title else lemma
|
| 2295 |
+
pattern_block = pattern_analyze_as_verb(use_word)
|
| 2296 |
+
|
| 2297 |
elif pos_key == "adjective" or "Adjektiv" in pos_title or "Deklinierte Form" in pos_title:
|
| 2298 |
+
use_word = word if "Deklinierte Form" in pos_title else lemma
|
| 2299 |
+
pattern_block = pattern_analyze_as_adjective(use_word)
|
| 2300 |
+
|
|
|
|
|
|
|
| 2301 |
elif pos_key == "adverb":
|
| 2302 |
pattern_block = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
|
| 2303 |
except Exception as e:
|
| 2304 |
+
log(f"[ERROR] Pattern.de generation failed: {e}")
|
| 2305 |
+
pattern_block = {"error": f"Pattern.de failed: {e}"}
|
| 2306 |
|
| 2307 |
+
# --- C. THE HYBRID MERGE: Overwrite Pattern data with Wiktionary Truth ---
|
| 2308 |
+
# logic: If Wiktionary has a form for a specific slot, use it.
|
| 2309 |
+
# If not, keep the Pattern generated form (thereby filling the gap).
|
| 2310 |
+
|
| 2311 |
+
if pattern_block and "error" not in pattern_block and wikt_forms_list:
|
| 2312 |
+
log(f"[DEBUG] Starting Hybrid Merge (Wiktionary forms: {len(wikt_forms_list)})...")
|
| 2313 |
+
|
| 2314 |
+
overwrites_count = 0
|
| 2315 |
+
|
| 2316 |
+
for wikt_form in wikt_forms_list:
|
| 2317 |
+
text = wikt_form.get("form_text")
|
| 2318 |
+
tags = wikt_form.get("tags")
|
| 2319 |
+
if not text or not tags: continue
|
| 2320 |
+
|
| 2321 |
+
# Map Wikt tags to the address inside pattern_block
|
| 2322 |
+
path_keys = _map_wikt_form_to_pattern_keys(pos_key, tags)
|
| 2323 |
+
|
| 2324 |
+
if path_keys:
|
| 2325 |
+
# Navigate to the slot in pattern_block
|
| 2326 |
+
target = pattern_block
|
| 2327 |
+
|
| 2328 |
+
# Special handling for Noun structure (declension_by_gender)
|
| 2329 |
+
if pos_key == "noun" and "declension_by_gender" in pattern_block:
|
| 2330 |
+
# We apply the overwrite to ALL genders present in the pattern block
|
| 2331 |
+
# (Usually only 1 if we forced it, but maybe more if ambiguous)
|
| 2332 |
+
for gender_key in pattern_block["declension_by_gender"]:
|
| 2333 |
+
# path_keys[0] is e.g. "Nominativ Singular"
|
| 2334 |
+
slot_key = path_keys[0]
|
| 2335 |
+
target_dict = pattern_block["declension_by_gender"][gender_key]
|
| 2336 |
+
|
| 2337 |
+
if slot_key in target_dict:
|
| 2338 |
+
# Noun slots have subkeys: 'bare', 'definite', 'indefinite'
|
| 2339 |
+
# Wiktionary usually gives the form with article "der See" or without "Seen"
|
| 2340 |
+
# We try to be smart about updating 'bare' vs 'definite'
|
| 2341 |
+
|
| 2342 |
+
current_bare = target_dict[slot_key].get('bare', '')
|
| 2343 |
+
|
| 2344 |
+
# Simple clean: remove articles to get bare
|
| 2345 |
+
clean_text = re.sub(r"^(der|die|das|den|dem|des|ein|eine|einen|einem|einer|eines)\s+", "", text, flags=re.IGNORECASE).strip()
|
| 2346 |
+
|
| 2347 |
+
if clean_text != current_bare:
|
| 2348 |
+
log(f"[DEBUG] Merge: Overwriting {gender_key} -> {slot_key} | Old: '{current_bare}' -> New: '{clean_text}' (Source: Wiktionary)")
|
| 2349 |
+
target_dict[slot_key]['bare'] = clean_text
|
| 2350 |
+
# Also update full forms if possible
|
| 2351 |
+
if "definite" in target_dict[slot_key]:
|
| 2352 |
+
# We can reconstruct definite if we know the article, but let's just trust the bare text update
|
| 2353 |
+
# because the HTML renderer often rebuilds the article.
|
| 2354 |
+
# However, let's update 'definite' if the wikt text looks like it has an article
|
| 2355 |
+
if " " in text:
|
| 2356 |
+
target_dict[slot_key]['definite'] = text
|
| 2357 |
+
overwrites_count += 1
|
| 2358 |
+
|
| 2359 |
+
# Handling for Verbs/Adjectives (Nested Dicts)
|
| 2360 |
+
else:
|
| 2361 |
+
# Navigate deep
|
| 2362 |
+
valid_path = True
|
| 2363 |
+
for key in path_keys[:-1]:
|
| 2364 |
+
if key in target:
|
| 2365 |
+
target = target[key]
|
| 2366 |
+
else:
|
| 2367 |
+
valid_path = False
|
| 2368 |
+
break
|
| 2369 |
+
|
| 2370 |
+
if valid_path:
|
| 2371 |
+
last_key = path_keys[-1]
|
| 2372 |
+
if last_key in target and target[last_key] != text:
|
| 2373 |
+
log(f"[DEBUG] Merge: Overwriting {path_keys} | Old: '{target[last_key]}' -> New: '{text}' (Source: Wiktionary)")
|
| 2374 |
+
target[last_key] = text
|
| 2375 |
+
overwrites_count += 1
|
| 2376 |
+
|
| 2377 |
+
log(f"[DEBUG] Merge complete. {overwrites_count} slots updated with Ground Truth.")
|
| 2378 |
+
# Mark the block as hybrid so UI can verify validity
|
| 2379 |
+
pattern_block["is_hybrid"] = True
|
| 2380 |
+
|
| 2381 |
+
# --- D. Build Semantics Block ---
|
| 2382 |
+
# Use lemma from Wiktionary (Ground Truth)
|
| 2383 |
+
semantics_lemma = lemma
|
| 2384 |
semantics_block = _wiktionary_format_semantics_block(wikt_report, pattern_block, top_n)
|
| 2385 |
|
| 2386 |
+
# --- E. Assemble Final Report ---
|
| 2387 |
pos_entry_report = {
|
| 2388 |
"inflections_wiktionary": inflections_wikt_block,
|
| 2389 |
+
"inflections_pattern": pattern_block, # This is now the Hybrid Block
|
| 2390 |
"semantics_combined": semantics_block,
|
| 2391 |
"wiktionary_metadata": {
|
|
|
|
| 2392 |
"pos_title": pos_title,
|
| 2393 |
"etymology": wikt_report.get("etymology_text"),
|
| 2394 |
"pronunciation": wikt_report.get("sounds"),
|
|
|
|
| 2396 |
"examples": [ex for s in wikt_report.get("senses", []) for ex in s.get("examples", [])],
|
| 2397 |
"entry_tags": wikt_report.get("entry_tags"),
|
| 2398 |
"entry_categories": wikt_report.get("entry_categories"),
|
| 2399 |
+
# New fields
|
|
|
|
| 2400 |
"entry_notes": wikt_report.get("entry_notes"),
|
| 2401 |
"other_pos": wikt_report.get("other_pos"),
|
| 2402 |
"raw_tags": wikt_report.get("raw_tags"),
|
|
|
|
| 2406 |
"holonyms": wikt_report.get("holonyms"),
|
| 2407 |
"meronyms": wikt_report.get("meronyms"),
|
| 2408 |
"coordinate_terms": wikt_report.get("coordinate_terms"),
|
|
|
|
| 2409 |
"expressions": wikt_report.get("expressions"),
|
| 2410 |
"proverbs": wikt_report.get("proverbs")
|
|
|
|
| 2411 |
}
|
| 2412 |
}
|
| 2413 |
|
| 2414 |
+
# --- F. Validation Filter ---
|
| 2415 |
is_valid = False
|
| 2416 |
is_inflected_entry = "Konjugierte Form" in pos_title or "Deklinierte Form" in pos_title
|
| 2417 |
|
| 2418 |
+
# Check 1: Lemma Match
|
|
|
|
|
|
|
| 2419 |
if lemma.lower() == word_lower:
|
| 2420 |
is_valid = True
|
| 2421 |
+
log(f"[DEBUG] Validate: Accepted '{lemma}' (Lemma Match)")
|
| 2422 |
|
| 2423 |
+
# Check 2: Form Match
|
|
|
|
|
|
|
| 2424 |
if not is_valid and not is_inflected_entry:
|
| 2425 |
+
# Look in Ground Truth (Wiktionary)
|
| 2426 |
+
for form_entry in wikt_forms_list:
|
| 2427 |
form_text = form_entry.get("form_text", "")
|
| 2428 |
+
clean_form = re.sub(r"\(.*\)", "", form_text).strip() # Remove parens
|
| 2429 |
+
clean_form = re.sub(r"^(der|die|das|ein|eine|...)\s+", "", clean_form, flags=re.IGNORECASE).strip() # Remove articles
|
| 2430 |
+
if word_lower in clean_form.lower():
|
|
|
|
|
|
|
| 2431 |
is_valid = True
|
| 2432 |
+
log(f"[DEBUG] Validate: Accepted '{lemma}' (Found in Wiktionary forms)")
|
| 2433 |
break
|
| 2434 |
+
|
| 2435 |
+
# Look in Pattern Generation (if Wikt failed)
|
| 2436 |
+
if not is_valid and pattern_block:
|
| 2437 |
+
if word_appears_in_inflections(word, pattern_block, pos_key):
|
| 2438 |
+
is_valid = True
|
| 2439 |
+
log(f"[DEBUG] Validate: Accepted '{lemma}' (Found in Pattern forms)")
|
| 2440 |
+
|
| 2441 |
if is_valid:
|
| 2442 |
if pos_key not in final_result["analysis"]:
|
| 2443 |
final_result["analysis"][pos_key] = []
|
| 2444 |
final_result["analysis"][pos_key].append(pos_entry_report)
|
| 2445 |
else:
|
| 2446 |
+
log(f"[DEBUG] Validate: Dropped '{lemma}' ({pos_key}) - No match found.")
|
|
|
|
|
|
|
| 2447 |
|
| 2448 |
+
final_result["info"] = f"Analysis from Wiktionary (Hybrid Engine). Found {len(wiktionary_reports)} entries."
|
| 2449 |
return final_result
|
| 2450 |
|
| 2451 |
# ============================================================================
|
|
|
|
| 3478 |
"""
|
| 3479 |
|
| 3480 |
def _format_word_analysis_html(data: Dict[str, Any]) -> str:
    """
    Generates HTML for a single word analysis (German version).

    Renders the 'inflections_pattern' block, which contains the
    Hybrid (Wiktionary-verified) data from the backend.

    Args:
        data: Backend analysis result. Reads ``data["analysis"]`` (a mapping
            of POS key -> list of entry dicts), plus ``data["input_word"]``
            and ``data["info"]`` as fallbacks.
            NOTE(review): the per-entry schema (keys like
            "inflections_wiktionary", "semantics_combined") is assumed from
            usage below — confirm against the backend that builds it.

    Returns:
        A single HTML string (CSS prefix + one "ling-card" per entry).
    """
    # Guard: no data at all, or backend returned no "analysis" mapping.
    if not data or "analysis" not in data:
        return f"{HTML_CSS}<div class='ling-card'>Keine Daten verfügbar. {data.get('info', '')}</div>"

    html = HTML_CSS
    analysis = data["analysis"]

    # Iterate over POS categories (noun, verb, etc.)
    for pos_key, entries in analysis.items():
        if not entries: continue

        # We usually display the best candidate, but if there are multiple distinct entries
        # (like "der See" vs "die See"), the backend groups them in the list.
        # We should ideally render ALL entries in the list to show the homonyms.
        # This loop handles that.
        for entry in entries:

            # Data Extraction — each sub-dict is optional; fall back to {}
            # so every .get() below is safe.
            inf_wikt = entry.get("inflections_wiktionary") or {}
            inf_pat = entry.get("inflections_pattern") or {}
            sem_comb = entry.get("semantics_combined") or {}
            meta = entry.get("wiktionary_metadata") or {}

            # Lemma preference order: Wiktionary > pattern > semantics > raw input.
            lemma = inf_wikt.get("base_form") or \
                    inf_pat.get("base_form") or \
                    sem_comb.get("lemma") or \
                    data.get("input_word") or "?"

            # --- POS Display Logic ---
            display_pos = pos_key.upper()
            css_class = "pos-other"

            if pos_key == 'noun':
                css_class = "pos-noun"
                display_pos = "SUBSTANTIV"
                # Append Gender to POS badge if available
                if "gender" in inf_pat:
                    gender_map = {"Masculine": "M", "Feminine": "F", "Neuter": "N"}
                    g_short = gender_map.get(inf_pat['gender'], "?")
                    display_pos += f" ({g_short})"

            elif pos_key == 'verb':
                css_class = "pos-verb"
                display_pos = "VERB"
            elif pos_key == 'adj' or pos_key == 'adjective':
                css_class = "pos-adj"
                display_pos = "ADJEKTIV"
            elif pos_key == 'adv' or pos_key == 'adverb':
                css_class = "pos-adv"
                display_pos = "ADVERB"

            # --- CARD START ---
            html += f"""
            <div class="ling-card">
                <div class="ling-header">
                    <span class="ling-lemma">{lemma}</span>
                    <span class="ling-pos {css_class}">{display_pos}</span>
            """

            # Add small title if available (e.g., "Konjugierte Form")
            if meta.get("pos_title"):
                html += f"<span style='margin-left:10px; color:#6b7280; font-size:0.85em;'>{meta['pos_title']}</span>"

            html += "</div>" # End Header

            # --- SOURCE BADGE LOGIC ---
            # Determine credibility of the data: hybrid (pattern verified
            # against Wiktionary) > raw Wiktionary DB forms > pure pattern.
            is_hybrid = inf_pat.get("is_hybrid", False)
            wikt_forms_count = len(inf_wikt.get("forms_list", []))

            badge_style = "float:right; font-weight:bold; font-size:0.75em; padding:2px 6px; border-radius:4px;"

            if is_hybrid:
                source_html = f"<span style='{badge_style} background:#ecfdf5; color:#065f46; border:1px solid #a7f3d0;'>Quelle: Wiktionary (Verifiziert)</span>"
            elif wikt_forms_count > 0:
                source_html = f"<span style='{badge_style} background:#ecfdf5; color:#065f46; border:1px solid #a7f3d0;'>Quelle: Wiktionary (DB)</span>"
            elif inf_pat and "error" not in inf_pat:
                source_html = f"<span style='{badge_style} background:#fffbeb; color:#92400e; border:1px solid #fcd34d;'>Quelle: Pattern (Generiert)</span>"
            else:
                source_html = ""

            # --- INFLECTIONS SECTION ---
            html += f"<div class='ling-section'>{source_html}<div class='ling-subtitle'>Morphologie & Flexion</div>"
            html += "<table class='inflection-table'>"

            # We render the table based on 'inf_pat' because the backend has already merged
            # the Wiktionary truths into this structure.

            if pos_key == 'noun':
                decl = inf_pat.get('declension')
                # Fallback if declension is nested in gender key
                if not decl and inf_pat.get('declension_by_gender'):
                    # If we have a specific gender from the analysis, try to grab that specific table
                    target_gender = inf_pat.get("gender")
                    if target_gender and target_gender in inf_pat['declension_by_gender']:
                        decl = inf_pat['declension_by_gender'][target_gender]
                    else:
                        # Fallback: take the first available
                        first_gender = list(inf_pat['declension_by_gender'].keys())[0]
                        decl = inf_pat['declension_by_gender'][first_gender]

                if decl:
                    # Noun Table Rows — each cell carries the article-marked
                    # ("definite") form; '-' when a case/number slot is missing.
                    nom_sg = decl.get('Nominativ Singular', {}).get('definite', '-')
                    nom_pl = decl.get('Nominativ Plural', {}).get('definite', '-')
                    gen_sg = decl.get('Genitiv Singular', {}).get('definite', '-')
                    dat_pl = decl.get('Dativ Plural', {}).get('definite', '-')

                    html += f"<tr><td class='inflection-label'>Nom. Singular</td><td>{nom_sg}</td></tr>"
                    html += f"<tr><td class='inflection-label'>Nom. Plural</td><td>{nom_pl}</td></tr>"
                    html += f"<tr><td class='inflection-label'>Gen. Singular</td><td>{gen_sg}</td></tr>"
                    html += f"<tr><td class='inflection-label'>Dat. Plural</td><td>{dat_pl}</td></tr>"
                else:
                    html += f"<tr><td colspan='2'><i>Keine Flexionsdaten verfügbar.</i></td></tr>"

            elif pos_key == 'verb':
                cj = inf_pat.get('conjugation') or {}
                pres = cj.get('Präsens') or {}
                past = cj.get('Präteritum') or {}
                parts = inf_pat.get('participles') or {}

                html += f"<tr><td class='inflection-label'>Infinitiv</td><td>{inf_pat.get('infinitive', lemma)}</td></tr>"
                html += f"<tr><td class='inflection-label'>3. Pers. Sg. (er/sie)</td><td>{pres.get('er/sie/es', '-')}</td></tr>"
                html += f"<tr><td class='inflection-label'>Präteritum (ich)</td><td>{past.get('ich', '-')}</td></tr>"
                html += f"<tr><td class='inflection-label'>Partizip II</td><td>{parts.get('Partizip Perfekt', '-')}</td></tr>"
                html += f"<tr><td class='inflection-label'>Konjunktiv II (ich)</td><td>{cj.get('Konjunktiv II', {}).get('ich', '-')}</td></tr>"

            elif pos_key in ['adjective', 'adj']:
                html += f"<tr><td class='inflection-label'>Positiv</td><td>{inf_pat.get('predicative', lemma)}</td></tr>"
                html += f"<tr><td class='inflection-label'>Komparativ</td><td>{inf_pat.get('comparative', '-')}</td></tr>"
                html += f"<tr><td class='inflection-label'>Superlativ</td><td>{inf_pat.get('superlative', '-')}</td></tr>"

            elif pos_key in ['adverb', 'adv']:
                html += f"<tr><td class='inflection-label'>Form</td><td>{lemma} (unveränderlich)</td></tr>"

            html += "</table>"

            # --- RAW FORMS FOOTER (The "Evidence") ---
            # Display the raw forms list from DB if available, as this proves the ground truth
            forms_list = inf_wikt.get("forms_list") or []
            if forms_list:
                # Deduplicate and flatten
                unique_forms = sorted(list(set([f.get('form_text') for f in forms_list if f.get('form_text')])))
                # Limit display to avoid wall of text
                display_forms = ", ".join(unique_forms[:12])
                if len(unique_forms) > 12: display_forms += f", ... ({len(unique_forms)-12} weitere)"

                html += f"<div style='font-size:0.8em; color:#6b7280; margin-top:5px;'>"
                html += f"<strong>Beobachtete Formen (DB):</strong> {display_forms}</div>"

            html += "</div>"

            # --- SEMANTICS SECTION ---
            html += "<div class='ling-section'><div class='ling-subtitle'>Bedeutungen & Definitionen</div>"

            wikt_senses = sem_comb.get("wiktionary_senses") or []
            ode_senses = sem_comb.get("odenet_senses") or []

            if not wikt_senses and not ode_senses:
                html += "<div class='sense-item'><i>Keine Definitionen gefunden.</i></div>"

            # Render Wiktionary Senses (capped at 3 to keep the card compact)
            for s in wikt_senses[:3]:
                gloss_raw = s.get("definition") or ""
                gloss = str(gloss_raw).replace(";", "<br>")
                if gloss:
                    html += f"<div class='sense-item'><span class='source-badge src-wikt'>Wikt</span> {gloss}</div>"

            # Render OdeNet Senses (also capped at 3)
            for s in ode_senses[:3]:
                defi = s.get("definition") or ""
                if defi:
                    html += f"<div class='sense-item'><span class='source-badge src-oewn'>OdeNet</span> {defi}</div>"

            html += "</div>"

            # --- RELATIONS SECTION ---
            rels = sem_comb.get("conceptnet_relations") or []
            if rels:
                html += "<div class='ling-section'><div class='ling-subtitle'>Wissensgraph (Kontext)</div>"

                # First 6 relations are shown directly; the rest go into a
                # collapsible <details> element.
                top_n_rels = 6
                visible_rels = rels[:top_n_rels]
                hidden_rels = rels[top_n_rels:]

                def render_rel(r: Dict[str, Any]) -> str:
                    # Render one relation as a chip; when 'other_node' is
                    # missing, fall back to the last token of the surface text.
                    rel_name = r.get("relation", "Rel")
                    target = r.get("other_node") or "?"
                    if target == "?" and "surface" in r:
                        parts = str(r["surface"]).split()
                        if len(parts) > 2: target = parts[-1]
                    return f"<span class='rel-chip'><span class='rel-type'>{rel_name}:</span> {target}</span>"

                html += "<div>"
                for r in visible_rels:
                    html += render_rel(r)
                html += "</div>"

                if hidden_rels:
                    html += f"""
                    <details class='kg-details'>
                        <summary>Zeige {len(hidden_rels)} weitere Relationen</summary>
                        <div class='kg-content'>
                    """
                    for r in hidden_rels:
                        html += render_rel(r)
                    html += "</div></details>"

                html += "</div>"

            html += "</div>" # End Card (div.ling-card)

    return html
|
| 3698 |
|
| 3699 |
+
def _map_wikt_form_to_pattern_keys(pos_key: str, tags_str: str) -> Optional[List[str]]:
|
| 3700 |
+
"""
|
| 3701 |
+
Parses a Wiktionary tag string and returns the corresponding path keys
|
| 3702 |
+
for the Pattern.de dictionary structure.
|
| 3703 |
+
"""
|
| 3704 |
+
if not tags_str: return None
|
| 3705 |
+
t = tags_str.lower()
|
| 3706 |
+
|
| 3707 |
+
if pos_key == "noun":
|
| 3708 |
+
# Pattern Structure: [Gender] -> "Nominativ Singular" -> "bare"/"definite"
|
| 3709 |
+
case = ""
|
| 3710 |
+
if "nominative" in t: case = "Nominativ"
|
| 3711 |
+
elif "genitive" in t: case = "Genitiv"
|
| 3712 |
+
elif "dative" in t: case = "Dativ"
|
| 3713 |
+
elif "accusative" in t: case = "Akkusativ"
|
| 3714 |
+
|
| 3715 |
+
number = ""
|
| 3716 |
+
if "singular" in t: number = "Singular"
|
| 3717 |
+
elif "plural" in t: number = "Plural"
|
| 3718 |
+
|
| 3719 |
+
if case and number:
|
| 3720 |
+
return [f"{case} {number}"]
|
| 3721 |
+
|
| 3722 |
+
elif pos_key == "verb":
|
| 3723 |
+
# Pattern Structure: "conjugation" -> "Präsens" -> "ich"
|
| 3724 |
+
tense = ""
|
| 3725 |
+
if "present" in t: tense = "Präsens"
|
| 3726 |
+
elif "past" in t or "preterite" in t: tense = "Präteritum"
|
| 3727 |
+
elif "subjunctive i" in t: tense = "Konjunktiv I"
|
| 3728 |
+
elif "subjunctive ii" in t: tense = "Konjunktiv II"
|
| 3729 |
+
elif "imperative" in t: tense = "Imperativ"
|
| 3730 |
+
|
| 3731 |
+
person_key = ""
|
| 3732 |
+
if "participle" in t:
|
| 3733 |
+
if "past" in t or "perfect" in t: return ["participles", "Partizip Perfekt"]
|
| 3734 |
+
if "present" in t: return ["participles", "Partizip Präsens"]
|
| 3735 |
+
|
| 3736 |
+
if "singular" in t:
|
| 3737 |
+
if "1" in t: person_key = "ich" if tense != "Imperativ" else "du" # 1sg usually not imp, but handling safety
|
| 3738 |
+
elif "2" in t: person_key = "du"
|
| 3739 |
+
elif "3" in t: person_key = "er/sie/es"
|
| 3740 |
+
elif "plural" in t:
|
| 3741 |
+
if "1" in t: person_key = "wir"
|
| 3742 |
+
elif "2" in t: person_key = "ihr"
|
| 3743 |
+
elif "3" in t: person_key = "sie/Sie"
|
| 3744 |
+
|
| 3745 |
+
if tense and person_key:
|
| 3746 |
+
return ["conjugation", tense, person_key]
|
| 3747 |
+
|
| 3748 |
+
elif pos_key == "adjective":
|
| 3749 |
+
# Pattern Structure: "comparative", "superlative"
|
| 3750 |
+
if "comparative" in t and "predicative" in t: return ["comparative"]
|
| 3751 |
+
if "superlative" in t and "predicative" in t: return ["superlative"]
|
| 3752 |
+
if "positive" in t and "predicative" in t: return ["predicative"]
|
| 3753 |
+
|
| 3754 |
+
return None
|
| 3755 |
+
|
| 3756 |
+
def _map_wikt_gender_to_pattern(tags_list: List[str]) -> Optional[int]:
|
| 3757 |
+
"""
|
| 3758 |
+
Maps Wiktionary tag strings (e.g., 'masculine') to pattern.de constants.
|
| 3759 |
+
Returns None if no specific gender is found.
|
| 3760 |
+
"""
|
| 3761 |
+
if not tags_list:
|
| 3762 |
+
return None
|
| 3763 |
+
|
| 3764 |
+
# Flatten and normalize tags
|
| 3765 |
+
# Wiktionary often provides tags like "masculine", "feminine", "neuter"
|
| 3766 |
+
tags_lower = [str(t).lower() for t in tags_list]
|
| 3767 |
+
|
| 3768 |
+
if "masculine" in tags_lower or "m" in tags_lower:
|
| 3769 |
+
return MALE
|
| 3770 |
+
if "feminine" in tags_lower or "f" in tags_lower:
|
| 3771 |
+
return FEMALE
|
| 3772 |
+
if "neuter" in tags_lower or "n" in tags_lower:
|
| 3773 |
+
return NEUTRAL
|
| 3774 |
+
|
| 3775 |
+
return None
|
| 3776 |
+
|
| 3777 |
def _format_comprehensive_html(data: Dict[str, Any]) -> str:
|
| 3778 |
""" Generates HTML for the comprehensive sentence analysis. """
|
| 3779 |
if "error" in data:
|