WiktionaryEN

Running

App Files Community

cstr committed on Nov 10

Commit

5c5cf64

verified ·

1 Parent(s): 9da6dfa

path checks for fallback methods

Browse files

Files changed (1) hide show

app.py +221 -250

app.py CHANGED Viewed

@@ -2524,183 +2524,177 @@ def comprehensive_german_analysis(text: str, top_n_value: Optional[float] = 0) -
         }
 # --- 7b. NEW: Word Encyclopedia (Non-Contextual) Analyzer ---
-def _analyze_word_with_hanta(word: str, top_n_value: int) -> Dict[str, Any]:
     """
-    (PUBLIC DISPATCHER) Analyzes a single word for all possible forms.
-    (FALLBACK ENGINE 1) Analyzes a single word using HanTa + OdeNet + Pattern.
-    This function intelligently selects the best available engine:
-    1.  PRIMARY: Attempts to use the HanTa-led engine (V17) for maximum accuracy.
-    2.  FALLBACK: If HanTa is not available, it uses the spaCy-IWNLP-led
-        engine (V16 logic from 'analyze_word_comprehensively') as a robust fallback.
     """
-    if not word or not word.strip():
-        return {"info": "Please enter a word."}
     top_n = int(top_n_value) if top_n_value is not None else 0
-    # --- PRIMARY ENGINE: HanTa-led (V17) ---
-    if HANTA_AVAILABLE:
-        print(f"\n[Word Encyclopedia] Starting V18 (HanTa) analysis for: \"{word}\"")
-        final_result: Dict[str, Any] = {
-            "input_word": word,
-            "analysis": {}
-        }
-        try:
-            hanta_tagger = hanta_get_tagger()
-            if not hanta_tagger:
-                raise Exception("HanTa Tagger failed to initialize.") # Will be caught and trigger fallback
-            # --- 1. Get All Grammatical Candidates (HanTa) ---
-            hanta_tags = _hanta_get_candidates(word, hanta_tagger)
-            if not hanta_tags:
-                return {"info": f"No grammatical analysis found for '{word}'."}
-            # --- 2. Map Tags to POS Groups (with Adverb Heuristic) ---
-            pos_groups_map = _hanta_map_tags_to_pos(hanta_tags)
-            log(f"Found {len(pos_groups_map)} possible POS group(s): {list(pos_groups_map.keys())}")
-            # --- 3. Validate and Build Report for each POS Group ---
-            for pos_group, specific_tags in pos_groups_map.items():
-                print(f"--- Analyzing as: {pos_group.upper()} ---")
-                # --- 3a. Get Lemma (HanTa) ---
-                lemma = _hanta_get_lemma_for_pos(word, pos_group, hanta_tagger)
-                log(f"Lemma for {pos_group} is: '{lemma}'")
-                # --- 3b. Get Semantics & VALIDATE (OdeNet) ---
-                # We call the NEW, CORRECTED helper from Section 6c
-                all_odenet_senses = _get_odenet_senses_by_pos(lemma)
-                pos_odenet_senses = all_odenet_senses.get(pos_group, [])
-                # We only reject if OdeNet is working and returns no senses.
-                # If OdeNet is down, the list will contain a placeholder and we proceed.
-                if not pos_odenet_senses:
-                    log(f"✗ REJECTED {pos_group}: OdeNet is available but has no '{pos_group}' senses for lemma '{lemma}'.")
-                    continue
-                # Filter out the placeholder if OdeNet is down
-                if pos_odenet_senses and "info" in pos_odenet_senses[0]:
-                    log(f"✓ VERIFIED {pos_group}: OdeNet is unavailable, proceeding without validation.")
-                    pos_odenet_senses = [] # Clear the placeholder
-                else:
-                    log(f"✓ VERIFIED {pos_group}: OdeNet found {len(pos_odenet_senses)} sense(s).")
-                # --- 3c. Get Inflections (Pattern) ---
-                inflection_report = {}
-                if not PATTERN_DE_AVAILABLE:
-                    inflection_report = {"info": "pattern.de library not available. No inflections generated."}
-                else:
-                    try:
-                        if pos_group == "noun":
-                            inflection_report = pattern_analyze_as_noun(lemma)
-                        elif pos_group == "verb":
-                            inflection_report = pattern_analyze_as_verb(lemma)
-                        elif pos_group == "adjective":
-                            inflection_report = pattern_analyze_as_adjective(lemma)
-                        elif pos_group == "adverb":
-                            inflection_report = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
-                        if not pattern_is_good_analysis(inflection_report, pos_group) and pos_group != "adverb":
-                            log(f"⚠️ Warning: pattern.de generated a poor inflection table for {lemma} ({pos_group}).")
-                            inflection_report["warning"] = "Inflection table from pattern.de seems incomplete or invalid."
-                    except Exception as e:
-                        log(f"pattern.de inflection failed for {lemma} ({pos_group}): {e}")
-                        inflection_report = {"error": f"pattern.de failed: {e}", "traceback": traceback.format_exc()}
-                # --- 3d. Build Final Report Block ---
-                final_result["analysis"][pos_group] = [{
-                    "hanta_analysis": {
-                        "detected_tags": sorted(list(specific_tags)),
-                        "lemma": lemma,
-                        "morphemes": [
-                             hanta_tagger.analyze(word.capitalize() if pos_group == 'noun' else word.lower(), taglevel=3)
-                        ]
-                    },
-                    "inflections_pattern": inflection_report,
-                    "semantics_combined": _build_semantics_block_for_lemma(lemma, pos_group, top_n),
-                }]
-            if not final_result["analysis"]:
-                return {
-                    "input_word": word,
-                    "info": f"No valid, semantically-verified analysis found for '{word}'. It may be a typo or a function word."
-                }
-            final_result["info"] = "Analysis performed by HanTa-led fallback engine."
-            return final_result
-        except Exception as e:
-            print(f"[Word Encyclopedia] HanTa FALLBACK Engine FAILED: {e}")
-            traceback.print_exc()
-            return {} # Signal failure
-    # --- FALLBACK ENGINE: spaCy-IWNLP-led (V16) ---
-    if IWNLP_AVAILABLE:
-        try:
-            log("--- Dispatcher: HanTa not found or failed. Attempting IWNLP Fallback Engine ---")
-            result = _analyze_word_with_iwnlp(word, top_n_value)
-            result["info"] = result.get("info", "") + " (Analysis performed by IWNLP-based fallback engine)"
-            return result
-        except Exception as e:
-            log(f"--- IWNLP Fallback Engine FAILED: {e} ---")
-            traceback.print_exc()
-            return {"error": f"IWNLP Fallback Engine failed: {e}"}
-    # --- No engines available ---
-    log("--- Dispatcher: No valid analysis engines found. ---")
-    return {
-        "input_word": word,
-        "error": "Fatal Error: Neither HanTa nor spacy-iwnlp are available. "
-                 "Please install at least one to use the Word Encyclopedia."
-    }
-def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
     """
-    (FALLBACK ENGINE 2) Analyzes a single word using IWNLP + OdeNet + Pattern.
-    This was the V16 engine.
-    V19 UPDATE: This function *must* be modified to match the new
-    output format: `analysis: { "pos_key": [ ...list... ] }`
-    (NON-CONTEXTUAL) Analyzes a single word for ALL its possible
-    grammatical and semantic forms.
-    ** Strategy: IWNLP Lemmas + spaCy POS + Pattern.de Validators**
-    1. Get spaCy's primary POS (e.g., "ADV" for "heute").
-    2. Get IWNLP's list of *lemmas* (e.g., "Lauf" -> ['Lauf', 'laufen']).
-    3. Create a unique set of all possible lemmas from spaCy, IWNLP, and the word itself.
-    4. Iterate this lemma set:
-       - Try to analyze each lemma as NOUN (capitalized).
-       - Try to analyze each lemma as VERB.
-       - Try to analyze each lemma as ADJECTIVE.
-       - Validate each with pattern_is_good_analysis AND by checking for OdeNet senses.
-    5. After checking inflections, check if spaCy's POS was 'ADV'.
-       If so, and OdeNet has 'r' senses, add an 'adverb' report.
-    6. This finds all inflecting forms ("Lauf", "gut") AND non-inflecting
-       forms ("heute") while rejecting artifacts ("klauf", "heutst").
     """
     if not word or not word.strip():
-        return {"info": "Please enter a word."}
     if not IWNLP_AVAILABLE:
-        return {"error": "`spacy-iwnlp` library not available. This tab requires it."}
     top_n = int(top_n_value) if top_n_value is not None else 0
-    print(f"\n[Word Encyclopedia] Starting IWNP-fallback analysis for: \"{word}\" (top_n={top_n})")
     final_result: Dict[str, Any] = {
         "input_word": word,
         "analysis": {}
     }
     # --- Helper: Get OdeNet senses ---
-    def _get_odenet_senses_by_pos(w):
         """
         (Internal helper for IWNLP fallback)
-        *** V18 FIX: OdeNet uses 'a' for BOTH Adjective and Adverb. ***
         """
         senses_by_pos: Dict[str, List[Dict]] = {
             "noun": [], "verb": [], "adjective": [], "adverb": []
@@ -2712,68 +2706,36 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
                     "verb": [{"info": "OdeNet unavailable"}],
                     "adjective": [{"info": "OdeNet unavailable"}],
                     "adverb": [{"info": "OdeNet unavailable"}]}
         try:
             all_senses = odenet_get_thesaurus_info(w).get("senses", [])
             for sense in all_senses:
                 if "error" in sense: continue
                 pos_tag = sense.get("pos")
                 if pos_tag == 'n':
                     senses_by_pos["noun"].append(sense)
                 elif pos_tag == 'v':
                     senses_by_pos["verb"].append(sense)
-                # --- THIS IS THE CRITICAL FIX ---
                 elif pos_tag == 'a':
                     log(f"[IWNLP Fallback] Found OdeNet 'a' tag (Adj/Adv) for sense: {sense.get('definition', '...')[:30]}")
                     senses_by_pos["adjective"].append(sense)
                     senses_by_pos["adverb"].append(sense)
-                # --- END OF FIX ---
         except Exception as e:
             print(f"[Word Encyclopedia] OdeNet check failed: {e}")
         return senses_by_pos
-    # --- Helper: Build semantics block ---
-    def _build_semantics(lemma, odenet_senses, top_n):
-        conceptnet_relations = []
-        if REQUESTS_AVAILABLE:
-            try:
-                conceptnet_result = conceptnet_get_relations(lemma, language='de')
-                conceptnet_relations = conceptnet_result.get("relations", [])
-            except Exception as e:
-                conceptnet_relations = [{"error": str(e)}]
-        if top_n > 0:
-            odenet_senses = odenet_senses[:top_n]
-            conceptnet_relations.sort(key=lambda x: x.get('weight', 0.0), reverse=True)
-            conceptnet_relations = conceptnet_relations[:top_n]
-        return {
-            "lemma": lemma,
-            "odenet_senses": odenet_senses,
-            "conceptnet_relations": conceptnet_relations
-        }
     # --- 1. GET ALL LEMMA CANDIDATES & SPACY POS ---
     try:
         iwnlp = iwnlp_get_pipeline()
         if not iwnlp:
-            return {"error": "IWNLP pipeline failed to initialize."}
         doc = iwnlp(word)
         token = doc[0]
-        # Get spaCy's best POS guess
         spacy_pos = token.pos_ # e.g., "NOUN" for "Lauf", "ADV" for "heute"
         spacy_lemma = token.lemma_
-        # *** THIS IS THE FIX ***
-        # Get IWNLP's lemma list (it only registers 'iwnlp_lemmas')
         iwnlp_lemmas_list = token._.iwnlp_lemmas or []
-        # Combine all possible lemmas
         all_lemmas = set(iwnlp_lemmas_list)
         all_lemmas.add(spacy_lemma)
         all_lemmas.add(word) # Add the word itself
@@ -2783,25 +2745,19 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
     except Exception as e:
         traceback.print_exc()
-        return {"error": f"IWNLP analysis failed: {e}"}
     # --- 2. CHECK INFLECTING POSSIBILITIES FOR EACH LEMMA ---
-    # This dict will hold the *best* analysis for each POS
-    # e.g., "gut" -> { 'adjective': {...}, 'noun': {...} }
     valid_analyses: Dict[str, Dict[str, Any]] = {}
     for lemma in all_lemmas:
         if not lemma: continue
-        odenet_senses_by_pos = _get_odenet_senses_by_pos(lemma)
         # --- Check NOUN ---
         if 'noun' not in valid_analyses:
             noun_inflections = {}
             is_good_noun = False
             if not PATTERN_DE_AVAILABLE:
                 noun_inflections = {"info": "pattern.de not available."}
                 is_good_noun = True
@@ -2816,32 +2772,20 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
             if is_good_noun:
                 odenet_senses = odenet_senses_by_pos.get('noun', [])
                 if not odenet_senses and lemma.lower() == word.lower():
-                     odenet_senses = _get_odenet_senses_by_pos(lemma.capitalize()).get('noun', [])
-                # We accept if (senses exist) OR (OdeNet is down and we can't check)
                 if odenet_senses:
-                     # We must filter out the "unavailable" placeholder
-                    if "info" not in odenet_senses[0]:
                         log(f"  ✓ [IWNLP Fallback] Valid NOUN found: {lemma}")
                         valid_analyses['noun'] = {
                             "lemma": noun_inflections.get("base_form", lemma),
                             "inflections": noun_inflections,
-                            "odenet_senses": odenet_senses
-                        }
-                    elif not WN_AVAILABLE: # OdeNet is down
-                        log(f"  ✓ [IWNLP Fallback] Accepting NOUN (OdeNet unavailable): {lemma}")
-                        valid_analyses['noun'] = {
-                            "lemma": noun_inflections.get("base_form", lemma),
-                            "inflections": noun_inflections,
-                            "odenet_senses": [] # No senses to show
                         }
         # --- Check VERB ---
         if 'verb' not in valid_analyses:
             verb_inflections = {}
             is_good_verb = False
             if not PATTERN_DE_AVAILABLE:
                 verb_inflections = {"info": "pattern.de not available."}
                 is_good_verb = True
@@ -2855,28 +2799,19 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
             if is_good_verb:
                 odenet_senses = odenet_senses_by_pos.get('verb', [])
                 if odenet_senses:
-                    if "info" not in odenet_senses[0]:
                         log(f"  ✓ [IWNLP Fallback] Valid VERB found: {lemma}")
                         valid_analyses['verb'] = {
                             "lemma": verb_inflections.get("infinitive", lemma),
                             "inflections": verb_inflections,
-                            "odenet_senses": odenet_senses
-                        }
-                    elif not WN_AVAILABLE:
-                         log(f"  ✓ [IWNLP Fallback] Accepting VERB (OdeNet unavailable): {lemma}")
-                         valid_analyses['verb'] = {
-                            "lemma": verb_inflections.get("infinitive", lemma),
-                            "inflections": verb_inflections,
-                            "odenet_senses": []
                         }
         # --- Check ADJECTIVE ---
         if 'adjective' not in valid_analyses:
             adj_inflections = {}
             is_good_adj = False
             if not PATTERN_DE_AVAILABLE:
                 adj_inflections = {"info": "pattern.de not available."}
                 is_good_adj = True
@@ -2890,46 +2825,28 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
             if is_good_adj:
                 odenet_senses = odenet_senses_by_pos.get('adjective', [])
                 if odenet_senses:
-                    if "info" not in odenet_senses[0]:
                         log(f"  ✓ [IWNLP Fallback] Valid ADJECTIVE found: {lemma}")
                         valid_analyses['adjective'] = {
                             "lemma": adj_inflections.get("predicative", lemma),
                             "inflections": adj_inflections,
-                            "odenet_senses": odenet_senses
-                        }
-                    elif not WN_AVAILABLE:
-                        log(f"  ✓ [IWNLP Fallback] Accepting ADJECTIVE (OdeNet unavailable): {lemma}")
-                        valid_analyses['adjective'] = {
-                            "lemma": adj_inflections.get("predicative", lemma),
-                            "inflections": adj_inflections,
-                            "odenet_senses": []
                         }
     # --- 3. CHECK NON-INFLECTING POS (ADVERB) ---
     if spacy_pos == "ADV":
-        odenet_senses = _get_odenet_senses_by_pos(word).get('adverb', [])
         if odenet_senses:
-            if "info" not in odenet_senses[0]:
                 log(f"  ✓ [IWNLP Fallback] Valid ADVERB found: {word}")
                 valid_analyses['adverb'] = {
                     "lemma": word,
                     "inflections": {"base_form": word},
-                    "odenet_senses": odenet_senses
-                }
-            elif not WN_AVAILABLE:
-                log(f"  ✓ [IWNLP Fallback] Accepting ADVERB (OdeNet unavailable): {word}")
-                valid_analyses['adverb'] = {
-                    "lemma": word,
-                    "inflections": {"base_form": word},
-                    "odenet_senses": []
                 }
     # --- 4. CHECK OTHER FUNCTION WORDS (e.g. "mein" -> DET) ---
-    # We add this if spaCy found a function word AND we haven't found any
-    # content-word analyses (which are more informative).
     FUNCTION_POS = {"DET", "PRON", "ADP", "AUX", "CCONJ", "SCONJ", "PART", "PUNCT", "SYM"}
     if spacy_pos in FUNCTION_POS and not valid_analyses:
         pos_key = spacy_pos.lower()
@@ -2937,25 +2854,79 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
         valid_analyses[pos_key] = {
             "lemma": spacy_lemma,
             "inflections": {"base_form": spacy_lemma},
-            "odenet_senses": [], # Function words aren't in OdeNet
-            "spacy_analysis": { # Add the spaCy info
                 "word": token.text, "lemma": token.lemma_,
                 "pos_UPOS": token.pos_, "pos_TAG": token.tag_,
                 "morphology": str(token.morph)
             }
         }
-    # --- 5. BUILD FINAL REPORT ---
     for pos_key, analysis_data in valid_analyses.items():
         lemma = analysis_data["lemma"]
-        pos_report = {
-            "inflections_pattern": analysis_data["inflections"],
-            "semantics_combined": _build_semantics_block_for_lemma(lemma, pos_key, top_n)
-        }
-        if "spacy_analysis" in analysis_data:
-             pos_report["spacy_analysis"] = analysis_data["spacy_analysis"]
-        final_result["analysis"][pos_key] = [pos_report] # Wrap in list
     if not final_result["analysis"]:
        return {} # No results

         }
 # --- 7b. NEW: Word Encyclopedia (Non-Contextual) Analyzer ---
+def _analyze_word_with_hanta(word: str, top_n_value: Optional[float] = 0) -> Dict[str, Any]:
     """
+    (FALLBACK ENGINE 2) Analyzes a single word using HanTa + OdeNet + Pattern.
+    This was the V18 engine. Returns {} on failure.
     """
+    if not HANTA_AVAILABLE:
+        return {} # Signal failure
     top_n = int(top_n_value) if top_n_value is not None else 0
+    print(f"\n[Word Encyclopedia] Running V18 (HanTa) fallback for: \"{word}\"")
+    final_result: Dict[str, Any] = {
+        "input_word": word,
+        "analysis": {}
+    }
+    word_lower = word.lower() # For validation
+    try:
+        hanta_tagger = hanta_get_tagger()
+        if not hanta_tagger:
+            raise Exception("HanTa Tagger failed to initialize.")
+        hanta_tags = _hanta_get_candidates(word, hanta_tagger)
+        if not hanta_tags:
+            return {}
+        pos_groups_map = _hanta_map_tags_to_pos(hanta_tags)
+        log(f"Found {len(pos_groups_map)} possible POS group(s): {list(pos_groups_map.keys())}")
+        for pos_group, specific_tags in pos_groups_map.items():
+            print(f"--- Analyzing as: {pos_group.upper()} ---")
+            lemma = _hanta_get_lemma_for_pos(word, pos_group, hanta_tagger)
+            log(f"Lemma for {pos_group} is: '{lemma}'")
+            all_odenet_senses = _get_odenet_senses_by_pos(lemma)
+            pos_odenet_senses = all_odenet_senses.get(pos_group, [])
+            if not pos_odenet_senses:
+                log(f"✗ REJECTED {pos_group}: OdeNet is available but has no '{pos_group}' senses for lemma '{lemma}'.")
+                continue
+            if pos_odenet_senses and "info" in pos_odenet_senses[0]:
+                log(f"✓ VERIFIED {pos_group}: OdeNet is unavailable, proceeding without validation.")
+                pos_odenet_senses = []
+            else:
+                log(f"✓ VERIFIED {pos_group}: OdeNet found {len(pos_odenet_senses)} sense(s).")
+            # --- 1. Get Inflections (Pattern) ---
+            inflection_report = {}
+            if not PATTERN_DE_AVAILABLE:
+                inflection_report = {"info": "pattern.de library not available. No inflections generated."}
+            else:
+                try:
+                    if pos_group == "noun":
+                        inflection_report = pattern_analyze_as_noun(lemma)
+                    elif pos_group == "verb":
+                        inflection_report = pattern_analyze_as_verb(lemma)
+                    elif pos_group == "adjective":
+                        inflection_report = pattern_analyze_as_adjective(lemma)
+                    elif pos_group == "adverb":
+                        inflection_report = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
+                    if not pattern_is_good_analysis(inflection_report, pos_group) and pos_group != "adverb":
+                        log(f"⚠️ Warning: pattern.de generated a poor inflection table for {lemma} ({pos_group}).")
+                        inflection_report["warning"] = "Inflection table from pattern.de seems incomplete or invalid."
+                except Exception as e:
+                    log(f"pattern.de inflection failed for {lemma} ({pos_group}): {e}")
+                    inflection_report = {"error": f"pattern.de failed: {e}", "traceback": traceback.format_exc()}
+            # --- 2. Build Semantics Block ---
+            semantics_block = _build_semantics_block_for_lemma(lemma, pos_group, top_n)
+            # --- 3. Build Final Report Block ---
+            pos_entry_report = {
+                "hanta_analysis": {
+                    "detected_tags": sorted(list(specific_tags)),
+                    "lemma": lemma,
+                    "morphemes": [
+                         hanta_tagger.analyze(word.capitalize() if pos_group == 'noun' else word.lower(), taglevel=3)
+                    ]
+                },
+                "inflections_pattern": inflection_report,
+                "semantics_combined": semantics_block
+            }
+            # --- 4. *** VALIDATION FILTER *** ---
+            is_valid = False
+            if lemma.lower() == word_lower:
+                is_valid = True
+                log(f"[DEBUG] HanTa: KEEPING entry '{lemma}' ({pos_group}) because input word matches lemma.")
+            if not is_valid:
+                # Check pattern.de's lexeme (for verbs)
+                for form in inflection_report.get("lexeme", []):
+                    if form.lower() == word_lower:
+                        is_valid = True
+                        log(f"[DEBUG] HanTa: KEEPING entry '{lemma}' ({pos_group}) because input word found in pattern.de lexeme.")
+                        break
+            if not is_valid:
+                # Check pattern.de's participles (for "abgeschnitten")
+                for part_form in inflection_report.get("participles", {}).values():
+                    if part_form.lower() == word_lower:
+                        is_valid = True
+                        log(f"[DEBUG] HanTa: KEEPING entry '{lemma}' ({pos_group}) because input word found in pattern.de participles.")
+                        break
+            if not is_valid and pos_group == "adjective":
+                 # Check adjective forms
+                 if word_lower == inflection_report.get("predicative", "").lower() or \
+                    word_lower == inflection_report.get("comparative", "").lower() or \
+                    word_lower == inflection_report.get("superlative", "").lower():
+                     is_valid = True
+                     log(f"[DEBUG] HanTa: KEEPING entry '{lemma}' ({pos_group}) because input word matches adj comparison form.")
+            if not is_valid and pos_group == "noun":
+                 # Check noun forms
+                 if word_lower == inflection_report.get("singular", "").lower() or \
+                    word_lower == inflection_report.get("plural", "").lower():
+                     is_valid = True
+                     log(f"[DEBUG] HanTa: KEEPING entry '{lemma}' ({pos_group}) because input word matches noun singular/plural.")
+            if not is_valid and pos_group == "adverb":
+                is_valid = True # Adverbs are non-inflecting, always keep.
+            if is_valid:
+                if pos_group not in final_result["analysis"]:
+                    final_result["analysis"][pos_group] = []
+                final_result["analysis"][pos_group].append(pos_entry_report)
+            else:
+                 log(f"[DEBUG] HanTa: DROPPING entry '{lemma}' ({pos_group}) because input word '{word}' was not found in its valid forms.")
+            # --- END OF VALIDATION ---
+        if not final_result["analysis"]:
+            return {} # No results
+        final_result["info"] = "Analysis performed by HanTa-led fallback engine."
+        return final_result
+    except Exception as e:
+        print(f"[Word Encyclopedia] HanTa FALLBACK Engine FAILED: {e}")
+        traceback.print_exc()
+        return {} # Signal failure
+def _analyze_word_with_iwnlp(word: str, top_n_value: Optional[float] = 0) -> Dict[str, Any]:
     """
+    (FALLBACK ENGINE 3) Analyzes a single word using IWNLP + OdeNet + Pattern.
+    This is the full V16/V18 logic, restored and with the new validation filter.
+    Returns {} on failure.
     """
     if not word or not word.strip():
+        return {} # Use empty dict for "info"
     if not IWNLP_AVAILABLE:
+        return {} # Signal failure
     top_n = int(top_n_value) if top_n_value is not None else 0
+    print(f"\n[Word Encyclopedia] Running IWNLP-fallback analysis for: \"{word}\" (top_n={top_n})")
     final_result: Dict[str, Any] = {
         "input_word": word,
         "analysis": {}
     }
+    word_lower = word.lower() # For validation
     # --- Helper: Get OdeNet senses ---
+    def _get_odenet_senses_by_pos_internal(w):
         """
         (Internal helper for IWNLP fallback)
+        OdeNet uses 'a' for BOTH Adjective and Adverb.
         """
         senses_by_pos: Dict[str, List[Dict]] = {
             "noun": [], "verb": [], "adjective": [], "adverb": []
                     "verb": [{"info": "OdeNet unavailable"}],
                     "adjective": [{"info": "OdeNet unavailable"}],
                     "adverb": [{"info": "OdeNet unavailable"}]}
         try:
             all_senses = odenet_get_thesaurus_info(w).get("senses", [])
             for sense in all_senses:
                 if "error" in sense: continue
                 pos_tag = sense.get("pos")
                 if pos_tag == 'n':
                     senses_by_pos["noun"].append(sense)
                 elif pos_tag == 'v':
                     senses_by_pos["verb"].append(sense)
                 elif pos_tag == 'a':
                     log(f"[IWNLP Fallback] Found OdeNet 'a' tag (Adj/Adv) for sense: {sense.get('definition', '...')[:30]}")
                     senses_by_pos["adjective"].append(sense)
                     senses_by_pos["adverb"].append(sense)
         except Exception as e:
             print(f"[Word Encyclopedia] OdeNet check failed: {e}")
         return senses_by_pos
     # --- 1. GET ALL LEMMA CANDIDATES & SPACY POS ---
     try:
         iwnlp = iwnlp_get_pipeline()
         if not iwnlp:
+            return {} # Signal failure
         doc = iwnlp(word)
         token = doc[0]
         spacy_pos = token.pos_ # e.g., "NOUN" for "Lauf", "ADV" for "heute"
         spacy_lemma = token.lemma_
         iwnlp_lemmas_list = token._.iwnlp_lemmas or []
         all_lemmas = set(iwnlp_lemmas_list)
         all_lemmas.add(spacy_lemma)
         all_lemmas.add(word) # Add the word itself
     except Exception as e:
         traceback.print_exc()
+        return {} # Signal failure
     # --- 2. CHECK INFLECTING POSSIBILITIES FOR EACH LEMMA ---
     valid_analyses: Dict[str, Dict[str, Any]] = {}
     for lemma in all_lemmas:
         if not lemma: continue
+        odenet_senses_by_pos = _get_odenet_senses_by_pos_internal(lemma)
         # --- Check NOUN ---
         if 'noun' not in valid_analyses:
             noun_inflections = {}
             is_good_noun = False
             if not PATTERN_DE_AVAILABLE:
                 noun_inflections = {"info": "pattern.de not available."}
                 is_good_noun = True
             if is_good_noun:
                 odenet_senses = odenet_senses_by_pos.get('noun', [])
                 if not odenet_senses and lemma.lower() == word.lower():
+                     odenet_senses = _get_odenet_senses_by_pos_internal(lemma.capitalize()).get('noun', [])
                 if odenet_senses:
+                    if "info" not in odenet_senses[0] or not WN_AVAILABLE:
                         log(f"  ✓ [IWNLP Fallback] Valid NOUN found: {lemma}")
                         valid_analyses['noun'] = {
                             "lemma": noun_inflections.get("base_form", lemma),
                             "inflections": noun_inflections,
+                            "odenet_senses": [] if "info" in odenet_senses[0] else odenet_senses
                         }
         # --- Check VERB ---
         if 'verb' not in valid_analyses:
             verb_inflections = {}
             is_good_verb = False
             if not PATTERN_DE_AVAILABLE:
                 verb_inflections = {"info": "pattern.de not available."}
                 is_good_verb = True
             if is_good_verb:
                 odenet_senses = odenet_senses_by_pos.get('verb', [])
                 if odenet_senses:
+                    if "info" not in odenet_senses[0] or not WN_AVAILABLE:
                         log(f"  ✓ [IWNLP Fallback] Valid VERB found: {lemma}")
                         valid_analyses['verb'] = {
                             "lemma": verb_inflections.get("infinitive", lemma),
                             "inflections": verb_inflections,
+                            "odenet_senses": [] if "info" in odenet_senses[0] else odenet_senses
                         }
         # --- Check ADJECTIVE ---
         if 'adjective' not in valid_analyses:
             adj_inflections = {}
             is_good_adj = False
             if not PATTERN_DE_AVAILABLE:
                 adj_inflections = {"info": "pattern.de not available."}
                 is_good_adj = True
             if is_good_adj:
                 odenet_senses = odenet_senses_by_pos.get('adjective', [])
                 if odenet_senses:
+                    if "info" not in odenet_senses[0] or not WN_AVAILABLE:
                         log(f"  ✓ [IWNLP Fallback] Valid ADJECTIVE found: {lemma}")
                         valid_analyses['adjective'] = {
                             "lemma": adj_inflections.get("predicative", lemma),
                             "inflections": adj_inflections,
+                            "odenet_senses": [] if "info" in odenet_senses[0] else odenet_senses
                         }
     # --- 3. CHECK NON-INFLECTING POS (ADVERB) ---
     if spacy_pos == "ADV":
+        odenet_senses = _get_odenet_senses_by_pos_internal(word).get('adverb', [])
         if odenet_senses:
+            if "info" not in odenet_senses[0] or not WN_AVAILABLE:
                 log(f"  ✓ [IWNLP Fallback] Valid ADVERB found: {word}")
                 valid_analyses['adverb'] = {
                     "lemma": word,
                     "inflections": {"base_form": word},
+                    "odenet_senses": [] if "info" in odenet_senses[0] else odenet_senses
                 }
     # --- 4. CHECK OTHER FUNCTION WORDS (e.g. "mein" -> DET) ---
     FUNCTION_POS = {"DET", "PRON", "ADP", "AUX", "CCONJ", "SCONJ", "PART", "PUNCT", "SYM"}
     if spacy_pos in FUNCTION_POS and not valid_analyses:
         pos_key = spacy_pos.lower()
         valid_analyses[pos_key] = {
             "lemma": spacy_lemma,
             "inflections": {"base_form": spacy_lemma},
+            "odenet_senses": [],
+            "spacy_analysis": {
                 "word": token.text, "lemma": token.lemma_,
                 "pos_UPOS": token.pos_, "pos_TAG": token.tag_,
                 "morphology": str(token.morph)
             }
         }
+    # --- 5. BUILD FINAL REPORT (V21 MODIFIED + VALIDATION) ---
     for pos_key, analysis_data in valid_analyses.items():
         lemma = analysis_data["lemma"]
+        inflection_block = analysis_data["inflections"]
+        # --- E. VALIDATION FILTER ---
+        is_valid = False
+        if lemma.lower() == word_lower:
+            is_valid = True
+            log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because input word matches lemma.")
+        if not is_valid:
+            # Check pattern.de's lexeme (for verbs)
+            for form in inflection_block.get("lexeme", []):
+                if form.lower() == word_lower:
+                    is_valid = True
+                    log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because input word found in pattern.de lexeme.")
+                    break
+        if not is_valid:
+            # Check pattern.de's participles (for "abgeschnitten")
+            for part_form in inflection_block.get("participles", {}).values():
+                if part_form.lower() == word_lower:
+                    is_valid = True
+                    log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because input word found in pattern.de participles.")
+                    break
+        if not is_valid and pos_key == "adjective":
+             # Check adjective forms
+             if word_lower == inflection_block.get("predicative", "").lower() or \
+                word_lower == inflection_block.get("comparative", "").lower() or \
+                word_lower == inflection_block.get("superlative", "").lower():
+                 is_valid = True
+                 log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because input word matches adj comparison form.")
+        if not is_valid and pos_key == "noun":
+             # Check noun forms
+             if word_lower == inflection_block.get("singular", "").lower() or \
+                word_lower == inflection_block.get("plural", "").lower():
+                 is_valid = True
+                 log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because input word matches noun singular/plural.")
+        if not is_valid and (pos_key == "adverb" or "spacy_analysis" in analysis_data):
+            is_valid = True # Adverbs and Function Words are non-inflecting, always keep.
+            log(f"[DEBUG] IWNLP: KEEPING entry '{lemma}' ({pos_key}) because it is a non-inflecting word (ADV/FUNC).")
+        if is_valid:
+            pos_report = {
+                "inflections_pattern": inflection_block,
+                # Use the new global helper
+                "semantics_combined": _build_semantics_block_for_lemma(
+                    lemma,
+                    pos_key,
+                    top_n
+                )
+            }
+            if "spacy_analysis" in analysis_data:
+                 pos_report["spacy_analysis"] = analysis_data["spacy_analysis"]
+            if pos_key not in final_result["analysis"]:
+                 final_result["analysis"][pos_key] = []
+            final_result["analysis"][pos_key].append(pos_report)
+        else:
+             log(f"[DEBUG] IWNLP: DROPPING entry '{lemma}' ({pos_key}) because input word '{word}' was not found in its valid forms.")
+        # --- END VALIDATION ---
     if not final_result["analysis"]:
        return {} # No results