cstr committed on
Commit
d69777a
·
verified ·
1 Parent(s): 3613031

drop results path if queried word not even in inflections data

Browse files
Files changed (1) hide show
  1. app.py +256 -94
app.py CHANGED
@@ -1837,20 +1837,32 @@ def _wiktionary_find_all_entries(word: str, conn: sqlite3.Connection) -> List[Di
1837
 
1838
  def _wiktionary_format_semantics_block(
1839
  wikt_report: Dict[str, Any],
 
1840
  top_n: int
1841
  ) -> Dict[str, Any]:
1842
  """
1843
  Combines Wiktionary senses with OdeNet/ConceptNet senses,
1844
- using the ground-truth lemma from THIS Wiktionary report.
1845
  """
1846
 
1847
  # --- THIS IS THE FIX ---
1848
- # The semantic lemma IS the lemma from this specific wiktionary entry.
1849
- # e.g., for the "heuen" entry, this will be "heuen".
1850
- # e.g., for the "heute" entry, this will be "heute".
1851
- semantic_lemma = wikt_report.get("lemma", "")
1852
  pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
1853
- log(f"Wiktionary Semantics: Building block for lemma='{semantic_lemma}', pos='{pos_key}'")
 
 
 
 
 
 
 
 
 
 
 
 
 
1854
  # --- END OF FIX ---
1855
 
1856
  # 1. Get Wiktionary senses (from the original report)
@@ -1872,7 +1884,7 @@ def _wiktionary_format_semantics_block(
1872
  if odenet_senses_raw and "info" not in odenet_senses_raw[0]:
1873
  odenet_senses = odenet_senses_raw
1874
  except Exception as e:
1875
- log(f"OdeNet lookup failed for {semantic_lemma} ({pos_key}): {e}")
1876
 
1877
  # 3. Get ConceptNet relations for the *semantic_lemma*
1878
  conceptnet_relations = []
@@ -1922,17 +1934,24 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
1922
  if iwnlp:
1923
  doc = iwnlp(word)
1924
  token = doc[0]
1925
- spacy_pos_hint = token.pos_.lower()
 
 
 
 
 
 
 
1926
  spacy_lemma_hint = token.lemma_
1927
- log(f"Wiktionary Priority Hint: spaCy POS is '{spacy_pos_hint}', lemma is '{spacy_lemma_hint}'")
1928
  except Exception as e:
1929
- log(f"Wiktionary Priority Hint: spaCy/IWNLP failed: {e}")
1930
 
1931
  # --- 2. FIND ALL WIKTIONARY ENTRIES ---
1932
  try:
1933
  wiktionary_reports = _wiktionary_find_all_entries(word, conn)
1934
  except Exception as e:
1935
- log(f"Wiktionary query failed: {e}")
1936
  return {} # Signal failure
1937
 
1938
  if not wiktionary_reports:
@@ -1945,11 +1964,11 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
1945
 
1946
  # Priority 1: Exact POS match with spaCy hint
1947
  if spacy_pos_hint and wikt_pos == spacy_pos_hint:
1948
- # Bonus if lemma also matches
1949
- if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint:
1950
- return 1
1951
- return 2
1952
-
1953
  # Priority 2: Input word is the lemma (e.g., "Haus" -> "Haus")
1954
  if wikt_lemma.lower() == word.lower():
1955
  return 3
@@ -1958,59 +1977,96 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
1958
  return 4
1959
 
1960
  wiktionary_reports.sort(key=get_priority_score)
1961
- log(f"Wiktionary: Sorted entries: {[r.get('lemma') + ' (' + r.get('pos') + ')' for r in wiktionary_reports]}")
1962
 
1963
 
1964
- # --- 4. BUILD THE FINAL REPORT (PATH-PURE) ---
 
 
1965
  for wikt_report in wiktionary_reports:
1966
  pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
1967
  lemma = wikt_report.get("lemma", word)
 
1968
 
1969
- # --- A. Build Semantics Block (USING WIKT LEMMA) ---
1970
- # This is the pure path. "heuen" entry looks up "heuen". "heute" entry looks up "heute".
1971
- semantics_block = _wiktionary_format_semantics_block(wikt_report, top_n)
1972
-
1973
- # --- B. Build Wiktionary Inflection Block ---
1974
  inflections_wikt_block = {
1975
  "base_form": lemma,
1976
  "forms_list": wikt_report.get("forms", []),
1977
  "source": "wiktionary"
1978
  }
1979
 
1980
- # --- C. Build Pattern Inflection Block (for comparison) ---
1981
  pattern_block = {}
1982
  if PATTERN_DE_AVAILABLE:
1983
  try:
1984
- if pos_key == "noun":
1985
  pattern_block = pattern_analyze_as_noun(lemma)
1986
- elif pos_key == "verb":
1987
- pattern_block = pattern_analyze_as_verb(lemma)
1988
- elif pos_key == "adjective":
1989
- pattern_block = pattern_analyze_as_adjective(lemma)
 
 
 
 
 
 
 
 
1990
  elif pos_key == "adverb":
1991
  pattern_block = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
1992
  except Exception as e:
1993
  pattern_block = {"error": f"Pattern.de analysis for {pos_key}('{lemma}') failed: {e}"}
1994
 
1995
- # --- D. Assemble the final report for this entry ---
 
 
 
1996
  pos_entry_report = {
1997
  "inflections_wiktionary": inflections_wikt_block,
1998
  "inflections_pattern": pattern_block,
1999
  "semantics_combined": semantics_block,
2000
  "wiktionary_metadata": {
2001
- "pos_title": wikt_report.get("pos_title"),
2002
  "pronunciation": wikt_report.get("sounds"),
2003
  "examples": wikt_report.get("examples")
2004
  }
2005
  }
2006
 
2007
- # Append to the list for this POS key
2008
- if pos_key not in final_result["analysis"]:
2009
- final_result["analysis"][pos_key] = []
 
 
 
 
 
2010
 
2011
- final_result["analysis"][pos_key].append(pos_entry_report)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2012
 
2013
- final_result["info"] = f"Analysis from Wiktionary (Primary Engine). Found {len(wiktionary_reports)} matching entry/entries."
2014
  return final_result
2015
 
2016
  # ============================================================================
@@ -2708,58 +2764,57 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
2708
  # --- 7b. NEW: Word Encyclopedia (Non-Contextual) Analyzer ---
2709
 
2710
  # --- THIS IS THE NEW PUBLIC DISPATCHER FUNCTION ---
2711
- def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0) -> Dict[str, Any]:
2712
  """
2713
- (PUBLIC DISPATCHER V19) Analyzes a single word for all possible forms.
2714
 
2715
- This function intelligently selects the best available engine in order:
2716
- 1. PRIMARY: Wiktionary DB (Accurate, pre-compiled data)
2717
- 2. FALLBACK 1: HanTa-led engine (Good heuristics)
2718
- 3. FALLBACK 2: IWNLP-led engine (Different heuristics)
2719
  """
2720
  if not word or not word.strip():
2721
  return {"info": "Please enter a word."}
2722
 
2723
  word = word.strip()
2724
  top_n = int(top_n_value) if top_n_value is not None else 0
 
2725
 
2726
- # --- 1. Try PRIMARY Engine: Wiktionary ---
2727
- if WIKTIONARY_AVAILABLE:
2728
- wikt_result = _analyze_word_with_wiktionary(word, top_n)
2729
- if wikt_result and wikt_result.get("analysis"):
2730
- log("V19 Dispatcher: Returning Wiktionary result.")
2731
- return wikt_result
2732
- elif WIKTIONARY_AVAILABLE:
2733
- log("V19 Dispatcher: Wiktionary is available but found no results.")
2734
- else:
2735
- log("V19 Dispatcher: Wiktionary failed to initialize, falling back.")
2736
 
2737
- # --- 2. Try FALLBACK 1: HanTa ---
2738
- if HANTA_AVAILABLE:
2739
- hanta_result = _analyze_word_with_hanta(word, top_n)
2740
- if hanta_result and hanta_result.get("analysis"):
2741
- log("V19 Dispatcher: Wiktionary failed, returning HanTa result.")
2742
- return hanta_result
 
 
 
 
 
 
 
 
 
 
2743
  else:
2744
- log("V19 Dispatcher: HanTa fallback found no results.")
2745
 
2746
- # --- 3. Try FALLBACK 2: IWNLP ---
2747
- if IWNLP_AVAILABLE:
2748
- iwnlp_result = _analyze_word_with_iwnlp(word, top_n)
2749
- if iwnlp_result and iwnlp_result.get("analysis"):
2750
- log("V19 Dispatcher: HanTa failed, returning IWNLP result.")
2751
- return iwnlp_result
2752
- else:
2753
- log("V19 Dispatcher: IWNLP fallback found no results.")
2754
 
2755
- # --- No engines available or no results ---
2756
- log("--- Dispatcher: No valid analysis engines found or no results. ---")
2757
- return {
2758
- "input_word": word,
2759
- "error": "No analysis found for this word.",
2760
- "info": "The word was not found in Wiktionary, and no fallback "
2761
- "engines (HanTa, IWNLP) could produce a valid analysis."
2762
- }
2763
 
2764
 
2765
  # ============================================================================
@@ -2955,40 +3010,136 @@ def create_combined_tab():
2955
  def create_word_encyclopedia_tab():
2956
  """--- NEW: Creates the UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
2957
  gr.Markdown("# πŸ“– Word Encyclopedia (Non-Contextual)")
2958
- gr.Markdown("This tool analyzes a **single word** for *all possible* grammatical and semantic forms. It's ideal for enriching word lists. It finds ambiguities (e.g., 'Lauf' as noun and verb) and groups all data by Part-of-Speech.")
 
2959
  with gr.Column():
2960
  word_input = gr.Textbox(
2961
  label="Single German Word",
2962
- placeholder="e.g., Lauf, See, schnell"
2963
- )
2964
- top_n_number = gr.Number(
2965
- label="Limit Semantic Senses per POS (0 for all)",
2966
- value=0,
2967
- step=1,
2968
- minimum=0,
2969
- interactive=True
2970
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2971
  analyze_button = gr.Button("Analyze Word", variant="primary")
2972
 
2973
  output = gr.JSON(label="Word Encyclopedia Analysis (JSON)")
2974
 
 
2975
  analyze_button.click(
2976
  fn=analyze_word_encyclopedia,
2977
- inputs=[word_input, top_n_number],
 
2978
  outputs=[output],
2979
  api_name="analyze_word"
2980
  )
2981
 
 
2982
  gr.Examples(
2983
- [["Lauf", 3],
2984
- ["See", 0],
2985
- ["schnell", 3],
2986
- ["Hund", 5]],
2987
- inputs=[word_input, top_n_number],
 
2988
  outputs=[output],
2989
  fn=analyze_word_encyclopedia
2990
  )
2991
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2992
  # --- Main UI Builder ---
2993
  def create_consolidated_interface():
2994
  """Builds the final Gradio app with all tabs."""
@@ -2997,7 +3148,7 @@ def create_consolidated_interface():
2997
  gr.Markdown("A suite of advanced tools for German linguistics, providing both contextual and non-contextual analysis.")
2998
 
2999
  with gr.Tabs():
3000
- # --- NEW "Word Encyclopedia" TAB ---
3001
  with gr.Tab("πŸ“– Word Encyclopedia (DE)"):
3002
  create_word_encyclopedia_tab()
3003
 
@@ -3009,14 +3160,25 @@ def create_consolidated_interface():
3009
 
3010
  with gr.Tab("βœ… Grammar Check (DE)"):
3011
  create_languagetool_tab()
 
 
 
 
3012
 
3013
- with gr.Tab("πŸ“š Inflections (DE)"):
 
 
 
 
 
 
 
3014
  create_pattern_tab()
3015
 
3016
- with gr.Tab("πŸ“– Thesaurus (DE)"):
3017
  create_odenet_tab()
3018
 
3019
- with gr.Tab("🌐 ConceptNet (Direct)"):
3020
  create_conceptnet_tab()
3021
 
3022
  return demo
 
1837
 
1838
  def _wiktionary_format_semantics_block(
1839
  wikt_report: Dict[str, Any],
1840
+ pattern_block: Dict[str, Any],
1841
  top_n: int
1842
  ) -> Dict[str, Any]:
1843
  """
1844
  Combines Wiktionary senses with OdeNet/ConceptNet senses,
1845
+ using the CORRECT lemma from the pattern.de analysis block.
1846
  """
1847
 
1848
  # --- THIS IS THE FIX ---
1849
+ # Determine the true lemma from the pattern.de block, as it's more reliable
1850
+ # for semantic lookup than the wiktionary lemma (which could be an inflected form).
 
 
1851
  pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
1852
+
1853
+ semantic_lemma = ""
1854
+ if pos_key == "verb":
1855
+ semantic_lemma = pattern_block.get("infinitive")
1856
+ elif pos_key == "noun":
1857
+ semantic_lemma = pattern_block.get("base_form")
1858
+ elif pos_key == "adjective":
1859
+ semantic_lemma = pattern_block.get("predicative")
1860
+
1861
+ # Fallback if pattern.de fails or it's a non-inflecting POS
1862
+ if not semantic_lemma:
1863
+ semantic_lemma = wikt_report.get("lemma", "")
1864
+
1865
+ log(f"[DEBUG] Wiktionary Semantics: Building block for lemma='{semantic_lemma}', pos='{pos_key}'")
1866
  # --- END OF FIX ---
1867
 
1868
  # 1. Get Wiktionary senses (from the original report)
 
1884
  if odenet_senses_raw and "info" not in odenet_senses_raw[0]:
1885
  odenet_senses = odenet_senses_raw
1886
  except Exception as e:
1887
+ log(f"[DEBUG] OdeNet lookup failed for {semantic_lemma} ({pos_key}): {e}")
1888
 
1889
  # 3. Get ConceptNet relations for the *semantic_lemma*
1890
  conceptnet_relations = []
 
1934
  if iwnlp:
1935
  doc = iwnlp(word)
1936
  token = doc[0]
1937
+ # Map spaCy POS to our internal keys
1938
+ spacy_pos_raw = token.pos_.lower()
1939
+ if spacy_pos_raw == "adj": spacy_pos_hint = "adjective"
1940
+ elif spacy_pos_raw == "adv": spacy_pos_hint = "adverb"
1941
+ elif spacy_pos_raw == "verb": spacy_pos_hint = "verb"
1942
+ elif spacy_pos_raw == "noun": spacy_pos_hint = "noun"
1943
+ else: spacy_pos_hint = spacy_pos_raw
1944
+
1945
  spacy_lemma_hint = token.lemma_
1946
+ log(f"[DEBUG] Wiktionary Priority Hint: spaCy POS is '{spacy_pos_hint}', lemma is '{spacy_lemma_hint}'")
1947
  except Exception as e:
1948
+ log(f"[DEBUG] Wiktionary Priority Hint: spaCy/IWNLP failed: {e}")
1949
 
1950
  # --- 2. FIND ALL WIKTIONARY ENTRIES ---
1951
  try:
1952
  wiktionary_reports = _wiktionary_find_all_entries(word, conn)
1953
  except Exception as e:
1954
+ log(f"[DEBUG] Wiktionary query failed: {e}")
1955
  return {} # Signal failure
1956
 
1957
  if not wiktionary_reports:
 
1964
 
1965
  # Priority 1: Exact POS match with spaCy hint
1966
  if spacy_pos_hint and wikt_pos == spacy_pos_hint:
1967
+ # Bonus if lemma also matches
1968
+ if spacy_lemma_hint and wikt_lemma == spacy_lemma_hint:
1969
+ return 1
1970
+ return 2
1971
+
1972
  # Priority 2: Input word is the lemma (e.g., "Haus" -> "Haus")
1973
  if wikt_lemma.lower() == word.lower():
1974
  return 3
 
1977
  return 4
1978
 
1979
  wiktionary_reports.sort(key=get_priority_score)
1980
+ log(f"[DEBUG] Wiktionary: Sorted entries: {[r.get('lemma') + ' (' + r.get('pos') + ')' for r in wiktionary_reports]}")
1981
 
1982
 
1983
+ # --- 4. BUILD AND *VALIDATE* THE FINAL REPORT (PATH-PURE) ---
1984
+ word_lower = word.lower()
1985
+
1986
  for wikt_report in wiktionary_reports:
1987
  pos_key = _wiktionary_map_pos_key(wikt_report.get("pos"))
1988
  lemma = wikt_report.get("lemma", word)
1989
+ pos_title = wikt_report.get("pos_title", "")
1990
 
1991
+ # --- A. Build Wiktionary Inflection Block ---
 
 
 
 
1992
  inflections_wikt_block = {
1993
  "base_form": lemma,
1994
  "forms_list": wikt_report.get("forms", []),
1995
  "source": "wiktionary"
1996
  }
1997
 
1998
+ # --- B. Build Pattern Inflection Block (CRITICAL for finding true lemma) ---
1999
  pattern_block = {}
2000
  if PATTERN_DE_AVAILABLE:
2001
  try:
2002
+ if pos_key == "noun" or "Substantiv" in pos_title:
2003
  pattern_block = pattern_analyze_as_noun(lemma)
2004
+ elif pos_key == "verb" or "Verb" in pos_title or "Konjugierte Form" in pos_title:
2005
+ # Use the *input word* for inflected forms to find the right lemma
2006
+ if "Konjugierte Form" in pos_title:
2007
+ pattern_block = pattern_analyze_as_verb(word)
2008
+ else:
2009
+ pattern_block = pattern_analyze_as_verb(lemma)
2010
+ elif pos_key == "adjective" or "Adjektiv" in pos_title or "Deklinierte Form" in pos_title:
2011
+ # Use the *input word* for inflected forms
2012
+ if "Deklinierte Form" in pos_title:
2013
+ pattern_block = pattern_analyze_as_adjective(word)
2014
+ else:
2015
+ pattern_block = pattern_analyze_as_adjective(lemma)
2016
  elif pos_key == "adverb":
2017
  pattern_block = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
2018
  except Exception as e:
2019
  pattern_block = {"error": f"Pattern.de analysis for {pos_key}('{lemma}') failed: {e}"}
2020
 
2021
+ # --- C. Build Semantics Block (using correct lemma from pattern_block) ---
2022
+ semantics_block = _wiktionary_format_semantics_block(wikt_report, pattern_block, top_n)
2023
+
2024
+ # --- D. Assemble the report (pre-validation) ---
2025
  pos_entry_report = {
2026
  "inflections_wiktionary": inflections_wikt_block,
2027
  "inflections_pattern": pattern_block,
2028
  "semantics_combined": semantics_block,
2029
  "wiktionary_metadata": {
2030
+ "pos_title": pos_title,
2031
  "pronunciation": wikt_report.get("sounds"),
2032
  "examples": wikt_report.get("examples")
2033
  }
2034
  }
2035
 
2036
+ # --- E. *** YOUR NEW VALIDATION FILTER (Corrected) *** ---
2037
+ is_valid = False
2038
+ is_inflected_entry = "Konjugierte Form" in pos_title or "Deklinierte Form" in pos_title
2039
+
2040
+ # Check 1: Is the input word the lemma OF A BASE FORM entry?
2041
+ if not is_inflected_entry and lemma.lower() == word_lower:
2042
+ is_valid = True
2043
+ log(f"[DEBUG] Wiktionary: KEEPING entry '{lemma}' ({pos_key}) because input word matches lemma of a base entry.")
2044
 
2045
+ # Check 2: Is the input word in the *bare* forms list?
2046
+ # (This is the only check that should apply to inflected entries)
2047
+ if not is_valid:
2048
+ for form_entry in inflections_wikt_block.get("forms_list", []):
2049
+ form_text = form_entry.get("form_text", "")
2050
+ bare_form = re.sub(r"\(.*\)", "", form_text).strip()
2051
+ bare_form = re.sub(r"^(der|die|das|ein|eine|am)\s+", "", bare_form, flags=re.IGNORECASE).strip()
2052
+ bare_form = bare_form.rstrip("!.")
2053
+
2054
+ if bare_form.lower() == word_lower:
2055
+ is_valid = True
2056
+ log(f"[DEBUG] Wiktionary: KEEPING entry '{lemma}' ({pos_key}) because input word found in form: '{form_text}'")
2057
+ break
2058
+
2059
+ # --- F. Add to final result if valid ---
2060
+ if is_valid:
2061
+ if pos_key not in final_result["analysis"]:
2062
+ final_result["analysis"][pos_key] = []
2063
+ final_result["analysis"][pos_key].append(pos_entry_report)
2064
+ else:
2065
+ log(f"[DEBUG] Wiktionary: DROPPING entry '{lemma}' ({pos_key}, {pos_title}) because input word '{word}' was not found in its valid forms.")
2066
+
2067
+ # --- END OF VALIDATION ---
2068
 
2069
+ final_result["info"] = f"Analysis from Wiktionary (Primary Engine). Found {len(wiktionary_reports)} matching entries, kept {sum(len(v) for v in final_result.get('analysis', {}).values())}."
2070
  return final_result
2071
 
2072
  # ============================================================================
 
2764
  # --- 7b. NEW: Word Encyclopedia (Non-Contextual) Analyzer ---
2765
 
2766
  # --- THIS IS THE NEW PUBLIC DISPATCHER FUNCTION ---
2767
+ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engine_choice: str = "wiktionary") -> Dict[str, Any]:
2768
  """
2769
+ (PUBLIC DISPATCHER V20) Analyzes a single word using the selected engine.
2770
 
2771
+ The user can now choose which engine to run.
 
 
 
2772
  """
2773
  if not word or not word.strip():
2774
  return {"info": "Please enter a word."}
2775
 
2776
  word = word.strip()
2777
  top_n = int(top_n_value) if top_n_value is not None else 0
2778
+ result = {}
2779
 
2780
+ log(f"\n[Word Encyclopedia] User selected engine: '{engine_choice}' for word: '{word}'")
 
 
 
 
 
 
 
 
 
2781
 
2782
+ try:
2783
+ if engine_choice == "wiktionary":
2784
+ result = _analyze_word_with_wiktionary(word, top_n)
2785
+ if not result or not result.get("analysis"):
2786
+ result["info"] = f"Wiktionary (Primary Engine) found no results for '{word}'. You can try a fallback engine."
2787
+
2788
+ elif engine_choice == "hanta":
2789
+ result = _analyze_word_with_hanta(word, top_n)
2790
+ if not result or not result.get("analysis"):
2791
+ result["info"] = f"HanTa (Fallback 1) found no results for '{word}'."
2792
+
2793
+ elif engine_choice == "iwnlp":
2794
+ result = _analyze_word_with_iwnlp(word, top_n)
2795
+ if not result or not result.get("analysis"):
2796
+ result["info"] = f"IWNLP (Fallback 2) found no results for '{word}'."
2797
+
2798
  else:
2799
+ result = {"error": f"Unknown engine choice: {engine_choice}"}
2800
 
2801
+ except Exception as e:
2802
+ log(f"--- Dispatcher FAILED for engine {engine_choice}: {e} ---")
2803
+ traceback.print_exc()
2804
+ return {
2805
+ "input_word": word,
2806
+ "error": f"The '{engine_choice}' engine failed during analysis.",
2807
+ "traceback": traceback.format_exc()
2808
+ }
2809
 
2810
+ # If the engine ran but found nothing, return a clear info message
2811
+ if not result.get("analysis"):
2812
+ return {
2813
+ "input_word": word,
2814
+ "info": result.get("info", f"The selected engine '{engine_choice}' found no valid analysis for this word.")
2815
+ }
2816
+
2817
+ return result
2818
 
2819
 
2820
  # ============================================================================
 
3010
  def create_word_encyclopedia_tab():
3011
  """--- NEW: Creates the UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
3012
  gr.Markdown("# πŸ“– Word Encyclopedia (Non-Contextual)")
3013
+ gr.Markdown("This tool analyzes a **single word** for *all possible* grammatical and semantic forms. It finds ambiguities (e.g., 'Lauf' as noun and verb) and groups all data by Part-of-Speech.")
3014
+
3015
  with gr.Column():
3016
  word_input = gr.Textbox(
3017
  label="Single German Word",
3018
+ placeholder="e.g., Lauf, See, schnell, heute"
 
 
 
 
 
 
 
3019
  )
3020
+
3021
+ with gr.Row():
3022
+ top_n_number = gr.Number(
3023
+ label="Limit Semantic Senses per POS (0 for all)",
3024
+ value=0,
3025
+ step=1,
3026
+ minimum=0,
3027
+ interactive=True
3028
+ )
3029
+
3030
+ # --- THIS IS THE NEW UI ELEMENT ---
3031
+ engine_radio = gr.Radio(
3032
+ label="Select Analysis Engine",
3033
+ choices=[
3034
+ ("Wiktionary (Default)", "wiktionary"),
3035
+ ("HanTa (Fallback 1)", "hanta"),
3036
+ ("IWNLP (Fallback 2)", "iwnlp")
3037
+ ],
3038
+ value="wiktionary",
3039
+ interactive=True
3040
+ )
3041
+ # --- END OF NEW UI ELEMENT ---
3042
+
3043
  analyze_button = gr.Button("Analyze Word", variant="primary")
3044
 
3045
  output = gr.JSON(label="Word Encyclopedia Analysis (JSON)")
3046
 
3047
+ # --- UPDATE THE CLICK FUNCTION ---
3048
  analyze_button.click(
3049
  fn=analyze_word_encyclopedia,
3050
+ # Add 'engine_radio' to the inputs
3051
+ inputs=[word_input, top_n_number, engine_radio],
3052
  outputs=[output],
3053
  api_name="analyze_word"
3054
  )
3055
 
3056
+ # Update the examples to include the radio button
3057
  gr.Examples(
3058
+ [["Lauf", 3, "wiktionary"],
3059
+ ["See", 0, "wiktionary"],
3060
+ ["schnell", 3, "wiktionary"],
3061
+ ["heute", 0, "wiktionary"],
3062
+ ["heute", 0, "hanta"]], # Example to show a different engine
3063
+ inputs=[word_input, top_n_number, engine_radio],
3064
  outputs=[output],
3065
  fn=analyze_word_encyclopedia
3066
  )
3067
 
3068
+ def create_wiktionary_tab():
3069
+ """Creates the UI for the standalone Wiktionary lookup tab."""
3070
+ gr.Markdown("# πŸ“™ Wiktionary Lookup (Raw Engine)")
3071
+ gr.Markdown("Directly query the Wiktionary (Primary) engine. This shows the raw, combined data from the database, Pattern.de, and semantic sources.")
3072
+ with gr.Column():
3073
+ word_input = gr.Textbox(
3074
+ label="Single German Word",
3075
+ placeholder="e.g., Haus, gehe, heute"
3076
+ )
3077
+ analyze_button = gr.Button("Lookup Word in Wiktionary", variant="primary")
3078
+
3079
+ output = gr.JSON(label="Wiktionary Engine Analysis (JSON)")
3080
+
3081
+ # Call the internal engine function directly, hardcoding top_n=0
3082
+ analyze_button.click(
3083
+ fn=lambda word: _analyze_word_with_wiktionary(word, 0),
3084
+ inputs=[word_input],
3085
+ outputs=[output],
3086
+ api_name="wiktionary_lookup"
3087
+ )
3088
+ gr.Examples(
3089
+ [["Haus"], ["gehe"], ["heute"], ["Lauf"]],
3090
+ inputs=[word_input], outputs=[output], fn=lambda word: _analyze_word_with_wiktionary(word, 0)
3091
+ )
3092
+
3093
+ def create_hanta_tab():
3094
+ """Creates the UI for the standalone HanTa Engine tab."""
3095
+ gr.Markdown("# πŸ€– HanTa Lookup (Raw Engine)")
3096
+ gr.Markdown("Directly query the HanTa (Fallback 1) engine. This shows the raw, combined data from HanTa, Pattern.de, and semantic sources.")
3097
+ with gr.Column():
3098
+ word_input = gr.Textbox(
3099
+ label="Single German Word",
3100
+ placeholder="e.g., Haus, gehe, heute"
3101
+ )
3102
+ analyze_button = gr.Button("Lookup Word with HanTa", variant="primary")
3103
+
3104
+ output = gr.JSON(label="HanTa Engine Analysis (JSON)")
3105
+
3106
+ # Call the internal engine function directly, hardcoding top_n=0
3107
+ analyze_button.click(
3108
+ fn=lambda word: _analyze_word_with_hanta(word, 0),
3109
+ inputs=[word_input],
3110
+ outputs=[output],
3111
+ api_name="hanta_lookup"
3112
+ )
3113
+ gr.Examples(
3114
+ [["Haus"], ["gehe"], ["heute"], ["Lauf"]],
3115
+ inputs=[word_input], outputs=[output], fn=lambda word: _analyze_word_with_hanta(word, 0)
3116
+ )
3117
+
3118
+ def create_iwnlp_tab():
3119
+ """Creates the UI for the standalone IWNLP Engine tab."""
3120
+ gr.Markdown("# πŸ”¬ IWNLP-spaCy Lookup (Raw Engine)")
3121
+ gr.Markdown("Directly query the IWNLP-spaCy (Fallback 2) engine. This shows the raw, combined data from spaCy, IWNLP, Pattern.de, and semantic sources.")
3122
+ with gr.Column():
3123
+ word_input = gr.Textbox(
3124
+ label="Single German Word",
3125
+ placeholder="e.g., Haus, gehe, heute"
3126
+ )
3127
+ analyze_button = gr.Button("Lookup Word with IWNLP", variant="primary")
3128
+
3129
+ output = gr.JSON(label="IWNLP Engine Analysis (JSON)")
3130
+
3131
+ # Call the internal engine function directly, hardcoding top_n=0
3132
+ analyze_button.click(
3133
+ fn=lambda word: _analyze_word_with_iwnlp(word, 0),
3134
+ inputs=[word_input],
3135
+ outputs=[output],
3136
+ api_name="iwnlp_lookup"
3137
+ )
3138
+ gr.Examples(
3139
+ [["Haus"], ["gehe"], ["heute"], ["Lauf"]],
3140
+ inputs=[word_input], outputs=[output], fn=lambda word: _analyze_word_with_iwnlp(word, 0)
3141
+ )
3142
+
3143
  # --- Main UI Builder ---
3144
  def create_consolidated_interface():
3145
  """Builds the final Gradio app with all tabs."""
 
3148
  gr.Markdown("A suite of advanced tools for German linguistics, providing both contextual and non-contextual analysis.")
3149
 
3150
  with gr.Tabs():
3151
+ # --- Main Tools ---
3152
  with gr.Tab("πŸ“– Word Encyclopedia (DE)"):
3153
  create_word_encyclopedia_tab()
3154
 
 
3160
 
3161
  with gr.Tab("βœ… Grammar Check (DE)"):
3162
  create_languagetool_tab()
3163
+
3164
+ # --- Standalone Engine Tabs (NEW) ---
3165
+ with gr.Tab("πŸ“™ Engine: Wiktionary (DE)"):
3166
+ create_wiktionary_tab()
3167
 
3168
+ with gr.Tab("πŸ€– Engine: HanTa (DE)"):
3169
+ create_hanta_tab()
3170
+
3171
+ with gr.Tab("πŸ”¬ Engine: IWNLP-spaCy (DE)"):
3172
+ create_iwnlp_tab()
3173
+
3174
+ # --- Standalone Component Tabs ---
3175
+ with gr.Tab("πŸ“š Component: Inflections (DE)"):
3176
  create_pattern_tab()
3177
 
3178
+ with gr.Tab("πŸ“– Component: Thesaurus (DE)"):
3179
  create_odenet_tab()
3180
 
3181
+ with gr.Tab("🌐 Component: ConceptNet (Direct)"):
3182
  create_conceptnet_tab()
3183
 
3184
  return demo