Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -150,6 +150,24 @@ except ImportError:
|
|
| 150 |
print("Install with: pip install HanTa")
|
| 151 |
print("="*70)
|
| 152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
# ============================================================================
|
| 154 |
# 2. SHARED GLOBALS & CONFIG
|
| 155 |
# ============================================================================
|
|
@@ -174,6 +192,10 @@ CONCEPTNET_LOCK = threading.Lock()
|
|
| 174 |
HANTA_TAGGER_INSTANCE: Optional[HanoverTagger] = None
|
| 175 |
HANTA_TAGGER_LOCK = threading.Lock()
|
| 176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
# --- Helper ---
|
| 178 |
def _html_wrap(content: str, line_height: str = "2.0") -> str:
|
| 179 |
"""Wraps displaCy HTML in a consistent, scrollable div."""
|
|
@@ -1624,11 +1646,7 @@ def _build_semantics(lemma: str, odenet_senses: List[Dict], top_n: int) -> Dict[
|
|
| 1624 |
}
|
| 1625 |
|
| 1626 |
# ============================================================================
|
| 1627 |
-
# 6d. WIKTIONARY DATABASE LOGIC (
|
| 1628 |
-
# ============================================================================
|
| 1629 |
-
|
| 1630 |
-
# ============================================================================
|
| 1631 |
-
# 6d. WIKTIONARY DATABASE LOGIC (NEW PRIMARY ENGINE)
|
| 1632 |
# ============================================================================
|
| 1633 |
|
| 1634 |
def wiktionary_download_db() -> bool:
|
|
@@ -1913,7 +1931,7 @@ def _wiktionary_format_semantics_block(
|
|
| 1913 |
|
| 1914 |
def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
| 1915 |
"""
|
| 1916 |
-
(
|
| 1917 |
Returns {} on failure to signal dispatcher to fall back.
|
| 1918 |
"""
|
| 1919 |
final_result: Dict[str, Any] = {
|
|
@@ -2033,7 +2051,7 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2033 |
}
|
| 2034 |
}
|
| 2035 |
|
| 2036 |
-
# --- E.
|
| 2037 |
is_valid = False
|
| 2038 |
is_inflected_entry = "Konjugierte Form" in pos_title or "Deklinierte Form" in pos_title
|
| 2039 |
|
|
@@ -2069,6 +2087,199 @@ def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
|
| 2069 |
final_result["info"] = f"Analysis from Wiktionary (Primary Engine). Found {len(wiktionary_reports)} matching entries, kept {sum(len(v) for v in final_result.get('analysis', {}).values())}."
|
| 2070 |
return final_result
|
| 2071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2072 |
# ============================================================================
|
| 2073 |
# 7. CONSOLIDATED ANALYZER LOGIC
|
| 2074 |
# ============================================================================
|
|
@@ -2079,7 +2290,7 @@ def comprehensive_german_analysis(text: str, top_n_value: Optional[float] = 0) -
|
|
| 2079 |
"""
|
| 2080 |
(CONTEXTUAL) Combines NLP tools for a deep analysis of German text.
|
| 2081 |
|
| 2082 |
-
|
| 2083 |
from `analyze_word_encyclopedia` and combines all senses for ranking.
|
| 2084 |
"""
|
| 2085 |
|
|
@@ -2256,7 +2467,6 @@ def comprehensive_german_analysis(text: str, top_n_value: Optional[float] = 0) -
|
|
| 2256 |
if key.endswith("_senses") and nlp_de:
|
| 2257 |
ranked_senses = []
|
| 2258 |
for sense in semantic_analysis[key]:
|
| 2259 |
-
# ... (your existing re-ranking code) ...
|
| 2260 |
if "error" in sense: continue
|
| 2261 |
definition = sense.get("definition", "")
|
| 2262 |
relevance = 0.0
|
|
@@ -2278,7 +2488,6 @@ def comprehensive_german_analysis(text: str, top_n_value: Optional[float] = 0) -
|
|
| 2278 |
# ConceptNet Relations
|
| 2279 |
if "conceptnet_relations" in semantic_analysis and nlp_de:
|
| 2280 |
ranked_relations = []
|
| 2281 |
-
# ... (your existing re-ranking code) ...
|
| 2282 |
for rel in semantic_analysis["conceptnet_relations"]:
|
| 2283 |
if "error" in rel: continue
|
| 2284 |
text_to_score = rel.get('surface') or rel.get('other_node', '')
|
|
@@ -2400,7 +2609,7 @@ def _analyze_word_with_hanta(word: str, top_n_value: int) -> Dict[str, Any]:
|
|
| 2400 |
inflection_report = {"error": f"pattern.de failed: {e}", "traceback": traceback.format_exc()}
|
| 2401 |
|
| 2402 |
# --- 3d. Build Final Report Block ---
|
| 2403 |
-
final_result["analysis"][pos_group] = {
|
| 2404 |
"hanta_analysis": {
|
| 2405 |
"detected_tags": sorted(list(specific_tags)),
|
| 2406 |
"lemma": lemma,
|
|
@@ -2408,9 +2617,9 @@ def _analyze_word_with_hanta(word: str, top_n_value: int) -> Dict[str, Any]:
|
|
| 2408 |
hanta_tagger.analyze(word.capitalize() if pos_group == 'noun' else word.lower(), taglevel=3)
|
| 2409 |
]
|
| 2410 |
},
|
| 2411 |
-
"
|
| 2412 |
-
"
|
| 2413 |
-
}
|
| 2414 |
|
| 2415 |
if not final_result["analysis"]:
|
| 2416 |
return {
|
|
@@ -2430,7 +2639,6 @@ def _analyze_word_with_hanta(word: str, top_n_value: int) -> Dict[str, Any]:
|
|
| 2430 |
if IWNLP_AVAILABLE:
|
| 2431 |
try:
|
| 2432 |
log("--- Dispatcher: HanTa not found or failed. Attempting IWNLP Fallback Engine ---")
|
| 2433 |
-
# We call your existing V16 function, which we just made robust in Step 2.
|
| 2434 |
result = _analyze_word_with_iwnlp(word, top_n_value)
|
| 2435 |
result["info"] = result.get("info", "") + " (Analysis performed by IWNLP-based fallback engine)"
|
| 2436 |
return result
|
|
@@ -2739,20 +2947,15 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
|
|
| 2739 |
|
| 2740 |
# --- 5. BUILD FINAL REPORT ---
|
| 2741 |
for pos_key, analysis_data in valid_analyses.items():
|
|
|
|
| 2742 |
pos_report = {
|
| 2743 |
"inflections_pattern": analysis_data["inflections"],
|
| 2744 |
-
"semantics_combined":
|
| 2745 |
-
analysis_data["lemma"],
|
| 2746 |
-
analysis_data["odenet_senses"],
|
| 2747 |
-
top_n
|
| 2748 |
-
)
|
| 2749 |
}
|
| 2750 |
-
# Add spaCy analysis if it was included
|
| 2751 |
if "spacy_analysis" in analysis_data:
|
| 2752 |
pos_report["spacy_analysis"] = analysis_data["spacy_analysis"]
|
| 2753 |
|
| 2754 |
-
# Wrap
|
| 2755 |
-
final_result["analysis"][pos_key] = [pos_report] # <--- THE CHANGE
|
| 2756 |
|
| 2757 |
if not final_result["analysis"]:
|
| 2758 |
return {} # No results
|
|
@@ -2761,14 +2964,16 @@ def _analyze_word_with_iwnlp(word: str, top_n_value: int) -> Dict[str, Any]:
|
|
| 2761 |
return final_result
|
| 2762 |
|
| 2763 |
|
| 2764 |
-
# --- 7b.
|
| 2765 |
|
| 2766 |
-
# ---
|
| 2767 |
# --- THIS IS THE NEW PUBLIC DISPATCHER FUNCTION ---
|
| 2768 |
def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engine_choice: str = "wiktionary") -> Dict[str, Any]:
|
| 2769 |
"""
|
| 2770 |
-
(PUBLIC DISPATCHER
|
| 2771 |
as a starting point, then automatically falls back if no results are found.
|
|
|
|
|
|
|
| 2772 |
"""
|
| 2773 |
if not word or not word.strip():
|
| 2774 |
return {"info": "Please enter a word."}
|
|
@@ -2781,37 +2986,41 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
|
|
| 2781 |
log(f"\n[Word Encyclopedia] User selected engine: '{engine_choice}' for word: '{word}'")
|
| 2782 |
|
| 2783 |
try:
|
| 2784 |
-
# --- 1. Try Wiktionary
|
| 2785 |
if engine_choice == "wiktionary":
|
| 2786 |
-
log(f"[DEBUG]
|
| 2787 |
result = _analyze_word_with_wiktionary(word, top_n)
|
| 2788 |
if result and result.get("analysis"):
|
| 2789 |
return result # Success
|
| 2790 |
info_log.append("Wiktionary found no results.")
|
| 2791 |
-
log(f"[DEBUG]
|
| 2792 |
-
|
| 2793 |
-
# --- 2. Try
|
| 2794 |
-
|
| 2795 |
-
|
| 2796 |
-
|
| 2797 |
-
|
| 2798 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2799 |
result = _analyze_word_with_hanta(word, top_n)
|
| 2800 |
if result and result.get("analysis"):
|
| 2801 |
-
result["info"] = f"Analysis from HanTa (Fallback
|
| 2802 |
return result # Success
|
| 2803 |
info_log.append("HanTa found no results.")
|
| 2804 |
-
log(f"[DEBUG]
|
| 2805 |
|
| 2806 |
-
# ---
|
| 2807 |
-
# This block runs if:
|
| 2808 |
-
# a) User selected "iwnlp" OR
|
| 2809 |
-
# b) The previous engines were tried and all failed (result['analysis'] is still empty)
|
| 2810 |
if engine_choice == "iwnlp" or (not result.get("analysis")):
|
| 2811 |
-
log(f"[DEBUG]
|
| 2812 |
result = _analyze_word_with_iwnlp(word, top_n)
|
| 2813 |
if result and result.get("analysis"):
|
| 2814 |
-
result["info"] = f"Analysis from IWNLP (Fallback
|
| 2815 |
return result # Success
|
| 2816 |
info_log.append("IWNLP found no results.")
|
| 2817 |
|
|
@@ -2825,7 +3034,7 @@ def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engin
|
|
| 2825 |
}
|
| 2826 |
|
| 2827 |
# --- No engines found anything ---
|
| 2828 |
-
log(f"[DEBUG]
|
| 2829 |
return {
|
| 2830 |
"input_word": word,
|
| 2831 |
"info": f"No analysis found. All engines failed. ({' '.join(info_log)})"
|
|
@@ -3023,7 +3232,7 @@ def create_combined_tab():
|
|
| 3023 |
)
|
| 3024 |
|
| 3025 |
def create_word_encyclopedia_tab():
|
| 3026 |
-
"""---
|
| 3027 |
gr.Markdown("# 📖 Word Encyclopedia (Non-Contextual)")
|
| 3028 |
gr.Markdown("This tool analyzes a **single word** for *all possible* grammatical and semantic forms. It finds ambiguities (e.g., 'Lauf' as noun and verb) and groups all data by Part-of-Speech.")
|
| 3029 |
|
|
@@ -3042,39 +3251,37 @@ def create_word_encyclopedia_tab():
|
|
| 3042 |
interactive=True
|
| 3043 |
)
|
| 3044 |
|
| 3045 |
-
# ---
|
| 3046 |
engine_radio = gr.Radio(
|
| 3047 |
-
label="Select Analysis Engine",
|
| 3048 |
choices=[
|
| 3049 |
("Wiktionary (Default)", "wiktionary"),
|
| 3050 |
-
("
|
| 3051 |
-
("
|
|
|
|
| 3052 |
],
|
| 3053 |
value="wiktionary",
|
| 3054 |
interactive=True
|
| 3055 |
)
|
| 3056 |
-
# --- END OF
|
| 3057 |
|
| 3058 |
analyze_button = gr.Button("Analyze Word", variant="primary")
|
| 3059 |
|
| 3060 |
output = gr.JSON(label="Word Encyclopedia Analysis (JSON)")
|
| 3061 |
|
| 3062 |
-
# --- UPDATE THE CLICK FUNCTION ---
|
| 3063 |
analyze_button.click(
|
| 3064 |
fn=analyze_word_encyclopedia,
|
| 3065 |
-
# Add 'engine_radio' to the inputs
|
| 3066 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3067 |
outputs=[output],
|
| 3068 |
api_name="analyze_word"
|
| 3069 |
)
|
| 3070 |
|
| 3071 |
-
# Update the examples to include the radio button
|
| 3072 |
gr.Examples(
|
| 3073 |
[["Lauf", 3, "wiktionary"],
|
| 3074 |
["See", 0, "wiktionary"],
|
| 3075 |
["schnell", 3, "wiktionary"],
|
| 3076 |
["heute", 0, "wiktionary"],
|
| 3077 |
-
["
|
| 3078 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3079 |
outputs=[output],
|
| 3080 |
fn=analyze_word_encyclopedia
|
|
@@ -3105,6 +3312,61 @@ def create_wiktionary_tab():
|
|
| 3105 |
inputs=[word_input], outputs=[output], fn=lambda word: _analyze_word_with_wiktionary(word, 0)
|
| 3106 |
)
|
| 3107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3108 |
def create_hanta_tab():
|
| 3109 |
"""Creates the UI for the standalone HanTa Engine tab."""
|
| 3110 |
gr.Markdown("# 🤖 HanTa Lookup (Raw Engine)")
|
|
@@ -3185,6 +3447,9 @@ def create_consolidated_interface():
|
|
| 3185 |
|
| 3186 |
with gr.Tab("🔬 Engine: IWNLP-spaCy (DE)"):
|
| 3187 |
create_iwnlp_tab()
|
|
|
|
|
|
|
|
|
|
| 3188 |
|
| 3189 |
# --- Standalone Component Tabs ---
|
| 3190 |
with gr.Tab("📚 Component: Inflections (DE)"):
|
|
@@ -3237,6 +3502,18 @@ if __name__ == "__main__":
|
|
| 3237 |
print(f"✗ FAILED to initialize Wiktionary: {e}")
|
| 3238 |
print("--- Wiktionary Done ---\n")
|
| 3239 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3240 |
# --- 4. Initialize HanTa Tagger ---
|
| 3241 |
print("--- Initializing HanTa Tagger ---")
|
| 3242 |
if HANTA_AVAILABLE:
|
|
|
|
| 150 |
print("Install with: pip install HanTa")
|
| 151 |
print("="*70)
|
| 152 |
|
| 153 |
+
# --- DWDSmor Import ---
|
| 154 |
+
DWDSMOR_AVAILABLE = False
|
| 155 |
+
DwdsmorLemmatizerClass = object # Dummy definition
|
| 156 |
+
try:
|
| 157 |
+
import dwdsmor
|
| 158 |
+
import dwdsmor.spacy # Test this import
|
| 159 |
+
DWDSMOR_AVAILABLE = True
|
| 160 |
+
print("✓ Successfully imported dwdsmor")
|
| 161 |
+
except ImportError as e:
|
| 162 |
+
DWDSMOR_AVAILABLE = False
|
| 163 |
+
print("="*70)
|
| 164 |
+
print(f"WARNING: `dwdsmor` or a dependency failed to import: {e}")
|
| 165 |
+
print("The DWDSmor engine will not be available.")
|
| 166 |
+
print("On macOS, run: brew install sfst")
|
| 167 |
+
print("On Debian/Ubuntu, run: apt-get install sfst")
|
| 168 |
+
print("Then, run: pip install dwdsmor")
|
| 169 |
+
print("="*70)
|
| 170 |
+
|
| 171 |
# ============================================================================
|
| 172 |
# 2. SHARED GLOBALS & CONFIG
|
| 173 |
# ============================================================================
|
|
|
|
| 192 |
HANTA_TAGGER_INSTANCE: Optional[HanoverTagger] = None
|
| 193 |
HANTA_TAGGER_LOCK = threading.Lock()
|
| 194 |
|
| 195 |
+
# --- DWDSmor Cache & Lock ---
|
| 196 |
+
DWDSMOR_LEMMATIZER: Optional[Any] = None
|
| 197 |
+
DWDSMOR_LEMMATIZER_LOCK = threading.Lock()
|
| 198 |
+
|
| 199 |
# --- Helper ---
|
| 200 |
def _html_wrap(content: str, line_height: str = "2.0") -> str:
|
| 201 |
"""Wraps displaCy HTML in a consistent, scrollable div."""
|
|
|
|
| 1646 |
}
|
| 1647 |
|
| 1648 |
# ============================================================================
|
| 1649 |
+
# 6d. WIKTIONARY DATABASE LOGIC (PRIMARY ENGINE)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1650 |
# ============================================================================
|
| 1651 |
|
| 1652 |
def wiktionary_download_db() -> bool:
|
|
|
|
| 1931 |
|
| 1932 |
def _analyze_word_with_wiktionary(word: str, top_n: int) -> Dict[str, Any]:
|
| 1933 |
"""
|
| 1934 |
+
(PRIMARY ENGINE) Analyzes a word using the Wiktionary DB.
|
| 1935 |
Returns {} on failure to signal dispatcher to fall back.
|
| 1936 |
"""
|
| 1937 |
final_result: Dict[str, Any] = {
|
|
|
|
| 2051 |
}
|
| 2052 |
}
|
| 2053 |
|
| 2054 |
+
# --- E. VALIDATION FILTER ---
|
| 2055 |
is_valid = False
|
| 2056 |
is_inflected_entry = "Konjugierte Form" in pos_title or "Deklinierte Form" in pos_title
|
| 2057 |
|
|
|
|
| 2087 |
final_result["info"] = f"Analysis from Wiktionary (Primary Engine). Found {len(wiktionary_reports)} matching entries, kept {sum(len(v) for v in final_result.get('analysis', {}).values())}."
|
| 2088 |
return final_result
|
| 2089 |
|
| 2090 |
+
# ============================================================================
|
| 2091 |
+
# 6e. SHARED SEMANTIC HELPER
|
| 2092 |
+
# ============================================================================
|
| 2093 |
+
|
| 2094 |
+
def _build_semantics_block_for_lemma(lemma: str, pos_key: str, top_n: int) -> Dict[str, Any]:
|
| 2095 |
+
"""
|
| 2096 |
+
(REUSABLE HELPER)
|
| 2097 |
+
Fetches OdeNet and ConceptNet data for a given lemma and POS.
|
| 2098 |
+
"""
|
| 2099 |
+
log(f"[DEBUG] Building semantics for lemma='{lemma}', pos='{pos_key}'")
|
| 2100 |
+
|
| 2101 |
+
# 1. Get OdeNet senses for this lemma + POS
|
| 2102 |
+
odenet_senses = []
|
| 2103 |
+
if WN_AVAILABLE:
|
| 2104 |
+
try:
|
| 2105 |
+
senses_by_pos = _get_odenet_senses_by_pos(lemma)
|
| 2106 |
+
odenet_senses_raw = senses_by_pos.get(pos_key, [])
|
| 2107 |
+
|
| 2108 |
+
# Filter out placeholder
|
| 2109 |
+
if odenet_senses_raw and "info" not in odenet_senses_raw[0]:
|
| 2110 |
+
odenet_senses = odenet_senses_raw
|
| 2111 |
+
except Exception as e:
|
| 2112 |
+
log(f"[DEBUG] OdeNet lookup failed for {lemma} ({pos_key}): {e}")
|
| 2113 |
+
|
| 2114 |
+
# 2. Get ConceptNet relations for this lemma
|
| 2115 |
+
conceptnet_relations = []
|
| 2116 |
+
if REQUESTS_AVAILABLE:
|
| 2117 |
+
try:
|
| 2118 |
+
conceptnet_result = conceptnet_get_relations(lemma, language='de')
|
| 2119 |
+
conceptnet_relations = conceptnet_result.get("relations", [])
|
| 2120 |
+
except Exception as e:
|
| 2121 |
+
conceptnet_relations = [{"error": str(e)}]
|
| 2122 |
+
|
| 2123 |
+
# 3. Apply top_n limit
|
| 2124 |
+
if top_n > 0:
|
| 2125 |
+
odenet_senses = odenet_senses[:top_n]
|
| 2126 |
+
conceptnet_relations.sort(key=lambda x: x.get('weight', 0.0), reverse=True)
|
| 2127 |
+
conceptnet_relations = conceptnet_relations[:top_n]
|
| 2128 |
+
|
| 2129 |
+
return {
|
| 2130 |
+
"lemma": lemma,
|
| 2131 |
+
"wiktionary_senses": [], # This block is for non-Wiktionary engines
|
| 2132 |
+
"odenet_senses": odenet_senses,
|
| 2133 |
+
"conceptnet_relations": conceptnet_relations,
|
| 2134 |
+
"wiktionary_synonyms": [],
|
| 2135 |
+
"wiktionary_antonyms": []
|
| 2136 |
+
}
|
| 2137 |
+
|
| 2138 |
+
# ============================================================================
|
| 2139 |
+
# 6f. DWDSMOR ENGINE (NEW FALLBACK 1)
|
| 2140 |
+
# ============================================================================
|
| 2141 |
+
|
| 2142 |
+
def dwdsmor_get_lemmatizer() -> Optional[Any]: # Return type is 'sfst.Transducer'
|
| 2143 |
+
"""
|
| 2144 |
+
Thread-safe function to get a single instance of the DWDSmor analyzer.
|
| 2145 |
+
It will automatically download/cache the 'open' automata from Hugging Face Hub.
|
| 2146 |
+
"""
|
| 2147 |
+
global DWDSMOR_LEMMATIZER
|
| 2148 |
+
if not DWDSMOR_AVAILABLE:
|
| 2149 |
+
raise ImportError("dwdsmor library is not installed.")
|
| 2150 |
+
|
| 2151 |
+
if DWDSMOR_LEMMATIZER:
|
| 2152 |
+
return DWDSMOR_LEMMATIZER
|
| 2153 |
+
|
| 2154 |
+
with DWDSMOR_LEMMATIZER_LOCK:
|
| 2155 |
+
if DWDSMOR_LEMMATIZER:
|
| 2156 |
+
return DWDSMOR_LEMMATIZER
|
| 2157 |
+
|
| 2158 |
+
try:
|
| 2159 |
+
print("Initializing DWDSmor lemmatizer (loading automata)...")
|
| 2160 |
+
|
| 2161 |
+
# --- THIS IS THE FIX ---
|
| 2162 |
+
# Use the correct API from dwdsmor's own tools (analysis.py)
|
| 2163 |
+
# This will find and download the HF repo automatically
|
| 2164 |
+
from dwdsmor import automaton
|
| 2165 |
+
automata = automaton.automata()
|
| 2166 |
+
analyzer = automata.analyzer("lemma") # Use the 'lemma' automaton
|
| 2167 |
+
# --- END OF FIX ---
|
| 2168 |
+
|
| 2169 |
+
# Force the traversal to actually run by converting to a list.
|
| 2170 |
+
print("[DEBUG] DWDSmor: Running warm-up call...")
|
| 2171 |
+
_ = list(analyzer.analyze("Test", join_tags=True))
|
| 2172 |
+
|
| 2173 |
+
print("✓ DWDSmor lemmatizer initialized successfully.")
|
| 2174 |
+
DWDSMOR_LEMMATIZER = analyzer
|
| 2175 |
+
return DWDSMOR_LEMMATIZER
|
| 2176 |
+
except Exception as e:
|
| 2177 |
+
print(f"✗ CRITICAL: Failed to initialize DWDSmor: {e}")
|
| 2178 |
+
traceback.print_exc()
|
| 2179 |
+
return None
|
| 2180 |
+
|
| 2181 |
+
def _dwdsmor_map_pos_key(dwdsmor_pos: str) -> str:
|
| 2182 |
+
"""Maps DWDSmor POS tags to our internal keys."""
|
| 2183 |
+
if dwdsmor_pos == "V": return "verb"
|
| 2184 |
+
if dwdsmor_pos == "NN": return "noun"
|
| 2185 |
+
if dwdsmor_pos == "NPROP": return "noun" # Proper Noun
|
| 2186 |
+
if dwdsmor_pos == "ADJ": return "adjective"
|
| 2187 |
+
if dwdsmor_pos == "ADV": return "adverb"
|
| 2188 |
+
return dwdsmor_pos.lower() # Fallback for others
|
| 2189 |
+
|
| 2190 |
+
def _analyze_word_with_dwdsmor(word: str, top_n: int) -> Dict[str, Any]:
|
| 2191 |
+
"""
|
| 2192 |
+
(FALLBACK ENGINE 1) Analyzes a single word using DWDSmor + Pattern + Semantics.
|
| 2193 |
+
Returns {} on failure.
|
| 2194 |
+
"""
|
| 2195 |
+
if not DWDSMOR_AVAILABLE:
|
| 2196 |
+
return {} # Signal failure
|
| 2197 |
+
|
| 2198 |
+
print(f"\n[Word Encyclopedia] Running V21 (DWDSmor) engine for: \"{word}\"")
|
| 2199 |
+
final_result: Dict[str, Any] = {
|
| 2200 |
+
"input_word": word,
|
| 2201 |
+
"analysis": {}
|
| 2202 |
+
}
|
| 2203 |
+
|
| 2204 |
+
try:
|
| 2205 |
+
analyzer = dwdsmor_get_lemmatizer()
|
| 2206 |
+
if not analyzer:
|
| 2207 |
+
raise Exception("DWDSmor lemmatizer failed to initialize.")
|
| 2208 |
+
|
| 2209 |
+
analyses = list(analyzer.analyze(word, join_tags=True))
|
| 2210 |
+
|
| 2211 |
+
if not analyses:
|
| 2212 |
+
return {} # No results
|
| 2213 |
+
|
| 2214 |
+
log(f"[DEBUG] DWDSmor: Found {len(analyses)} potential analyses.")
|
| 2215 |
+
|
| 2216 |
+
processed_lemmas_pos: Set[Tuple[str, str]] = set()
|
| 2217 |
+
|
| 2218 |
+
for analysis in analyses:
|
| 2219 |
+
|
| 2220 |
+
# --- THIS IS THE FIX ---
|
| 2221 |
+
# The 'Traversal' object from analyzer.analyze() uses:
|
| 2222 |
+
# .analysis -> for the lemma string (e.g., "Haus")
|
| 2223 |
+
# .pos -> for the POS tag (e.g., "NN")
|
| 2224 |
+
# .spec -> for the full analysis string
|
| 2225 |
+
if not analysis.analysis or not analysis.pos:
|
| 2226 |
+
continue
|
| 2227 |
+
|
| 2228 |
+
lemma = analysis.analysis # Use .analysis, not .lemma
|
| 2229 |
+
pos_key = _dwdsmor_map_pos_key(analysis.pos)
|
| 2230 |
+
# --- END OF FIX ---
|
| 2231 |
+
|
| 2232 |
+
if (lemma, pos_key) in processed_lemmas_pos:
|
| 2233 |
+
continue
|
| 2234 |
+
processed_lemmas_pos.add((lemma, pos_key))
|
| 2235 |
+
|
| 2236 |
+
log(f"--- Analyzing DWDSmor path: lemma='{lemma}', pos='{pos_key}' ---")
|
| 2237 |
+
|
| 2238 |
+
# --- 1. Get Inflections (Pattern) ---
|
| 2239 |
+
pattern_block = {}
|
| 2240 |
+
if PATTERN_DE_AVAILABLE:
|
| 2241 |
+
try:
|
| 2242 |
+
if pos_key == "noun":
|
| 2243 |
+
pattern_block = pattern_analyze_as_noun(lemma)
|
| 2244 |
+
elif pos_key == "verb":
|
| 2245 |
+
pattern_block = pattern_analyze_as_verb(lemma)
|
| 2246 |
+
elif pos_key == "adjective":
|
| 2247 |
+
pattern_block = pattern_analyze_as_adjective(lemma)
|
| 2248 |
+
elif pos_key == "adverb":
|
| 2249 |
+
pattern_block = {"base_form": lemma, "info": "Adverbs are non-inflecting."}
|
| 2250 |
+
except Exception as e:
|
| 2251 |
+
pattern_block = {"error": f"Pattern.de analysis for {pos_key}('{lemma}') failed: {e}"}
|
| 2252 |
+
|
| 2253 |
+
# --- 2. Build Semantics Block ---
|
| 2254 |
+
semantics_block = _build_semantics_block_for_lemma(lemma, pos_key, top_n)
|
| 2255 |
+
|
| 2256 |
+
# --- 3. Build Final Report Block ---
|
| 2257 |
+
pos_entry_report = {
|
| 2258 |
+
"dwdsmor_analysis": {
|
| 2259 |
+
"lemma": lemma,
|
| 2260 |
+
"pos": analysis.pos,
|
| 2261 |
+
"analysis_string": analysis.spec, # .spec is the full string
|
| 2262 |
+
"source": "dwdsmor"
|
| 2263 |
+
},
|
| 2264 |
+
"inflections_pattern": pattern_block,
|
| 2265 |
+
"semantics_combined": semantics_block
|
| 2266 |
+
}
|
| 2267 |
+
|
| 2268 |
+
if pos_key not in final_result["analysis"]:
|
| 2269 |
+
final_result["analysis"][pos_key] = []
|
| 2270 |
+
final_result["analysis"][pos_key].append(pos_entry_report)
|
| 2271 |
+
|
| 2272 |
+
if not final_result["analysis"]:
|
| 2273 |
+
return {} # No valid paths found
|
| 2274 |
+
|
| 2275 |
+
final_result["info"] = "Analysis performed by DWDSmor-led engine."
|
| 2276 |
+
return final_result
|
| 2277 |
+
|
| 2278 |
+
except Exception as e:
|
| 2279 |
+
print(f"[Word Encyclopedia] DWDSmor Engine FAILED: {e}")
|
| 2280 |
+
traceback.print_exc()
|
| 2281 |
+
return {} # Signal failure
|
| 2282 |
+
|
| 2283 |
# ============================================================================
|
| 2284 |
# 7. CONSOLIDATED ANALYZER LOGIC
|
| 2285 |
# ============================================================================
|
|
|
|
| 2290 |
"""
|
| 2291 |
(CONTEXTUAL) Combines NLP tools for a deep analysis of German text.
|
| 2292 |
|
| 2293 |
+
Reads the list-based, multi-engine output
|
| 2294 |
from `analyze_word_encyclopedia` and combines all senses for ranking.
|
| 2295 |
"""
|
| 2296 |
|
|
|
|
| 2467 |
if key.endswith("_senses") and nlp_de:
|
| 2468 |
ranked_senses = []
|
| 2469 |
for sense in semantic_analysis[key]:
|
|
|
|
| 2470 |
if "error" in sense: continue
|
| 2471 |
definition = sense.get("definition", "")
|
| 2472 |
relevance = 0.0
|
|
|
|
| 2488 |
# ConceptNet Relations
|
| 2489 |
if "conceptnet_relations" in semantic_analysis and nlp_de:
|
| 2490 |
ranked_relations = []
|
|
|
|
| 2491 |
for rel in semantic_analysis["conceptnet_relations"]:
|
| 2492 |
if "error" in rel: continue
|
| 2493 |
text_to_score = rel.get('surface') or rel.get('other_node', '')
|
|
|
|
| 2609 |
inflection_report = {"error": f"pattern.de failed: {e}", "traceback": traceback.format_exc()}
|
| 2610 |
|
| 2611 |
# --- 3d. Build Final Report Block ---
|
| 2612 |
+
final_result["analysis"][pos_group] = [{
|
| 2613 |
"hanta_analysis": {
|
| 2614 |
"detected_tags": sorted(list(specific_tags)),
|
| 2615 |
"lemma": lemma,
|
|
|
|
| 2617 |
hanta_tagger.analyze(word.capitalize() if pos_group == 'noun' else word.lower(), taglevel=3)
|
| 2618 |
]
|
| 2619 |
},
|
| 2620 |
+
"inflections_pattern": inflection_report,
|
| 2621 |
+
"semantics_combined": _build_semantics_block_for_lemma(lemma, pos_group, top_n),
|
| 2622 |
+
}]
|
| 2623 |
|
| 2624 |
if not final_result["analysis"]:
|
| 2625 |
return {
|
|
|
|
| 2639 |
if IWNLP_AVAILABLE:
|
| 2640 |
try:
|
| 2641 |
log("--- Dispatcher: HanTa not found or failed. Attempting IWNLP Fallback Engine ---")
|
|
|
|
| 2642 |
result = _analyze_word_with_iwnlp(word, top_n_value)
|
| 2643 |
result["info"] = result.get("info", "") + " (Analysis performed by IWNLP-based fallback engine)"
|
| 2644 |
return result
|
|
|
|
| 2947 |
|
| 2948 |
# --- 5. BUILD FINAL REPORT ---
|
| 2949 |
for pos_key, analysis_data in valid_analyses.items():
|
| 2950 |
+
lemma = analysis_data["lemma"]
|
| 2951 |
pos_report = {
|
| 2952 |
"inflections_pattern": analysis_data["inflections"],
|
| 2953 |
+
"semantics_combined": _build_semantics_block_for_lemma(lemma, pos_key, top_n)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2954 |
}
|
|
|
|
| 2955 |
if "spacy_analysis" in analysis_data:
|
| 2956 |
pos_report["spacy_analysis"] = analysis_data["spacy_analysis"]
|
| 2957 |
|
| 2958 |
+
final_result["analysis"][pos_key] = [pos_report] # Wrap in list
|
|
|
|
| 2959 |
|
| 2960 |
if not final_result["analysis"]:
|
| 2961 |
return {} # No results
|
|
|
|
| 2964 |
return final_result
|
| 2965 |
|
| 2966 |
|
| 2967 |
+
# --- 7b. Word Encyclopedia (Non-Contextual) Analyzer ---
|
| 2968 |
|
| 2969 |
+
# --- PUBLIC DISPATCHER FUNCTION ---
|
| 2970 |
# --- THIS IS THE NEW PUBLIC DISPATCHER FUNCTION ---
|
| 2971 |
def analyze_word_encyclopedia(word: str, top_n_value: Optional[float] = 0, engine_choice: str = "wiktionary") -> Dict[str, Any]:
|
| 2972 |
"""
|
| 2973 |
+
(PUBLIC DISPATCHER V22) Analyzes a single word using the selected engine
|
| 2974 |
as a starting point, then automatically falls back if no results are found.
|
| 2975 |
+
|
| 2976 |
+
Chain: Wiktionary -> DWDSmor -> HanTa -> IWNLP
|
| 2977 |
"""
|
| 2978 |
if not word or not word.strip():
|
| 2979 |
return {"info": "Please enter a word."}
|
|
|
|
| 2986 |
log(f"\n[Word Encyclopedia] User selected engine: '{engine_choice}' for word: '{word}'")
|
| 2987 |
|
| 2988 |
try:
|
| 2989 |
+
# --- 1. Try Wiktionary ---
|
| 2990 |
if engine_choice == "wiktionary":
|
| 2991 |
+
log(f"[DEBUG] V22 Dispatcher: Trying Wiktionary (Primary) for '{word}'...")
|
| 2992 |
result = _analyze_word_with_wiktionary(word, top_n)
|
| 2993 |
if result and result.get("analysis"):
|
| 2994 |
return result # Success
|
| 2995 |
info_log.append("Wiktionary found no results.")
|
| 2996 |
+
log(f"[DEBUG] V22 Dispatcher: Wiktionary found no results. Falling back to DWDSmor...")
|
| 2997 |
+
|
| 2998 |
+
# --- 2. Try DWDSmor (NEW) ---
|
| 2999 |
+
if engine_choice == "dwdsmor" or (engine_choice == "wiktionary" and not result.get("analysis")):
|
| 3000 |
+
log(f"[DEBUG] V22 Dispatcher: Trying DWDSmor (Fallback 1) for '{word}'...")
|
| 3001 |
+
result = _analyze_word_with_dwdsmor(word, top_n)
|
| 3002 |
+
if result and result.get("analysis"):
|
| 3003 |
+
result["info"] = f"Analysis from DWDSmor (Fallback 1). {(' '.join(info_log))}"
|
| 3004 |
+
return result # Success
|
| 3005 |
+
info_log.append("DWDSmor found no results.")
|
| 3006 |
+
log(f"[DEBUG] V22 Dispatcher: DWDSmor found no results. Falling back to HanTa...")
|
| 3007 |
+
|
| 3008 |
+
# --- 3. Try HanTa ---
|
| 3009 |
+
if engine_choice == "hanta" or (not result.get("analysis")):
|
| 3010 |
+
log(f"[DEBUG] V22 Dispatcher: Trying HanTa (Fallback 2) for '{word}'...")
|
| 3011 |
result = _analyze_word_with_hanta(word, top_n)
|
| 3012 |
if result and result.get("analysis"):
|
| 3013 |
+
result["info"] = f"Analysis from HanTa (Fallback 2). {(' '.join(info_log))}"
|
| 3014 |
return result # Success
|
| 3015 |
info_log.append("HanTa found no results.")
|
| 3016 |
+
log(f"[DEBUG] V22 Dispatcher: HanTa found no results. Falling back to IWNLP...")
|
| 3017 |
|
| 3018 |
+
# --- 4. Try IWNLP ---
|
|
|
|
|
|
|
|
|
|
| 3019 |
if engine_choice == "iwnlp" or (not result.get("analysis")):
|
| 3020 |
+
log(f"[DEBUG] V22 Dispatcher: Trying IWNLP (Fallback 3) for '{word}'...")
|
| 3021 |
result = _analyze_word_with_iwnlp(word, top_n)
|
| 3022 |
if result and result.get("analysis"):
|
| 3023 |
+
result["info"] = f"Analysis from IWNLP (Fallback 3). {(' '.join(info_log))}"
|
| 3024 |
return result # Success
|
| 3025 |
info_log.append("IWNLP found no results.")
|
| 3026 |
|
|
|
|
| 3034 |
}
|
| 3035 |
|
| 3036 |
# --- No engines found anything ---
|
| 3037 |
+
log(f"[DEBUG] V22 Dispatcher: All engines failed to find results for '{word}'.")
|
| 3038 |
return {
|
| 3039 |
"input_word": word,
|
| 3040 |
"info": f"No analysis found. All engines failed. ({' '.join(info_log)})"
|
|
|
|
| 3232 |
)
|
| 3233 |
|
| 3234 |
def create_word_encyclopedia_tab():
|
| 3235 |
+
"""--- UI for the NON-CONTEXTUAL Word Analyzer tab ---"""
|
| 3236 |
gr.Markdown("# 📖 Word Encyclopedia (Non-Contextual)")
|
| 3237 |
gr.Markdown("This tool analyzes a **single word** for *all possible* grammatical and semantic forms. It finds ambiguities (e.g., 'Lauf' as noun and verb) and groups all data by Part-of-Speech.")
|
| 3238 |
|
|
|
|
| 3251 |
interactive=True
|
| 3252 |
)
|
| 3253 |
|
| 3254 |
+
# --- ADD DWDSMOR TO THE RADIO BUTTONS ---
|
| 3255 |
engine_radio = gr.Radio(
|
| 3256 |
+
label="Select Analysis Engine (will auto-fallback)",
|
| 3257 |
choices=[
|
| 3258 |
("Wiktionary (Default)", "wiktionary"),
|
| 3259 |
+
("DWDSmor (New)", "dwdsmor"),
|
| 3260 |
+
("HanTa (Fallback 2)", "hanta"),
|
| 3261 |
+
("IWNLP (Fallback 3)", "iwnlp")
|
| 3262 |
],
|
| 3263 |
value="wiktionary",
|
| 3264 |
interactive=True
|
| 3265 |
)
|
| 3266 |
+
# --- END OF CHANGE ---
|
| 3267 |
|
| 3268 |
analyze_button = gr.Button("Analyze Word", variant="primary")
|
| 3269 |
|
| 3270 |
output = gr.JSON(label="Word Encyclopedia Analysis (JSON)")
|
| 3271 |
|
|
|
|
| 3272 |
analyze_button.click(
|
| 3273 |
fn=analyze_word_encyclopedia,
|
|
|
|
| 3274 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3275 |
outputs=[output],
|
| 3276 |
api_name="analyze_word"
|
| 3277 |
)
|
| 3278 |
|
|
|
|
| 3279 |
gr.Examples(
|
| 3280 |
[["Lauf", 3, "wiktionary"],
|
| 3281 |
["See", 0, "wiktionary"],
|
| 3282 |
["schnell", 3, "wiktionary"],
|
| 3283 |
["heute", 0, "wiktionary"],
|
| 3284 |
+
["gebildet", 0, "dwdsmor"]], # Example to show the new engine
|
| 3285 |
inputs=[word_input, top_n_number, engine_radio],
|
| 3286 |
outputs=[output],
|
| 3287 |
fn=analyze_word_encyclopedia
|
|
|
|
| 3312 |
inputs=[word_input], outputs=[output], fn=lambda word: _analyze_word_with_wiktionary(word, 0)
|
| 3313 |
)
|
| 3314 |
|
| 3315 |
+
def create_dwdsmor_tab():
    """Build the UI for the standalone DWDSmor lookup tab.

    Renders a single-word textbox wired to the raw ``dwdsmor`` FST-based
    morphological analyzer and shows every analysis as JSON. The click
    handler is also exposed under the API name ``dwdsmor_lookup``.
    """
    gr.Markdown("# 🏛️ DWDSmor Morphology (Raw Engine)")
    gr.Markdown("Directly query the `dwdsmor` FST-based engine. This is a high-precision morphological analyzer.")

    def dwdsmor_raw_analysis(word):
        """Return raw DWDSmor analyses for *word* as a JSON-safe dict.

        Never raises: failures are reported as ``{"error": ...}`` (or
        ``{"info": ...}`` when no analysis exists) so the gr.JSON output
        component always has something to render.
        """
        if not DWDSMOR_AVAILABLE:
            return {"error": "DWDSmor library not installed."}
        try:
            analyzer = dwdsmor_get_lemmatizer()
            if not analyzer:
                return {"error": "DWDSmor lemmatizer failed to initialize."}

            # analyzer.analyze() returns an iterable of Traversal objects;
            # materialize it once so we can both test for emptiness and iterate.
            analyses = list(analyzer.analyze(word, join_tags=True))

            if not analyses:
                return {"info": f"No analysis found for '{word}'."}

            # Convert Traversal objects to plain dicts for JSON output.
            # NOTE(review): assumes .analysis holds the lemma and .spec the
            # full analysis string — confirm against the dwdsmor API docs.
            results = [
                {
                    "lemma": analysis.analysis,
                    "pos": analysis.pos,
                    "analysis_string": analysis.spec,
                    "tags": analysis.tags,
                }
                for analysis in analyses
            ]
            return {"input_word": word, "analyses": results}
        except Exception as e:
            # Surface the traceback in the UI rather than crashing the app.
            return {"error": str(e), "traceback": traceback.format_exc()}

    with gr.Column():
        word_input = gr.Textbox(
            label="Single German Word",
            placeholder="e.g., gebildet, schnell, Häuser"
        )
        analyze_button = gr.Button("Analyze Word with DWDSmor", variant="primary")

        output = gr.JSON(label="DWDSmor Raw Analysis (JSON)")

        analyze_button.click(
            fn=dwdsmor_raw_analysis,
            inputs=[word_input],
            outputs=[output],
            api_name="dwdsmor_lookup"
        )
        gr.Examples(
            [["gebildet"], ["schnell"], ["Häuser"], ["gehe"]],
            inputs=[word_input], outputs=[output], fn=dwdsmor_raw_analysis
        )
|
| 3369 |
+
|
| 3370 |
def create_hanta_tab():
|
| 3371 |
"""Creates the UI for the standalone HanTa Engine tab."""
|
| 3372 |
gr.Markdown("# 🤖 HanTa Lookup (Raw Engine)")
|
|
|
|
| 3447 |
|
| 3448 |
with gr.Tab("🔬 Engine: IWNLP-spaCy (DE)"):
|
| 3449 |
create_iwnlp_tab()
|
| 3450 |
+
|
| 3451 |
+
with gr.Tab("🏛️ Engine: DWDSmor (DE)"):
|
| 3452 |
+
create_dwdsmor_tab()
|
| 3453 |
|
| 3454 |
# --- Standalone Component Tabs ---
|
| 3455 |
with gr.Tab("📚 Component: Inflections (DE)"):
|
|
|
|
| 3502 |
print(f"✗ FAILED to initialize Wiktionary: {e}")
|
| 3503 |
print("--- Wiktionary Done ---\n")
|
| 3504 |
|
| 3505 |
+
# --- Initialize DWDSmor ---
|
| 3506 |
+
print("--- Initializing DWDSmor Lemmatizer ---")
|
| 3507 |
+
if DWDSMOR_AVAILABLE:
|
| 3508 |
+
try:
|
| 3509 |
+
dwdsmor_get_lemmatizer() # Call the function to load the model
|
| 3510 |
+
except Exception as e:
|
| 3511 |
+
print(f"✗ FAILED to start DWDSmor: {e}")
|
| 3512 |
+
print(" 'Word Encyclopedia' DWDSmor engine will fail.")
|
| 3513 |
+
else:
|
| 3514 |
+
print("INFO: DWDSmor library not available, skipping lemmatizer.")
|
| 3515 |
+
print("--- DWDSmor Done ---\n")
|
| 3516 |
+
|
| 3517 |
# --- 4. Initialize HanTa Tagger ---
|
| 3518 |
print("--- Initializing HanTa Tagger ---")
|
| 3519 |
if HANTA_AVAILABLE:
|