cstr committed on
Commit
0cbec99
·
verified ·
1 Parent(s): 279d509

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +209 -86
app.py CHANGED
@@ -85,40 +85,64 @@ except ImportError:
85
  WN_AVAILABLE = False
86
  print("CRITICAL WARNING: `wn` library not found.")
87
 
 
88
  # --- Pattern.en Import (ENGLISH) ---
89
  PATTERN_EN_AVAILABLE = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90
  try:
91
- # Try importing from the standard pattern library
92
  import pattern.en
 
93
  from pattern.en import (
94
- pluralize, singularize, conjugate, tenses, lemma, lexeme,
95
- attributive, predicative,
96
- article, MALE, FEMALE, NEUTRAL, PLURAL,
97
- INFINITIVE, PRESENT, PAST, PARTICIPLE,
98
- FIRST, SECOND, THIRD, SINGULAR, PLURAL as PL,
99
- INDICATIVE, IMPERATIVE, SUBJUNCTIVE,
100
  comparative, superlative,
101
- NOUN, VERB, ADJECTIVE,
 
102
  parse, split
103
  )
 
 
 
 
 
 
 
 
 
 
 
104
  PATTERN_EN_AVAILABLE = True
105
  print("✓ Successfully imported pattern.en")
 
106
  except ImportError:
107
- print("Using PatternLite fallback...")
108
  try:
109
- # Sometimes PatternLite structure is slightly different, but usually compatible
110
  import pattern.en
111
- from pattern.en import (
112
- pluralize, singularize, conjugate, tenses, lemma, lexeme,
113
- attributive, predicative,
114
- article, MALE, FEMALE, NEUTRAL, PLURAL,
115
- INFINITIVE, PRESENT, PAST, PARTICIPLE,
116
- FIRST, SECOND, THIRD, SINGULAR, PLURAL as PL,
117
- INDICATIVE, IMPERATIVE, SUBJUNCTIVE,
118
- comparative, superlative,
119
- NOUN, VERB, ADJECTIVE,
120
- parse, split
121
- )
122
  PATTERN_EN_AVAILABLE = True
123
  print("✓ Successfully imported pattern.en (via PatternLite)")
124
  except ImportError as e:
@@ -136,20 +160,45 @@ except ImportError:
136
  HANTA_AVAILABLE = False
137
  print("CRITICAL WARNING: `HanTa` library not found.")
138
 
139
- # --- NLTK Import ---
140
  try:
141
  import nltk
142
  from nltk.corpus import wordnet as nltk_wn
143
  from nltk.stem import WordNetLemmatizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  NLTK_AVAILABLE = True
145
- print("✓ Successfully imported nltk")
146
- # One-time downloads
147
- nltk.download('wordnet', quiet=True, raise_on_error=True)
148
- nltk.download('averaged_perceptron_tagger', quiet=True, raise_on_error=True)
149
- nltk.download('punkt', quiet=True, raise_on_error=True) # For TextBlob
150
  except Exception as e:
151
  NLTK_AVAILABLE = False
152
- print(f"WARNING: `nltk` or its data failed to load: {e}")
 
 
 
 
 
 
 
 
153
 
154
  # --- Stanza Import ---
155
  try:
@@ -160,14 +209,6 @@ except ImportError:
160
  STANZA_AVAILABLE = False
161
  print("WARNING: `stanza` library not found.")
162
 
163
- # --- TextBlob Import ---
164
- try:
165
- from textblob import TextBlob
166
- TEXTBLOB_AVAILABLE = True
167
- print("✓ Successfully imported textblob")
168
- except ImportError:
169
- TEXTBLOB_AVAILABLE = False
170
- print("WARNING: `textblob` library not found.")
171
 
172
  # --- German-specific imports are not needed ---
173
  IWNLP_AVAILABLE = False
@@ -220,7 +261,10 @@ def _conjugate_to_person_number_en(verb_lemma: str, person: str, number: str) ->
220
  return None
221
  try:
222
  p_num = int(person)
 
223
  n_num = SINGULAR if number == 'sg' else PLURAL
 
 
224
  return conjugate(verb_lemma, tense=PRESENT, person=p_num, number=n_num)
225
  except Exception:
226
  return None
@@ -748,80 +792,122 @@ def pattern_is_good_analysis(analysis, analysis_type):
748
 
749
  def pattern_analyze_as_noun_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
750
  """Comprehensive noun inflection analysis for English."""
751
- log(f" Analyzing as noun (hint_lemma={hint_lemma})")
752
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
753
 
754
- singular_form = singularize(word)
755
- base = singular_form
756
- plural_form = pluralize(base)
757
-
 
 
 
 
 
 
 
 
 
 
 
758
  analysis = {
759
- "base_form": base,
760
- "singular": base,
761
  "plural": plural_form,
 
762
  "declension": {
763
- "Singular": {"form": base},
764
  "Plural": {"form": plural_form}
765
  },
766
- "gender": "Neuter" # English nouns don't have grammatical gender
767
  }
768
  return analysis
769
 
770
  def pattern_analyze_as_verb_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
771
  """Comprehensive verb conjugation analysis for English."""
772
- log(f" Analyzing as verb (hint_lemma={hint_lemma})")
773
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
774
 
775
- verb_lemma = lemma(word)
776
- if not verb_lemma:
 
 
777
  verb_lemma = word
778
 
779
  analysis = {"infinitive": verb_lemma}
 
 
780
  try:
781
- analysis["lexeme"] = lexeme(verb_lemma)
 
 
 
782
  except Exception as e:
783
- log(f" Failed to get lexeme: {e}")
 
784
 
 
785
  analysis["conjugation"] = {}
786
  try:
 
787
  analysis["conjugation"]["Present"] = {
788
- "I": conjugate(verb_lemma, PRESENT, 1, SINGULAR),
789
- "you": conjugate(verb_lemma, PRESENT, 2, SINGULAR),
790
- "he/she/it": conjugate(verb_lemma, PRESENT, 3, SINGULAR),
791
- "we": conjugate(verb_lemma, PRESENT, 1, PLURAL),
792
- "you (pl)": conjugate(verb_lemma, PRESENT, 2, PLURAL),
793
- "they": conjugate(verb_lemma, PRESENT, 3, PLURAL),
794
  }
 
 
795
  analysis["conjugation"]["Past"] = {
796
- "I": conjugate(verb_lemma, PAST, 1, SINGULAR),
797
- "he/she/it": conjugate(verb_lemma, PAST, 3, SINGULAR),
 
798
  }
 
 
799
  analysis["participles"] = {
800
- "Present Participle": conjugate(verb_lemma, PARTICIPLE, tense=PRESENT),
801
- "Past Participle": conjugate(verb_lemma, PARTICIPLE, tense=PAST)
802
  }
803
  except Exception as e:
804
- log(f" Failed to conjugate: {e}")
805
 
806
  return analysis
807
 
808
  def pattern_analyze_as_adjective_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
809
  """Comprehensive adjective inflection analysis for English."""
810
- log(f" Analyzing as adjective (hint_lemma={hint_lemma})")
811
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
812
 
813
- base = predicative(word)
 
 
 
 
 
 
 
814
  analysis = {}
815
  analysis["predicative"] = base
 
816
  try:
817
- analysis["comparative"] = comparative(base)
818
- analysis["superlative"] = superlative(base)
 
 
 
 
 
 
 
 
 
819
  except Exception as e:
820
- log(f" Failed to get comparison: {e}")
 
821
 
822
- analysis["attributive"] = {
823
- "Base": {"form": base, "example": f"a {base} [noun]"}
824
- }
825
  return analysis
826
 
827
  # --- Public API (Adapted) ---
@@ -1635,15 +1721,29 @@ def nltk_get_lemmatizer() -> Optional[WordNetLemmatizer]:
1635
  """ Thread-safe function to get the NLTK Lemmatizer. """
1636
  global NLTK_LEMMATIZER
1637
  if not NLTK_AVAILABLE:
1638
- raise ImportError("NLTK library is not installed.")
 
1639
  if NLTK_LEMMATIZER:
1640
  return NLTK_LEMMATIZER
 
1641
  with NLTK_LEMMATIZER_LOCK:
1642
  if NLTK_LEMMATIZER:
1643
  return NLTK_LEMMATIZER
1644
- NLTK_LEMMATIZER = WordNetLemmatizer()
1645
- print("✓ NLTK Lemmatizer initialized.")
1646
- return NLTK_LEMMATIZER
 
 
 
 
 
 
 
 
 
 
 
 
1647
 
1648
  def _nltk_get_wordnet_pos(treebank_tag):
1649
  """Converts NLTK's Treebank POS tag to a WordNet tag."""
@@ -1654,7 +1754,7 @@ def _nltk_get_wordnet_pos(treebank_tag):
1654
  return None
1655
 
1656
  def _analyze_word_with_nltk(word: str, top_n: int) -> Dict[str, Any]:
1657
- """ (FALLBACK ENGINE 3) Analyzes with NLTK. Must match JSON. """
1658
  if not NLTK_AVAILABLE: return {}
1659
  print(f"\n[Word Encyclopedia] Running NLTK fallback for: \"{word}\"")
1660
  final_result = {"input_word": word, "analysis": {}}
@@ -1664,7 +1764,14 @@ def _analyze_word_with_nltk(word: str, top_n: int) -> Dict[str, Any]:
1664
  if not lemmatizer: return {}
1665
 
1666
  # NLTK's POS tagger needs a list
1667
- tag = nltk.pos_tag([word])[0][1]
 
 
 
 
 
 
 
1668
  wn_pos = _nltk_get_wordnet_pos(tag)
1669
 
1670
  if not wn_pos:
@@ -1672,13 +1779,17 @@ def _analyze_word_with_nltk(word: str, top_n: int) -> Dict[str, Any]:
1672
  return {}
1673
 
1674
  lemma = lemmatizer.lemmatize(word, wn_pos)
1675
- pos_map = {nltk_wn.NOUN: "noun", nltk_wn.VERB: "verb", nltk_wn.ADJ: "adjective", nltk_wn.ADV: "adverb"}
1676
- pos_key = pos_map[wn_pos]
1677
 
 
 
 
 
 
1678
  log(f"--- Analyzing NLTK path: lemma='{lemma}', pos='{pos_key}' ---")
1679
 
1680
  pattern_block = {}
1681
  if PATTERN_EN_AVAILABLE:
 
1682
  if pos_key == "noun": pattern_block = pattern_analyze_as_noun_en(lemma)
1683
  elif pos_key == "verb": pattern_block = pattern_analyze_as_verb_en(lemma)
1684
  elif pos_key == "adjective": pattern_block = pattern_analyze_as_adjective_en(lemma)
@@ -1709,18 +1820,17 @@ def _analyze_word_with_nltk(word: str, top_n: int) -> Dict[str, Any]:
1709
  return final_result
1710
  except Exception as e:
1711
  log(f"NLTK Engine FAILED: {e}")
1712
- traceback.print_exc()
1713
  return {}
1714
 
1715
  # --- FALLBACK 4: TEXTBLOB ---
1716
  def _analyze_word_with_textblob(word: str, top_n: int) -> Dict[str, Any]:
1717
- """ (FALLBACK ENGINE 4) Analyzes with TextBlob. Must match JSON. """
1718
  if not TEXTBLOB_AVAILABLE: return {}
1719
  print(f"\n[Word Encyclopedia] Running TextBlob fallback for: \"{word}\"")
1720
  final_result = {"input_word": word, "analysis": {}}
1721
 
1722
  def get_wordnet_pos_tb(treebank_tag):
1723
- """ Maps Treebank to TextBlob's lemmatizer tags (n, v, a, r) """
1724
  if treebank_tag.startswith('J'): return 'a'
1725
  if treebank_tag.startswith('V'): return 'v'
1726
  if treebank_tag.startswith('N'): return 'n'
@@ -1728,19 +1838,33 @@ def _analyze_word_with_textblob(word: str, top_n: int) -> Dict[str, Any]:
1728
  return None
1729
 
1730
  try:
1731
- blob = TextBlob(word)
1732
- if not blob.tags: return {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1733
 
1734
- # Process each tag TextBlob finds
1735
  processed_lemmas_pos: Set[Tuple[str, str]] = set()
1736
 
1737
- for tb_word, tag in blob.tags:
1738
  tb_pos = get_wordnet_pos_tb(tag)
1739
  if not tb_pos: continue
1740
 
1741
  lemma = tb_word.lemmatize(tb_pos)
1742
  pos_map = {'n': "noun", 'v': "verb", 'a': "adjective", 'r': "adverb"}
1743
- pos_key = pos_map[tb_pos]
 
1744
 
1745
  if (lemma, pos_key) in processed_lemmas_pos: continue
1746
  processed_lemmas_pos.add((lemma, pos_key))
@@ -1777,7 +1901,6 @@ def _analyze_word_with_textblob(word: str, top_n: int) -> Dict[str, Any]:
1777
  return final_result
1778
  except Exception as e:
1779
  log(f"TextBlob Engine FAILED: {e}")
1780
- traceback.print_exc()
1781
  return {}
1782
 
1783
 
 
85
  WN_AVAILABLE = False
86
  print("CRITICAL WARNING: `wn` library not found.")
87
 
88
+ # --- Pattern.en Import (ENGLISH) ---
89
  # --- Pattern.en Import (ENGLISH) ---
90
  PATTERN_EN_AVAILABLE = False
91
+
92
+ # Define constants locally as fallbacks (Pattern standard values) to prevent import errors
93
+ # Tenses
94
+ INFINITIVE = "inf"
95
+ PRESENT = "pres"
96
+ PAST = "pst"
97
+ FUTURE = "fut"
98
+ PARTICIPLE = "part"
99
+ # Person/Number
100
+ FIRST = 1
101
+ SECOND = 2
102
+ THIRD = 3
103
+ SINGULAR = "sg"
104
+ PLURAL = "pl"
105
+ # POS
106
+ NOUN = "NN"
107
+ VERB = "VB"
108
+ ADJECTIVE = "JJ"
109
+
110
  try:
 
111
  import pattern.en
112
+ # Import functions safely
113
  from pattern.en import (
114
+ pluralize, singularize,
115
+ conjugate, lemma, lexeme, tenses,
 
 
 
 
116
  comparative, superlative,
117
+ predicative, attributive,
118
+ article,
119
  parse, split
120
  )
121
+
122
+ # Try to import constants, but don't fail if they are missing (we use fallbacks)
123
+ try:
124
+ from pattern.en import (
125
+ INFINITIVE, PRESENT, PAST, PARTICIPLE,
126
+ FIRST, SECOND, THIRD, SINGULAR, PLURAL,
127
+ NOUN, VERB, ADJECTIVE
128
+ )
129
+ except ImportError:
130
+ print("Using local fallback constants for Pattern.en")
131
+
132
  PATTERN_EN_AVAILABLE = True
133
  print("✓ Successfully imported pattern.en")
134
+
135
  except ImportError:
136
+ print("Using PatternLite fallback logic...")
137
  try:
138
+ # Attempt simple import for PatternLite structure
139
  import pattern.en
140
+ from pattern.en import pluralize, singularize, conjugate, lemma, lexeme
141
+
142
+ # Manually map functions if they are missing in Lite but available under different names
143
+ if not 'comparative' in dir(pattern.en):
144
+ from pattern.en import comparative, superlative
145
+
 
 
 
 
 
146
  PATTERN_EN_AVAILABLE = True
147
  print("✓ Successfully imported pattern.en (via PatternLite)")
148
  except ImportError as e:
 
160
  HANTA_AVAILABLE = False
161
  print("CRITICAL WARNING: `HanTa` library not found.")
162
 
163
+ # --- NLTK & TextBlob Import ---
164
  try:
165
  import nltk
166
  from nltk.corpus import wordnet as nltk_wn
167
  from nltk.stem import WordNetLemmatizer
168
+
169
+ # --- CRITICAL: Download required NLTK data ---
170
+ # These are the specific packages causing your "LookupError" and "MissingCorpusError"
171
+ print("Downloading NLTK data...")
172
+ _nltk_packages = [
173
+ 'wordnet',
174
+ 'omw-1.4',
175
+ 'averaged_perceptron_tagger',
176
+ 'averaged_perceptron_tagger_eng', # Specific for newer NLTK
177
+ 'punkt',
178
+ 'punkt_tab' # Specific for newer TextBlob/NLTK
179
+ ]
180
+ for pkg in _nltk_packages:
181
+ try:
182
+ nltk.download(pkg, quiet=True)
183
+ except Exception as e:
184
+ print(f"Warning: Failed to download NLTK package '{pkg}': {e}")
185
+
186
  NLTK_AVAILABLE = True
187
+ print("✓ Successfully imported nltk and downloaded data")
188
+ except ImportError:
189
+ NLTK_AVAILABLE = False
190
+ print("WARNING: `nltk` library not found.")
 
191
  except Exception as e:
192
  NLTK_AVAILABLE = False
193
+ print(f"WARNING: `nltk` data download failed: {e}")
194
+
195
+ try:
196
+ from textblob import TextBlob
197
+ TEXTBLOB_AVAILABLE = True
198
+ print("✓ Successfully imported textblob")
199
+ except ImportError:
200
+ TEXTBLOB_AVAILABLE = False
201
+ print("WARNING: `textblob` library not found.")
202
 
203
  # --- Stanza Import ---
204
  try:
 
209
  STANZA_AVAILABLE = False
210
  print("WARNING: `stanza` library not found.")
211
 
 
 
 
 
 
 
 
 
212
 
213
  # --- German-specific imports are not needed ---
214
  IWNLP_AVAILABLE = False
 
261
  return None
262
  try:
263
  p_num = int(person)
264
+ # Use the constants defined in the import block
265
  n_num = SINGULAR if number == 'sg' else PLURAL
266
+
267
+ # Explicitly name arguments for safety across Pattern versions
268
  return conjugate(verb_lemma, tense=PRESENT, person=p_num, number=n_num)
269
  except Exception:
270
  return None
 
792
 
793
  def pattern_analyze_as_noun_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
794
  """Comprehensive noun inflection analysis for English."""
795
+ log(f" Analyzing as noun (hint_lemma={hint_lemma})")
796
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
797
 
798
+ # 1. Determine Singular/Plural base
799
+ # If the word is already plural, singularize it to get the lemma
800
+ try:
801
+ singular_form = singularize(word)
802
+ plural_form = pluralize(singular_form)
803
+ except Exception as e:
804
+ return {'error': f'Inflection failed: {e}'}
805
+
806
+ # 2. Get Indefinite Article (a/an)
807
+ try:
808
+ art = article(singular_form)
809
+ art_str = f"{art} {singular_form}"
810
+ except Exception:
811
+ art_str = f"a/an {singular_form}"
812
+
813
  analysis = {
814
+ "base_form": singular_form,
815
+ "singular": singular_form,
816
  "plural": plural_form,
817
+ "article": art_str,
818
  "declension": {
819
+ "Singular": {"form": singular_form},
820
  "Plural": {"form": plural_form}
821
  },
822
+ "gender": "N/A" # English nouns strictly do not have grammatical gender
823
  }
824
  return analysis
825
 
826
  def pattern_analyze_as_verb_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
827
  """Comprehensive verb conjugation analysis for English."""
828
+ log(f" Analyzing as verb (hint_lemma={hint_lemma})")
829
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
830
 
831
+ # 1. Get Lemma
832
+ try:
833
+ verb_lemma = lemma(word)
834
+ except:
835
  verb_lemma = word
836
 
837
  analysis = {"infinitive": verb_lemma}
838
+
839
+ # 2. Get Lexeme (List of all forms)
840
  try:
841
+ # lexeme returns: [infinitive, 3sg, present_participle, past, past_participle]
842
+ # e.g., be => ['be', 'is', 'being', 'was', 'been']
843
+ forms = lexeme(verb_lemma)
844
+ analysis["lexeme"] = forms
845
  except Exception as e:
846
+ log(f" Failed to get lexeme: {e}")
847
+ analysis["lexeme"] = []
848
 
849
+ # 3. Conjugation Table
850
  analysis["conjugation"] = {}
851
  try:
852
+ # Present Tense
853
  analysis["conjugation"]["Present"] = {
854
+ "I (1sg)": conjugate(verb_lemma, tense=PRESENT, person=1, number=SINGULAR),
855
+ "you (2sg)": conjugate(verb_lemma, tense=PRESENT, person=2, number=SINGULAR),
856
+ "he/she (3sg)": conjugate(verb_lemma, tense=PRESENT, person=3, number=SINGULAR),
857
+ "we (1pl)": conjugate(verb_lemma, tense=PRESENT, person=1, number=PLURAL),
858
+ "you (2pl)": conjugate(verb_lemma, tense=PRESENT, person=2, number=PLURAL),
859
+ "they (3pl)": conjugate(verb_lemma, tense=PRESENT, person=3, number=PLURAL),
860
  }
861
+
862
+ # Past Tense (Pattern usually handles simple past variations)
863
  analysis["conjugation"]["Past"] = {
864
+ "I (1sg)": conjugate(verb_lemma, tense=PAST, person=1, number=SINGULAR),
865
+ "he/she (3sg)": conjugate(verb_lemma, tense=PAST, person=3, number=SINGULAR),
866
+ "General": conjugate(verb_lemma, tense=PAST) # For regular verbs where all are same
867
  }
868
+
869
+ # Participles
870
  analysis["participles"] = {
871
+ "Present Participle (gerund)": conjugate(verb_lemma, tense=PRESENT, aspect="progressive"), # or aspect=PROGRESSIVE
872
+ "Past Participle": conjugate(verb_lemma, tense=PAST, aspect="perfective") # or use PARTICIPLE constant
873
  }
874
  except Exception as e:
875
+ log(f" Failed to conjugate: {e}")
876
 
877
  return analysis
878
 
879
  def pattern_analyze_as_adjective_en(word: str, hint_lemma: str = None) -> Dict[str, Any]:
880
  """Comprehensive adjective inflection analysis for English."""
881
+ log(f" Analyzing as adjective (hint_lemma={hint_lemma})")
882
  if not PATTERN_EN_AVAILABLE: return {'error': 'pattern.en not available'}
883
 
884
+ try:
885
+ # If the word is comparative/superlative, try to get the base (predicative)
886
+ # Note: Pattern doesn't have a strong 'un-grade' function, so we rely on lemma if available
887
+ # or assumes input is the base.
888
+ base = word
889
+ except Exception:
890
+ base = word
891
+
892
  analysis = {}
893
  analysis["predicative"] = base
894
+
895
  try:
896
+ comp = comparative(base)
897
+ sup = superlative(base)
898
+
899
+ analysis["comparative"] = comp
900
+ analysis["superlative"] = sup
901
+
902
+ analysis["grading"] = {
903
+ "Positive": base,
904
+ "Comparative": comp,
905
+ "Superlative": sup
906
+ }
907
  except Exception as e:
908
+ log(f" Failed to get comparison: {e}")
909
+ analysis["grading"] = {"error": "Could not grade adjective"}
910
 
 
 
 
911
  return analysis
912
 
913
  # --- Public API (Adapted) ---
 
1721
  """ Thread-safe function to get the NLTK Lemmatizer. """
1722
  global NLTK_LEMMATIZER
1723
  if not NLTK_AVAILABLE:
1724
+ return None # Don't raise error, just return None to trigger graceful fallback
1725
+
1726
  if NLTK_LEMMATIZER:
1727
  return NLTK_LEMMATIZER
1728
+
1729
  with NLTK_LEMMATIZER_LOCK:
1730
  if NLTK_LEMMATIZER:
1731
  return NLTK_LEMMATIZER
1732
+ try:
1733
+ # Ensure data is present one last time before init
1734
+ try:
1735
+ nltk.data.find('corpora/wordnet.zip')
1736
+ except LookupError:
1737
+ nltk.download('wordnet', quiet=True)
1738
+
1739
+ NLTK_LEMMATIZER = WordNetLemmatizer()
1740
+ # Warm up
1741
+ _ = NLTK_LEMMATIZER.lemmatize("cats")
1742
+ print("✓ NLTK Lemmatizer initialized.")
1743
+ return NLTK_LEMMATIZER
1744
+ except Exception as e:
1745
+ print(f"✗ NLTK Init Failed: {e}")
1746
+ return None
1747
 
1748
  def _nltk_get_wordnet_pos(treebank_tag):
1749
  """Converts NLTK's Treebank POS tag to a WordNet tag."""
 
1754
  return None
1755
 
1756
  def _analyze_word_with_nltk(word: str, top_n: int) -> Dict[str, Any]:
1757
+ """ (FALLBACK ENGINE 3) Analyzes with NLTK. """
1758
  if not NLTK_AVAILABLE: return {}
1759
  print(f"\n[Word Encyclopedia] Running NLTK fallback for: \"{word}\"")
1760
  final_result = {"input_word": word, "analysis": {}}
 
1764
  if not lemmatizer: return {}
1765
 
1766
  # NLTK's POS tagger needs a list
1767
+ # This specific call was crashing because 'averaged_perceptron_tagger_eng' was missing
1768
+ try:
1769
+ tag = nltk.pos_tag([word])[0][1]
1770
+ except LookupError:
1771
+ # Last ditch attempt to download if it was missing
1772
+ nltk.download('averaged_perceptron_tagger_eng', quiet=True)
1773
+ tag = nltk.pos_tag([word])[0][1]
1774
+
1775
  wn_pos = _nltk_get_wordnet_pos(tag)
1776
 
1777
  if not wn_pos:
 
1779
  return {}
1780
 
1781
  lemma = lemmatizer.lemmatize(word, wn_pos)
 
 
1782
 
1783
+ # Map NLTK WN constants to strings
1784
+ pos_map_rev = {nltk_wn.NOUN: "noun", nltk_wn.VERB: "verb", nltk_wn.ADJ: "adjective", nltk_wn.ADV: "adverb"}
1785
+ pos_key = pos_map_rev.get(wn_pos)
1786
+ if not pos_key: return {}
1787
+
1788
  log(f"--- Analyzing NLTK path: lemma='{lemma}', pos='{pos_key}' ---")
1789
 
1790
  pattern_block = {}
1791
  if PATTERN_EN_AVAILABLE:
1792
+ # Use the fixed pattern functions from previous step
1793
  if pos_key == "noun": pattern_block = pattern_analyze_as_noun_en(lemma)
1794
  elif pos_key == "verb": pattern_block = pattern_analyze_as_verb_en(lemma)
1795
  elif pos_key == "adjective": pattern_block = pattern_analyze_as_adjective_en(lemma)
 
1820
  return final_result
1821
  except Exception as e:
1822
  log(f"NLTK Engine FAILED: {e}")
1823
+ # traceback.print_exc() # Optional: Uncomment for deep debugging
1824
  return {}
1825
 
1826
  # --- FALLBACK 4: TEXTBLOB ---
1827
  def _analyze_word_with_textblob(word: str, top_n: int) -> Dict[str, Any]:
1828
+ """ (FALLBACK ENGINE 4) Analyzes with TextBlob. """
1829
  if not TEXTBLOB_AVAILABLE: return {}
1830
  print(f"\n[Word Encyclopedia] Running TextBlob fallback for: \"{word}\"")
1831
  final_result = {"input_word": word, "analysis": {}}
1832
 
1833
  def get_wordnet_pos_tb(treebank_tag):
 
1834
  if treebank_tag.startswith('J'): return 'a'
1835
  if treebank_tag.startswith('V'): return 'v'
1836
  if treebank_tag.startswith('N'): return 'n'
 
1838
  return None
1839
 
1840
  try:
1841
+ try:
1842
+ blob = TextBlob(word)
1843
+ # This access triggers the tokenizer
1844
+ tags = blob.tags
1845
+ except (LookupError, Exception) as e:
1846
+ if "punkt" in str(e):
1847
+ print("Attempting to download missing TextBlob/NLTK data...")
1848
+ import nltk
1849
+ nltk.download('punkt_tab', quiet=True)
1850
+ nltk.download('punkt', quiet=True)
1851
+ blob = TextBlob(word)
1852
+ tags = blob.tags
1853
+ else:
1854
+ raise e
1855
+
1856
+ if not tags: return {}
1857
 
 
1858
  processed_lemmas_pos: Set[Tuple[str, str]] = set()
1859
 
1860
+ for tb_word, tag in tags:
1861
  tb_pos = get_wordnet_pos_tb(tag)
1862
  if not tb_pos: continue
1863
 
1864
  lemma = tb_word.lemmatize(tb_pos)
1865
  pos_map = {'n': "noun", 'v': "verb", 'a': "adjective", 'r': "adverb"}
1866
+ pos_key = pos_map.get(tb_pos)
1867
+ if not pos_key: continue
1868
 
1869
  if (lemma, pos_key) in processed_lemmas_pos: continue
1870
  processed_lemmas_pos.add((lemma, pos_key))
 
1901
  return final_result
1902
  except Exception as e:
1903
  log(f"TextBlob Engine FAILED: {e}")
 
1904
  return {}
1905
 
1906