WildnerveAI committed on
Commit
a9865ad
·
verified ·
1 Parent(s): aaf3189

Upload preprocess.py

Browse files
Files changed (1) hide show
  1. preprocess.py +10 -0
preprocess.py CHANGED
@@ -20,6 +20,16 @@ try:
20
  except Exception as e:
21
  logger.warning(f"NLTK data download failed: {e}")
22
 
 
 
 
 
 
 
 
 
 
 
23
  def get_tokenizer_wrapper():
24
  try:
25
  tokenizer = get_tokenizer("bert-base-uncased")
 
20
  except Exception as e:
21
  logger.warning(f"NLTK data download failed: {e}")
22
 
23
+ # Guarded NLTK downloads
24
+ if hasattr(nltk, "download"):
25
+ try:
26
+ nltk.download('punkt', quiet=True)
27
+ nltk.download('averaged_perceptron_tagger', quiet=True)
28
+ except Exception as e:
29
+ logger.warning(f"NLTK download failed: {e}")
30
+ else:
31
+ logger.warning("NLTK.download not available; skipping corpus downloads")
32
+
33
  def get_tokenizer_wrapper():
34
  try:
35
  tokenizer = get_tokenizer("bert-base-uncased")