multimodalart HF Staff committed on
Commit
0196c78
·
verified ·
1 Parent(s): c25f3e5

Update preprocess/tools/g2p.py

Browse files
Files changed (1) hide show
  1. preprocess/tools/g2p.py +15 -0
preprocess/tools/g2p.py CHANGED
@@ -1,4 +1,19 @@
1
  import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  import ToJyutping
4
  from g2pM import G2pM
 
1
  import re
2
+ import sys
3
+ import nltk
4
+
5
+ # Ensure NLTK data is available
6
+ try:
7
+ nltk.data.find('taggers/averaged_perceptron_tagger_eng')
8
+ except LookupError:
9
+ print("[g2p] Downloading missing NLTK resource: averaged_perceptron_tagger_eng", file=sys.stderr)
10
+ nltk.download('averaged_perceptron_tagger_eng', quiet=True)
11
+
12
+ try:
13
+ nltk.data.find('corpora/cmudict')
14
+ except LookupError:
15
+ print("[g2p] Downloading missing NLTK resource: cmudict", file=sys.stderr)
16
+ nltk.download('cmudict', quiet=True)
17
 
18
  import ToJyutping
19
  from g2pM import G2pM