Spaces:
Sleeping
Sleeping
Commit ·
29744eb
1
Parent(s): f91e01a
Simplify code and fix bug
Browse files
- chinesemeteor.py +3 -10
chinesemeteor.py
CHANGED
|
@@ -139,18 +139,15 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 139 |
nltk.download("punkt", quiet=True)
|
| 140 |
nltk.download('punkt_tab', quiet=True)
|
| 141 |
# CwnGraph auto-downloads on first use
|
| 142 |
-
|
| 143 |
-
def _tokenize_chinese(self, sentence):
|
| 144 |
-
"""Tokenize Chinese sentence using PyCantonese"""
|
| 145 |
-
return pycantonese.segment(sentence)
|
| 146 |
|
| 147 |
def _compute(self, predictions: List[str], references: List[str]) -> Dict[str, float]:
|
| 148 |
original_stdout = sys.stdout # store original output
|
| 149 |
sys.stdout = open(os.devnull, 'w')
|
| 150 |
|
| 151 |
try:
|
| 152 |
-
|
| 153 |
-
|
|
|
|
| 154 |
|
| 155 |
# --- Apply Real Chinese WordNet into METEOR algorithm ---
|
| 156 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
|
@@ -215,10 +212,6 @@ class ChineseMETEOR(evaluate.Metric):
|
|
| 215 |
|
| 216 |
scores = [
|
| 217 |
meteor_score.single_meteor_score(
|
| 218 |
-
#word_tokenize(ref),
|
| 219 |
-
#self._tokenize_chinese(ref),
|
| 220 |
-
#word_tokenize(hyp),
|
| 221 |
-
#self._tokenize_chinese(hyp),
|
| 222 |
ref,
|
| 223 |
hyp,
|
| 224 |
wordnet=chinese_wn
|
|
|
|
| 139 |
nltk.download("punkt", quiet=True)
|
| 140 |
nltk.download('punkt_tab', quiet=True)
|
| 141 |
# CwnGraph auto-downloads on first use
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
def _compute(self, predictions: List[str], references: List[str]) -> Dict[str, float]:
|
| 144 |
original_stdout = sys.stdout # store original output
|
| 145 |
sys.stdout = open(os.devnull, 'w')
|
| 146 |
|
| 147 |
try:
|
| 148 |
+
# Tokenize using PyCantonese
|
| 149 |
+
pred_seg = [pycantonese.segment(p.strip()) for p in predictions]
|
| 150 |
+
ref_seg = [pycantonese.segment(r.strip()) for r in references]
|
| 151 |
|
| 152 |
# --- Apply Real Chinese WordNet into METEOR algorithm ---
|
| 153 |
def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
|
|
|
|
| 212 |
|
| 213 |
scores = [
|
| 214 |
meteor_score.single_meteor_score(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
ref,
|
| 216 |
hyp,
|
| 217 |
wordnet=chinese_wn
|