raptorkwok committed on
Commit
e6953e3
·
1 Parent(s): 8f33fa3

Initial Commit

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +18 -44
  3. chinesemeteor.py +164 -63
  4. requirements.txt +5 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.bak
README.md CHANGED
@@ -1,50 +1,24 @@
1
  ---
2
- title: ChineseMETEOR
3
- datasets:
4
- -
5
  tags:
6
- - evaluate
7
- - metric
8
- description: "TODO: add a description here"
9
- sdk: gradio
10
- sdk_version: 3.19.1
11
- app_file: app.py
12
- pinned: false
13
  ---
14
 
15
- # Metric Card for ChineseMETEOR
16
 
17
- ***Module Card Instructions:*** *Fill out the following subsections. Feel free to take a look at existing metric cards if you'd like examples.*
18
 
19
- ## Metric Description
20
- *Give a brief overview of this metric, including what task(s) it is usually used for, if any.*
21
-
22
- ## How to Use
23
- *Give general statement of how to use the metric*
24
-
25
- *Provide simplest possible example for using the metric*
26
-
27
- ### Inputs
28
- *List all input arguments in the format below*
29
- - **input_field** *(type): Definition of input, with explanation if necessary. State any default value(s).*
30
-
31
- ### Output Values
32
-
33
- *Explain what this metric outputs and provide an example of what the metric output looks like. Modules should return a dictionary with one or multiple key-value pairs, e.g. {"bleu" : 6.02}*
34
-
35
- *State the range of possible values that the metric's output can take, as well as what in that range is considered good. For example: "This metric can take on any value between 0 and 100, inclusive. Higher scores are better."*
36
-
37
- #### Values from Popular Papers
38
- *Give examples, preferrably with links to leaderboards or publications, to papers that have reported this metric, along with the values they have reported.*
39
-
40
- ### Examples
41
- *Give code examples of the metric being used. Try to include examples that clear up any potential ambiguity left from the metric description above. If possible, provide a range of examples that show both typical and atypical results, as well as examples where a variety of input parameters are passed.*
42
-
43
- ## Limitations and Bias
44
- *Note any known limitations or biases that the metric has, with links and references if possible.*
45
-
46
- ## Citation
47
- *Cite the source where this metric was introduced.*
48
-
49
- ## Further References
50
- *Add any useful further references.*
 
1
  ---
2
+ library_name: evaluate
 
 
3
  tags:
4
+ - nlp
5
+ - translation
6
+ - chinese
7
+ - meteor
8
+ - jieba
9
+ license: apache-2.0
 
10
  ---
11
 
12
+ # METEOR (Chinese) with Jieba
13
 
14
+ Classic METEOR score, but **pre-segmented with Jieba** so it works on raw Chinese text.
15
 
16
+ ```python
17
+ import evaluate
18
+ meteor = evaluate.load("raptorkwok/chinesemeteor")
19
+ results = meteor.compute(
20
+ predictions=["我在這裡吃飯"],
21
+ references=["我在這裡吃飯"]
22
+ )
23
+ print(results)
24
+ # {'meteor': 1.0, 'scores': [1.0]}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
chinesemeteor.py CHANGED
@@ -1,95 +1,196 @@
1
- # Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- """TODO: Add a description here."""
15
-
16
- import evaluate
17
  import datasets
 
 
 
 
 
 
 
18
 
 
 
 
 
19
 
20
- # TODO: Add BibTeX citation
21
- _CITATION = """\
22
- @InProceedings{huggingface:module,
23
- title = {A great new module},
24
- authors={huggingface, Inc.},
25
- year={2020}
26
- }
27
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
- # TODO: Add description of the module here
30
  _DESCRIPTION = """\
31
- This new module is designed to solve this great ML task and is crafted with a lot of care.
32
  """
33
 
34
-
35
- # TODO: Add description of the arguments of the module here
36
  _KWARGS_DESCRIPTION = """
37
  Calculates how good are predictions given some references, using certain scores
38
  Args:
39
- predictions: list of predictions to score. Each predictions
40
- should be a string with tokens separated by spaces.
41
- references: list of reference for each prediction. Each
42
- reference should be a string with tokens separated by spaces.
43
  Returns:
44
- accuracy: description of the first score,
45
- another_score: description of the second score,
 
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
- >>> my_new_module = evaluate.load("my_new_module")
51
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
- {'accuracy': 1.0}
54
  """
55
 
56
- # TODO: Define external resources urls if needed
57
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
58
-
59
-
60
- @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
61
  class ChineseMETEOR(evaluate.Metric):
62
- """TODO: Short description of my evaluation module."""
63
-
64
  def _info(self):
65
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
66
  return evaluate.MetricInfo(
67
- # This is the description that will appear on the modules page.
68
  module_type="metric",
69
  description=_DESCRIPTION,
70
- citation=_CITATION,
 
 
 
 
 
71
  inputs_description=_KWARGS_DESCRIPTION,
72
- # This defines the format of each prediction and reference
73
- features=datasets.Features({
74
- 'predictions': datasets.Value('int64'),
75
- 'references': datasets.Value('int64'),
76
- }),
 
77
  # Homepage of the module for documentation
78
- homepage="http://module.homepage",
79
  # Additional links to the codebase or references
80
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
81
- reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
- def _download_and_prepare(self, dl_manager):
85
  """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
 
 
 
 
87
  pass
88
 
89
- def _compute(self, predictions, references):
90
- """Returns the scores"""
91
- # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
95
- }
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ METEOR (Chinese) with Jieba pre-segmentation + Real CwnGraph Chinese WordNet
4
+ HuggingFace evaluate metric template
5
+ """
6
+ import jieba_fast as jieba
 
 
 
 
 
 
 
 
 
 
7
  import datasets
8
+ from typing import List, Dict
9
+ import numpy as np
10
+ from nltk.translate import meteor_score
11
+ from nltk import word_tokenize
12
+ #import nltk
13
+ import evaluate
14
+ import re
15
 
16
+ # Download once
17
+ #nltk.download("wordnet", quiet=True)
18
+ #nltk.download("omw-1.4", quiet=True)
19
+ #nltk.download("punkt", quiet=True)
20
 
21
+ # ------------------------------------------------------------------- #
22
+ # REAL Chinese WordNet (CwnGraph) Integration
23
+ # ------------------------------------------------------------------- #
24
+ _cwn = None
25
+ def _load_cwn():
26
+ global _cwn
27
+ if _cwn is None:
28
+ try:
29
+ from CwnGraph import CwnImage
30
+ print("Loading Chinese WordNet (CwnGraph, first time only)...")
31
+ _cwn = CwnImage.latest()
32
+ except ImportError:
33
+ raise ImportError("CwnGraph failed to load. Run: pip install CwnGraph")
34
+ return _cwn
35
+
36
+ # Helper to get lemma name (with fallback for API versions)
37
+ def _get_lemma_name(lemma):
38
+ try:
39
+ return lemma.name
40
+ except AttributeError:
41
+ return str(lemma).split(': ')[1].split('_')[0]
42
+
43
+ # Custom Lemma & Synset for NLTK compatibility
44
+ class _CwnLemma:
45
+ def __init__(self, name): self._name = name
46
+ def name(self): return self._name
47
+
48
+ class _CwnSynset:
49
+ def __init__(self, lemmas, synset_id):
50
+ self._lemmas = lemmas
51
+ self._id = synset_id
52
+ def lemmas(self):
53
+ return [_CwnLemma(name) for name in self._lemmas]
54
+
55
+ # ------------------------------------------------------------------- #
56
+ # HuggingFace Evaluation Metric
57
+ # ------------------------------------------------------------------- #
58
 
 
59
# Short description shown on the metric's hub page (passed to MetricInfo in _info()).
_DESCRIPTION = """\
This evaluation metric is tailor-made to evaluate the translation quality of Chinese translation.
"""
62
 
 
 
63
  _KWARGS_DESCRIPTION = """
64
  Calculates how good are predictions given some references, using certain scores
65
  Args:
66
+ predictions (str): translation sentence to score.
67
+ references (str): reference sentence for each translation.
 
 
68
  Returns:
69
+ meteor: the average METEOR score
70
+ scores: the METEOR score for each sentence pairs
71
+
72
  Examples:
73
  Examples should be written in doctest format, and should illustrate how
74
  to use the function.
75
 
76
+ >>> cmeteor = evaluate.load("raptorkwok/chinesemeteor")
77
+ >>> results = cmeteor.compute(references=["Reference Sentence in Chinese"], predictions=["Predicted Sentence in Chinese"])
78
  >>> print(results)
79
+ {'meteor': 0.5111111111111111, 'scores': [0.5111111111111111]}
80
  """
81
 
82
+ # ------------------------------------------------------------------- #
83
+ # HuggingFace evaluate template
84
+ # ------------------------------------------------------------------- #
 
 
85
class ChineseMETEOR(evaluate.Metric):
    """METEOR for Chinese text: Jieba segmentation + Chinese WordNet synonyms.

    Raw (unsegmented) Chinese predictions/references are segmented with
    Jieba, then scored with NLTK's METEOR, with synonym matching backed by
    the CwnGraph Chinese WordNet instead of NLTK's English WordNet.
    """

    def _info(self):
        """Return the evaluate.MetricInfo describing this module."""
        return evaluate.MetricInfo(
            module_type="metric",
            description=_DESCRIPTION,
            citation="""@inproceedings{denkowski-lavie-2014-meteor,
    title = "Meteor Universal: Language Specific Translation Evaluation for Any Target Language",
    author = "Denkowski, Michael and Lavie, Alon",
    booktitle = "Proceedings of the Ninth Workshop on Statistical Machine Translation",
    year = "2014"
}""",
            inputs_description=_KWARGS_DESCRIPTION,
            # One prediction string is paired with one reference string per example.
            features=datasets.Features(
                {
                    "predictions": datasets.Value("string"),
                    "references": datasets.Value("string"),
                }
            ),
            # Homepage of the module for documentation
            # NOTE(review): placeholder URL -- replace with the real project page.
            homepage="https://yourappapp.com",
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/nltk/nltk"],
            reference_urls=["https://www.cs.cmu.edu/~alavie/METEOR/"],
        )

    def _download_and_prepare(self, dl_manager) -> None:
        """Optional: download external resources useful to compute the scores"""
        # CwnGraph auto-downloads on first use
        # NLTK corpora used by meteor_score / word_tokenize; no-ops when already cached.
        import nltk
        nltk.download("wordnet", quiet=True)
        nltk.download("omw-1.4", quiet=True)
        nltk.download("punkt", quiet=True)
        pass

    def _compute(self, predictions: List[str], references: List[str]) -> Dict[str, float]:
        """Return {"meteor": mean score, "scores": per-pair METEOR scores}."""
        # Segment with Jieba and re-join on spaces so NLTK's word_tokenize
        # below recovers the segments as individual tokens.
        pred_seg = [" ".join(jieba.cut(p.strip())) for p in predictions]
        ref_seg = [" ".join(jieba.cut(r.strip())) for r in references]

        # --- FORCE Real CWN INTO METEOR ---
        # Adapter: look a word up in the Chinese WordNet and return objects
        # shaped like NLTK synsets (see _CwnSynset) for METEOR's synonym stage.
        def _cwn_synsets(self, word, pos=None): # Matches NLTK method call
            if not isinstance(word, str) or not word.strip():
                print(f"DEBUG: Skipping non-string input: {type(word)}")
                return []
            cwn = _load_cwn()
            try:
                # Use escaped regex for exact match (CwnGraph expects string pattern)
                pattern = f"^{re.escape(word)}$"
                lemmas = cwn.find_lemma(pattern)
            except Exception as e:
                print(f"DEBUG: Error querying CWN for '{word}': {e}")
                return []
            # FIXED: Use _get_lemma_name for comparison (handles missing .name)
            exact_lemmas = [l for l in lemmas if _get_lemma_name(l) == word]
            if not exact_lemmas:
                print(f"DEBUG: No exact lemma found for '{word}'")
                return []
            synsets_list = []
            seen_synset_ids = set()
            for lemma in exact_lemmas:
                for sense in lemma.senses:
                    synset = sense.synset
                    if synset:
                        # The synset id is only used for de-duplication below.
                        try:
                            synset_id = synset.id
                        except AttributeError:
                            synset_id = str(synset)
                        if synset_id not in seen_synset_ids:
                            seen_synset_ids.add(synset_id)
                            try:
                                # Presumably newer CwnGraph: synset exposes .lemmas
                                # directly -- TODO confirm against CwnGraph API.
                                synset_lemmas = synset.lemmas
                                syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
                            except AttributeError:
                                # Fallback: collect lemmas sense-by-sense.
                                synset_lemmas = []
                                for s in synset.senses:
                                    try:
                                        # Access the single lemma via lemmas[0]
                                        # NOTE(review): rebinding `lemma` shadows the
                                        # outer loop variable; the active `lemma.senses`
                                        # iterator is unaffected, but it reads confusingly.
                                        lemma = s.lemmas[0]
                                        synset_lemmas.append(lemma)
                                    except (AttributeError, IndexError, TypeError):
                                        try:
                                            lemma = s.lemma
                                            synset_lemmas.append(lemma)
                                        except AttributeError:
                                            print(f"DEBUG: Could not extract lemma from sense {s}")
                                            continue
                                syn_lemma_names = [_get_lemma_name(l) for l in synset_lemmas]
                            syn_lemmas_set = set(syn_lemma_names)
                            if syn_lemmas_set:
                                synsets_list.append(_CwnSynset(list(syn_lemmas_set), synset_id))
            print(f"DEBUG: Found {len(synsets_list)} synsets for '{word}': {synsets_list[0]._lemmas if synsets_list else []}")
            return synsets_list

        # Use class for proper method binding
        class ChineseWordNet:
            # Duck-types nltk.corpus.wordnet's `synsets(word, pos)` for meteor_score.
            def synsets(self, word, pos=None):
                return _cwn_synsets(self, word, pos)

        chinese_wn = ChineseWordNet()

        # Score each (reference, hypothesis) pair; the tokens are the Jieba
        # segments produced above.
        scores = [
            meteor_score.single_meteor_score(
                word_tokenize(ref),
                word_tokenize(hyp),
                wordnet=chinese_wn
            )
            for ref, hyp in zip(ref_seg, pred_seg)
        ]

        # NOTE(review): with empty `predictions`, np.mean([]) yields nan (with a
        # RuntimeWarning) -- confirm whether callers ever pass empty batches.
        return {
            "meteor": float(np.mean(scores)),
            "scores": scores,
        }
requirements.txt CHANGED
@@ -1 +1,5 @@
1
- git+https://github.com/huggingface/evaluate@main
 
 
 
 
 
1
+ evaluate>=0.4.1
2
+ jieba_fast
3
+ CwnGraph>=0.3.0
4
+ nltk>=3.8
5
+ numpy