Wind-xixi committed on
Commit
5e9461b
·
verified ·
1 Parent(s): 9b597a1

Update predictor.py

Browse files
Files changed (1) hide show
  1. predictor.py +43 -9
predictor.py CHANGED
@@ -149,6 +149,29 @@ class SentenceExtractor:
149
  sentences.append(s)
150
  return sentences
151
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  def _extract_relevant_sentences(self, text: str) -> Tuple[List[str], Dict[str, List[str]]]:
153
  """
154
  提取与关键词相关的句子
@@ -156,8 +179,7 @@ class SentenceExtractor:
156
  :return: 相关句子列表和按类别分组的句子字典
157
  """
158
  sentences = self._split_into_sentences(text)
159
- # 默认对所有分句进行评分,避免只命中少量关键词时数量过少
160
- relevant_sentences = list(sentences)
161
  categorized_sentences = {
162
  "main": [],
163
  "student_performance": {"positive": [], "negative": [], "nature": [], "suggestion": []},
@@ -166,14 +188,22 @@ class SentenceExtractor:
166
  }
167
 
168
  for sentence in sentences:
169
- # 检查是否包含主关键词(仅用于分类展示,不再决定是否计入评分)
 
 
170
  for category, keywords in self.main_keywords.items():
171
- if any(keyword in sentence for keyword in keywords):
172
- if sentence not in categorized_sentences["main"]:
173
- categorized_sentences["main"].append(sentence)
 
 
 
 
 
 
174
  break
175
 
176
- # 检查评估关键词库中的关键词
177
  for category in ["student_performance", "content_quality", "cross_scene"]:
178
  if category not in self.eval_keywords:
179
  continue
@@ -183,8 +213,12 @@ class SentenceExtractor:
183
  continue
184
 
185
  for keyword in self.eval_keywords[category][sentiment]:
186
- if keyword in sentence and sentence not in categorized_sentences[category][sentiment]:
187
- categorized_sentences[category][sentiment].append(sentence)
 
 
 
 
188
 
189
  return relevant_sentences, categorized_sentences
190
 
 
149
  sentences.append(s)
150
  return sentences
151
 
152
+ def _fuzzy_match_keyword(self, sentence: str, keyword: str) -> bool:
153
+ """
154
+ 模糊匹配关键词,支持部分匹配和相似词匹配
155
+ """
156
+ # 直接包含匹配
157
+ if keyword in sentence:
158
+ return True
159
+
160
+ # 去除标点符号后匹配
161
+ import string
162
+ sentence_clean = sentence.translate(str.maketrans('', '', string.punctuation))
163
+ keyword_clean = keyword.translate(str.maketrans('', '', string.punctuation))
164
+ if keyword_clean in sentence_clean:
165
+ return True
166
+
167
+ # 支持关键词的部分匹配(至少3个字符)
168
+ if len(keyword) >= 3:
169
+ for i in range(len(sentence) - len(keyword) + 1):
170
+ if sentence[i:i+len(keyword)] == keyword:
171
+ return True
172
+
173
+ return False
174
+
175
  def _extract_relevant_sentences(self, text: str) -> Tuple[List[str], Dict[str, List[str]]]:
176
  """
177
  提取与关键词相关的句子
 
179
  :return: 相关句子列表和按类别分组的句子字典
180
  """
181
  sentences = self._split_into_sentences(text)
182
+ relevant_sentences = []
 
183
  categorized_sentences = {
184
  "main": [],
185
  "student_performance": {"positive": [], "negative": [], "nature": [], "suggestion": []},
 
188
  }
189
 
190
  for sentence in sentences:
191
+ sentence_added = False
192
+
193
+ # 检查是否包含主关键词(使用模糊匹配)
194
  for category, keywords in self.main_keywords.items():
195
+ for keyword in keywords:
196
+ if self._fuzzy_match_keyword(sentence, keyword):
197
+ if not sentence_added:
198
+ relevant_sentences.append(sentence)
199
+ sentence_added = True
200
+ if sentence not in categorized_sentences["main"]:
201
+ categorized_sentences["main"].append(sentence)
202
+ break
203
+ if sentence_added:
204
  break
205
 
206
+ # 检查评估关键词库中的关键词(使用模糊匹配)
207
  for category in ["student_performance", "content_quality", "cross_scene"]:
208
  if category not in self.eval_keywords:
209
  continue
 
213
  continue
214
 
215
  for keyword in self.eval_keywords[category][sentiment]:
216
+ if self._fuzzy_match_keyword(sentence, keyword):
217
+ if not sentence_added:
218
+ relevant_sentences.append(sentence)
219
+ sentence_added = True
220
+ if sentence not in categorized_sentences[category][sentiment]:
221
+ categorized_sentences[category][sentiment].append(sentence)
222
 
223
  return relevant_sentences, categorized_sentences
224