Spaces:
Sleeping
Sleeping
Update predictor.py
Browse files- predictor.py +43 -9
predictor.py
CHANGED
|
@@ -149,6 +149,29 @@ class SentenceExtractor:
|
|
| 149 |
sentences.append(s)
|
| 150 |
return sentences
|
| 151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 152 |
def _extract_relevant_sentences(self, text: str) -> Tuple[List[str], Dict[str, List[str]]]:
|
| 153 |
"""
|
| 154 |
提取与关键词相关的句子
|
|
@@ -156,8 +179,7 @@ class SentenceExtractor:
|
|
| 156 |
:return: 相关句子列表和按类别分组的句子字典
|
| 157 |
"""
|
| 158 |
sentences = self._split_into_sentences(text)
|
| 159 |
-
|
| 160 |
-
relevant_sentences = list(sentences)
|
| 161 |
categorized_sentences = {
|
| 162 |
"main": [],
|
| 163 |
"student_performance": {"positive": [], "negative": [], "nature": [], "suggestion": []},
|
|
@@ -166,14 +188,22 @@ class SentenceExtractor:
|
|
| 166 |
}
|
| 167 |
|
| 168 |
for sentence in sentences:
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
for category, keywords in self.main_keywords.items():
|
| 171 |
-
|
| 172 |
-
if sentence
|
| 173 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
break
|
| 175 |
|
| 176 |
-
#
|
| 177 |
for category in ["student_performance", "content_quality", "cross_scene"]:
|
| 178 |
if category not in self.eval_keywords:
|
| 179 |
continue
|
|
@@ -183,8 +213,12 @@ class SentenceExtractor:
|
|
| 183 |
continue
|
| 184 |
|
| 185 |
for keyword in self.eval_keywords[category][sentiment]:
|
| 186 |
-
if
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
|
| 189 |
return relevant_sentences, categorized_sentences
|
| 190 |
|
|
|
|
| 149 |
sentences.append(s)
|
| 150 |
return sentences
|
| 151 |
|
| 152 |
+
def _fuzzy_match_keyword(self, sentence: str, keyword: str) -> bool:
|
| 153 |
+
"""
|
| 154 |
+
模糊匹配关键词,支持部分匹配和相似词匹配
|
| 155 |
+
"""
|
| 156 |
+
# 直接包含匹配
|
| 157 |
+
if keyword in sentence:
|
| 158 |
+
return True
|
| 159 |
+
|
| 160 |
+
# 去除标点符号后匹配
|
| 161 |
+
import string
|
| 162 |
+
sentence_clean = sentence.translate(str.maketrans('', '', string.punctuation))
|
| 163 |
+
keyword_clean = keyword.translate(str.maketrans('', '', string.punctuation))
|
| 164 |
+
if keyword_clean in sentence_clean:
|
| 165 |
+
return True
|
| 166 |
+
|
| 167 |
+
# 支持关键词的部分匹配(至少3个字符)
|
| 168 |
+
if len(keyword) >= 3:
|
| 169 |
+
for i in range(len(sentence) - len(keyword) + 1):
|
| 170 |
+
if sentence[i:i+len(keyword)] == keyword:
|
| 171 |
+
return True
|
| 172 |
+
|
| 173 |
+
return False
|
| 174 |
+
|
| 175 |
def _extract_relevant_sentences(self, text: str) -> Tuple[List[str], Dict[str, List[str]]]:
|
| 176 |
"""
|
| 177 |
提取与关键词相关的句子
|
|
|
|
| 179 |
:return: 相关句子列表和按类别分组的句子字典
|
| 180 |
"""
|
| 181 |
sentences = self._split_into_sentences(text)
|
| 182 |
+
relevant_sentences = []
|
|
|
|
| 183 |
categorized_sentences = {
|
| 184 |
"main": [],
|
| 185 |
"student_performance": {"positive": [], "negative": [], "nature": [], "suggestion": []},
|
|
|
|
| 188 |
}
|
| 189 |
|
| 190 |
for sentence in sentences:
|
| 191 |
+
sentence_added = False
|
| 192 |
+
|
| 193 |
+
# 检查是否包含主关键词(使用模糊匹配)
|
| 194 |
for category, keywords in self.main_keywords.items():
|
| 195 |
+
for keyword in keywords:
|
| 196 |
+
if self._fuzzy_match_keyword(sentence, keyword):
|
| 197 |
+
if not sentence_added:
|
| 198 |
+
relevant_sentences.append(sentence)
|
| 199 |
+
sentence_added = True
|
| 200 |
+
if sentence not in categorized_sentences["main"]:
|
| 201 |
+
categorized_sentences["main"].append(sentence)
|
| 202 |
+
break
|
| 203 |
+
if sentence_added:
|
| 204 |
break
|
| 205 |
|
| 206 |
+
# 检查评估关键词库中的关键词(使用模糊匹配)
|
| 207 |
for category in ["student_performance", "content_quality", "cross_scene"]:
|
| 208 |
if category not in self.eval_keywords:
|
| 209 |
continue
|
|
|
|
| 213 |
continue
|
| 214 |
|
| 215 |
for keyword in self.eval_keywords[category][sentiment]:
|
| 216 |
+
if self._fuzzy_match_keyword(sentence, keyword):
|
| 217 |
+
if not sentence_added:
|
| 218 |
+
relevant_sentences.append(sentence)
|
| 219 |
+
sentence_added = True
|
| 220 |
+
if sentence not in categorized_sentences[category][sentiment]:
|
| 221 |
+
categorized_sentences[category][sentiment].append(sentence)
|
| 222 |
|
| 223 |
return relevant_sentences, categorized_sentences
|
| 224 |
|