Spaces:
Sleeping
Sleeping
Update predictor.py
Browse files- predictor.py +47 -6
predictor.py
CHANGED
|
@@ -155,6 +155,46 @@ class SentenceExtractor:
|
|
| 155 |
keyword_clean = keyword.translate(trans)
|
| 156 |
return keyword_clean in sentence_clean
|
| 157 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
def _extract_relevant_sentences(self, text: str) -> List[str]:
|
| 159 |
sentences = self._split_into_sentences(text)
|
| 160 |
relevant_sentences = []
|
|
@@ -188,8 +228,13 @@ class SentenceExtractor:
|
|
| 188 |
continue
|
| 189 |
for keyword in self.eval_keywords[category].get("positive", []):
|
| 190 |
if self._fuzzy_match_keyword(text, keyword):
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
for keyword in self.eval_keywords[category].get("negative", []):
|
| 194 |
if self._fuzzy_match_keyword(text, keyword):
|
| 195 |
negative_count += 1
|
|
@@ -248,7 +293,3 @@ class SentenceExtractor:
|
|
| 248 |
}
|
| 249 |
|
| 250 |
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
|
|
|
| 155 |
keyword_clean = keyword.translate(trans)
|
| 156 |
return keyword_clean in sentence_clean
|
| 157 |
|
| 158 |
+
def _is_negated_positive(self, text: str, keyword: str) -> bool:
|
| 159 |
+
"""检测积极关键词是否被否定词修饰,例如:
|
| 160 |
+
- 没有/无/不/非/未/并不/毫无 + 关键词
|
| 161 |
+
- 对以“有”开头的积极词(如“有创新性”),也匹配“没有/无/不/未/并不/毫无 + 去掉‘有’后的部分(如“创新性”)”
|
| 162 |
+
- 缺乏/不足/欠缺/缺少/不具备 + 关键词 或 关键词去“有”后的部分
|
| 163 |
+
"""
|
| 164 |
+
if not keyword:
|
| 165 |
+
return False
|
| 166 |
+
|
| 167 |
+
sentence = text.strip()
|
| 168 |
+
neg_prefixes = [
|
| 169 |
+
"没有", "没", "无", "不", "非", "未", "并不", "并没有", "并无", "毫无"
|
| 170 |
+
]
|
| 171 |
+
lack_prefixes = [
|
| 172 |
+
"缺乏", "不足", "欠缺", "缺少", "不具备", "不够"
|
| 173 |
+
]
|
| 174 |
+
|
| 175 |
+
# 构建安全的正则片段
|
| 176 |
+
import re
|
| 177 |
+
def any_prefix(prefixes: List[str]) -> str:
|
| 178 |
+
return "(?:" + "|".join(re.escape(p) for p in prefixes) + ")"
|
| 179 |
+
|
| 180 |
+
patterns: List[str] = []
|
| 181 |
+
# 直接:否定前缀 + 关键词
|
| 182 |
+
patterns.append(rf"{any_prefix(neg_prefixes)}\s*{re.escape(keyword)}")
|
| 183 |
+
|
| 184 |
+
# 直接:缺乏类前缀 + 关键词
|
| 185 |
+
patterns.append(rf"{any_prefix(lack_prefixes)}\s*{re.escape(keyword)}")
|
| 186 |
+
|
| 187 |
+
# 若积极词以“有”开头,额外匹配去掉“有”的尾部(例如 ‘有创新性’ → ‘创新性’)
|
| 188 |
+
if keyword.startswith("有") and len(keyword) > 1:
|
| 189 |
+
tail = keyword[1:]
|
| 190 |
+
patterns.append(rf"{any_prefix(neg_prefixes)}\s*{re.escape(tail)}")
|
| 191 |
+
patterns.append(rf"{any_prefix(lack_prefixes)}\s*{re.escape(tail)}")
|
| 192 |
+
|
| 193 |
+
for pat in patterns:
|
| 194 |
+
if re.search(pat, sentence):
|
| 195 |
+
return True
|
| 196 |
+
return False
|
| 197 |
+
|
| 198 |
def _extract_relevant_sentences(self, text: str) -> List[str]:
|
| 199 |
sentences = self._split_into_sentences(text)
|
| 200 |
relevant_sentences = []
|
|
|
|
| 228 |
continue
|
| 229 |
for keyword in self.eval_keywords[category].get("positive", []):
|
| 230 |
if self._fuzzy_match_keyword(text, keyword):
|
| 231 |
+
# 遇到被否定的积极词(如“没有创新性”含“有创新性”),按消极计分
|
| 232 |
+
if self._is_negated_positive(text, keyword):
|
| 233 |
+
negative_count += 1
|
| 234 |
+
total_score -= 1
|
| 235 |
+
else:
|
| 236 |
+
positive_count += 1
|
| 237 |
+
total_score += 1
|
| 238 |
for keyword in self.eval_keywords[category].get("negative", []):
|
| 239 |
if self._fuzzy_match_keyword(text, keyword):
|
| 240 |
negative_count += 1
|
|
|
|
| 293 |
}
|
| 294 |
|
| 295 |
|
|
|
|
|
|
|
|
|
|
|
|