Spaces:
Running
on
Zero
Running
on
Zero
Update response_processor.py
Browse files- response_processor.py +32 -135
response_processor.py
CHANGED
|
@@ -1218,157 +1218,54 @@ class ResponseProcessor:
|
|
| 1218 |
if not re.search(r'[.!?]', response):
|
| 1219 |
raise ResponseProcessingError("Response lacks proper sentence structure")
|
| 1220 |
|
| 1221 |
-
|
| 1222 |
"""
|
| 1223 |
-
|
| 1224 |
-
|
| 1225 |
Args:
|
| 1226 |
response: 包含可能注釋的回應
|
| 1227 |
-
|
| 1228 |
Returns:
|
| 1229 |
str: 移除注釋後的回應
|
| 1230 |
"""
|
| 1231 |
try:
|
| 1232 |
-
|
| 1233 |
-
|
| 1234 |
-
|
| 1235 |
-
original_response = response
|
| 1236 |
-
|
| 1237 |
-
# 階段1:移除明確的完整問題句型
|
| 1238 |
-
complete_problem_patterns = [
|
| 1239 |
-
# 完整的破碎句型(貪婪匹配)
|
| 1240 |
-
r'Note\s+that\s+I\s+have\s*[,.\s]*.*?(?:natural\s+flow|concise\s+manner)[,.\s]*',
|
| 1241 |
-
|
| 1242 |
-
# 從 Note that 開始到句號結束的整個片段
|
| 1243 |
-
r'Note\s+that\s+I\s+have\s*[,.\s]*.*?\.',
|
| 1244 |
-
|
| 1245 |
-
# 處理包含 avoiding assumptions 的整個片段
|
| 1246 |
-
r'[,.\s]*avoiding\s+any\s+assumptions.*?(?:manner|flow|locations)[,.\s]*',
|
| 1247 |
-
]
|
| 1248 |
-
|
| 1249 |
-
cleaned_text = response
|
| 1250 |
-
for pattern in complete_problem_patterns:
|
| 1251 |
-
cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.DOTALL)
|
| 1252 |
-
|
| 1253 |
-
# 階段2:移除具體的問題關鍵詞組合
|
| 1254 |
-
specific_fragments = [
|
| 1255 |
-
# 移除 "I have also" 相關片段
|
| 1256 |
-
r'\bI\s+have\s+also\s*[,.\s]*(?:and\s+detail\s+accuracy\s+rule\s*[,.\s]*)?',
|
| 1257 |
-
|
| 1258 |
-
# 移除 "and their locations" 孤立片段
|
| 1259 |
-
r'[,.\s]*and\s+their\s+locations[,.\s]*',
|
| 1260 |
-
|
| 1261 |
-
# 移除 "on describing in a clear" 片段
|
| 1262 |
-
r'[,.\s]*on\s+describing\s+in\s+a\s+clear(?:\s+and\s+concise)?(?:\s+manner)?[,.\s]*',
|
| 1263 |
-
|
| 1264 |
-
# 移除 "detail accuracy rule" 相關
|
| 1265 |
-
r'[,.\s]*(?:and\s+)?detail\s+accuracy\s+rule[,.\s]*',
|
| 1266 |
-
|
| 1267 |
-
# 移除孤立的 "avoiding any assumptions"
|
| 1268 |
-
r'[,.\s]*avoiding\s+any\s+assumptions[,.\s]*',
|
| 1269 |
-
|
| 1270 |
-
# 移除 "Additionally, I have" 開頭的破碎片段
|
| 1271 |
-
r'Additionally,?\s*I\s+have\s*[,.\s]*(?:and\s+have\s+focused\s*[,.\s]*)?',
|
| 1272 |
-
|
| 1273 |
-
# 移除 "using transitional phrases" 相關
|
| 1274 |
-
r'[,.\s]*using\s+transitional\s+phrases(?:\s+and\s+varying\s+sentence\s+structures)?[,.\s]*',
|
| 1275 |
-
|
| 1276 |
-
# 移除 "to create a natural flow"
|
| 1277 |
-
r'[,.\s]*to\s+create\s+a\s+natural\s+flow[,.\s]*',
|
| 1278 |
-
]
|
| 1279 |
-
|
| 1280 |
-
for pattern in specific_fragments:
|
| 1281 |
-
cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE)
|
| 1282 |
-
|
| 1283 |
-
# 階段3:移除任何以問題關鍵詞開頭的殘留片段
|
| 1284 |
-
problem_starters = [
|
| 1285 |
-
r'^[,.\s]*Note\s+that.*?[,.\s]*',
|
| 1286 |
-
r'^[,.\s]*I\s+have\s+(?:strictly\s+)?(?:adhered|followed|ensured).*?[,.\s]*',
|
| 1287 |
-
r'^[,.\s]*avoiding\s+any.*?[,.\s]*',
|
| 1288 |
-
r'^[,.\s]*Additionally.*?[,.\s]*',
|
| 1289 |
-
]
|
| 1290 |
-
|
| 1291 |
-
for pattern in problem_starters:
|
| 1292 |
-
cleaned_text = re.sub(pattern, '', cleaned_text, flags=re.IGNORECASE | re.MULTILINE)
|
| 1293 |
-
|
| 1294 |
-
# 階段4:清理標點符號和格式問題
|
| 1295 |
-
# 移除多餘的逗號和句號
|
| 1296 |
-
cleaned_text = re.sub(r'\s*,\s*,+\s*', ', ', cleaned_text)
|
| 1297 |
-
cleaned_text = re.sub(r'\s*\.+\s*\.+\s*', '. ', cleaned_text)
|
| 1298 |
-
cleaned_text = re.sub(r'\s*,\s*\.\s*', '. ', cleaned_text)
|
| 1299 |
-
|
| 1300 |
-
# 移除開頭和結尾的標點符號
|
| 1301 |
-
cleaned_text = re.sub(r'^[,.\s]+', '', cleaned_text)
|
| 1302 |
-
cleaned_text = re.sub(r'[,.\s]+$', '', cleaned_text)
|
| 1303 |
-
|
| 1304 |
-
# 修復句子間的標點問題
|
| 1305 |
-
cleaned_text = re.sub(r'([.!?])\s*,\s*([A-Z])', r'\1 \2', cleaned_text)
|
| 1306 |
-
cleaned_text = re.sub(r',\s*([A-Z])', r'. \1', cleaned_text)
|
| 1307 |
-
|
| 1308 |
-
# 階段5:傳統段落級處理(保持原有邏輯)
|
| 1309 |
-
traditional_note_patterns = [
|
| 1310 |
r'(?:^|\n)Note:.*?(?:\n|$)',
|
| 1311 |
r'(?:^|\n)I have (?:followed|adhered to|ensured).*?(?:\n|$)',
|
| 1312 |
r'(?:^|\n)This description (?:follows|adheres to|maintains).*?(?:\n|$)',
|
| 1313 |
r'(?:^|\n)The enhanced description (?:maintains|preserves).*?(?:\n|$)'
|
| 1314 |
]
|
| 1315 |
-
|
| 1316 |
-
|
| 1317 |
-
|
|
|
|
|
|
|
| 1318 |
if len(paragraphs) == 1:
|
| 1319 |
-
for pattern in
|
| 1320 |
paragraphs[0] = re.sub(pattern, '', paragraphs[0], flags=re.IGNORECASE)
|
| 1321 |
-
|
| 1322 |
-
|
| 1323 |
-
|
| 1324 |
-
|
| 1325 |
-
|
| 1326 |
-
|
| 1327 |
-
|
| 1328 |
-
|
| 1329 |
-
break
|
| 1330 |
-
|
| 1331 |
-
if paragraph.lower().startswith(('note:', 'please note:', 'remember:')):
|
| 1332 |
is_note = True
|
| 1333 |
-
|
| 1334 |
-
|
| 1335 |
-
|
| 1336 |
-
|
| 1337 |
-
|
| 1338 |
-
|
| 1339 |
-
|
| 1340 |
-
|
| 1341 |
-
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
# 確保句子以適當的標點結尾
|
| 1345 |
-
result = result.strip()
|
| 1346 |
-
if result and not result.endswith(('.', '!', '?')):
|
| 1347 |
-
result += '.'
|
| 1348 |
-
|
| 1349 |
-
# 最終檢查:如果結果太短,使用更保守的方法
|
| 1350 |
-
if len(result.split()) < 5:
|
| 1351 |
-
conservative_result = original_response
|
| 1352 |
-
# 只移除最明顯的問題片段
|
| 1353 |
-
conservative_patterns = [
|
| 1354 |
-
r'Note\s+that\s+I\s+have.*?manner[,.\s]*',
|
| 1355 |
-
r'avoiding\s+any\s+assumptions.*?locations[,.\s]*',
|
| 1356 |
-
r'Additionally,?\s*I\s+have.*?flow[,.\s]*'
|
| 1357 |
-
]
|
| 1358 |
-
for pattern in conservative_patterns:
|
| 1359 |
-
conservative_result = re.sub(pattern, '', conservative_result, flags=re.IGNORECASE)
|
| 1360 |
-
|
| 1361 |
-
conservative_result = re.sub(r'\s+', ' ', conservative_result).strip()
|
| 1362 |
-
return conservative_result if conservative_result else original_response
|
| 1363 |
-
|
| 1364 |
-
return result
|
| 1365 |
-
|
| 1366 |
-
# 如果所有處理後結果為空,返回原始內容
|
| 1367 |
-
return original_response
|
| 1368 |
-
|
| 1369 |
except Exception as e:
|
| 1370 |
-
|
| 1371 |
-
self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
|
| 1372 |
return response
|
| 1373 |
|
| 1374 |
def get_processor_info(self) -> Dict[str, Any]:
|
|
|
|
| 1218 |
if not re.search(r'[.!?]', response):
|
| 1219 |
raise ResponseProcessingError("Response lacks proper sentence structure")
|
| 1220 |
|
| 1221 |
+
def remove_explanatory_notes(self, response: str) -> str:
    """
    Remove explanatory notes and meta-commentary from a response.

    A line counts as a note when it begins (case-insensitively, with no
    leading whitespace) with one of:

    * ``Note:``
    * ``I have followed`` / ``I have adhered to`` / ``I have ensured``
    * ``This description follows`` / ``adheres to`` / ``maintains``
    * ``The enhanced description maintains`` / ``preserves``

    Behaviour depends on how many blank-line-separated paragraphs the
    response contains:

    * Exactly one paragraph: only the note lines are removed; the
      remaining lines keep their original line breaks.
    * Any other count: every paragraph that contains a note line, or that
      starts with ``note:``, ``please note:`` or ``remember:``, is dropped
      as a whole; the remaining paragraphs are re-joined with a blank line.

    Args:
        response: Response text that may contain explanatory notes.

    Returns:
        str: The response with notes removed. If processing raises, the
        error is logged and the input is returned unchanged.
    """
    try:
        # Prefixes that mark a line as an explanatory note. Anchored with
        # match() per line instead of consuming the surrounding newlines:
        # the previous '(?:^|\n)...(?:\n|$)' patterns swallowed both line
        # breaks, which glued neighbouring lines together and let the
        # second of two consecutive note lines slip through.
        note_line = re.compile(
            r'(?:Note:'
            r'|I have (?:followed|adhered to|ensured)'
            r'|This description (?:follows|adheres to|maintains)'
            r'|The enhanced description (?:maintains|preserves))',
            re.IGNORECASE,
        )

        def has_note_line(text: str) -> bool:
            """Return True when any line of *text* starts with a note prefix."""
            return any(note_line.match(line) for line in text.split('\n'))

        # Paragraphs are separated by a blank line; empty ones are ignored.
        paragraphs = [p.strip() for p in response.split('\n\n') if p.strip()]

        # Single paragraph: strip only the note lines, keep everything else.
        if len(paragraphs) == 1:
            kept_lines = [
                line for line in paragraphs[0].split('\n')
                if not note_line.match(line)
            ]
            return '\n'.join(kept_lines).strip()

        # Several paragraphs: drop whole paragraphs that are (or contain) notes.
        content_paragraphs = [
            paragraph for paragraph in paragraphs
            if not has_note_line(paragraph)
            and not paragraph.lower().startswith(('note:', 'please note:', 'remember:'))
        ]

        return '\n\n'.join(content_paragraphs).strip()

    except Exception as e:
        # Best effort: never let note stripping break response processing.
        self.logger.error(f"Failed to remove explanatory notes: {str(e)}")
        return response
|
| 1270 |
|
| 1271 |
def get_processor_info(self) -> Dict[str, Any]:
|