Update handler.py
Browse files- handler.py +22 -5
handler.py
CHANGED
|
@@ -317,10 +317,11 @@ class EndpointHandler:
|
|
| 317 |
s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
|
| 318 |
return s
|
| 319 |
|
| 320 |
-
def
|
| 321 |
-
|
|
|
|
| 322 |
if idx >= 0:
|
| 323 |
-
return
|
| 324 |
return phrase_lower
|
| 325 |
|
| 326 |
STOP = {
|
|
@@ -366,7 +367,23 @@ class EndpointHandler:
|
|
| 366 |
|
| 367 |
logger.info(f"After dedup: {kept_norms}")
|
| 368 |
|
| 369 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
logger.info(f"Final cleaned spans: {cleaned}")
|
| 371 |
|
| 372 |
if not cleaned and spans:
|
|
@@ -387,6 +404,6 @@ class EndpointHandler:
|
|
| 387 |
if best:
|
| 388 |
break
|
| 389 |
if best:
|
| 390 |
-
return [
|
| 391 |
|
| 392 |
return cleaned[:3]
|
|
|
|
| 317 |
s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
|
| 318 |
return s
|
| 319 |
|
| 320 |
+
def _extract_from_text(text: str, phrase_lower: str) -> str:
|
| 321 |
+
"""Extract phrase from text preserving original case."""
|
| 322 |
+
idx = text.lower().find(phrase_lower)
|
| 323 |
if idx >= 0:
|
| 324 |
+
return text[idx:idx+len(phrase_lower)]
|
| 325 |
return phrase_lower
|
| 326 |
|
| 327 |
STOP = {
|
|
|
|
| 367 |
|
| 368 |
logger.info(f"After dedup: {kept_norms}")
|
| 369 |
|
| 370 |
+
# Extract spans from either target or context (whichever contains them)
|
| 371 |
+
cleaned = []
|
| 372 |
+
for n in kept_norms:
|
| 373 |
+
# Try target first, then context
|
| 374 |
+
if n in target_lower:
|
| 375 |
+
extracted = _extract_from_text(target_text, n)
|
| 376 |
+
logger.info(f" Extracted '{extracted}' from TARGET")
|
| 377 |
+
cleaned.append(extracted)
|
| 378 |
+
elif n in context_text.lower():
|
| 379 |
+
extracted = _extract_from_text(context_text, n)
|
| 380 |
+
logger.info(f" Extracted '{extracted}' from CONTEXT")
|
| 381 |
+
cleaned.append(extracted)
|
| 382 |
+
else:
|
| 383 |
+
# Fallback - shouldn't happen given earlier validation
|
| 384 |
+
logger.warning(f" Phrase '{n}' not found in target or context, using normalized")
|
| 385 |
+
cleaned.append(n)
|
| 386 |
+
|
| 387 |
logger.info(f"Final cleaned spans: {cleaned}")
|
| 388 |
|
| 389 |
if not cleaned and spans:
|
|
|
|
| 404 |
if best:
|
| 405 |
break
|
| 406 |
if best:
|
| 407 |
+
return [_extract_from_text(target_text, best)]
|
| 408 |
|
| 409 |
return cleaned[:3]
|