Khriis committed on
Commit
e7e401a
verified
1 Parent(s): e0f2f75

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +22 -5
handler.py CHANGED
@@ -317,10 +317,11 @@ class EndpointHandler:
317
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
318
  return s
319
 
320
- def _extract_from_target(target: str, phrase_lower: str) -> str:
321
- idx = target.lower().find(phrase_lower)
 
322
  if idx >= 0:
323
- return target[idx:idx+len(phrase_lower)]
324
  return phrase_lower
325
 
326
  STOP = {
@@ -366,7 +367,23 @@ class EndpointHandler:
366
 
367
  logger.info(f"After dedup: {kept_norms}")
368
 
369
- cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  logger.info(f"Final cleaned spans: {cleaned}")
371
 
372
  if not cleaned and spans:
@@ -387,6 +404,6 @@ class EndpointHandler:
387
  if best:
388
  break
389
  if best:
390
- return [_extract_from_target(target_text, best)]
391
 
392
  return cleaned[:3]
 
317
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
318
  return s
319
 
320
+ def _extract_from_text(text: str, phrase_lower: str) -> str:
321
+ """Extract phrase from text preserving original case."""
322
+ idx = text.lower().find(phrase_lower)
323
  if idx >= 0:
324
+ return text[idx:idx+len(phrase_lower)]
325
  return phrase_lower
326
 
327
  STOP = {
 
367
 
368
  logger.info(f"After dedup: {kept_norms}")
369
 
370
+ # Extract spans from either target or context (whichever contains them)
371
+ cleaned = []
372
+ for n in kept_norms:
373
+ # Try target first, then context
374
+ if n in target_lower:
375
+ extracted = _extract_from_text(target_text, n)
376
+ logger.info(f" Extracted '{extracted}' from TARGET")
377
+ cleaned.append(extracted)
378
+ elif n in context_text.lower():
379
+ extracted = _extract_from_text(context_text, n)
380
+ logger.info(f" Extracted '{extracted}' from CONTEXT")
381
+ cleaned.append(extracted)
382
+ else:
383
+ # Fallback - shouldn't happen given earlier validation
384
+ logger.warning(f" Phrase '{n}' not found in target or context, using normalized")
385
+ cleaned.append(n)
386
+
387
  logger.info(f"Final cleaned spans: {cleaned}")
388
 
389
  if not cleaned and spans:
 
404
  if best:
405
  break
406
  if best:
407
+ return [_extract_from_text(target_text, best)]
408
 
409
  return cleaned[:3]