Update handler.py
Browse files — handler.py (+20 −0)
handler.py
CHANGED
|
@@ -180,12 +180,18 @@ class EndpointHandler:
|
|
| 180 |
emotion = item.get("emotion", "")
|
| 181 |
conversation_history = item.get("conversation_history", [])
|
| 182 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
if not utterance:
|
| 184 |
logger.warning(f"Empty utterance at index {i}")
|
| 185 |
continue
|
| 186 |
|
| 187 |
# Build context with conversation history
|
| 188 |
context = self._build_context(utterance, conversation_history)
|
|
|
|
| 189 |
|
| 190 |
# Format as QA task
|
| 191 |
question = self.question_template.format(emotion=emotion)
|
|
@@ -244,6 +250,7 @@ class EndpointHandler:
|
|
| 244 |
"RECCON raw spans (answer, score): %s",
|
| 245 |
[(p.get("answer"), p.get("score", 0.0)) for p in current_preds[:5]]
|
| 246 |
)
|
|
|
|
| 247 |
|
| 248 |
def is_good_span(ans: str) -> bool:
|
| 249 |
if not ans:
|
|
@@ -258,7 +265,10 @@ class EndpointHandler:
|
|
| 258 |
return True
|
| 259 |
|
| 260 |
raw_answers = [p.get("answer", "") for p in current_preds]
|
|
|
|
|
|
|
| 261 |
raw_answers = [a for a in raw_answers if is_good_span(a)]
|
|
|
|
| 262 |
|
| 263 |
# Extract context text (part before [TARGET] marker)
|
| 264 |
full_context = contexts[pred_idx]
|
|
@@ -291,6 +301,11 @@ class EndpointHandler:
|
|
| 291 |
Clean and filter extracted trigger spans.
|
| 292 |
Spans can come from either target_text or context_text.
|
| 293 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
target_text = target_text or ""
|
| 295 |
context_text = context_text or ""
|
| 296 |
full_text = (context_text + " " + target_text).lower()
|
|
@@ -340,14 +355,19 @@ class EndpointHandler:
|
|
| 340 |
})
|
| 341 |
|
| 342 |
candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
|
|
|
|
|
|
|
| 343 |
kept_norms = []
|
| 344 |
for c in list(candidates):
|
| 345 |
n = c["norm"]
|
| 346 |
if any(n in kn or kn in n for kn in kept_norms):
|
| 347 |
continue
|
| 348 |
kept_norms.append(n)
|
|
|
|
|
|
|
| 349 |
|
| 350 |
cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
|
|
|
|
| 351 |
|
| 352 |
if not cleaned and spans:
|
| 353 |
tt_tokens = target_lower.split()
|
|
|
|
| 180 |
emotion = item.get("emotion", "")
|
| 181 |
conversation_history = item.get("conversation_history", [])
|
| 182 |
|
| 183 |
+
# Log input details
|
| 184 |
+
logger.info(f"Turn {i}: utterance='{utterance[:50]}...', emotion={emotion}, history_len={len(conversation_history)}")
|
| 185 |
+
if conversation_history:
|
| 186 |
+
logger.info(f" History: {conversation_history}")
|
| 187 |
+
|
| 188 |
if not utterance:
|
| 189 |
logger.warning(f"Empty utterance at index {i}")
|
| 190 |
continue
|
| 191 |
|
| 192 |
# Build context with conversation history
|
| 193 |
context = self._build_context(utterance, conversation_history)
|
| 194 |
+
logger.info(f"Built context for turn {i}: '{context}'")
|
| 195 |
|
| 196 |
# Format as QA task
|
| 197 |
question = self.question_template.format(emotion=emotion)
|
|
|
|
| 250 |
"RECCON raw spans (answer, score): %s",
|
| 251 |
[(p.get("answer"), p.get("score", 0.0)) for p in current_preds[:5]]
|
| 252 |
)
|
| 253 |
+
logger.info(f"Total predictions received: {len(current_preds)}")
|
| 254 |
|
| 255 |
def is_good_span(ans: str) -> bool:
|
| 256 |
if not ans:
|
|
|
|
| 265 |
return True
|
| 266 |
|
| 267 |
raw_answers = [p.get("answer", "") for p in current_preds]
|
| 268 |
+
logger.info(f"Raw answers before filtering: {raw_answers}")
|
| 269 |
+
|
| 270 |
raw_answers = [a for a in raw_answers if is_good_span(a)]
|
| 271 |
+
logger.info(f"Answers after is_good_span filter: {raw_answers}")
|
| 272 |
|
| 273 |
# Extract context text (part before [TARGET] marker)
|
| 274 |
full_context = contexts[pred_idx]
|
|
|
|
| 301 |
Clean and filter extracted trigger spans.
|
| 302 |
Spans can come from either target_text or context_text.
|
| 303 |
"""
|
| 304 |
+
logger.info(f"_clean_spans called with {len(spans)} spans")
|
| 305 |
+
logger.info(f" Target: '{target_text}'")
|
| 306 |
+
logger.info(f" Context: '{context_text[:100]}...'" if len(context_text) > 100 else f" Context: '{context_text}'")
|
| 307 |
+
logger.info(f" Input spans: {spans}")
|
| 308 |
+
|
| 309 |
target_text = target_text or ""
|
| 310 |
context_text = context_text or ""
|
| 311 |
full_text = (context_text + " " + target_text).lower()
|
|
|
|
| 355 |
})
|
| 356 |
|
| 357 |
candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
|
| 358 |
+
logger.info(f"Built {len(candidates)} candidates: {[c['norm'] for c in candidates]}")
|
| 359 |
+
|
| 360 |
kept_norms = []
|
| 361 |
for c in list(candidates):
|
| 362 |
n = c["norm"]
|
| 363 |
if any(n in kn or kn in n for kn in kept_norms):
|
| 364 |
continue
|
| 365 |
kept_norms.append(n)
|
| 366 |
+
|
| 367 |
+
logger.info(f"After dedup: {kept_norms}")
|
| 368 |
|
| 369 |
cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
|
| 370 |
+
logger.info(f"Final cleaned spans: {cleaned}")
|
| 371 |
|
| 372 |
if not cleaned and spans:
|
| 373 |
tt_tokens = target_lower.split()
|