Khriis committed on
Commit
34873e0
·
verified ·
1 Parent(s): 3a7a32b

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +37 -170
handler.py CHANGED
@@ -22,6 +22,7 @@ class EndpointHandler:
22
  if not cuda_available:
23
  logger.warning("GPU not detected. Running on CPU. Inference will be slower.")
24
 
 
25
  self.device_id = 0 if cuda_available else -1
26
 
27
  # Determine model path
@@ -29,6 +30,7 @@ class EndpointHandler:
29
  logger.info(f"Loading model from {model_path}...")
30
 
31
  try:
 
32
  tokenizer = AutoTokenizer.from_pretrained(model_path)
33
  model, loading_info = AutoModelForQuestionAnswering.from_pretrained(
34
  model_path,
@@ -41,6 +43,8 @@ class EndpointHandler:
41
  logger.warning("Loaded model class: %s", model.__class__.__name__)
42
  logger.warning("Loaded model name_or_path: %s", getattr(model.config, "_name_or_path", None))
43
 
 
 
44
  self.pipe = pipeline(
45
  "question-answering",
46
  model=model,
@@ -49,12 +53,6 @@ class EndpointHandler:
49
  top_k=20,
50
  handle_impossible_answer=False
51
  )
52
-
53
- # Store tokenizer for context window management
54
- self.tokenizer = tokenizer
55
- # Set max context length (adjust based on your model's max_position_embeddings)
56
- self.max_context_tokens = 384 # Conservative limit for BERT-based models
57
-
58
  logger.info("Model loaded successfully.")
59
  except Exception as e:
60
  logger.error(f"Failed to load model: {e}")
@@ -65,100 +63,11 @@ class EndpointHandler:
65
  "Extract the exact short phrase (<= 8 words) from the target "
66
  "utterance that most strongly signals the emotion {emotion}. "
67
  "Return only a substring of the target utterance."
68
- )
69
-
70
- def _build_context(self, target_utterance: str, conversation_history: List[Dict[str, str]],
71
- max_history: int = 5) -> str:
72
- """
73
- Build conversational context by prepending previous utterances.
74
-
75
- Args:
76
- target_utterance: The main utterance to analyze
77
- conversation_history: List of previous utterances, each with 'speaker' and 'text'
78
- Format: [{"speaker": "A", "text": "..."}, ...]
79
- max_history: Maximum number of previous turns to include
80
-
81
- Returns:
82
- Formatted context string
83
- """
84
- if not conversation_history:
85
- return target_utterance
86
-
87
- # Take the most recent turns (up to max_history)
88
- recent_history = conversation_history[-max_history:] if len(conversation_history) > max_history else conversation_history
89
-
90
- # Build context string
91
- context_parts = []
92
- for turn in recent_history:
93
- speaker = turn.get("speaker", "")
94
- text = turn.get("text", "").strip()
95
- if text:
96
- if speaker:
97
- context_parts.append(f"{speaker}: {text}")
98
- else:
99
- context_parts.append(text)
100
-
101
- # Add separator before target utterance
102
- context_parts.append(f"[TARGET] {target_utterance}")
103
-
104
- full_context = " ".join(context_parts)
105
-
106
- # Token-based truncation to fit within model limits
107
- return self._truncate_context(full_context, target_utterance)
108
-
109
- def _truncate_context(self, full_context: str, target_utterance: str) -> str:
110
- """
111
- Truncate context to fit within token limits while preserving target utterance.
112
- """
113
- # Tokenize to check length
114
- tokens = self.tokenizer.encode(full_context, add_special_tokens=True)
115
-
116
- if len(tokens) <= self.max_context_tokens:
117
- return full_context
118
-
119
- # If too long, ensure target utterance is fully preserved
120
- # and truncate from the beginning of the context
121
- target_marker = "[TARGET]"
122
- if target_marker in full_context:
123
- parts = full_context.split(target_marker)
124
- if len(parts) == 2:
125
- prefix, target_part = parts
126
- target_with_marker = f"{target_marker} {target_part}"
127
-
128
- # Calculate tokens for target
129
- target_tokens = self.tokenizer.encode(target_with_marker, add_special_tokens=False)
130
- available_for_prefix = self.max_context_tokens - len(target_tokens) - 10 # Buffer for special tokens
131
-
132
- if available_for_prefix > 0:
133
- # Truncate prefix from the left (keep most recent context)
134
- prefix_tokens = self.tokenizer.encode(prefix, add_special_tokens=False)
135
- if len(prefix_tokens) > available_for_prefix:
136
- prefix_tokens = prefix_tokens[-available_for_prefix:]
137
- prefix = self.tokenizer.decode(prefix_tokens, skip_special_tokens=True)
138
-
139
- return f"{prefix} {target_with_marker}"
140
-
141
- # Fallback: just return target utterance
142
- logger.warning("Context truncation fallback - returning target only")
143
- return target_utterance
144
 
145
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
146
  """
147
  Process inference request.
148
-
149
- Expected input format (NEW):
150
- {
151
- "inputs": [
152
- {
153
- "utterance": "I'm so happy today!",
154
- "emotion": "joy",
155
- "conversation_history": [ # OPTIONAL
156
- {"speaker": "A", "text": "How are you doing?"},
157
- {"speaker": "B", "text": "Pretty good, thanks!"}
158
- ]
159
- }
160
- ]
161
- }
162
  """
163
  # Extract inputs
164
  inputs = data.pop("inputs", data)
@@ -173,40 +82,30 @@ class EndpointHandler:
173
  # Validate and format inputs for the pipeline
174
  pipeline_inputs = []
175
  valid_indices = []
176
- contexts = [] # Store contexts for later use in cleaning
177
 
178
  for i, item in enumerate(inputs):
179
  utterance = item.get("utterance", "").strip()
180
  emotion = item.get("emotion", "")
181
- conversation_history = item.get("conversation_history", [])
182
-
183
- # Log input details
184
- logger.info(f"Turn {i}: utterance='{utterance[:50]}...', emotion={emotion}, history_len={len(conversation_history)}")
185
- if conversation_history:
186
- logger.info(f" History: {conversation_history}")
187
 
188
  if not utterance:
189
  logger.warning(f"Empty utterance at index {i}")
190
  continue
191
 
192
- # Build context with conversation history
193
- context = self._build_context(utterance, conversation_history)
194
- logger.info(f"Built context for turn {i}: '{context}'")
195
-
196
  # Format as QA task
197
  question = self.question_template.format(emotion=emotion)
198
 
 
199
  pipeline_inputs.append({
200
  'question': question,
201
- 'context': context # Now includes conversation history
202
  })
203
  valid_indices.append(i)
204
- contexts.append(context) # Store for later use
205
 
206
  # Run prediction
207
  results = []
208
 
209
  if not pipeline_inputs:
 
210
  for item in inputs:
211
  results.append({
212
  "utterance": item.get("utterance", ""),
@@ -217,13 +116,21 @@ class EndpointHandler:
217
  return results
218
 
219
  try:
 
220
  predictions = self.pipe(pipeline_inputs, batch_size=8)
221
 
222
- if isinstance(predictions, dict):
223
- predictions = [predictions]
 
 
224
  elif isinstance(predictions, list) and len(predictions) > 0 and isinstance(predictions[0], dict):
225
- if len(pipeline_inputs) == 1:
226
- predictions = [predictions]
 
 
 
 
 
227
 
228
  logger.debug(f"Raw predictions: {predictions}")
229
 
@@ -241,16 +148,19 @@ class EndpointHandler:
241
  "triggers": []
242
  })
243
  else:
 
 
244
  current_preds = predictions[pred_idx]
 
245
 
 
246
  if isinstance(current_preds, dict):
247
  current_preds = [current_preds]
248
 
249
  logger.info(
250
  "RECCON raw spans (answer, score): %s",
251
- [(p.get("answer"), p.get("score", 0.0)) for p in current_preds[:5]]
252
  )
253
- logger.info(f"Total predictions received: {len(current_preds)}")
254
 
255
  def is_good_span(ans: str) -> bool:
256
  if not ans:
@@ -258,30 +168,17 @@ class EndpointHandler:
258
  a = ans.strip()
259
  if len(a) < 3:
260
  return False
 
261
  if all(ch in ".,!?;:-—'\"()[]{}" for ch in a):
262
  return False
 
263
  if not any(ch.isalpha() for ch in a):
264
  return False
265
- # Filter out speaker labels and prompt artifacts
266
- a_lower = a.lower()
267
- if "patient:" in a_lower or "therapist:" in a_lower or "[target]" in a_lower:
268
- return False
269
- if a_lower in ["patient", "therapist"]:
270
- return False
271
  return True
272
 
273
  raw_answers = [p.get("answer", "") for p in current_preds]
274
- logger.info(f"Raw answers before filtering: {raw_answers}")
275
-
276
  raw_answers = [a for a in raw_answers if is_good_span(a)]
277
- logger.info(f"Answers after is_good_span filter: {raw_answers}")
278
-
279
- # Extract context text (part before [TARGET] marker)
280
- full_context = contexts[pred_idx]
281
- context_without_target = full_context.split("[TARGET]")[0].strip() if "[TARGET]" in full_context else ""
282
-
283
- # Clean spans against BOTH target utterance AND context
284
- triggers = self._clean_spans(raw_answers, utterance, context_without_target)
285
 
286
  results.append({
287
  "utterance": utterance,
@@ -302,19 +199,12 @@ class EndpointHandler:
302
  "triggers": []
303
  } for item in inputs]
304
 
305
- def _clean_spans(self, spans: List[str], target_text: str, context_text: str = "") -> List[str]:
306
  """
307
  Clean and filter extracted trigger spans.
308
- Spans can come from either target_text or context_text.
309
  """
310
- logger.info(f"_clean_spans called with {len(spans)} spans")
311
- logger.info(f" Target: '{target_text}'")
312
- logger.info(f" Context: '{context_text[:100]}...'" if len(context_text) > 100 else f" Context: '{context_text}'")
313
- logger.info(f" Input spans: {spans}")
314
-
315
  target_text = target_text or ""
316
- context_text = context_text or ""
317
- full_text = (context_text + " " + target_text).lower()
318
  target_lower = target_text.lower()
319
 
320
  def _norm(s: str) -> str:
@@ -323,11 +213,10 @@ class EndpointHandler:
323
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
324
  return s
325
 
326
- def _extract_from_text(text: str, phrase_lower: str) -> str:
327
- """Extract phrase from text preserving original case."""
328
- idx = text.lower().find(phrase_lower)
329
  if idx >= 0:
330
- return text[idx:idx+len(phrase_lower)]
331
  return phrase_lower
332
 
333
  STOP = {
@@ -346,8 +235,7 @@ class EndpointHandler:
346
  s_norm = _norm(s)
347
  if not s_norm:
348
  continue
349
- # Check if span exists in EITHER target OR context
350
- if full_text and s_norm not in full_text:
351
  continue
352
  tokens = s_norm.split()
353
  if len(tokens) > 8 or len(s_norm) > 80:
@@ -362,35 +250,14 @@ class EndpointHandler:
362
  })
363
 
364
  candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
365
- logger.info(f"Built {len(candidates)} candidates: {[c['norm'] for c in candidates]}")
366
-
367
  kept_norms = []
368
  for c in list(candidates):
369
  n = c["norm"]
370
  if any(n in kn or kn in n for kn in kept_norms):
371
  continue
372
  kept_norms.append(n)
373
-
374
- logger.info(f"After dedup: {kept_norms}")
375
-
376
- # Extract spans from either target or context (whichever contains them)
377
- cleaned = []
378
- for n in kept_norms:
379
- # Try target first, then context
380
- if n in target_lower:
381
- extracted = _extract_from_text(target_text, n)
382
- logger.info(f" Extracted '{extracted}' from TARGET")
383
- cleaned.append(extracted)
384
- elif n in context_text.lower():
385
- extracted = _extract_from_text(context_text, n)
386
- logger.info(f" Extracted '{extracted}' from CONTEXT")
387
- cleaned.append(extracted)
388
- else:
389
- # Fallback - shouldn't happen given earlier validation
390
- logger.warning(f" Phrase '{n}' not found in target or context, using normalized")
391
- cleaned.append(n)
392
-
393
- logger.info(f"Final cleaned spans: {cleaned}")
394
 
395
  if not cleaned and spans:
396
  tt_tokens = target_lower.split()
@@ -410,6 +277,6 @@ class EndpointHandler:
410
  if best:
411
  break
412
  if best:
413
- return [_extract_from_text(target_text, best)]
414
 
415
  return cleaned[:3]
 
22
  if not cuda_available:
23
  logger.warning("GPU not detected. Running on CPU. Inference will be slower.")
24
 
25
+ # In 'pipeline', device is an integer (-1 for CPU, 0+ for GPU)
26
  self.device_id = 0 if cuda_available else -1
27
 
28
  # Determine model path
 
30
  logger.info(f"Loading model from {model_path}...")
31
 
32
  try:
33
+ # Load tokenizer and model explicitly to ensure correct loading
34
  tokenizer = AutoTokenizer.from_pretrained(model_path)
35
  model, loading_info = AutoModelForQuestionAnswering.from_pretrained(
36
  model_path,
 
43
  logger.warning("Loaded model class: %s", model.__class__.__name__)
44
  logger.warning("Loaded model name_or_path: %s", getattr(model.config, "_name_or_path", None))
45
 
46
+ # Initialize the pipeline
47
+ # top_k=20 matches your previous 'n_best_size=20' logic
48
  self.pipe = pipeline(
49
  "question-answering",
50
  model=model,
 
53
  top_k=20,
54
  handle_impossible_answer=False
55
  )
 
 
 
 
 
 
56
  logger.info("Model loaded successfully.")
57
  except Exception as e:
58
  logger.error(f"Failed to load model: {e}")
 
63
  "Extract the exact short phrase (<= 8 words) from the target "
64
  "utterance that most strongly signals the emotion {emotion}. "
65
  "Return only a substring of the target utterance."
66
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
69
  """
70
  Process inference request.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  """
72
  # Extract inputs
73
  inputs = data.pop("inputs", data)
 
82
  # Validate and format inputs for the pipeline
83
  pipeline_inputs = []
84
  valid_indices = []
 
85
 
86
  for i, item in enumerate(inputs):
87
  utterance = item.get("utterance", "").strip()
88
  emotion = item.get("emotion", "")
 
 
 
 
 
 
89
 
90
  if not utterance:
91
  logger.warning(f"Empty utterance at index {i}")
92
  continue
93
 
 
 
 
 
94
  # Format as QA task
95
  question = self.question_template.format(emotion=emotion)
96
 
97
+ # The pipeline expects a list of dicts with 'question' and 'context'
98
  pipeline_inputs.append({
99
  'question': question,
100
+ 'context': utterance
101
  })
102
  valid_indices.append(i)
 
103
 
104
  # Run prediction
105
  results = []
106
 
107
  if not pipeline_inputs:
108
+ # All inputs were invalid
109
  for item in inputs:
110
  results.append({
111
  "utterance": item.get("utterance", ""),
 
116
  return results
117
 
118
  try:
119
+ # Run inference (batch_size helps with multiple inputs)
120
  predictions = self.pipe(pipeline_inputs, batch_size=8)
121
 
122
+ # If batch_size=1 or single input, pipeline might return a single list/dict
123
+ # We ensure it's a list of lists (since top_k > 1)
124
+ if isinstance(predictions, dict): # Single input result
125
+ predictions = [predictions] # Wrap in list
126
  elif isinstance(predictions, list) and len(predictions) > 0 and isinstance(predictions[0], dict):
127
+ # This happens if we have multiple inputs but top_k=1 (which is not the case here),
128
+ # OR if we have a single input and top_k > 1.
129
+ # If we have multiple inputs and top_k > 1, it returns a list of lists.
130
+ if len(pipeline_inputs) == 1:
131
+ predictions = [predictions]
132
+ # If multiple inputs and list of dicts, that implies top_k=1.
133
+ # But we set top_k=20. So it should be list of lists.
134
 
135
  logger.debug(f"Raw predictions: {predictions}")
136
 
 
148
  "triggers": []
149
  })
150
  else:
151
+ # Get prediction for this item
152
+ # Because top_k=20, 'current_preds' is a list of dicts: [{'answer': '...', 'score': ...}, ...]
153
  current_preds = predictions[pred_idx]
154
+
155
 
156
+ # Ensure it is a list
157
  if isinstance(current_preds, dict):
158
  current_preds = [current_preds]
159
 
160
  logger.info(
161
  "RECCON raw spans (answer, score): %s",
162
+ [(p.get("answer"), p.get("score", 0.0), 3) for p in current_preds[:5]]
163
  )
 
164
 
165
  def is_good_span(ans: str) -> bool:
166
  if not ans:
 
168
  a = ans.strip()
169
  if len(a) < 3:
170
  return False
171
+ # reject pure punctuation
172
  if all(ch in ".,!?;:-—'\"()[]{}" for ch in a):
173
  return False
174
+ # require at least one letter
175
  if not any(ch.isalpha() for ch in a):
176
  return False
 
 
 
 
 
 
177
  return True
178
 
179
  raw_answers = [p.get("answer", "") for p in current_preds]
 
 
180
  raw_answers = [a for a in raw_answers if is_good_span(a)]
181
+ triggers = self._clean_spans(raw_answers, utterance)
 
 
 
 
 
 
 
182
 
183
  results.append({
184
  "utterance": utterance,
 
199
  "triggers": []
200
  } for item in inputs]
201
 
202
+ def _clean_spans(self, spans: List[str], target_text: str) -> List[str]:
203
  """
204
  Clean and filter extracted trigger spans.
205
+ (Logic preserved exactly as provided)
206
  """
 
 
 
 
 
207
  target_text = target_text or ""
 
 
208
  target_lower = target_text.lower()
209
 
210
  def _norm(s: str) -> str:
 
213
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
214
  return s
215
 
216
+ def _extract_from_target(target: str, phrase_lower: str) -> str:
217
+ idx = target.lower().find(phrase_lower)
 
218
  if idx >= 0:
219
+ return target[idx:idx+len(phrase_lower)]
220
  return phrase_lower
221
 
222
  STOP = {
 
235
  s_norm = _norm(s)
236
  if not s_norm:
237
  continue
238
+ if target_text and s_norm not in target_lower:
 
239
  continue
240
  tokens = s_norm.split()
241
  if len(tokens) > 8 or len(s_norm) > 80:
 
250
  })
251
 
252
  candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
 
 
253
  kept_norms = []
254
  for c in list(candidates):
255
  n = c["norm"]
256
  if any(n in kn or kn in n for kn in kept_norms):
257
  continue
258
  kept_norms.append(n)
259
+
260
+ cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  if not cleaned and spans:
263
  tt_tokens = target_lower.split()
 
277
  if best:
278
  break
279
  if best:
280
+ return [_extract_from_target(target_text, best)]
281
 
282
  return cleaned[:3]