Khriis committed on
Commit
ca8e0c5
verified
1 Parent(s): 3e79e47

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +62 -94
handler.py CHANGED
@@ -2,18 +2,16 @@ import torch
2
  import logging
3
  import re
4
  from typing import Dict, List, Any
5
- from simpletransformers.question_answering import QuestionAnsweringModel
6
 
7
- # Configure logging (no file I/O for serverless environment)
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
11
-
12
  class EndpointHandler:
13
  def __init__(self, path=""):
14
  """
15
- Initialize the RECCON emotional trigger extraction model.
16
-
17
  Args:
18
  path: Path to model directory (provided by HuggingFace Inference Endpoints)
19
  """
@@ -23,33 +21,28 @@ class EndpointHandler:
23
  cuda_available = torch.cuda.is_available()
24
  if not cuda_available:
25
  logger.warning("GPU not detected. Running on CPU. Inference will be slower.")
26
- self.device = torch.device("cuda" if cuda_available else "cpu")
27
- cuda_device = 0 if cuda_available else -1
 
28
 
29
  # Determine model path
30
- if not path or path == ".":
31
- model_path = "."
32
- else:
33
- model_path = path
34
-
35
  logger.info(f"Loading model from {model_path}...")
36
 
37
- # Load the QuestionAnsweringModel using simpletransformers
38
  try:
39
- self.model = QuestionAnsweringModel(
40
- "roberta",
41
- model_path,
42
- args={
43
- "silent_tf_logger": True,
44
- "eval_batch_size": 8,
45
- "device_map": None,
46
- "max_seq_length": 512,
47
- "max_answer_length": 200,
48
- "n_best_size": 20,
49
- "doc_stride": 512
50
- },
51
- use_cuda=cuda_available,
52
- cuda_device=cuda_device
53
  )
54
  logger.info("Model loaded successfully.")
55
  except Exception as e:
@@ -66,39 +59,19 @@ class EndpointHandler:
66
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
67
  """
68
  Process inference request.
69
-
70
- Args:
71
- data: Request data with structure:
72
- {
73
- "inputs": [
74
- {"utterance": "text", "emotion": "happiness"},
75
- ...
76
- ]
77
- }
78
-
79
- Returns:
80
- List of results:
81
- [
82
- {
83
- "utterance": "text",
84
- "emotion": "happiness",
85
- "triggers": ["trigger phrase 1", "trigger phrase 2"]
86
- },
87
- ...
88
- ]
89
  """
90
  # Extract inputs
91
  inputs = data.pop("inputs", data)
92
 
93
- # Normalize to list format (handle single dict)
94
  if isinstance(inputs, dict):
95
  inputs = [inputs]
96
 
97
  if not inputs:
98
  return [{"error": "No inputs provided", "triggers": []}]
99
 
100
- # Validate and format inputs
101
- qa_inputs = []
102
  valid_indices = []
103
 
104
  for i, item in enumerate(inputs):
@@ -111,19 +84,18 @@ class EndpointHandler:
111
 
112
  # Format as QA task
113
  question = self.question_template.format(emotion=emotion)
114
- qa_inputs.append({
115
- 'context': utterance,
116
- 'qas': [{
117
- 'id': f'temp_id_{i}',
118
- 'question': question
119
- }]
120
  })
121
  valid_indices.append(i)
122
 
123
  # Run prediction
124
  results = []
125
 
126
- if not qa_inputs:
127
  # All inputs were invalid
128
  for item in inputs:
129
  results.append({
@@ -135,17 +107,31 @@ class EndpointHandler:
135
  return results
136
 
137
  try:
138
- predictions, _ = self.model.predict(qa_inputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  logger.debug(f"Raw predictions: {predictions}")
140
 
141
  # Post-process results
142
- result_idx = 0
143
  for i, item in enumerate(inputs):
144
  utterance = item.get("utterance", "").strip()
145
  emotion = item.get("emotion", "")
146
 
147
  if i not in valid_indices:
148
- # Invalid input
149
  results.append({
150
  "utterance": utterance,
151
  "emotion": emotion,
@@ -153,32 +139,32 @@ class EndpointHandler:
153
  "triggers": []
154
  })
155
  else:
156
- # Valid input - process prediction
157
- prediction = predictions[result_idx]
158
- answer = prediction.get('answer')
159
-
160
- # Extract and clean spans
161
- if isinstance(answer, list) and len(answer) > 0:
162
- non_empty_answers = [a for a in answer if a]
163
- triggers = self._clean_spans(non_empty_answers, utterance)
164
- elif isinstance(answer, str):
165
- triggers = self._clean_spans([answer], utterance)
166
- else:
167
- triggers = []
 
168
 
169
  results.append({
170
  "utterance": utterance,
171
  "emotion": emotion,
172
  "triggers": triggers
173
  })
174
- result_idx += 1
175
 
176
  logger.debug(f"Cleaned results: {results}")
177
  return results
178
 
179
  except Exception as e:
180
  logger.error(f"Model prediction failed: {e}")
181
- # Return error for all inputs
182
  return [{
183
  "utterance": item.get("utterance", ""),
184
  "emotion": item.get("emotion", ""),
@@ -189,36 +175,23 @@ class EndpointHandler:
189
  def _clean_spans(self, spans: List[str], target_text: str) -> List[str]:
190
  """
191
  Clean and filter extracted trigger spans.
192
-
193
- This function preserves all the post-processing logic from predict_trigger.py
194
- (lines 78-153) including stopword filtering, length constraints, deduplication,
195
- and n-gram fallback.
196
-
197
- Args:
198
- spans: Raw spans extracted by the model
199
- target_text: Original utterance text
200
-
201
- Returns:
202
- List of up to 3 cleaned trigger phrases
203
  """
204
  target_text = target_text or ""
205
  target_lower = target_text.lower()
206
 
207
  def _norm(s: str) -> str:
208
- """Normalize a string: strip, lowercase, remove extra spaces and punctuation."""
209
  s = (s or "").strip().lower()
210
  s = re.sub(r"\s+", " ", s)
211
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
212
  return s
213
 
214
  def _extract_from_target(target: str, phrase_lower: str) -> str:
215
- """Extract phrase from target with original casing."""
216
  idx = target.lower().find(phrase_lower)
217
  if idx >= 0:
218
  return target[idx:idx+len(phrase_lower)]
219
  return phrase_lower
220
 
221
- # Stopwords to filter out
222
  STOP = {
223
  "a", "an", "the", "and", "or", "but", "so", "to", "of", "in", "on", "at",
224
  "with", "for", "from", "is", "am", "are", "was", "were", "be", "been",
@@ -227,7 +200,6 @@ class EndpointHandler:
227
  "those"
228
  }
229
 
230
- # Collect candidate spans that are substrings of target and reasonable length
231
  candidates = []
232
  for s in spans:
233
  s = (s or "").strip()
@@ -250,7 +222,6 @@ class EndpointHandler:
250
  "char_len": len(s_norm)
251
  })
252
 
253
- # Prefer longer phrases; remove subsumed/duplicate fragments
254
  candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
255
  kept_norms = []
256
  for c in list(candidates):
@@ -262,8 +233,6 @@ class EndpointHandler:
262
  cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
263
 
264
  if not cleaned and spans:
265
- # Fallback: try to salvage a sub-span that actually exists
266
- # in the target utterance by scanning n-grams up to 8 words
267
  tt_tokens = target_lower.split()
268
  best = None
269
  for s in spans:
@@ -271,7 +240,6 @@ class EndpointHandler:
271
  for L in range(min(8, len(words)), 0, -1):
272
  for i in range(len(words) - L + 1):
273
  phrase = words[i:i+L]
274
- # contiguous n-gram match on token boundaries
275
  for j in range(len(tt_tokens) - L + 1):
276
  if tt_tokens[j:j+L] == phrase:
277
  cand = " ".join(phrase)
@@ -284,4 +252,4 @@ class EndpointHandler:
284
  if best:
285
  return [_extract_from_target(target_text, best)]
286
 
287
- return cleaned[:3]
 
2
  import logging
3
  import re
4
  from typing import Dict, List, Any
5
+ from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer
6
 
7
+ # Configure logging
8
  logging.basicConfig(level=logging.INFO)
9
  logger = logging.getLogger(__name__)
10
 
 
11
  class EndpointHandler:
12
  def __init__(self, path=""):
13
  """
14
+ Initialize the RECCON emotional trigger extraction model using native transformers.
 
15
  Args:
16
  path: Path to model directory (provided by HuggingFace Inference Endpoints)
17
  """
 
21
  cuda_available = torch.cuda.is_available()
22
  if not cuda_available:
23
  logger.warning("GPU not detected. Running on CPU. Inference will be slower.")
24
+
25
+ # In 'pipeline', device is an integer (-1 for CPU, 0+ for GPU)
26
+ self.device_id = 0 if cuda_available else -1
27
 
28
  # Determine model path
29
+ model_path = path if path and path != "." else "."
 
 
 
 
30
  logger.info(f"Loading model from {model_path}...")
31
 
 
32
  try:
33
+ # Load tokenizer and model explicitly to ensure correct loading
34
+ tokenizer = AutoTokenizer.from_pretrained(model_path)
35
+ model = AutoModelForQuestionAnswering.from_pretrained(model_path)
36
+
37
+ # Initialize the pipeline
38
+ # top_k=20 matches your previous 'n_best_size=20' logic
39
+ self.pipe = pipeline(
40
+ "question-answering",
41
+ model=model,
42
+ tokenizer=tokenizer,
43
+ device=self.device_id,
44
+ top_k=20,
45
+ handle_impossible_answer=False
 
46
  )
47
  logger.info("Model loaded successfully.")
48
  except Exception as e:
 
59
  def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
60
  """
61
  Process inference request.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  """
63
  # Extract inputs
64
  inputs = data.pop("inputs", data)
65
 
66
+ # Normalize to list format
67
  if isinstance(inputs, dict):
68
  inputs = [inputs]
69
 
70
  if not inputs:
71
  return [{"error": "No inputs provided", "triggers": []}]
72
 
73
+ # Validate and format inputs for the pipeline
74
+ pipeline_inputs = []
75
  valid_indices = []
76
 
77
  for i, item in enumerate(inputs):
 
84
 
85
  # Format as QA task
86
  question = self.question_template.format(emotion=emotion)
87
+
88
+ # The pipeline expects a list of dicts with 'question' and 'context'
89
+ pipeline_inputs.append({
90
+ 'question': question,
91
+ 'context': utterance
 
92
  })
93
  valid_indices.append(i)
94
 
95
  # Run prediction
96
  results = []
97
 
98
+ if not pipeline_inputs:
99
  # All inputs were invalid
100
  for item in inputs:
101
  results.append({
 
107
  return results
108
 
109
  try:
110
+ # Run inference (batch_size helps with multiple inputs)
111
+ predictions = self.pipe(pipeline_inputs, batch_size=8)
112
+
113
+ # If batch_size=1 or single input, pipeline might return a single list/dict
114
+ # We ensure it's a list of lists (since top_k > 1)
115
+ if isinstance(predictions, dict): # Single input result
116
+ predictions = [predictions] # Wrap in list
117
+ elif isinstance(predictions, list) and len(predictions) > 0 and isinstance(predictions[0], dict):
118
+ # This happens if we have multiple inputs but top_k=1 (which is not the case here),
119
+ # OR if we have a single input and top_k > 1.
120
+ # If we have multiple inputs and top_k > 1, it returns a list of lists.
121
+ if len(pipeline_inputs) == 1:
122
+ predictions = [predictions]
123
+ # If multiple inputs and list of dicts, that implies top_k=1.
124
+ # But we set top_k=20. So it should be list of lists.
125
+
126
  logger.debug(f"Raw predictions: {predictions}")
127
 
128
  # Post-process results
129
+ pred_idx = 0
130
  for i, item in enumerate(inputs):
131
  utterance = item.get("utterance", "").strip()
132
  emotion = item.get("emotion", "")
133
 
134
  if i not in valid_indices:
 
135
  results.append({
136
  "utterance": utterance,
137
  "emotion": emotion,
 
139
  "triggers": []
140
  })
141
  else:
142
+ # Get prediction for this item
143
+ # Because top_k=20, 'current_preds' is a list of dicts: [{'answer': '...', 'score': ...}, ...]
144
+ current_preds = predictions[pred_idx]
145
+
146
+ # Ensure it is a list
147
+ if isinstance(current_preds, dict):
148
+ current_preds = [current_preds]
149
+
150
+ # Extract the answer strings
151
+ raw_answers = [p.get('answer', '') for p in current_preds]
152
+
153
+ # Clean spans using your original logic
154
+ triggers = self._clean_spans(raw_answers, utterance)
155
 
156
  results.append({
157
  "utterance": utterance,
158
  "emotion": emotion,
159
  "triggers": triggers
160
  })
161
+ pred_idx += 1
162
 
163
  logger.debug(f"Cleaned results: {results}")
164
  return results
165
 
166
  except Exception as e:
167
  logger.error(f"Model prediction failed: {e}")
 
168
  return [{
169
  "utterance": item.get("utterance", ""),
170
  "emotion": item.get("emotion", ""),
 
175
  def _clean_spans(self, spans: List[str], target_text: str) -> List[str]:
176
  """
177
  Clean and filter extracted trigger spans.
178
+ (Logic preserved exactly as provided)
 
 
 
 
 
 
 
 
 
 
179
  """
180
  target_text = target_text or ""
181
  target_lower = target_text.lower()
182
 
183
  def _norm(s: str) -> str:
 
184
  s = (s or "").strip().lower()
185
  s = re.sub(r"\s+", " ", s)
186
  s = re.sub(r"^[^\w]+|[^\w]+$", "", s)
187
  return s
188
 
189
  def _extract_from_target(target: str, phrase_lower: str) -> str:
 
190
  idx = target.lower().find(phrase_lower)
191
  if idx >= 0:
192
  return target[idx:idx+len(phrase_lower)]
193
  return phrase_lower
194
 
 
195
  STOP = {
196
  "a", "an", "the", "and", "or", "but", "so", "to", "of", "in", "on", "at",
197
  "with", "for", "from", "is", "am", "are", "was", "were", "be", "been",
 
200
  "those"
201
  }
202
 
 
203
  candidates = []
204
  for s in spans:
205
  s = (s or "").strip()
 
222
  "char_len": len(s_norm)
223
  })
224
 
 
225
  candidates.sort(key=lambda x: (x["tok_len"], x["char_len"]), reverse=True)
226
  kept_norms = []
227
  for c in list(candidates):
 
233
  cleaned = [_extract_from_target(target_text, n) for n in kept_norms]
234
 
235
  if not cleaned and spans:
 
 
236
  tt_tokens = target_lower.split()
237
  best = None
238
  for s in spans:
 
240
  for L in range(min(8, len(words)), 0, -1):
241
  for i in range(len(words) - L + 1):
242
  phrase = words[i:i+L]
 
243
  for j in range(len(tt_tokens) - L + 1):
244
  if tt_tokens[j:j+L] == phrase:
245
  cand = " ".join(phrase)
 
252
  if best:
253
  return [_extract_from_target(target_text, best)]
254
 
255
+ return cleaned[:3]