Spaces:

se4ai2526-uniba-nygaard
/

NygaardCodeComment-backend

Runtime error

App Files Files Community

Update nygaardcodecommentclassification/api/controllers.py

by Fonty02 - opened Dec 11, 2025

base: refs/heads/main

←

from: refs/pr/2

Discussion Files changed

+32

-26

Files changed (1) hide show

nygaardcodecommentclassification/api/controllers.py +32 -26

nygaardcodecommentclassification/api/controllers.py CHANGED Viewed

@@ -113,24 +113,19 @@ class PredictionController:
         Raises:
             ValueError: If language is not supported or model type unavailable
             RuntimeError: If prediction fails or labels configuration is missing
-        Example:
-            ```python
-            results = controller.predict(
-                texts=["This calculates fibonacci", "TODO: optimize"],
-                language="python",
-                model_type="catboost"
-            )
-            # Returns:
-            # [
-            #     {"text": "This calculates fibonacci", "labels": ["summary"]},
-            #     {"text": "TODO: optimize", "labels": ["expand"]}
-            # ]
-            ```
         """
-        # --- Request Validation ---
-        if language not in config.LANGUAGES:
             raise ValueError(f"Language '{language}' not supported. Available: {config.LANGUAGES}")
         available_types = ["catboost"]  # Currently only CatBoost is supported
         if model_type not in available_types:
@@ -138,31 +133,42 @@ class PredictionController:
                 f"Model '{model_type}' unavailable for {language}. Available: {available_types}"
             )
-        # --- Model Inference ---
         try:
-            y_pred = self.predictor.predict(texts, language, model_type)
         except Exception as e:
-            logger.error("Prediction failed for %s/%s: %s", language, model_type, e)
             raise RuntimeError(f"Internal model error: {e}") from e
-        # --- Result Formatting ---
-        # Get the label mapping for this language
         try:
-            labels_map = config.LABELS[language]
         except KeyError as e:
-            raise RuntimeError(f"Configuration error: Labels map missing for {language}") from e
         # Convert numeric predictions to human-readable labels
         results: List[Dict[str, Any]] = []
         for i, text_input in enumerate(texts):
             row_pred = y_pred[i]  # Binary array (1 = label present, 0 = absent)
             # Find indices where prediction is 1 (positive class)
             predicted_indices = np.where(row_pred == 1)[0]
             # Map indices to label strings
             predicted_labels = [labels_map[idx] for idx in predicted_indices]
             results.append({"text": text_input, "labels": predicted_labels})
         return results

         Raises:
             ValueError: If language is not supported or model type unavailable
             RuntimeError: If prediction fails or labels configuration is missing
         """
+        # --- 1. Robust Request Validation (Case-Insensitive) ---
+        # Crea una mappa { "python": "Python", "java": "Java" } basata sul config
+        # Questo permette di trovare la chiave corretta anche se l'input è minuscolo
+        supported_languages_map = {l.lower(): l for l in config.LANGUAGES}
+        input_lang_lower = language.lower()
+        if input_lang_lower not in supported_languages_map:
             raise ValueError(f"Language '{language}' not supported. Available: {config.LANGUAGES}")
+        # Recupera la stringa esatta usata nel config e nel registry (es. "Python" o "python")
+        canonical_language = supported_languages_map[input_lang_lower]
         available_types = ["catboost"]  # Currently only CatBoost is supported
         if model_type not in available_types:
                 f"Model '{model_type}' unavailable for {language}. Available: {available_types}"
             )
+        # --- 2. Model Inference ---
         try:
+            # Usiamo canonical_language per essere sicuri di matchare la chiave nel Registry
+            y_pred = self.predictor.predict(texts, canonical_language, model_type)
         except Exception as e:
+            logger.error("Prediction failed for %s/%s: %s", canonical_language, model_type, e)
+            # Loggo anche le chiavi disponibili nel registry per debug
+            try:
+                available_keys = list(self.registry._registry.keys())
+                logger.error("Debug - Registry keys available: %s", available_keys)
+            except:
+                pass
             raise RuntimeError(f"Internal model error: {e}") from e
+        # --- 3. Result Formatting ---
+        # Get the label mapping using the canonical language key
         try:
+            labels_map = config.LABELS[canonical_language]
         except KeyError as e:
+            raise RuntimeError(f"Configuration error: Labels map missing for {canonical_language}") from e
         # Convert numeric predictions to human-readable labels
         results: List[Dict[str, Any]] = []
+        # Se c'è solo un testo, predict potrebbe ritornare un array 1D invece di 2D.
+        # Assicuriamoci che y_pred sia sempre 2D (n_samples, n_labels)
+        if y_pred.ndim == 1:
+            y_pred = y_pred.reshape(1, -1)
         for i, text_input in enumerate(texts):
             row_pred = y_pred[i]  # Binary array (1 = label present, 0 = absent)
             # Find indices where prediction is 1 (positive class)
             predicted_indices = np.where(row_pred == 1)[0]
             # Map indices to label strings
             predicted_labels = [labels_map[idx] for idx in predicted_indices]
             results.append({"text": text_input, "labels": predicted_labels})
         return results