Spaces:

se4ai2526-uniba-nygaard
/

NygaardCodeComment-backend

Runtime error

App Files Community

Fonty02 committed on Dec 15, 2025

Commit

7bdb4b9

verified ·

1 Parent(s): c2d3410

Update nygaardcodecommentclassification/api/controllers.py

Browse files

Files changed (1) hide show

nygaardcodecommentclassification/api/controllers.py +34 -41

nygaardcodecommentclassification/api/controllers.py CHANGED Viewed

@@ -12,7 +12,6 @@ The controller is designed to be thread-safe for concurrent access.
 """
 import logging
-from pathlib import Path
 from typing import Any, Dict, List
 import numpy as np
@@ -36,48 +35,42 @@ class PredictionController:
     Attributes:
         registry: ModelRegistry instance for model storage
         predictor: ModelPredictor instance for inference
-        models_dir: Path to the directory containing trained models
     Example:
         ```python
-        controller = PredictionController(Path("./models"))
-        controller.startup()  # Load models
         results = controller.predict(
             texts=["# Calculate sum"],
             language="python",
             model_type="catboost"
         )
-        # results: [{"text": "# Calculate sum", "labels": ["summary"]}]
         controller.shutdown()  # Release resources
         ```
     """
-    def __init__(self, models_dir: Path) -> None:
-        """Initialize the prediction controller.
-        Args:
-            models_dir: Path to directory containing trained ML models.
-                        Expected structure: models_dir/{language}/{feature_type}/
-        """
         self.registry = ModelRegistry()
         self.predictor = ModelPredictor(self.registry)
-        self.models_dir = models_dir
     def startup(self) -> None:
-        """Load all ML models into memory.
         This method should be called during application startup.
-        It scans the models directory and loads all available models
-        into the registry for fast inference.
         Note:
             This operation may take several seconds depending on
             the number and size of models.
         """
-        logger.info("Loading models from %s...", self.models_dir)
-        self.registry.load_all_models(self.models_dir)
         logger.info("Models loaded successfully")
     def shutdown(self) -> None:
@@ -103,10 +96,7 @@ class PredictionController:
         return info
     def predict(
-        self,
-        texts: List[str],
-        language: str,
-        model_type: str
     ) -> List[Dict[str, Any]]:
         """Execute multi-label classification on code comments.
@@ -114,13 +104,15 @@ class PredictionController:
         the results with human-readable labels.
         Args:
-            texts: List of code comment strings to classify
             language: Programming language context ("java", "python", "pharo")
             model_type: Type of model to use ("catboost")
         Returns:
             List of dicts with classification results. Each dict contains:
             - "text": The original input text
             - "labels": List of predicted category labels (strings)
         Raises:
@@ -131,33 +123,37 @@ class PredictionController:
             ```python
             results = controller.predict(
                 texts=["This calculates fibonacci", "TODO: optimize"],
                 language="python",
                 model_type="catboost"
             )
             # Returns:
             # [
-            #     {"text": "This calculates fibonacci", "labels": ["summary"]},
-            #     {"text": "TODO: optimize", "labels": ["expand"]}
             # ]
             ```
         """
         # --- Request Validation ---
         if language not in config.LANGUAGES:
-            raise ValueError(
-                f"Language '{language}' not supported. "
-                f"Available: {config.LANGUAGES}"
-            )
         available_types = ["catboost"]  # Currently only CatBoost is supported
         if model_type not in available_types:
             raise ValueError(
-                f"Model '{model_type}' unavailable for {language}. "
-                f"Available: {available_types}"
             )
         # --- Model Inference ---
         try:
-            y_pred = self.predictor.predict(texts, language, model_type)
         except Exception as e:
             logger.error("Prediction failed for %s/%s: %s", language, model_type, e)
             raise RuntimeError(f"Internal model error: {e}") from e
@@ -167,13 +163,11 @@ class PredictionController:
         try:
             labels_map = config.LABELS[language]
         except KeyError as e:
-            raise RuntimeError(
-                f"Configuration error: Labels map missing for {language}"
-            ) from e
         # Convert numeric predictions to human-readable labels
         results: List[Dict[str, Any]] = []
-        for i, text_input in enumerate(texts):
             row_pred = y_pred[i]  # Binary array (1 = label present, 0 = absent)
             # Find indices where prediction is 1 (positive class)
@@ -182,9 +176,8 @@ class PredictionController:
             # Map indices to label strings
             predicted_labels = [labels_map[idx] for idx in predicted_indices]
-            results.append({
-                "text": text_input,
-                "labels": predicted_labels
-            })
-        return results

 """
 import logging
 from typing import Any, Dict, List
 import numpy as np
     Attributes:
         registry: ModelRegistry instance for model storage
         predictor: ModelPredictor instance for inference
     Example:
         ```python
+        controller = PredictionController()
+        controller.startup()  # Load models from MLflow
         results = controller.predict(
             texts=["# Calculate sum"],
+            class_names=["Utils"],
             language="python",
             model_type="catboost"
         )
+        # results: [{"text": "# Calculate sum", "class_name": "Utils", "labels": ["summary"]}]
         controller.shutdown()  # Release resources
         ```
     """
+    def __init__(self) -> None:
+        """Initialize the prediction controller."""
         self.registry = ModelRegistry()
         self.predictor = ModelPredictor(self.registry)
     def startup(self) -> None:
+        """Load all ML models into memory from MLflow.
         This method should be called during application startup.
+        It connects to the MLflow tracking server and loads all available
+        models into the registry for fast inference.
         Note:
             This operation may take several seconds depending on
             the number and size of models.
         """
+        logger.info("Loading models from MLflow...")
+        self.registry.load_all_models()
         logger.info("Models loaded successfully")
     def shutdown(self) -> None:
         return info
     def predict(
+        self, texts: List[str], class_names: List[str], language: str, model_type: str
     ) -> List[Dict[str, Any]]:
         """Execute multi-label classification on code comments.
         the results with human-readable labels.
         Args:
+            texts: List of code comment strings
+            class_names: List of class names corresponding to each comment
             language: Programming language context ("java", "python", "pharo")
             model_type: Type of model to use ("catboost")
         Returns:
             List of dicts with classification results. Each dict contains:
             - "text": The original input text
+            - "class_name": The class name corresponding to the input text
             - "labels": List of predicted category labels (strings)
         Raises:
             ```python
             results = controller.predict(
                 texts=["This calculates fibonacci", "TODO: optimize"],
+                class_names=["MathUtils", "Calculator"],
                 language="python",
                 model_type="catboost"
             )
             # Returns:
             # [
+            #     {"text": "This calculates fibonacci", "class_name": "MathUtils", "labels": ["summary"]},
+            #     {"text": "TODO: optimize", "class_name": "Calculator", "labels": ["expand"]}
             # ]
             ```
         """
         # --- Request Validation ---
         if language not in config.LANGUAGES:
+            raise ValueError(f"Language '{language}' not supported. Available: {config.LANGUAGES}")
+        if len(texts) != len(class_names):
+            raise ValueError(f"Mismatch: {len(texts)} texts but {len(class_names)} class names")
         available_types = ["catboost"]  # Currently only CatBoost is supported
         if model_type not in available_types:
             raise ValueError(
+                f"Model '{model_type}' unavailable for {language}. Available: {available_types}"
             )
+        # --- Combine texts with class names ---
+        # Format: "comment | class_name" for the model
+        combined_texts = [f"{text} | {class_name}" for text, class_name in zip(texts, class_names)]
         # --- Model Inference ---
         try:
+            y_pred = self.predictor.predict(combined_texts, language, model_type)
         except Exception as e:
             logger.error("Prediction failed for %s/%s: %s", language, model_type, e)
             raise RuntimeError(f"Internal model error: {e}") from e
         try:
             labels_map = config.LABELS[language]
         except KeyError as e:
+            raise RuntimeError(f"Configuration error: Labels map missing for {language}") from e
         # Convert numeric predictions to human-readable labels
         results: List[Dict[str, Any]] = []
+        for i, (text_input, class_name) in enumerate(zip(texts, class_names)):
             row_pred = y_pred[i]  # Binary array (1 = label present, 0 = absent)
             # Find indices where prediction is 1 (positive class)
             # Map indices to label strings
             predicted_labels = [labels_map[idx] for idx in predicted_indices]
+            results.append(
+                {"text": text_input, "class_name": class_name, "labels": predicted_labels}
+            )
+        return results