"""Controller Layer - Business logic for prediction operations.

This module implements the business logic layer following the MVC pattern.
It acts as an intermediary between the API endpoints (views) and the
ML models (models layer), handling:
- Model lifecycle management (loading/unloading)
- Request validation and preprocessing
- Response formatting and label mapping
- Error handling and logging

The controller is designed to be thread-safe for concurrent access.
"""
| |
|
| | import logging |
| | from typing import Any, Dict, List |
| |
|
| | import numpy as np |
| |
|
| | from nygaardcodecommentclassification import config |
| | from nygaardcodecommentclassification.api.models import ModelPredictor, ModelRegistry |
| |
|
| | |
# Module-level logger. NOTE(review): named "controllers" rather than the
# conventional __name__ — presumably the app's logging config filters on this
# exact name, so it is left unchanged; confirm before renaming.
logger = logging.getLogger("controllers")
| |
|
| |
|
class PredictionController:
    """Manages prediction logic, model lifecycle, and response formatting.

    This controller orchestrates the ML prediction pipeline, including:
    - Loading and managing ML models via ModelRegistry
    - Validating prediction requests against supported languages/models
    - Executing predictions through ModelPredictor
    - Mapping numeric predictions to human-readable labels

    Attributes:
        registry: ModelRegistry instance for model storage
        predictor: ModelPredictor instance for inference

    Example:
        ```python
        controller = PredictionController()
        controller.startup()  # Load models from MLflow

        results = controller.predict(
            texts=["# Calculate sum"],
            language="python",
            model_type="catboost"
        )
        # results: [{"text": "# Calculate sum", "labels": ["summary"]}]

        controller.shutdown()  # Release resources
        ```
    """

    def __init__(self) -> None:
        """Initialize the prediction controller with an empty registry."""
        self.registry = ModelRegistry()
        self.predictor = ModelPredictor(self.registry)

    def startup(self) -> None:
        """Load all ML models into memory from MLflow.

        This method should be called during application startup.
        It connects to the MLflow tracking server and loads all available
        models into the registry for fast inference.

        Note:
            This operation may take several seconds depending on
            the number and size of models.
        """
        logger.info("Loading models from MLflow...")
        self.registry.load_all_models()
        logger.info("Models loaded successfully")

    def shutdown(self) -> None:
        """Release all model resources.

        Clears the model registry and frees GPU memory if applicable.
        This should be called during application shutdown.
        """
        self.registry.clear()
        logger.info("Models cleared and resources released")

    def get_models_info(self) -> Dict[str, List[str]]:
        """Return available models grouped by programming language.

        Returns:
            Dict mapping language codes to lists of available model types.
            Example: {"java": ["catboost"], "python": ["catboost"], "pharo": ["catboost"]}
        """
        # Only CatBoost models are currently served, for every configured language.
        return {lang: ["catboost"] for lang in config.LANGUAGES}

    def predict(self, texts: List[str], language: str, model_type: str) -> List[Dict[str, Any]]:
        """Execute multi-label classification on code comments.

        This method validates the request, runs ML inference, and formats
        the results with human-readable labels.

        Args:
            texts: List of code comment strings to classify
            language: Programming language context ("java", "python", "pharo")
            model_type: Type of model to use ("catboost")

        Returns:
            List of dicts with classification results. Each dict contains:
            - "text": The original input text
            - "labels": List of predicted category labels (strings)

        Raises:
            ValueError: If language is not supported or model type unavailable
            RuntimeError: If prediction fails or labels configuration is missing
        """
        # Accept the language case-insensitively while keeping the canonical
        # casing from config.LANGUAGES for registry/config lookups.
        supported_languages_map = {l.lower(): l for l in config.LANGUAGES}
        input_lang_lower = language.lower()

        if input_lang_lower not in supported_languages_map:
            raise ValueError(f"Language '{language}' not supported. Available: {config.LANGUAGES}")

        canonical_language = supported_languages_map[input_lang_lower]

        available_types = ["catboost"]
        if model_type not in available_types:
            raise ValueError(
                f"Model '{model_type}' unavailable for {language}. Available: {available_types}"
            )

        try:
            y_pred = self.predictor.predict(texts, canonical_language, model_type)
        except Exception as e:
            logger.error("Prediction failed for %s/%s: %s", canonical_language, model_type, e)
            # Best-effort debug dump of the loaded model keys. NOTE(review):
            # this reaches into the private `_registry` attribute of
            # ModelRegistry — consider exposing a public accessor. The
            # diagnostics must never mask the real error, hence the guard.
            try:
                available_keys = list(self.registry._registry.keys())
                logger.error("Debug - Registry keys available: %s", available_keys)
            except Exception:  # fixed: was a bare `except` (caught SystemExit/KeyboardInterrupt)
                logger.debug("Could not read registry keys for debug output", exc_info=True)
            raise RuntimeError(f"Internal model error: {e}") from e

        # Map numeric class indices back to human-readable label names.
        try:
            labels_map = config.LABELS[canonical_language]
        except KeyError as e:
            raise RuntimeError(f"Configuration error: Labels map missing for {canonical_language}") from e

        # A single input may come back as a 1-D vector; normalize to a
        # (n_samples, n_labels) matrix so the row loop below is uniform.
        if y_pred.ndim == 1:
            y_pred = y_pred.reshape(1, -1)

        results: List[Dict[str, Any]] = []
        for i, text_input in enumerate(texts):
            row_pred = y_pred[i]
            # Multi-label: every position equal to 1 is a predicted class.
            predicted_indices = np.where(row_pred == 1)[0]
            predicted_labels = [labels_map[idx] for idx in predicted_indices]
            results.append({"text": text_input, "labels": predicted_labels})

        return results
| |
|
| |
|