File size: 4,590 Bytes
713632e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b33ef
713632e
 
 
 
 
 
 
 
 
13b33ef
713632e
 
 
 
 
 
 
 
 
 
13b33ef
713632e
13b33ef
c2d3410
13b33ef
 
 
 
 
 
713632e
 
13b33ef
713632e
 
 
 
 
 
 
 
13b33ef
 
713632e
13b33ef
713632e
13b33ef
713632e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b33ef
713632e
 
 
 
 
 
 
 
 
 
13b33ef
713632e
 
 
 
13b33ef
 
713632e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13b33ef
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
"""Pydantic schemas for API request and response validation.

This module defines the data models used for validating incoming API requests
and structuring outgoing responses. Using Pydantic ensures automatic validation,
serialization, and OpenAPI schema generation.

Classes:
    PredictionRequest: Schema for the /predict endpoint request body
    ModelInfo: Schema for model information responses
    PredictionResult: Schema for individual prediction results
    PredictionResponse: Schema for the complete prediction response
"""

from typing import List, Literal

from pydantic import BaseModel, ConfigDict, Field, model_validator


class PredictionRequest(BaseModel):
    """Request model for the /predict endpoint.

    This schema validates the input data for code comment classification requests.
    All fields are validated automatically by Pydantic, with clear error messages
    for invalid inputs.

    ``texts`` and ``class_names`` are parallel lists: the i-th class name belongs
    to the i-th comment. Besides requiring each list to be non-empty, the schema
    rejects requests in which the two lists differ in length, so a mismatch is
    reported at validation time instead of surfacing later as truncated or
    misaligned pairs.

    Attributes:
        texts: List of code comments to classify. Must contain at least one item.
        class_names: List of class names corresponding to each comment. Must
                     contain at least one item and have the same length as
                     ``texts``.
        language: The programming language context for classification.
                  Currently supports "java", "python", and "pharo".
        model_type: The ML model to use for classification.
                    Currently only "catboost" is supported (also the default).

    Raises:
        pydantic.ValidationError: If a field is missing or invalid, an unexpected
            field is supplied, or ``texts`` and ``class_names`` differ in length.

    Example:
        ```python
        request = PredictionRequest(
            texts=["This method calculates fibonacci", "TODO: Fix bug"],
            class_names=["MathUtils", "Calculator"],
            language="python",
            model_type="catboost"
        )
        ```
    """

    texts: List[str] = Field(
        ...,
        description="List of code comments to classify",
        min_length=1,
        json_schema_extra={"example": ["This method calculates fibonacci"]},
    )
    class_names: List[str] = Field(
        ...,
        description="List of class names corresponding to each comment",
        min_length=1,
        json_schema_extra={"example": ["MathUtils"]},
    )
    language: Literal["java", "python", "pharo"] = Field(
        ..., description="Programming language context (java, python, or pharo)"
    )
    model_type: Literal["catboost"] = Field(
        default="catboost", description="ML model type to use for classification"
    )

    # Pydantic v2 configuration for schema customization
    # extra="forbid" rejects requests with unexpected fields (strict validation)
    model_config = ConfigDict(
        extra="forbid",
        json_schema_extra={
            "example": {
                "texts": ["This method calculates the fibonacci sequence", "TODO: Fix this bug"],
                "class_names": ["MathUtils", "Calculator"],
                "language": "python",
                "model_type": "catboost",
            }
        },
    )

    @model_validator(mode="after")
    def check_texts_and_class_names_align(self) -> "PredictionRequest":
        """Ensure every comment has exactly one corresponding class name.

        Runs after per-field validation, so both lists are already known to be
        non-empty lists of strings when this check executes.

        Returns:
            The validated model instance, unchanged.

        Raises:
            ValueError: If ``texts`` and ``class_names`` have different lengths.
                Pydantic reports this to the caller as a validation error.
        """
        text_count = len(self.texts)
        class_name_count = len(self.class_names)
        if text_count != class_name_count:
            raise ValueError(
                "texts and class_names must have the same length "
                f"(got {text_count} texts and {class_name_count} class_names)"
            )
        return self


class ModelInfo(BaseModel):
    """Describes which model types exist for one programming language.

    Intended for responses that report model availability per language.

    Attributes:
        language: Identifier of the programming language.
        available_types: Model types that can be used with that language.
    """

    language: str = Field(
        ...,
        description="Programming language identifier",
    )
    available_types: List[str] = Field(
        ...,
        description="List of available model types (e.g., ['catboost'])",
    )


class PredictionResult(BaseModel):
    """Classification outcome for a single code comment.

    Attributes:
        text: The input comment this result refers to.
        class_name: The class name supplied alongside that comment.
        labels: The category labels predicted for the comment.
    """

    text: str = Field(
        ...,
        description="The original input comment",
    )
    class_name: str = Field(
        ...,
        description="The class name corresponding to the input comment",
    )
    labels: List[str] = Field(
        ...,
        description="List of predicted category labels",
    )


class PredictionResponse(BaseModel):
    """Full payload returned for a prediction request.

    Attributes:
        model_used: Type of the model that produced the predictions.
        language: Language context the classification was run under.
        results: One ``PredictionResult`` per classified input text.
    """

    model_used: str = Field(
        ...,
        description="Model type used for prediction",
    )
    language: str = Field(
        ...,
        description="Language context for classification",
    )
    results: List[PredictionResult] = Field(
        ...,
        description="List of classification results",
    )