Spaces:

se4ai2526-uniba-nygaard
/

NygaardCodeComment-backend

Sleeping

App Files Files Community

NygaardCodeComment-backend / nygaardcodecommentclassification /api /schemas.py

Fonty02

Update nygaardcodecommentclassification/api/schemas.py

13b33ef verified about 2 months ago

raw

history blame contribute delete

4.59 kB

	"""Pydantic schemas for API request and response validation.

	This module defines the data models used for validating incoming API requests
	and structuring outgoing responses. Using Pydantic ensures automatic validation,
	serialization, and OpenAPI schema generation.

	Classes:
	    PredictionRequest: Schema for the /predict endpoint request body.
	    ModelInfo: Schema for model information responses.
	    PredictionResult: Schema for individual prediction results.
	    PredictionResponse: Schema for the complete prediction response.
	"""

	from typing import List, Literal

	from pydantic import BaseModel, ConfigDict, Field


	class PredictionRequest(BaseModel):
	    """Request body schema for the /predict endpoint.

	    Describes a batch of code comments to classify, the class name that goes
	    with each comment, the language context, and the model to use. Unknown
	    fields are rejected (``extra="forbid"``).

	    Attributes:
	        texts: Code comments to classify. Must contain at least one item.
	        class_names: Class names corresponding to the comments. Must contain
	            at least one item.
	        language: Programming language context for classification; one of
	            "java", "python", or "pharo".
	        model_type: ML model to use for classification. Only "catboost" is
	            accepted, and it is the default.

	    Example:
	        ```python
	        request = PredictionRequest(
	            texts=["This method calculates fibonacci", "TODO: Fix bug"],
	            class_names=["MathUtils", "Calculator"],
	            language="python",
	            model_type="catboost"
	        )
	        ```
	    """

	    # NOTE(review): nothing in this schema checks that ``texts`` and
	    # ``class_names`` have the same length -- confirm the endpoint handles it.
	    texts: List[str] = Field(
	        ...,
	        description="List of code comments to classify",
	        min_length=1,
	        json_schema_extra={"example": ["This method calculates fibonacci"]},
	    )
	    class_names: List[str] = Field(
	        ...,
	        description="List of class names corresponding to each comment",
	        min_length=1,
	        json_schema_extra={"example": ["MathUtils"]},
	    )
	    language: Literal["java", "python", "pharo"] = Field(
	        ..., description="Programming language context (java, python, or pharo)"
	    )
	    model_type: Literal["catboost"] = Field(
	        default="catboost", description="ML model type to use for classification"
	    )

	    # Pydantic v2 configuration for schema customization.
	    # - extra="forbid" rejects requests with unexpected fields (strict validation).
	    # - protected_namespaces=() is needed because ``model_type`` starts with
	    #   ``model_``, a prefix that Pydantic v2 releases before 2.10 protect by
	    #   default and warn about at class-creation time. The field does not shadow
	    #   any BaseModel attribute, so clearing the protection is safe.
	    model_config = ConfigDict(
	        extra="forbid",
	        protected_namespaces=(),
	        json_schema_extra={
	            "example": {
	                "texts": [
	                    "This method calculates the fibonacci sequence",
	                    "TODO: Fix this bug",
	                ],
	                "class_names": ["MathUtils", "Calculator"],
	                "language": "python",
	                "model_type": "catboost",
	            }
	        },
	    )


	class ModelInfo(BaseModel):
	    """Describe which model types can be used for one language.

	    Intended for responses that report model availability per language.

	    Attributes:
	        language: Identifier of the programming language.
	        available_types: Model types available for that language.
	    """

	    # Both fields are required: ``Field`` is given no default value.
	    language: str = Field(description="Programming language identifier")
	    available_types: List[str] = Field(
	        description="List of available model types (e.g., ['catboost'])",
	    )


	class PredictionResult(BaseModel):
	    """Classification outcome for a single code comment.

	    Attributes:
	        text: The input comment that was classified.
	        class_name: The class name that accompanied the input comment.
	        labels: Category labels predicted for the comment.
	    """

	    # All three fields are required: ``Field`` is given no default value.
	    text: str = Field(description="The original input comment")
	    class_name: str = Field(
	        description="The class name corresponding to the input comment",
	    )
	    labels: List[str] = Field(description="List of predicted category labels")


	class PredictionResponse(BaseModel):
	    """Schema for the complete prediction response data.

	    Contains the full response payload from a prediction request.

	    Attributes:
	        model_used: The model type that was used for prediction.
	        language: The language context used for classification.
	        results: One prediction result per classified input text.
	    """

	    # ``model_used`` starts with ``model_``, a prefix that Pydantic v2 releases
	    # before 2.10 protect by default and warn about at class-creation time.
	    # The field does not shadow any BaseModel attribute, so the protection is
	    # cleared to keep imports warning-free.
	    model_config = ConfigDict(protected_namespaces=())

	    model_used: str = Field(..., description="Model type used for prediction")
	    language: str = Field(..., description="Language context for classification")
	    results: List[PredictionResult] = Field(
	        ..., description="List of classification results"
	    )