Spaces:
Running
Running
File size: 4,918 Bytes
0231daa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
"""
Model management endpoints.
This module provides routes for listing and inspecting
available embedding models.
"""
from typing import Dict, Any
from fastapi import APIRouter, Depends, HTTPException, Path, status
from loguru import logger
from src.models.schemas import ModelsListResponse, ModelInfo
from src.core.manager import ModelManager
from src.core.exceptions import ModelNotFoundError
from src.api.dependencies import get_model_manager
router = APIRouter(prefix="/models", tags=["models"])
@router.get(
    "",
    response_model=ModelsListResponse,
    summary="List all models",
    description="Get a list of all available embedding models",
)
async def list_models(
    manager: ModelManager = Depends(get_model_manager),
):
    """
    Return every embedding model known to the model manager.

    Each entry produced by ``manager.list_models()`` is unpacked into a
    ``ModelInfo``. According to the endpoint's documented contract, an
    entry describes:
    - Model ID
    - Model name (Hugging Face path)
    - Model type (dense or sparse)
    - Load status
    - Repository URL

    Args:
        manager: Model manager dependency

    Returns:
        ModelsListResponse with the list of models and the total count

    Raises:
        HTTPException: 500 if the listing or schema conversion fails
    """
    try:
        entries = manager.list_models()
        described = [ModelInfo(**entry) for entry in entries]
        listing = ModelsListResponse(models=described, total=len(entries))
    except Exception:
        # Full traceback goes to the log; the client only sees a generic message.
        logger.exception("Failed to list models")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to list models",
        )
    return listing
@router.get(
    "/{model_id}",
    response_model=ModelInfo,
    summary="Get model info",
    description="Get detailed information about a specific model",
)
async def get_model_info(
    model_id: str = Path(..., description="Model identifier"),
    manager: ModelManager = Depends(get_model_manager),
):
    """
    Get detailed information about a specific model.

    Args:
        model_id: The model identifier
        manager: Model manager dependency

    Returns:
        ModelInfo with model details

    Raises:
        HTTPException: With the status code and message carried by
            ModelNotFoundError when the model is unknown, or 500 with a
            generic message for any other failure.
    """
    try:
        info = manager.get_model_info(model_id)
        # An empty/falsy result is treated the same as an unknown model.
        if not info:
            raise ModelNotFoundError(model_id)
        return ModelInfo(**info)
    except ModelNotFoundError as e:
        raise HTTPException(status_code=e.status_code, detail=e.message) from e
    except Exception as e:
        # The traceback is logged server-side; the response deliberately does
        # not echo the exception text so internal details are not exposed.
        logger.exception(f"Failed to get info for model {model_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get model info",
        ) from e
# NOTE(review): this route changes server state but is exposed via GET; it is
# kept as GET here so existing callers continue to work.
@router.get(
    "/{model_id}/load",
    summary="Load a model",
    description="Manually trigger loading of a specific model",
)
async def load_model(
    model_id: str = Path(..., description="Model identifier"),
    manager: ModelManager = Depends(get_model_manager),
) -> Dict[str, Any]:
    """
    Manually load a specific model into memory.

    This endpoint is useful for preloading models on-demand
    without waiting for the first request.

    Args:
        model_id: The model identifier
        manager: Model manager dependency

    Returns:
        Dictionary with the keys ``status``, ``message``, ``model_id`` and
        ``loaded`` (the model's ``is_loaded`` attribute after the call)

    Raises:
        HTTPException: With the status code and message carried by
            ModelNotFoundError when the model is unknown, or 500 with a
            generic message when loading fails for any other reason.
    """
    try:
        # This will load the model if not already loaded.
        # NOTE(review): the call is synchronous inside an async handler; if
        # loading is slow it presumably blocks the event loop — confirm
        # whether it should be offloaded to a worker thread.
        model = manager.get_model(model_id)
        return {
            "status": "success",
            "message": f"Model '{model_id}' loaded successfully",
            "model_id": model_id,
            "loaded": model.is_loaded,
        }
    except ModelNotFoundError as e:
        raise HTTPException(status_code=e.status_code, detail=e.message) from e
    except Exception as e:
        # The traceback is logged server-side; the response deliberately does
        # not echo the exception text so internal details are not exposed.
        logger.exception(f"Failed to load model {model_id}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to load model",
        ) from e
@router.get(
    "/memory/usage",
    summary="Get memory usage",
    description="Get memory usage statistics for loaded models",
)
async def get_memory_usage(
    manager: ModelManager = Depends(get_model_manager),
) -> Dict[str, Any]:
    """
    Report memory usage statistics from the model manager.

    The payload is whatever ``manager.get_memory_usage()`` returns, passed
    through unchanged. According to the endpoint's documented contract it
    covers:
    - Total available models
    - Currently loaded models
    - Preload status

    Args:
        manager: Model manager dependency

    Returns:
        Dictionary with memory usage statistics

    Raises:
        HTTPException: 500 if the statistics cannot be retrieved
    """
    try:
        usage_stats = manager.get_memory_usage()
    except Exception:
        # Full traceback goes to the log; the client only sees a generic message.
        logger.exception("Failed to get memory usage")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to get memory usage",
        )
    return usage_stats