File size: 2,353 Bytes
a2e3298
a243ca8
a2e3298
a243ca8
 
a2e3298
 
a243ca8
a2e3298
 
 
a243ca8
a2e3298
a243ca8
 
 
 
 
 
 
 
a2e3298
a243ca8
 
 
a2e3298
 
a243ca8
a2e3298
 
 
 
 
a243ca8
a2e3298
a243ca8
a2e3298
a243ca8
a2e3298
a243ca8
 
a2e3298
a243ca8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse, Response, StreamingResponse

from logging_config import get_logger
from models.schemas import ChatRequest, ServiceStatusResponse
from services.text_service import text_service

# Module-level logger, obtained from the project's logging_config helper under
# the name "routers.text_router".
logger = get_logger("routers.text_router")

# Router for the text endpoints: every route registered on it is served under
# the "/v1/text" prefix and carries the "Text Generation" tag.
router = APIRouter(prefix="/v1/text", tags=["Text Generation"])


@router.post("/chat/completions")
async def create_chat_completion(request: ChatRequest) -> Response:
    """Generate a chat completion, as a single JSON body or an event stream.

    The return annotation is the common ``Response`` base class because the
    handler hands back either a ``JSONResponse`` or a ``StreamingResponse``
    depending on ``request.stream``.

    Args:
        request: Incoming chat payload. Its ``messages`` are dumped to plain
            dicts, and ``temperature``, ``max_tokens``, ``stream`` and
            ``return_json`` are forwarded unchanged to the text service.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream`` wrapping
        the service result when ``request.stream`` is truthy; otherwise a
        ``JSONResponse`` whose content is the service result.

    Raises:
        HTTPException: 503 if the text service reports it is not ready or
            raises ``RuntimeError``; 400 if it raises ``ValueError``; 500 for
            any other exception raised while producing the completion.
    """
    logger.info(
        "Received chat completion request: messages=%d, stream=%s, return_json=%s",
        len(request.messages),
        request.stream,
        request.return_json,
    )

    # Reject early, outside the try block, so this 503 is never re-mapped by
    # the generic exception handlers below.
    if not text_service.is_ready():
        logger.warning("Chat completion request rejected: text model not ready")
        raise HTTPException(status_code=503, detail="Text model is not ready")

    try:
        messages = [msg.model_dump() for msg in request.messages]

        result = await text_service.generate_completion(
            messages=messages,
            temperature=request.temperature,
            max_tokens=request.max_tokens,
            stream=request.stream,
            return_json=request.return_json,
        )

        if request.stream:
            logger.info("Returning streaming response for chat completion")
            return StreamingResponse(result, media_type="text/event-stream")

        logger.info("Chat completion request fulfilled successfully")
        return JSONResponse(content=result)

    except ValueError as exc:
        # The service's ValueError message is surfaced to the client as a 400.
        logger.warning("Invalid chat completion parameters: %s", exc)
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except RuntimeError as exc:
        # RuntimeError is reported as 503 (service unavailable), not 500.
        logger.error("Runtime error during chat completion: %s", exc)
        raise HTTPException(status_code=503, detail=str(exc)) from exc
    except Exception as exc:
        # Last-resort boundary: log the traceback, hide details from the client.
        logger.exception("Unexpected error during chat completion")
        raise HTTPException(status_code=500, detail="Internal server error") from exc


@router.get("/health", response_model=ServiceStatusResponse)
async def text_health() -> ServiceStatusResponse:
    """Report whether the text service is ready.

    Returns:
        A ``ServiceStatusResponse`` whose ``status`` is ``"healthy"`` when
        ``text_service.is_ready()`` is truthy and ``"initializing"``
        otherwise; ``model_ready`` carries the readiness value itself.
    """
    model_ready = text_service.is_ready()
    logger.debug("Text health check: ready=%s", model_ready)

    if model_ready:
        status = "healthy"
    else:
        status = "initializing"

    return ServiceStatusResponse(status=status, model_ready=model_ready)