leideng/QCFuse / srt /entrypoints /openai /serving_score.py
leideng's picture
download
raw
1.99 kB
import logging
from typing import Union
from fastapi import Request
from sglang.srt.entrypoints.openai.protocol import (
ErrorResponse,
ScoringRequest,
ScoringResponse,
)
from sglang.srt.entrypoints.openai.serving_base import OpenAIServingBase
logger = logging.getLogger(__name__)
class OpenAIServingScore(OpenAIServingBase):
"""Handler for /v1/score requests"""
# NOTE: /v1/rerank is not an official OpenAI endpoint. This module may be moved
# to another module in the future.
def _request_id_prefix(self) -> str:
return "score-"
def _convert_to_internal_request(
self,
request: ScoringRequest,
raw_request: Request = None,
) -> tuple[ScoringRequest, ScoringRequest]:
"""Convert OpenAI scoring request to internal format"""
# For scoring, we pass the request directly as the tokenizer_manager
# has a specialized score_request method that doesn't use GenerateReqInput
return request, request
async def _handle_non_streaming_request(
self,
adapted_request: ScoringRequest,
request: ScoringRequest,
raw_request: Request,
) -> Union[ScoringResponse, ErrorResponse]:
"""Handle the scoring request"""
try:
# Use tokenizer_manager's score_request method directly
scores = await self.tokenizer_manager.score_request(
query=request.query,
items=request.items,
label_token_ids=request.label_token_ids,
apply_softmax=request.apply_softmax,
item_first=request.item_first,
request=raw_request,
)
# Create response with just the scores, without usage info
response = ScoringResponse(
scores=scores,
model=request.model,
)
return response
except ValueError as e:
return self.create_error_response(str(e))

Xet Storage Details

Size:
1.99 kB
·
Xet hash:
d11542c42f3cc23398b12d618f93342121288c9ad0a1a514b414be2db28c3073

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.