SummarizerApp/app/api/v2/summarize.py
ming
Migrate to Ruff for linting/formatting and add comprehensive import tests
29ed661
"""
V2 Summarization endpoints using HuggingFace streaming.
"""
import json
from fastapi import APIRouter
from fastapi.responses import StreamingResponse
from app.api.v2.schemas import SummarizeRequest
from app.services.hf_streaming_summarizer import hf_streaming_service
# Router object for this module; endpoints attach themselves to it through
# the @router.<method>(...) decorators on the handler functions.
router = APIRouter()
@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Return a Server-Sent Events response that streams a summary of ``payload``.

    The handler does no summarization itself: it hands ``payload`` to
    ``_stream_generator`` and wraps the resulting async generator in a
    ``StreamingResponse`` with the ``text/event-stream`` media type.
    """
    event_source = _stream_generator(payload)
    sse_headers = {
        # Ask clients/intermediaries not to cache the event stream.
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        # Hint for nginx-style reverse proxies to pass chunks through
        # unbuffered (presumably the deployment target; confirm).
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        event_source,
        media_type="text/event-stream",
        headers=sse_headers,
    )
async def _stream_generator(payload: SummarizeRequest):
    """Yield SSE ``data:`` frames for a HuggingFace streaming summarization.

    Each chunk produced by ``hf_streaming_service.summarize_text_stream`` is
    JSON-encoded and emitted as ``data: <json>`` followed by a blank line.

    The generation budget (``max_new_tokens``) is derived from the length of
    ``payload.text`` unless the caller explicitly set ``max_tokens`` on the
    request, in which case that value is used as-is.

    Any ``Exception`` raised while preparing or consuming the stream is
    reported to the client as one final frame with ``done`` set to ``True``
    and an ``error`` message, rather than being propagated.
    """
    try:
        # Size the token budget from the input length (in characters).
        char_count = len(payload.text)
        if char_count >= 1500:
            # Longer inputs: one token per 20 chars, clamped to [100, 400].
            derived_budget = min(400, max(100, char_count // 20))
        else:
            # Short inputs: one token per 15 chars, clamped to [60, 100].
            derived_budget = min(100, max(60, char_count // 15))

        # Only an explicitly supplied max_tokens overrides the derived budget;
        # a value that merely comes from the schema default does not. Objects
        # without ``model_fields_set`` are treated as having set nothing.
        explicit_fields = getattr(payload, "model_fields_set", ())
        token_limit = (
            payload.max_tokens if "max_tokens" in explicit_fields else derived_budget
        )

        chunk_stream = hf_streaming_service.summarize_text_stream(
            text=payload.text,
            max_new_tokens=token_limit,
            temperature=payload.temperature,  # caller-provided sampling temperature
            top_p=payload.top_p,  # caller-provided nucleus-sampling cutoff
            prompt=payload.prompt,
        )
        async for piece in chunk_stream:
            # One SSE event per chunk (original note: same format as V1).
            yield f"data: {json.dumps(piece)}\n\n"
    except Exception as exc:
        # Report the failure in-band as a terminal SSE event
        # (original note: same shape as V1).
        failure_event = {
            "content": "",
            "done": True,
            "error": f"HuggingFace summarization failed: {exc!s}",
        }
        yield f"data: {json.dumps(failure_event)}\n\n"