Spaces:

colin730
/

SummarizerApp

Running

SummarizerApp / app /api /v2 /summarize.py

ming

Migrate to Ruff for linting/formatting and add comprehensive import tests

29ed661 about 2 months ago

2.45 kB

	"""
	V2 Summarization endpoints using HuggingFace streaming.
	"""

	import json

	from fastapi import APIRouter
	from fastapi.responses import StreamingResponse

	from app.api.v2.schemas import SummarizeRequest
	from app.services.hf_streaming_summarizer import hf_streaming_service

	# Router that the V2 summarization endpoints in this module register on.
	router = APIRouter()


	@router.post("/stream")
	async def summarize_stream(payload: SummarizeRequest):
	    """Answer a summarization request with a Server-Sent Events stream.

	    The response body is produced lazily by ``_stream_generator``, which
	    receives the validated request ``payload`` unchanged.
	    """
	    event_stream = _stream_generator(payload)

	    # Ask clients and intermediaries not to cache the stream and to keep the
	    # connection open. "X-Accel-Buffering: no" is presumably aimed at
	    # nginx-style reverse proxies so events are not buffered; confirm
	    # against the deployment setup.
	    sse_headers = {
	        "Cache-Control": "no-cache",
	        "Connection": "keep-alive",
	        "X-Accel-Buffering": "no",
	    }

	    return StreamingResponse(
	        event_stream,
	        media_type="text/event-stream",
	        headers=sse_headers,
	    )


	async def _stream_generator(payload: SummarizeRequest):
	    """Yield SSE ``data:`` events carrying the streamed summary of ``payload.text``.

	    Each chunk produced by ``hf_streaming_service.summarize_text_stream`` is
	    JSON-encoded and emitted as one event. Any exception raised while
	    preparing or consuming the stream is reported to the client as a single
	    final event with ``done`` set to true and an ``error`` message, instead
	    of propagating.
	    """
	    try:
	        # Derive a generation budget from the input size (in characters).
	        source_chars = len(payload.text)
	        if source_chars >= 1500:
	            # Longer inputs: one token per 20 characters, kept within 100-400.
	            token_budget = min(400, max(100, source_chars // 20))
	        else:
	            # Short inputs: one token per 15 characters, kept within 60-100.
	            token_budget = min(100, max(60, source_chars // 15))

	        # An explicitly supplied max_tokens wins over the adaptive budget.
	        # A value that is merely the schema default is not in
	        # model_fields_set, so it does not count as an override.
	        explicitly_set = getattr(payload, "model_fields_set", ())
	        if "max_tokens" in explicitly_set:
	            token_budget = payload.max_tokens

	        summary_chunks = hf_streaming_service.summarize_text_stream(
	            text=payload.text,
	            max_new_tokens=token_budget,
	            temperature=payload.temperature,  # caller-supplied sampling temperature
	            top_p=payload.top_p,  # caller-supplied nucleus sampling value
	            prompt=payload.prompt,
	        )
	        async for piece in summary_chunks:
	            # One SSE event per chunk (same wire format as V1).
	            yield f"data: {json.dumps(piece)}\n\n"

	    except Exception as exc:
	        # Report the failure as a terminal SSE event (same shape as V1).
	        failure_event = {
	            "content": "",
	            "done": True,
	            "error": f"HuggingFace summarization failed: {str(exc)}",
	        }
	        yield f"data: {json.dumps(failure_event)}\n\n"