Spaces:

colin730
/

SummarizerApp

Running

File size: 2,454 Bytes

0b6e76d
 
 
2ed2bd7
0b6e76d
2ed2bd7
29ed661
0b6e76d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc9914e
2ed2bd7
0b6e76d
 
 
 
 
 
9884884
 
 
 
 
 
 
 
2ed2bd7
9884884
 
2ed2bd7
 
 
 
9884884
 
 
2ed2bd7
0b6e76d
 
9884884
 
 
 
0b6e76d
 
 
 
2ed2bd7
0b6e76d
 
 
 
 
2ed2bd7
0b6e76d

"""
V2 Summarization endpoints using HuggingFace streaming.
"""

import json

from fastapi import APIRouter
from fastapi.responses import StreamingResponse

from app.api.v2.schemas import SummarizeRequest
from app.services.hf_streaming_summarizer import hf_streaming_service

router = APIRouter()


@router.post("/stream")
async def summarize_stream(payload: SummarizeRequest):
    """Stream text summarization using HuggingFace TextIteratorStreamer via SSE."""
    return StreamingResponse(
        _stream_generator(payload),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
        },
    )


async def _stream_generator(payload: SummarizeRequest):
    """Generator function for streaming SSE responses using HuggingFace."""
    try:
        # Calculate adaptive max_new_tokens based on text length
        text_length = len(payload.text)
        if text_length < 1500:
            # Short texts: use 60-100 tokens
            adaptive_max_tokens = min(100, max(60, text_length // 15))
        else:
            # Longer texts: scale proportionally but cap appropriately
            adaptive_max_tokens = min(400, max(100, text_length // 20))

        # Use adaptive calculation by default, but allow user override
        # Check if max_tokens was explicitly provided (not just the default 256)
        if (
            hasattr(payload, "model_fields_set")
            and "max_tokens" in payload.model_fields_set
        ):
            max_new_tokens = payload.max_tokens
        else:
            max_new_tokens = adaptive_max_tokens

        async for chunk in hf_streaming_service.summarize_text_stream(
            text=payload.text,
            max_new_tokens=max_new_tokens,
            temperature=payload.temperature,  # Use user-provided temperature
            top_p=payload.top_p,  # Use user-provided top_p
            prompt=payload.prompt,
        ):
            # Format as SSE event (same format as V1)
            sse_data = json.dumps(chunk)
            yield f"data: {sse_data}\n\n"

    except Exception as e:
        # Send error event in SSE format (same as V1)
        error_chunk = {
            "content": "",
            "done": True,
            "error": f"HuggingFace summarization failed: {str(e)}",
        }
        sse_data = json.dumps(error_chunk)
        yield f"data: {sse_data}\n\n"