Spaces:
Running
Running
| """ | |
| V2 Summarization endpoints using HuggingFace streaming. | |
| """ | |
| import json | |
| from fastapi import APIRouter | |
| from fastapi.responses import StreamingResponse | |
| from app.api.v2.schemas import SummarizeRequest | |
| from app.services.hf_streaming_summarizer import hf_streaming_service | |
| router = APIRouter() | |
| async def summarize_stream(payload: SummarizeRequest): | |
| """Stream text summarization using HuggingFace TextIteratorStreamer via SSE.""" | |
| return StreamingResponse( | |
| _stream_generator(payload), | |
| media_type="text/event-stream", | |
| headers={ | |
| "Cache-Control": "no-cache", | |
| "Connection": "keep-alive", | |
| "X-Accel-Buffering": "no", | |
| }, | |
| ) | |
| async def _stream_generator(payload: SummarizeRequest): | |
| """Generator function for streaming SSE responses using HuggingFace.""" | |
| try: | |
| # Calculate adaptive max_new_tokens based on text length | |
| text_length = len(payload.text) | |
| if text_length < 1500: | |
| # Short texts: use 60-100 tokens | |
| adaptive_max_tokens = min(100, max(60, text_length // 15)) | |
| else: | |
| # Longer texts: scale proportionally but cap appropriately | |
| adaptive_max_tokens = min(400, max(100, text_length // 20)) | |
| # Use adaptive calculation by default, but allow user override | |
| # Check if max_tokens was explicitly provided (not just the default 256) | |
| if ( | |
| hasattr(payload, "model_fields_set") | |
| and "max_tokens" in payload.model_fields_set | |
| ): | |
| max_new_tokens = payload.max_tokens | |
| else: | |
| max_new_tokens = adaptive_max_tokens | |
| async for chunk in hf_streaming_service.summarize_text_stream( | |
| text=payload.text, | |
| max_new_tokens=max_new_tokens, | |
| temperature=payload.temperature, # Use user-provided temperature | |
| top_p=payload.top_p, # Use user-provided top_p | |
| prompt=payload.prompt, | |
| ): | |
| # Format as SSE event (same format as V1) | |
| sse_data = json.dumps(chunk) | |
| yield f"data: {sse_data}\n\n" | |
| except Exception as e: | |
| # Send error event in SSE format (same as V1) | |
| error_chunk = { | |
| "content": "", | |
| "done": True, | |
| "error": f"HuggingFace summarization failed: {str(e)}", | |
| } | |
| sse_data = json.dumps(error_chunk) | |
| yield f"data: {sse_data}\n\n" | |