""" V3 API endpoint for scraping articles and streaming summarization. """ import json import time from fastapi import APIRouter, HTTPException, Request from fastapi.responses import StreamingResponse from app.api.v3.schemas import ScrapeAndSummarizeRequest from app.core.logging import get_logger from app.services.article_scraper import article_scraper_service from app.services.hf_streaming_summarizer import hf_streaming_service router = APIRouter() logger = get_logger(__name__) @router.post("/scrape-and-summarize/stream") async def scrape_and_summarize_stream( request: Request, payload: ScrapeAndSummarizeRequest ): """ Scrape article from URL OR summarize provided text. Supports two modes: 1. URL mode: Scrape article from URL then summarize 2. Text mode: Summarize provided text directly Process: - URL mode: Scrape article (with caching) -> Validate -> Stream summarization - Text mode: Validate text -> Stream summarization Returns: Server-Sent Events stream with: - Metadata event (input_type, title/author for URL mode, text_length for text mode) - Content chunks (streaming summary tokens) - Done event (final latency) """ request_id = getattr(request.state, "request_id", "unknown") # Determine input mode and prepare data if payload.url: # URL Mode: Scrape + Summarize logger.info(f"[{request_id}] V3 URL mode: {payload.url[:80]}...") scrape_start = time.time() try: article_data = await article_scraper_service.scrape_article( url=payload.url, use_cache=payload.use_cache ) except Exception as e: logger.error(f"[{request_id}] Scraping failed: {e}") raise HTTPException( status_code=502, detail=f"Failed to scrape article: {str(e)}" ) scrape_latency_ms = (time.time() - scrape_start) * 1000 logger.info( f"[{request_id}] Scraped in {scrape_latency_ms:.2f}ms, " f"extracted {len(article_data['text'])} chars" ) # Validate scraped content if len(article_data["text"]) < 100: raise HTTPException( status_code=422, detail="Insufficient content extracted from URL. " "Article may be behind paywall or site may block scrapers.", ) text_to_summarize = article_data["text"] metadata = { "input_type": "url", "url": payload.url, "title": article_data.get("title"), "author": article_data.get("author"), "date": article_data.get("date"), "site_name": article_data.get("site_name"), "scrape_method": article_data.get("method", "static"), "scrape_latency_ms": scrape_latency_ms, "extracted_text_length": len(article_data["text"]), } else: # Text Mode: Direct Summarization logger.info(f"[{request_id}] V3 text mode: {len(payload.text)} chars") text_to_summarize = payload.text metadata = { "input_type": "text", "text_length": len(payload.text), } # Stream summarization (same for both modes) return StreamingResponse( _stream_generator(text_to_summarize, payload, metadata, request_id), media_type="text/event-stream", headers={ "Cache-Control": "no-cache", "Connection": "keep-alive", "X-Accel-Buffering": "no", "X-Request-ID": request_id, }, ) async def _stream_generator(text: str, payload, metadata: dict, request_id: str): """Generate SSE stream for summarization (works for both URL and text modes).""" # Send metadata event first if payload.include_metadata: metadata_event = {"type": "metadata", "data": metadata} yield f"data: {json.dumps(metadata_event)}\n\n" # Stream summarization chunks summarization_start = time.time() tokens_used = 0 try: async for chunk in hf_streaming_service.summarize_text_stream( text=text, max_new_tokens=payload.max_tokens, temperature=payload.temperature, top_p=payload.top_p, prompt=payload.prompt, ): # Forward V2 chunks as-is if not chunk.get("done", False): tokens_used = chunk.get("tokens_used", tokens_used) yield f"data: {json.dumps(chunk)}\n\n" except Exception as e: logger.error(f"[{request_id}] Summarization failed: {e}") error_event = {"type": "error", "error": str(e), "done": True} yield f"data: {json.dumps(error_event)}\n\n" return summarization_latency_ms = (time.time() - summarization_start) * 1000 # Calculate total latency (include scrape time for URL mode) total_latency_ms = summarization_latency_ms if metadata.get("input_type") == "url": total_latency_ms += metadata.get("scrape_latency_ms", 0) logger.info( f"[{request_id}] V3 request completed in {total_latency_ms:.2f}ms " f"(scrape: {metadata.get('scrape_latency_ms', 0):.2f}ms, " f"summary: {summarization_latency_ms:.2f}ms)" ) else: logger.info( f"[{request_id}] V3 text mode completed in {total_latency_ms:.2f}ms" )