File size: 5,437 Bytes
2ed2bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f724bab
 
 
 
 
2ed2bd7
 
f724bab
 
2ed2bd7
 
 
f724bab
2ed2bd7
 
 
 
 
f724bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ed2bd7
 
f724bab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ed2bd7
f724bab
 
 
 
 
 
 
 
 
2ed2bd7
f724bab
2ed2bd7
f724bab
2ed2bd7
 
 
 
 
 
 
 
 
 
f724bab
 
2ed2bd7
 
 
f724bab
2ed2bd7
 
f724bab
2ed2bd7
 
 
 
 
f724bab
6a1e8a3
2ed2bd7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f724bab
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
V3 API endpoint for scraping articles and streaming summarization.
"""

import json
import time

from fastapi import APIRouter, HTTPException, Request
from fastapi.responses import StreamingResponse

from app.api.v3.schemas import ScrapeAndSummarizeRequest
from app.core.logging import get_logger
from app.services.article_scraper import article_scraper_service
from app.services.hf_streaming_summarizer import hf_streaming_service

router = APIRouter()
logger = get_logger(__name__)


@router.post("/scrape-and-summarize/stream")
async def scrape_and_summarize_stream(
    request: Request, payload: ScrapeAndSummarizeRequest
):
    """
    Scrape article from URL OR summarize provided text.

    Supports two modes:
    1. URL mode: Scrape article from URL then summarize
    2. Text mode: Summarize provided text directly

    Process:
    - URL mode: Scrape article (with caching) -> Validate -> Stream summarization
    - Text mode: Validate text -> Stream summarization

    Returns:
        Server-Sent Events stream with:
        - Metadata event (input_type, title/author for URL mode, text_length for text mode)
        - Content chunks (streaming summary tokens)
        - Done event (final latency)
    """
    request_id = getattr(request.state, "request_id", "unknown")

    # Determine input mode and prepare data
    if payload.url:
        # URL Mode: Scrape + Summarize
        logger.info(f"[{request_id}] V3 URL mode: {payload.url[:80]}...")

        scrape_start = time.time()
        try:
            article_data = await article_scraper_service.scrape_article(
                url=payload.url, use_cache=payload.use_cache
            )
        except Exception as e:
            logger.error(f"[{request_id}] Scraping failed: {e}")
            raise HTTPException(
                status_code=502, detail=f"Failed to scrape article: {str(e)}"
            )

        scrape_latency_ms = (time.time() - scrape_start) * 1000
        logger.info(
            f"[{request_id}] Scraped in {scrape_latency_ms:.2f}ms, "
            f"extracted {len(article_data['text'])} chars"
        )

        # Validate scraped content
        if len(article_data["text"]) < 100:
            raise HTTPException(
                status_code=422,
                detail="Insufficient content extracted from URL. "
                "Article may be behind paywall or site may block scrapers.",
            )

        text_to_summarize = article_data["text"]
        metadata = {
            "input_type": "url",
            "url": payload.url,
            "title": article_data.get("title"),
            "author": article_data.get("author"),
            "date": article_data.get("date"),
            "site_name": article_data.get("site_name"),
            "scrape_method": article_data.get("method", "static"),
            "scrape_latency_ms": scrape_latency_ms,
            "extracted_text_length": len(article_data["text"]),
        }

    else:
        # Text Mode: Direct Summarization
        logger.info(f"[{request_id}] V3 text mode: {len(payload.text)} chars")

        text_to_summarize = payload.text
        metadata = {
            "input_type": "text",
            "text_length": len(payload.text),
        }

    # Stream summarization (same for both modes)
    return StreamingResponse(
        _stream_generator(text_to_summarize, payload, metadata, request_id),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no",
            "X-Request-ID": request_id,
        },
    )


async def _stream_generator(text: str, payload, metadata: dict, request_id: str):
    """Generate SSE stream for summarization (works for both URL and text modes)."""

    # Send metadata event first
    if payload.include_metadata:
        metadata_event = {"type": "metadata", "data": metadata}
        yield f"data: {json.dumps(metadata_event)}\n\n"

    # Stream summarization chunks
    summarization_start = time.time()
    tokens_used = 0

    try:
        async for chunk in hf_streaming_service.summarize_text_stream(
            text=text,
            max_new_tokens=payload.max_tokens,
            temperature=payload.temperature,
            top_p=payload.top_p,
            prompt=payload.prompt,
        ):
            # Forward V2 chunks as-is
            if not chunk.get("done", False):
                tokens_used = chunk.get("tokens_used", tokens_used)

            yield f"data: {json.dumps(chunk)}\n\n"
    except Exception as e:
        logger.error(f"[{request_id}] Summarization failed: {e}")
        error_event = {"type": "error", "error": str(e), "done": True}
        yield f"data: {json.dumps(error_event)}\n\n"
        return

    summarization_latency_ms = (time.time() - summarization_start) * 1000

    # Calculate total latency (include scrape time for URL mode)
    total_latency_ms = summarization_latency_ms
    if metadata.get("input_type") == "url":
        total_latency_ms += metadata.get("scrape_latency_ms", 0)
        logger.info(
            f"[{request_id}] V3 request completed in {total_latency_ms:.2f}ms "
            f"(scrape: {metadata.get('scrape_latency_ms', 0):.2f}ms, "
            f"summary: {summarization_latency_ms:.2f}ms)"
        )
    else:
        logger.info(
            f"[{request_id}] V3 text mode completed in {total_latency_ms:.2f}ms"
        )