Spaces:

colin730
/

SummarizerApp

Running

ming committed on Dec 10, 2025

Commit

d25a17f

1 Parent(s): 29ed661

Remove outlines library and all related code

- Remove outlines dependency from requirements.txt (NumPy compatibility issue)
- Remove outlines import validation from Dockerfile
- Remove /stream-json endpoint from V4 API (outlines-based)
- Remove all outlines-related code from structured_summarizer.py
- Remove outlines import tests
- Delete test_v4_live.py (outlines integration tests)
- Remove all stream-json endpoint tests from test_v4_api.py

This resolves the Hugging Face build failure caused by the incompatibility
of outlines==0.0.44 with newer NumPy versions (ModuleNotFoundError: numpy.lib.function_base).

V4 API now only supports /stream and /stream-ndjson endpoints using
NDJSON patch-based structured summarization (no external dependencies).

Files changed (7) hide show

Dockerfile +1 -3
app/api/v4/structured_summary.py +0 -96
app/services/structured_summarizer.py +0 -187
requirements.txt +0 -1
tests/test_imports.py +0 -9
tests/test_v4_api.py +0 -421
tests/test_v4_live.py +0 -267

Dockerfile CHANGED Viewed

@@ -29,9 +29,7 @@ COPY requirements.txt .
 # Install Python dependencies
 RUN pip install --no-cache-dir --upgrade pip && \
-    pip install --no-cache-dir -r requirements.txt && \
-    python -c "import outlines; print(f'✅ Outlines installed: {outlines.__version__ if hasattr(outlines, \"__version__\") else \"version unknown\"}')" || \
-    (echo "❌ Outlines installation failed!" && pip list | grep -i outline && exit 1)
 # Copy application code
 COPY app/ ./app/

 # Install Python dependencies
 RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements.txt
 # Copy application code
 COPY app/ ./app/

app/api/v4/structured_summary.py CHANGED Viewed

@@ -303,99 +303,3 @@ async def _stream_generator_ndjson(text: str, payload, metadata: dict, request_i
         logger.info(
             f"[{request_id}] V4 NDJSON text mode completed in {total_latency_ms:.2f}ms"
         )
-@router.post("/scrape-and-summarize/stream-json")
-async def scrape_and_summarize_stream_json(
-    request: Request, payload: StructuredSummaryRequest
-):
-    """
-    V4: Full JSON structured summarization with streaming using Outlines.
-    This endpoint streams a single JSON object token-by-token via SSE.
-    The final concatenated response is a valid JSON matching StructuredSummary.
-    """
-    request_id = getattr(request.state, "request_id", "unknown")
-    # Determine input mode (same logic as other endpoints)
-    if payload.url:
-        logger.info(f"[{request_id}] V4 JSON URL mode: {payload.url[:80]}...")
-        scrape_start = time.time()
-        try:
-            article_data = await article_scraper_service.scrape_article(
-                url=payload.url, use_cache=payload.use_cache
-            )
-        except Exception as e:
-            logger.error(f"[{request_id}] Scraping failed: {e}")
-            raise HTTPException(
-                status_code=502, detail=f"Failed to scrape article: {str(e)}"
-            )
-        scrape_latency_ms = (time.time() - scrape_start) * 1000
-        logger.info(
-            f"[{request_id}] Scraped in {scrape_latency_ms:.2f}ms, "
-            f"extracted {len(article_data['text'])} chars"
-        )
-        if len(article_data["text"]) < 100:
-            raise HTTPException(
-                status_code=422,
-                detail="Insufficient content extracted from URL. "
-                "Article may be behind paywall or site may block scrapers.",
-            )
-        text_to_summarize = article_data["text"]
-        metadata = {
-            "input_type": "url",
-            "url": payload.url,
-            "title": article_data.get("title"),
-            "author": article_data.get("author"),
-            "date": article_data.get("date"),
-            "site_name": article_data.get("site_name"),
-            "scrape_method": article_data.get("method", "static"),
-            "scrape_latency_ms": scrape_latency_ms,
-            "extracted_text_length": len(article_data["text"]),
-            "style": payload.style.value,
-        }
-    else:
-        logger.info(f"[{request_id}] V4 JSON text mode: {len(payload.text)} chars")
-        text_to_summarize = payload.text
-        metadata = {
-            "input_type": "text",
-            "text_length": len(payload.text),
-            "style": payload.style.value,
-        }
-    async def _stream_generator_json():
-        # Optional: send metadata as first event
-        if payload.include_metadata:
-            metadata_event = {"type": "metadata", "data": metadata}
-            yield f"data: {json.dumps(metadata_event)}\n\n"
-        # Now stream the JSON tokens from the service
-        try:
-            async for (
-                token
-            ) in structured_summarizer_service.summarize_structured_stream_json(
-                text=text_to_summarize,
-                style=payload.style.value,
-            ):
-                # Each token is a raw JSON fragment; just forward it
-                yield f"data: {token}\n\n"
-        except Exception as e:
-            logger.error(f"[{request_id}] V4 JSON streaming failed: {e}")
-            error_event = {"type": "error", "error": str(e), "done": True}
-            yield f"data: {json.dumps(error_event)}\n\n"
-    return StreamingResponse(
-        _stream_generator_json(),
-        media_type="text/event-stream",
-        headers={
-            "Cache-Control": "no-cache",
-            "Connection": "keep-alive",
-            "X-Accel-Buffering": "no",
-            "X-Request-ID": request_id,
-        },
-    )

         logger.info(
             f"[{request_id}] V4 NDJSON text mode completed in {total_latency_ms:.2f}ms"
         )

app/services/structured_summarizer.py CHANGED Viewed

@@ -54,50 +54,6 @@ except ImportError:
 # Import Pydantic for schema definition
 from pydantic import BaseModel
-# Try to import Outlines for JSON schema enforcement
-OUTLINES_AVAILABLE = False
-outlines_models = None
-outlines_generate = None
-try:
-    import outlines
-    # Check what's available in outlines module
-    available_attrs = [attr for attr in dir(outlines) if not attr.startswith("_")]
-    logger.info(f"Outlines module attributes: {available_attrs}")
-    # Try to import models
-    try:
-        from outlines import models as outlines_models
-    except ImportError:
-        logger.warning("Could not import outlines.models")
-        raise
-    # Try to import generate module (for outlines.generate.json)
-    try:
-        from outlines import generate as outlines_generate
-        logger.info("✅ Found outlines.generate module")
-    except ImportError as e:
-        logger.warning(f"Could not import outlines.generate: {e}")
-        outlines_generate = None
-    if outlines_generate is None:
-        raise ImportError(
-            f"Could not import outlines.generate. Available in outlines: {available_attrs[:10]}..."
-        )
-    OUTLINES_AVAILABLE = True
-    logger.info("✅ Outlines library imported successfully")
-except ImportError as e:
-    logger.warning(
-        f"Outlines library not available: {e}. V4 JSON streaming endpoints will be disabled."
-    )
-except Exception as e:
-    logger.warning(
-        f"Error importing Outlines library: {e}. V4 JSON streaming endpoints will be disabled."
-    )
 class StructuredSummary(BaseModel):
     """Pydantic schema for structured summary output."""
@@ -117,7 +73,6 @@ class StructuredSummarizer:
         """Initialize the Qwen model and tokenizer with GPU/INT4 when possible."""
         self.tokenizer: AutoTokenizer | None = None
         self.model: AutoModelForCausalLM | None = None
-        self.outlines_model = None  # Outlines wrapper over the HF model
         if not TRANSFORMERS_AVAILABLE:
             logger.warning("⚠️ Transformers not available - V4 endpoints will not work")
@@ -234,22 +189,6 @@ class StructuredSummarizer:
             logger.info(f"   Model device: {next(self.model.parameters()).device}")
             logger.info(f"   Torch dtype: {next(self.model.parameters()).dtype}")
-            # Wrap the HF model + tokenizer in an Outlines Transformers model
-            if OUTLINES_AVAILABLE:
-                try:
-                    self.outlines_model = outlines_models.Transformers(
-                        self.model, self.tokenizer
-                    )
-                    logger.info("✅ Outlines model wrapper initialized for V4")
-                except Exception as e:
-                    logger.error(f"❌ Failed to initialize Outlines wrapper: {e}")
-                    self.outlines_model = None
-            else:
-                logger.warning(
-                    "⚠️ Outlines not available - V4 JSON streaming endpoints will be disabled"
-                )
-                self.outlines_model = None
         except Exception as e:
             logger.error(f"❌ Failed to initialize V4 model: {e}")
             logger.error(f"Model ID: {settings.v4_model_id}")
@@ -272,28 +211,6 @@ class StructuredSummarizer:
         except Exception as e:
             logger.error(f"❌ V4 model warmup failed: {e}")
-        # Also warm up Outlines JSON generation
-        if (
-            OUTLINES_AVAILABLE
-            and self.outlines_model is not None
-            and outlines_generate is not None
-        ):
-            try:
-                # Use outlines.generate.json(model, schema) pattern
-                json_generator = outlines_generate.json(
-                    self.outlines_model, StructuredSummary
-                )
-                # Try to call it with a simple prompt
-                result = json_generator("Warmup text for Outlines structured summary.")
-                # Consume the generator if it's a generator
-                if hasattr(result, "__iter__") and not isinstance(result, str):
-                    _ = list(result)[:1]  # Just consume first item for warmup
-                logger.info("✅ V4 Outlines JSON warmup successful")
-            except Exception as e:
-                logger.warning(f"⚠️ V4 Outlines JSON warmup failed: {e}")
     def _generate_test(self, prompt: str):
         """Test generation for warmup."""
         inputs = self.tokenizer(prompt, return_tensors="pt")
@@ -969,110 +886,6 @@ Rules:
                 "error": "V4 NDJSON summarization failed. See server logs.",
             }
-    async def summarize_structured_stream_json(
-        self,
-        text: str,
-        style: str = "executive",
-    ) -> AsyncGenerator[str, None]:
-        """
-        Stream a single JSON object (StructuredSummary) token-by-token
-        using Outlines constrained decoding.
-        Yields:
-            Raw string tokens that, when concatenated, form a valid JSON object.
-        """
-        if not self.outlines_model:
-            logger.error("❌ Outlines model not available for V4")
-            # Provide detailed error information
-            if not OUTLINES_AVAILABLE:
-                error_msg = (
-                    "Outlines library not installed. Please install outlines>=0.0.34."
-                )
-            elif not self.model or not self.tokenizer:
-                error_msg = (
-                    "Base V4 model not loaded. Outlines wrapper cannot be created."
-                )
-            else:
-                error_msg = "Outlines model wrapper initialization failed. Check server logs for details."
-            error_obj = {
-                "error": "V4 Outlines model not available",
-                "detail": error_msg,
-            }
-            yield json.dumps(error_obj)
-            return
-        # Map existing styles to a short instruction
-        style_prompts = {
-            "skimmer": "Summarize concisely using only hard facts and data.",
-            "executive": "Summarize for a CEO. Focus on key facts and business impact. Be concise.",
-            "eli5": "Explain in very simple language with minimal jargon.",
-        }
-        style_instruction = style_prompts.get(style, style_prompts["executive"])
-        # Truncate text to prevent token overflow (reuse your existing max_chars idea)
-        max_chars = 10000
-        if len(text) > max_chars:
-            logger.warning(
-                f"Truncating input text from {len(text)} to {max_chars} chars for V4 JSON streaming."
-            )
-            text = text[:max_chars]
-        # Build a compact prompt; Outlines will handle the schema, so no huge system prompt needed
-        prompt = (
-            f"{style_instruction}\n\n"
-            f"Produce a JSON object that matches this schema exactly:\n"
-            f"- title: short headline\n"
-            f"- main_summary: 2-4 sentences\n"
-            f"- key_points: 3-5 concise bullet points\n"
-            f"- category: 1-2 word topic label (e.g. 'Crime', 'Tech')\n"
-            f"- sentiment: one of ['positive', 'negative', 'neutral']\n"
-            f"- read_time_min: integer reading time in minutes\n\n"
-            f"ARTICLE:\n{text}"
-        )
-        logger.info(f"V4 Outlines JSON streaming: {len(text)} chars, style={style}")
-        try:
-            # Check if Outlines is available
-            if not OUTLINES_AVAILABLE or outlines_generate is None:
-                error_obj = {
-                    "error": "Outlines library not available. Please install outlines>=0.0.34."
-                }
-                yield json.dumps(error_obj)
-                return
-            start_time = time.time()
-            # Create an Outlines generator bound to the StructuredSummary schema
-            # Modern Outlines API: outlines.generate.json(model, schema)
-            json_generator = outlines_generate.json(
-                self.outlines_model, StructuredSummary
-            )
-            # Call the generator with the prompt to get streaming tokens
-            # The generator returns an iterable of string tokens
-            token_iter = json_generator(prompt)
-            # Stream tokens; each token is a string fragment of the final JSON object
-            for token in token_iter:
-                # Each `token` is a raw string fragment; just pass it through
-                if token:
-                    yield token
-                    # Let the event loop breathe
-                    await asyncio.sleep(0)
-            latency_ms = (time.time() - start_time) * 1000.0
-            logger.info(
-                f"✅ V4 Outlines JSON streaming completed in {latency_ms:.2f}ms"
-            )
-        except Exception as e:
-            logger.exception("❌ V4 Outlines JSON streaming failed")
-            # Yield a minimal JSON error object as final output
-            error_obj = {"error": "V4 JSON streaming failed", "detail": str(e)}
-            yield json.dumps(error_obj)
 # Global service instance
 structured_summarizer_service = StructuredSummarizer()

 # Import Pydantic for schema definition
 from pydantic import BaseModel
 class StructuredSummary(BaseModel):
     """Pydantic schema for structured summary output."""
         """Initialize the Qwen model and tokenizer with GPU/INT4 when possible."""
         self.tokenizer: AutoTokenizer | None = None
         self.model: AutoModelForCausalLM | None = None
         if not TRANSFORMERS_AVAILABLE:
             logger.warning("⚠️ Transformers not available - V4 endpoints will not work")
             logger.info(f"   Model device: {next(self.model.parameters()).device}")
             logger.info(f"   Torch dtype: {next(self.model.parameters()).dtype}")
         except Exception as e:
             logger.error(f"❌ Failed to initialize V4 model: {e}")
             logger.error(f"Model ID: {settings.v4_model_id}")
         except Exception as e:
             logger.error(f"❌ V4 model warmup failed: {e}")
     def _generate_test(self, prompt: str):
         """Test generation for warmup."""
         inputs = self.tokenizer(prompt, return_tensors="pt")
                 "error": "V4 NDJSON summarization failed. See server logs.",
             }
 # Global service instance
 structured_summarizer_service = StructuredSummarizer()

requirements.txt CHANGED Viewed

@@ -20,7 +20,6 @@ accelerate>=0.33.0,<1.0.0  # Required for GPU quantization (V4)
 bitsandbytes>=0.44.0  # 4-bit NF4 quantization for GPU (V4)
 einops>=0.6.0,<1.0.0  # Required for model architecture (V4)
 scipy>=1.10.0,<2.0.0  # Often needed for unquantized models (V4)
-outlines==0.0.44  # JSON schema enforcement for V4 structured summarization (pinned version tested and working)
 # Testing
 pytest>=7.0.0,<8.0.0

 bitsandbytes>=0.44.0  # 4-bit NF4 quantization for GPU (V4)
 einops>=0.6.0,<1.0.0  # Required for model architecture (V4)
 scipy>=1.10.0,<2.0.0  # Often needed for unquantized models (V4)
 # Testing
 pytest>=7.0.0,<8.0.0

tests/test_imports.py CHANGED Viewed

@@ -70,15 +70,6 @@ class TestExternalDependencies:
         except ImportError:
             pytest.skip("torch not available (optional)")
-    def test_outlines_import(self):
-        """Test outlines can be imported."""
-        try:
-            import outlines  # noqa: F401
-            assert True
-        except ImportError:
-            pytest.skip("outlines not available (optional)")
     def test_trafilatura_import(self):
         """Test trafilatura can be imported."""
         try:

         except ImportError:
             pytest.skip("torch not available (optional)")
     def test_trafilatura_import(self):
         """Test trafilatura can be imported."""
         try:

tests/test_v4_api.py CHANGED Viewed

@@ -353,424 +353,3 @@ async def test_v4_sse_headers(client: TestClient):
         assert response.headers["cache-control"] == "no-cache"
         assert response.headers["connection"] == "keep-alive"
         assert "x-request-id" in response.headers
-# ============================================================================
-# Tests for /api/v4/scrape-and-summarize/stream-json endpoint
-# ============================================================================
-def test_v4_stream_json_url_mode_success(client: TestClient):
-    """Test stream-json endpoint with URL input (successful scraping and JSON streaming)."""
-    with patch(
-        "app.services.article_scraper.article_scraper_service.scrape_article"
-    ) as mock_scrape:
-        mock_scrape.return_value = {
-            "text": "Artificial intelligence is transforming modern technology. "
-            "Machine learning algorithms are becoming more sophisticated. "
-            "Deep learning models can now process vast amounts of data efficiently."
-            * 10,
-            "title": "AI Revolution 2024",
-            "author": "Dr. Jane Smith",
-            "date": "2024-11-30",
-            "site_name": "Tech Insights",
-            "url": "https://techinsights.com/ai-2024",
-            "method": "static",
-            "scrape_time_ms": 425.8,
-        }
-        # Mock JSON streaming from Outlines
-        async def mock_json_stream(*args, **kwargs):
-            # Yield raw JSON token fragments (simulating Outlines output)
-            yield '{"title": "'
-            yield "AI Revolution"
-            yield '", "main_summary": "'
-            yield "Artificial intelligence is rapidly evolving"
-            yield '", "key_points": ['
-            yield '"AI is transforming technology"'
-            yield ', "ML algorithms are improving"'
-            yield ', "Deep learning processes data efficiently"'
-            yield '], "category": "'
-            yield "Technology"
-            yield '", "sentiment": "'
-            yield "positive"
-            yield '", "read_time_min": '
-            yield "3"
-            yield "}"
-        with patch(
-            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-            side_effect=mock_json_stream,
-        ):
-            response = client.post(
-                "/api/v4/scrape-and-summarize/stream-json",
-                json={
-                    "url": "https://techinsights.com/ai-2024",
-                    "style": "executive",
-                    "max_tokens": 512,
-                    "include_metadata": True,
-                },
-            )
-            assert response.status_code == 200
-            assert (
-                response.headers["content-type"] == "text/event-stream; charset=utf-8"
-            )
-            # Parse SSE stream
-            events = []
-            for line in response.text.split("\n"):
-                if line.startswith("data: "):
-                    events.append(line[6:])  # Keep raw data
-            # First event should be metadata JSON
-            metadata_event = json.loads(events[0])
-            assert metadata_event["type"] == "metadata"
-            assert metadata_event["data"]["input_type"] == "url"
-            assert metadata_event["data"]["url"] == "https://techinsights.com/ai-2024"
-            assert metadata_event["data"]["title"] == "AI Revolution 2024"
-            assert metadata_event["data"]["author"] == "Dr. Jane Smith"
-            assert metadata_event["data"]["style"] == "executive"
-            assert "scrape_latency_ms" in metadata_event["data"]
-            # Rest should be raw JSON tokens
-            json_tokens = events[1:]
-            complete_json = "".join(json_tokens)
-            # Verify it's valid JSON
-            parsed_json = json.loads(complete_json)
-            assert parsed_json["title"] == "AI Revolution"
-            assert "AI is transforming technology" in parsed_json["key_points"]
-            assert parsed_json["category"] == "Technology"
-            assert parsed_json["sentiment"] == "positive"
-            assert parsed_json["read_time_min"] == 3
-def test_v4_stream_json_text_mode_success(client: TestClient):
-    """Test stream-json endpoint with direct text input (no scraping)."""
-    test_text = (
-        "Climate change poses significant challenges to global ecosystems. "
-        "Rising temperatures affect weather patterns worldwide. "
-        "Scientists emphasize the need for immediate action."
-    )
-    async def mock_json_stream(*args, **kwargs):
-        yield '{"title": "Climate Change Impact", '
-        yield '"main_summary": "Climate change affects global ecosystems", '
-        yield '"key_points": ["Rising temperatures", "Weather patterns"], '
-        yield '"category": "Environment", '
-        yield '"sentiment": "neutral", '
-        yield '"read_time_min": 1}'
-    with patch(
-        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-        side_effect=mock_json_stream,
-    ):
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "text": test_text,
-                "style": "skimmer",
-                "max_tokens": 256,
-                "include_metadata": True,
-            },
-        )
-        assert response.status_code == 200
-        # Parse events
-        events = []
-        for line in response.text.split("\n"):
-            if line.startswith("data: "):
-                events.append(line[6:])
-        # Check metadata for text mode
-        metadata_event = json.loads(events[0])
-        assert metadata_event["type"] == "metadata"
-        assert metadata_event["data"]["input_type"] == "text"
-        assert metadata_event["data"]["text_length"] == len(test_text)
-        assert metadata_event["data"]["style"] == "skimmer"
-        assert "url" not in metadata_event["data"]  # URL mode fields not present
-        # Verify JSON output
-        complete_json = "".join(events[1:])
-        parsed_json = json.loads(complete_json)
-        assert parsed_json["title"] == "Climate Change Impact"
-        assert parsed_json["category"] == "Environment"
-def test_v4_stream_json_no_metadata(client: TestClient):
-    """Test stream-json endpoint with include_metadata=false."""
-    async def mock_json_stream(*args, **kwargs):
-        yield '{"title": "Test", '
-        yield '"main_summary": "Summary", '
-        yield '"key_points": ["A"], '
-        yield '"category": "Test", '
-        yield '"sentiment": "neutral", '
-        yield '"read_time_min": 1}'
-    with patch(
-        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-        side_effect=mock_json_stream,
-    ):
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "text": "Test article content for summary generation with enough characters to pass validation."
-                * 2,
-                "style": "eli5",
-                "include_metadata": False,
-            },
-        )
-        assert response.status_code == 200
-        # Parse events
-        events = []
-        for line in response.text.split("\n"):
-            if line.startswith("data: "):
-                events.append(line[6:])
-        # Should NOT have metadata event (check first event)
-        # Metadata events are complete JSON with "type": "metadata"
-        if events and events[0]:
-            try:
-                first_event = json.loads(events[0])
-                assert first_event.get("type") != "metadata", (
-                    "Metadata should not be included"
-                )
-            except json.JSONDecodeError:
-                # First event is not complete JSON, so it's raw tokens (good!)
-                pass
-        # All events should be JSON tokens that combine to valid JSON
-        complete_json = "".join(events)
-        parsed_json = json.loads(complete_json)
-        assert parsed_json["title"] == "Test"
-def test_v4_stream_json_different_styles(client: TestClient):
-    """Test stream-json endpoint with different summarization styles."""
-    styles_to_test = ["skimmer", "executive", "eli5"]
-    for style in styles_to_test:
-        # Capture loop variable in closure
-        def make_mock_stream(style_name: str):
-            async def mock_json_stream(*args, **kwargs):
-                yield f'{{"title": "{style_name.upper()}", '
-                yield '"main_summary": "Test", '
-                yield '"key_points": ["A"], '
-                yield '"category": "Test", '
-                yield '"sentiment": "positive", '
-                yield '"read_time_min": 1}'
-            return mock_json_stream
-        with patch(
-            "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-            side_effect=make_mock_stream(style),
-        ):
-            response = client.post(
-                "/api/v4/scrape-and-summarize/stream-json",
-                json={
-                    "text": "Test content for different styles with sufficient character count to pass validation requirements."
-                    * 2,
-                    "style": style,
-                    "include_metadata": False,
-                },
-            )
-            assert response.status_code == 200, f"Failed for style: {style}"
-def test_v4_stream_json_custom_max_tokens(client: TestClient):
-    """Test stream-json endpoint with custom max_tokens parameter."""
-    async def mock_json_stream(text, style, max_tokens=None):
-        # Verify max_tokens is passed through
-        assert max_tokens == 1536
-        yield '{"title": "Custom Tokens", '
-        yield '"main_summary": "Test", '
-        yield '"key_points": ["A"], '
-        yield '"category": "Test", '
-        yield '"sentiment": "neutral", '
-        yield '"read_time_min": 1}'
-    with patch(
-        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-        side_effect=mock_json_stream,
-    ):
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "text": "Test content with custom max tokens that meets minimum character requirements."
-                * 3,
-                "style": "executive",
-                "max_tokens": 1536,
-                "include_metadata": False,
-            },
-        )
-        assert response.status_code == 200
-def test_v4_stream_json_scraping_failure(client: TestClient):
-    """Test stream-json endpoint when article scraping fails."""
-    with patch(
-        "app.services.article_scraper.article_scraper_service.scrape_article"
-    ) as mock_scrape:
-        mock_scrape.side_effect = Exception("Network timeout")
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "url": "https://example.com/unreachable",
-                "style": "executive",
-            },
-        )
-        assert response.status_code == 502
-        assert "detail" in response.json()
-        assert "scrape" in response.json()["detail"].lower()
-def test_v4_stream_json_content_too_short(client: TestClient):
-    """Test stream-json endpoint when scraped content is too short."""
-    with patch(
-        "app.services.article_scraper.article_scraper_service.scrape_article"
-    ) as mock_scrape:
-        mock_scrape.return_value = {
-            "text": "Too short",  # Less than 100 characters
-            "title": "Short Article",
-            "url": "https://example.com/short",
-            "method": "static",
-            "scrape_time_ms": 200.0,
-        }
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "url": "https://example.com/short",
-                "style": "executive",
-            },
-        )
-        assert response.status_code == 422
-        assert "detail" in response.json()
-        assert "insufficient" in response.json()["detail"].lower()
-def test_v4_stream_json_ssrf_protection(client: TestClient):
-    """Test stream-json endpoint blocks SSRF attempts."""
-    ssrf_urls = [
-        "http://localhost/admin",
-        "http://127.0.0.1/secrets",
-        "http://192.168.1.1/internal",
-        "http://10.0.0.1/private",
-    ]
-    for url in ssrf_urls:
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "url": url,
-                "style": "executive",
-            },
-        )
-        assert response.status_code == 422, f"SSRF not blocked for: {url}"
-        # FastAPI validation errors return detail array
-        assert "detail" in response.json()
-def test_v4_stream_json_validation_errors(client: TestClient):
-    """Test stream-json endpoint input validation."""
-    # Missing both url and text
-    response = client.post(
-        "/api/v4/scrape-and-summarize/stream-json",
-        json={"style": "executive"},
-    )
-    assert response.status_code == 422
-    # Both url and text provided
-    response = client.post(
-        "/api/v4/scrape-and-summarize/stream-json",
-        json={
-            "url": "https://example.com",
-            "text": "Some text",
-            "style": "executive",
-        },
-    )
-    assert response.status_code == 422
-    # Text too short
-    response = client.post(
-        "/api/v4/scrape-and-summarize/stream-json",
-        json={
-            "text": "Short",
-            "style": "executive",
-        },
-    )
-    assert response.status_code == 422
-    # Invalid style
-    response = client.post(
-        "/api/v4/scrape-and-summarize/stream-json",
-        json={
-            "text": "Valid length text for testing validation" * 5,
-            "style": "invalid_style",
-        },
-    )
-    assert response.status_code == 422
-def test_v4_stream_json_response_headers(client: TestClient):
-    """Test stream-json endpoint returns correct SSE headers."""
-    async def mock_json_stream(*args, **kwargs):
-        yield '{"title": "Test", "main_summary": "Test", "key_points": [], '
-        yield '"category": "Test", "sentiment": "neutral", "read_time_min": 1}'
-    with patch(
-        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-        side_effect=mock_json_stream,
-    ):
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "text": "Test content for header validation." * 10,
-                "style": "executive",
-            },
-        )
-        # Verify SSE headers
-        assert response.headers["content-type"] == "text/event-stream; charset=utf-8"
-        assert response.headers["cache-control"] == "no-cache"
-        assert response.headers["connection"] == "keep-alive"
-        assert response.headers["x-accel-buffering"] == "no"
-        assert "x-request-id" in response.headers
-def test_v4_stream_json_request_id_tracking(client: TestClient):
-    """Test stream-json endpoint respects X-Request-ID header."""
-    custom_request_id = "test-request-12345"
-    async def mock_json_stream(*args, **kwargs):
-        yield '{"title": "Test", "main_summary": "Test", "key_points": [], '
-        yield '"category": "Test", "sentiment": "neutral", "read_time_min": 1}'
-    with patch(
-        "app.services.structured_summarizer.structured_summarizer_service.summarize_structured_stream_json",
-        side_effect=mock_json_stream,
-    ):
-        response = client.post(
-            "/api/v4/scrape-and-summarize/stream-json",
-            json={
-                "text": "Test content for request ID tracking." * 10,
-                "style": "executive",
-            },
-            headers={"X-Request-ID": custom_request_id},
-        )
-        assert response.headers["x-request-id"] == custom_request_id

         assert response.headers["cache-control"] == "no-cache"
         assert response.headers["connection"] == "keep-alive"
         assert "x-request-id" in response.headers

tests/test_v4_live.py DELETED Viewed

@@ -1,267 +0,0 @@
-"""
-Live integration tests for V4 Outlines functionality.
-These tests actually exercise the Outlines library (not mocked) to verify
-it's working correctly. They require the Outlines library to be installed
-and will fail if there are API compatibility issues.
-Run with: pytest tests/test_v4_live.py -v
-"""
-import json
-import pytest
-from pydantic import ValidationError
-# Mark all tests in this file as integration tests
-pytestmark = pytest.mark.integration
-def test_outlines_library_imports():
-    """Test that Outlines library can be imported successfully."""
-    try:
-        import outlines
-        from outlines import generate as outlines_generate
-        from outlines import models as outlines_models
-        # Verify key components exist
-        assert outlines is not None
-        assert outlines_models is not None
-        assert outlines_generate is not None
-        assert hasattr(outlines_generate, "json"), (
-            "outlines.generate should have 'json' method"
-        )
-        print("✅ Outlines library imported successfully")
-    except ImportError as e:
-        pytest.fail(f"Failed to import Outlines library: {e}")
-def test_outlines_availability_flag():
-    """Test that the OUTLINES_AVAILABLE flag is set correctly."""
-    from app.services.structured_summarizer import OUTLINES_AVAILABLE
-    assert OUTLINES_AVAILABLE is True, (
-        "OUTLINES_AVAILABLE should be True if Outlines is installed. "
-        "Check app/services/structured_summarizer.py import section."
-    )
-@pytest.mark.asyncio
-async def test_structured_summarizer_initialization():
-    """Test that StructuredSummarizer initializes with Outlines wrapper."""
-    from app.services.structured_summarizer import structured_summarizer_service
-    # Check that the service was initialized
-    assert structured_summarizer_service is not None
-    # Check that Outlines model wrapper was created
-    assert hasattr(structured_summarizer_service, "outlines_model"), (
-        "StructuredSummarizer should have 'outlines_model' attribute"
-    )
-    assert structured_summarizer_service.outlines_model is not None, (
-        "Outlines model wrapper should be initialized. "
-        "Check StructuredSummarizer.__init__() for errors."
-    )
-    print("✅ StructuredSummarizer initialized with Outlines wrapper")
-@pytest.mark.asyncio
-async def test_outlines_json_streaming_basic():
-    """
-    Test that Outlines can generate a structured JSON stream.
-    This is a REAL test - no mocking. It will fail if:
-    - Outlines library has API compatibility issues
-    - The model wrapper isn't working
-    - The JSON schema binding fails
-    - The streaming doesn't produce valid JSON
-    """
-    from app.api.v4.schemas import StructuredSummary, SummarizationStyle
-    from app.services.structured_summarizer import structured_summarizer_service
-    # Use a simple test text
-    test_text = (
-        "Artificial intelligence is transforming the technology industry. "
-        "Machine learning models are becoming more powerful and accessible. "
-        "Companies are investing billions in AI research and development."
-    )
-    # Call the actual Outlines-based streaming method
-    json_tokens = []
-    async for token in structured_summarizer_service.summarize_structured_stream_json(
-        text=test_text, style=SummarizationStyle.EXECUTIVE, max_tokens=256
-    ):
-        json_tokens.append(token)
-    # Combine all tokens into complete JSON string
-    complete_json = "".join(json_tokens)
-    print(f"\n📝 Generated JSON ({len(complete_json)} chars):")
-    print(complete_json)
-    # Verify it's valid JSON
-    try:
-        parsed_json = json.loads(complete_json)
-    except json.JSONDecodeError as e:
-        pytest.fail(
-            f"Outlines generated invalid JSON: {e}\n\nGenerated content:\n{complete_json}"
-        )
-    # Verify it matches the StructuredSummary schema
-    try:
-        structured_summary = StructuredSummary(**parsed_json)
-        # Verify required fields are present and non-empty
-        assert structured_summary.title, "title should not be empty"
-        assert structured_summary.main_summary, "main_summary should not be empty"
-        assert structured_summary.key_points, "key_points should not be empty"
-        assert len(structured_summary.key_points) > 0, (
-            "key_points should have at least one item"
-        )
-        assert structured_summary.category, "category should not be empty"
-        assert structured_summary.sentiment in ["positive", "negative", "neutral"], (
-            f"sentiment should be valid enum value, got: {structured_summary.sentiment}"
-        )
-        assert structured_summary.read_time_min > 0, "read_time_min should be positive"
-        print("✅ Outlines generated valid StructuredSummary:")
-        print(f"   Title: {structured_summary.title}")
-        print(f"   Summary: {structured_summary.main_summary[:100]}...")
-        print(f"   Key Points: {len(structured_summary.key_points)} items")
-        print(f"   Category: {structured_summary.category}")
-        print(f"   Sentiment: {structured_summary.sentiment}")
-        print(f"   Read Time: {structured_summary.read_time_min} min")
-    except ValidationError as e:
-        pytest.fail(
-            f"Outlines generated JSON doesn't match StructuredSummary schema: {e}\n\nGenerated JSON:\n{complete_json}"
-        )
-@pytest.mark.asyncio
-async def test_outlines_json_streaming_different_styles():
-    """Test that Outlines works with different summarization styles."""
-    from app.api.v4.schemas import StructuredSummary, SummarizationStyle
-    from app.services.structured_summarizer import structured_summarizer_service
-    test_text = "Climate change is affecting global weather patterns. Scientists warn of rising temperatures."
-    styles_to_test = [
-        SummarizationStyle.SKIMMER,
-        SummarizationStyle.EXECUTIVE,
-        SummarizationStyle.ELI5,
-    ]
-    for style in styles_to_test:
-        json_tokens = []
-        async for (
-            token
-        ) in structured_summarizer_service.summarize_structured_stream_json(
-            text=test_text, style=style, max_tokens=128
-        ):
-            json_tokens.append(token)
-        complete_json = "".join(json_tokens)
-        try:
-            parsed_json = json.loads(complete_json)
-            StructuredSummary(**parsed_json)
-            print(f"✅ Style {style.value}: Generated valid summary")
-        except (json.JSONDecodeError, ValidationError) as e:
-            pytest.fail(
-                f"Failed to generate valid summary for style {style.value}: {e}"
-            )
-@pytest.mark.asyncio
-async def test_outlines_with_longer_text():
-    """Test Outlines with longer text that triggers truncation."""
-    from app.api.v4.schemas import StructuredSummary, SummarizationStyle
-    from app.services.structured_summarizer import structured_summarizer_service
-    # Create a longer text (will be truncated to 10000 chars)
-    test_text = (
-        "The history of artificial intelligence dates back to the 1950s. "
-        "Alan Turing proposed the Turing Test as a measure of machine intelligence. "
-        "In the decades that followed, AI research went through cycles of optimism and setbacks. "
-    ) * 100  # Repeat to make it long
-    json_tokens = []
-    async for token in structured_summarizer_service.summarize_structured_stream_json(
-        text=test_text, style=SummarizationStyle.EXECUTIVE, max_tokens=256
-    ):
-        json_tokens.append(token)
-    complete_json = "".join(json_tokens)
-    try:
-        parsed_json = json.loads(complete_json)
-        StructuredSummary(**parsed_json)
-        print(f"✅ Long text: Generated valid summary from {len(test_text)} chars")
-    except (json.JSONDecodeError, ValidationError) as e:
-        pytest.fail(f"Failed to generate valid summary for long text: {e}")
-@pytest.mark.asyncio
-async def test_outlines_error_handling_when_model_unavailable():
-    """Test that proper error JSON is returned if Outlines model is unavailable."""
-    from app.api.v4.schemas import SummarizationStyle
-    from app.services.structured_summarizer import StructuredSummarizer
-    # Create a StructuredSummarizer instance without initializing the model
-    # This simulates the case where Outlines is unavailable
-    fake_summarizer = StructuredSummarizer.__new__(StructuredSummarizer)
-    fake_summarizer.outlines_model = None  # Simulate unavailable Outlines
-    fake_summarizer.model = None
-    fake_summarizer.tokenizer = None
-    json_tokens = []
-    async for token in fake_summarizer.summarize_structured_stream_json(
-        text="Test text", style=SummarizationStyle.EXECUTIVE, max_tokens=128
-    ):
-        json_tokens.append(token)
-    complete_json = "".join(json_tokens)
-    # Should return error JSON
-    try:
-        parsed_json = json.loads(complete_json)
-        assert "error" in parsed_json, "Error response should contain 'error' field"
-        print(f"✅ Error handling: {parsed_json['error']}")
-    except json.JSONDecodeError as e:
-        pytest.fail(f"Error response is not valid JSON: {e}")
-if __name__ == "__main__":
-    # Allow running this file directly for quick testing
-    import asyncio
-    print("Running Outlines integration tests...\n")
-    # Run synchronous tests
-    print("1. Testing Outlines imports...")
-    test_outlines_library_imports()
-    print("\n2. Testing Outlines availability flag...")
-    test_outlines_availability_flag()
-    # Run async tests
-    print("\n3. Testing StructuredSummarizer initialization...")
-    asyncio.run(test_structured_summarizer_initialization())
-    print("\n4. Testing Outlines JSON streaming (basic)...")
-    asyncio.run(test_outlines_json_streaming_basic())
-    print("\n5. Testing different summarization styles...")
-    asyncio.run(test_outlines_json_streaming_different_styles())
-    print("\n6. Testing with longer text...")
-    asyncio.run(test_outlines_with_longer_text())
-    print("\n7. Testing error handling...")
-    asyncio.run(test_outlines_error_handling_when_model_unavailable())
-    print("\n✅ All Outlines integration tests passed!")