Spaces:
Running
Running
File size: 8,200 Bytes
"""Sprint S8.7 — couverture des branches HTTP error des adapters
OCR cloud (Azure Document Intelligence + Google Vision REST).
Cible (avant) :
- ``azure_doc_intel.py`` 91% — lignes 305-322 (HTTPError sur POST
  initial), 342-343 (Exception sur polling).
- ``google_vision.py`` 88% — lignes 272-288 (HTTPError sur call),
  295 (responses vides).
Pourquoi tester ces branches
----------------------------
Les adapters cloud parlent à des APIs distantes — un 400 (key
invalide), un 429 (quota), un 500 (panne serveur), une exception
réseau pendant le polling : tous doivent être transformés en
``OCRAdapterError`` lisible (avec code HTTP + body) plutôt que de
remonter une ``HTTPError`` brute qui confond le caller du
``CorpusRunner``.
Le mock cible ``urllib.request.urlopen`` (la lib stdlib utilisée
en mode REST quand le SDK officiel n'est pas installé). Pas de
mock du SDK — on teste explicitement le chemin REST direct.
"""
from __future__ import annotations
import io
import urllib.error
from pathlib import Path
from unittest.mock import patch
import pytest
from picarones.adapters.ocr.base import OCRAdapterError
def _png_bytes() -> bytes:
"""1Γ1 PNG transparent valide."""
import base64
return base64.b64decode(
"iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk"
"AAIAAAoAAv/lpgAAAABJRU5ErkJggg=="
)
def _make_image(tmp_path) -> Path:
    """Write the PNG fixture to ``test.png`` under *tmp_path*.

    Returns the path of the file that was written.
    """
    target = tmp_path / "test.png"
    payload = _png_bytes()
    target.write_bytes(payload)
    return target
# ──────────────────────────────────────────────────────────────────────
# Azure Document Intelligence — HTTP errors via REST
# ──────────────────────────────────────────────────────────────────────
class TestAzureDocIntelHTTPErrors:
    """Error-path tests for the Azure Document Intelligence REST call.

    Every test swaps ``urllib.request.urlopen`` for a mock that raises and
    checks that ``_call_via_rest`` reports an ``OCRAdapterError``.
    """

    def _adapter(self):
        """Build an adapter with fake credentials and short timing values."""
        from picarones.adapters.ocr.azure_doc_intel import (
            AzureDocIntelAdapter,
        )

        settings = {
            "endpoint": "https://fake.azure.com",
            "api_key": "fake-key",
            "timeout_seconds": 1.0,
            "max_polling_attempts": 2,
            "polling_interval_base": 0.01,
        }
        return AzureDocIntelAdapter(**settings)

    def test_http_error_with_readable_body_raises_with_code(
        self, tmp_path,
    ) -> None:
        """An HTTP 401 on the initial POST becomes an ``OCRAdapterError``.

        The error message is expected to carry the status code and either
        the response body or an Azure marker, which helps tell an invalid
        key from an unknown endpoint. Per the original notes this targets
        lines 305-320 of the adapter.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)
        payload = b'{"error": {"code": "Unauthorized"}}'
        error = urllib.error.HTTPError(
            "https://fake.azure.com/x",
            401,
            "Unauthorized",
            None,
            io.BytesIO(payload),
        )
        mocked = patch("urllib.request.urlopen", side_effect=error)
        with mocked, pytest.raises(OCRAdapterError) as caught:
            adapter._call_via_rest(
                image, "https://fake.azure.com", "fake-key",
            )
        text = str(caught.value)
        assert "401" in text
        assert any(token in text for token in ("Unauthorized", "Azure"))

    def test_http_error_with_unreadable_body_still_raises(
        self, tmp_path,
    ) -> None:
        """An unreadable error body still yields an error with the code.

        The ``fp`` stub fails on ``read`` (broken stream, unexpected
        encoding). Per the original notes this targets the
        ``except Exception as read_exc`` branch, lines 309-316, and 404 was
        chosen as a non-retryable code so no retry backoff slows the test.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)

        class BrokenFp:
            """File-like stub whose ``read`` always raises."""

            def read(self):
                raise UnicodeDecodeError("utf-8", b"", 0, 1, "broken")

            def close(self):
                # TemporaryFileCloser's __del__ calls close().
                return None

        error = urllib.error.HTTPError(
            "https://fake.azure.com/x",
            404,
            "Not Found",
            None,
            BrokenFp(),
        )
        mocked = patch("urllib.request.urlopen", side_effect=error)
        with mocked, pytest.raises(OCRAdapterError, match="404"):
            adapter._call_via_rest(
                image, "https://fake.azure.com", "fake-key",
            )

    def test_generic_exception_wrapped_with_type_name(
        self, tmp_path,
    ) -> None:
        """A non-HTTP exception is wrapped and its type name is reported.

        Per the original notes this targets lines 321-325, and
        ``ValueError`` was chosen as non-retryable to avoid the backoff.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)
        failure = ValueError("malformed URL")
        mocked = patch("urllib.request.urlopen", side_effect=failure)
        with mocked, pytest.raises(OCRAdapterError) as caught:
            adapter._call_via_rest(
                image, "https://fake.azure.com", "fake-key",
            )
        # The wrapper must name the type of the original exception.
        assert "ValueError" in str(caught.value)
# ──────────────────────────────────────────────────────────────────────
# Google Vision — HTTP errors via REST
# ──────────────────────────────────────────────────────────────────────
class TestGoogleVisionHTTPErrors:
    """Error-path tests for the Google Vision REST call.

    Every test swaps ``urllib.request.urlopen`` for a mock that raises and
    checks that ``_call_via_rest`` reports an ``OCRAdapterError``.
    """

    def _adapter(self):
        """Build an adapter with a fake API key and a one-second timeout."""
        from picarones.adapters.ocr.google_vision import (
            GoogleVisionAdapter,
        )

        settings = {"api_key": "fake-key", "timeout_seconds": 1.0}
        return GoogleVisionAdapter(**settings)

    def test_http_error_with_body_raises_with_code(
        self, tmp_path,
    ) -> None:
        """An HTTP 403 with a readable body reports the status code.

        Per the original notes this targets lines 272-286 of the adapter.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)
        payload = b'{"error": {"message": "API key invalid"}}'
        error = urllib.error.HTTPError(
            "https://vision.googleapis.com/x",
            403,
            "Forbidden",
            None,
            io.BytesIO(payload),
        )
        mocked = patch("urllib.request.urlopen", side_effect=error)
        with mocked, pytest.raises(OCRAdapterError) as caught:
            adapter._call_via_rest(image, "fake-key")
        assert "403" in str(caught.value)

    def test_http_error_with_unreadable_body(self, tmp_path) -> None:
        """An unreadable error body still yields an error with the code.

        Per the original notes this targets the
        ``except Exception as read_exc`` branch, lines 276-283, and 404 was
        chosen as a non-retryable code so no backoff slows the test.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)

        class BrokenFp:
            """File-like stub whose ``read`` always raises."""

            def read(self):
                raise OSError("disk read error")

            def close(self):
                return None

        error = urllib.error.HTTPError(
            "https://vision.googleapis.com/x",
            404,
            "Not Found",
            None,
            BrokenFp(),
        )
        mocked = patch("urllib.request.urlopen", side_effect=error)
        with mocked, pytest.raises(OCRAdapterError, match="404"):
            adapter._call_via_rest(image, "fake-key")

    def test_generic_exception_wrapped(self, tmp_path) -> None:
        """A non-HTTP exception is wrapped and its type name is reported.

        Per the original notes this targets lines 287-291, with
        ``ValueError`` chosen as a non-retryable error.
        """
        adapter = self._adapter()
        image = _make_image(tmp_path)
        failure = ValueError("malformed payload")
        mocked = patch("urllib.request.urlopen", side_effect=failure)
        with mocked, pytest.raises(OCRAdapterError) as caught:
            adapter._call_via_rest(image, "fake-key")
        assert "ValueError" in str(caught.value)
|