diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d05b0f0a9e02d3688e9610caa73d2a29b4f88ae9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +__pycache__/ +*.pyc +.venv/ + +app/frontend/node_modules/ +app/frontend/dist/ diff --git a/README.md b/README.md index 5c6f48ab46db306045b9e81d006949683a96c5a5..eefd0f62cedbe0861887f94591ebb1ea82592188 100644 --- a/README.md +++ b/README.md @@ -145,6 +145,34 @@ Le moteur fédéré ne doit jamais échouer globalement à cause d’un seul con - `POST /api/resolve-manifest` - `POST /api/import` +### Heuristiques MVP de `/api/import` + +Le connecteur générique `manifest_by_url` applique des heuristiques minimales et explicites : + +1. **Manifest direct** : l’URL est considérée comme manifest si son chemin contient `manifest` + (ou se termine par `manifest.json`). +2. **Notice -> manifest** : si l’URL ne ressemble pas à un manifest, le backend tente des suffixes + courants, dans cet ordre : + - `/manifest` + - `/manifest.json` + - `/iiif/manifest` + - `/iiif/manifest.json` + +Ces heuristiques sont volontairement simples au MVP et seront enrichies par source aux lots +connecteurs réels. + +### Sécurité MVP import URL (validation + SSRF basique) + +`/api/import` applique une validation stricte avant résolution : + +- schémas autorisés : `http`, `https` uniquement ; +- rejet explicite de `localhost`/hôtes locaux ; +- rejet des IP privées/loopback/link-local/réservées/unspecified ; +- rejet des hôtes DNS qui résolvent vers ces plages privées/locales. + +Limite connue MVP : cette protection SSRF reste basique et devra être durcie (allowlist, +résolution DNS contrôlée, protections réseau infra) avant production. 
+ ## Outils MCP prévus - `search_items` @@ -165,11 +193,10 @@ Le moteur fédéré ne doit jamais échouer globalement à cause d’un seul con ### Backend ```bash -cd app/backend python -m venv .venv source .venv/bin/activate -pip install -r requirements.txt -uvicorn app.main:app --reload +pip install -e '.[dev]' +uvicorn app.main:app --app-dir app/backend --reload ``` ### Frontend @@ -180,6 +207,14 @@ npm install npm run dev ``` +Par défaut, le frontend appelle `http://localhost:8000`. + +Optionnel : + +```bash +VITE_API_BASE_URL=http://localhost:8000 npm run dev +``` + ## Variables d’environnement Créer un fichier `.env` à partir de `.env.example`. @@ -210,6 +245,38 @@ docker run -p 8000:8000 universal-iiif-portal - Europeana - connecteur générique `manifest-by-url` +## Connecteur Gallica (lot 5) + +### Hypothèses de mapping `NormalizedItem` + +- `source_item_id` : ARK extrait des identifiants Gallica (`ark:/...`) ; +- `id` global : `gallica:{source_item_id}` ; +- `title` : premier champ `dc:title` disponible ; +- `creators` : liste des `dc:creator` ; +- `date_display` : premier `dc:date` ; +- `object_type` : dérivé de `dc:type` via mapping simple (`manuscript`, `book`, `map`, `image`, `newspaper`, `other`) ; +- `record_url` : premier `dc:identifier` ; +- `manifest_url` : construit depuis l’ARK (`https://gallica.bnf.fr/iiif/{ark}/manifest.json`) ; +- `institution` : `Bibliothèque nationale de France`. + +### Stratégie de résolution de manifest + +1. si `item.manifest_url` est déjà présent, il est renvoyé ; +2. sinon, extraction d’un ARK depuis `record_url` (ou URL fournie) ; +3. construction déterministe de l’URL IIIF manifest Gallica. 
+ +### Robustesse / mode fallback + +- Le connecteur tente un mode live SRU Gallica ; +- pour éviter de casser la suite en environnement instable, un mode fixtures est disponible (`CLAFOUTIS_GALLICA_USE_FIXTURES=true` au MVP, valeur par défaut) ; +- en cas d’échec live, le connecteur renvoie un succès dégradé avec données fixtures et `partial_failures` explicite. + +### Limites connues (MVP) + +- le parsing SRU est volontairement minimal et basé sur un sous-ensemble Dublin Core ; +- certains champs Gallica restent absents/incertains selon les notices ; +- la détection fine des types documentaires sera améliorée aux lots suivants. + ## Principes de développement - code modulaire ; diff --git a/app/backend/app/__init__.py b/app/backend/app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/backend/app/api/__init__.py b/app/backend/app/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/backend/app/api/dependencies.py b/app/backend/app/api/dependencies.py new file mode 100644 index 0000000000000000000000000000000000000000..94f6ec778cf8379c6941663dcd6439a587288925 --- /dev/null +++ b/app/backend/app/api/dependencies.py @@ -0,0 +1,54 @@ +"""Dependency providers for API routes.""" + +from functools import lru_cache + +from app.connectors.gallica import GallicaConnector +from app.connectors.manifest_by_url_connector import ManifestByUrlConnector +from app.connectors.mock_connector import MockConnector +from app.connectors.registry import ConnectorRegistry +from app.services.import_service import ImportService +from app.services.item_service import ItemService +from app.services.manifest_resolver import ManifestResolver +from app.services.search_orchestrator import SearchOrchestrator +from app.services.source_service import SourceService + + +@lru_cache(maxsize=1) +def get_registry() -> 
ConnectorRegistry: + """Create and cache connector registry with MVP connectors.""" + + registry = ConnectorRegistry() + registry.register(MockConnector()) + registry.register(GallicaConnector()) + registry.register(ManifestByUrlConnector()) + return registry + + +def get_search_orchestrator() -> SearchOrchestrator: + """Return orchestrator instance wired with connector registry.""" + + return SearchOrchestrator(get_registry()) + + +def get_source_service() -> SourceService: + """Return source service instance.""" + + return SourceService(get_registry()) + + +def get_item_service() -> ItemService: + """Return item service instance.""" + + return ItemService(get_registry()) + + +def get_manifest_resolver() -> ManifestResolver: + """Return manifest resolver instance.""" + + return ManifestResolver(get_registry()) + + +def get_import_service() -> ImportService: + """Return import service instance.""" + + return ImportService(get_registry()) diff --git a/app/backend/app/api/health.py b/app/backend/app/api/health.py new file mode 100644 index 0000000000000000000000000000000000000000..c96aa5ae77ba5880ff70d5e99f7d98a14d59c172 --- /dev/null +++ b/app/backend/app/api/health.py @@ -0,0 +1,12 @@ +"""Health endpoint.""" + +from fastapi import APIRouter + +router = APIRouter(tags=["health"]) + + +@router.get("/health") +async def health() -> dict[str, str]: + """Return backend health status.""" + + return {"status": "ok"} diff --git a/app/backend/app/api/import_.py b/app/backend/app/api/import_.py new file mode 100644 index 0000000000000000000000000000000000000000..27eeece0b6e757c4bfdcf6fb2445be5d076685de --- /dev/null +++ b/app/backend/app/api/import_.py @@ -0,0 +1,19 @@ +"""Import endpoint for notice or manifest URLs.""" + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_import_service +from app.models.import_models import ImportRequest, ImportResponse +from app.services.import_service import ImportService + +router = APIRouter(tags=["import"]) 
+ + +@router.post("/import", response_model=ImportResponse) +async def import_item( + payload: ImportRequest, + service: ImportService = Depends(get_import_service), +) -> ImportResponse: + """Import an external URL and attempt to resolve source and manifest.""" + + return await service.import_url(payload.url) diff --git a/app/backend/app/api/items.py b/app/backend/app/api/items.py new file mode 100644 index 0000000000000000000000000000000000000000..4e3ac1f77d6b400b0665992b07ebeb3a0486cb9d --- /dev/null +++ b/app/backend/app/api/items.py @@ -0,0 +1,19 @@ +"""Item detail endpoint.""" + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_item_service +from app.models.normalized_item import NormalizedItem +from app.services.item_service import ItemService + +router = APIRouter(tags=["items"]) + + +@router.get("/item/{global_id}", response_model=NormalizedItem) +async def get_item( + global_id: str, + service: ItemService = Depends(get_item_service), +) -> NormalizedItem: + """Return a normalized item by global identifier.""" + + return await service.get_item(global_id) diff --git a/app/backend/app/api/manifest.py b/app/backend/app/api/manifest.py new file mode 100644 index 0000000000000000000000000000000000000000..5206fd9489c34c959052eef86f1c277129d707f3 --- /dev/null +++ b/app/backend/app/api/manifest.py @@ -0,0 +1,19 @@ +"""Manifest resolution endpoint.""" + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_manifest_resolver +from app.models.manifest_models import ResolveManifestRequest, ResolveManifestResponse +from app.services.manifest_resolver import ManifestResolver + +router = APIRouter(tags=["manifest"]) + + +@router.post("/resolve-manifest", response_model=ResolveManifestResponse) +async def resolve_manifest( + payload: ResolveManifestRequest, + resolver: ManifestResolver = Depends(get_manifest_resolver), +) -> ResolveManifestResponse: + """Resolve manifest URL for a source item.""" + + return await 
resolver.resolve(payload) diff --git a/app/backend/app/api/router.py b/app/backend/app/api/router.py new file mode 100644 index 0000000000000000000000000000000000000000..f02e57843a28ac12ec707e8b74a3173c68e5f686 --- /dev/null +++ b/app/backend/app/api/router.py @@ -0,0 +1,13 @@ +"""Top-level API router.""" + +from fastapi import APIRouter + +from app.api import health, import_, items, manifest, search, sources + +api_router = APIRouter(prefix="/api") +api_router.include_router(health.router) +api_router.include_router(sources.router) +api_router.include_router(search.router) +api_router.include_router(items.router) +api_router.include_router(manifest.router) +api_router.include_router(import_.router) diff --git a/app/backend/app/api/search.py b/app/backend/app/api/search.py new file mode 100644 index 0000000000000000000000000000000000000000..aa3e0ea8c95bd1499957658b929290354347ae0f --- /dev/null +++ b/app/backend/app/api/search.py @@ -0,0 +1,19 @@ +"""Search endpoint.""" + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_search_orchestrator +from app.models.search_models import SearchRequest, SearchResponse +from app.services.search_orchestrator import SearchOrchestrator + +router = APIRouter(tags=["search"]) + + +@router.post("/search", response_model=SearchResponse) +async def search_items( + payload: SearchRequest, + orchestrator: SearchOrchestrator = Depends(get_search_orchestrator), +) -> SearchResponse: + """Run federated search and return normalized results.""" + + return await orchestrator.search(payload) diff --git a/app/backend/app/api/sources.py b/app/backend/app/api/sources.py new file mode 100644 index 0000000000000000000000000000000000000000..5b29678d591bee1867d1068129092b747d8233e5 --- /dev/null +++ b/app/backend/app/api/sources.py @@ -0,0 +1,16 @@ +"""Source listing endpoint.""" + +from fastapi import APIRouter, Depends + +from app.api.dependencies import get_source_service +from app.models.source_models import 
SourcesResponse +from app.services.source_service import SourceService + +router = APIRouter(tags=["sources"]) + + +@router.get("/sources", response_model=SourcesResponse) +async def list_sources(service: SourceService = Depends(get_source_service)) -> SourcesResponse: + """List registered sources and capabilities.""" + + return await service.list_sources() diff --git a/app/backend/app/config/__init__.py b/app/backend/app/config/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/backend/app/config/settings.py b/app/backend/app/config/settings.py new file mode 100644 index 0000000000000000000000000000000000000000..e4568be2e3b80bd867b11a8ded135062a9cdc009 --- /dev/null +++ b/app/backend/app/config/settings.py @@ -0,0 +1,21 @@ +"""Application settings loaded from environment variables.""" + +from pydantic import Field +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """Runtime settings for the backend application.""" + + app_name: str = "Clafoutis Backend" + app_version: str = "0.1.0" + debug: bool = False + request_timeout_seconds: float = Field(default=8.0, gt=0) + cors_allow_origins: list[str] = Field(default_factory=lambda: ["http://localhost:5173"]) + gallica_sru_base_url: str = "https://gallica.bnf.fr/SRU" + gallica_use_fixtures: bool = True + + model_config = SettingsConfigDict(env_prefix="CLAFOUTIS_", extra="ignore") + + +settings = Settings() diff --git a/app/backend/app/connectors/__init__.py b/app/backend/app/connectors/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/backend/app/connectors/base.py b/app/backend/app/connectors/base.py new file mode 100644 index 0000000000000000000000000000000000000000..48f1525009265bee4ab0f1031a7c86033574a8ed --- /dev/null +++ b/app/backend/app/connectors/base.py @@ -0,0 +1,47 @@ +"""Abstract 
connector interface for all external sources.""" + +from __future__ import annotations + +from abc import ABC, abstractmethod + +from app.models.normalized_item import NormalizedItem +from app.models.search_models import SearchResponse +from app.models.source_models import SourceCapabilities + + +class BaseConnector(ABC): + """Common contract implemented by every source connector.""" + + name: str + label: str + source_type: str + + @abstractmethod + async def search( + self, + query: str, + filters: dict[str, object], + page: int, + page_size: int, + ) -> SearchResponse: + """Execute source search and return normalized results.""" + + @abstractmethod + async def get_item(self, source_item_id: str) -> NormalizedItem | None: + """Get a single normalized item by source-specific identifier.""" + + @abstractmethod + async def resolve_manifest( + self, + item: NormalizedItem | None = None, + record_url: str | None = None, + ) -> str | None: + """Resolve a IIIF manifest URL from item metadata or record URL.""" + + @abstractmethod + async def healthcheck(self) -> dict[str, str]: + """Check connector health and return a compact status report.""" + + @abstractmethod + async def capabilities(self) -> SourceCapabilities: + """Declare static connector capabilities.""" diff --git a/app/backend/app/connectors/gallica/__init__.py b/app/backend/app/connectors/gallica/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..369cf45d800c807626b8b1e8511fa56e514471fc --- /dev/null +++ b/app/backend/app/connectors/gallica/__init__.py @@ -0,0 +1,5 @@ +"""Gallica connector package.""" + +from .connector import GallicaConnector + +__all__ = ["GallicaConnector"] diff --git a/app/backend/app/connectors/gallica/connector.py b/app/backend/app/connectors/gallica/connector.py new file mode 100644 index 0000000000000000000000000000000000000000..47cf89caac86707b7cae9d8e58f82ff1d360a22a --- /dev/null +++ b/app/backend/app/connectors/gallica/connector.py @@ -0,0 +1,264 @@ 
+"""Gallica connector implementation.""" + +from __future__ import annotations + +import time +import xml.etree.ElementTree as ET +from urllib.parse import quote_plus + +from app.config.settings import settings +from app.connectors.base import BaseConnector +from app.connectors.gallica.fixtures import FIXTURE_GALLICA_RECORDS +from app.models.normalized_item import NormalizedItem +from app.models.search_models import PartialFailure, SearchResponse +from app.models.source_models import SourceCapabilities +from app.utils.http_client import build_async_client +from app.utils.ids import make_global_id + + +class GallicaConnector(BaseConnector): + """Gallica/BnF connector with live SRU mode and deterministic fixture fallback.""" + + name = "gallica" + label = "Gallica / BnF" + source_type = "institution" + + async def search( + self, + query: str, + filters: dict[str, object], + page: int, + page_size: int, + ) -> SearchResponse: + """Search Gallica through SRU and map records to NormalizedItem.""" + + start = time.perf_counter() + try: + records = await self._fetch_search_records(query=query, page=page, page_size=page_size) + items = [self._map_record(record, index) for index, record in enumerate(records)] + partial = [PartialFailure(source=self.name, status="ok")] + except Exception as exc: + records = self._search_fixtures(query) + items = [self._map_fixture_record(record, index) for index, record in enumerate(records)] + partial = [ + PartialFailure( + source=self.name, + status="degraded", + error=f"live_gallica_unavailable: {exc}", + ) + ] + + start_index = (page - 1) * page_size + page_items = items[start_index : start_index + page_size] + + return SearchResponse( + query=query, + page=page, + page_size=page_size, + total_estimated=len(items), + results=page_items, + sources_used=[self.name], + partial_failures=partial, + duration_ms=int((time.perf_counter() - start) * 1000), + ) + + async def get_item(self, source_item_id: str) -> NormalizedItem | None: + 
"""Retrieve one Gallica item by ARK, using live mode then fixtures fallback.""" + + try: + records = await self._fetch_search_records( + query=f'ark all "{source_item_id}"', + page=1, + page_size=1, + raw_query=True, + ) + if records: + return self._map_record(records[0], 0) + except Exception: + pass + + for fixture in FIXTURE_GALLICA_RECORDS: + if fixture["source_item_id"] == source_item_id: + return self._map_fixture_record(fixture, 0) + return None + + async def resolve_manifest( + self, + item: NormalizedItem | None = None, + record_url: str | None = None, + ) -> str | None: + """Resolve Gallica IIIF manifest from normalized item or record URL.""" + + if item and item.manifest_url: + return item.manifest_url + + source = record_url or (item.record_url if item else None) + if not source: + return None + + ark = self._extract_ark(source) + if not ark: + return None + + return self._manifest_from_ark(ark) + + async def healthcheck(self) -> dict[str, str]: + """Run lightweight Gallica availability check.""" + + if settings.gallica_use_fixtures: + return {"status": "ok", "mode": "fixtures"} + + params_query = quote_plus('dc.title all "dante"') + url = ( + f"{settings.gallica_sru_base_url}?version=1.2&operation=searchRetrieve" + f"&query={params_query}&maximumRecords=1" + ) + + async with await build_async_client() as client: + response = await client.get(url) + if response.status_code >= 400: + return {"status": "error", "mode": "live"} + return {"status": "ok", "mode": "live"} + + async def capabilities(self) -> SourceCapabilities: + """Return Gallica connector capabilities.""" + + return SourceCapabilities(search=True, get_item=True, resolve_manifest=True) + + async def _fetch_search_records( + self, + query: str, + page: int, + page_size: int, + raw_query: bool = False, + ) -> list[ET.Element]: + if settings.gallica_use_fixtures: + raise RuntimeError("fixtures mode enabled") + + sru_query = query if raw_query else f'dc.title all "{query}"' + start_record = 
((page - 1) * page_size) + 1 + encoded_query = quote_plus(sru_query) + + url = ( + f"{settings.gallica_sru_base_url}?version=1.2&operation=searchRetrieve" + f"&query={encoded_query}&startRecord={start_record}&maximumRecords={page_size}" + ) + + async with await build_async_client() as client: + response = await client.get(url) + response.raise_for_status() + + root = ET.fromstring(response.text) + return [record for record in root.iter() if record.tag.endswith("record")] + + def _map_record(self, record: ET.Element, index: int) -> NormalizedItem: + dc_values = self._extract_dc_values(record) + + source_item_id = self._extract_ark(dc_values.get("identifier", [])) + if not source_item_id: + source_item_id = f"gallica-record-{index}" + + title = self._first(dc_values.get("title", []), default="Document Gallica") + creators = dc_values.get("creator", []) + date_display = self._first(dc_values.get("date", []), default=None) + object_type = self._map_object_type(self._first(dc_values.get("type", []), default="other")) + record_url = self._first(dc_values.get("identifier", []), default=None) + manifest_url = self._manifest_from_ark(source_item_id) if source_item_id.startswith("ark:/") else None + + warnings: list[str] = [] + if not creators: + warnings.append("missing_creators") + if record_url is None: + warnings.append("missing_record_url") + + return NormalizedItem( + id=make_global_id(self.name, source_item_id), + source=self.name, + source_label=self.label, + source_item_id=source_item_id, + title=title, + creators=creators, + date_display=date_display, + object_type=object_type, + institution="Bibliothèque nationale de France", + thumbnail_url=None, + record_url=record_url, + manifest_url=manifest_url, + has_iiif_manifest=manifest_url is not None, + has_images=True, + has_ocr=False, + availability="public", + relevance_score=max(0.0, 1.0 - (index * 0.01)), + normalization_warnings=warnings, + ) + + def _map_fixture_record(self, fixture: dict[str, object], index: 
int) -> NormalizedItem: + source_item_id = str(fixture["source_item_id"]) + manifest_url = self._manifest_from_ark(source_item_id) + return NormalizedItem( + id=make_global_id(self.name, source_item_id), + source=self.name, + source_label=self.label, + source_item_id=source_item_id, + title=str(fixture["title"]), + creators=[str(value) for value in fixture.get("creators", [])], + date_display=str(fixture.get("date_display")) if fixture.get("date_display") else None, + object_type=str(fixture.get("object_type", "other")), + institution=str(fixture.get("institution")) if fixture.get("institution") else None, + thumbnail_url=str(fixture.get("thumbnail_url")) if fixture.get("thumbnail_url") else None, + record_url=str(fixture.get("record_url")) if fixture.get("record_url") else None, + manifest_url=manifest_url, + has_iiif_manifest=True, + has_images=True, + has_ocr=False, + availability="public", + relevance_score=max(0.0, 1.0 - (index * 0.01)), + normalization_warnings=["fixture_mode"], + ) + + def _extract_dc_values(self, record: ET.Element) -> dict[str, list[str]]: + values: dict[str, list[str]] = {} + for node in record.iter(): + if not node.tag.startswith("{"): + continue + local_name = node.tag.split("}", maxsplit=1)[1] + if local_name in {"title", "creator", "date", "identifier", "type"} and node.text: + values.setdefault(local_name, []).append(node.text.strip()) + return values + + def _extract_ark(self, identifiers: list[str] | str) -> str | None: + values = [identifiers] if isinstance(identifiers, str) else identifiers + for value in values: + if "ark:/" not in value: + continue + ark = value[value.index("ark:/") :] + return ark.split("?")[0].rstrip("/") + return None + + def _manifest_from_ark(self, ark: str) -> str: + return f"https://gallica.bnf.fr/iiif/{ark}/manifest.json" + + def _search_fixtures(self, query: str) -> list[dict[str, object]]: + lowered = query.lower().strip() + return [ + record + for record in FIXTURE_GALLICA_RECORDS + if lowered in 
str(record["title"]).lower() or lowered in " ".join(record.get("creators", [])).lower() + ] + + def _map_object_type(self, raw_type: str) -> str: + lowered = raw_type.lower() + if "manus" in lowered: + return "manuscript" + if "book" in lowered or "livre" in lowered: + return "book" + if "map" in lowered or "carte" in lowered: + return "map" + if "image" in lowered or "estampe" in lowered: + return "image" + if "journal" in lowered or "newspaper" in lowered: + return "newspaper" + return "other" + + def _first(self, values: list[str], default: str | None) -> str | None: + return values[0] if values else default diff --git a/app/backend/app/connectors/gallica/fixtures.py b/app/backend/app/connectors/gallica/fixtures.py new file mode 100644 index 0000000000000000000000000000000000000000..9b565e1d65e7dd668ee005625a67b9c76473f24a --- /dev/null +++ b/app/backend/app/connectors/gallica/fixtures.py @@ -0,0 +1,24 @@ +"""Static fixture records for Gallica connector fallback mode.""" + +FIXTURE_GALLICA_RECORDS: list[dict[str, object]] = [ + { + "source_item_id": "ark:/12148/btv1b55002481n", + "title": "Livre d'heures à l'usage de Rome", + "creators": ["Anonyme"], + "date_display": "XVe siècle", + "object_type": "manuscript", + "institution": "Bibliothèque nationale de France", + "record_url": "https://gallica.bnf.fr/ark:/12148/btv1b55002481n", + "thumbnail_url": "https://gallica.bnf.fr/ark:/12148/btv1b55002481n.thumbnail", + }, + { + "source_item_id": "ark:/12148/bpt6k1512248m", + "title": "La Divine Comédie de Dante Alighieri", + "creators": ["Dante Alighieri"], + "date_display": "1898", + "object_type": "book", + "institution": "Bibliothèque nationale de France", + "record_url": "https://gallica.bnf.fr/ark:/12148/bpt6k1512248m", + "thumbnail_url": "https://gallica.bnf.fr/ark:/12148/bpt6k1512248m.thumbnail", + }, +] diff --git a/app/backend/app/connectors/manifest_by_url_connector.py b/app/backend/app/connectors/manifest_by_url_connector.py new file mode 100644 index 
0000000000000000000000000000000000000000..8229b8d1986534c639e71d83a05f3c037525a90a --- /dev/null +++ b/app/backend/app/connectors/manifest_by_url_connector.py @@ -0,0 +1,105 @@ +"""Generic connector resolving IIIF manifests from arbitrary URLs.""" + +from __future__ import annotations + +from urllib.parse import urlparse + +from app.connectors.base import BaseConnector +from app.models.normalized_item import NormalizedItem +from app.models.search_models import PartialFailure, SearchResponse +from app.models.source_models import SourceCapabilities + + +class ManifestByUrlConnector(BaseConnector): + """Connector dedicated to URL import and generic manifest detection heuristics.""" + + name = "manifest_by_url" + label = "Manifest by URL" + source_type = "generic" + + async def search( + self, + query: str, + filters: dict[str, object], + page: int, + page_size: int, + ) -> SearchResponse: + """Return empty search results because this connector is import-only.""" + + return SearchResponse( + query=query, + page=page, + page_size=page_size, + total_estimated=0, + results=[], + sources_used=[self.name], + partial_failures=[PartialFailure(source=self.name, status="ok")], + duration_ms=1, + ) + + async def get_item(self, source_item_id: str) -> NormalizedItem | None: + """Return no item because this connector does not expose source IDs.""" + + return None + + async def resolve_manifest( + self, + item: NormalizedItem | None = None, + record_url: str | None = None, + ) -> str | None: + """Resolve manifest URL by direct detection or lightweight notice heuristics.""" + + candidate_url = record_url or (item.record_url if item else None) + if not candidate_url: + return None + + # Heuristic 1: direct manifest URL patterns + if self._looks_like_manifest_url(candidate_url): + return candidate_url + + # Heuristic 2: common notice -> manifest patterns + generated_candidates = self._notice_to_manifest_candidates(candidate_url) + for candidate in generated_candidates: + if 
self._looks_like_manifest_url(candidate): + return candidate + + return None + + async def healthcheck(self) -> dict[str, str]: + """Return healthy status for the local heuristic connector.""" + + return {"status": "ok"} + + async def capabilities(self) -> SourceCapabilities: + """Declare capabilities for this connector.""" + + return SourceCapabilities(search=False, get_item=False, resolve_manifest=True) + + def _looks_like_manifest_url(self, url: str) -> bool: + parsed = urlparse(url) + lowered_path = parsed.path.lower() + lowered_query = parsed.query.lower() + return ( + "manifest" in lowered_path + or lowered_path.endswith("manifest.json") + or lowered_query.startswith("manifest=") + or "iiif_manifest" in lowered_query + ) + + def _notice_to_manifest_candidates(self, url: str) -> list[str]: + parsed = urlparse(url) + clean_path = parsed.path.rstrip("/") + + suffixes = [ + "/manifest", + "/manifest.json", + "/iiif/manifest", + "/iiif/manifest.json", + ] + + candidates: list[str] = [] + for suffix in suffixes: + candidate = parsed._replace(path=f"{clean_path}{suffix}", query="").geturl() + candidates.append(candidate) + + return candidates diff --git a/app/backend/app/connectors/mock_connector.py b/app/backend/app/connectors/mock_connector.py new file mode 100644 index 0000000000000000000000000000000000000000..239804ce3d420d87a9bfed2b0daeba4c8628be64 --- /dev/null +++ b/app/backend/app/connectors/mock_connector.py @@ -0,0 +1,109 @@ +"""Mock connector used by backend lot 1 to provide stable demo data.""" + +from app.connectors.base import BaseConnector +from app.models.normalized_item import NormalizedItem +from app.models.search_models import PartialFailure, SearchResponse +from app.models.source_models import SourceCapabilities +from app.utils.ids import make_global_id + + +class MockConnector(BaseConnector): + """Simple in-memory connector implementing BaseConnector contract.""" + + name = "mock" + label = "Mock Heritage Source" + source_type = "mock" + + def 
__init__(self) -> None: + self._items = { + "ms-1": NormalizedItem( + id=make_global_id("mock", "ms-1"), + source="mock", + source_label=self.label, + source_item_id="ms-1", + title="Book of Hours (Mock)", + creators=["Unknown"], + institution="Mock Institution", + object_type="manuscript", + record_url="https://mock.example.org/items/ms-1", + manifest_url="https://mock.example.org/iiif/ms-1/manifest", + has_iiif_manifest=True, + has_images=True, + has_ocr=False, + availability="public", + relevance_score=0.9, + ), + "ms-2": NormalizedItem( + id=make_global_id("mock", "ms-2"), + source="mock", + source_label=self.label, + source_item_id="ms-2", + title="Dante Manuscript (Mock)", + creators=["Anonymous"], + institution="Mock Institution", + object_type="manuscript", + record_url="https://mock.example.org/items/ms-2", + manifest_url=None, + has_iiif_manifest=False, + has_images=True, + has_ocr=True, + availability="public", + relevance_score=0.8, + ), + } + + async def search( + self, + query: str, + filters: dict[str, object], + page: int, + page_size: int, + ) -> SearchResponse: + """Return normalized in-memory results filtered by query substring.""" + + lowered = query.lower().strip() + filtered = [item for item in self._items.values() if lowered in item.title.lower()] + start = (page - 1) * page_size + end = start + page_size + page_items = filtered[start:end] + return SearchResponse( + query=query, + page=page, + page_size=page_size, + total_estimated=len(filtered), + results=page_items, + sources_used=[self.name], + partial_failures=[PartialFailure(source=self.name, status="ok")], + duration_ms=1, + ) + + async def get_item(self, source_item_id: str) -> NormalizedItem | None: + """Return normalized item when available.""" + + return self._items.get(source_item_id) + + async def resolve_manifest( + self, + item: NormalizedItem | None = None, + record_url: str | None = None, + ) -> str | None: + """Resolve manifest URL from provided item or known record URL.""" + 
+ if item is not None and item.manifest_url: + return item.manifest_url + + if record_url: + for candidate in self._items.values(): + if candidate.record_url == record_url: + return candidate.manifest_url + return None + + async def healthcheck(self) -> dict[str, str]: + """Return static healthy status for demonstration connector.""" + + return {"status": "ok"} + + async def capabilities(self) -> SourceCapabilities: + """Return static capabilities for the mock connector.""" + + return SourceCapabilities(search=True, get_item=True, resolve_manifest=True) diff --git a/app/backend/app/connectors/registry.py b/app/backend/app/connectors/registry.py new file mode 100644 index 0000000000000000000000000000000000000000..2760307212461f28ce8906061cda27ac3e710c5e --- /dev/null +++ b/app/backend/app/connectors/registry.py @@ -0,0 +1,35 @@ +"""Registry storing available connectors.""" + +from app.connectors.base import BaseConnector + + +class ConnectorRegistry: + """In-memory registry for connector instances.""" + + def __init__(self) -> None: + self._connectors: dict[str, BaseConnector] = {} + + def register(self, connector: BaseConnector) -> None: + """Register a connector instance by unique connector name.""" + + self._connectors[connector.name] = connector + + def list_names(self) -> list[str]: + """Return sorted connector names.""" + + return sorted(self._connectors.keys()) + + def get(self, name: str) -> BaseConnector: + """Return connector instance for the provided name.""" + + return self._connectors[name] + + def has(self, name: str) -> bool: + """Return whether a connector with the given name is registered.""" + + return name in self._connectors + + def list_connectors(self) -> list[BaseConnector]: + """Return registered connector instances.""" + + return [self._connectors[name] for name in self.list_names()] diff --git a/app/backend/app/main.py b/app/backend/app/main.py new file mode 100644 index 
"""FastAPI application entrypoint for backend lot 1."""

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse

from app.api.router import api_router
from app.config.settings import settings
from app.models.error_models import ErrorResponse
from app.utils.errors import AppError, BadRequestError, NotFoundError


def create_app() -> FastAPI:
    """Create and configure the FastAPI application.

    Wires CORS, the API router, and translates domain exceptions
    (BadRequestError -> 400, NotFoundError -> 404, AppError -> 500)
    into structured ``ErrorResponse`` payloads.
    """

    application = FastAPI(
        title=settings.app_name,
        version=settings.app_version,
        debug=settings.debug,
    )
    application.add_middleware(
        CORSMiddleware,
        allow_origins=settings.cors_allow_origins,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    application.include_router(api_router)

    def _payload(code: str, exc: Exception) -> dict:
        # Single place defining the error-body shape for all handlers.
        return ErrorResponse(error=code, details=str(exc)).model_dump()

    @application.exception_handler(BadRequestError)
    async def _on_bad_request(_: Request, exc: BadRequestError) -> JSONResponse:
        return JSONResponse(status_code=400, content=_payload("bad_request", exc))

    @application.exception_handler(NotFoundError)
    async def _on_not_found(_: Request, exc: NotFoundError) -> JSONResponse:
        return JSONResponse(status_code=404, content=_payload("not_found", exc))

    @application.exception_handler(AppError)
    async def _on_app_error(_: Request, exc: AppError) -> JSONResponse:
        # Catch-all for domain errors without a more specific handler.
        return JSONResponse(status_code=500, content=_payload("application_error", exc))

    return application


app = create_app()
"""Models for import-by-URL endpoint."""

from pydantic import BaseModel, ConfigDict, Field

from app.models.normalized_item import NormalizedItem


class ImportRequest(BaseModel):
    """Input payload for item import using a URL."""

    # Reject unknown fields so client typos surface as validation errors.
    model_config = ConfigDict(extra="forbid")

    # Record or manifest URL; validated server-side before resolution
    # (scheme check + basic SSRF protections in url_validation).
    url: str = Field(min_length=1)


class ImportResponse(BaseModel):
    """Result of import URL analysis and manifest resolution."""

    # Name of the connector that resolved a manifest, or None when no
    # connector matched the URL.
    detected_source: str | None = None
    # Echo of the validated input URL.
    record_url: str | None = None
    # Resolved IIIF manifest URL when resolution succeeded.
    manifest_url: str | None = None
    # Normalized item when a source connector recognized the record URL;
    # None for purely heuristic (generic) resolution.
    item: NormalizedItem | None = None
"""Normalized item model shared by all connectors and APIs."""

from pydantic import BaseModel, ConfigDict, Field, model_validator


class NormalizedItem(BaseModel):
    """Normalized representation of an item from any source."""

    # Forbid unknown fields so connector mapping bugs fail loudly.
    model_config = ConfigDict(extra="forbid")

    id: str = Field(description="Global identifier in format source:source_item_id.")
    # Connector name (e.g. "mock"); first component of `id`.
    source: str
    # Human-readable label of the source.
    source_label: str
    # Identifier within the source; second component of `id`.
    source_item_id: str
    title: str
    creators: list[str] = Field(default_factory=list)
    # Display-oriented date string; no parsing or normalization guaranteed.
    date_display: str | None = None
    object_type: str = "other"
    institution: str | None = None
    thumbnail_url: str | None = None
    record_url: str | None = None
    manifest_url: str | None = None
    has_iiif_manifest: bool = False
    has_images: bool = False
    has_ocr: bool = False
    availability: str = "unknown"
    # Merge/sort key used by the federated search orchestrator.
    relevance_score: float = 0.0
    # Non-fatal issues collected while normalizing source metadata.
    normalization_warnings: list[str] = Field(default_factory=list)

    @model_validator(mode="after")
    def validate_global_id(self) -> "NormalizedItem":
        """Ensure `id` follows the stable MVP rule `source:source_item_id`."""

        expected_id = f"{self.source}:{self.source_item_id}"
        if self.id != expected_id:
            raise ValueError("id must match source:source_item_id")
        return self
"""Models for search requests and responses."""

from pydantic import BaseModel, ConfigDict, Field

from app.models.normalized_item import NormalizedItem


class SearchRequest(BaseModel):
    """Input payload for federated search."""

    # Unknown fields are rejected to keep the API contract strict.
    model_config = ConfigDict(extra="forbid")

    query: str = Field(min_length=1)
    # None means "search every registered source".
    sources: list[str] | None = None
    # Free-form filters forwarded unchanged to each connector.
    filters: dict[str, object] = Field(default_factory=dict)
    page: int = Field(default=1, ge=1)
    page_size: int = Field(default=24, ge=1, le=100)


class PartialFailure(BaseModel):
    """Per-source failure report for partial success responses."""

    source: str
    # "ok" for a healthy source report, "error" when the orchestrator
    # caught an exception from that source.
    status: str
    error: str | None = None


class SearchResponse(BaseModel):
    """Unified search response returned by backend APIs."""

    query: str
    page: int
    page_size: int
    # Estimate only: the orchestrator reports the merged result count.
    total_estimated: int
    results: list[NormalizedItem]
    sources_used: list[str]
    partial_failures: list[PartialFailure] = Field(default_factory=list)
    duration_ms: int
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/app/backend/app/services/import_service.py b/app/backend/app/services/import_service.py new file mode 100644 index 0000000000000000000000000000000000000000..e3600a3b47a0ee0d8e550fd8926514564475da55 --- /dev/null +++ b/app/backend/app/services/import_service.py @@ -0,0 +1,61 @@ +"""Service for URL import and initial source detection.""" + +from app.connectors.registry import ConnectorRegistry +from app.models.import_models import ImportResponse +from app.utils.url_validation import validate_http_url + + +class ImportService: + """Handle import flow from URL to normalized item and manifest.""" + + def __init__(self, registry: ConnectorRegistry) -> None: + self._registry = registry + + async def import_url(self, url: str) -> ImportResponse: + """Validate URL then resolve manifest through source and generic connectors. + + Resolution order: + 1. Source-specific connectors (e.g. mock) try exact record URL mapping. + 2. `manifest_by_url` connector applies generic URL heuristics: + - direct manifest pattern detection; + - notice -> manifest candidate generation. + """ + + safe_url = validate_http_url(url) + + source_connectors = [ + connector + for connector in self._registry.list_connectors() + if connector.name != "manifest_by_url" + ] + generic_connectors = [ + connector + for connector in self._registry.list_connectors() + if connector.name == "manifest_by_url" + ] + + for connector in [*source_connectors, *generic_connectors]: + matched_item = None + if connector.name == "mock": + # Lot 4 keeps source-specific matching minimal for mock demo data. 
"""Service for item retrieval by global identifier."""

from app.connectors.registry import ConnectorRegistry
from app.models.normalized_item import NormalizedItem
from app.utils.errors import BadRequestError, NotFoundError
from app.utils.ids import split_global_id


class ItemService:
    """Resolve item details using global id policy source:source_item_id."""

    def __init__(self, registry: ConnectorRegistry) -> None:
        self._registry = registry

    async def get_item(self, global_id: str) -> NormalizedItem:
        """Fetch normalized item from connector using global id.

        Raises:
            BadRequestError: when *global_id* does not match
                ``source:source_item_id``.
            NotFoundError: when the source is unknown or the item is missing.
        """

        try:
            source, source_item_id = split_global_id(global_id)
        except ValueError as exc:
            # Chain the original parse error so tracebacks keep the root
            # cause instead of reporting "during handling of the above
            # exception, another exception occurred" (PEP 3134).
            raise BadRequestError(
                "Invalid id format, expected source:source_item_id"
            ) from exc
        if not self._registry.has(source):
            raise NotFoundError(f"Unknown source '{source}'")
        item = await self._registry.get(source).get_item(source_item_id)
        if item is None:
            raise NotFoundError(f"Item '{global_id}' not found")
        return item
"""Service responsible for manifest resolution through connectors."""

from app.connectors.registry import ConnectorRegistry
from app.models.manifest_models import ResolveManifestRequest, ResolveManifestResponse
from app.utils.errors import NotFoundError
from app.utils.ids import make_global_id


class ManifestResolver:
    """Resolve manifests by delegating to source connectors."""

    def __init__(self, registry: ConnectorRegistry) -> None:
        self._registry = registry

    async def resolve(self, request: ResolveManifestRequest) -> ResolveManifestResponse:
        """Resolve manifest URL for a source item identifier.

        Raises:
            NotFoundError: when the requested source is not registered.
        """

        source = request.source
        if not self._registry.has(source):
            raise NotFoundError(f"Unknown source '{source}'")

        connector = self._registry.get(source)
        item = await connector.get_item(request.source_item_id)
        manifest_url = await connector.resolve_manifest(
            item=item,
            record_url=request.record_url,
        )

        if manifest_url:
            # MVP: all successful resolutions are metadata-driven.
            return ResolveManifestResponse(
                manifest_url=manifest_url, status="resolved", method="metadata"
            )
        return ResolveManifestResponse(manifest_url=None, status="not_found", method=None)

    async def openable_global_id(self, source: str, source_item_id: str) -> str:
        """Return deterministic global id associated to manifest operation."""

        return make_global_id(source, source_item_id)
class SearchOrchestrator:
    """Coordinate multi-source search with partial failure tolerance."""

    def __init__(self, registry: ConnectorRegistry) -> None:
        self._registry = registry

    async def search(self, request: SearchRequest) -> SearchResponse:
        """Run federated search across selected connectors and merge results.

        One failing source never fails the whole search: its exception is
        reported in ``partial_failures`` while other sources contribute
        normally.
        """

        source_names = request.sources or self._registry.list_names()
        coroutines = [self._search_one(name, request) for name in source_names]

        started = time.perf_counter()
        outcomes = await asyncio.gather(*coroutines, return_exceptions=True)
        elapsed_ms = int((time.perf_counter() - started) * 1000)

        merged = []
        failures: list[PartialFailure] = []
        healthy_sources: list[str] = []

        for name, outcome in zip(source_names, outcomes, strict=True):
            if isinstance(outcome, Exception):
                failures.append(
                    PartialFailure(source=name, status="error", error=str(outcome))
                )
            else:
                healthy_sources.append(name)
                merged.extend(outcome.results)
                failures.extend(outcome.partial_failures)

        # Stable sort: equal scores keep their per-source ordering.
        merged.sort(key=lambda entry: entry.relevance_score, reverse=True)

        return SearchResponse(
            query=request.query,
            page=request.page,
            page_size=request.page_size,
            total_estimated=len(merged),
            results=merged,
            sources_used=healthy_sources,
            partial_failures=failures,
            duration_ms=elapsed_ms,
        )

    async def _search_one(self, source: str, request: SearchRequest) -> SearchResponse:
        # Unknown source names surface as per-source failures via gather().
        if not self._registry.has(source):
            raise ValueError(f"Unknown source '{source}'")
        connector = self._registry.get(source)
        return await connector.search(
            query=request.query,
            filters=request.filters,
            page=request.page,
            page_size=request.page_size,
        )
"""Domain exceptions used across backend services and API layers."""


class AppError(Exception):
    """Root of the application's exception hierarchy.

    Services raise subclasses of this error; the API layer's exception
    handlers translate them into structured HTTP error responses.
    """


class NotFoundError(AppError):
    """Raised when a requested entity (source, item, ...) does not exist."""


class BadRequestError(AppError):
    """Raised when user-supplied input fails business-level validation."""
"""Helpers to create and parse stable global identifiers."""


def make_global_id(source: str, source_item_id: str) -> str:
    """Build stable global id using source:source_item_id format."""

    return f"{source}:{source_item_id}"


def split_global_id(global_id: str) -> tuple[str, str]:
    """Split global identifier into source and source_item_id.

    Splits on the first ':' only, so item ids may themselves contain
    colons. Raises ValueError on missing separator or empty parts.
    """

    source, separator, source_item_id = global_id.partition(":")
    if not separator:
        raise ValueError("global id must include ':' separator")
    if not source or not source_item_id:
        raise ValueError("global id must contain source and source_item_id")
    return source, source_item_id
"""Validate URL scheme/host and apply basic SSRF protections for MVP.""" + + parsed = urlparse(url) + if parsed.scheme not in {"http", "https"}: + raise BadRequestError("URL scheme must be http or https") + + if not parsed.netloc or parsed.hostname is None: + raise BadRequestError("URL must contain a valid host") + + _reject_local_hostnames(parsed.hostname) + _reject_private_or_local_ip_literals(parsed.hostname) + _reject_hostnames_resolving_to_private_or_local_ips(parsed.hostname) + + return url + + +def _reject_local_hostnames(hostname: str) -> None: + lowered = hostname.lower() + blocked = {"localhost", "localhost.localdomain"} + if lowered in blocked: + raise BadRequestError("Local hosts are not allowed") + + +def _reject_private_or_local_ip_literals(hostname: str) -> None: + try: + ip = ipaddress.ip_address(hostname) + except ValueError: + return + + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_unspecified: + raise BadRequestError("Private or local IPs are not allowed") + + +def _reject_hostnames_resolving_to_private_or_local_ips(hostname: str) -> None: + try: + addrinfos = socket.getaddrinfo(hostname, None) + except socket.gaierror: + # If hostname cannot be resolved, keep request valid and let downstream + # connector/network errors explain the failure. + return + + for info in addrinfos: + raw_ip = info[4][0] + ip = ipaddress.ip_address(raw_ip) + if ip.is_private or ip.is_loopback or ip.is_link_local or ip.is_reserved or ip.is_unspecified: + raise BadRequestError("Resolved host points to a private or local IP") diff --git a/app/frontend/index.html b/app/frontend/index.html new file mode 100644 index 0000000000000000000000000000000000000000..806ed4232a48eb1f697b68e375c50133596c64aa --- /dev/null +++ b/app/frontend/index.html @@ -0,0 +1,12 @@ + + + + + + Clafoutis + + +
+ + + diff --git a/app/frontend/package.json b/app/frontend/package.json new file mode 100644 index 0000000000000000000000000000000000000000..cc5d452af51fbf14982a9e34453a9828d9e8c691 --- /dev/null +++ b/app/frontend/package.json @@ -0,0 +1,30 @@ +{ + "name": "clafoutis-frontend", + "version": "0.1.0", + "private": true, + "type": "module", + "scripts": { + "dev": "vite", + "build": "tsc -b && vite build", + "preview": "vite preview", + "typecheck": "tsc --noEmit" + }, + "dependencies": { + "@tanstack/react-query": "^5.59.16", + "mirador": "^3.3.0", + "react": "^18.3.1", + "react-dom": "^18.3.1", + "react-router-dom": "^6.28.0", + "zustand": "^5.0.1" + }, + "devDependencies": { + "@types/react": "^18.3.12", + "@types/react-dom": "^18.3.1", + "@vitejs/plugin-react": "^4.3.3", + "autoprefixer": "^10.4.20", + "postcss": "^8.4.49", + "tailwindcss": "^3.4.15", + "typescript": "^5.6.3", + "vite": "^5.4.10" + } +} diff --git a/app/frontend/postcss.config.js b/app/frontend/postcss.config.js new file mode 100644 index 0000000000000000000000000000000000000000..2e7af2b7f1a6f391da1631d93968a9d487ba977d --- /dev/null +++ b/app/frontend/postcss.config.js @@ -0,0 +1,6 @@ +export default { + plugins: { + tailwindcss: {}, + autoprefixer: {}, + }, +} diff --git a/app/frontend/src/app/providers.tsx b/app/frontend/src/app/providers.tsx new file mode 100644 index 0000000000000000000000000000000000000000..7f0e648c7b206dcb4d991c9417f8a38b4adca2d0 --- /dev/null +++ b/app/frontend/src/app/providers.tsx @@ -0,0 +1,17 @@ +import { QueryClientProvider } from '@tanstack/react-query' +import { ReactNode } from 'react' +import { BrowserRouter } from 'react-router-dom' + +import { queryClient } from '../lib/queryClient' + +interface ProvidersProps { + children: ReactNode +} + +export function Providers({ children }: ProvidersProps) { + return ( + + {children} + + ) +} diff --git a/app/frontend/src/app/router.tsx b/app/frontend/src/app/router.tsx new file mode 100644 index 
0000000000000000000000000000000000000000..214efc33af8d0789f064a8006fd83439c842c1a4 --- /dev/null +++ b/app/frontend/src/app/router.tsx @@ -0,0 +1,45 @@ +import { Link, Navigate, Route, Routes } from 'react-router-dom' + +import { AboutPage } from '../pages/AboutPage' +import { ImportPage } from '../pages/ImportPage' +import { ReaderPage } from '../pages/ReaderPage' +import { SearchPage } from '../pages/SearchPage' +import { SourcesPage } from '../pages/SourcesPage' + +function AppLayout() { + return ( +
+
+ +
+ + } path="/" /> + } path="/search" /> + } path="/reader" /> + } path="/import" /> + } path="/sources" /> + } path="/about" /> + +
+ ) +} + +export function AppRouter() { + return +} diff --git a/app/frontend/src/components/results/ResultCard.tsx b/app/frontend/src/components/results/ResultCard.tsx new file mode 100644 index 0000000000000000000000000000000000000000..cf7e90b19d20f980408ef2959045066ddf07442b --- /dev/null +++ b/app/frontend/src/components/results/ResultCard.tsx @@ -0,0 +1,34 @@ +import type { NormalizedItem } from '../../types/normalized' + +interface ResultCardProps { + item: NormalizedItem + selected: boolean + onToggleCompare: (itemId: string) => void + onPrepareRead: (item: NormalizedItem) => void +} + +export function ResultCard({ item, selected, onToggleCompare, onPrepareRead }: ResultCardProps) { + return ( +
+

{item.title}

+

{item.source_label}

+

{item.institution ?? 'Institution inconnue'}

+
+ + +
+
+ ) +} diff --git a/app/frontend/src/components/search/ResultsGrid.tsx b/app/frontend/src/components/search/ResultsGrid.tsx new file mode 100644 index 0000000000000000000000000000000000000000..e8a52c8e29cce05fbac62ce7301c8fdc70b23923 --- /dev/null +++ b/app/frontend/src/components/search/ResultsGrid.tsx @@ -0,0 +1,34 @@ +import { ResultCard } from '../results/ResultCard' +import type { NormalizedItem } from '../../types/normalized' + +interface ResultsGridProps { + items: NormalizedItem[] + selectedForComparison: string[] + onToggleCompare: (itemId: string) => void + onPrepareRead: (item: NormalizedItem) => void +} + +export function ResultsGrid({ + items, + selectedForComparison, + onToggleCompare, + onPrepareRead, +}: ResultsGridProps) { + if (items.length === 0) { + return

Aucun résultat.

+ } + + return ( +
+ {items.map((item) => ( + + ))} +
+ ) +} diff --git a/app/frontend/src/components/search/SearchBar.tsx b/app/frontend/src/components/search/SearchBar.tsx new file mode 100644 index 0000000000000000000000000000000000000000..5561a7ee075816aaa8b2b6c24f75882bcffed60c --- /dev/null +++ b/app/frontend/src/components/search/SearchBar.tsx @@ -0,0 +1,29 @@ +import { FormEvent, useState } from 'react' + +interface SearchBarProps { + initialQuery: string + onSubmit: (query: string) => void +} + +export function SearchBar({ initialQuery, onSubmit }: SearchBarProps) { + const [query, setQuery] = useState(initialQuery) + + const handleSubmit = (event: FormEvent) => { + event.preventDefault() + onSubmit(query.trim()) + } + + return ( +
+ setQuery(event.target.value)} + /> + +
+ ) +} diff --git a/app/frontend/src/components/search/SearchFilters.tsx b/app/frontend/src/components/search/SearchFilters.tsx new file mode 100644 index 0000000000000000000000000000000000000000..32db6207238fdc8623e9f0f90edd0ae60104ae4a --- /dev/null +++ b/app/frontend/src/components/search/SearchFilters.tsx @@ -0,0 +1,50 @@ +import { useSources } from '../../hooks/useSources' +import type { SearchFilters as SearchFiltersType } from '../../types/filters' + +interface SearchFiltersProps { + value: SearchFiltersType + onChange: (filters: SearchFiltersType) => void +} + +export function SearchFilters({ value, onChange }: SearchFiltersProps) { + const sourcesQuery = useSources() + + return ( + + ) +} diff --git a/app/frontend/src/hooks/useSearch.ts b/app/frontend/src/hooks/useSearch.ts new file mode 100644 index 0000000000000000000000000000000000000000..b8499252c693e0e911fb01e1f92237630a83bb6b --- /dev/null +++ b/app/frontend/src/hooks/useSearch.ts @@ -0,0 +1,16 @@ +import { useMutation } from '@tanstack/react-query' + +import { searchItems } from '../lib/apiClient' +import { useSearchStore } from '../store/searchStore' +import type { SearchRequest, SearchResponse } from '../types/api' + +export function useSearch() { + const setResults = useSearchStore((state) => state.setResults) + + return useMutation({ + mutationFn: searchItems, + onSuccess: (data) => { + setResults(data.results) + }, + }) +} diff --git a/app/frontend/src/hooks/useSources.ts b/app/frontend/src/hooks/useSources.ts new file mode 100644 index 0000000000000000000000000000000000000000..f4055473f82f9c44face40336c0461446ef7d7ee --- /dev/null +++ b/app/frontend/src/hooks/useSources.ts @@ -0,0 +1,11 @@ +import { useQuery } from '@tanstack/react-query' + +import { listSources } from '../lib/apiClient' +import type { SourcesResponse } from '../types/api' + +export function useSources() { + return useQuery({ + queryKey: ['sources'], + queryFn: listSources, + }) +} diff --git a/app/frontend/src/index.css 
b/app/frontend/src/index.css new file mode 100644 index 0000000000000000000000000000000000000000..b5c61c956711f981a41e95f7fcf0038436cfbb22 --- /dev/null +++ b/app/frontend/src/index.css @@ -0,0 +1,3 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; diff --git a/app/frontend/src/lib/apiClient.ts b/app/frontend/src/lib/apiClient.ts new file mode 100644 index 0000000000000000000000000000000000000000..b7a0114659fe09b6a0ad1600d2ddbb980f832601 --- /dev/null +++ b/app/frontend/src/lib/apiClient.ts @@ -0,0 +1,44 @@ +import type { + ImportRequest, + ImportResponse, + SearchRequest, + SearchResponse, + SourcesResponse, +} from '../types/api' + +const BASE_URL = import.meta.env.VITE_API_BASE_URL ?? 'http://localhost:8000' + +async function request(path: string, init?: RequestInit): Promise { + const response = await fetch(`${BASE_URL}${path}`, { + ...init, + headers: { + 'Content-Type': 'application/json', + ...(init?.headers ?? {}), + }, + }) + + if (!response.ok) { + const fallback = `${response.status} ${response.statusText}` + throw new Error((await response.text()) || fallback) + } + + return (await response.json()) as T +} + +export async function searchItems(payload: SearchRequest): Promise { + return request('/api/search', { + method: 'POST', + body: JSON.stringify(payload), + }) +} + +export async function listSources(): Promise { + return request('/api/sources') +} + +export async function importByUrl(payload: ImportRequest): Promise { + return request('/api/import', { + method: 'POST', + body: JSON.stringify(payload), + }) +} diff --git a/app/frontend/src/lib/mirador/MiradorWorkspace.tsx b/app/frontend/src/lib/mirador/MiradorWorkspace.tsx new file mode 100644 index 0000000000000000000000000000000000000000..7d57eb34057316227303d209c5d400fef0c74f86 --- /dev/null +++ b/app/frontend/src/lib/mirador/MiradorWorkspace.tsx @@ -0,0 +1,111 @@ +import { useEffect, useMemo, useRef, useState } from 'react' + +import { buildMiradorConfig } from './miradorConfig' + 
+import 'mirador/dist/css/mirador.min.css' + +export interface MiradorWorkspaceProps { + manifestUrls: string[] + initialView?: 'single' | 'compare' + showMetadata?: boolean + onStateChange?: (state: unknown) => void +} + +interface MiradorModule { + viewer: (config: unknown, container: HTMLElement) => { + store?: { + getState: () => unknown + subscribe: (listener: () => void) => () => void + } + } +} + +/** + * Mirador-based reading workspace. + * It receives already-resolved manifest URLs and does not perform discovery logic. + */ +export function MiradorWorkspace({ + manifestUrls, + initialView = 'single', + showMetadata = true, + onStateChange, +}: MiradorWorkspaceProps) { + const containerRef = useRef(null) + const [loadError, setLoadError] = useState(null) + + const stableUrls = useMemo( + () => Array.from(new Set(manifestUrls.filter((manifestUrl) => manifestUrl.length > 0))), + [manifestUrls], + ) + + useEffect(() => { + if (!containerRef.current || stableUrls.length === 0) { + return + } + + let isMounted = true + let unsubscribe: (() => void) | undefined + + async function mountMirador() { + try { + const module = (await import('mirador')) as unknown as { default?: MiradorModule } & MiradorModule + const mirador = module.default ?? module + + if (!isMounted || !containerRef.current) { + return + } + + const config = buildMiradorConfig({ + manifestUrls: stableUrls, + initialView, + showMetadata, + }) + + containerRef.current.innerHTML = '' + const instance = mirador.viewer(config, containerRef.current) + + if (onStateChange && instance.store) { + unsubscribe = instance.store.subscribe(() => { + onStateChange(instance.store?.getState()) + }) + } + + setLoadError(null) + } catch (error) { + if (isMounted) { + setLoadError(error instanceof Error ? 
error.message : 'Unable to initialize Mirador') + } + } + } + + mountMirador() + + return () => { + isMounted = false + if (unsubscribe) { + unsubscribe() + } + if (containerRef.current) { + containerRef.current.innerHTML = '' + } + } + }, [stableUrls, initialView, showMetadata, onStateChange]) + + if (stableUrls.length === 0) { + return ( +
+ Aucun manifest ouvert. Revenez à la recherche pour préparer une lecture. +
+ ) + } + + if (loadError) { + return ( +
+ Impossible de charger Mirador : {loadError} +
+ ) + } + + return
}
diff --git a/app/frontend/src/lib/mirador/miradorConfig.ts b/app/frontend/src/lib/mirador/miradorConfig.ts new file mode 100644 index 0000000000000000000000000000000000000000..5f03e6fe56d7e1857b9d06e583c973a166588f0f --- /dev/null +++ b/app/frontend/src/lib/mirador/miradorConfig.ts @@ -0,0 +1,52 @@
export interface MiradorWorkspaceConfigOptions {
  manifestUrls: string[]
  initialView?: 'single' | 'compare'
  showMetadata?: boolean
}

interface MiradorWindow {
  manifestId: string
}

interface MiradorCatalogItem {
  manifestId: string
}

interface MiradorWorkspace {
  showZoomControls: boolean
}

interface MiradorConfig {
  id: string
  windows: MiradorWindow[]
  catalog: MiradorCatalogItem[]
  workspace: MiradorWorkspace
  workspaceControlPanel: {
    enabled: boolean
  }
  sideBarOpenByDefault: boolean
}

/**
 * Build a minimal Mirador config focused on reading the provided manifests.
 * One window is opened per manifest URL; the same URLs seed the catalog.
 */
export function buildMiradorConfig(options: MiradorWorkspaceConfigOptions): MiradorConfig {
  // Defaults mirror the MiradorWorkspace props: single view, metadata sidebar open.
  const { manifestUrls, initialView = 'single', showMetadata = true } = options

  const toEntry = (manifestId: string): MiradorWindow => ({ manifestId })

  return {
    id: 'mirador-root',
    windows: manifestUrls.map(toEntry),
    catalog: manifestUrls.map(toEntry),
    workspace: { showZoomControls: true },
    // The workspace control panel only helps when juxtaposing windows.
    workspaceControlPanel: { enabled: initialView === 'compare' },
    sideBarOpenByDefault: showMetadata,
  }
}
diff --git a/app/frontend/src/lib/queryClient.ts b/app/frontend/src/lib/queryClient.ts new file mode 100644 index 0000000000000000000000000000000000000000..828e6f536fb33241d045d61b1aa8a9601993bea6 --- /dev/null +++ b/app/frontend/src/lib/queryClient.ts @@ -0,0 +1,10 @@
import { QueryClient } from '@tanstack/react-query'

// Shared React Query client: results stay fresh for 30 s and a failed
// query is retried once before the error is surfaced to the UI.
export const queryClient = new QueryClient({
  defaultOptions: {
    queries: {
      staleTime: 30_000,
      retry: 1,
    },
  },
})
diff --git a/app/frontend/src/main.tsx b/app/frontend/src/main.tsx new file mode 100644 index 
0000000000000000000000000000000000000000..0bf3c81fdc2d161b41d6f84bb7ed82f3a84efdca --- /dev/null +++ b/app/frontend/src/main.tsx @@ -0,0 +1,14 @@ +import React from 'react' +import ReactDOM from 'react-dom/client' + +import { Providers } from './app/providers' +import { AppRouter } from './app/router' +import './index.css' + +ReactDOM.createRoot(document.getElementById('root')!).render( + + + + + , +) diff --git a/app/frontend/src/pages/AboutPage.tsx b/app/frontend/src/pages/AboutPage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..5110661b880cd0a2d2e9a7f5a80185cbb36bc28f --- /dev/null +++ b/app/frontend/src/pages/AboutPage.tsx @@ -0,0 +1,11 @@ +export function AboutPage() { + return ( +
+

À propos

+

+ Clafoutis est structuré en 3 couches: découverte, lecture et interopérabilité. +

+

Mirador reste strictement une couche de lecture.

+
+ ) +} diff --git a/app/frontend/src/pages/ImportPage.tsx b/app/frontend/src/pages/ImportPage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..324b753b3e068a4441d6f56c537e4d71cefe6601 --- /dev/null +++ b/app/frontend/src/pages/ImportPage.tsx @@ -0,0 +1,109 @@ +import { FormEvent, useState } from 'react' +import { useNavigate } from 'react-router-dom' + +import { importByUrl } from '../lib/apiClient' +import { useReaderStore } from '../store/readerStore' + +export function ImportPage() { + const [url, setUrl] = useState('') + const [loading, setLoading] = useState(false) + const [error, setError] = useState(null) + const [result, setResult] = useState<{ + detectedSource: string | null + recordUrl: string | null + manifestUrl: string | null + } | null>(null) + + const setOpenManifestUrls = useReaderStore((state) => state.setOpenManifestUrls) + const setViewMode = useReaderStore((state) => state.setViewMode) + + const navigate = useNavigate() + + const handleSubmit = async (event: FormEvent) => { + event.preventDefault() + const trimmed = url.trim() + if (!trimmed) { + return + } + + setLoading(true) + setError(null) + + try { + const response = await importByUrl({ url: trimmed }) + setResult({ + detectedSource: response.detected_source, + recordUrl: response.record_url, + manifestUrl: response.manifest_url, + }) + + if (response.manifest_url) { + setOpenManifestUrls([response.manifest_url]) + setViewMode('single') + } + } catch (submissionError) { + setError(submissionError instanceof Error ? submissionError.message : 'Erreur d’import') + setResult(null) + } finally { + setLoading(false) + } + } + + return ( +
+

Import manuel

+

+ Collez une URL de manifest IIIF ou une URL de notice, puis laissez le backend tenter la + résolution vers un manifest. +

+ +
+ + setUrl(event.target.value)} + /> + +
+ + {error && ( +

{error}

+ )} + + {result && ( +
+

+ Source détectée : {result.detectedSource ?? 'inconnue'} +

+

+ URL notice : {result.recordUrl ?? 'n/a'} +

+

+ Manifest résolu : {result.manifestUrl ?? 'non trouvé'} +

+ + {result.manifestUrl && ( + + )} +
+ )} +
+ ) +} diff --git a/app/frontend/src/pages/ReaderPage.tsx b/app/frontend/src/pages/ReaderPage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..5d777bb7da2a839d1dfce5552b3432ba6f5d8590 --- /dev/null +++ b/app/frontend/src/pages/ReaderPage.tsx @@ -0,0 +1,73 @@ +import { Link } from 'react-router-dom' + +import { MiradorWorkspace } from '../lib/mirador/MiradorWorkspace' +import { useReaderStore } from '../store/readerStore' +import { useSearchStore } from '../store/searchStore' + +export function ReaderPage() { + const openManifestUrls = useReaderStore((state) => state.openManifestUrls) + const viewMode = useReaderStore((state) => state.readerConfig.viewMode) + const showMetadata = useReaderStore((state) => state.readerConfig.showMetadata) + const setViewMode = useReaderStore((state) => state.setViewMode) + const setShowMetadata = useReaderStore((state) => state.setShowMetadata) + + const selectedForComparison = useSearchStore((state) => state.selectedForComparison) + const results = useSearchStore((state) => state.results) + + const selectedManifestUrls = results + .filter((item) => selectedForComparison.includes(item.id) && item.manifest_url) + .map((item) => item.manifest_url as string) + + const manifestUrls = viewMode === 'compare' ? selectedManifestUrls : openManifestUrls + + return ( +
+
+

Lecture

+ + Retour vers Recherche + +
+ +

Mirador est utilisé ici uniquement comme workspace de lecture.

+ +
+ + + +
+ + { + // Lot 3: hook kept minimal for future share/persistence extensions. + }} + /> +
+ ) +} diff --git a/app/frontend/src/pages/SearchPage.tsx b/app/frontend/src/pages/SearchPage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..1d1cca141b526b067fd7719322f3ece6a4f8c0e7 --- /dev/null +++ b/app/frontend/src/pages/SearchPage.tsx @@ -0,0 +1,69 @@ +import { SearchBar } from '../components/search/SearchBar' +import { SearchFilters } from '../components/search/SearchFilters' +import { ResultsGrid } from '../components/search/ResultsGrid' +import { useSearch } from '../hooks/useSearch' +import { useReaderStore } from '../store/readerStore' +import { useSearchStore } from '../store/searchStore' +import type { SearchRequest } from '../types/api' +import type { NormalizedItem } from '../types/normalized' + +export function SearchPage() { + const query = useSearchStore((state) => state.query) + const filters = useSearchStore((state) => state.filters) + const results = useSearchStore((state) => state.results) + const selectedForComparison = useSearchStore((state) => state.selectedForComparison) + const setQuery = useSearchStore((state) => state.setQuery) + const setFilters = useSearchStore((state) => state.setFilters) + const toggleCompareSelection = useSearchStore((state) => state.toggleCompareSelection) + + const setOpenManifestUrls = useReaderStore((state) => state.setOpenManifestUrls) + const setViewMode = useReaderStore((state) => state.setViewMode) + + const search = useSearch() + + const triggerSearch = (newQuery: string) => { + if (!newQuery) { + return + } + setQuery(newQuery) + const payload: SearchRequest = { + query: newQuery, + sources: filters.sources.length > 0 ? filters.sources : undefined, + filters: filters.hasIiifOnly ? { has_iiif_manifest: true } : {}, + page: 1, + page_size: 24, + } + search.mutate(payload) + } + + const prepareRead = (item: NormalizedItem) => { + const urls = item.manifest_url ? [item.manifest_url] : [] + setOpenManifestUrls(urls) + setViewMode('single') + } + + return ( +
+ +
+ + + {search.isPending &&

Chargement des résultats…

} + {search.isError && ( +

+ Erreur: {search.error.message} +

+ )} + + {!search.isPending && !search.isError && ( + + )} +
+
+ ) +} diff --git a/app/frontend/src/pages/SourcesPage.tsx b/app/frontend/src/pages/SourcesPage.tsx new file mode 100644 index 0000000000000000000000000000000000000000..68b12f618f9873bd7a7ad76a5d7aab083f3818f0 --- /dev/null +++ b/app/frontend/src/pages/SourcesPage.tsx @@ -0,0 +1,38 @@ +import { useSources } from '../hooks/useSources' + +export function SourcesPage() { + const sources = useSources() + + if (sources.isPending) { + return

Chargement des sources…

+ } + + if (sources.isError) { + return ( +

+ Erreur: {sources.error.message} +

+ ) + } + + return ( +
+

Sources

+
+ {sources.data.sources.map((source) => ( +
+

{source.label}

+

Nom: {source.name}

+

Type: {source.source_type}

+

Statut: {source.healthy ? 'ok' : 'error'}

+

+ Capacités: search={String(source.capabilities.search)}, get_item= + {String(source.capabilities.get_item)}, resolve_manifest= + {String(source.capabilities.resolve_manifest)} +

+
+ ))} +
+
+ ) +} diff --git a/app/frontend/src/store/readerStore.ts b/app/frontend/src/store/readerStore.ts new file mode 100644 index 0000000000000000000000000000000000000000..8722fbe491a34432cda9f56913ca5e13bf3ce600 --- /dev/null +++ b/app/frontend/src/store/readerStore.ts @@ -0,0 +1,23 @@ +import { create } from 'zustand' + +interface ReaderConfig { + viewMode: 'single' | 'compare' + showMetadata: boolean +} + +interface ReaderState { + openManifestUrls: string[] + readerConfig: ReaderConfig + setOpenManifestUrls: (urls: string[]) => void + setViewMode: (mode: ReaderConfig['viewMode']) => void + setShowMetadata: (enabled: boolean) => void +} + +export const useReaderStore = create((set) => ({ + openManifestUrls: [], + readerConfig: { viewMode: 'single', showMetadata: true }, + setOpenManifestUrls: (openManifestUrls) => set({ openManifestUrls }), + setViewMode: (mode) => set((state) => ({ readerConfig: { ...state.readerConfig, viewMode: mode } })), + setShowMetadata: (enabled) => + set((state) => ({ readerConfig: { ...state.readerConfig, showMetadata: enabled } })), +})) diff --git a/app/frontend/src/store/searchStore.ts b/app/frontend/src/store/searchStore.ts new file mode 100644 index 0000000000000000000000000000000000000000..115283410b234dca8f9636986f49c16ed9de1545 --- /dev/null +++ b/app/frontend/src/store/searchStore.ts @@ -0,0 +1,37 @@ +import { create } from 'zustand' + +import type { NormalizedItem } from '../types/normalized' +import type { SearchFilters } from '../types/filters' +import { defaultSearchFilters } from '../types/filters' + +interface SearchState { + query: string + filters: SearchFilters + results: NormalizedItem[] + selectedForComparison: string[] + setQuery: (query: string) => void + setFilters: (filters: SearchFilters) => void + setResults: (items: NormalizedItem[]) => void + toggleCompareSelection: (itemId: string) => void + clearCompareSelection: () => void +} + +export const useSearchStore = create((set) => ({ + query: '', + filters: 
defaultSearchFilters, + results: [], + selectedForComparison: [], + setQuery: (query) => set({ query }), + setFilters: (filters) => set({ filters }), + setResults: (results) => set({ results }), + toggleCompareSelection: (itemId) => + set((state) => { + const isSelected = state.selectedForComparison.includes(itemId) + return { + selectedForComparison: isSelected + ? state.selectedForComparison.filter((id) => id !== itemId) + : [...state.selectedForComparison, itemId], + } + }), + clearCompareSelection: () => set({ selectedForComparison: [] }), +})) diff --git a/app/frontend/src/store/uiStore.ts b/app/frontend/src/store/uiStore.ts new file mode 100644 index 0000000000000000000000000000000000000000..6e43a10bb5c5118ac107b12e798380138dcc581f --- /dev/null +++ b/app/frontend/src/store/uiStore.ts @@ -0,0 +1,11 @@ +import { create } from 'zustand' + +interface UiState { + sidebarOpen: boolean + setSidebarOpen: (open: boolean) => void +} + +export const useUiStore = create((set) => ({ + sidebarOpen: true, + setSidebarOpen: (sidebarOpen) => set({ sidebarOpen }), +})) diff --git a/app/frontend/src/types/api.ts b/app/frontend/src/types/api.ts new file mode 100644 index 0000000000000000000000000000000000000000..f86d01b5c48b0af13371e3fde1f5267af38245b9 --- /dev/null +++ b/app/frontend/src/types/api.ts @@ -0,0 +1,61 @@ +import type { NormalizedItem } from './normalized' + +export interface PartialFailure { + source: string + status: string + error: string | null +} + +export interface SearchRequest { + query: string + sources?: string[] + filters: Record + page: number + page_size: number +} + +export interface SearchResponse { + query: string + page: number + page_size: number + total_estimated: number + results: NormalizedItem[] + sources_used: string[] + partial_failures: PartialFailure[] + duration_ms: number +} + +export interface SourceCapabilities { + search: boolean + get_item: boolean + resolve_manifest: boolean +} + +export interface SourceDescriptor { + name: string + 
label: string + source_type: string + capabilities: SourceCapabilities + healthy: boolean + notes: string | null +} + +export interface SourcesResponse { + sources: SourceDescriptor[] +} + +export interface ApiError { + error: string + details: string | null +} + +export interface ImportRequest { + url: string +} + +export interface ImportResponse { + detected_source: string | null + record_url: string | null + manifest_url: string | null + item: NormalizedItem | null +} diff --git a/app/frontend/src/types/filters.ts b/app/frontend/src/types/filters.ts new file mode 100644 index 0000000000000000000000000000000000000000..c08ce46403c55a968701af3be3e1434cbfa6be79 --- /dev/null +++ b/app/frontend/src/types/filters.ts @@ -0,0 +1,9 @@ +export interface SearchFilters { + sources: string[] + hasIiifOnly: boolean +} + +export const defaultSearchFilters: SearchFilters = { + sources: [], + hasIiifOnly: false, +} diff --git a/app/frontend/src/types/mirador.d.ts b/app/frontend/src/types/mirador.d.ts new file mode 100644 index 0000000000000000000000000000000000000000..580f81a2339c6196febbee7d3e2186cb2da9e23f --- /dev/null +++ b/app/frontend/src/types/mirador.d.ts @@ -0,0 +1,12 @@ +declare module 'mirador' { + const mirador: { + viewer: (config: unknown, container: HTMLElement) => { + store?: { + getState: () => unknown + subscribe: (listener: () => void) => () => void + } + } + } + + export default mirador +} diff --git a/app/frontend/src/types/normalized.ts b/app/frontend/src/types/normalized.ts new file mode 100644 index 0000000000000000000000000000000000000000..f28ae1db6d09995aacb63f41f027f383a2376849 --- /dev/null +++ b/app/frontend/src/types/normalized.ts @@ -0,0 +1,20 @@ +export interface NormalizedItem { + id: string + source: string + source_label: string + source_item_id: string + title: string + creators: string[] + date_display: string | null + object_type: string + institution: string | null + thumbnail_url: string | null + record_url: string | null + manifest_url: 
string | null + has_iiif_manifest: boolean + has_images: boolean + has_ocr: boolean + availability: string + relevance_score: number + normalization_warnings: string[] +} diff --git a/app/frontend/tailwind.config.ts b/app/frontend/tailwind.config.ts new file mode 100644 index 0000000000000000000000000000000000000000..7e29af24cd2e95d3fa3cbb4cf361a1fb080dacbf --- /dev/null +++ b/app/frontend/tailwind.config.ts @@ -0,0 +1,7 @@ +import type { Config } from 'tailwindcss' + +export default { + content: ['./index.html', './src/**/*.{ts,tsx}'], + theme: { extend: {} }, + plugins: [], +} satisfies Config diff --git a/app/frontend/tsconfig.json b/app/frontend/tsconfig.json new file mode 100644 index 0000000000000000000000000000000000000000..521eb7608b2371663461de3071d56111b3324020 --- /dev/null +++ b/app/frontend/tsconfig.json @@ -0,0 +1,16 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "ESNext", + "moduleResolution": "Bundler", + "jsx": "react-jsx", + "strict": true, + "noUnusedLocals": true, + "noUnusedParameters": true, + "noFallthroughCasesInSwitch": true, + "skipLibCheck": true, + "types": ["vite/client"], + "baseUrl": "./src" + }, + "include": ["src"] +} diff --git a/app/frontend/vite.config.ts b/app/frontend/vite.config.ts new file mode 100644 index 0000000000000000000000000000000000000000..9ffcc675746194c4a34eb43404d15db40b810265 --- /dev/null +++ b/app/frontend/vite.config.ts @@ -0,0 +1,6 @@ +import { defineConfig } from 'vite' +import react from '@vitejs/plugin-react' + +export default defineConfig({ + plugins: [react()], +}) diff --git a/app/tests/integration/api/test_endpoints.py b/app/tests/integration/api/test_endpoints.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce115072a4c4a892b017cf57ca8652fde7a3686 --- /dev/null +++ b/app/tests/integration/api/test_endpoints.py @@ -0,0 +1,104 @@ +from fastapi.testclient import TestClient + +from app.main import create_app + + +client = TestClient(create_app()) + + +def 
test_health() -> None: + response = client.get("/api/health") + assert response.status_code == 200 + assert response.json()["status"] == "ok" + + +def test_sources() -> None: + response = client.get("/api/sources") + assert response.status_code == 200 + body = response.json() + assert body["sources"] + names = [source["name"] for source in body["sources"]] + assert "mock" in names + assert "gallica" in names + assert "manifest_by_url" in names + + +def test_search() -> None: + response = client.post("/api/search", json={"query": "book", "page": 1, "page_size": 10}) + assert response.status_code == 200 + body = response.json() + assert body["results"] + assert body["results"][0]["id"].startswith("mock:") + + +def test_search_gallica_fixture_source() -> None: + response = client.post( + "/api/search", + json={"query": "dante", "sources": ["gallica"], "page": 1, "page_size": 10}, + ) + assert response.status_code == 200 + body = response.json() + assert body["results"] + assert body["results"][0]["source"] == "gallica" + + +def test_item() -> None: + response = client.get("/api/item/mock:ms-1") + assert response.status_code == 200 + assert response.json()["source_item_id"] == "ms-1" + + +def test_resolve_manifest() -> None: + response = client.post( + "/api/resolve-manifest", + json={"source": "mock", "source_item_id": "ms-1"}, + ) + assert response.status_code == 200 + assert response.json()["status"] == "resolved" + + +def test_import() -> None: + response = client.post("/api/import", json={"url": "https://mock.example.org/items/ms-1"}) + assert response.status_code == 200 + assert response.json()["detected_source"] == "mock" + assert response.json()["item"]["id"] == "mock:ms-1" + + +def test_import_detects_direct_manifest_url() -> None: + response = client.post( + "/api/import", + json={"url": "https://example.org/iiif/manifest/abc"}, + ) + assert response.status_code == 200 + body = response.json() + assert body["detected_source"] == "manifest_by_url" + assert 
body["manifest_url"] == "https://example.org/iiif/manifest/abc" + + +def test_import_notice_heuristic_generates_manifest_candidate() -> None: + response = client.post( + "/api/import", + json={"url": "https://example.org/notice/42"}, + ) + assert response.status_code == 200 + body = response.json() + assert body["detected_source"] == "manifest_by_url" + assert body["manifest_url"] == "https://example.org/notice/42/manifest" + + +def test_item_rejects_invalid_global_id_format() -> None: + response = client.get("/api/item/invalid-id") + assert response.status_code == 400 + assert response.json()["error"] == "bad_request" + + +def test_import_rejects_non_http_url() -> None: + response = client.post("/api/import", json={"url": "file:///etc/passwd"}) + assert response.status_code == 400 + assert response.json()["error"] == "bad_request" + + +def test_import_rejects_localhost_url() -> None: + response = client.post("/api/import", json={"url": "http://localhost/internal"}) + assert response.status_code == 400 + assert response.json()["error"] == "bad_request" diff --git a/app/tests/unit/backend/test_gallica_connector.py b/app/tests/unit/backend/test_gallica_connector.py new file mode 100644 index 0000000000000000000000000000000000000000..c3fbc4fdae374cfac0f11b9826846ecfb9ce4a99 --- /dev/null +++ b/app/tests/unit/backend/test_gallica_connector.py @@ -0,0 +1,29 @@ +import asyncio + +from app.config.settings import settings +from app.connectors.gallica import GallicaConnector + + +def test_gallica_search_returns_normalized_items_in_fixture_mode() -> None: + settings.gallica_use_fixtures = True + connector = GallicaConnector() + + response = asyncio.run(connector.search(query="dante", filters={}, page=1, page_size=10)) + + assert response.results + first = response.results[0] + assert first.source == "gallica" + assert first.id.startswith("gallica:") + assert first.record_url is not None + assert first.manifest_url is not None + assert first.has_iiif_manifest is True + + +def 
test_gallica_resolve_manifest_from_record_url() -> None: + connector = GallicaConnector() + + resolved = asyncio.run( + connector.resolve_manifest(record_url="https://gallica.bnf.fr/ark:/12148/bpt6k1512248m") + ) + + assert resolved == "https://gallica.bnf.fr/iiif/ark:/12148/bpt6k1512248m/manifest.json" diff --git a/app/tests/unit/backend/test_ids.py b/app/tests/unit/backend/test_ids.py new file mode 100644 index 0000000000000000000000000000000000000000..d0d7024a875c5f18eaa6ed9872df93b7dbfad9e8 --- /dev/null +++ b/app/tests/unit/backend/test_ids.py @@ -0,0 +1,15 @@ +from app.utils.ids import make_global_id, split_global_id + + +def test_global_id_roundtrip() -> None: + global_id = make_global_id("mock", "ms-1") + assert global_id == "mock:ms-1" + assert split_global_id(global_id) == ("mock", "ms-1") + + +def test_split_global_id_rejects_invalid_values() -> None: + try: + split_global_id("invalid") + assert False, "split_global_id should reject ids without ':'" + except ValueError: + pass diff --git a/app/tests/unit/backend/test_manifest_by_url_connector.py b/app/tests/unit/backend/test_manifest_by_url_connector.py new file mode 100644 index 0000000000000000000000000000000000000000..86b46fe27a044ef96838ed0026e299167a0cd10c --- /dev/null +++ b/app/tests/unit/backend/test_manifest_by_url_connector.py @@ -0,0 +1,23 @@ +import asyncio + +from app.connectors.manifest_by_url_connector import ManifestByUrlConnector + + +def test_manifest_by_url_connector_detects_direct_manifest() -> None: + connector = ManifestByUrlConnector() + + resolved = asyncio.run( + connector.resolve_manifest( + record_url="https://example.org/iiif/manifest/123", + ) + ) + + assert resolved == "https://example.org/iiif/manifest/123" + + +def test_manifest_by_url_connector_generates_notice_candidates() -> None: + connector = ManifestByUrlConnector() + + resolved = asyncio.run(connector.resolve_manifest(record_url="https://example.org/item/123")) + + assert resolved == 
"https://example.org/item/123/manifest" diff --git a/app/tests/unit/backend/test_normalized_item.py b/app/tests/unit/backend/test_normalized_item.py new file mode 100644 index 0000000000000000000000000000000000000000..fd81b2d90e2994afabf1d31ee18aade2a8e37252 --- /dev/null +++ b/app/tests/unit/backend/test_normalized_item.py @@ -0,0 +1,17 @@ +from pydantic import ValidationError + +from app.models.normalized_item import NormalizedItem + + +def test_normalized_item_enforces_global_id_policy() -> None: + try: + NormalizedItem( + id="wrong:ms-1", + source="mock", + source_label="Mock", + source_item_id="ms-1", + title="Item", + ) + assert False, "NormalizedItem should enforce source:source_item_id id policy" + except ValidationError: + pass diff --git a/app/tests/unit/backend/test_registry.py b/app/tests/unit/backend/test_registry.py new file mode 100644 index 0000000000000000000000000000000000000000..f6926869c6f9b05949cfcdf46a2c1722343bfa85 --- /dev/null +++ b/app/tests/unit/backend/test_registry.py @@ -0,0 +1,10 @@ +from app.connectors.mock_connector import MockConnector +from app.connectors.registry import ConnectorRegistry + + +def test_registry_register_and_get() -> None: + registry = ConnectorRegistry() + connector = MockConnector() + registry.register(connector) + assert registry.list_names() == ["mock"] + assert registry.get("mock") is connector diff --git a/app/tests/unit/backend/test_search_orchestrator.py b/app/tests/unit/backend/test_search_orchestrator.py new file mode 100644 index 0000000000000000000000000000000000000000..89991ebcd79e95dd0b332341f335ae36d5d80d59 --- /dev/null +++ b/app/tests/unit/backend/test_search_orchestrator.py @@ -0,0 +1,32 @@ +import asyncio + +from app.connectors.mock_connector import MockConnector +from app.connectors.registry import ConnectorRegistry +from app.models.search_models import SearchRequest +from app.services.search_orchestrator import SearchOrchestrator + + +def test_search_orchestrator_returns_normalized_results() -> 
None: + registry = ConnectorRegistry() + registry.register(MockConnector()) + orchestrator = SearchOrchestrator(registry) + + response = asyncio.run(orchestrator.search(SearchRequest(query="book"))) + + assert response.results + assert response.results[0].id.startswith("mock:") + assert response.sources_used == ["mock"] + + +def test_search_orchestrator_reports_unknown_source_as_partial_failure() -> None: + registry = ConnectorRegistry() + registry.register(MockConnector()) + orchestrator = SearchOrchestrator(registry) + + response = asyncio.run(orchestrator.search(SearchRequest(query="book", sources=["unknown"]))) + + assert response.results == [] + assert response.sources_used == [] + assert response.partial_failures + assert response.partial_failures[0].source == "unknown" + assert response.partial_failures[0].status == "error" diff --git a/app/tests/unit/backend/test_url_validation.py b/app/tests/unit/backend/test_url_validation.py new file mode 100644 index 0000000000000000000000000000000000000000..9563a94ab4b433f61b72442ba98a3f88631c0632 --- /dev/null +++ b/app/tests/unit/backend/test_url_validation.py @@ -0,0 +1,18 @@ +from app.utils.errors import BadRequestError +from app.utils.url_validation import validate_http_url + + +def test_validate_http_url_rejects_localhost() -> None: + try: + validate_http_url("http://localhost:8000/test") + assert False, "localhost should be rejected" + except BadRequestError: + pass + + +def test_validate_http_url_rejects_unsupported_scheme() -> None: + try: + validate_http_url("file:///tmp/a") + assert False, "unsupported scheme should be rejected" + except BadRequestError: + pass diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..5fe2be98bd3c651048b5c5d8d61a98fb491eb463 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "clafoutis" +version = "0.1.0" 
+description = "Backend socle for Clafoutis" +requires-python = ">=3.11" +dependencies = [ + "fastapi>=0.110,<1.0", + "uvicorn>=0.27,<1.0", + "pydantic>=2.6,<3.0", + "pydantic-settings>=2.2,<3.0", + "httpx>=0.27,<1.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0,<9.0", + "pytest-asyncio>=0.23,<1.0", +] + +[tool.pytest.ini_options] +asyncio_mode = "auto" +pythonpath = [ + "app/backend", +] +testpaths = [ + "app/tests", +]