"""Tests vision analīzes endpointiem.""" from __future__ import annotations import base64 import io from unittest.mock import AsyncMock, patch import pytest from PIL import Image from maris_core.memory_context import ConversationMemoryStore from maris_core.vision.analyze import ( _LIVE_CAMERAS, BoundingBox, FrameAnalysis, FrameSequenceRequest, ImageSourceRequest, LiveCameraConfigRequest, LiveCameraConnectRequest, LiveFrameRequest, LiveSessionCommandRequest, OCRTextBlock, VisionDetection, analyze_frames, analyze_image, configure_live_camera, connect_live_camera, estimate_pose, list_live_cameras, ocr_image, process_live_frame, scene_timeline, start_live_camera, track_objects, ) def _sample_image_base64( size: tuple[int, int] = (16, 12), color: tuple[int, int, int] = (220, 80, 80), ) -> str: image = Image.new("RGB", size, color) buffer = io.BytesIO() image.save(buffer, format="PNG") return base64.b64encode(buffer.getvalue()).decode() @pytest.mark.asyncio async def test_analyze_image_fallback_without_detector() -> None: with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._detect_image_payload", return_value=([], "fallback/basic-image-summary", True), ), ): response = await analyze_image(ImageSourceRequest(image_base64=_sample_image_base64())) assert response.fallback_used is True assert response.width == 16 assert response.height == 12 assert response.detections == [] assert "Fallback vision summary" in response.summary @pytest.mark.asyncio async def test_analyze_image_maps_detector_output() -> None: detections = [ VisionDetection( label="person", confidence=0.93, bbox=BoundingBox(x=1, y=2, width=10, height=12), ), VisionDetection( label="cell phone", confidence=0.42, bbox=BoundingBox(x=4, y=5, width=4, height=5), ), ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._detect_image_payload", return_value=(detections, "facebook/detr-resnet-50", False), ), ): response = await analyze_image( ImageSourceRequest(image_base64=_sample_image_base64(size=(20, 18))) ) assert response.fallback_used is False assert response.model == "facebook/detr-resnet-50" assert [item.label for item in response.detections] == ["person", "cell phone"] assert "person" in response.summary @pytest.mark.asyncio async def test_analyze_image_persists_summary_into_shared_session_memory() -> None: memory = ConversationMemoryStore() with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._detect_image_payload", return_value=([], "fallback/basic-image-summary", True), ), patch("maris_core.vision.analyze.memory_store", memory), ): response = await analyze_image( ImageSourceRequest( image_base64=_sample_image_base64(), session_id="vision-session", camera_id="cam-1", ) ) matches = memory.retrieve_relevant_context("vision-session", "Fallback vision summary") assert matches assert response.summary == matches[0].content @pytest.mark.asyncio async def test_ocr_image_returns_text_blocks() -> None: blocks = [ OCRTextBlock( text="MARIS AI", confidence=0.88, bbox=BoundingBox(x=2, y=3, width=40, height=12), language="lv", ) ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._extract_ocr_blocks", return_value=(blocks, "pytesseract", False), ), ): response = await ocr_image( ImageSourceRequest(image_base64=_sample_image_base64(size=(64, 32))) ) assert response.model == "pytesseract" assert response.fallback_used is False assert response.results[0].text == "MARIS AI" assert "OCR pabeigts" in response.summary @pytest.mark.asyncio async def test_pose_estimate_derives_keypoints_from_person_detections() -> None: detections = [ VisionDetection( label="person", confidence=0.91, bbox=BoundingBox(x=10, y=20, width=40, height=120), ) ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._detect_image_payload", return_value=(detections, "facebook/detr-resnet-50", False), ), ): response = await estimate_pose( ImageSourceRequest(image_base64=_sample_image_base64(size=(100, 160))) ) assert response.model == "bbox-derived-pose-v1" assert response.fallback_used is False assert len(response.poses) == 1 assert any(point.name == "nose" for point in response.poses[0].keypoints) assert any(connection.start == "nose" for connection in response.poses[0].connections) @pytest.mark.asyncio async def test_tracking_builds_tracks_from_frame_analysis() -> None: frames = [Image.new("RGB", (80, 60), (20, 20, 20)), Image.new("RGB", (80, 60), (30, 30, 30))] analyses = [ FrameAnalysis( frame_index=0, summary="frame 0", detections=[ VisionDetection( label="person", confidence=0.9, bbox=BoundingBox(x=10, y=10, width=20, height=40), ) ], dominant_labels=["person"], brightness=90.0, ), FrameAnalysis( frame_index=1, summary="frame 1", detections=[ VisionDetection( label="person", confidence=0.87, bbox=BoundingBox(x=14, y=11, width=20, height=40), ) ], dominant_labels=["person"], brightness=94.0, ), ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch("maris_core.vision.analyze._load_frames", new=AsyncMock(return_value=frames)), patch( "maris_core.vision.analyze._build_frame_analysis", return_value=(analyses, "facebook/detr-resnet-50", False), ), ): response = await track_objects( FrameSequenceRequest(frames_base64=[_sample_image_base64(), _sample_image_base64()]) ) assert response.frame_count == 2 assert len(response.tracks) == 1 assert response.tracks[0].track_id == 1 assert len(response.tracks[0].observations) == 2 @pytest.mark.asyncio async def test_scene_timeline_groups_frames_into_scenes() -> None: frames = [Image.new("RGB", (80, 60), (20, 20, 20)) for _ in range(3)] analyses = [ FrameAnalysis( frame_index=0, summary="scene a", detections=[], dominant_labels=["person"], brightness=80.0, ), FrameAnalysis( frame_index=1, summary="scene a2", detections=[], dominant_labels=["person", "chair"], brightness=84.0, ), FrameAnalysis( frame_index=2, summary="scene b", detections=[], dominant_labels=["car"], brightness=150.0, ), ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch("maris_core.vision.analyze._load_frames", new=AsyncMock(return_value=frames)), patch( "maris_core.vision.analyze._build_frame_analysis", return_value=(analyses, "facebook/detr-resnet-50", False), ), ): response = await scene_timeline( FrameSequenceRequest( frames_base64=[ _sample_image_base64(), _sample_image_base64(), _sample_image_base64(), ] ) ) assert response.frame_count == 3 assert len(response.scenes) == 2 assert response.scenes[0].start_frame == 0 assert response.scenes[1].start_frame == 2 assert "Scene timeline pabeigta" in response.summary @pytest.mark.asyncio async def test_frame_analysis_returns_per_frame_summaries() -> None: frames = [Image.new("RGB", (80, 60), (20, 20, 20))] analyses = [ FrameAnalysis( frame_index=0, summary="frame one", detections=[], dominant_labels=[], brightness=42.0, ) ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch("maris_core.vision.analyze._load_frames", new=AsyncMock(return_value=frames)), patch( "maris_core.vision.analyze._build_frame_analysis", return_value=(analyses, "fallback/basic-image-summary", True), ), ): response = await analyze_frames( FrameSequenceRequest(frames_base64=[_sample_image_base64()]) ) assert response.frame_count == 1 assert response.frames[0].summary == "frame one" assert response.fallback_used is True @pytest.mark.asyncio async def test_live_camera_connect_and_list_registry() -> None: _LIVE_CAMERAS.clear() with patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ): response = await connect_live_camera( LiveCameraConnectRequest( camera_id="cam-browser", source_type="browser_camera", transport="getUserMedia", device_id="device-1", ) ) registry = await list_live_cameras() assert response.camera.camera_id == "cam-browser" assert response.camera.status == "connected" assert registry.cameras[0].camera_id == "cam-browser" assert registry.cameras[0].health.connected is True @pytest.mark.asyncio async def test_live_frame_processing_updates_tracks_timeline_and_events() -> None: _LIVE_CAMERAS.clear() sample = _sample_image_base64(size=(40, 30)) detections = [ VisionDetection( label="person", confidence=0.91, bbox=BoundingBox(x=5, y=4, width=18, height=22), ) ] with ( patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ), patch( "maris_core.vision.analyze._frame_detections", return_value=(detections, "facebook/detr-resnet-50", False), ), patch( "maris_core.vision.analyze._extract_ocr_blocks", return_value=([], "pytesseract", False), ), ): await connect_live_camera( LiveCameraConnectRequest( camera_id="cam-live", source_type="browser_camera", transport="getUserMedia", device_id="device-1", ) ) await start_live_camera(LiveSessionCommandRequest(camera_id="cam-live")) response = await process_live_frame( LiveFrameRequest(camera_id="cam-live", image_base64=sample, frame_index=0) ) assert response.camera.status == "streaming" assert response.camera.health.analysis_active is True assert response.camera.latest_result["detections"][0]["label"] == "person" assert response.camera.tracks[0].label == "person" assert response.events[0].type == "analysis_result" @pytest.mark.asyncio async def test_live_camera_config_updates_roi_and_rules() -> None: _LIVE_CAMERAS.clear() with patch( "maris_core.utils.hf_integration.HFIntegration.save_generation", new_callable=AsyncMock, ): await connect_live_camera( LiveCameraConnectRequest( camera_id="cam-config", source_type="ip_camera", transport="rtsp", url="rtsp://camera.local/live", ) ) response = await configure_live_camera( LiveCameraConfigRequest( camera_id="cam-config", roi_zones=[{"label": "gate", "x": 10, "y": 20, "width": 30, "height": 40}], alert_rules=["person_zone:person:1:0.7"], fps_budget=8.0, ) ) assert response.camera.roi_zones[0]["label"] == "gate" assert response.camera.alert_rules == ["person_zone:person:1:0.7"] assert response.camera.fps_budget == 8.0