File size: 13,310 Bytes
aaa787c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb59bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8492a0e
cb59bc2
aaa787c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99f61a7
 
e297f75
 
 
99f61a7
 
e297f75
99f61a7
e297f75
 
 
 
 
 
99f61a7
e297f75
99f61a7
 
e297f75
 
 
 
99f61a7
e297f75
 
99f61a7
e297f75
 
 
 
 
 
99f61a7
 
e297f75
 
 
99f61a7
 
e297f75
 
99f61a7
e297f75
 
 
 
 
 
aaa787c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb59bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
8492a0e
cb59bc2
aaa787c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb59bc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aaa787c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
"""Tests for TelemetryTracker."""

import json
import tempfile
import time
from pathlib import Path

import pytest

from mosaic.telemetry import TelemetryTracker, TelemetryConfig


@pytest.fixture
def temp_dir():
    """Provide a throwaway directory, removed automatically after the test."""
    with tempfile.TemporaryDirectory() as dirname:
        yield Path(dirname)


@pytest.fixture
def tracker(temp_dir):
    """Yield a brand-new TelemetryTracker backed by the temp directory.

    The singleton is reset before and after each test so state never
    leaks between tests.
    """
    TelemetryTracker.reset_instance()
    cfg = TelemetryConfig(
        enabled=True,
        base_dir=temp_dir,
        hourly_rate=0.40,
        idle_timeout_min=30,
        is_hf_spaces=False,
    )
    instance = TelemetryTracker.get_instance(cfg)
    yield instance
    TelemetryTracker.reset_instance()


class TestTelemetryTrackerSingleton:
    """Verify the tracker's singleton lifecycle."""

    def test_get_instance_returns_same_instance(self, temp_dir):
        """Repeated get_instance calls hand back one shared object."""
        TelemetryTracker.reset_instance()
        first = TelemetryTracker.get_instance(TelemetryConfig(base_dir=temp_dir))
        second = TelemetryTracker.get_instance()

        assert first is second
        TelemetryTracker.reset_instance()

    def test_reset_instance_creates_new_instance(self, temp_dir):
        """reset_instance discards the old singleton so a fresh one is built."""
        TelemetryTracker.reset_instance()
        cfg = TelemetryConfig(base_dir=temp_dir)
        before = TelemetryTracker.get_instance(cfg)

        TelemetryTracker.reset_instance()
        after = TelemetryTracker.get_instance(cfg)

        assert before is not after
        TelemetryTracker.reset_instance()


class TestAppSessionEvents:
    """Verify app lifecycle events land in the session JSONL log."""

    def test_log_app_start(self, tracker, temp_dir):
        """An app_start event is written with zeroed counters."""
        tracker.log_app_start()

        files = list((temp_dir / "daily").glob("session_*.jsonl"))
        assert len(files) == 1

        record = json.loads(files[0].read_text().strip())

        assert record["event_type"] == "app_start"
        assert record["uptime_sec"] == 0.0
        assert record["analysis_count"] == 0
        assert record["estimated_cost_usd"] == 0.0

    def test_log_heartbeat(self, tracker, temp_dir):
        """A heartbeat appended after app_start reports positive uptime."""
        tracker.log_app_start()
        time.sleep(0.1)  # let a little uptime accrue before the heartbeat
        tracker.log_heartbeat()

        files = list((temp_dir / "daily").glob("session_*.jsonl"))
        assert len(files) == 1

        rows = files[0].read_text().splitlines()
        assert len(rows) == 2

        beat = json.loads(rows[1])
        assert beat["event_type"] == "heartbeat"
        assert beat["uptime_sec"] > 0

    def test_log_app_shutdown(self, tracker, temp_dir):
        """The final event after shutdown is app_shutdown."""
        tracker.log_app_start()
        tracker.log_app_shutdown()

        files = list((temp_dir / "daily").glob("session_*.jsonl"))
        rows = files[0].read_text().splitlines()

        last = json.loads(rows[-1])
        assert last["event_type"] == "app_shutdown"


class TestUsageEvents:
    """Verify usage events are persisted and roll up into session metrics."""

    def test_log_analysis_start(self, tracker, temp_dir):
        """An analysis_start event records its fields and hashes the session."""
        tracker.log_usage_event(
            event_type="analysis_start",
            analysis_id="test-123",
            slide_count=5,
            session_hash="abc123",
            site_type="Primary",
            cancer_subtype="LUAD",
            seg_config="Biopsy",
            gpu_type="T4",
        )

        files = list((temp_dir / "daily").glob("usage_*.jsonl"))
        assert len(files) == 1

        record = json.loads(files[0].read_text().strip())

        assert record["event_type"] == "analysis_start"
        assert record["analysis_id"] == "test-123"
        assert record["slide_count"] == 5
        assert record["site_type"] == "Primary"
        # The raw session hash must never be stored verbatim.
        assert record["session_hash"] is not None
        assert record["session_hash"] != "abc123"

    def test_log_usage_event_with_user_info(self, tracker, temp_dir):
        """Login state and HF username are carried through to the event."""
        tracker.log_usage_event(
            event_type="analysis_start",
            analysis_id="test-user-info",
            slide_count=1,
            is_logged_in=True,
            hf_username="testuser",
        )

        files = list((temp_dir / "daily").glob("usage_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        assert record["is_logged_in"] is True
        assert record["hf_username"] == "testuser"

    def test_log_analysis_complete(self, tracker, temp_dir):
        """An analysis_complete event records duration and success flag."""
        tracker.log_app_start()
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-123",
            slide_count=5,
            duration_sec=120.5,
            success=True,
            gpu_type="T4",
        )

        files = list((temp_dir / "daily").glob("usage_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        assert record["event_type"] == "analysis_complete"
        assert record["duration_sec"] == 120.5
        assert record["success"] is True

    def test_analysis_complete_updates_session_metrics(self, tracker):
        """Each completed analysis bumps the count and accumulates time."""
        tracker.log_app_start()
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-1",
            slide_count=3,
            duration_sec=60.0,
            success=True,
        )
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-2",
            slide_count=2,
            duration_sec=45.0,
            success=True,
        )

        stats = tracker._get_session_metrics()
        assert stats["analysis_count"] == 2
        assert stats["analysis_time_sec"] == 105.0

    def test_log_usage_event_with_cached_slides(self, tracker, temp_dir):
        """cached_slide_count survives the round trip to disk."""
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-cache",
            slide_count=10,
            duration_sec=50.0,
            success=True,
            cached_slide_count=3,
        )

        files = list((temp_dir / "daily").glob("usage_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        assert record["cached_slide_count"] == 3

    def test_fully_cached_analysis_excludes_duration_from_metrics(self, tracker):
        """An all-cached run counts as an analysis but adds no compute time."""
        tracker.log_app_start()
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-cache",
            slide_count=5,
            duration_sec=2.0,
            success=True,
            cached_slide_count=5,  # every slide served from cache
        )

        stats = tracker._get_session_metrics()
        assert stats["analysis_count"] == 1
        assert stats["analysis_time_sec"] == 0.0

    def test_mixed_cache_analysis_includes_duration_in_metrics(self, tracker):
        """A partially cached run still contributes its full duration."""
        tracker.log_app_start()
        tracker.log_usage_event(
            event_type="analysis_complete",
            analysis_id="test-mixed",
            slide_count=10,
            duration_sec=60.0,
            success=True,
            cached_slide_count=3,  # only a subset cached
        )

        stats = tracker._get_session_metrics()
        assert stats["analysis_count"] == 1
        assert stats["analysis_time_sec"] == 60.0


class TestResourceEvents:
    """Verify resource events are persisted to the daily JSONL log."""

    def test_log_resource_event(self, tracker, temp_dir):
        """All supplied resource fields survive the round trip to disk."""
        tracker.log_resource_event(
            analysis_id="test-123",
            session_hash="abc123",
            total_duration_sec=180.5,
            tile_count=1000,
            filtered_tile_count=800,
            gpu_type="T4",
            peak_gpu_memory_gb=12.5,
        )

        files = list((temp_dir / "daily").glob("resource_*.jsonl"))
        assert len(files) == 1

        record = json.loads(files[0].read_text().strip())

        assert record["analysis_id"] == "test-123"
        assert record["total_duration_sec"] == 180.5
        assert record["tile_count"] == 1000
        assert record["peak_gpu_memory_gb"] == 12.5

    def test_log_resource_event_with_user_info(self, tracker, temp_dir):
        """Login state and HF username are carried through to the event."""
        tracker.log_resource_event(
            analysis_id="test-user-info",
            total_duration_sec=60.0,
            is_logged_in=True,
            hf_username="testuser",
        )

        files = list((temp_dir / "daily").glob("resource_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        assert record["is_logged_in"] is True
        assert record["hf_username"] == "testuser"


class TestFailureEvents:
    """Verify failure events are persisted and their messages sanitized."""

    def test_log_failure_event(self, tracker, temp_dir):
        """Error type, stage, and analysis id survive the round trip."""
        tracker.log_failure_event(
            error_type="ValueError",
            error_message="Invalid slide format",
            error_stage="upload",
            analysis_id="test-123",
            slide_count=1,
            gpu_type="T4",
        )

        files = list((temp_dir / "daily").glob("failure_*.jsonl"))
        assert len(files) == 1

        record = json.loads(files[0].read_text().strip())

        assert record["error_type"] == "ValueError"
        assert record["error_stage"] == "upload"
        assert record["analysis_id"] == "test-123"

    def test_log_failure_event_with_user_info(self, tracker, temp_dir):
        """Anonymous failures record is_logged_in=False and a null username."""
        tracker.log_failure_event(
            error_type="ValueError",
            error_message="test error",
            error_stage="upload",
            is_logged_in=False,
            hf_username=None,
        )

        files = list((temp_dir / "daily").glob("failure_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        assert record["is_logged_in"] is False
        assert record["hf_username"] is None

    def test_error_message_sanitized(self, tracker, temp_dir):
        """Filesystem paths in error messages are redacted before storage."""
        tracker.log_failure_event(
            error_type="FileNotFoundError",
            error_message="File not found: /home/user/secret/data.svs",
            error_stage="upload",
        )

        files = list((temp_dir / "daily").glob("failure_*.jsonl"))
        record = json.loads(files[0].read_text().strip())

        # The raw path must be replaced with a placeholder.
        assert "/home/user" not in record["error_message"]
        assert "[PATH]" in record["error_message"]


class TestDisabledTelemetry:
    """Verify that a disabled tracker writes nothing to disk."""

    def test_disabled_telemetry_no_events(self, temp_dir):
        """With enabled=False, logging calls produce no JSONL files."""
        TelemetryTracker.reset_instance()
        cfg = TelemetryConfig(
            enabled=False,
            base_dir=temp_dir,
        )
        instance = TelemetryTracker.get_instance(cfg)

        instance.log_app_start()
        instance.log_usage_event(
            event_type="analysis_start",
            analysis_id="test",
            slide_count=1,
        )

        # Nothing should have been written anywhere under daily/.
        written = list((temp_dir / "daily").glob("*.jsonl"))
        assert written == []
        TelemetryTracker.reset_instance()


class TestCostCalculation:
    """Tests for cost calculation."""

    def test_cost_calculation(self, tracker):
        """Test cost calculation formula.

        One hour of uptime at the configured $0.40/hr rate should cost
        $0.40. Compared with pytest.approx because 0.40 is not exactly
        representable in binary floating point — an exact `==` against a
        computed rate * hours product is fragile.
        """
        cost = tracker._calculate_cost(3600)  # 1 hour in seconds
        assert cost == pytest.approx(0.40)

    def test_cost_in_session_events(self, tracker, temp_dir):
        """Test that cost is included in session events.

        Only checks presence and non-negativity: the exact value depends
        on wall-clock elapsed time, which is not deterministic.
        """
        tracker.log_app_start()
        time.sleep(0.1)
        tracker.log_app_shutdown()

        session_files = list((temp_dir / "daily").glob("session_*.jsonl"))
        with open(session_files[0]) as f:
            lines = f.readlines()

        shutdown = json.loads(lines[-1])
        assert "estimated_cost_usd" in shutdown
        assert shutdown["estimated_cost_usd"] >= 0