raylim Claude (claude-sonnet-4.5) committed on
Commit
248a8b1
·
unverified ·
1 Parent(s): 4f8c1cd

test: add comprehensive tests for OncoTree API mapping

Browse files

Added TestOncoTreeMapping class with 8 tests covering:
- Precomputed TCGA map verification
- Direct match in CANCER_TYPE_TO_INT_MAP
- Ancestor lookup via OncoTree API
- No match handling
- API success and failure paths
- Caching behavior of _oncotree_ancestors_cache
- 404 response handling

Test coverage for src/mosaic/tcga.py improved from 33% to 78%.
All 265 project tests pass with no regressions.

Co-Authored-By: Claude (claude-sonnet-4.5) <noreply@anthropic.com>

Files changed (1) hide show
  1. tests/test_tcga.py +122 -0
tests/test_tcga.py CHANGED
@@ -28,6 +28,10 @@ from mosaic.tcga import (
28
  _map_gdc_site_to_tissue_site,
29
  _map_project_id_to_cancer_subtype,
30
  _map_sample_type_to_site_type,
 
 
 
 
31
  )
32
 
33
 
@@ -223,6 +227,124 @@ class TestMetadataMapping:
223
  assert _map_sample_type_to_site_type("") == "Primary"
224
 
225
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  class TestConvertGDCMetadata:
227
  """Tests for full GDC metadata conversion."""
228
 
 
28
  _map_gdc_site_to_tissue_site,
29
  _map_project_id_to_cancer_subtype,
30
  _map_sample_type_to_site_type,
31
+ _find_paladin_subtype_for_tcga,
32
+ _get_oncotree_ancestors,
33
+ _TCGA_TO_PALADIN_MAP,
34
+ _oncotree_ancestors_cache,
35
  )
36
 
37
 
 
227
  assert _map_sample_type_to_site_type("") == "Primary"
228
 
229
 
230
+ class TestOncoTreeMapping:
231
+ """Tests for OncoTree hierarchy and cancer subtype mapping."""
232
+
233
+ def test_precomputed_tcga_map_coverage(self):
234
+ """Test that the precomputed TCGA map has expected common codes."""
235
+ # Verify some of the most common TCGA codes are in the precomputed map
236
+ common_codes = ["LUAD", "BRCA", "PRAD", "COAD", "BLCA", "LUSC", "HNSC", "STAD"]
237
+ for code in common_codes:
238
+ assert code in _TCGA_TO_PALADIN_MAP, f"{code} should be in precomputed map"
239
+
240
+ def test_find_paladin_subtype_direct_match(self):
241
+ """Test _find_paladin_subtype_for_tcga with direct match in CANCER_TYPE_TO_INT_MAP."""
242
+ # LUAD is in CANCER_TYPE_TO_INT_MAP and precomputed map
243
+ result = _find_paladin_subtype_for_tcga("LUAD")
244
+ assert result == "LUAD"
245
+
246
+ # BRCA is in CANCER_TYPE_TO_INT_MAP and precomputed map
247
+ result = _find_paladin_subtype_for_tcga("BRCA")
248
+ assert result == "BRCA"
249
+
250
+ @patch("mosaic.tcga._get_oncotree_ancestors")
251
+ def test_find_paladin_subtype_via_ancestors(self, mock_get_ancestors):
252
+ """Test _find_paladin_subtype_for_tcga using OncoTree ancestor lookup.
253
+
254
+ Tests that if TCGA code is not in CANCER_TYPE_TO_INT_MAP,
255
+ it checks ancestors and returns the first matching Paladin subtype.
256
+ """
257
+ # Mock a TCGA code "TESTCODE" that has "LUAD" as an ancestor
258
+ mock_get_ancestors.return_value = ["LUAD", "NSCLC", "TISSUE"]
259
+
260
+ result = _find_paladin_subtype_for_tcga("TESTCODE")
261
+ assert result == "LUAD"
262
+ mock_get_ancestors.assert_called_once_with("TESTCODE")
263
+
264
+ @patch("mosaic.tcga._get_oncotree_ancestors")
265
+ def test_find_paladin_subtype_no_match(self, mock_get_ancestors):
266
+ """Test _find_paladin_subtype_for_tcga with no matching Paladin subtype."""
267
+ # Mock a TCGA code with no Paladin ancestors
268
+ mock_get_ancestors.return_value = ["UNKNOWN_PARENT", "TISSUE"]
269
+
270
+ result = _find_paladin_subtype_for_tcga("UNKNOWN_CODE")
271
+ assert result is None
272
+
273
+ @patch("mosaic.tcga.requests.get")
274
+ def test_get_oncotree_ancestors_success(self, mock_requests_get):
275
+ """Test _get_oncotree_ancestors with successful API calls."""
276
+ # Clear cache for this test
277
+ _oncotree_ancestors_cache.clear()
278
+
279
+ # Mock the OncoTree API responses
280
+ # IDC -> BRCA -> TISSUE
281
+ mock_responses = [
282
+ # First call for "IDC"
283
+ Mock(
284
+ status_code=200,
285
+ json=lambda: [{"code": "IDC", "parent": "BRCA"}]
286
+ ),
287
+ # Second call for "BRCA"
288
+ Mock(
289
+ status_code=200,
290
+ json=lambda: [{"code": "BRCA", "parent": "TISSUE"}]
291
+ ),
292
+ ]
293
+ mock_requests_get.side_effect = mock_responses
294
+
295
+ result = _get_oncotree_ancestors("IDC")
296
+ assert result == ["BRCA"] # Only returns ancestors, not TISSUE
297
+
298
+ # Verify caching
299
+ assert "IDC" in _oncotree_ancestors_cache
300
+ assert _oncotree_ancestors_cache["IDC"] == ["BRCA"]
301
+
302
+ @patch("mosaic.tcga.requests.get")
303
+ def test_get_oncotree_ancestors_api_failure(self, mock_requests_get):
304
+ """Test _get_oncotree_ancestors handles API failures gracefully."""
305
+ # Clear cache for this test
306
+ _oncotree_ancestors_cache.clear()
307
+
308
+ # Mock API failure
309
+ mock_requests_get.side_effect = Exception("API error")
310
+
311
+ result = _get_oncotree_ancestors("TESTCODE")
312
+ assert result == [] # Should return empty list on error
313
+
314
+ @patch("mosaic.tcga.requests.get")
315
+ def test_get_oncotree_ancestors_caching(self, mock_requests_get):
316
+ """Test _get_oncotree_ancestors uses cache to avoid repeated API calls."""
317
+ # Clear cache for this test
318
+ _oncotree_ancestors_cache.clear()
319
+
320
+ # Mock response - simulate a single-level hierarchy (IDC -> TISSUE)
321
+ mock_requests_get.return_value = Mock(
322
+ status_code=200,
323
+ json=lambda: [{"code": "IDC", "parent": "TISSUE"}]
324
+ )
325
+
326
+ # First call should hit the API
327
+ result1 = _get_oncotree_ancestors("IDC")
328
+ initial_call_count = mock_requests_get.call_count
329
+ assert initial_call_count >= 1 # At least one API call
330
+
331
+ # Second call should use cache (no additional API calls)
332
+ result2 = _get_oncotree_ancestors("IDC")
333
+ assert mock_requests_get.call_count == initial_call_count # No additional calls
334
+ assert result1 == result2
335
+
336
+ @patch("mosaic.tcga.requests.get")
337
+ def test_get_oncotree_ancestors_not_found(self, mock_requests_get):
338
+ """Test _get_oncotree_ancestors handles 404 responses."""
339
+ # Clear cache for this test
340
+ _oncotree_ancestors_cache.clear()
341
+
342
+ mock_requests_get.return_value = Mock(status_code=404)
343
+
344
+ result = _get_oncotree_ancestors("INVALID_CODE")
345
+ assert result == []
346
+
347
+
348
  class TestConvertGDCMetadata:
349
  """Tests for full GDC metadata conversion."""
350