yakilee Claude Opus 4.6 committed on
Commit
c986538
·
1 Parent(s): cb1a9ab

feat(mcp): enhance search with intervention and eligibility dimensions

Browse files

- search_direct() now uses structured query params (query.cond,
query.intr, query.term) instead of a single query.term
- search_multi_variant() adds intervention-specific (variant 4) and
eligibility-keyword (variant 5) search variants
- search() (MCP path) includes interventions in query

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. trialpath/services/mcp_client.py +49 -12
trialpath/services/mcp_client.py CHANGED
@@ -112,6 +112,8 @@ class ClinicalTrialsMCPClient:
112
  query_parts.append(anchors.subtype)
113
  if anchors.biomarkers:
114
  query_parts.extend(anchors.biomarkers)
 
 
115
 
116
  query = " ".join(query_parts)
117
 
@@ -283,24 +285,37 @@ class ClinicalTrialsMCPClient:
283
  async def search_direct(self, anchors: SearchAnchors) -> list[dict]:
284
  """Search ClinicalTrials.gov API v2 directly with retry.
285
 
286
- Uses requests instead of httpx because ClinicalTrials.gov blocks
287
- httpx via TLS fingerprinting (403 Forbidden).
288
- Retries on ConnectionError/Timeout with exponential backoff.
 
 
 
 
289
  """
290
  import requests
291
 
292
- query_parts = [anchors.condition]
293
- if anchors.biomarkers:
294
- query_parts.append(anchors.biomarkers[0].split()[0])
295
-
296
- query = " ".join(query_parts)
297
- if len(query) > 100:
298
- query = anchors.condition
299
 
300
  params: dict = {
301
- "query.term": query,
302
  "pageSize": 50,
303
  }
 
 
 
 
 
 
 
 
 
 
 
304
  if anchors.geography:
305
  params["query.locn"] = anchors.geography.country
306
 
@@ -349,9 +364,11 @@ class ClinicalTrialsMCPClient:
349
  """Fire multiple search variants in parallel for broader recall.
350
 
351
  Variants:
352
- 1. Full query (condition + biomarkers) -- high precision
353
  2. Condition-only query -- broader recall
354
  3. Per-biomarker queries (top 2) -- catches niche trials
 
 
355
 
356
  Results are merged and deduplicated by NCT ID.
357
  """
@@ -376,6 +393,26 @@ class ClinicalTrialsMCPClient:
376
  bio_anchors.geography = anchors.geography
377
  queries.append(self.search_direct(bio_anchors))
378
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  # Fire all variants in parallel
380
  results = await asyncio.gather(*queries, return_exceptions=True)
381
 
 
112
  query_parts.append(anchors.subtype)
113
  if anchors.biomarkers:
114
  query_parts.extend(anchors.biomarkers)
115
+ if anchors.interventions:
116
+ query_parts.extend(anchors.interventions[:2])
117
 
118
  query = " ".join(query_parts)
119
 
 
285
  async def search_direct(self, anchors: SearchAnchors) -> list[dict]:
286
  """Search ClinicalTrials.gov API v2 directly with retry.
287
 
288
+ Uses structured query parameters:
289
+ - query.cond: condition (+ subtype)
290
+ - query.intr: first intervention (drug name)
291
+ - query.term: eligibility keywords (ECOG, stage, biomarkers)
292
+
293
+ Falls back to query.term-only when new fields are absent (backward
294
+ compatible).
295
  """
296
  import requests
297
 
298
+ # Build condition query (query.cond)
299
+ cond_parts = [anchors.condition]
300
+ if anchors.subtype:
301
+ cond_parts.append(anchors.subtype)
302
+ cond_query = " ".join(cond_parts)
 
 
303
 
304
  params: dict = {
305
+ "query.cond": cond_query,
306
  "pageSize": 50,
307
  }
308
+
309
+ # Intervention query (query.intr) — first drug name
310
+ if anchors.interventions:
311
+ params["query.intr"] = anchors.interventions[0]
312
+
313
+ # General term query (query.term) — eligibility keywords or biomarker fallback
314
+ if anchors.eligibility_keywords:
315
+ params["query.term"] = " ".join(anchors.eligibility_keywords)
316
+ elif anchors.biomarkers:
317
+ params["query.term"] = anchors.biomarkers[0].split()[0]
318
+
319
  if anchors.geography:
320
  params["query.locn"] = anchors.geography.country
321
 
 
364
  """Fire multiple search variants in parallel for broader recall.
365
 
366
  Variants:
367
+ 1. Full query (condition + interventions + eligibility keywords)
368
  2. Condition-only query -- broader recall
369
  3. Per-biomarker queries (top 2) -- catches niche trials
370
+ 4. Per-intervention queries -- finds drug-specific trials
371
+ 5. Condition + eligibility keywords -- pre-filters by clinical features
372
 
373
  Results are merged and deduplicated by NCT ID.
374
  """
 
393
  bio_anchors.geography = anchors.geography
394
  queries.append(self.search_direct(bio_anchors))
395
 
396
+ # Variant 4: Per-intervention queries (drug-specific trials)
397
+ for intervention in (anchors.interventions or [])[:3]:
398
+ intr_anchors = SearchAnchors(
399
+ condition=anchors.condition,
400
+ interventions=[intervention],
401
+ )
402
+ if anchors.geography:
403
+ intr_anchors.geography = anchors.geography
404
+ queries.append(self.search_direct(intr_anchors))
405
+
406
+ # Variant 5: Condition + eligibility keywords (clinical feature pre-filter)
407
+ if anchors.eligibility_keywords:
408
+ elig_anchors = SearchAnchors(
409
+ condition=anchors.condition,
410
+ eligibility_keywords=anchors.eligibility_keywords,
411
+ )
412
+ if anchors.geography:
413
+ elig_anchors.geography = anchors.geography
414
+ queries.append(self.search_direct(elig_anchors))
415
+
416
  # Fire all variants in parallel
417
  results = await asyncio.gather(*queries, return_exceptions=True)
418