# mwmathis's picture
# default country changed
# 68f7bb3 verified
"""
Gradio app for Clinical Trial Matcher
For Hugging Face Spaces deployment
"""
import html
import os
import re
import traceback
from typing import Tuple, Optional

import gradio as gr
import requests
# Endpoint of the ClinicalTrials.gov v2 "studies" API used for every request.
CLINICALTRIALS_API_BASE = "https://clinicaltrials.gov/api/v2/studies"

# Configuration flags read once from the environment at import time.
USE_HF_API = os.getenv('USE_HF_API', 'false').lower() == 'true'
HF_TOKEN = os.getenv('HUGGINGFACE_API_TOKEN', '')
print(f"Environment check - USE_HF_API: {USE_HF_API}, HF_TOKEN set: {bool(HF_TOKEN)}")

# The LLM ranking service is optional: when the import fails the app keeps
# working and only the ranking feature is switched off.
LLM_AVAILABLE = False
get_llm_service = None
try:
    from llm_service import get_llm_service
except ImportError as import_err:
    print(f"LLM service not available. Ranking will be disabled. Error: {import_err}")
except Exception as load_err:
    # Anything other than a plain ImportError is unexpected, so dump the
    # full traceback to help diagnose it.
    print(f"Error importing LLM service: {load_err}")
    import traceback
    traceback.print_exc()
else:
    LLM_AVAILABLE = True
    print("LLM service imported successfully.")
# The eligibility field is free text. These patterns locate its "Inclusion
# Criteria" and "Exclusion Criteria" sections; either heading may be prefixed
# with "Key" and may carry extra words before the optional colon.
_INCLUSION_SECTION_RE = re.compile(
    r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)',
    re.IGNORECASE | re.DOTALL,
)
_EXCLUSION_SECTION_RE = re.compile(
    r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$',
    re.IGNORECASE | re.DOTALL,
)
# Leading list marker ("*", "-" or "•") on a single criterion line.
_BULLET_PREFIX_RE = re.compile(r'^[*\-•]\s*')

# Badge colour per overall status; unknown statuses fall back to grey.
_STATUS_BADGE_COLORS = {
    'RECRUITING': '#28a745',
    'NOT_YET_RECRUITING': '#ffc107',
    'ACTIVE_NOT_RECRUITING': '#17a2b8',
    'COMPLETED': '#6c757d',
    'SUSPENDED': '#dc3545',
    'TERMINATED': '#dc3545',
    'WITHDRAWN': '#6c757d',
}
_DEFAULT_BADGE_COLOR = '#6c757d'


def _esc(value) -> str:
    """Escape a value for interpolation into HTML text or a quoted attribute."""
    return html.escape('' if value is None else str(value), quote=True)


def _section(mapping, key: str) -> dict:
    """Return ``mapping[key]`` if it is a dict, otherwise an empty dict."""
    value = mapping.get(key) if isinstance(mapping, dict) else None
    return value if isinstance(value, dict) else {}


def _split_criteria(section_text: str) -> list:
    """Split one criteria section into items, dropping bullets and tiny fragments."""
    items = []
    for raw_line in section_text.strip().split('\n'):
        item = _BULLET_PREFIX_RE.sub('', raw_line.strip()).strip()
        # Very short leftovers (e.g. "or", "1.") are noise, not criteria.
        if len(item) > 3:
            items.append(item)
    return items


def _parse_eligibility(eligibility_text) -> Tuple[list, list]:
    """Parse free-text eligibility into ``(inclusion_items, exclusion_items)``."""
    if not eligibility_text or not isinstance(eligibility_text, str):
        return [], []
    inclusion_match = _INCLUSION_SECTION_RE.search(eligibility_text)
    exclusion_match = _EXCLUSION_SECTION_RE.search(eligibility_text)
    inclusion = _split_criteria(inclusion_match.group(1)) if inclusion_match else []
    exclusion = _split_criteria(exclusion_match.group(1)) if exclusion_match else []
    return inclusion, exclusion


def _extract_locations(locations_module: dict) -> list:
    """Return facility/city/country dicts for every location in the module."""
    raw_locations = locations_module.get('locations')
    if not isinstance(raw_locations, list):
        return []
    return [
        {
            'facility': loc.get('facility', ''),
            'city': loc.get('city', ''),
            'country': loc.get('country', ''),
        }
        for loc in raw_locations
        if isinstance(loc, dict)
    ]


def _fetch_detail_protocol(session, nct_id: str) -> dict:
    """Fetch one study's ``protocolSection``; return ``{}`` if the fetch fails.

    This is a best-effort enrichment: a failure is logged and the caller
    falls back to the data already present in the search result.
    """
    detail_url = f"{CLINICALTRIALS_API_BASE}/{nct_id}"
    try:
        response = session.get(detail_url, params={'format': 'json'}, timeout=60)
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Warning: could not fetch details for {nct_id}: {exc}")
        return {}
    return _section(payload, 'protocolSection')


def _sort_locations_by_country(locations: list, country: str) -> None:
    """Sort in place: exact country match first, partial match next, rest last."""
    wanted = country.lower().strip()

    def rank(loc):
        loc_country = (loc.get('country') or '').lower().strip()
        if loc_country == wanted:
            return 0
        # An empty country must not count as a partial match ('' is a
        # substring of every string).
        if loc_country and (wanted in loc_country or loc_country in wanted):
            return 1
        return 2

    locations.sort(key=rank)


def _build_study_info(study: dict, session, country: str) -> dict:
    """Convert one search-result study into the flat dict used for display."""
    protocol = _section(study, 'protocolSection')
    identification = _section(protocol, 'identificationModule')
    status_module = _section(protocol, 'statusModule')
    nct_id = identification.get('nctId', '')

    # The per-study endpoint is queried for the locations and eligibility
    # text; the search-result copy is the fallback when that yields nothing.
    detail_protocol = _fetch_detail_protocol(session, nct_id) if nct_id else {}

    locations = _extract_locations(_section(detail_protocol, 'contactsLocationsModule'))
    if not locations:
        locations = _extract_locations(_section(protocol, 'contactsLocationsModule'))
    if country and locations:
        _sort_locations_by_country(locations, country)

    eligibility_text = (
        _section(detail_protocol, 'eligibilityModule').get('eligibilityCriteria')
        or _section(protocol, 'eligibilityModule').get('eligibilityCriteria')
        or ''
    )
    inclusion_criteria, exclusion_criteria = _parse_eligibility(eligibility_text)

    return {
        'nctId': nct_id,
        'title': identification.get('officialTitle') or identification.get('briefTitle', ''),
        'sponsor': _section(identification, 'organization').get('fullName', ''),
        'status': status_module.get('overallStatus', ''),
        'conditions': _section(protocol, 'conditionsModule').get('conditions', []),
        'briefSummary': _section(protocol, 'descriptionModule').get('briefSummary', ''),
        'locations': locations,
        'inclusionCriteria': inclusion_criteria,
        'exclusionCriteria': exclusion_criteria,
        'lastUpdateDate': status_module.get('lastUpdateSubmitDate', ''),
    }


def _apply_llm_ranking(studies: list, ranking_terms: str) -> Tuple[list, Optional[str], bool]:
    """Rank studies with the optional LLM service.

    Returns:
        tuple: (studies, model_name, ranking_failed). ``model_name`` is set
        only when ranking actually succeeded; ``ranking_failed`` is True when
        the service was available but the ranking call did not produce a
        result. On any failure the input order is returned unchanged.
    """
    if not LLM_AVAILABLE:
        print("Warning: LLM service not available. Ranking disabled.")
        print(f" - LLM_AVAILABLE: {LLM_AVAILABLE}")
        print(f" - get_llm_service: {get_llm_service}")
        return studies, None, False
    if not get_llm_service:
        print("Warning: get_llm_service is None. Ranking disabled.")
        return studies, None, False
    try:
        llm_service = get_llm_service()
        if not llm_service:
            print("Warning: LLM service returned None. Ranking disabled.")
            print(f" - Check environment variables:")
            print(f" USE_HF_API: {os.environ.get('USE_HF_API', 'NOT SET')}")
            print(f" HUGGINGFACE_API_TOKEN: {'SET' if os.environ.get('HUGGINGFACE_API_TOKEN') else 'NOT SET'}")
            return studies, None, False
        model_name = llm_service.model_name
        print(f"Ranking studies with {model_name} using terms: {ranking_terms}")
        ranked = llm_service.rank_studies(studies, ranking_terms)
        if ranked is None:
            print("Warning: LLM ranking returned no result. Keeping original order.")
            return studies, None, True
        print(f"Ranking completed. {len(ranked)} studies ranked.")
        return ranked, model_name, False
    except Exception as e:
        # Ranking is an optional enhancement: log the failure and keep the
        # unranked results instead of failing the whole search.
        print(f"Error during LLM ranking: {str(e)}")
        traceback.print_exc()
        return studies, None, True


def _render_ranking_notice(ranking_terms: str, model_name: Optional[str], ranking_failed: bool) -> str:
    """Render the banner that says whether (and how) ranking was applied."""
    if model_name:
        terms = _esc(ranking_terms)
        return (
            f'<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 1rem; border-radius: 8px; margin: 1rem 0;"><strong>🤖 Results ranked by {_esc(model_name)}</strong><br>Ranking terms: "{terms}"</div>'
            f'<p style="margin: 1rem 0; padding: 0.75rem; background: #f0f8ff; border-left: 3px solid #667eea; border-radius: 4px;"><strong>Ranking applied:</strong> Studies have been reordered by AI relevance to "{terms}". The model analyzed each study\'s title, summary, conditions, and inclusion criteria to determine how closely they match your ranking terms. Higher-ranked studies appear first.</p>'
        )
    if ranking_failed:
        error_msg = "The ranking request failed; results are shown in their original order."
        hint = "Check the application logs for details."
    else:
        error_msg = "LLM service is not available."
        if os.environ.get('USE_HF_API', 'false').lower() != 'true':
            error_msg += " Set USE_HF_API=true in your Space secrets."
        if not os.environ.get('HUGGINGFACE_API_TOKEN', ''):
            error_msg += " Set HUGGINGFACE_API_TOKEN in your Space secrets."
        hint = "Check your Space Settings → Secrets to add the required environment variables."
    return f'<div style="background: #fff3cd; color: #856404; padding: 1rem; border-radius: 8px; margin: 1rem 0; border: 1px solid #ffc107;"><strong>⚠️ Ranking not applied:</strong> {error_msg}<br><small>{hint}</small></div>'


def _render_relevance_badge(study: dict) -> str:
    """Render the relevance badge, or '' when the study carries no score."""
    if 'relevance_score' not in study:
        return ""
    try:
        score = float(study['relevance_score'])
        score_percent = int(score * 100)
    except (TypeError, ValueError, OverflowError):
        # A malformed score from the ranker should not break the whole page.
        score, score_percent = 0.0, 0
    # Green for high, yellow for medium, grey for low relevance.
    if score >= 0.7:
        score_color = '#28a745'
    elif score >= 0.4:
        score_color = '#ffc107'
    else:
        score_color = '#6c757d'
    return f'<span style="background: {score_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600; margin-left: 0.5rem;">Relevance: {score_percent}%</span>'


def _render_criteria_section(heading: str, criteria: list) -> str:
    """Render a collapsible bullet list of criteria under a fixed heading."""
    items = "".join(
        f"<li style='margin-bottom: 0.5rem;'>{_esc(criterion)}</li>" for criterion in criteria
    )
    return (
        f"<details style='margin-top: 1rem;'><summary style='cursor: pointer; color: #667eea; font-weight: 600;'>{heading} ▼</summary><ul style='margin-top: 0.5rem; padding-left: 1.5rem;'>"
        f"{items}</ul></details>"
    )


def _render_study_card(study: dict) -> str:
    """Render one study as an HTML card; every data value is HTML-escaped."""
    status = study.get('status', '')
    try:
        badge_color = _STATUS_BADGE_COLORS.get(status, _DEFAULT_BADGE_COLOR)
    except TypeError:
        # An unhashable status value cannot be looked up; use the default.
        badge_color = _DEFAULT_BADGE_COLOR
    nct_id = _esc(study.get('nctId', ''))

    parts = [
        "\n"
        '<div style="border: 1px solid #e0e0e0; border-radius: 8px; padding: 1.5rem; margin: 1rem 0; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">\n'
        '<div style="display: flex; justify-content: space-between; align-items: start; margin-bottom: 1rem;">\n'
        f'<h3 style="margin: 0; color: #333; flex: 1;">{_esc(study.get("title", ""))}</h3>\n'
        '<div style="display: flex; align-items: center; gap: 0.5rem;">\n'
        f'<span style="background: {badge_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">\n'
        f'{_esc(status)}\n'
        '</span>\n'
        f'{_render_relevance_badge(study)}\n'
        '</div>\n'
        '</div>\n'
        f'<p><strong>NCT ID:</strong> {nct_id}</p>\n'
    ]

    if study.get('sponsor'):
        parts.append(f"<p><strong>Sponsor:</strong> {_esc(study['sponsor'])}</p>")
    if study.get('conditions'):
        conditions = ', '.join(str(condition) for condition in study['conditions'])
        parts.append(f"<p><strong>Conditions:</strong> {_esc(conditions)}</p>")
    if study.get('briefSummary'):
        summary = str(study['briefSummary'])
        # Truncate before escaping so an HTML entity is never cut in half.
        if len(summary) > 600:
            summary = summary[:600] + '...'
        parts.append(f'<p style="color: #666; line-height: 1.6;"><strong>Summary:</strong> {_esc(summary)}</p>')
    if study.get('inclusionCriteria'):
        parts.append(_render_criteria_section("Inclusion Criteria", study['inclusionCriteria']))
    if study.get('exclusionCriteria'):
        parts.append(_render_criteria_section("Exclusion Criteria", study['exclusionCriteria']))
    if study.get('ranking_reasoning'):
        parts.append(
            f"<p style='margin-top: 1rem; padding: 0.75rem; background: #e8f4f8; border-left: 3px solid #667eea; border-radius: 4px; font-size: 0.9rem;'><strong>Ranking Reasoning:</strong> {_esc(study['ranking_reasoning'])}</p>"
        )

    parts.append(
        "\n"
        '<div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #e0e0e0; display: flex; justify-content: space-between; align-items: center;">\n'
        f'<span style="color: #888; font-size: 0.9rem;">Last updated: {_esc(study.get("lastUpdateDate", ""))}</span>\n'
        f'<a href="https://clinicaltrials.gov/study/{nct_id}" target="_blank" style="color: #667eea; text-decoration: none; font-weight: 600;">View on ClinicalTrials.gov →</a>\n'
        '</div>\n'
        '</div>\n'
    )
    return "".join(parts)


def search_trials(query: str, country: str, status: str, ranking_terms: str = "") -> Tuple[str, int, Optional[str]]:
    """
    Search clinical trials and return formatted results.

    Queries the ClinicalTrials.gov studies endpoint (at most 20 results),
    fetches each study's detail record for locations and eligibility text,
    optionally reorders the studies with the LLM service, and renders HTML.
    All text coming from the API, the ranker or the user is HTML-escaped.

    Args:
        query: Search keywords; empty or whitespace-only yields a prompt message.
        country: Optional country filter; also used to sort matching locations first.
        status: Optional overall-status filter (e.g. "RECRUITING").
        ranking_terms: Optional terms for LLM ranking; blank disables ranking.

    Returns:
        tuple: (results_html, total_count, llm_model_name). ``llm_model_name``
        is None unless ranking was requested and succeeded. Errors are never
        raised; they are reported as ``("Error: ...", 0, None)``.
    """
    if not query or not query.strip():
        return "Please enter search keywords.", 0, None
    try:
        # Build the search expression: keywords AND optional field filters.
        # NOTE(review): the keywords are not parenthesised, so a query that
        # itself contains OR may bind differently from what the user expects
        # - confirm against the API's expression syntax before changing.
        query_parts = [query.strip()]
        if country:
            query_parts.append(f'AREA[LocationCountry]{country}')
        if status:
            query_parts.append(f'AREA[OverallStatus]{status}')
        params = {
            'format': 'json',
            'pageSize': 20,
            'query.term': ' AND '.join(query_parts),
        }

        # One session for the search and all detail fetches, so the
        # connection to the API host is reused.
        with requests.Session() as session:
            response = session.get(CLINICALTRIALS_API_BASE, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            studies = [
                _build_study_info(study, session, country)
                for study in (data.get('studies') or [])
            ]

        if not studies:
            return "No studies found. Try different search criteria.", 0, None

        wants_ranking = bool(ranking_terms and ranking_terms.strip())
        llm_model_name = None
        ranking_failed = False
        if wants_ranking:
            studies, llm_model_name, ranking_failed = _apply_llm_ranking(studies, ranking_terms)
            if not studies:
                return "No studies found. Try different search criteria.", 0, None

        html_parts = [f"<h2>Found {len(studies)} studies</h2>"]
        if wants_ranking:
            html_parts.append(_render_ranking_notice(ranking_terms, llm_model_name, ranking_failed))
        html_parts.extend(_render_study_card(study) for study in studies)
        return "".join(html_parts), len(studies), llm_model_name
    except Exception as e:
        # Top-level boundary for the UI: report the failure instead of raising.
        return f"Error: {_esc(e)}", 0, None
# Create Gradio interface

# Inline script that adds PWA-related <meta> tags and a manifest <link> to the
# document head (for iOS/Android installation).
# NOTE(review): whether a <script> placed inside gr.HTML is executed depends
# on the Gradio version - confirm this snippet actually runs.
_PWA_SETUP_HTML = """
<script>
(function() {
// Add PWA meta tags to document head
const metaTags = [
{ name: 'apple-mobile-web-app-capable', content: 'yes' },
{ name: 'apple-mobile-web-app-status-bar-style', content: 'default' },
{ name: 'apple-mobile-web-app-title', content: 'Trial Matcher' },
{ name: 'mobile-web-app-capable', content: 'yes' },
{ name: 'theme-color', content: '#4a90e2' }
];
metaTags.forEach(tag => {
let meta = document.querySelector(`meta[name="${tag.name}"]`);
if (!meta) {
meta = document.createElement('meta');
meta.setAttribute('name', tag.name);
document.head.appendChild(meta);
}
meta.setAttribute('content', tag.content);
});
// Add manifest link
let manifestLink = document.querySelector('link[rel="manifest"]');
if (!manifestLink) {
manifestLink = document.createElement('link');
manifestLink.setAttribute('rel', 'manifest');
document.head.appendChild(manifestLink);
}
manifestLink.setAttribute('href', 'manifest.json');
})();
</script>
"""

# Markdown shown above the search form.
_HEADER_MARKDOWN = """
# 🔬 Clinical Trial Matcher
Search and filter clinical trials from [ClinicalTrials.gov](https://clinicaltrials.gov/) with AI-powered fine-tuning of the results based on your query.
"""

# Markdown shown below the results.
_FOOTER_MARKDOWN = """
---
**App. developed by [Mackenzie Weygandt Mathis](https://en.wikipedia.org/wiki/Mackenzie_Weygandt_Mathis). Data sourced from [ClinicalTrials.gov](https://clinicaltrials.gov)**
This app uses AI-powered ranking with Hugging Face models (default: DeepSeek-V3.2) to intelligently rank search results by relevance.
"""

# Dropdown options; the leading empty string means "no filter".
_COUNTRY_CHOICES = [
    "", "Germany", "United States", "United Kingdom", "Canada", "France",
    "Italy", "Spain", "Netherlands", "Belgium", "Switzerland", "Austria",
    "Sweden", "Norway", "Denmark", "Finland", "Poland", "Czech Republic",
    "Australia", "New Zealand", "Japan", "China", "India", "South Korea",
    "Brazil", "Mexico", "Argentina", "Chile", "South Africa", "Israel",
    "Turkey", "Russia", "Greece", "Portugal", "Ireland",
]
_STATUS_CHOICES = [
    "", "RECRUITING", "NOT_YET_RECRUITING", "ACTIVE_NOT_RECRUITING",
    "COMPLETED", "SUSPENDED", "TERMINATED", "WITHDRAWN",
]


# Search function
def perform_search(query, country, status, ranking_terms=""):
    """Run a search and reveal the ranking textbox and button afterwards."""
    markup, total, _model = search_trials(query, country, status, ranking_terms)
    # Show ranking input and button after search
    reveal_ranking_input = gr.update(visible=True)
    reveal_ranking_button = gr.update(visible=True)
    return markup, total, reveal_ranking_input, reveal_ranking_button


# Ranking function
def perform_ranking(query, country, status, ranking_terms):
    """Re-run the search with ranking terms so the results come back ranked."""
    has_terms = bool(ranking_terms) and bool(ranking_terms.strip())
    if not has_terms:
        return "Please enter ranking terms to sort the results.", 0
    print(f"Ranking requested with terms: {ranking_terms}")
    markup, total, _model = search_trials(query, country, status, ranking_terms)
    return markup, total


with gr.Blocks(title="Clinical Trial Matcher", theme=gr.themes.Soft()) as demo:
    # Inject PWA meta tags for iOS/Android installation
    gr.HTML(_PWA_SETUP_HTML, visible=False)
    gr.Markdown(_HEADER_MARKDOWN)

    with gr.Row():
        # Left column: search form and (initially hidden) ranking controls.
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="Search Keywords",
                placeholder="e.g., pancreatic cancer, PDAC, KRAS, etc.",
                value="",
            )
            country_input = gr.Dropdown(
                label="Country (Optional)",
                choices=_COUNTRY_CHOICES,
                value="",
                filterable=True,
                interactive=True,
            )
            status_input = gr.Dropdown(
                label="Status (Optional)",
                choices=_STATUS_CHOICES,
                value="",
                interactive=True,
            )
            ranking_input = gr.Textbox(
                label="✨ Use AI to sort the results based on my query:",
                placeholder="e.g., KRAS mutation, immunotherapy",
                value="",
                visible=False,
            )
            ranking_btn = gr.Button("Rank Results", variant="secondary", visible=False)
            search_btn = gr.Button("Search Clinical Trials", variant="primary", size="lg")

        # Right column: rendered results plus a hidden result counter.
        with gr.Column(scale=2):
            results_output = gr.HTML(label="Results")
            count_output = gr.Textbox(label="Total Studies Found", visible=False)

    # Both buttons read the same four form fields.
    form_inputs = [query_input, country_input, status_input, ranking_input]

    search_btn.click(
        fn=perform_search,
        inputs=form_inputs,
        outputs=[results_output, count_output, ranking_input, ranking_btn],
    )
    ranking_btn.click(
        fn=perform_ranking,
        inputs=form_inputs,
        outputs=[results_output, count_output],
    )

    gr.Markdown(_FOOTER_MARKDOWN)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)