| | """ |
| | Gradio app for Clinical Trial Matcher |
| | For Hugging Face Spaces deployment |
| | """ |
| | import gradio as gr |
| | import requests |
| | import re |
| | import os |
| | from typing import Tuple, Optional |
| |
|
| | |
# Base URL of the ClinicalTrials.gov v2 "studies" endpoint.
CLINICALTRIALS_API_BASE = "https://clinicaltrials.gov/api/v2/studies"

# Ranking stays disabled unless the optional llm_service module imports cleanly.
LLM_AVAILABLE = False
get_llm_service = None

# Hugging Face settings read from the environment. Only whether a token is
# set is printed at startup, never the token itself.
HF_TOKEN = os.getenv('HUGGINGFACE_API_TOKEN', '')
USE_HF_API = os.getenv('USE_HF_API', 'false').lower() == 'true'
print(f"Environment check - USE_HF_API: {USE_HF_API}, HF_TOKEN set: {bool(HF_TOKEN)}")

try:
    from llm_service import get_llm_service
    LLM_AVAILABLE = True
    print("LLM service imported successfully.")
except ImportError as import_error:
    # The module is optional: the app still searches, it just cannot rank.
    print(f"LLM service not available. Ranking will be disabled. Error: {import_error}")
except Exception as unexpected_error:
    # Any other failure while importing is reported with a full traceback.
    print(f"Error importing LLM service: {unexpected_error}")
    import traceback
    traceback.print_exc()
| |
|
| |
|
# Section headers inside the free-text eligibility block. The inclusion
# section runs up to the "(Key) Exclusion Criteria" header or the end of the
# text; the exclusion section runs to the end of the text.
_INCLUSION_SECTION_RE = re.compile(
    r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)',
    re.IGNORECASE | re.DOTALL,
)
_EXCLUSION_SECTION_RE = re.compile(
    r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$',
    re.IGNORECASE | re.DOTALL,
)
# Leading list marker ("*", "-" or "•") on a criterion line.
_BULLET_PREFIX_RE = re.compile(r'^[\*\-\•]\s*')

# Badge colour per overall status; unknown statuses fall back to grey.
_STATUS_BADGE_COLORS = {
    'RECRUITING': '#28a745',
    'NOT_YET_RECRUITING': '#ffc107',
    'ACTIVE_NOT_RECRUITING': '#17a2b8',
    'COMPLETED': '#6c757d',
    'SUSPENDED': '#dc3545',
    'TERMINATED': '#dc3545',
    'WITHDRAWN': '#6c757d',
}
_DEFAULT_BADGE_COLOR = '#6c757d'
_SUMMARY_MAX_CHARS = 600
_NO_STUDIES_MESSAGE = "No studies found. Try different search criteria."


def _escape(value) -> str:
    """Return *value* as text that is safe to embed in HTML content or attributes."""
    import html
    return html.escape('' if value is None else str(value), quote=True)


def _as_dict(value) -> dict:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _clean_criteria(section_text: str) -> list:
    """Split one criteria section into items, dropping bullets and fragments of 3 chars or fewer."""
    items = []
    for raw_line in section_text.split('\n'):
        item = _BULLET_PREFIX_RE.sub('', raw_line.strip()).strip()
        if len(item) > 3:
            items.append(item)
    return items


def _parse_eligibility(eligibility_text) -> Tuple[list, list]:
    """Split free-text eligibility criteria into (inclusion, exclusion) item lists."""
    if not eligibility_text or not isinstance(eligibility_text, str):
        return [], []
    inclusion_match = _INCLUSION_SECTION_RE.search(eligibility_text)
    exclusion_match = _EXCLUSION_SECTION_RE.search(eligibility_text)
    inclusion = _clean_criteria(inclusion_match.group(1)) if inclusion_match else []
    exclusion = _clean_criteria(exclusion_match.group(1)) if exclusion_match else []
    return inclusion, exclusion


def _extract_locations(protocol_section: dict) -> list:
    """Return facility/city/country dicts from a protocol section's location list."""
    raw_locations = _as_dict(protocol_section.get('contactsLocationsModule')).get('locations')
    if not isinstance(raw_locations, list):
        return []
    return [
        {
            'facility': loc.get('facility') or '',
            'city': loc.get('city') or '',
            'country': loc.get('country') or '',
        }
        for loc in raw_locations
        if isinstance(loc, dict)
    ]


def _sort_locations_by_country(locations: list, country: str) -> None:
    """Sort *locations* in place: exact country match first, partial match next, others last."""
    target = country.lower().strip()
    if not target:
        return

    def rank(loc):
        loc_country = (loc.get('country') or '').lower().strip()
        if loc_country == target:
            return 0
        # An empty country is a substring of everything; it must not count
        # as a partial match.
        if loc_country and (target in loc_country or loc_country in target):
            return 1
        return 2

    locations.sort(key=rank)


def _fetch_detail_protocol(nct_id: str) -> dict:
    """Fetch one study record and return its protocolSection, or {} on any failure.

    The lookup is best-effort: a failed request is logged and the caller
    falls back to the data already present in the search response.
    """
    try:
        response = requests.get(
            f"{CLINICALTRIALS_API_BASE}/{nct_id}",
            params={'format': 'json'},
            timeout=60,
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        print(f"Warning: could not load details for {nct_id}: {exc}")
        return {}
    return _as_dict(_as_dict(payload).get('protocolSection'))


def _build_study_record(study: dict, country: str) -> dict:
    """Flatten one raw API study into the dict used for ranking and rendering."""
    protocol = _as_dict(study.get('protocolSection'))
    identification = _as_dict(protocol.get('identificationModule'))
    status_module = _as_dict(protocol.get('statusModule'))
    nct_id = identification.get('nctId') or ''

    detail_protocol = _fetch_detail_protocol(nct_id) if nct_id else {}

    # Prefer the detail record; fall back to the search hit when the detail
    # lookup failed or lacks the field.
    locations = _extract_locations(detail_protocol) or _extract_locations(protocol)
    eligibility_text = (
        _as_dict(detail_protocol.get('eligibilityModule')).get('eligibilityCriteria')
        or _as_dict(protocol.get('eligibilityModule')).get('eligibilityCriteria')
        or ''
    )
    inclusion_criteria, exclusion_criteria = _parse_eligibility(eligibility_text)

    if country and locations:
        _sort_locations_by_country(locations, country)

    return {
        'nctId': nct_id,
        'title': identification.get('officialTitle') or identification.get('briefTitle', ''),
        'sponsor': _as_dict(identification.get('organization')).get('fullName') or '',
        'status': status_module.get('overallStatus', ''),
        'conditions': _as_dict(protocol.get('conditionsModule')).get('conditions') or [],
        'briefSummary': _as_dict(protocol.get('descriptionModule')).get('briefSummary') or '',
        'locations': locations,
        'inclusionCriteria': inclusion_criteria,
        'exclusionCriteria': exclusion_criteria,
        'lastUpdateDate': status_module.get('lastUpdateSubmitDate', ''),
    }


def _llm_unavailable_message() -> str:
    """Build the (trusted, constant) HTML explaining why ranking could not run."""
    message = "LLM service is not available."
    if os.environ.get('USE_HF_API', 'false').lower() != 'true':
        message += " Set USE_HF_API=true in your Space secrets."
    if not os.environ.get('HUGGINGFACE_API_TOKEN', ''):
        message += " Set HUGGINGFACE_API_TOKEN in your Space secrets."
    message += "<br><small>Check your Space Settings → Secrets to add the required environment variables.</small>"
    return message


def _rank_with_llm(studies: list, ranking_terms: str) -> Tuple[list, Optional[str], Optional[str]]:
    """Rank *studies* with the LLM service.

    Returns:
        tuple: (studies, model_name, failure_html). On success the studies are
        the ranked list and failure_html is None. On any failure the input
        list is returned unchanged, model_name is None and failure_html holds
        the message to show the user.
    """
    if not LLM_AVAILABLE:
        print("Warning: LLM service not available. Ranking disabled.")
        print(f" - LLM_AVAILABLE: {LLM_AVAILABLE}")
        print(f" - get_llm_service: {get_llm_service}")
        return studies, None, _llm_unavailable_message()
    if not get_llm_service:
        print("Warning: get_llm_service is None. Ranking disabled.")
        return studies, None, _llm_unavailable_message()

    try:
        llm_service = get_llm_service()
        if not llm_service:
            print("Warning: LLM service returned None. Ranking disabled.")
            print(f" - Check environment variables:")
            print(f" USE_HF_API: {os.environ.get('USE_HF_API', 'NOT SET')}")
            print(f" HUGGINGFACE_API_TOKEN: {'SET' if os.environ.get('HUGGINGFACE_API_TOKEN') else 'NOT SET'}")
            return studies, None, _llm_unavailable_message()

        model_name = llm_service.model_name
        print(f"Ranking studies with {model_name} using terms: {ranking_terms}")
        ranked = llm_service.rank_studies(studies, ranking_terms)
        print(f"Ranking completed. {len(ranked)} studies ranked.")
        return ranked, model_name, None
    except Exception as e:
        # Ranking is an optional enhancement: report the failure and keep the
        # unranked results rather than claiming they were ranked.
        print(f"Error during LLM ranking: {str(e)}")
        import traceback
        traceback.print_exc()
        return studies, None, "The ranking request failed. Results are shown in their original order."


def _render_ranking_notice(ranking_terms: str, llm_model_name: Optional[str], failure_html: Optional[str]) -> str:
    """Render the banner that tells the user whether AI ranking was applied."""
    if not ranking_terms:
        return ''
    if llm_model_name:
        safe_terms = _escape(ranking_terms)
        safe_model = _escape(llm_model_name)
        return (
            f'<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 1rem; border-radius: 8px; margin: 1rem 0;"><strong>🤖 Results ranked by {safe_model}</strong><br>Ranking terms: "{safe_terms}"</div>'
            f'<p style="margin: 1rem 0; padding: 0.75rem; background: #f0f8ff; border-left: 3px solid #667eea; border-radius: 4px;"><strong>Ranking applied:</strong> Studies have been reordered by AI relevance to "{safe_terms}". The model analyzed each study\'s title, summary, conditions, and inclusion criteria to determine how closely they match your ranking terms. Higher-ranked studies appear first.</p>'
        )
    reason = failure_html or _llm_unavailable_message()
    return f'<div style="background: #fff3cd; color: #856404; padding: 1rem; border-radius: 8px; margin: 1rem 0; border: 1px solid #ffc107;"><strong>⚠️ Ranking not applied:</strong> {reason}</div>'


def _render_relevance_badge(study: dict) -> str:
    """Render the relevance badge, or '' when the study has no usable score."""
    if 'relevance_score' not in study:
        return ''
    try:
        score = float(study['relevance_score'])
        score_percent = round(score * 100)
    except (TypeError, ValueError, OverflowError):
        # A malformed score from the ranker must not abort the whole page.
        return ''
    if score >= 0.7:
        score_color = '#28a745'
    elif score >= 0.4:
        score_color = '#ffc107'
    else:
        score_color = '#6c757d'
    return f'<span style="background: {score_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600; margin-left: 0.5rem;">Relevance: {score_percent}%</span>'


def _render_criteria_section(title: str, criteria: list) -> str:
    """Render a collapsible bullet list for one criteria section."""
    items = ''.join(
        f"<li style='margin-bottom: 0.5rem;'>{_escape(criterion)}</li>"
        for criterion in criteria
    )
    return (
        "<details style='margin-top: 1rem;'><summary style='cursor: pointer; color: #667eea; font-weight: 600;'>"
        f"{title} ▼</summary><ul style='margin-top: 0.5rem; padding-left: 1.5rem;'>{items}</ul></details>"
    )


def _render_study_card(study: dict) -> str:
    """Render one study as an HTML card; every dynamic value is HTML-escaped."""
    status = study.get('status', '')
    badge_color = _STATUS_BADGE_COLORS.get(status, _DEFAULT_BADGE_COLOR) if isinstance(status, str) else _DEFAULT_BADGE_COLOR
    safe_nct_id = _escape(study.get('nctId'))

    parts = [
        '<div style="border: 1px solid #e0e0e0; border-radius: 8px; padding: 1.5rem; margin: 1rem 0; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">',
        '<div style="display: flex; justify-content: space-between; align-items: start; margin-bottom: 1rem;">',
        f'<h3 style="margin: 0; color: #333; flex: 1;">{_escape(study.get("title"))}</h3>',
        '<div style="display: flex; align-items: center; gap: 0.5rem;">',
        f'<span style="background: {badge_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">{_escape(status)}</span>',
        _render_relevance_badge(study),
        '</div>',
        '</div>',
        f'<p><strong>NCT ID:</strong> {safe_nct_id}</p>',
    ]

    if study.get('sponsor'):
        parts.append(f"<p><strong>Sponsor:</strong> {_escape(study['sponsor'])}</p>")

    if study.get('conditions'):
        conditions = ', '.join(_escape(condition) for condition in study['conditions'])
        parts.append(f"<p><strong>Conditions:</strong> {conditions}</p>")

    if study.get('briefSummary'):
        summary = str(study['briefSummary'])
        if len(summary) > _SUMMARY_MAX_CHARS:
            summary = summary[:_SUMMARY_MAX_CHARS] + '...'
        # Escape after truncating so an entity is never cut in half.
        parts.append(f'<p style="color: #666; line-height: 1.6;"><strong>Summary:</strong> {_escape(summary)}</p>')

    if study.get('inclusionCriteria'):
        parts.append(_render_criteria_section('Inclusion Criteria', study['inclusionCriteria']))

    if study.get('exclusionCriteria'):
        parts.append(_render_criteria_section('Exclusion Criteria', study['exclusionCriteria']))

    if study.get('ranking_reasoning'):
        parts.append(
            "<p style='margin-top: 1rem; padding: 0.75rem; background: #e8f4f8; border-left: 3px solid #667eea; border-radius: 4px; font-size: 0.9rem;'>"
            f"<strong>Ranking Reasoning:</strong> {_escape(study['ranking_reasoning'])}</p>"
        )

    parts.append(
        '<div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #e0e0e0; display: flex; justify-content: space-between; align-items: center;">'
        f'<span style="color: #888; font-size: 0.9rem;">Last updated: {_escape(study.get("lastUpdateDate"))}</span>'
        f'<a href="https://clinicaltrials.gov/study/{safe_nct_id}" target="_blank" style="color: #667eea; text-decoration: none; font-weight: 600;">View on ClinicalTrials.gov →</a>'
        '</div>'
        '</div>'
    )
    return ''.join(parts)


def search_trials(query: str, country: str, status: str, ranking_terms: str = "") -> Tuple[str, int, Optional[str]]:
    """
    Search ClinicalTrials.gov and return the results rendered as HTML.

    Up to 20 studies are requested. For each one the full record is fetched
    (best-effort) to obtain locations and eligibility criteria. When
    ranking_terms is non-blank and the LLM service is usable, the studies are
    reordered by it; otherwise a notice explains why ranking was not applied.

    Args:
        query: Free-text search keywords; blank input returns a prompt message.
        country: Optional country name used to filter and to sort locations.
        status: Optional overall-status filter (e.g. "RECRUITING").
        ranking_terms: Optional terms for LLM-based re-ranking.

    Returns:
        tuple: (results_html, total_count, llm_model_name). llm_model_name is
        None unless ranking actually succeeded. Failures are reported in
        results_html with a count of 0 rather than raised.
    """
    if not query or not query.strip():
        return "Please enter search keywords.", 0, None

    try:
        query_parts = [query.strip()]
        if country:
            query_parts.append(f'AREA[LocationCountry]{country}')
        if status:
            query_parts.append(f'AREA[OverallStatus]{status}')

        params = {
            'format': 'json',
            'pageSize': 20,
            'query.term': ' AND '.join(query_parts),
        }
        response = requests.get(CLINICALTRIALS_API_BASE, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        raw_studies = _as_dict(data).get('studies') or []
        studies = [
            _build_study_record(raw_study, country)
            for raw_study in raw_studies
            if isinstance(raw_study, dict)
        ]
        if not studies:
            return _NO_STUDIES_MESSAGE, 0, None

        ranking_terms = (ranking_terms or "").strip()
        llm_model_name = None
        ranking_failure = None
        if ranking_terms:
            studies, llm_model_name, ranking_failure = _rank_with_llm(studies, ranking_terms)
            if not studies:
                return _NO_STUDIES_MESSAGE, 0, None

        html_parts = [
            f"<h2>Found {len(studies)} studies</h2>",
            _render_ranking_notice(ranking_terms, llm_model_name, ranking_failure),
        ]
        html_parts.extend(_render_study_card(study) for study in studies)
        return ''.join(html_parts), len(studies), llm_model_name

    except Exception as e:
        # Top-level boundary for the UI: show the error instead of raising.
        # The message can echo the request URL (and so the user's query), so
        # it is escaped before being rendered as HTML.
        return f"Error: {_escape(e)}", 0, None
| |
|
| |
|
| | |
# Script that adds PWA meta tags and a manifest link to the document head.
# NOTE(review): Gradio may render gr.HTML content as static markup without
# executing <script> tags - confirm this runs, or move it to a head/js hook.
_PWA_SCRIPT_HTML = """
<script>
(function() {
    // Add PWA meta tags to document head
    const metaTags = [
        { name: 'apple-mobile-web-app-capable', content: 'yes' },
        { name: 'apple-mobile-web-app-status-bar-style', content: 'default' },
        { name: 'apple-mobile-web-app-title', content: 'Trial Matcher' },
        { name: 'mobile-web-app-capable', content: 'yes' },
        { name: 'theme-color', content: '#4a90e2' }
    ];

    metaTags.forEach(tag => {
        let meta = document.querySelector(`meta[name="${tag.name}"]`);
        if (!meta) {
            meta = document.createElement('meta');
            meta.setAttribute('name', tag.name);
            document.head.appendChild(meta);
        }
        meta.setAttribute('content', tag.content);
    });

    // Add manifest link
    let manifestLink = document.querySelector('link[rel="manifest"]');
    if (!manifestLink) {
        manifestLink = document.createElement('link');
        manifestLink.setAttribute('rel', 'manifest');
        document.head.appendChild(manifestLink);
    }
    manifestLink.setAttribute('href', 'manifest.json');
})();
</script>
"""

# Page header shown above the search form.
_INTRO_MARKDOWN = """
# 🔬 Clinical Trial Matcher

Search and filter clinical trials from [ClinicalTrials.gov](https://clinicaltrials.gov/) with AI-powered fine-tuning of the results based on your query.

"""

# Attribution footer shown below the results.
_FOOTER_MARKDOWN = """
---
**App. developed by [Mackenzie Weygandt Mathis](https://en.wikipedia.org/wiki/Mackenzie_Weygandt_Mathis). Data sourced from [ClinicalTrials.gov](https://clinicaltrials.gov)**

This app uses AI-powered ranking with Hugging Face models (default: DeepSeek-V3.2) to intelligently rank search results by relevance.
"""

# Dropdown options; the leading empty string means "no filter".
_COUNTRY_CHOICES = [
    "", "Germany", "United States", "United Kingdom", "Canada", "France",
    "Italy", "Spain", "Netherlands", "Belgium", "Switzerland", "Austria",
    "Sweden", "Norway", "Denmark", "Finland", "Poland", "Czech Republic",
    "Australia", "New Zealand", "Japan", "China", "India", "South Korea",
    "Brazil", "Mexico", "Argentina", "Chile", "South Africa", "Israel",
    "Turkey", "Russia", "Greece", "Portugal", "Ireland",
]
_STATUS_CHOICES = [
    "", "RECRUITING", "NOT_YET_RECRUITING", "ACTIVE_NOT_RECRUITING",
    "COMPLETED", "SUSPENDED", "TERMINATED", "WITHDRAWN",
]

with gr.Blocks(title="Clinical Trial Matcher", theme=gr.themes.Soft()) as demo:
    gr.HTML(_PWA_SCRIPT_HTML, visible=False)
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        # Left column: search form. The ranking controls start hidden and are
        # revealed by the first search.
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="Search Keywords",
                placeholder="e.g., pancreatic cancer, PDAC, KRAS, etc.",
                value="",
            )
            country_input = gr.Dropdown(
                label="Country (Optional)",
                choices=_COUNTRY_CHOICES,
                value="",
                filterable=True,
                interactive=True,
            )
            status_input = gr.Dropdown(
                label="Status (Optional)",
                choices=_STATUS_CHOICES,
                value="",
                interactive=True,
            )
            ranking_input = gr.Textbox(
                label="✨ Use AI to sort the results based on my query:",
                placeholder="e.g., KRAS mutation, immunotherapy",
                value="",
                visible=False,
            )
            ranking_btn = gr.Button("Rank Results", variant="secondary", visible=False)
            search_btn = gr.Button("Search Clinical Trials", variant="primary", size="lg")

        # Right column: rendered results plus a hidden count field.
        with gr.Column(scale=2):
            results_output = gr.HTML(label="Results")
            count_output = gr.Textbox(label="Total Studies Found", visible=False)

    def perform_search(query, country, status, ranking_terms=""):
        """Run a search and reveal the ranking textbox and button."""
        results_html, total, _model = search_trials(query, country, status, ranking_terms)
        show = gr.update(visible=True)
        return results_html, total, show, gr.update(visible=True)

    def perform_ranking(query, country, status, ranking_terms):
        """Re-run the search with ranking terms; prompt when the terms are blank."""
        if not (ranking_terms and ranking_terms.strip()):
            return "Please enter ranking terms to sort the results.", 0
        print(f"Ranking requested with terms: {ranking_terms}")
        results_html, total, _model = search_trials(query, country, status, ranking_terms)
        return results_html, total

    # Both buttons read the same four form fields.
    form_inputs = [query_input, country_input, status_input, ranking_input]

    search_btn.click(
        fn=perform_search,
        inputs=form_inputs,
        outputs=[results_output, count_output, ranking_input, ranking_btn],
    )

    ranking_btn.click(
        fn=perform_ranking,
        inputs=form_inputs,
        outputs=[results_output, count_output],
    )

    gr.Markdown(_FOOTER_MARKDOWN)
| |
|
# Script entry point: serve the app on all network interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
| |
|
| |
|