""" Gradio app for Clinical Trial Matcher For Hugging Face Spaces deployment """ import gradio as gr import requests import re import os from typing import Tuple, Optional # ClinicalTrials.gov API base URL CLINICALTRIALS_API_BASE = "https://clinicaltrials.gov/api/v2/studies" # Import LLM service (optional - will gracefully degrade if not available) LLM_AVAILABLE = False get_llm_service = None # Check environment variables USE_HF_API = os.environ.get('USE_HF_API', 'false').lower() == 'true' HF_TOKEN = os.environ.get('HUGGINGFACE_API_TOKEN', '') print(f"Environment check - USE_HF_API: {USE_HF_API}, HF_TOKEN set: {bool(HF_TOKEN)}") try: from llm_service import get_llm_service LLM_AVAILABLE = True print("LLM service imported successfully.") except ImportError as e: print(f"LLM service not available. Ranking will be disabled. Error: {str(e)}") except Exception as e: print(f"Error importing LLM service: {str(e)}") import traceback traceback.print_exc() def search_trials(query: str, country: str, status: str, ranking_terms: str = "") -> Tuple[str, int, Optional[str]]: """ Search clinical trials and return formatted results Returns: tuple: (results_html, total_count, llm_model_name) """ if not query: return "Please enter search keywords.", 0, None try: # Build API request parameters params = { 'format': 'json', 'pageSize': 20 } # Build query filter query_parts = [] if query: query_parts.append(query) if country: query_parts.append(f'AREA[LocationCountry]{country}') if status: query_parts.append(f'AREA[OverallStatus]{status}') if query_parts: params['query.term'] = ' AND '.join(query_parts) # Make request to ClinicalTrials.gov API response = requests.get(CLINICALTRIALS_API_BASE, params=params, timeout=30) response.raise_for_status() data = response.json() # Extract and format relevant information studies = [] if 'studies' in data: for study in data['studies']: protocol_section = study.get('protocolSection', {}) identification = protocol_section.get('identificationModule', {}) nct_id = identification.get('nctId', '') organization = identification.get('organization', {}) sponsor = organization.get('fullName', '') if organization else '' status_module = protocol_section.get('statusModule', {}) description = protocol_section.get('descriptionModule', {}) conditions = protocol_section.get('conditionsModule', {}) locations_module = protocol_section.get('contactsLocationsModule', {}) # Fetch full study details to get ALL locations and eligibility criteria locations = [] inclusion_criteria = [] exclusion_criteria = [] detail_data = None if nct_id: try: detail_url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}" detail_response = requests.get(detail_url, params={'format': 'json'}, timeout=60) detail_response.raise_for_status() detail_data = detail_response.json() detail_protocol = detail_data.get('protocolSection', {}) detail_locations_module = detail_protocol.get('contactsLocationsModule', {}) if 'locations' in detail_locations_module: all_locations = detail_locations_module['locations'] if isinstance(all_locations, list) and len(all_locations) > 0: for loc in all_locations: locations.append({ 'facility': loc.get('facility', ''), 'city': loc.get('city', ''), 'country': loc.get('country', '') }) except Exception: pass # Extract eligibility criteria from detail data if available if detail_data: detail_protocol = detail_data.get('protocolSection', {}) eligibility_module = detail_protocol.get('eligibilityModule', {}) eligibility_text = eligibility_module.get('eligibilityCriteria', '') if eligibility_text: # Parse inclusion and exclusion criteria with multiple format support inclusion_patterns = [ r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)', r'Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=Exclusion\s+Criteria[^:\n]*:?|$)', ] exclusion_patterns = [ r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', r'Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', ] inclusion_match = None exclusion_match = None for pattern in inclusion_patterns: inclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if inclusion_match: break for pattern in exclusion_patterns: exclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if exclusion_match: break if inclusion_match: inclusion_text = inclusion_match.group(1).strip() inclusion_lines = [line.strip() for line in inclusion_text.split('\n') if line.strip()] inclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in inclusion_lines if line.strip()] inclusion_criteria = [item for item in inclusion_criteria if item and len(item) > 3] if exclusion_match: exclusion_text = exclusion_match.group(1).strip() exclusion_lines = [line.strip() for line in exclusion_text.split('\n') if line.strip()] exclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in exclusion_lines if line.strip()] exclusion_criteria = [item for item in exclusion_criteria if item and len(item) > 3] else: # Try to get eligibility from search results as fallback eligibility_module = protocol_section.get('eligibilityModule', {}) eligibility_text = eligibility_module.get('eligibilityCriteria', '') if eligibility_text: inclusion_patterns = [ r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)', r'Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=Exclusion\s+Criteria[^:\n]*:?|$)', ] exclusion_patterns = [ r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', r'Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', ] inclusion_match = None exclusion_match = None for pattern in inclusion_patterns: inclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if inclusion_match: break for pattern in exclusion_patterns: exclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if exclusion_match: break if inclusion_match: inclusion_text = inclusion_match.group(1).strip() inclusion_lines = [line.strip() for line in inclusion_text.split('\n') if line.strip()] inclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in inclusion_lines if line.strip()] inclusion_criteria = [item for item in inclusion_criteria if item and len(item) > 3] if exclusion_match: exclusion_text = exclusion_match.group(1).strip() exclusion_lines = [line.strip() for line in exclusion_text.split('\n') if line.strip()] exclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in exclusion_lines if line.strip()] exclusion_criteria = [item for item in exclusion_criteria if item and len(item) > 3] # If detail fetch failed or returned no locations, use search results as fallback if len(locations) == 0 and 'locations' in locations_module: all_locations = locations_module.get('locations', []) if isinstance(all_locations, list): for loc in all_locations: locations.append({ 'facility': loc.get('facility', ''), 'city': loc.get('city', ''), 'country': loc.get('country', '') }) # Sort locations: searched country first, then others if country and locations: search_country_lower = country.lower().strip() def location_sort_key(loc): loc_country = loc.get('country', '').lower().strip() if loc_country == search_country_lower: return 0 elif search_country_lower in loc_country or loc_country in search_country_lower: return 1 else: return 2 locations.sort(key=location_sort_key) study_info = { 'nctId': nct_id, 'title': identification.get('officialTitle') or identification.get('briefTitle', ''), 'sponsor': sponsor, 'status': status_module.get('overallStatus', ''), 'conditions': conditions.get('conditions', []), 'briefSummary': description.get('briefSummary', ''), 'locations': locations, 'inclusionCriteria': inclusion_criteria, 'exclusionCriteria': exclusion_criteria, 'lastUpdateDate': status_module.get('lastUpdateSubmitDate', '') } studies.append(study_info) # Apply LLM-based ranking if ranking terms provided llm_model_name = None if ranking_terms and ranking_terms.strip(): if not LLM_AVAILABLE: print("Warning: LLM service not available. Ranking disabled.") print(f" - LLM_AVAILABLE: {LLM_AVAILABLE}") print(f" - get_llm_service: {get_llm_service}") elif not get_llm_service: print("Warning: get_llm_service is None. Ranking disabled.") else: try: llm_service = get_llm_service() if llm_service: llm_model_name = llm_service.model_name print(f"Ranking studies with {llm_model_name} using terms: {ranking_terms}") studies = llm_service.rank_studies(studies, ranking_terms) print(f"Ranking completed. {len(studies)} studies ranked.") else: print("Warning: LLM service returned None. Ranking disabled.") print(f" - Check environment variables:") print(f" USE_HF_API: {os.environ.get('USE_HF_API', 'NOT SET')}") print(f" HUGGINGFACE_API_TOKEN: {'SET' if os.environ.get('HUGGINGFACE_API_TOKEN') else 'NOT SET'}") except Exception as e: print(f"Error during LLM ranking: {str(e)}") import traceback traceback.print_exc() # Format results as HTML if not studies: return "No studies found. Try different search criteria.", 0, None # Count how many studies were actually ranked (have relevance_score) ranked_count = sum(1 for s in studies if 'relevance_score' in s) total_before_ranking = len(studies) html_results = f"

Found {len(studies)} studies

" if ranking_terms and ranking_terms.strip() and llm_model_name: html_results += f'
🤖 Results ranked by {llm_model_name}
Ranking terms: "{ranking_terms}"
' html_results += f'

Ranking applied: Studies have been reordered by AI relevance to "{ranking_terms}". The model analyzed each study\'s title, summary, conditions, and inclusion criteria to determine how closely they match your ranking terms. Higher-ranked studies appear first.

' elif ranking_terms and ranking_terms.strip() and not llm_model_name: # Check what's wrong use_hf_api = os.environ.get('USE_HF_API', 'false').lower() == 'true' hf_token = os.environ.get('HUGGINGFACE_API_TOKEN', '') error_msg = "LLM service is not available." if not use_hf_api: error_msg += " Set USE_HF_API=true in your Space secrets." if not hf_token: error_msg += " Set HUGGINGFACE_API_TOKEN in your Space secrets." html_results += f'
⚠️ Ranking not applied: {error_msg}
Check your Space Settings → Secrets to add the required environment variables.
' for idx, study in enumerate(studies): status_badge_color = { 'RECRUITING': '#28a745', 'NOT_YET_RECRUITING': '#ffc107', 'ACTIVE_NOT_RECRUITING': '#17a2b8', 'COMPLETED': '#6c757d', 'SUSPENDED': '#dc3545', 'TERMINATED': '#dc3545', 'WITHDRAWN': '#6c757d' }.get(study['status'], '#6c757d') # Add relevance score indicator if ranking was applied relevance_indicator = "" if 'relevance_score' in study: score = study.get('relevance_score', 0) score_percent = int(score * 100) # Color based on score: green for high, yellow for medium, gray for low if score >= 0.7: score_color = '#28a745' elif score >= 0.4: score_color = '#ffc107' else: score_color = '#6c757d' relevance_indicator = f'Relevance: {score_percent}%' html_results += f"""

{study['title']}

{study['status']} {relevance_indicator}

NCT ID: {study['nctId']}

""" if study.get('sponsor'): html_results += f"

Sponsor: {study['sponsor']}

" if study.get('conditions'): html_results += f"

Conditions: {', '.join(study['conditions'])}

" if study.get('briefSummary'): summary = study['briefSummary'][:600] + '...' if len(study['briefSummary']) > 600 else study['briefSummary'] html_results += f'

Summary: {summary}

' if study.get('inclusionCriteria'): html_results += "
Inclusion Criteria ▼
" if study.get('exclusionCriteria'): html_results += "
Exclusion Criteria ▼
" if study.get('ranking_reasoning'): html_results += f"

Ranking Reasoning: {study['ranking_reasoning']}

" html_results += f"""
Last updated: {study['lastUpdateDate']} View on ClinicalTrials.gov →
""" return html_results, len(studies), llm_model_name except Exception as e: return f"Error: {str(e)}", 0, None # Create Gradio interface with gr.Blocks(title="Clinical Trial Matcher", theme=gr.themes.Soft()) as demo: # Inject PWA meta tags for iOS/Android installation gr.HTML(""" """, visible=False) gr.Markdown(""" # 🔬 Clinical Trial Matcher Search and filter clinical trials from [ClinicalTrials.gov](https://clinicaltrials.gov/) with AI-powered fine-tuning of the results based on your query. """) with gr.Row(): with gr.Column(scale=1): query_input = gr.Textbox( label="Search Keywords", placeholder="e.g., pancreatic cancer, PDAC, KRAS, etc.", value="" ) country_input = gr.Dropdown( label="Country (Optional)", choices=[ "", "Germany", "United States", "United Kingdom", "Canada", "France", "Italy", "Spain", "Netherlands", "Belgium", "Switzerland", "Austria", "Sweden", "Norway", "Denmark", "Finland", "Poland", "Czech Republic", "Australia", "New Zealand", "Japan", "China", "India", "South Korea", "Brazil", "Mexico", "Argentina", "Chile", "South Africa", "Israel", "Turkey", "Russia", "Greece", "Portugal", "Ireland" ], value="", filterable=True, interactive=True ) status_input = gr.Dropdown( label="Status (Optional)", choices=["", "RECRUITING", "NOT_YET_RECRUITING", "ACTIVE_NOT_RECRUITING", "COMPLETED", "SUSPENDED", "TERMINATED", "WITHDRAWN"], value="", interactive=True ) ranking_input = gr.Textbox( label="✨ Use AI to sort the results based on my query:", placeholder="e.g., KRAS mutation, immunotherapy", value="", visible=False ) ranking_btn = gr.Button("Rank Results", variant="secondary", visible=False) search_btn = gr.Button("Search Clinical Trials", variant="primary", size="lg") with gr.Column(scale=2): results_output = gr.HTML(label="Results") count_output = gr.Textbox(label="Total Studies Found", visible=False) # Search function def perform_search(query, country, status, ranking_terms=""): html, count, model = search_trials(query, country, status, ranking_terms) # Show ranking input and button after search return html, count, gr.update(visible=True), gr.update(visible=True) # Ranking function def perform_ranking(query, country, status, ranking_terms): if not ranking_terms or not ranking_terms.strip(): return "Please enter ranking terms to sort the results.", 0 print(f"Ranking requested with terms: {ranking_terms}") html, count, model = search_trials(query, country, status, ranking_terms) return html, count search_btn.click( fn=perform_search, inputs=[query_input, country_input, status_input, ranking_input], outputs=[results_output, count_output, ranking_input, ranking_btn] ) ranking_btn.click( fn=perform_ranking, inputs=[query_input, country_input, status_input, ranking_input], outputs=[results_output, count_output] ) gr.Markdown(""" --- **App. developed by [Mackenzie Weygandt Mathis](https://en.wikipedia.org/wiki/Mackenzie_Weygandt_Mathis). Data sourced from [ClinicalTrials.gov](https://clinicaltrials.gov)** This app uses AI-powered ranking with Hugging Face models (default: DeepSeek-V3.2) to intelligently rank search results by relevance. """) if __name__ == "__main__": demo.launch(server_name="0.0.0.0", server_port=7860)