""" Gradio app for Clinical Trial Matcher For Hugging Face Spaces deployment """ import gradio as gr import requests import re import os from typing import Tuple, Optional # ClinicalTrials.gov API base URL CLINICALTRIALS_API_BASE = "https://clinicaltrials.gov/api/v2/studies" # Import LLM service (optional - will gracefully degrade if not available) LLM_AVAILABLE = False get_llm_service = None # Check environment variables USE_HF_API = os.environ.get('USE_HF_API', 'false').lower() == 'true' HF_TOKEN = os.environ.get('HUGGINGFACE_API_TOKEN', '') print(f"Environment check - USE_HF_API: {USE_HF_API}, HF_TOKEN set: {bool(HF_TOKEN)}") try: from llm_service import get_llm_service LLM_AVAILABLE = True print("LLM service imported successfully.") except ImportError as e: print(f"LLM service not available. Ranking will be disabled. Error: {str(e)}") except Exception as e: print(f"Error importing LLM service: {str(e)}") import traceback traceback.print_exc() def search_trials(query: str, country: str, status: str, ranking_terms: str = "") -> Tuple[str, int, Optional[str]]: """ Search clinical trials and return formatted results Returns: tuple: (results_html, total_count, llm_model_name) """ if not query: return "Please enter search keywords.", 0, None try: # Build API request parameters params = { 'format': 'json', 'pageSize': 20 } # Build query filter query_parts = [] if query: query_parts.append(query) if country: query_parts.append(f'AREA[LocationCountry]{country}') if status: query_parts.append(f'AREA[OverallStatus]{status}') if query_parts: params['query.term'] = ' AND '.join(query_parts) # Make request to ClinicalTrials.gov API response = requests.get(CLINICALTRIALS_API_BASE, params=params, timeout=30) response.raise_for_status() data = response.json() # Extract and format relevant information studies = [] if 'studies' in data: for study in data['studies']: protocol_section = study.get('protocolSection', {}) identification = protocol_section.get('identificationModule', {}) nct_id = identification.get('nctId', '') organization = identification.get('organization', {}) sponsor = organization.get('fullName', '') if organization else '' status_module = protocol_section.get('statusModule', {}) description = protocol_section.get('descriptionModule', {}) conditions = protocol_section.get('conditionsModule', {}) locations_module = protocol_section.get('contactsLocationsModule', {}) # Fetch full study details to get ALL locations and eligibility criteria locations = [] inclusion_criteria = [] exclusion_criteria = [] detail_data = None if nct_id: try: detail_url = f"https://clinicaltrials.gov/api/v2/studies/{nct_id}" detail_response = requests.get(detail_url, params={'format': 'json'}, timeout=60) detail_response.raise_for_status() detail_data = detail_response.json() detail_protocol = detail_data.get('protocolSection', {}) detail_locations_module = detail_protocol.get('contactsLocationsModule', {}) if 'locations' in detail_locations_module: all_locations = detail_locations_module['locations'] if isinstance(all_locations, list) and len(all_locations) > 0: for loc in all_locations: locations.append({ 'facility': loc.get('facility', ''), 'city': loc.get('city', ''), 'country': loc.get('country', '') }) except Exception: pass # Extract eligibility criteria from detail data if available if detail_data: detail_protocol = detail_data.get('protocolSection', {}) eligibility_module = detail_protocol.get('eligibilityModule', {}) eligibility_text = eligibility_module.get('eligibilityCriteria', '') if eligibility_text: # Parse inclusion and exclusion criteria with multiple format support inclusion_patterns = [ r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)', r'Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=Exclusion\s+Criteria[^:\n]*:?|$)', ] exclusion_patterns = [ r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', r'Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', ] inclusion_match = None exclusion_match = None for pattern in inclusion_patterns: inclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if inclusion_match: break for pattern in exclusion_patterns: exclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if exclusion_match: break if inclusion_match: inclusion_text = inclusion_match.group(1).strip() inclusion_lines = [line.strip() for line in inclusion_text.split('\n') if line.strip()] inclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in inclusion_lines if line.strip()] inclusion_criteria = [item for item in inclusion_criteria if item and len(item) > 3] if exclusion_match: exclusion_text = exclusion_match.group(1).strip() exclusion_lines = [line.strip() for line in exclusion_text.split('\n') if line.strip()] exclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in exclusion_lines if line.strip()] exclusion_criteria = [item for item in exclusion_criteria if item and len(item) > 3] else: # Try to get eligibility from search results as fallback eligibility_module = protocol_section.get('eligibilityModule', {}) eligibility_text = eligibility_module.get('eligibilityCriteria', '') if eligibility_text: inclusion_patterns = [ r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)', r'Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=Exclusion\s+Criteria[^:\n]*:?|$)', ] exclusion_patterns = [ r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', r'Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$', ] inclusion_match = None exclusion_match = None for pattern in inclusion_patterns: inclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if inclusion_match: break for pattern in exclusion_patterns: exclusion_match = re.search(pattern, eligibility_text, re.IGNORECASE | re.DOTALL) if exclusion_match: break if inclusion_match: inclusion_text = inclusion_match.group(1).strip() inclusion_lines = [line.strip() for line in inclusion_text.split('\n') if line.strip()] inclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in inclusion_lines if line.strip()] inclusion_criteria = [item for item in inclusion_criteria if item and len(item) > 3] if exclusion_match: exclusion_text = exclusion_match.group(1).strip() exclusion_lines = [line.strip() for line in exclusion_text.split('\n') if line.strip()] exclusion_criteria = [re.sub(r'^[\*\-\•]\s*', '', line).strip() for line in exclusion_lines if line.strip()] exclusion_criteria = [item for item in exclusion_criteria if item and len(item) > 3] # If detail fetch failed or returned no locations, use search results as fallback if len(locations) == 0 and 'locations' in locations_module: all_locations = locations_module.get('locations', []) if isinstance(all_locations, list): for loc in all_locations: locations.append({ 'facility': loc.get('facility', ''), 'city': loc.get('city', ''), 'country': loc.get('country', '') }) # Sort locations: searched country first, then others if country and locations: search_country_lower = country.lower().strip() def location_sort_key(loc): loc_country = loc.get('country', '').lower().strip() if loc_country == search_country_lower: return 0 elif search_country_lower in loc_country or loc_country in search_country_lower: return 1 else: return 2 locations.sort(key=location_sort_key) study_info = { 'nctId': nct_id, 'title': identification.get('officialTitle') or identification.get('briefTitle', ''), 'sponsor': sponsor, 'status': status_module.get('overallStatus', ''), 'conditions': conditions.get('conditions', []), 'briefSummary': description.get('briefSummary', ''), 'locations': locations, 'inclusionCriteria': inclusion_criteria, 'exclusionCriteria': exclusion_criteria, 'lastUpdateDate': status_module.get('lastUpdateSubmitDate', '') } studies.append(study_info) # Apply LLM-based ranking if ranking terms provided llm_model_name = None if ranking_terms and ranking_terms.strip(): if not LLM_AVAILABLE: print("Warning: LLM service not available. Ranking disabled.") print(f" - LLM_AVAILABLE: {LLM_AVAILABLE}") print(f" - get_llm_service: {get_llm_service}") elif not get_llm_service: print("Warning: get_llm_service is None. Ranking disabled.") else: try: llm_service = get_llm_service() if llm_service: llm_model_name = llm_service.model_name print(f"Ranking studies with {llm_model_name} using terms: {ranking_terms}") studies = llm_service.rank_studies(studies, ranking_terms) print(f"Ranking completed. {len(studies)} studies ranked.") else: print("Warning: LLM service returned None. Ranking disabled.") print(f" - Check environment variables:") print(f" USE_HF_API: {os.environ.get('USE_HF_API', 'NOT SET')}") print(f" HUGGINGFACE_API_TOKEN: {'SET' if os.environ.get('HUGGINGFACE_API_TOKEN') else 'NOT SET'}") except Exception as e: print(f"Error during LLM ranking: {str(e)}") import traceback traceback.print_exc() # Format results as HTML if not studies: return "No studies found. Try different search criteria.", 0, None # Count how many studies were actually ranked (have relevance_score) ranked_count = sum(1 for s in studies if 'relevance_score' in s) total_before_ranking = len(studies) html_results = f"
Ranking applied: Studies have been reordered by AI relevance to "{ranking_terms}". The model analyzed each study\'s title, summary, conditions, and inclusion criteria to determine how closely they match your ranking terms. Higher-ranked studies appear first.
' elif ranking_terms and ranking_terms.strip() and not llm_model_name: # Check what's wrong use_hf_api = os.environ.get('USE_HF_API', 'false').lower() == 'true' hf_token = os.environ.get('HUGGINGFACE_API_TOKEN', '') error_msg = "LLM service is not available." if not use_hf_api: error_msg += " Set USE_HF_API=true in your Space secrets." if not hf_token: error_msg += " Set HUGGINGFACE_API_TOKEN in your Space secrets." html_results += f'NCT ID: {study['nctId']}
""" if study.get('sponsor'): html_results += f"Sponsor: {study['sponsor']}
" if study.get('conditions'): html_results += f"Conditions: {', '.join(study['conditions'])}
" if study.get('briefSummary'): summary = study['briefSummary'][:600] + '...' if len(study['briefSummary']) > 600 else study['briefSummary'] html_results += f'Summary: {summary}
' if study.get('inclusionCriteria'): html_results += "Ranking Reasoning: {study['ranking_reasoning']}
" html_results += f"""