"""
Gradio app for Clinical Trial Matcher
For Hugging Face Spaces deployment
"""
import gradio as gr
import requests
import re
import os
from typing import Tuple, Optional

# Endpoint used for study searches against the ClinicalTrials.gov v2 API
CLINICALTRIALS_API_BASE = "https://clinicaltrials.gov/api/v2/studies"

# The LLM ranking service is optional: start in the "unavailable" state and
# only switch it on once the import further down has succeeded.
get_llm_service = None
LLM_AVAILABLE = False

# Deployment settings read from the environment
HF_TOKEN = os.environ.get('HUGGINGFACE_API_TOKEN', '')
USE_HF_API = os.environ.get('USE_HF_API', 'false').lower() == 'true'
print(f"Environment check - USE_HF_API: {USE_HF_API}, HF_TOKEN set: {bool(HF_TOKEN)}")

try:
    from llm_service import get_llm_service
except ImportError as e:
    # Module missing: the app keeps working, just without ranking
    print(f"LLM service not available. Ranking will be disabled. Error: {str(e)}")
except Exception as e:
    # The module exists but failed while importing; show the full traceback
    print(f"Error importing LLM service: {str(e)}")
    import traceback
    traceback.print_exc()
else:
    LLM_AVAILABLE = True
    print("LLM service imported successfully.")


# Badge colours for the overall statuses the UI knows about; any other status
# falls back to the neutral grey below.
_STATUS_BADGE_COLORS = {
    'RECRUITING': '#28a745',
    'NOT_YET_RECRUITING': '#ffc107',
    'ACTIVE_NOT_RECRUITING': '#17a2b8',
    'COMPLETED': '#6c757d',
    'SUSPENDED': '#dc3545',
    'TERMINATED': '#dc3545',
    'WITHDRAWN': '#6c757d',
}
_DEFAULT_BADGE_COLOR = '#6c757d'

# "(Key) Inclusion Criteria ...:" up to the exclusion heading (or end of text).
_INCLUSION_RE = re.compile(
    r'(?:Key\s+)?Inclusion\s+Criteria[^:\n]*:?\s*(.*?)(?=(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?|$)',
    re.IGNORECASE | re.DOTALL,
)
# "(Key) Exclusion Criteria ...:" up to the end of the text.
_EXCLUSION_RE = re.compile(
    r'(?:Key\s+)?Exclusion\s+Criteria[^:\n]*:?\s*(.*?)$',
    re.IGNORECASE | re.DOTALL,
)
# Leading list marker ("*", "-" or "•") in front of a criterion line.
_BULLET_PREFIX_RE = re.compile(r'^[\*\-\•]\s*')


def _esc(value) -> str:
    """Return *value* as text that is safe inside HTML content and attributes."""
    # Imported locally so this block does not depend on a module-level import.
    from html import escape
    return escape(str(value), quote=True)


def _split_criteria(section_text: str) -> list:
    """Split one criteria section into bullet-free lines longer than 3 characters."""
    items = []
    for raw_line in section_text.strip().split('\n'):
        item = _BULLET_PREFIX_RE.sub('', raw_line.strip()).strip()
        if len(item) > 3:
            items.append(item)
    return items


def _parse_eligibility(eligibility_text) -> Tuple[list, list]:
    """Extract (inclusion, exclusion) criteria lists from free-text eligibility criteria."""
    if not eligibility_text or not isinstance(eligibility_text, str):
        return [], []
    inclusion_match = _INCLUSION_RE.search(eligibility_text)
    exclusion_match = _EXCLUSION_RE.search(eligibility_text)
    inclusion = _split_criteria(inclusion_match.group(1)) if inclusion_match else []
    exclusion = _split_criteria(exclusion_match.group(1)) if exclusion_match else []
    return inclusion, exclusion


def _extract_locations(locations_module) -> list:
    """Reduce a contactsLocationsModule to a list of facility/city/country dicts."""
    if not isinstance(locations_module, dict):
        return []
    raw_locations = locations_module.get('locations')
    if not isinstance(raw_locations, list):
        return []
    return [
        {
            'facility': loc.get('facility', ''),
            'city': loc.get('city', ''),
            'country': loc.get('country', ''),
        }
        for loc in raw_locations
        if isinstance(loc, dict)
    ]


def _fetch_study_detail(nct_id: str) -> Optional[dict]:
    """Fetch the full record for one study; return None (and log) when that fails."""
    try:
        detail_response = requests.get(
            f"{CLINICALTRIALS_API_BASE}/{nct_id}",
            params={'format': 'json'},
            timeout=60,
        )
        detail_response.raise_for_status()
        detail_data = detail_response.json()
    except (requests.RequestException, ValueError) as exc:
        # Best effort: the caller falls back to the data from the search response.
        print(f"Warning: could not fetch details for {nct_id}: {exc}")
        return None
    return detail_data if isinstance(detail_data, dict) else None


def _sort_locations_by_country(locations: list, country: str) -> None:
    """Sort *locations* in place: exact country match first, partial match next, rest last."""
    wanted = (country or '').lower().strip()
    if not wanted or not locations:
        return

    def priority(loc):
        loc_country = (loc.get('country') or '').lower().strip()
        if loc_country == wanted:
            return 0
        # An empty country is a substring of everything, so it must not count
        # as a partial match.
        if loc_country and (wanted in loc_country or loc_country in wanted):
            return 1
        return 2

    locations.sort(key=priority)


def _build_study_info(study: dict, country: str) -> dict:
    """Convert one raw study from the search response into the app's flat study dict."""
    protocol_section = study.get('protocolSection', {})
    identification = protocol_section.get('identificationModule', {})
    status_module = protocol_section.get('statusModule', {})
    description = protocol_section.get('descriptionModule', {})
    conditions = protocol_section.get('conditionsModule', {})
    organization = identification.get('organization', {})
    nct_id = identification.get('nctId', '')

    # The per-study record is preferred for locations and eligibility text.
    detail_protocol = None
    if nct_id:
        detail_data = _fetch_study_detail(nct_id)
        if detail_data:
            detail_protocol = detail_data.get('protocolSection', {})

    locations = []
    if detail_protocol is not None:
        locations = _extract_locations(detail_protocol.get('contactsLocationsModule', {}))
    if not locations:
        # Detail fetch failed or had no locations: use the search response.
        locations = _extract_locations(protocol_section.get('contactsLocationsModule', {}))
    _sort_locations_by_country(locations, country)

    eligibility_source = detail_protocol if detail_protocol is not None else protocol_section
    eligibility_module = eligibility_source.get('eligibilityModule', {})
    inclusion_criteria, exclusion_criteria = _parse_eligibility(
        eligibility_module.get('eligibilityCriteria', '')
    )

    return {
        'nctId': nct_id,
        'title': identification.get('officialTitle') or identification.get('briefTitle', ''),
        'sponsor': organization.get('fullName', '') if organization else '',
        'status': status_module.get('overallStatus', ''),
        'conditions': conditions.get('conditions', []),
        'briefSummary': description.get('briefSummary', ''),
        'locations': locations,
        'inclusionCriteria': inclusion_criteria,
        'exclusionCriteria': exclusion_criteria,
        'lastUpdateDate': status_module.get('lastUpdateSubmitDate', ''),
    }


def _rank_with_llm(studies: list, ranking_terms: str) -> Tuple[list, Optional[str]]:
    """Rank *studies* via the optional LLM service; return (studies, model name or None)."""
    if not LLM_AVAILABLE:
        print("Warning: LLM service not available. Ranking disabled.")
        print(f"  - LLM_AVAILABLE: {LLM_AVAILABLE}")
        print(f"  - get_llm_service: {get_llm_service}")
        return studies, None
    if not get_llm_service:
        print("Warning: get_llm_service is None. Ranking disabled.")
        return studies, None

    try:
        llm_service = get_llm_service()
        if not llm_service:
            print("Warning: LLM service returned None. Ranking disabled.")
            print(f"  - Check environment variables:")
            print(f"    USE_HF_API: {os.environ.get('USE_HF_API', 'NOT SET')}")
            print(f"    HUGGINGFACE_API_TOKEN: {'SET' if os.environ.get('HUGGINGFACE_API_TOKEN') else 'NOT SET'}")
            return studies, None
        model_name = llm_service.model_name
        print(f"Ranking studies with {model_name} using terms: {ranking_terms}")
        ranked = llm_service.rank_studies(studies, ranking_terms)
        print(f"Ranking completed. {len(ranked)} studies ranked.")
        return ranked, model_name
    except Exception as e:
        # Keep the unranked list and report no model, so the UI does not claim
        # that a ranking was applied.
        print(f"Error during LLM ranking: {str(e)}")
        import traceback
        traceback.print_exc()
        return studies, None


def _render_ranking_banner(ranking_terms: str, llm_model_name: Optional[str]) -> str:
    """Render the banner saying whether (and by which model) results were ranked."""
    if llm_model_name:
        terms = _esc(ranking_terms)
        return (
            f'<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 1rem; border-radius: 8px; margin: 1rem 0;"><strong>🤖 Results ranked by {_esc(llm_model_name)}</strong><br>Ranking terms: "{terms}"</div>'
            f'<p style="margin: 1rem 0; padding: 0.75rem; background: #f0f8ff; border-left: 3px solid #667eea; border-radius: 4px;"><strong>Ranking applied:</strong> Studies have been reordered by AI relevance to "{terms}". The model analyzed each study\'s title, summary, conditions, and inclusion criteria to determine how closely they match your ranking terms. Higher-ranked studies appear first.</p>'
        )

    # Ranking was requested but not applied: point at the likely configuration gap.
    use_hf_api = os.environ.get('USE_HF_API', 'false').lower() == 'true'
    hf_token = os.environ.get('HUGGINGFACE_API_TOKEN', '')
    error_msg = "LLM service is not available."
    if not use_hf_api:
        error_msg += " Set USE_HF_API=true in your Space secrets."
    if not hf_token:
        error_msg += " Set HUGGINGFACE_API_TOKEN in your Space secrets."
    return f'<div style="background: #fff3cd; color: #856404; padding: 1rem; border-radius: 8px; margin: 1rem 0; border: 1px solid #ffc107;"><strong>⚠️ Ranking not applied:</strong> {error_msg}<br><small>Check your Space Settings → Secrets to add the required environment variables.</small></div>'


def _render_study_card(study: dict) -> str:
    """Render one study as an HTML card, escaping every API- or LLM-supplied value."""
    status_badge_color = _STATUS_BADGE_COLORS.get(study['status'], _DEFAULT_BADGE_COLOR)

    # Relevance badge is only present when the ranking step scored this study.
    relevance_indicator = ""
    if 'relevance_score' in study:
        score = study.get('relevance_score', 0)
        score_percent = int(score * 100)
        # Green for high, yellow for medium, grey for low relevance
        if score >= 0.7:
            score_color = '#28a745'
        elif score >= 0.4:
            score_color = '#ffc107'
        else:
            score_color = '#6c757d'
        relevance_indicator = f'<span style="background: {score_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600; margin-left: 0.5rem;">Relevance: {score_percent}%</span>'

    title = _esc(study['title'])
    status_text = _esc(study['status'])
    nct_id = _esc(study['nctId'])
    last_update = _esc(study['lastUpdateDate'])

    parts = [f"""
            <div style="border: 1px solid #e0e0e0; border-radius: 8px; padding: 1.5rem; margin: 1rem 0; background: white; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                <div style="display: flex; justify-content: space-between; align-items: start; margin-bottom: 1rem;">
                    <h3 style="margin: 0; color: #333; flex: 1;">{title}</h3>
                    <div style="display: flex; align-items: center; gap: 0.5rem;">
                        <span style="background: {status_badge_color}; color: white; padding: 0.25rem 0.75rem; border-radius: 12px; font-size: 0.85rem; font-weight: 600;">
                            {status_text}
                        </span>
                        {relevance_indicator}
                    </div>
                </div>
                
                <p><strong>NCT ID:</strong> {nct_id}</p>
            """]

    if study.get('sponsor'):
        parts.append(f"<p><strong>Sponsor:</strong> {_esc(study['sponsor'])}</p>")

    if study.get('conditions'):
        condition_list = ', '.join(_esc(condition) for condition in study['conditions'])
        parts.append(f"<p><strong>Conditions:</strong> {condition_list}</p>")

    if study.get('briefSummary'):
        summary = study['briefSummary']
        if len(summary) > 600:
            summary = summary[:600] + '...'
        parts.append(f'<p style="color: #666; line-height: 1.6;"><strong>Summary:</strong> {_esc(summary)}</p>')

    for heading, key in (('Inclusion Criteria', 'inclusionCriteria'),
                         ('Exclusion Criteria', 'exclusionCriteria')):
        if study.get(key):
            parts.append(f"<details style='margin-top: 1rem;'><summary style='cursor: pointer; color: #667eea; font-weight: 600;'>{heading} ▼</summary><ul style='margin-top: 0.5rem; padding-left: 1.5rem;'>")
            parts.extend(
                f"<li style='margin-bottom: 0.5rem;'>{_esc(criterion)}</li>"
                for criterion in study[key]
            )
            parts.append("</ul></details>")

    if study.get('ranking_reasoning'):
        parts.append(f"<p style='margin-top: 1rem; padding: 0.75rem; background: #e8f4f8; border-left: 3px solid #667eea; border-radius: 4px; font-size: 0.9rem;'><strong>Ranking Reasoning:</strong> {_esc(study['ranking_reasoning'])}</p>")

    parts.append(f"""
                <div style="margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #e0e0e0; display: flex; justify-content: space-between; align-items: center;">
                    <span style="color: #888; font-size: 0.9rem;">Last updated: {last_update}</span>
                    <a href="https://clinicaltrials.gov/study/{nct_id}" target="_blank" style="color: #667eea; text-decoration: none; font-weight: 600;">View on ClinicalTrials.gov →</a>
                </div>
            </div>
            """)
    return ''.join(parts)


def search_trials(query: str, country: str, status: str, ranking_terms: str = "") -> Tuple[str, int, Optional[str]]:
    """
    Search clinical trials and return formatted results

    Sends one search request (20 results per page) to the ClinicalTrials.gov
    API, then one detail request per returned study. If ranking terms are
    given, the studies are passed to the optional LLM service for reordering.
    All user-, API- and LLM-supplied text is HTML-escaped before rendering.

    Args:
        query: Search keywords. Empty or whitespace-only input is rejected.
        country: Optional country name added as a LocationCountry filter and
            used to sort each study's locations.
        status: Optional overall status added as an OverallStatus filter.
        ranking_terms: Optional terms for LLM-based ranking.

    Returns:
        tuple: (results_html, total_count, llm_model_name). llm_model_name is
        None unless ranking was actually applied. Any exception raised while
        searching is reported as ("Error: ...", 0, None) instead of propagating.
    """
    if not query or not query.strip():
        return "Please enter search keywords.", 0, None

    try:
        # Build the combined query term from keywords and optional filters
        query_parts = [query]
        if country:
            query_parts.append(f'AREA[LocationCountry]{country}')
        if status:
            query_parts.append(f'AREA[OverallStatus]{status}')
        params = {
            'format': 'json',
            'pageSize': 20,
            'query.term': ' AND '.join(query_parts),
        }

        # Make request to ClinicalTrials.gov API
        response = requests.get(CLINICALTRIALS_API_BASE, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()

        raw_studies = (data.get('studies') or []) if isinstance(data, dict) else []
        studies = [_build_study_info(study, country) for study in raw_studies]

        # Apply LLM-based ranking if ranking terms provided
        llm_model_name = None
        wants_ranking = bool(ranking_terms and ranking_terms.strip())
        if wants_ranking:
            studies, llm_model_name = _rank_with_llm(studies, ranking_terms)

        if not studies:
            return "No studies found. Try different search criteria.", 0, None

        # Format results as HTML
        html_parts = [f"<h2>Found {len(studies)} studies</h2>"]
        if wants_ranking:
            html_parts.append(_render_ranking_banner(ranking_terms, llm_model_name))
        html_parts.extend(_render_study_card(study) for study in studies)

        return ''.join(html_parts), len(studies), llm_model_name

    except Exception as e:
        # Top-level boundary for the UI: show the failure instead of raising.
        return f"Error: {_esc(e)}", 0, None


# Build the Gradio UI: search form on the left, rendered results on the right
with gr.Blocks(title="Clinical Trial Matcher", theme=gr.themes.Soft()) as demo:
    # Hidden HTML component holding a script that adds PWA meta tags and a
    # manifest link to the document head
    gr.HTML("""
    <script>
    (function() {
        // Add PWA meta tags to document head
        const metaTags = [
            { name: 'apple-mobile-web-app-capable', content: 'yes' },
            { name: 'apple-mobile-web-app-status-bar-style', content: 'default' },
            { name: 'apple-mobile-web-app-title', content: 'Trial Matcher' },
            { name: 'mobile-web-app-capable', content: 'yes' },
            { name: 'theme-color', content: '#4a90e2' }
        ];
        
        metaTags.forEach(tag => {
            let meta = document.querySelector(`meta[name="${tag.name}"]`);
            if (!meta) {
                meta = document.createElement('meta');
                meta.setAttribute('name', tag.name);
                document.head.appendChild(meta);
            }
            meta.setAttribute('content', tag.content);
        });
        
        // Add manifest link
        let manifestLink = document.querySelector('link[rel="manifest"]');
        if (!manifestLink) {
            manifestLink = document.createElement('link');
            manifestLink.setAttribute('rel', 'manifest');
            document.head.appendChild(manifestLink);
        }
        manifestLink.setAttribute('href', 'manifest.json');
    })();
    </script>
    """, visible=False)
    gr.Markdown("""
    # 🔬 Clinical Trial Matcher
    
    Search and filter clinical trials from [ClinicalTrials.gov](https://clinicaltrials.gov/) with AI-powered fine-tuning of the results based on your query.
    
    """)

    # Dropdown options; the leading empty string means "no filter"
    country_choices = [
        "", "Germany", "United States", "United Kingdom", "Canada", "France",
        "Italy", "Spain", "Netherlands", "Belgium", "Switzerland", "Austria",
        "Sweden", "Norway", "Denmark", "Finland", "Poland", "Czech Republic",
        "Australia", "New Zealand", "Japan", "China", "India", "South Korea",
        "Brazil", "Mexico", "Argentina", "Chile", "South Africa", "Israel",
        "Turkey", "Russia", "Greece", "Portugal", "Ireland",
    ]
    status_choices = [
        "", "RECRUITING", "NOT_YET_RECRUITING", "ACTIVE_NOT_RECRUITING",
        "COMPLETED", "SUSPENDED", "TERMINATED", "WITHDRAWN",
    ]

    with gr.Row():
        # Left column: search form
        with gr.Column(scale=1):
            query_input = gr.Textbox(
                label="Search Keywords",
                placeholder="e.g., pancreatic cancer, PDAC, KRAS, etc.",
                value=""
            )
            country_input = gr.Dropdown(
                label="Country (Optional)",
                choices=country_choices,
                value="",
                filterable=True,
                interactive=True
            )
            status_input = gr.Dropdown(
                label="Status (Optional)",
                choices=status_choices,
                value="",
                interactive=True
            )
            # Ranking controls are created hidden; the search handler's return
            # values make them visible.
            ranking_input = gr.Textbox(
                label="✨ Use AI to sort the results based on my query:",
                placeholder="e.g., KRAS mutation, immunotherapy",
                value="",
                visible=False
            )
            ranking_btn = gr.Button("Rank Results", variant="secondary", visible=False)
            search_btn = gr.Button("Search Clinical Trials", variant="primary", size="lg")

        # Right column: rendered results
        with gr.Column(scale=2):
            results_output = gr.HTML(label="Results")
            count_output = gr.Textbox(label="Total Studies Found", visible=False)

    def perform_search(query, country, status, ranking_terms=""):
        """Run a search and make the ranking controls visible next to the results."""
        results_html, total, _model = search_trials(query, country, status, ranking_terms)
        show_ranking_box = gr.update(visible=True)
        show_ranking_button = gr.update(visible=True)
        return results_html, total, show_ranking_box, show_ranking_button

    def perform_ranking(query, country, status, ranking_terms):
        """Re-run the search with ranking terms; blank terms produce a prompt instead."""
        has_terms = bool(ranking_terms) and bool(ranking_terms.strip())
        if not has_terms:
            return "Please enter ranking terms to sort the results.", 0
        print(f"Ranking requested with terms: {ranking_terms}")
        results_html, total, _model = search_trials(query, country, status, ranking_terms)
        return results_html, total

    # Wire the buttons to their handlers
    search_btn.click(
        fn=perform_search,
        inputs=[query_input, country_input, status_input, ranking_input],
        outputs=[results_output, count_output, ranking_input, ranking_btn]
    )

    ranking_btn.click(
        fn=perform_ranking,
        inputs=[query_input, country_input, status_input, ranking_input],
        outputs=[results_output, count_output]
    )

    gr.Markdown("""
    ---
    **App. developed by [Mackenzie Weygandt Mathis](https://en.wikipedia.org/wiki/Mackenzie_Weygandt_Mathis). Data sourced from [ClinicalTrials.gov](https://clinicaltrials.gov)**
    
    This app uses AI-powered ranking with Hugging Face models (default: DeepSeek-V3.2) to intelligently rank search results by relevance.
    """)

# Start the server only when this file is run as a script
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)