"""Application configuration using Pydantic settings."""

import logging
import sys
from typing import Literal

from pydantic_settings import BaseSettings, SettingsConfigDict

logger = logging.getLogger(__name__)


class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every attribute below is one setting with a typed default. Values are read
    from the process environment and from a local ``.env`` file; variable
    names are matched case-insensitively.

    Many defaults are deliberately development-friendly (authentication off,
    wildcard CORS, placeholder secrets). ``validate_production_settings`` in
    this module is the startup check for ``deployment_mode="production"``.
    """

    # Deployment Mode: "development" or "production"
    # In production mode, security features are enforced and insecure defaults are rejected.
    deployment_mode: Literal["development", "production"] = "development"

    # Hugging Face
    hf_token: str = ""

    # MedGemma backend: "local" loads transformers in-process (needs GPU/RAM);
    # "hf-inference" routes generation to Hugging Face Inference Providers
    # using HF_TOKEN. Use "hf-inference" for free CPU deployments where
    # the 4b model would not fit or be too slow.
    medgemma_provider: Literal["local", "hf-inference"] = "local"
    hf_inference_provider: str = ""   # "" = auto; or e.g. "fireworks-ai", "together"
    hf_inference_timeout: int = 120   # Seconds before HF Inference call aborts

    # Models
    model_cache_dir: str = "/app/models"
    medasr_model: str = "google/medasr"
    medgemma_model: str = "google/medgemma-1.5-4b-it"
    medgemma_vision_model: str = "google/medgemma-4b-it"
    whisper_model: str = "openai/whisper-small"
    multilingual_asr_enabled: bool = True

    # Image Analysis
    enable_image_analysis: bool = True
    max_image_size_mb: int = 10

    # Device
    device: Literal["cuda", "cpu"] = "cpu"
    enable_gpu: bool = False

    # MedGemma Generation Parameters
    medgemma_max_tokens: int = 1024  # Sufficient for complete documentation
    medgemma_repetition_penalty: float = 1.1  # Prevent repetitive output

    # Compliance Controls
    allow_phi_logging: bool = False
    enable_phi_persistence: bool = False
    medgemma_terms_acknowledged: bool = False
    enforce_medgemma_terms_acknowledgement: bool = True

    # Audit Logging
    audit_logging_enabled: bool = False

    # Audio
    max_audio_duration_seconds: int = 300
    audio_sample_rate: int = 16000

    # Streaming Transcription
    streaming_interval_seconds: float = 2.0  # How often to run ASR on buffer (GPU: 2s, CPU: 4s)

    # Rate Limiting & Queue
    rate_limiting_enabled: bool = True
    rate_limit_general_rpm: int = 60       # General endpoints: requests per minute
    rate_limit_inference_rpm: int = 10     # Inference endpoints: requests per minute
    queue_max_concurrent_inferences: int = 2  # Max parallel model inference tasks
    queue_max_size: int = 20              # Max queued requests before rejecting
    queue_timeout_seconds: float = 120.0  # Max seconds a request waits in queue
    queue_estimated_inference_seconds: float = 10.0  # Default estimate before measurements

    # HIPAA Encryption at Rest
    encryption_at_rest_enabled: bool = False
    encryption_master_key: str = "CHANGE_ME_IN_PRODUCTION"  # Placeholder; must be overridden
    encryption_kdf_iterations: int = 100000

    # Data Retention & Auto-Purge
    retention_sessions_days: int = 365       # Intake sessions retention (0 = keep forever)
    retention_audit_logs_days: int = 2555    # ~7 years (HIPAA requires min 6 years)
    auto_purge_enabled: bool = False
    auto_purge_interval_hours: int = 24      # How often auto-purge runs

    # Monitoring & Observability
    metrics_enabled: bool = True
    structured_logging_enabled: bool = True
    metrics_endpoint_auth_required: bool = False  # /metrics endpoint — set True for production
    metrics_alert_window_seconds: int = 300       # Window for alert evaluation (5 min)
    metrics_error_rate_warning: float = 0.1       # 10% error rate triggers warning
    metrics_error_rate_critical: float = 0.25     # 25% error rate triggers critical
    metrics_latency_warning_seconds: float = 15.0
    metrics_latency_critical_seconds: float = 30.0

    # API
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    api_reload: bool = False

    # RAG (Retrieval-Augmented Generation)
    rag_enabled: bool = False
    rag_embedding_model: str = "NeuML/pubmedbert-base-embeddings"
    rag_persist_dir: str = "./rag_store"
    rag_top_k: int = 3
    rag_similarity_threshold: float = 0.65       # Min cosine similarity to include a result
    rag_initial_retrieval_k: int = 20            # Candidates fetched before reranking
    rag_reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    rag_reranker_enabled: bool = True
    rag_chunking_enabled: bool = True            # Split SOAP into per-section chunks

    # Knowledge Base (Phase 2)
    knowledge_base_enabled: bool = False
    knowledge_base_persist_dir: str = "./knowledge_store"
    knowledge_base_guidelines_top_k: int = 3
    knowledge_base_guidelines_threshold: float = 0.60
    icd10_lookup_enabled: bool = True             # Semantic ICD-10 code matching
    icd10_top_k: int = 5                          # Max ICD-10 suggestions per symptom
    icd10_similarity_threshold: float = 0.60
    drug_interaction_check_enabled: bool = True    # Auto-check medication interactions

    # Multi-Tenancy & Isolation (Phase 3)
    multi_tenancy_enabled: bool = False
    default_organization_id: str = "default"
    default_provider_id: str = "system"

    # RAG Security (Phase 3)
    rag_audit_enabled: bool = True                # Log every RAG retrieval for HIPAA
    rag_vector_store_encryption_enabled: bool = False  # Encrypt vector store at rest

    # RAG Evaluation & Observability (Phase 4)
    rag_evaluation_enabled: bool = True
    rag_evaluation_persist_dir: str = "./rag_eval"
    rag_drift_detection_enabled: bool = True
    rag_drift_window_size: int = 50               # Embeddings to track per window
    rag_drift_threshold: float = 0.15             # Cosine distance shift triggering alert
    rag_hallucination_check_enabled: bool = True   # Cross-ref generated text vs evidence

    # Voice Assistant & Conversation
    conversation_mode_enabled: bool = False
    tts_engine: str = "piper"          # "piper" or "webspeech" (browser fallback)
    piper_model_path: str = "./models/piper/en_US-amy-medium.onnx"
    piper_config_path: str = "./models/piper/en_US-amy-medium.onnx.json"
    tts_sample_rate: int = 22050
    tts_max_text_length: int = 500
    conversation_max_turns: int = 20
    conversation_followup_rounds: int = 3
    conversation_streaming_interval: float = 0.5  # Faster ASR for conversation mode
    conversation_llm_model: str = ""  # Empty = reuse medgemma_model
    conversation_llm_separate: bool = False  # Load separate model for conversation

    # Phase 3: Voice Activity Detection
    vad_enabled: bool = True
    vad_threshold: float = 0.5         # Speech probability threshold (0-1)
    vad_min_silence_ms: int = 800      # Silence duration to trigger end-of-turn (ms)
    vad_min_speech_ms: int = 250       # Minimum speech duration to accept (ms)
    vad_window_size_ms: int = 32       # VAD analysis window (Silero uses 32ms chunks)

    # Phase 3: TTS Caching & Streaming
    tts_cache_greetings: bool = True   # Pre-cache greeting audio at startup
    tts_streaming_enabled: bool = True # Send TTS sentence-by-sentence

    # Phase 3: Multi-Language
    conversation_auto_detect_language: bool = True
    conversation_default_language: str = "en"
    piper_voice_models: str = ""       # JSON map: {"es": "./models/piper/es_ES-...", ...}

    # Phase 8: Infrastructure & Scalability
    database_url: str = ""  # Empty = use SQLite default; set to postgresql+asyncpg://... for Postgres
    redis_url: str = ""  # Empty = disabled; set to redis://localhost:6379/0
    redis_cache_ttl_seconds: int = 300  # Default cache TTL
    task_queue_enabled: bool = False  # Enable Celery/ARQ background workers
    task_queue_broker_url: str = ""  # e.g. redis://localhost:6379/1
    model_quantization_enabled: bool = False  # Enable 4-bit/8-bit quantization
    model_quantization_bits: int = 4  # 4 or 8
    colab_mode: bool = False  # Enable Colab-specific optimizations
    colab_ngrok_token: str = ""  # Ngrok auth token for Colab tunneling

    # Phase 7: EHR Integration
    webhook_enabled: bool = False
    webhook_url: str = ""  # Default webhook endpoint for session finalization
    webhook_auth_token: str = ""
    hl7v2_export_enabled: bool = True
    ccda_export_enabled: bool = True

    # Phase 5: Clinical Intelligence
    specialty_detection_enabled: bool = True
    default_specialty: str = "general"  # general, emergency, primary_care, psychiatry, ob_gyn, pediatrics
    vitals_extraction_enabled: bool = True
    differential_diagnosis_enabled: bool = True
    ambient_mode_enabled: bool = False
    diarization_enabled: bool = False
    icd10_umls_mode: str = "semantic"  # "semantic" (current) or "umls_linker" (requires scispacy linker)

    # Phase 4: Authentication & Security
    auth_enabled: bool = False                    # False = dev mode (current stub behavior)
    jwt_secret_key: str = "CHANGE_ME_IN_PRODUCTION"  # Placeholder; must be overridden
    jwt_algorithm: str = "HS256"
    jwt_access_token_expire_minutes: int = 30
    jwt_refresh_token_expire_days: int = 7
    mfa_enabled: bool = False                     # TOTP MFA for provider/admin roles
    session_inactivity_timeout_minutes: int = 15  # Frontend inactivity timer
    consent_tracking_enabled: bool = True         # Require verbal consent before intake
    cors_allowed_origins: str = "*"               # Comma-separated origins; "*" for dev
    # Iframe parents allowed to embed the app (CSP frame-ancestors).
    # Default empty = X-Frame-Options: DENY. For HF Spaces canonical URL,
    # set to "https://huggingface.co".
    allow_iframe_embedding_origins: str = ""

    # OAuth2/OIDC SSO (Phase 1)
    oidc_enabled: bool = False                     # Enable OIDC login flow
    oidc_issuer_url: str = ""                      # e.g. https://accounts.google.com or https://login.microsoftonline.com/{tenant}/v2.0
    oidc_client_id: str = ""
    oidc_client_secret: str = ""
    oidc_redirect_uri: str = ""                    # e.g. https://your-app.com/api/auth/oidc/callback
    oidc_scopes: str = "openid email profile"      # Space-separated scopes
    oidc_role_claim: str = "role"                   # OIDC claim that maps to UserRole
    oidc_default_role: str = "viewer"               # Default role for new OIDC users

    # Phase 4: Multi-region / Data Residency
    data_region: str = "us-east-1"                   # Deployment region for PHI locality
    allowed_data_regions: str = "us-east-1,us-west-2,eu-west-1"  # Comma-separated
    enforce_data_residency: bool = False              # Reject cross-region data transfers
    region_encryption_key_arn: str = ""               # AWS KMS ARN for region-specific encryption

    # Phase 4: vLLM Serving
    vllm_enabled: bool = False
    vllm_url: str = "http://localhost:8001"
    vllm_model: str = "google/medgemma-4b-it"

    # Phase 4: OpenTelemetry
    otel_enabled: bool = False
    otel_endpoint: str = "http://localhost:4317"

    # Phase 4: Wake Word
    picovoice_access_key: str = ""

    # Logging
    log_level: str = "INFO"

    # Pydantic v2 style configuration; the inner `class Config` form is
    # deprecated in Pydantic v2 and emits a deprecation warning.
    model_config = SettingsConfigDict(
        env_file=".env",
        case_sensitive=False,
        # Fields such as `model_cache_dir` and `model_quantization_bits` start
        # with "model_", a prefix older Pydantic releases treat as protected
        # and warn about. Keep only the settings-specific namespace protected.
        protected_namespaces=("settings_",),
    )


# Global settings instance, created once when this module is imported.
# Values come from environment variables and the ".env" file configured on
# Settings, falling back to the defaults declared on the class.
settings = Settings()


# ---------------------------------------------------------------------------
# Production-mode startup validation
# ---------------------------------------------------------------------------

# Placeholder / trivial values that must never be used as real secrets.
# The set is kept verbatim; comparisons go through _is_insecure_secret(),
# which matches case-insensitively against the normalized view below.
_INSECURE_DEFAULTS = {"CHANGE_ME_IN_PRODUCTION", "", "changeme", "secret"}

# Lower-cased copy of the denylist. Candidates are lower-cased before the
# lookup, so an upper-case entry such as "CHANGE_ME_IN_PRODUCTION" could never
# match if the raw set were used directly.
_INSECURE_DEFAULTS_NORMALIZED = frozenset(
    entry.strip().lower() for entry in _INSECURE_DEFAULTS
)


def _is_insecure_secret(value: str) -> bool:
    """Return True if *value* is empty, blank, or a known placeholder secret."""
    return value.strip().lower() in _INSECURE_DEFAULTS_NORMALIZED


def validate_production_settings() -> None:
    """Validate that security-critical settings are configured for production.

    Called during application startup. Reads the module-level ``settings``
    object and collects every problem found before reporting, so all issues
    are shown in one pass.

    Checks performed:
      * ``jwt_secret_key`` and ``encryption_master_key`` must not be empty or
        a known placeholder (checked in every deployment mode).
      * In production mode only: authentication, encryption at rest, audit
        logging and metrics-endpoint auth must be enabled, and CORS must not
        be the ``"*"`` wildcard.

    Returns:
        None. When no issues are found nothing is printed or logged.

    Raises:
        SystemExit: with exit code 1 in production mode when any issue is
            found; the report is written to stderr first. In development
            mode the same report is logged as a warning instead.
    """
    is_prod = settings.deployment_mode == "production"
    issues: list[str] = []

    # --- Secrets must not be default values ---
    if _is_insecure_secret(settings.jwt_secret_key):
        issues.append(
            "JWT_SECRET_KEY is set to an insecure default. "
            "Generate a strong random secret (e.g. `openssl rand -hex 32`)."
        )

    if _is_insecure_secret(settings.encryption_master_key):
        issues.append(
            "ENCRYPTION_MASTER_KEY is set to an insecure default. "
            "Generate a strong random secret for HIPAA encryption at rest."
        )

    # --- Production requires security features enabled ---
    if is_prod:
        if not settings.auth_enabled:
            issues.append("AUTH_ENABLED must be True in production mode.")

        if not settings.encryption_at_rest_enabled:
            issues.append("ENCRYPTION_AT_REST_ENABLED must be True in production mode.")

        if not settings.audit_logging_enabled:
            issues.append("AUDIT_LOGGING_ENABLED must be True in production mode.")

        if settings.cors_allowed_origins.strip() == "*":
            issues.append(
                "CORS_ALLOWED_ORIGINS must not be '*' in production mode. "
                "Specify allowed origins explicitly."
            )

        if not settings.metrics_endpoint_auth_required:
            issues.append(
                "METRICS_ENDPOINT_AUTH_REQUIRED should be True in production "
                "to prevent information leakage via /metrics."
            )

    # --- Report ---
    if not issues:
        return

    header = (
        "FATAL: Production security validation failed"
        if is_prod
        else "WARNING: Insecure configuration detected (development mode)"
    )
    rule = "=" * 60
    numbered = [f"  {i}. {issue}" for i, issue in enumerate(issues, 1)]
    # The leading empty element yields the newline that starts the report.
    msg = "\n".join(["", rule, header, rule, *numbered, rule])

    if is_prod:
        # Hard-fail in production — do not start with insecure config
        print(msg, file=sys.stderr)
        sys.exit(1)
    else:
        logger.warning(msg)