# VoxDoc / app/config.py
# Last commit (joelthomas77): Allow iframe embedding via CSP frame-ancestors (config-driven)
# Revision 9fdeae0 (verified)
"""Application configuration using Pydantic settings."""
import logging
import sys
from typing import Literal

from pydantic_settings import BaseSettings, SettingsConfigDict
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every attribute below is one setting together with its default value.
    Values are resolved from the process environment and from a local
    ``.env`` file; variable names are matched case-insensitively (see
    ``model_config`` at the end of the class).
    """

    # Deployment Mode: "development" or "production"
    # In production mode, security features are enforced and insecure defaults are rejected.
    deployment_mode: Literal["development", "production"] = "development"

    # Hugging Face
    hf_token: str = ""

    # MedGemma backend: "local" loads transformers in-process (needs GPU/RAM);
    # "hf-inference" routes generation to Hugging Face Inference Providers
    # using HF_TOKEN. Use "hf-inference" for free CPU deployments where
    # the 4b model would not fit or be too slow.
    medgemma_provider: Literal["local", "hf-inference"] = "local"
    hf_inference_provider: str = ""  # "" = auto; or e.g. "fireworks-ai", "together"
    hf_inference_timeout: int = 120  # Seconds before HF Inference call aborts

    # Models
    model_cache_dir: str = "/app/models"
    medasr_model: str = "google/medasr"
    medgemma_model: str = "google/medgemma-1.5-4b-it"
    medgemma_vision_model: str = "google/medgemma-4b-it"
    whisper_model: str = "openai/whisper-small"
    multilingual_asr_enabled: bool = True

    # Image Analysis
    enable_image_analysis: bool = True
    max_image_size_mb: int = 10

    # Device
    device: Literal["cuda", "cpu"] = "cpu"
    enable_gpu: bool = False

    # MedGemma Generation Parameters
    medgemma_max_tokens: int = 1024  # Sufficient for complete documentation
    medgemma_repetition_penalty: float = 1.1  # Prevent repetitive output

    # Compliance Controls
    allow_phi_logging: bool = False
    enable_phi_persistence: bool = False
    medgemma_terms_acknowledged: bool = False
    enforce_medgemma_terms_acknowledgement: bool = True

    # Audit Logging
    audit_logging_enabled: bool = False

    # Audio
    max_audio_duration_seconds: int = 300
    audio_sample_rate: int = 16000

    # Streaming Transcription
    streaming_interval_seconds: float = 2.0  # How often to run ASR on buffer (GPU: 2s, CPU: 4s)

    # Rate Limiting & Queue
    rate_limiting_enabled: bool = True
    rate_limit_general_rpm: int = 60  # General endpoints: requests per minute
    rate_limit_inference_rpm: int = 10  # Inference endpoints: requests per minute
    queue_max_concurrent_inferences: int = 2  # Max parallel model inference tasks
    queue_max_size: int = 20  # Max queued requests before rejecting
    queue_timeout_seconds: float = 120.0  # Max seconds a request waits in queue
    queue_estimated_inference_seconds: float = 10.0  # Default estimate before measurements

    # HIPAA Encryption at Rest
    encryption_at_rest_enabled: bool = False
    encryption_master_key: str = "CHANGE_ME_IN_PRODUCTION"
    encryption_kdf_iterations: int = 100000

    # Data Retention & Auto-Purge
    retention_sessions_days: int = 365  # Intake sessions retention (0 = keep forever)
    retention_audit_logs_days: int = 2555  # ~7 years (HIPAA requires min 6 years)
    auto_purge_enabled: bool = False
    auto_purge_interval_hours: int = 24  # How often auto-purge runs

    # Monitoring & Observability
    metrics_enabled: bool = True
    structured_logging_enabled: bool = True
    metrics_endpoint_auth_required: bool = False  # /metrics endpoint — set True for production
    metrics_alert_window_seconds: int = 300  # Window for alert evaluation (5 min)
    metrics_error_rate_warning: float = 0.1  # 10% error rate triggers warning
    metrics_error_rate_critical: float = 0.25  # 25% error rate triggers critical
    metrics_latency_warning_seconds: float = 15.0
    metrics_latency_critical_seconds: float = 30.0

    # API
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    api_reload: bool = False

    # RAG (Retrieval-Augmented Generation)
    rag_enabled: bool = False
    rag_embedding_model: str = "NeuML/pubmedbert-base-embeddings"
    rag_persist_dir: str = "./rag_store"
    rag_top_k: int = 3
    rag_similarity_threshold: float = 0.65  # Min cosine similarity to include a result
    rag_initial_retrieval_k: int = 20  # Candidates fetched before reranking
    rag_reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    rag_reranker_enabled: bool = True
    rag_chunking_enabled: bool = True  # Split SOAP into per-section chunks

    # Knowledge Base (Phase 2)
    knowledge_base_enabled: bool = False
    knowledge_base_persist_dir: str = "./knowledge_store"
    knowledge_base_guidelines_top_k: int = 3
    knowledge_base_guidelines_threshold: float = 0.60
    icd10_lookup_enabled: bool = True  # Semantic ICD-10 code matching
    icd10_top_k: int = 5  # Max ICD-10 suggestions per symptom
    icd10_similarity_threshold: float = 0.60
    drug_interaction_check_enabled: bool = True  # Auto-check medication interactions

    # Multi-Tenancy & Isolation (Phase 3)
    multi_tenancy_enabled: bool = False
    default_organization_id: str = "default"
    default_provider_id: str = "system"

    # RAG Security (Phase 3)
    rag_audit_enabled: bool = True  # Log every RAG retrieval for HIPAA
    rag_vector_store_encryption_enabled: bool = False  # Encrypt vector store at rest

    # RAG Evaluation & Observability (Phase 4)
    rag_evaluation_enabled: bool = True
    rag_evaluation_persist_dir: str = "./rag_eval"
    rag_drift_detection_enabled: bool = True
    rag_drift_window_size: int = 50  # Embeddings to track per window
    rag_drift_threshold: float = 0.15  # Cosine distance shift triggering alert
    rag_hallucination_check_enabled: bool = True  # Cross-ref generated text vs evidence

    # Voice Assistant & Conversation
    conversation_mode_enabled: bool = False
    tts_engine: str = "piper"  # "piper" or "webspeech" (browser fallback)
    piper_model_path: str = "./models/piper/en_US-amy-medium.onnx"
    piper_config_path: str = "./models/piper/en_US-amy-medium.onnx.json"
    tts_sample_rate: int = 22050
    tts_max_text_length: int = 500
    conversation_max_turns: int = 20
    conversation_followup_rounds: int = 3
    conversation_streaming_interval: float = 0.5  # Faster ASR for conversation mode
    conversation_llm_model: str = ""  # Empty = reuse medgemma_model
    conversation_llm_separate: bool = False  # Load separate model for conversation

    # Phase 3: Voice Activity Detection
    vad_enabled: bool = True
    vad_threshold: float = 0.5  # Speech probability threshold (0-1)
    vad_min_silence_ms: int = 800  # Silence duration to trigger end-of-turn (ms)
    vad_min_speech_ms: int = 250  # Minimum speech duration to accept (ms)
    vad_window_size_ms: int = 32  # VAD analysis window (Silero uses 32ms chunks)

    # Phase 3: TTS Caching & Streaming
    tts_cache_greetings: bool = True  # Pre-cache greeting audio at startup
    tts_streaming_enabled: bool = True  # Send TTS sentence-by-sentence

    # Phase 3: Multi-Language
    conversation_auto_detect_language: bool = True
    conversation_default_language: str = "en"
    piper_voice_models: str = ""  # JSON map: {"es": "./models/piper/es_ES-...", ...}

    # Phase 8: Infrastructure & Scalability
    database_url: str = ""  # Empty = use SQLite default; set to postgresql+asyncpg://... for Postgres
    redis_url: str = ""  # Empty = disabled; set to redis://localhost:6379/0
    redis_cache_ttl_seconds: int = 300  # Default cache TTL
    task_queue_enabled: bool = False  # Enable Celery/ARQ background workers
    task_queue_broker_url: str = ""  # e.g. redis://localhost:6379/1
    model_quantization_enabled: bool = False  # Enable 4-bit/8-bit quantization
    model_quantization_bits: int = 4  # 4 or 8
    colab_mode: bool = False  # Enable Colab-specific optimizations
    colab_ngrok_token: str = ""  # Ngrok auth token for Colab tunneling

    # Phase 7: EHR Integration
    webhook_enabled: bool = False
    webhook_url: str = ""  # Default webhook endpoint for session finalization
    webhook_auth_token: str = ""
    hl7v2_export_enabled: bool = True
    ccda_export_enabled: bool = True

    # Phase 5: Clinical Intelligence
    specialty_detection_enabled: bool = True
    default_specialty: str = "general"  # general, emergency, primary_care, psychiatry, ob_gyn, pediatrics
    vitals_extraction_enabled: bool = True
    differential_diagnosis_enabled: bool = True
    ambient_mode_enabled: bool = False
    diarization_enabled: bool = False
    icd10_umls_mode: str = "semantic"  # "semantic" (current) or "umls_linker" (requires scispacy linker)

    # Phase 4: Authentication & Security
    auth_enabled: bool = False  # False = dev mode (current stub behavior)
    jwt_secret_key: str = "CHANGE_ME_IN_PRODUCTION"
    jwt_algorithm: str = "HS256"
    jwt_access_token_expire_minutes: int = 30
    jwt_refresh_token_expire_days: int = 7
    mfa_enabled: bool = False  # TOTP MFA for provider/admin roles
    session_inactivity_timeout_minutes: int = 15  # Frontend inactivity timer
    consent_tracking_enabled: bool = True  # Require verbal consent before intake
    cors_allowed_origins: str = "*"  # Comma-separated origins; "*" for dev

    # Iframe parents allowed to embed the app (CSP frame-ancestors).
    # Default empty = X-Frame-Options: DENY. For HF Spaces canonical URL,
    # set to "https://huggingface.co".
    allow_iframe_embedding_origins: str = ""

    # OAuth2/OIDC SSO (Phase 1)
    oidc_enabled: bool = False  # Enable OIDC login flow
    oidc_issuer_url: str = ""  # e.g. https://accounts.google.com or https://login.microsoftonline.com/{tenant}/v2.0
    oidc_client_id: str = ""
    oidc_client_secret: str = ""
    oidc_redirect_uri: str = ""  # e.g. https://your-app.com/api/auth/oidc/callback
    oidc_scopes: str = "openid email profile"  # Space-separated scopes
    oidc_role_claim: str = "role"  # OIDC claim that maps to UserRole
    oidc_default_role: str = "viewer"  # Default role for new OIDC users

    # Phase 4: Multi-region / Data Residency
    data_region: str = "us-east-1"  # Deployment region for PHI locality
    allowed_data_regions: str = "us-east-1,us-west-2,eu-west-1"  # Comma-separated
    enforce_data_residency: bool = False  # Reject cross-region data transfers
    region_encryption_key_arn: str = ""  # AWS KMS ARN for region-specific encryption

    # Phase 4: vLLM Serving
    vllm_enabled: bool = False
    vllm_url: str = "http://localhost:8001"
    vllm_model: str = "google/medgemma-4b-it"

    # Phase 4: OpenTelemetry
    otel_enabled: bool = False
    otel_endpoint: str = "http://localhost:4317"

    # Phase 4: Wake Word
    picovoice_access_key: str = ""

    # Logging
    log_level: str = "INFO"

    # Settings-loader configuration. `pydantic_settings` is built on
    # Pydantic v2, where the nested `class Config` form is deprecated in
    # favour of `model_config`; the options themselves are unchanged:
    # read a local `.env` file and match variable names case-insensitively.
    model_config = SettingsConfigDict(env_file=".env", case_sensitive=False)
# Global settings instance, created once when this module is imported.
# Values are resolved by Settings from environment variables and the `.env`
# file it is configured with; code in this module reads configuration
# through this object.
settings = Settings()
# ---------------------------------------------------------------------------
# Production-mode startup validation
# ---------------------------------------------------------------------------
# Placeholder values that must never be used as real secrets.
# Configured secrets are lower-cased before being looked up here, so every
# placeholder needs a lower-case spelling to be detectable; the upper-case
# form of the shipped default is kept as well for exact-match comparisons.
_INSECURE_DEFAULTS = {
    "CHANGE_ME_IN_PRODUCTION",
    "change_me_in_production",
    "",
    "changeme",
    "secret",
}
def validate_production_settings() -> None:
    """Validate that security-critical settings are configured for production.

    Called during application startup. Every problem found is collected and
    reported in one numbered list:

    * In production mode, the report is printed to stderr and the process
      exits with status 1.
    * In development mode, only the secret-key checks run and the report is
      logged as a single warning.

    Raises:
        SystemExit: In production mode when at least one issue is found.
    """
    is_prod = settings.deployment_mode == "production"

    # Normalise BOTH sides of the comparison. The placeholder set contains an
    # upper-case entry, so lower-casing only the configured value would let
    # the shipped "CHANGE_ME_IN_PRODUCTION" default pass unnoticed.
    insecure_values = {placeholder.lower() for placeholder in _INSECURE_DEFAULTS}

    def _is_insecure(secret: str) -> bool:
        # A whitespace-only or padded placeholder is no better than the bare one.
        return secret.strip().lower() in insecure_values

    issues: list[str] = []

    # --- Secrets must not be default values ---
    if _is_insecure(settings.jwt_secret_key):
        issues.append(
            "JWT_SECRET_KEY is set to an insecure default. "
            "Generate a strong random secret (e.g. `openssl rand -hex 32`)."
        )
    if _is_insecure(settings.encryption_master_key):
        issues.append(
            "ENCRYPTION_MASTER_KEY is set to an insecure default. "
            "Generate a strong random secret for HIPAA encryption at rest."
        )

    # --- Production requires security features enabled ---
    if is_prod:
        if not settings.auth_enabled:
            issues.append("AUTH_ENABLED must be True in production mode.")
        if not settings.encryption_at_rest_enabled:
            issues.append("ENCRYPTION_AT_REST_ENABLED must be True in production mode.")
        if not settings.audit_logging_enabled:
            issues.append("AUDIT_LOGGING_ENABLED must be True in production mode.")
        if settings.cors_allowed_origins.strip() == "*":
            issues.append(
                "CORS_ALLOWED_ORIGINS must not be '*' in production mode. "
                "Specify allowed origins explicitly."
            )
        if not settings.metrics_endpoint_auth_required:
            issues.append(
                "METRICS_ENDPOINT_AUTH_REQUIRED should be True in production "
                "to prevent information leakage via /metrics."
            )

    # --- Report ---
    if not issues:
        return

    header = (
        "FATAL: Production security validation failed"
        if is_prod
        else "WARNING: Insecure configuration detected (development mode)"
    )
    rule = "=" * 60
    numbered = "".join(f" {i}. {issue}\n" for i, issue in enumerate(issues, 1))
    msg = f"\n{rule}\n{header}\n{rule}\n{numbered}{rule}"

    if is_prod:
        # Hard-fail in production — do not start with insecure config
        print(msg, file=sys.stderr)
        sys.exit(1)

    logger.warning(msg)