Spaces:

joelthomas77
/

VoxDoc

Sleeping

App Files Files Community

VoxDoc / app /config.py

joelthomas77

Allow iframe embedding via CSP frame-ancestors (config-driven)

9fdeae0 verified 30 days ago

raw

history blame contribute delete

14.3 kB

	"""Application configuration using Pydantic settings."""

	import logging
	import sys

	from pydantic_settings import BaseSettings
	from typing import Literal

	# Module-level logger; validate_production_settings() uses it to report
	# insecure configuration as a warning when running in development mode.
	logger = logging.getLogger(__name__)


	class Settings(BaseSettings):
	    """Application settings loaded from environment variables.

	    Each attribute below is one setting with its default value. Values are
	    looked up case-insensitively and may also be supplied through a ``.env``
	    file (see ``model_config``).
	    """

	    # pydantic v2 configuration. The nested ``class Config`` form is
	    # deprecated in pydantic v2, so the options are declared via
	    # ``model_config``. A plain dict is used (equivalent at runtime to
	    # ``SettingsConfigDict(...)``) so no additional import is required.
	    model_config = {
	        "env_file": ".env",
	        "case_sensitive": False,
	    }

	    # Deployment Mode: "development" or "production"
	    # In production mode, security features are enforced and insecure
	    # defaults are rejected.
	    deployment_mode: Literal["development", "production"] = "development"

	    # Hugging Face
	    hf_token: str = ""

	    # MedGemma backend: "local" loads transformers in-process (needs GPU/RAM);
	    # "hf-inference" routes generation to Hugging Face Inference Providers
	    # using HF_TOKEN. Use "hf-inference" for free CPU deployments where
	    # the 4b model would not fit or be too slow.
	    medgemma_provider: Literal["local", "hf-inference"] = "local"
	    hf_inference_provider: str = ""  # "" = auto; or e.g. "fireworks-ai", "together"
	    hf_inference_timeout: int = 120  # Seconds before HF Inference call aborts

	    # Models
	    # NOTE(review): field names starting with ``model_`` can trigger
	    # pydantic's protected-namespace warning on some pydantic versions;
	    # confirm against the pinned version.
	    model_cache_dir: str = "/app/models"
	    medasr_model: str = "google/medasr"
	    medgemma_model: str = "google/medgemma-1.5-4b-it"
	    medgemma_vision_model: str = "google/medgemma-4b-it"
	    whisper_model: str = "openai/whisper-small"
	    multilingual_asr_enabled: bool = True

	    # Image Analysis
	    enable_image_analysis: bool = True
	    max_image_size_mb: int = 10

	    # Device
	    device: Literal["cuda", "cpu"] = "cpu"
	    enable_gpu: bool = False

	    # MedGemma Generation Parameters
	    medgemma_max_tokens: int = 1024  # Sufficient for complete documentation
	    medgemma_repetition_penalty: float = 1.1  # Prevent repetitive output

	    # Compliance Controls
	    allow_phi_logging: bool = False
	    enable_phi_persistence: bool = False
	    medgemma_terms_acknowledged: bool = False
	    enforce_medgemma_terms_acknowledgement: bool = True

	    # Audit Logging
	    audit_logging_enabled: bool = False

	    # Audio
	    max_audio_duration_seconds: int = 300
	    audio_sample_rate: int = 16000

	    # Streaming Transcription
	    streaming_interval_seconds: float = 2.0  # How often to run ASR on buffer (GPU: 2s, CPU: 4s)

	    # Rate Limiting & Queue
	    rate_limiting_enabled: bool = True
	    rate_limit_general_rpm: int = 60  # General endpoints: requests per minute
	    rate_limit_inference_rpm: int = 10  # Inference endpoints: requests per minute
	    queue_max_concurrent_inferences: int = 2  # Max parallel model inference tasks
	    queue_max_size: int = 20  # Max queued requests before rejecting
	    queue_timeout_seconds: float = 120.0  # Max seconds a request waits in queue
	    queue_estimated_inference_seconds: float = 10.0  # Default estimate before measurements

	    # HIPAA Encryption at Rest
	    encryption_at_rest_enabled: bool = False
	    # Placeholder only; validate_production_settings() reports it as insecure.
	    encryption_master_key: str = "CHANGE_ME_IN_PRODUCTION"
	    encryption_kdf_iterations: int = 100000

	    # Data Retention & Auto-Purge
	    retention_sessions_days: int = 365  # Intake sessions retention (0 = keep forever)
	    retention_audit_logs_days: int = 2555  # ~7 years (HIPAA requires min 6 years)
	    auto_purge_enabled: bool = False
	    auto_purge_interval_hours: int = 24  # How often auto-purge runs

	    # Monitoring & Observability
	    metrics_enabled: bool = True
	    structured_logging_enabled: bool = True
	    metrics_endpoint_auth_required: bool = False  # /metrics endpoint — set True for production
	    metrics_alert_window_seconds: int = 300  # Window for alert evaluation (5 min)
	    metrics_error_rate_warning: float = 0.1  # 10% error rate triggers warning
	    metrics_error_rate_critical: float = 0.25  # 25% error rate triggers critical
	    metrics_latency_warning_seconds: float = 15.0
	    metrics_latency_critical_seconds: float = 30.0

	    # API
	    api_host: str = "0.0.0.0"
	    api_port: int = 8000
	    api_reload: bool = False

	    # RAG (Retrieval-Augmented Generation)
	    rag_enabled: bool = False
	    rag_embedding_model: str = "NeuML/pubmedbert-base-embeddings"
	    rag_persist_dir: str = "./rag_store"
	    rag_top_k: int = 3
	    rag_similarity_threshold: float = 0.65  # Min cosine similarity to include a result
	    rag_initial_retrieval_k: int = 20  # Candidates fetched before reranking
	    rag_reranker_model: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
	    rag_reranker_enabled: bool = True
	    rag_chunking_enabled: bool = True  # Split SOAP into per-section chunks

	    # Knowledge Base (Phase 2)
	    knowledge_base_enabled: bool = False
	    knowledge_base_persist_dir: str = "./knowledge_store"
	    knowledge_base_guidelines_top_k: int = 3
	    knowledge_base_guidelines_threshold: float = 0.60
	    icd10_lookup_enabled: bool = True  # Semantic ICD-10 code matching
	    icd10_top_k: int = 5  # Max ICD-10 suggestions per symptom
	    icd10_similarity_threshold: float = 0.60
	    drug_interaction_check_enabled: bool = True  # Auto-check medication interactions

	    # Multi-Tenancy & Isolation (Phase 3)
	    multi_tenancy_enabled: bool = False
	    default_organization_id: str = "default"
	    default_provider_id: str = "system"

	    # RAG Security (Phase 3)
	    rag_audit_enabled: bool = True  # Log every RAG retrieval for HIPAA
	    rag_vector_store_encryption_enabled: bool = False  # Encrypt vector store at rest

	    # RAG Evaluation & Observability (Phase 4)
	    rag_evaluation_enabled: bool = True
	    rag_evaluation_persist_dir: str = "./rag_eval"
	    rag_drift_detection_enabled: bool = True
	    rag_drift_window_size: int = 50  # Embeddings to track per window
	    rag_drift_threshold: float = 0.15  # Cosine distance shift triggering alert
	    rag_hallucination_check_enabled: bool = True  # Cross-ref generated text vs evidence

	    # Voice Assistant & Conversation
	    conversation_mode_enabled: bool = False
	    tts_engine: str = "piper"  # "piper" or "webspeech" (browser fallback)
	    piper_model_path: str = "./models/piper/en_US-amy-medium.onnx"
	    piper_config_path: str = "./models/piper/en_US-amy-medium.onnx.json"
	    tts_sample_rate: int = 22050
	    tts_max_text_length: int = 500
	    conversation_max_turns: int = 20
	    conversation_followup_rounds: int = 3
	    conversation_streaming_interval: float = 0.5  # Faster ASR for conversation mode
	    conversation_llm_model: str = ""  # Empty = reuse medgemma_model
	    conversation_llm_separate: bool = False  # Load separate model for conversation

	    # Phase 3: Voice Activity Detection
	    vad_enabled: bool = True
	    vad_threshold: float = 0.5  # Speech probability threshold (0-1)
	    vad_min_silence_ms: int = 800  # Silence duration to trigger end-of-turn (ms)
	    vad_min_speech_ms: int = 250  # Minimum speech duration to accept (ms)
	    vad_window_size_ms: int = 32  # VAD analysis window (Silero uses 32ms chunks)

	    # Phase 3: TTS Caching & Streaming
	    tts_cache_greetings: bool = True  # Pre-cache greeting audio at startup
	    tts_streaming_enabled: bool = True  # Send TTS sentence-by-sentence

	    # Phase 3: Multi-Language
	    conversation_auto_detect_language: bool = True
	    conversation_default_language: str = "en"
	    piper_voice_models: str = ""  # JSON map: {"es": "./models/piper/es_ES-...", ...}

	    # Phase 8: Infrastructure & Scalability
	    database_url: str = ""  # Empty = use SQLite default; set to postgresql+asyncpg://... for Postgres
	    redis_url: str = ""  # Empty = disabled; set to redis://localhost:6379/0
	    redis_cache_ttl_seconds: int = 300  # Default cache TTL
	    task_queue_enabled: bool = False  # Enable Celery/ARQ background workers
	    task_queue_broker_url: str = ""  # e.g. redis://localhost:6379/1
	    model_quantization_enabled: bool = False  # Enable 4-bit/8-bit quantization
	    model_quantization_bits: int = 4  # 4 or 8
	    colab_mode: bool = False  # Enable Colab-specific optimizations
	    colab_ngrok_token: str = ""  # Ngrok auth token for Colab tunneling

	    # Phase 7: EHR Integration
	    webhook_enabled: bool = False
	    webhook_url: str = ""  # Default webhook endpoint for session finalization
	    webhook_auth_token: str = ""
	    hl7v2_export_enabled: bool = True
	    ccda_export_enabled: bool = True

	    # Phase 5: Clinical Intelligence
	    specialty_detection_enabled: bool = True
	    default_specialty: str = "general"  # general, emergency, primary_care, psychiatry, ob_gyn, pediatrics
	    vitals_extraction_enabled: bool = True
	    differential_diagnosis_enabled: bool = True
	    ambient_mode_enabled: bool = False
	    diarization_enabled: bool = False
	    icd10_umls_mode: str = "semantic"  # "semantic" (current) or "umls_linker" (requires scispacy linker)

	    # Phase 4: Authentication & Security
	    auth_enabled: bool = False  # False = dev mode (current stub behavior)
	    # Placeholder only; validate_production_settings() reports it as insecure.
	    jwt_secret_key: str = "CHANGE_ME_IN_PRODUCTION"
	    jwt_algorithm: str = "HS256"
	    jwt_access_token_expire_minutes: int = 30
	    jwt_refresh_token_expire_days: int = 7
	    mfa_enabled: bool = False  # TOTP MFA for provider/admin roles
	    session_inactivity_timeout_minutes: int = 15  # Frontend inactivity timer
	    consent_tracking_enabled: bool = True  # Require verbal consent before intake
	    cors_allowed_origins: str = ""  # Comma-separated origins; "" for dev
	    # Iframe parents allowed to embed the app (CSP frame-ancestors).
	    # Default empty = X-Frame-Options: DENY. For HF Spaces canonical URL,
	    # set to "https://huggingface.co".
	    allow_iframe_embedding_origins: str = ""

	    # OAuth2/OIDC SSO (Phase 1)
	    oidc_enabled: bool = False  # Enable OIDC login flow
	    oidc_issuer_url: str = ""  # e.g. https://accounts.google.com or https://login.microsoftonline.com/{tenant}/v2.0
	    oidc_client_id: str = ""
	    oidc_client_secret: str = ""
	    oidc_redirect_uri: str = ""  # e.g. https://your-app.com/api/auth/oidc/callback
	    oidc_scopes: str = "openid email profile"  # Space-separated scopes
	    oidc_role_claim: str = "role"  # OIDC claim that maps to UserRole
	    oidc_default_role: str = "viewer"  # Default role for new OIDC users

	    # Phase 4: Multi-region / Data Residency
	    data_region: str = "us-east-1"  # Deployment region for PHI locality
	    allowed_data_regions: str = "us-east-1,us-west-2,eu-west-1"  # Comma-separated
	    enforce_data_residency: bool = False  # Reject cross-region data transfers
	    region_encryption_key_arn: str = ""  # AWS KMS ARN for region-specific encryption

	    # Phase 4: vLLM Serving
	    vllm_enabled: bool = False
	    vllm_url: str = "http://localhost:8001"
	    vllm_model: str = "google/medgemma-4b-it"

	    # Phase 4: OpenTelemetry
	    otel_enabled: bool = False
	    otel_endpoint: str = "http://localhost:4317"

	    # Phase 4: Wake Word
	    picovoice_access_key: str = ""

	    # Logging
	    log_level: str = "INFO"


	# Global settings instance, created once when this module is first imported.
	# validate_production_settings() below reads its values from this object.
	settings = Settings()


	# ---------------------------------------------------------------------------
	# Production-mode startup validation
	# ---------------------------------------------------------------------------

	# Placeholder / weak values that must never be accepted as a real secret.
	# The validation code lowercases a candidate before the membership test, so
	# every entry needs a lowercase form: without "change_me_in_production" the
	# shipped default placeholder would never match. The upper-case spelling is
	# kept for any comparison done without normalising.
	_INSECURE_DEFAULTS = {
	    "CHANGE_ME_IN_PRODUCTION",
	    "change_me_in_production",
	    "",
	    "changeme",
	    "secret",
	}


	def validate_production_settings() -> None:
	    """Check security-critical settings and report insecure configuration.

	    Intended to be called during application startup. All problems are
	    collected first so the operator sees the complete list in one report.

	    * Production mode (``deployment_mode == "production"``): the report is
	      printed to stderr and the process exits with status 1.
	    * Development mode: the report is logged as a warning and execution
	      continues.

	    Raises:
	        SystemExit: In production mode when at least one issue is found.
	    """
	    is_prod = settings.deployment_mode == "production"

	    # Normalise BOTH sides of the comparison. Lowercasing only the candidate
	    # would miss the upper-case "CHANGE_ME_IN_PRODUCTION" placeholder.
	    insecure_values = {value.lower() for value in _INSECURE_DEFAULTS}

	    def _is_insecure(secret: str) -> bool:
	        # Surrounding whitespace does not make a placeholder any safer.
	        return secret.strip().lower() in insecure_values

	    issues: list[str] = []

	    # --- Secrets must not be default values ---
	    if _is_insecure(settings.jwt_secret_key):
	        issues.append(
	            "JWT_SECRET_KEY is set to an insecure default. "
	            "Generate a strong random secret (e.g. `openssl rand -hex 32`)."
	        )

	    if _is_insecure(settings.encryption_master_key):
	        issues.append(
	            "ENCRYPTION_MASTER_KEY is set to an insecure default. "
	            "Generate a strong random secret for HIPAA encryption at rest."
	        )

	    # --- Production requires security features enabled ---
	    if is_prod:
	        if not settings.auth_enabled:
	            issues.append("AUTH_ENABLED must be True in production mode.")

	        if not settings.encryption_at_rest_enabled:
	            issues.append("ENCRYPTION_AT_REST_ENABLED must be True in production mode.")

	        if not settings.audit_logging_enabled:
	            issues.append("AUDIT_LOGGING_ENABLED must be True in production mode.")

	        # The setting is a comma-separated list, so a wildcard must be
	        # rejected wherever it appears, not only when it is the sole entry.
	        cors_origins = [
	            origin.strip() for origin in settings.cors_allowed_origins.split(",")
	        ]
	        if "*" in cors_origins:
	            issues.append(
	                "CORS_ALLOWED_ORIGINS must not be '*' in production mode. "
	                "Specify allowed origins explicitly."
	            )

	        if not settings.metrics_endpoint_auth_required:
	            issues.append(
	                "METRICS_ENDPOINT_AUTH_REQUIRED should be True in production "
	                "to prevent information leakage via /metrics."
	            )

	    # --- Report ---
	    if not issues:
	        return

	    header = (
	        "FATAL: Production security validation failed"
	        if is_prod
	        else "WARNING: Insecure configuration detected (development mode)"
	    )
	    banner = "=" * 60
	    numbered = "".join(
	        f" {i}. {issue}\n" for i, issue in enumerate(issues, 1)
	    )
	    msg = f"\n{banner}\n{header}\n{banner}\n{numbered}{banner}"

	    if is_prod:
	        # Hard-fail in production — do not start with insecure config
	        print(msg, file=sys.stderr)
	        sys.exit(1)
	    else:
	        logger.warning(msg)