open-navigator / config /settings.py
jcbowyer's picture
Clean HuggingFace deployment without binary files
61d29fc
"""
Configuration settings for the Oral Health Policy Pulse system.
"""
from typing import List, Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Application settings with environment variable support.

    Each attribute is a typed setting with a default. Values are read from
    the process environment and from a UTF-8 ``.env`` file; names are matched
    case-insensitively and unknown entries are ignored (see ``model_config``).

    Credential fields (API keys, tokens, the SMTP password) are declared with
    ``repr=False`` so that printing or logging a ``Settings`` object does not
    reveal them. Attribute access and ``model_dump()`` are unaffected.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # API Keys (all optional; excluded from repr to avoid leaking secrets)
    openai_api_key: Optional[str] = Field(
        None,
        description="OpenAI API key (optional for local mode)",
        repr=False,
    )
    anthropic_api_key: Optional[str] = Field(
        None,
        description="Anthropic API key",
        repr=False,
    )
    huggingface_token: Optional[str] = Field(
        None,
        description="HuggingFace API token for dataset uploads",
        repr=False,
    )
    census_api_key: Optional[str] = Field(
        None,
        description="U.S. Census Bureau API key (free, increases rate limit from 500 to 5000/day)",
        repr=False,
    )
    dataverse_api_key: Optional[str] = Field(
        None,
        description="Harvard Dataverse API key (optional, improves rate limits)",
        repr=False,
    )
    openstates_api_key: Optional[str] = Field(
        None,
        description="Open States API key (free tier: 50k requests/month)",
        repr=False,
    )
    google_civic_api_key: Optional[str] = Field(
        None,
        description="Google Civic Information API key (free tier: 25k requests/day)",
        repr=False,
    )

    # Paid services (for reference only - not recommended for free/OSS projects)
    ballotpedia_api_key: Optional[str] = Field(
        None,
        description="Ballotpedia API v3.0 key (PAID SERVICE - contact Ballotpedia)",
        repr=False,
    )
    cicero_api_key: Optional[str] = Field(
        None,
        description="Cicero API key (PAID SERVICE - enterprise pricing)",
        repr=False,
    )

    # HuggingFace Configuration
    hf_organization: Optional[str] = Field(
        None,
        description="HuggingFace organization name (e.g., 'CommunityOne')",
    )
    hf_dataset_prefix: str = Field(
        "open-navigator",
        description="Prefix for dataset names",
    )

    # Databricks Configuration
    databricks_host: Optional[str] = Field(
        None,
        description="Databricks workspace URL",
    )
    databricks_token: Optional[str] = Field(
        None,
        description="Databricks access token",
        repr=False,
    )
    databricks_warehouse_id: Optional[str] = Field(
        None,
        description="SQL warehouse ID",
    )

    # Delta Lake Configuration
    # For local mode: use "data/delta"
    # For Databricks: use "dbfs:/open-navigator"
    delta_lake_path: str = Field("data/delta", description="Delta Lake base path")
    catalog_name: str = Field("oral_health", description="Unity Catalog name")
    schema_name: str = Field("policy_analysis", description="Schema name")

    # MLflow Configuration (for Databricks Agent Bricks)
    mlflow_tracking_uri: str = Field(
        "databricks",
        description="MLflow tracking URI",
    )
    mlflow_experiment_name: str = Field(
        "/Users/shared/oral-health-agents",
        description="MLflow experiment",
    )
    mlflow_model_name_prefix: str = Field(
        "oral_health",
        description="Model name prefix in Unity Catalog",
    )

    # Agent LLM Configuration
    classifier_model: str = Field(
        "gpt-4-turbo-preview",
        description="LLM model for classification",
    )
    sentiment_model_llm: str = Field(
        "gpt-3.5-turbo",
        description="LLM model for sentiment analysis",
    )
    advocacy_model: str = Field(
        "gpt-4-turbo-preview",
        description="LLM model for advocacy generation",
    )

    # Agent Configuration
    max_concurrent_agents: int = Field(
        5,
        description="Maximum concurrent agent operations",
    )
    scraper_timeout: int = Field(30, description="Scraper timeout in seconds")
    classifier_batch_size: int = Field(
        50,
        description="Batch size for classification",
    )
    # Separate from ``sentiment_model_llm`` above: this one names a
    # HuggingFace model rather than an LLM.
    sentiment_model: str = Field(
        "distilbert-base-uncased-finetuned-sst-2-english",
        description="HuggingFace sentiment model",
    )

    # Data Sources (these are FREE public data - no API keys needed)
    municode_api_key: Optional[str] = Field(
        None,
        description="Municode API key (not required - public data)",
        repr=False,
    )
    legistar_api_key: Optional[str] = Field(
        None,
        description="Legistar API key (not required - public data)",
        repr=False,
    )

    # Logging
    log_level: str = Field("INFO", description="Logging level")
    log_file: str = Field("logs/open-navigator.log", description="Log file path")

    # API Configuration
    api_host: str = Field("0.0.0.0", description="API host")
    api_port: int = Field(8000, description="API port")
    api_workers: int = Field(4, description="Number of API workers")

    # Vector Database
    qdrant_host: str = Field("localhost", description="Qdrant host")
    qdrant_port: int = Field(6333, description="Qdrant port")
    qdrant_collection: str = Field(
        "policy_minutes",
        description="Qdrant collection name",
    )

    # Email Configuration
    smtp_host: str = Field("smtp.gmail.com", description="SMTP host")
    smtp_port: int = Field(587, description="SMTP port")
    smtp_user: Optional[str] = Field(None, description="SMTP username")
    smtp_password: Optional[str] = Field(
        None,
        description="SMTP password",
        repr=False,
    )

    # Policy Topics of Interest
    policy_topics: List[str] = Field(
        default=[
            "water fluoridation",
            "fluoride",
            "school dental screening",
            "dental care funding",
            "medicaid dental",
            "children's dental health",
            "oral health",
            "dental clinic",
            "community dental",
        ],
        description="Topics to monitor",
    )

    # Geographic Configuration
    target_states: Optional[List[str]] = Field(
        None,
        description="Specific states to monitor (None = all states)",
    )
    min_population_threshold: int = Field(
        10000,
        description="Minimum city population to include",
    )
# Global settings instance.
# Constructed at import time, so the environment and `.env` file are read
# (and any invalid value raises) when this module is first imported.
settings = Settings()