File size: 5,698 Bytes
61d29fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Configuration settings for the Oral Health Policy Pulse system.
"""
from typing import List, Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """Application settings with environment variable support.

    Values come from the process environment and from a UTF-8 ``.env`` file
    (see ``model_config``). Environment variable names are matched
    case-insensitively, and variables that do not correspond to a field are
    ignored rather than rejected.

    Credential fields (API keys, access tokens, the SMTP password) are
    declared with ``repr=False`` so that ``repr(settings)`` / ``str(settings)``
    -- and therefore any log line or traceback that formats the object --
    does not contain their values. The attributes themselves are still plain
    ``Optional[str]``, so existing callers read them exactly as before.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore"
    )

    # API Keys (all optional; hidden from repr because they are credentials)
    openai_api_key: Optional[str] = Field(
        None,
        description="OpenAI API key (optional for local mode)",
        repr=False,
    )
    anthropic_api_key: Optional[str] = Field(
        None,
        description="Anthropic API key",
        repr=False,
    )
    huggingface_token: Optional[str] = Field(
        None,
        description="HuggingFace API token for dataset uploads",
        repr=False,
    )
    census_api_key: Optional[str] = Field(
        None,
        description="U.S. Census Bureau API key (free, increases rate limit from 500 to 5000/day)",
        repr=False,
    )
    dataverse_api_key: Optional[str] = Field(
        None,
        description="Harvard Dataverse API key (optional, improves rate limits)",
        repr=False,
    )
    openstates_api_key: Optional[str] = Field(
        None,
        description="Open States API key (free tier: 50k requests/month)",
        repr=False,
    )
    google_civic_api_key: Optional[str] = Field(
        None,
        description="Google Civic Information API key (free tier: 25k requests/day)",
        repr=False,
    )

    # Paid services (for reference only - not recommended for free/OSS projects)
    ballotpedia_api_key: Optional[str] = Field(
        None,
        description="Ballotpedia API v3.0 key (PAID SERVICE - contact Ballotpedia)",
        repr=False,
    )
    cicero_api_key: Optional[str] = Field(
        None,
        description="Cicero API key (PAID SERVICE - enterprise pricing)",
        repr=False,
    )

    # HuggingFace Configuration
    hf_organization: Optional[str] = Field(None, description="HuggingFace organization name (e.g., 'CommunityOne')")
    hf_dataset_prefix: str = Field("open-navigator", description="Prefix for dataset names")

    # Databricks Configuration
    databricks_host: Optional[str] = Field(None, description="Databricks workspace URL")
    databricks_token: Optional[str] = Field(
        None,
        description="Databricks access token",
        repr=False,  # credential: keep out of repr/log output
    )
    databricks_warehouse_id: Optional[str] = Field(None, description="SQL warehouse ID")

    # Delta Lake Configuration
    # For local mode: use "data/delta"
    # For Databricks: use "dbfs:/open-navigator"
    delta_lake_path: str = Field("data/delta", description="Delta Lake base path")
    catalog_name: str = Field("oral_health", description="Unity Catalog name")
    schema_name: str = Field("policy_analysis", description="Schema name")

    # MLflow Configuration (for Databricks Agent Bricks)
    mlflow_tracking_uri: str = Field("databricks", description="MLflow tracking URI")
    mlflow_experiment_name: str = Field("/Users/shared/oral-health-agents", description="MLflow experiment")
    mlflow_model_name_prefix: str = Field("oral_health", description="Model name prefix in Unity Catalog")

    # Agent LLM Configuration
    classifier_model: str = Field("gpt-4-turbo-preview", description="LLM model for classification")
    sentiment_model_llm: str = Field("gpt-3.5-turbo", description="LLM model for sentiment analysis")
    advocacy_model: str = Field("gpt-4-turbo-preview", description="LLM model for advocacy generation")

    # Agent Configuration
    max_concurrent_agents: int = Field(5, description="Maximum concurrent agent operations")
    scraper_timeout: int = Field(30, description="Scraper timeout in seconds")
    classifier_batch_size: int = Field(50, description="Batch size for classification")
    # Not to be confused with `sentiment_model_llm` above: this one names a
    # HuggingFace model, the other an LLM used for sentiment analysis.
    sentiment_model: str = Field(
        "distilbert-base-uncased-finetuned-sst-2-english",
        description="HuggingFace sentiment model"
    )

    # Data Sources (these are FREE public data - no API keys needed)
    municode_api_key: Optional[str] = Field(
        None,
        description="Municode API key (not required - public data)",
        repr=False,
    )
    legistar_api_key: Optional[str] = Field(
        None,
        description="Legistar API key (not required - public data)",
        repr=False,
    )

    # Logging
    log_level: str = Field("INFO", description="Logging level")
    log_file: str = Field("logs/open-navigator.log", description="Log file path")

    # API Configuration
    # NOTE(review): "0.0.0.0" conventionally means "listen on every interface";
    # confirm that default is intended outside of containerised deployments.
    api_host: str = Field("0.0.0.0", description="API host")
    api_port: int = Field(8000, description="API port")
    api_workers: int = Field(4, description="Number of API workers")

    # Vector Database
    qdrant_host: str = Field("localhost", description="Qdrant host")
    qdrant_port: int = Field(6333, description="Qdrant port")
    qdrant_collection: str = Field("policy_minutes", description="Qdrant collection name")

    # Email Configuration
    smtp_host: str = Field("smtp.gmail.com", description="SMTP host")
    smtp_port: int = Field(587, description="SMTP port")
    smtp_user: Optional[str] = Field(None, description="SMTP username")
    smtp_password: Optional[str] = Field(
        None,
        description="SMTP password",
        repr=False,  # credential: keep out of repr/log output
    )

    # Policy Topics of Interest
    policy_topics: List[str] = Field(
        default=[
            "water fluoridation",
            "fluoride",
            "school dental screening",
            "dental care funding",
            "medicaid dental",
            "children's dental health",
            "oral health",
            "dental clinic",
            "community dental"
        ],
        description="Topics to monitor"
    )

    # Geographic Configuration
    target_states: Optional[List[str]] = Field(
        None,
        description="Specific states to monitor (None = all states)"
    )

    min_population_threshold: int = Field(
        10000,
        description="Minimum city population to include"
    )


# Global settings instance.
# Built once, when this module is first imported, so the environment (and the
# `.env` file named in `Settings.model_config`) is read at import time.
# Other modules are presumably meant to import this shared object rather than
# construct their own `Settings()` -- confirm against callers.
settings: Settings = Settings()