File size: 7,318 Bytes
a6e70b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""

VentureForge Configuration

==========================

Pydantic v2 settings loaded from environment variables.

Single LLM_BASE_URL switches between OpenAI/OpenRouter/AMD vLLM.



Usage:

    from src.config import settings

    print(settings.llm_base_url)

"""

from functools import lru_cache

from pydantic import Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    """All application settings loaded from .env or environment.

    Values are read from the process environment and from a UTF-8 ``.env``
    file; entries that do not match a declared field are ignored
    (``extra="ignore"``).  Numeric fields carry range constraints, so an
    out-of-range value fails validation when the settings are constructed.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # ------------------------------------------------------------------
    # LLM Provider — REASONING tier (scorer, critic)
    # ------------------------------------------------------------------
    llm_base_url: str = Field(
        default="https://api.openai.com/v1",
        description="OpenAI-compatible API base URL for reasoning tasks",
    )
    llm_api_key: str = Field(
        default="",
        description="API key for the reasoning LLM provider",
    )
    llm_model: str = Field(
        default="gpt-4o-mini",
        description="Model name for reasoning tasks (scorer, critic)",
    )

    # ------------------------------------------------------------------
    # LLM Provider — FAST tier (pain_point_miner, idea_generator, pitch_writer)
    # Each value falls back to its reasoning-tier counterpart if unset
    # (see get_llm_config).
    # ------------------------------------------------------------------
    fast_llm_base_url: str | None = Field(
        default=None,
        description="Base URL for fast/non-reasoning LLM. Defaults to llm_base_url.",
    )
    fast_llm_api_key: str | None = Field(
        default=None,
        description="API key for fast LLM. Defaults to llm_api_key.",
    )
    fast_llm_model: str | None = Field(
        default=None,
        description="Model name for fast tasks. Defaults to llm_model.",
    )

    # Optional: separate OpenRouter config (falls back to llm_* if unset).
    # NOTE(review): no fallback is implemented in this class; presumably the
    # consumers of these two fields handle it — confirm against callers.
    openrouter_api_key: str | None = Field(default=None)
    openrouter_base_url: str = Field(default="https://openrouter.ai/api/v1")

    # ------------------------------------------------------------------
    # LLM Generation Parameters (per-agent overrides possible)
    # ------------------------------------------------------------------
    default_temperature: float = Field(default=0.2, ge=0.0, le=2.0)
    max_tokens: int = Field(default=4096, ge=1)
    request_timeout: int = Field(default=120, ge=1)

    # ------------------------------------------------------------------
    # Reddit — NOT required.  We use public `.json` endpoints (no PRAW).
    # Only set these if you later want PRAW features.
    # The credential fields must stay declared BEFORE reddit_user_agent:
    # its validator reads them from the already-validated data.
    # ------------------------------------------------------------------
    reddit_client_id: str | None = Field(default=None)
    reddit_client_secret: str | None = Field(default=None)
    reddit_user_agent: str = Field(
        default="ventureforge:v1.0 by u/username",
        description="Optional PRAW user agent string",
    )

    # ------------------------------------------------------------------
    # Tavily — used for community-discovery fallback
    # ------------------------------------------------------------------
    tavily_api_key: str | None = Field(default=None)

    # ------------------------------------------------------------------
    # Product Hunt — for scraping product comments and feedback
    # Get key at: https://api.producthunt.com/v1/oauth/authorize
    # ------------------------------------------------------------------
    product_hunt_api_key: str | None = Field(default=None)

    # ------------------------------------------------------------------
    # YouTube Data API v3 — for scraping video comments
    # Get key at: https://console.cloud.google.com/apis/credentials
    # Free quota: 10,000 units/day
    # ------------------------------------------------------------------
    youtube_api_key: str | None = Field(default=None)

    # ------------------------------------------------------------------
    # HuggingFace (for AMD vLLM model download)
    # ------------------------------------------------------------------
    hf_token: str | None = Field(default=None)

    # ------------------------------------------------------------------
    # Pipeline Defaults
    # ------------------------------------------------------------------
    max_pain_points: int = Field(default=30, ge=5, le=100)
    # TEMPORARY: Reduced from 5 to 2 due to vLLM server max_tokens limit (~2048)
    # The server truncates responses at ~1600 tokens, so we generate fewer ideas per call
    # TODO: Increase back to 5 once server --max-tokens is increased to 16384
    ideas_per_run: int = Field(default=2, ge=1, le=20)
    top_n_pitches: int = Field(default=3, ge=1, le=10)
    max_revisions: int = Field(default=2, ge=0, le=5)
    lookback_days: int = Field(default=90, ge=7, le=365)

    # ------------------------------------------------------------------
    # Caching
    # ------------------------------------------------------------------
    cache_dir: str = Field(default=".cache")
    cache_ttl_hours: int = Field(default=24, ge=1)

    # ------------------------------------------------------------------
    # UI
    # ------------------------------------------------------------------
    gradio_port: int = Field(default=7860, ge=1024, le=65535)
    # 0.0.0.0 binds every network interface, not just localhost.
    gradio_host: str = Field(default="0.0.0.0")

    # ------------------------------------------------------------------
    # Validators
    # ------------------------------------------------------------------
    @field_validator("reddit_user_agent", mode="after")
    @classmethod
    def _warn_default_agent(cls, v: str, info) -> str:
        """Warn if PRAW credentials are set but the user agent is a placeholder.

        The value is never rejected or modified.  A ``UserWarning`` is
        emitted only when a Reddit client id or secret has been configured
        while the user agent still contains the literal ``username``; users
        who do not configure Reddit credentials see no warning.

        Args:
            v: The validated ``reddit_user_agent`` value.
            info: Pydantic ``ValidationInfo``; ``info.data`` holds the fields
                validated before this one (the Reddit credentials).

        Returns:
            ``v`` unchanged.
        """
        import warnings  # local import: keeps the module's import block untouched

        validated = info.data
        praw_configured = bool(
            validated.get("reddit_client_id") or validated.get("reddit_client_secret")
        )
        if praw_configured and "username" in v:
            warnings.warn(
                "reddit_user_agent still contains the placeholder 'username'; "
                "set REDDIT_USER_AGENT to a value that identifies your app.",
                UserWarning,
                stacklevel=2,
            )
        return v

    @property
    def tavily_enabled(self) -> bool:
        """Whether a non-empty Tavily API key is configured."""
        return bool(self.tavily_api_key)

    @property
    def product_hunt_enabled(self) -> bool:
        """Whether a non-empty Product Hunt API key is configured."""
        return bool(self.product_hunt_api_key)

    @property
    def effective_llm_config(self) -> dict[str, str | int]:
        """Return the reasoning-tier LLM configuration as a new dict.

        Keys: ``base_url``, ``api_key``, ``model``, ``timeout``.
        """
        return {
            "base_url": self.llm_base_url,
            "api_key": self.llm_api_key,
            "model": self.llm_model,
            "timeout": self.request_timeout,
        }

    def get_llm_config(self, *, reasoning: bool = False) -> dict[str, str | int]:
        """Return LLM config for a given tier.

        reasoning=True  → scorer, critic (large model)
        reasoning=False → pain_point_miner, idea_generator, pitch_writer (fast model)

        For the fast tier, each ``fast_llm_*`` value that is unset or empty
        falls back to its reasoning-tier counterpart.  ``timeout`` is shared
        by both tiers.

        Returns:
            A new dict with keys ``base_url``, ``api_key``, ``model``,
            ``timeout``.
        """
        # The property builds a fresh dict on every access, so it is safe to
        # mutate the result here.
        config = self.effective_llm_config
        if reasoning:
            return config

        fast_overrides = {
            "base_url": self.fast_llm_base_url,
            "api_key": self.fast_llm_api_key,
            "model": self.fast_llm_model,
        }
        # Only truthy overrides replace the reasoning-tier value.
        config.update({key: value for key, value in fast_overrides.items() if value})
        return config


@lru_cache
def get_settings() -> Settings:
    """Build the application settings once and reuse them afterwards.

    The function takes no arguments, so the ``lru_cache`` wrapper holds a
    single ``Settings`` instance: the first call constructs it and every
    later call returns that same object.
    """
    instance = Settings()
    return instance


# Module-level settings object, created at import time.  It is the same
# instance that the cached get_settings() returns on later calls.
settings: Settings = get_settings()