riazmo committed on
Commit
9f73abe
·
verified ·
1 Parent(s): 31d44fa

Upload settings.py

Browse files
Files changed (1) hide show
  1. config/settings.py +226 -0
config/settings.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Application Settings
3
+ Design System Extractor v2
4
+
5
+ Loads configuration from environment variables and YAML files.
6
+ """
7
+
8
+ import os
9
+ from pathlib import Path
10
+ from typing import Optional
11
+ from dataclasses import dataclass, field
12
+ from dotenv import load_dotenv
13
+ import yaml
14
+
15
# Load environment variables from a .env file.
# A .env next to this module wins; otherwise fall back to the parent
# directory (the development layout).
_settings_dir = Path(__file__).parent
env_path = _settings_dir / ".env"
load_dotenv(env_path if env_path.exists() else _settings_dir.parent / ".env")
22
+
23
+
24
@dataclass
class HFSettings:
    """Hugging Face configuration.

    Every field is read from the environment when an instance is created and
    falls back to the default given on that field when the variable is unset.
    """

    # HF_TOKEN. Excluded from the generated repr() so the secret is not
    # written to logs or tracebacks when a settings object is printed.
    hf_token: str = field(default_factory=lambda: os.getenv("HF_TOKEN", ""), repr=False)
    # HF_SPACE_NAME; empty string when unset.
    hf_space_name: str = field(default_factory=lambda: os.getenv("HF_SPACE_NAME", ""))
    # USE_HF_INFERENCE_API; only the text "true" (any letter case) enables it.
    use_inference_api: bool = field(
        default_factory=lambda: os.getenv("USE_HF_INFERENCE_API", "true").lower() == "true"
    )
    # HF_INFERENCE_TIMEOUT parsed with int(); unit is not shown here —
    # presumably seconds, confirm against the inference client.
    inference_timeout: int = field(
        default_factory=lambda: int(os.getenv("HF_INFERENCE_TIMEOUT", "120"))
    )
    # HF_MAX_NEW_TOKENS parsed with int().
    max_new_tokens: int = field(
        default_factory=lambda: int(os.getenv("HF_MAX_NEW_TOKENS", "2048"))
    )
    # HF_TEMPERATURE parsed with float().
    temperature: float = field(
        default_factory=lambda: float(os.getenv("HF_TEMPERATURE", "0.3"))
    )
33
+
34
+
35
@dataclass
class ModelSettings:
    """Model IDs used by each agent, drawn from diverse providers.

    Each field can be overridden through the environment variable named in
    its factory. Agent 1 is rule-based and needs no LLM, so it has no field.
    """

    # Agent 2 (Normalizer): fast structured output.
    # Default: Microsoft Phi (fast, great structured output).
    agent2_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT2_MODEL", "microsoft/Phi-3.5-mini-instruct"
        )
    )

    # Agent 3 (Advisor): strong reasoning - MOST IMPORTANT.
    # Default: Qwen 2.5 72B (freely available on HF serverless, no gated access needed).
    # Alternative: meta-llama/Llama-3.1-70B-Instruct (requires Meta license acceptance).
    agent3_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT3_MODEL", "Qwen/Qwen2.5-72B-Instruct"
        )
    )

    # Agent 4 (Generator): code/JSON specialist.
    # Default: Mistral Codestral (code specialist).
    agent4_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT4_MODEL", "mistralai/Codestral-22B-v0.1"
        )
    )

    # Fallback (must be freely available on HF serverless inference).
    fallback_model: str = field(
        default_factory=lambda: os.environ.get(
            "FALLBACK_MODEL", "Qwen/Qwen2.5-7B-Instruct"
        )
    )
55
+
56
+
57
@dataclass
class APISettings:
    """API key configuration (optional alternatives).

    Both keys are read from the environment when an instance is created and
    default to an empty string. They are excluded from the generated repr()
    so the secrets are not written to logs when a settings object is printed.
    """

    # ANTHROPIC_API_KEY; empty string when unset.
    anthropic_api_key: str = field(
        default_factory=lambda: os.getenv("ANTHROPIC_API_KEY", ""), repr=False
    )
    # OPENAI_API_KEY; empty string when unset.
    openai_api_key: str = field(
        default_factory=lambda: os.getenv("OPENAI_API_KEY", ""), repr=False
    )
62
+
63
+
64
@dataclass
class BrowserSettings:
    """Playwright browser options, each sourced from an environment variable."""

    # BROWSER_TYPE; "chromium" when unset.
    browser_type: str = field(
        default_factory=lambda: os.environ.get("BROWSER_TYPE", "chromium")
    )
    # BROWSER_HEADLESS; only the text "true" (any letter case) yields True.
    headless: bool = field(
        default_factory=lambda: "true" == os.environ.get("BROWSER_HEADLESS", "true").lower()
    )
    # BROWSER_TIMEOUT parsed with int(); presumably milliseconds — confirm
    # against the code that passes it to the browser.
    timeout: int = field(
        default_factory=lambda: int(os.environ.get("BROWSER_TIMEOUT", "30000"))
    )
    # NETWORK_IDLE_TIMEOUT parsed with int() (same unit assumption as above).
    network_idle_timeout: int = field(
        default_factory=lambda: int(os.environ.get("NETWORK_IDLE_TIMEOUT", "5000"))
    )
71
+
72
+
73
@dataclass
class CrawlSettings:
    """Website crawling limits and politeness options read from the environment."""

    # MAX_PAGES parsed with int().
    max_pages: int = field(
        default_factory=lambda: int(os.environ.get("MAX_PAGES", "20"))
    )
    # MIN_PAGES parsed with int().
    min_pages: int = field(
        default_factory=lambda: int(os.environ.get("MIN_PAGES", "10"))
    )
    # CRAWL_DELAY_MS parsed with int().
    crawl_delay_ms: int = field(
        default_factory=lambda: int(os.environ.get("CRAWL_DELAY_MS", "1000"))
    )
    # MAX_CONCURRENT_CRAWLS parsed with int().
    max_concurrent: int = field(
        default_factory=lambda: int(os.environ.get("MAX_CONCURRENT_CRAWLS", "3"))
    )
    # RESPECT_ROBOTS_TXT; only the text "true" (any letter case) yields True.
    respect_robots_txt: bool = field(
        default_factory=lambda: "true" == os.environ.get("RESPECT_ROBOTS_TXT", "true").lower()
    )
81
+
82
+
83
@dataclass
class ViewportSettings:
    """Viewport configuration for extraction.

    Holds fixed default sizes for one desktop and one mobile viewport; unlike
    the other settings classes, these fields are not read from the environment.
    """
    # Desktop viewport size — presumably CSS pixels; confirm against the
    # browser setup code that consumes these values.
    desktop_width: int = 1440
    desktop_height: int = 900
    # Mobile viewport size (same unit assumption as above).
    mobile_width: int = 375
    mobile_height: int = 812
90
+
91
+
92
@dataclass
class StorageSettings:
    """Persistent storage options read from the environment."""

    # STORAGE_PATH; "/data" when unset.
    storage_path: str = field(
        default_factory=lambda: os.environ.get("STORAGE_PATH", "/data")
    )
    # ENABLE_PERSISTENCE; only the text "true" (any letter case) yields True.
    enable_persistence: bool = field(
        default_factory=lambda: "true" == os.environ.get("ENABLE_PERSISTENCE", "true").lower()
    )
    # MAX_VERSIONS parsed with int().
    max_versions: int = field(
        default_factory=lambda: int(os.environ.get("MAX_VERSIONS", "10"))
    )
98
+
99
+
100
@dataclass
class UISettings:
    """UI server options read from the environment."""

    # SERVER_PORT parsed with int().
    server_port: int = field(
        default_factory=lambda: int(os.environ.get("SERVER_PORT", "7860"))
    )
    # SHARE; off unless the variable holds the text "true" (any letter case).
    share: bool = field(
        default_factory=lambda: "true" == os.environ.get("SHARE", "false").lower()
    )
    # UI_THEME; "soft" when unset.
    theme: str = field(
        default_factory=lambda: os.environ.get("UI_THEME", "soft")
    )
106
+
107
+
108
@dataclass
class FeatureFlags:
    """Feature toggles.

    Every flag is on when its environment variable is unset, and otherwise on
    only when the variable holds the text "true" (any letter case).
    """

    # FEATURE_COLOR_RAMPS
    color_ramps: bool = field(
        default_factory=lambda: "true" == os.environ.get("FEATURE_COLOR_RAMPS", "true").lower()
    )
    # FEATURE_TYPE_SCALES
    type_scales: bool = field(
        default_factory=lambda: "true" == os.environ.get("FEATURE_TYPE_SCALES", "true").lower()
    )
    # FEATURE_A11Y_CHECKS
    a11y_checks: bool = field(
        default_factory=lambda: "true" == os.environ.get("FEATURE_A11Y_CHECKS", "true").lower()
    )
    # FEATURE_PARALLEL_EXTRACTION
    parallel_extraction: bool = field(
        default_factory=lambda: "true" == os.environ.get("FEATURE_PARALLEL_EXTRACTION", "true").lower()
    )
115
+
116
+
117
@dataclass
class Settings:
    """Main settings container.

    Aggregates the per-area settings objects and the agent configuration read
    from ``agents.yaml`` located next to this module. Environment-backed
    fields are evaluated when the instance is constructed.
    """

    # DEBUG; off unless the variable holds the text "true" (any letter case).
    debug: bool = field(default_factory=lambda: os.getenv("DEBUG", "false").lower() == "true")
    # LOG_LEVEL; "INFO" when unset.
    log_level: str = field(default_factory=lambda: os.getenv("LOG_LEVEL", "INFO"))

    hf: HFSettings = field(default_factory=HFSettings)
    models: ModelSettings = field(default_factory=ModelSettings)
    api: APISettings = field(default_factory=APISettings)
    browser: BrowserSettings = field(default_factory=BrowserSettings)
    crawl: CrawlSettings = field(default_factory=CrawlSettings)
    viewport: ViewportSettings = field(default_factory=ViewportSettings)
    storage: StorageSettings = field(default_factory=StorageSettings)
    ui: UISettings = field(default_factory=UISettings)
    features: FeatureFlags = field(default_factory=FeatureFlags)

    # Agent configuration loaded from YAML. __post_init__ always replaces
    # this with the file's contents (or {}), so it is a dict afterwards.
    agents_config: dict = field(default_factory=dict)

    def __post_init__(self):
        """Load agent configuration from YAML after initialization."""
        self.load_agents_config()

    def load_agents_config(self):
        """Load agent personas and settings from ``agents.yaml``.

        Sets ``self.agents_config`` to the parsed mapping. A missing file, an
        empty file, or a document whose top level is not a mapping all result
        in an empty dict, so the lookup methods can rely on ``.get``.
        """
        yaml_path = Path(__file__).parent / "agents.yaml"
        if not yaml_path.exists():
            print(f"Warning: agents.yaml not found at {yaml_path}")
            self.agents_config = {}
            return

        # Explicit encoding so parsing does not depend on the platform locale.
        with open(yaml_path, "r", encoding="utf-8") as f:
            loaded = yaml.safe_load(f)

        if loaded is None:
            # safe_load returns None for an empty document.
            self.agents_config = {}
        elif isinstance(loaded, dict):
            self.agents_config = loaded
        else:
            print(f"Warning: agents.yaml at {yaml_path} is not a mapping; ignoring it")
            self.agents_config = {}

    def get_agent_persona(self, agent_name: str) -> str:
        """Get persona string for an agent.

        Args:
            agent_name: Name without the ``agent_`` prefix; the lookup key is
                ``f"agent_{agent_name}"``.

        Returns:
            The entry's ``persona`` value, or ``""`` when the agent, the
            persona key, or its value is missing.
        """
        # A null persona in the YAML would otherwise be returned as None.
        return self.get_agent_config(agent_name).get("persona") or ""

    def get_agent_config(self, agent_name: str) -> dict:
        """Get full configuration for an agent.

        Args:
            agent_name: Name without the ``agent_`` prefix.

        Returns:
            The mapping stored under ``f"agent_{agent_name}"``, or ``{}`` when
            the entry is absent or is not a mapping (e.g. a null YAML value).
        """
        agent_cfg = self.agents_config.get(f"agent_{agent_name}")
        return agent_cfg if isinstance(agent_cfg, dict) else {}

    def get_model_for_agent(self, agent_name: str) -> str:
        """Get the model ID for a specific agent.

        Args:
            agent_name: One of the agent names in the table below.

        Returns:
            The configured model ID, or ``self.models.fallback_model`` for a
            name that is not in the table.
        """
        model_map = {
            # Legacy agents
            "normalizer": self.models.agent2_model,
            "advisor": self.models.agent3_model,
            "generator": self.models.agent4_model,

            # Stage 2 New Architecture agents
            # agent3_model is the reasoning model (default: Qwen 2.5 72B).
            "brand_identifier": self.models.agent3_model,  # needs context understanding
            "benchmark_advisor": self.models.agent3_model,  # needs reasoning
            "best_practices_validator": self.models.agent3_model,  # needs judgment
            "head_synthesizer": self.models.agent3_model,  # needs synthesis
            # agent2_model is the structured-output model (default: Phi-3.5).
            "benchmark_extractor": self.models.agent2_model,  # structured extraction
        }
        return model_map.get(agent_name, self.models.fallback_model)

    def validate(self) -> list[str]:
        """Validate settings and return list of errors.

        Returns:
            Human-readable error messages; an empty list when no problem was
            found. Nothing is raised for invalid settings.
        """
        errors = []

        if not self.hf.hf_token:
            errors.append("HF_TOKEN is required for model inference")

        if self.crawl.max_pages < self.crawl.min_pages:
            errors.append("MAX_PAGES must be >= MIN_PAGES")

        return errors
190
+
191
+
192
# Global settings instance, built once when this module is imported
# (this reads the environment and agents.yaml at import time).
settings: Settings = Settings()
194
+
195
+
196
def get_settings() -> Settings:
    """Get the global settings instance.

    Returns:
        The object currently bound to the module-level ``settings`` name.
    """
    return settings
199
+
200
+
201
def reload_settings() -> Settings:
    """Reload settings from environment and config files.

    Builds a fresh ``Settings`` and rebinds the module-level ``settings`` name
    to it.

    Returns:
        The newly created ``Settings`` instance.
    """
    # NOTE: only this module's name is rebound; code that did
    # `from <this module> import settings` earlier keeps the old object.
    global settings
    settings = Settings()
    return settings
206
+
207
+
208
+ # Convenience functions
209
def is_debug() -> bool:
    """Check if debug mode is enabled.

    Returns:
        The ``debug`` field of the global settings instance.
    """
    return settings.debug
212
+
213
+
214
def get_hf_token() -> str:
    """Get HuggingFace token.

    Returns:
        ``settings.hf.hf_token`` from the global settings instance.
    """
    return settings.hf.hf_token
217
+
218
+
219
def get_agent_persona(agent_name: str) -> str:
    """Get persona for an agent.

    Convenience wrapper that delegates to ``get_agent_persona`` on the global
    settings instance.

    Args:
        agent_name: Agent name passed through unchanged.
    """
    return settings.get_agent_persona(agent_name)
222
+
223
+
224
def get_model_for_agent(agent_name: str) -> str:
    """Get model ID for an agent.

    Convenience wrapper that delegates to ``get_model_for_agent`` on the
    global settings instance.

    Args:
        agent_name: Agent name passed through unchanged.
    """
    return settings.get_model_for_agent(agent_name)