riazmo committed on
Commit
31d44fa
·
verified ·
1 Parent(s): 468fa3d

Delete config/settings.py

Browse files
Files changed (1) hide show
  1. config/settings.py +0 -226
config/settings.py DELETED
@@ -1,226 +0,0 @@
1
- """
2
- Application Settings
3
- Design System Extractor v2
4
-
5
- Loads configuration from environment variables and YAML files.
6
- """
7
-
8
- import os
9
- from pathlib import Path
10
- from typing import Optional
11
- from dataclasses import dataclass, field
12
- from dotenv import load_dotenv
13
- import yaml
14
-
15
# Load environment variables from a .env file: prefer the one next to this
# module; otherwise fall back to the repository root (development layout).
env_path = Path(__file__).parent / ".env"
load_dotenv(env_path if env_path.exists() else Path(__file__).parent.parent / ".env")
22
-
23
-
24
@dataclass
class HFSettings:
    """Hugging Face configuration: token, Space name, and inference knobs.

    Every value is read from the environment lazily (via ``default_factory``)
    at instantiation time, so rebuilding the object picks up env changes.
    """
    hf_token: str = field(
        default_factory=lambda: os.environ.get("HF_TOKEN", ""))
    hf_space_name: str = field(
        default_factory=lambda: os.environ.get("HF_SPACE_NAME", ""))
    # Flag is the literal string "true" (case-insensitive); anything else is False.
    use_inference_api: bool = field(
        default_factory=lambda: os.environ.get("USE_HF_INFERENCE_API", "true").lower() == "true")
    inference_timeout: int = field(
        default_factory=lambda: int(os.environ.get("HF_INFERENCE_TIMEOUT", "120")))
    max_new_tokens: int = field(
        default_factory=lambda: int(os.environ.get("HF_MAX_NEW_TOKENS", "2048")))
    temperature: float = field(
        default_factory=lambda: float(os.environ.get("HF_TEMPERATURE", "0.3")))
33
-
34
-
35
@dataclass
class ModelSettings:
    """Per-agent model selection — intentionally mixes providers."""

    # Agent 1 is rule-based and uses no LLM at all.

    # Agent 2 (Normalizer): fast model with solid structured output.
    agent2_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT2_MODEL", "microsoft/Phi-3.5-mini-instruct"))

    # Agent 3 (Advisor): strongest reasoning — the most important slot.
    # Default is Qwen 2.5 72B (ungated on HF serverless inference);
    # meta-llama/Llama-3.1-70B-Instruct also works but requires accepting
    # Meta's license.
    agent3_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT3_MODEL", "Qwen/Qwen2.5-72B-Instruct"))

    # Agent 4 (Generator): code/JSON specialist.
    agent4_model: str = field(
        default_factory=lambda: os.environ.get(
            "AGENT4_MODEL", "mistralai/Codestral-22B-v0.1"))

    # Fallback — must be freely reachable on HF serverless inference.
    fallback_model: str = field(
        default_factory=lambda: os.environ.get(
            "FALLBACK_MODEL", "Qwen/Qwen2.5-7B-Instruct"))
55
-
56
-
57
@dataclass
class APISettings:
    """Optional third-party API keys (alternatives to HF inference)."""
    anthropic_api_key: str = field(
        default_factory=lambda: os.environ.get("ANTHROPIC_API_KEY", ""))
    openai_api_key: str = field(
        default_factory=lambda: os.environ.get("OPENAI_API_KEY", ""))
62
-
63
-
64
@dataclass
class BrowserSettings:
    """Playwright browser configuration (engine, headless flag, timeouts in ms)."""
    browser_type: str = field(
        default_factory=lambda: os.environ.get("BROWSER_TYPE", "chromium"))
    # True only when the env var is the string "true" (case-insensitive).
    headless: bool = field(
        default_factory=lambda: os.environ.get("BROWSER_HEADLESS", "true").lower() == "true")
    timeout: int = field(
        default_factory=lambda: int(os.environ.get("BROWSER_TIMEOUT", "30000")))
    network_idle_timeout: int = field(
        default_factory=lambda: int(os.environ.get("NETWORK_IDLE_TIMEOUT", "5000")))
71
-
72
-
73
@dataclass
class CrawlSettings:
    """Website crawling limits and politeness settings."""
    max_pages: int = field(
        default_factory=lambda: int(os.environ.get("MAX_PAGES", "20")))
    min_pages: int = field(
        default_factory=lambda: int(os.environ.get("MIN_PAGES", "10")))
    crawl_delay_ms: int = field(
        default_factory=lambda: int(os.environ.get("CRAWL_DELAY_MS", "1000")))
    max_concurrent: int = field(
        default_factory=lambda: int(os.environ.get("MAX_CONCURRENT_CRAWLS", "3")))
    respect_robots_txt: bool = field(
        default_factory=lambda: os.environ.get("RESPECT_ROBOTS_TXT", "true").lower() == "true")
81
-
82
-
83
@dataclass
class ViewportSettings:
    """Fixed viewport sizes (px) used during extraction; not env-configurable."""
    # Desktop: common laptop resolution.
    desktop_width: int = 1440
    desktop_height: int = 900
    # Mobile: iPhone X-class dimensions.
    mobile_width: int = 375
    mobile_height: int = 812
90
-
91
-
92
@dataclass
class StorageSettings:
    """Persistent storage configuration (path, on/off switch, retention)."""
    storage_path: str = field(
        default_factory=lambda: os.environ.get("STORAGE_PATH", "/data"))
    enable_persistence: bool = field(
        default_factory=lambda: os.environ.get("ENABLE_PERSISTENCE", "true").lower() == "true")
    # How many saved versions to retain before pruning.
    max_versions: int = field(
        default_factory=lambda: int(os.environ.get("MAX_VERSIONS", "10")))
98
-
99
-
100
@dataclass
class UISettings:
    """Gradio-style UI configuration (port, public share flag, theme)."""
    server_port: int = field(
        default_factory=lambda: int(os.environ.get("SERVER_PORT", "7860")))
    share: bool = field(
        default_factory=lambda: os.environ.get("SHARE", "false").lower() == "true")
    theme: str = field(
        default_factory=lambda: os.environ.get("UI_THEME", "soft"))
106
-
107
-
108
@dataclass
class FeatureFlags:
    """Feature toggles; each defaults to enabled unless its env var says otherwise."""
    color_ramps: bool = field(
        default_factory=lambda: os.environ.get("FEATURE_COLOR_RAMPS", "true").lower() == "true")
    type_scales: bool = field(
        default_factory=lambda: os.environ.get("FEATURE_TYPE_SCALES", "true").lower() == "true")
    a11y_checks: bool = field(
        default_factory=lambda: os.environ.get("FEATURE_A11Y_CHECKS", "true").lower() == "true")
    parallel_extraction: bool = field(
        default_factory=lambda: os.environ.get("FEATURE_PARALLEL_EXTRACTION", "true").lower() == "true")
115
-
116
-
117
@dataclass
class Settings:
    """Top-level settings container aggregating all sub-configurations.

    Every sub-setting is built via ``default_factory`` so each ``Settings()``
    call re-reads the current environment; agent personas are loaded from
    ``config/agents.yaml`` in ``__post_init__``.
    """
    debug: bool = field(default_factory=lambda: os.getenv("DEBUG", "false").lower() == "true")
    log_level: str = field(default_factory=lambda: os.getenv("LOG_LEVEL", "INFO"))

    hf: HFSettings = field(default_factory=HFSettings)
    models: ModelSettings = field(default_factory=ModelSettings)
    api: APISettings = field(default_factory=APISettings)
    browser: BrowserSettings = field(default_factory=BrowserSettings)
    crawl: CrawlSettings = field(default_factory=CrawlSettings)
    viewport: ViewportSettings = field(default_factory=ViewportSettings)
    storage: StorageSettings = field(default_factory=StorageSettings)
    ui: UISettings = field(default_factory=UISettings)
    features: FeatureFlags = field(default_factory=FeatureFlags)

    # Agent configuration loaded from YAML; keys look like "agent_<name>".
    agents_config: dict = field(default_factory=dict)

    def __post_init__(self):
        """Load agent configuration from YAML after dataclass initialization."""
        self.load_agents_config()

    def load_agents_config(self):
        """Load agent personas and settings from ``agents.yaml``.

        Always leaves ``self.agents_config`` as a dict: a missing file or an
        empty YAML document falls back to ``{}`` so the ``.get`` lookups in
        the accessors below never operate on ``None``.
        """
        yaml_path = Path(__file__).parent / "agents.yaml"
        if yaml_path.exists():
            with open(yaml_path, "r") as f:
                # BUGFIX: yaml.safe_load returns None for an empty document;
                # coerce to {} so agents_config is always a dict.
                self.agents_config = yaml.safe_load(f) or {}
        else:
            print(f"Warning: agents.yaml not found at {yaml_path}")
            self.agents_config = {}

    def get_agent_persona(self, agent_name: str) -> str:
        """Return the persona string for *agent_name*, or '' when absent."""
        agent_key = f"agent_{agent_name}"
        if agent_key in self.agents_config:
            return self.agents_config[agent_key].get("persona", "")
        return ""

    def get_agent_config(self, agent_name: str) -> dict:
        """Return the full YAML config dict for *agent_name*, or {} when absent."""
        agent_key = f"agent_{agent_name}"
        return self.agents_config.get(agent_key, {})

    def get_model_for_agent(self, agent_name: str) -> str:
        """Return the model ID for *agent_name*.

        Unknown agent names map to ``models.fallback_model``.
        """
        # NOTE(review): earlier comments here said "Llama 70B", but the
        # actual agent3_model default is Qwen 2.5 72B — see ModelSettings.
        model_map = {
            # Legacy agents
            "normalizer": self.models.agent2_model,
            "advisor": self.models.agent3_model,
            "generator": self.models.agent4_model,

            # Stage 2 new-architecture agents
            "brand_identifier": self.models.agent3_model,       # needs context understanding
            "benchmark_advisor": self.models.agent3_model,      # needs reasoning
            "best_practices_validator": self.models.agent3_model,  # needs judgment
            "head_synthesizer": self.models.agent3_model,       # needs synthesis
            "benchmark_extractor": self.models.agent2_model,    # structured extraction
        }
        return model_map.get(agent_name, self.models.fallback_model)

    def validate(self) -> list[str]:
        """Validate settings and return a list of human-readable errors."""
        errors = []

        if not self.hf.hf_token:
            errors.append("HF_TOKEN is required for model inference")

        if self.crawl.max_pages < self.crawl.min_pages:
            errors.append("MAX_PAGES must be >= MIN_PAGES")

        return errors
190
-
191
-
192
# Global settings instance, created once at import time; call
# reload_settings() to rebuild it after the environment or YAML changes.
settings = Settings()
194
-
195
-
196
def get_settings() -> Settings:
    """Return the process-wide :class:`Settings` singleton."""
    return settings
199
-
200
-
201
def reload_settings() -> Settings:
    """Rebuild the global settings from the current environment and config files.

    Replaces the module-level ``settings`` singleton and returns the fresh
    instance so callers can use it directly.
    """
    global settings
    settings = Settings()
    return settings
206
-
207
-
208
# --- Convenience accessors over the global settings instance ---
def is_debug() -> bool:
    """Return True when debug mode is enabled in the global settings."""
    return settings.debug
212
-
213
-
214
def get_hf_token() -> str:
    """Return the configured Hugging Face token ('' when unset)."""
    return settings.hf.hf_token
217
-
218
-
219
def get_agent_persona(agent_name: str) -> str:
    """Module-level shortcut for ``settings.get_agent_persona``."""
    return settings.get_agent_persona(agent_name)
222
-
223
-
224
def get_model_for_agent(agent_name: str) -> str:
    """Module-level shortcut for ``settings.get_model_for_agent``."""
    return settings.get_model_for_agent(agent_name)