"""
Configuration management using pydantic-settings.
All settings are loaded from environment variables.
"""
import hashlib
import json
import logging
from functools import lru_cache
from pathlib import Path
from typing import Optional
from pydantic_settings import BaseSettings, SettingsConfigDict
logger = logging.getLogger(__name__)
class Settings(BaseSettings):
    """Application settings loaded from environment variables.

    Every field may be overridden by an environment variable of the same
    name (case-insensitive); a local ``.env`` file is also read.
    Unknown environment variables are ignored (``extra="ignore"``).
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore"
    )

    # Database
    database_url: str = "sqlite:////data/app.db"

    # Pipeline lock & model storage
    pipeline_lock_file: str = "/data/pipeline.lock"
    model_dir: str = "/data/models"

    # News sources
    newsapi_key: Optional[str] = None
    news_query: str = "copper OR copper price OR copper futures OR copper mining"
    news_language: str = "en"

    # Symbol set configuration: active | champion | challenger
    symbol_set: str = "active"

    # Price data (yfinance) - Dashboard symbols (backward compatible)
    yfinance_symbols: str = "HG=F,DX-Y.NYB,CL=F,FXI,COPX,COPJ,BHP,FCX,SCCO,RIO,TECK,LUN.TO,IVN.TO,2899.HK"
    lookback_days: int = 730  # 2 years for better pattern learning

    # Fuzzy deduplication
    fuzzy_dedup_threshold: int = 85
    fuzzy_dedup_window_hours: int = 48

    # Sentiment aggregation
    sentiment_tau_hours: float = 12.0
    sentiment_missing_fill: float = 0.0
    sentiment_non_neutral_boost: float = 1.35
    sentiment_soft_neutral_polarity_threshold: float = 0.12
    sentiment_soft_neutral_max_mag: float = 0.25
    sentiment_soft_neutral_scale: float = 0.8
    sentiment_relevance_min: float = 0.35
    sentiment_escalate_conflict_threshold: float = 0.55
    sentiment_horizon_days: int = 5
    scoring_source: str = "news_processed"

    # API settings
    analysis_ttl_minutes: int = 30
    log_level: str = "INFO"

    # Futures vs Spot adjustment factor
    futures_spot_adjustment: float = 0.985

    # Scheduler (DEPRECATED in API - external scheduler only)
    # These are kept for backward compatibility but scheduler no longer runs in API
    schedule_time: str = "02:00"
    tz: str = "Europe/Istanbul"
    scheduler_enabled: bool = False  # Default to False - scheduler is external now

    # Redis Queue (for worker)
    redis_url: str = "redis://localhost:6379/0"

    # OpenRouter AI Commentary
    openrouter_api_key: Optional[str] = None
    # Deprecated - kept for backward compatibility
    openrouter_model: str = "arcee-ai/trinity-large-preview:free"
    # Scoring models:
    #   fast       -> stepfun/step-3.5-flash:free (196B MoE, 256K ctx, system prompt + JSON OK)
    #   reliable   -> mistralai/mistral-small-3.1-24b-instruct:free (128K ctx, 24B, reliable JSON)
    #   commentary -> same as fast for balanced quality/speed
    # NOTE: google/gemma-3-4b-it:free fails on Google AI Studio (system prompt blocked).
    #       google/gemma-3n-e4b-it:free (nano) also blocks system prompts - do NOT use.
    openrouter_model_scoring: str = "stepfun/step-3.5-flash:free"
    openrouter_model_scoring_fast: Optional[str] = None
    openrouter_model_scoring_reliable: Optional[str] = "mistralai/mistral-small-3.1-24b-instruct:free"
    openrouter_model_commentary: str = "stepfun/step-3.5-flash:free"
    openrouter_rpm: int = 18
    openrouter_max_retries: int = 3
    # Free tier: 50 req/day. At 12 articles/chunk, 100 articles = ~9 chunks = ~9-18 req.
    # Keep well under the daily limit to avoid rate-limit cascades mid-run.
    max_llm_articles_per_run: int = 100
    openrouter_fallback_models: Optional[str] = None
    tokenizers_parallelism: str = "false"

    # Twelve Data (Live Price)
    twelvedata_api_key: Optional[str] = None

    # Inference sentiment adjustment (aggressive but capped)
    inference_sentiment_multiplier_max: float = 2.0
    inference_sentiment_multiplier_min: float = 0.5
    inference_sentiment_news_ref: int = 30
    inference_sentiment_power_ref: float = 0.20
    inference_tiny_signal_threshold: float = 0.0015
    inference_tiny_signal_floor: float = 0.0025
    inference_return_cap: float = 0.02

    # LLM Sentiment Analysis
    # Deprecated - kept for backward compatibility
    llm_sentiment_model: str = "arcee-ai/trinity-large-preview:free"

    # Pipeline trigger authentication
    pipeline_trigger_secret: Optional[str] = None

    # Faz 2: Market cut-off for news aggregation
    market_timezone: str = "America/New_York"  # NYSE timezone
    market_close_time: str = "16:00"  # 4 PM ET
    cutoff_buffer_minutes: int = 30  # Allow 30 min after close for late news

    # TFT-ASRO Deep Learning
    tft_enabled: bool = True
    tft_embedding_batch_size: int = 64
    tft_pca_dim: int = 32
    tft_embedding_backfill_days: int = 30
    tft_train_on_pipeline: bool = False

    nasdaq_data_link_api_key: Optional[str] = None

    def _load_symbol_set_file(self, set_name: str) -> Optional[dict]:
        """Load a symbol set from ``config/symbol_sets/<set_name>.json``.

        Returns the parsed dict, or None when the file is missing, unparsable,
        or has an empty "symbols" list. Never raises: this is a best-effort
        loader and callers fall back to env-configured symbols on None.
        """
        try:
            # Path relative to backend root (this file lives one package deep).
            backend_root = Path(__file__).resolve().parent.parent
            symbol_file = backend_root / "config" / "symbol_sets" / f"{set_name}.json"
            if not symbol_file.exists():
                logger.warning("Symbol set file not found: %s", symbol_file)
                return None
            # Explicit encoding: symbol files are written as UTF-8.
            with open(symbol_file, encoding="utf-8") as f:
                data = json.load(f)
            symbols = data.get("symbols", [])
            if not symbols:
                logger.warning("Symbol set %s has empty symbols list", set_name)
                return None
            return data
        except Exception as e:
            logger.error("Error loading symbol set %s: %s", set_name, e)
            return None

    def _compute_symbols_hash(self, symbols: list[str]) -> str:
        """Compute a deterministic, order-independent hash of a symbol list.

        The list is sorted and JSON-canonicalized first so that the same set
        of symbols always yields the same hash, regardless of input order.
        """
        canonical = json.dumps(sorted(symbols), sort_keys=True)
        return f"sha256:{hashlib.sha256(canonical.encode()).hexdigest()[:16]}"

    @property
    def training_symbols(self) -> list[str]:
        """
        Symbols for ML training - loaded from the configured symbol set file.
        Falls back to the dashboard symbols (YFINANCE_SYMBOLS) on error.
        """
        data = self._load_symbol_set_file(self.symbol_set)
        if data:
            symbols = data.get("symbols", [])
            logger.info(
                "Loaded training symbols from file: %s.json (%d) hash=%s",
                self.symbol_set, len(symbols), self._compute_symbols_hash(symbols),
            )
            return symbols
        # Fallback to env variable
        logger.warning("Falling back to YFINANCE_SYMBOLS for training")
        return self.symbols_list

    @property
    def training_symbols_source(self) -> str:
        """Source of training symbols for audit (file vs env fallback)."""
        data = self._load_symbol_set_file(self.symbol_set)
        if data:
            return f"file:{self.symbol_set}.json"
        return "env:YFINANCE_SYMBOLS"

    @property
    def training_symbols_hash(self) -> str:
        """Hash of the effective training symbols for audit."""
        return self._compute_symbols_hash(self.training_symbols)

    @property
    def symbols_list(self) -> list[str]:
        """
        Dashboard symbols - backward compatible with the frontend.
        Always parsed from the env variable (14 symbols by default).
        """
        return [s.strip() for s in self.yfinance_symbols.split(",") if s.strip()]

    @property
    def target_symbol(self) -> str:
        """Primary symbol for predictions (first in list, HG=F by default)."""
        symbols = self.symbols_list
        return symbols[0] if symbols else "HG=F"

    @staticmethod
    def _first_non_empty(*values: Optional[str]) -> Optional[str]:
        """Return the first non-empty, stripped string value, or None."""
        for value in values:
            if value and value.strip():
                return value.strip()
        return None

    @property
    def resolved_scoring_model(self) -> str:
        """Preferred scoring model with backward-compatible fallback chain."""
        return (
            self._first_non_empty(
                self.openrouter_model_scoring_fast,
                self.openrouter_model_scoring,
                self.llm_sentiment_model,
                self.openrouter_model,
            )
            or "arcee-ai/trinity-large-preview:free"
        )

    @property
    def resolved_scoring_fast_model(self) -> str:
        """Fast model used for primary sentiment scoring."""
        return self.resolved_scoring_model

    @property
    def resolved_scoring_reliable_model(self) -> str:
        """Reliable model used for escalation/retry on malformed outputs."""
        return (
            self._first_non_empty(
                self.openrouter_model_scoring_reliable,
                self.openrouter_model,
                self.llm_sentiment_model,
                self.openrouter_model_scoring,
            )
            or "arcee-ai/trinity-large-preview:free"
        )

    @property
    def resolved_commentary_model(self) -> str:
        """Preferred commentary model with backward-compatible fallback chain."""
        return (
            self._first_non_empty(
                self.openrouter_model_commentary,
                self.openrouter_model,
                self.llm_sentiment_model,
            )
            or "arcee-ai/trinity-large-preview:free"
        )

    @property
    def openrouter_fallback_models_list(self) -> list[str]:
        """
        Parse the comma-separated fallback models env value.
        Empty/whitespace items are ignored; returns [] when unset.
        """
        if not self.openrouter_fallback_models:
            return []
        return [m.strip() for m in self.openrouter_fallback_models.split(",") if m.strip()]
@lru_cache
def get_settings() -> Settings:
    """Lazily build the Settings object and reuse it for the process lifetime.

    Decorating a zero-argument factory with lru_cache makes this an
    idiomatic singleton: the first call constructs Settings() (reading the
    environment and .env), every later call returns the same instance.
    """
    return Settings()
def mask_api_key(text: str, settings: Optional["Settings"] = None) -> str:
    """
    Mask API keys in text to prevent leaking secrets in logs.

    Args:
        text: Arbitrary text (e.g. a URL or log line) that may contain keys.
        settings: Settings instance supplying the known secrets; the cached
            global settings are used when omitted.

    Returns:
        The text with each known key longer than 8 characters replaced by a
        "XXXX...XXXX" form (first/last 4 chars kept), and any ``apikey=``
        query-parameter value replaced with ``***MASKED***``.
    """
    import re

    if settings is None:
        settings = get_settings()

    result = text
    # Known secrets from configuration. Keys of length <= 8 are skipped:
    # masking them would still reveal most of the value.
    keys_to_mask = [
        key
        for key in (
            settings.twelvedata_api_key,
            settings.openrouter_api_key,
            settings.newsapi_key,
            settings.pipeline_trigger_secret,
        )
        if key and len(key) > 8
    ]
    # Replace longest keys first so a key containing another key as a
    # substring is masked before the shorter one can partially rewrite it.
    for key in sorted(keys_to_mask, key=len, reverse=True):
        masked = f"{key[:4]}...{key[-4:]}"
        result = result.replace(key, masked)
    # Also mask any apikey= query params, even for keys we don't know about.
    result = re.sub(r'apikey=[a-zA-Z0-9_-]+', 'apikey=***MASKED***', result)
    return result