"""
Central configuration for the TFT-ASRO deep learning pipeline.
All hyperparameters, feature dimensions, and training settings live here
so every module draws from a single source of truth.
Model paths honour the MODEL_DIR environment variable so they work both
locally (``data/models``) and inside the HF Space container
(``/data/models``).
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional
def _model_dir() -> str:
"""Resolve the base model directory from env (same as app.settings)."""
return os.environ.get("MODEL_DIR", "/data/models")
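# Usage note (illustrative values): inside the HF Space container MODEL_DIR is
# typically unset, so this resolves to "/data/models"; locally, exporting e.g.
# ``MODEL_DIR=data/models`` makes the same helper return that relative path.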
@dataclass(frozen=True)
class EmbeddingConfig:
model_name: str = "ProsusAI/finbert"
full_dim: int = 768
pca_dim: int = 32
max_token_length: int = 512
batch_size: int = 64
pca_model_path: str = ""
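# Illustrative sketch (not the pipeline's actual embedding module): how the
# full_dim FinBERT vectors are expected to be reduced to pca_dim using the
# fitted PCA stored at ``pca_model_path``. Assumes scikit-learn / joblib and a
# (n_articles, 768) numpy array; the helper name is hypothetical.
def _example_reduce_embeddings(embeddings, pca_model_path: str):
    """Project full_dim FinBERT embeddings down to pca_dim components."""
    import joblib  # local import keeps this config module dependency-free

    pca = joblib.load(pca_model_path)   # fitted sklearn PCA with 32 components
    return pca.transform(embeddings)    # shape: (n_articles, pca_dim)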
@dataclass(frozen=True)
class SentimentFeatureConfig:
momentum_windows: tuple[int, ...] = (5, 10, 30)
surprise_lookback: int = 30
surprise_threshold: float = 2.0
event_types: tuple[str, ...] = (
"supply_disruption",
"supply_expansion",
"demand_increase",
"demand_decrease",
"inventory_draw",
"inventory_build",
"policy_support",
"policy_drag",
"macro_usd_up",
"macro_usd_down",
"cost_push",
)
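# Rough sketch, assuming pandas, of how the settings above could translate into
# features: rolling sentiment momentum over each window plus a z-score
# "surprise" flag. The real feature builder lives elsewhere; names here are
# hypothetical.
def _example_sentiment_features(daily_sentiment, cfg: SentimentFeatureConfig):
    """daily_sentiment: pandas Series of mean daily sentiment scores."""
    feats = {}
    for w in cfg.momentum_windows:  # (5, 10, 30) day momentum
        feats[f"sent_mom_{w}"] = daily_sentiment.rolling(w).mean()
    mu = daily_sentiment.rolling(cfg.surprise_lookback).mean()
    sigma = daily_sentiment.rolling(cfg.surprise_lookback).std()
    z = (daily_sentiment - mu) / sigma
    feats["sent_surprise"] = (z.abs() > cfg.surprise_threshold).astype(int)
    return feats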
@dataclass(frozen=True)
class LMEConfig:
nasdaq_api_key_env: str = "NASDAQ_DATA_LINK_API_KEY"
quandl_dataset: str = "LME/PR_CU"
stock_change_windows: tuple[int, ...] = (1, 5, 10, 20)
depletion_window: int = 20
futures_symbols: tuple[str, ...] = ("HG=F",)
futures_months_ahead: tuple[int, ...] = (3, 6, 12)
max_ffill_days: int = 5
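# Minimal sketch, assuming pandas, of two transforms these fields imply:
# percentage stock changes over each window and a bounded forward-fill so a
# stale LME print never propagates for more than max_ffill_days. Hypothetical
# helper, not the actual LME ingestion code.
def _example_lme_transforms(lme_stocks, cfg: LMEConfig):
    """lme_stocks: pandas Series of daily LME warehouse stock levels."""
    out = {f"stock_chg_{w}d": lme_stocks.pct_change(periods=w)
           for w in cfg.stock_change_windows}
    out["stocks_ffilled"] = lme_stocks.ffill(limit=cfg.max_ffill_days)
    return out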
@dataclass(frozen=True)
class TFTModelConfig:
max_encoder_length: int = 60
max_prediction_length: int = 5
# hidden_size 64β†’32: the VSN encoder had 3.2M params for only 313 training
# samples (344 features Γ— hidden_size Γ— hidden_continuous_size).
# Halving hidden_size halves that dominant layer while keeping expressiveness.
hidden_size: int = 32
# attention_head_size 4β†’2: fewer heads for a small, single-series dataset.
attention_head_size: int = 2
# dropout 0.1β†’0.3: 313 samples / ~900K params still demands heavy regularisation.
dropout: float = 0.3
hidden_continuous_size: int = 16 # was 32; paired reduction with hidden_size
quantiles: tuple[float, ...] = (0.02, 0.10, 0.25, 0.50, 0.75, 0.90, 0.98)
# lr 1e-3β†’3e-4: smaller batches produce noisier gradients; a conservative LR
# reduces the risk of overshooting in the narrow loss landscape.
learning_rate: float = 3e-4
reduce_on_plateau_patience: int = 4
# clip 0.5β†’1.0: tanh-based Sharpe gradients are inherently bounded;
# relaxing the clip lets the model escape flat regions more aggressively.
gradient_clip_val: float = 1.0
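# Sketch of how these fields map onto a pytorch-forecasting model, assuming the
# standard ``TemporalFusionTransformer.from_dataset`` API; the project's actual
# builder (which swaps in the custom ASRO loss) may differ. ``dataset`` is a
# fitted TimeSeriesDataSet and is not constructed here.
def _example_build_tft(dataset, cfg: TFTModelConfig):
    from pytorch_forecasting import TemporalFusionTransformer
    from pytorch_forecasting.metrics import QuantileLoss

    return TemporalFusionTransformer.from_dataset(
        dataset,
        hidden_size=cfg.hidden_size,
        attention_head_size=cfg.attention_head_size,
        dropout=cfg.dropout,
        hidden_continuous_size=cfg.hidden_continuous_size,
        learning_rate=cfg.learning_rate,
        reduce_on_plateau_patience=cfg.reduce_on_plateau_patience,
        loss=QuantileLoss(quantiles=list(cfg.quantiles)),
        output_size=len(cfg.quantiles),
    )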
@dataclass(frozen=True)
class ASROConfig:
# Total loss = lambda_quantile * calibration + (1-lambda_quantile) * sharpe
#
# lambda_quantile is the EXPLICIT weight of the quantile calibration bundle:
# calibration = q_loss + lambda_vol * vol_loss
# w_sharpe = 1 - lambda_quantile (the complementary directional weight)
#
# This normalised (sum-to-1) formulation makes both components interpretable
# and prevents either from silently dominating across loss-magnitude regimes.
#
# 0.4 / 0.6 split: 40% calibration (keeps TFT probabilistic),
# 60% Sharpe (drives directional / amplitude learning)
lambda_quantile: float = 0.4 # w_quantile; was 0.3 (unnormalised old formula)
# lambda_vol is a sub-weight within the calibration bundle only.
# It controls how much the Q90-Q10 spread tracks 2Γ— actual Οƒ.
# Two independent Optuna runs (20 trials each) both converged on 0.35, so the
# default is updated to match that confirmed optimum.
lambda_vol: float = 0.35
risk_free_rate: float = 0.0
sharpe_window: int = 20
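# Worked sketch of the normalised combination described above, using plain
# floats so the weighting is easy to audit; the real loss operates on tensors.
def _example_asro_total_loss(q_loss: float, vol_loss: float,
                             sharpe_loss: float, cfg: ASROConfig) -> float:
    """total = lambda_q * (q_loss + lambda_vol * vol_loss) + (1 - lambda_q) * sharpe_loss"""
    calibration = q_loss + cfg.lambda_vol * vol_loss
    return cfg.lambda_quantile * calibration + (1.0 - cfg.lambda_quantile) * sharpe_loss
# e.g. q_loss=0.8, vol_loss=0.2, sharpe_loss=1.0 with the defaults gives
# 0.4 * (0.8 + 0.35 * 0.2) + 0.6 * 1.0 = 0.948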
@dataclass(frozen=True)
class TrainingConfig:
max_epochs: int = 100
# patience 10β†’15: with 19 batches/epoch (vs 4 before) each epoch carries
# more information; give the model more time to converge.
early_stopping_patience: int = 15
# batch_size 64β†’16: 313 samples / 64 = 4 batches/epoch β†’ noisy gradients.
# 313 / 16 β‰ˆ 19 batches/epoch gives stable, consistent gradient estimates.
batch_size: int = 16
val_ratio: float = 0.15
test_ratio: float = 0.10
lookback_days: int = 730
seed: int = 42
num_workers: int = 0
optuna_n_trials: int = 50
checkpoint_dir: str = ""
best_model_path: str = ""
hf_model_repo: str = "ifieryarrows/copper-mind-tft"
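# Back-of-the-envelope sketch of how the ratios above carve up a chronological
# sample set. The sample count is illustrative (313 is taken from the comments
# above); the real split is applied to the assembled feature frame.
def _example_split_sizes(n_samples: int, cfg: TrainingConfig) -> tuple[int, int, int]:
    n_val = int(n_samples * cfg.val_ratio)
    n_test = int(n_samples * cfg.test_ratio)
    n_train = n_samples - n_val - n_test
    return n_train, n_val, n_test
# e.g. _example_split_sizes(313, TrainingConfig()) == (236, 46, 31)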
@dataclass(frozen=True)
class FeatureStoreConfig:
target_symbol: str = "HG=F"
max_ffill: int = 3
calendar_features: bool = True
macro_event_features: bool = True
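# Small sketch, assuming pandas, of the kind of calendar features the flag
# above switches on (day-of-week / month seasonality for the HG=F target); the
# concrete feature list is defined by the feature store itself.
def _example_calendar_features(index):
    """index: pandas DatetimeIndex of the joined feature frame."""
    return {"day_of_week": index.dayofweek, "month": index.month}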
@dataclass
class TFTASROConfig:
"""Top-level config aggregating all sub-configs."""
embedding: EmbeddingConfig = field(default_factory=EmbeddingConfig)
sentiment: SentimentFeatureConfig = field(default_factory=SentimentFeatureConfig)
lme: LMEConfig = field(default_factory=LMEConfig)
model: TFTModelConfig = field(default_factory=TFTModelConfig)
asro: ASROConfig = field(default_factory=ASROConfig)
training: TrainingConfig = field(default_factory=TrainingConfig)
feature_store: FeatureStoreConfig = field(default_factory=FeatureStoreConfig)
@property
def model_root(self) -> Path:
return Path(self.training.checkpoint_dir).parent
def get_tft_config() -> TFTASROConfig:
"""
Return the default TFT-ASRO configuration with paths resolved from
MODEL_DIR (``/data/models`` on HF Space, configurable locally).
"""
base = Path(_model_dir()) / "tft"
return TFTASROConfig(
embedding=EmbeddingConfig(
pca_model_path=str(base / "pca_finbert.joblib"),
),
training=TrainingConfig(
checkpoint_dir=str(base / "checkpoints"),
best_model_path=str(base / "best_tft_asro.ckpt"),
),
)
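if __name__ == "__main__":
    # Usage sketch: print the paths resolved from MODEL_DIR. With the variable
    # unset (the HF Space default of /data/models) the values below end up under
    # /data/models/tft; locally they follow whatever MODEL_DIR points at.
    cfg = get_tft_config()
    print(cfg.embedding.pca_model_path)   # .../tft/pca_finbert.joblib
    print(cfg.training.best_model_path)   # .../tft/best_tft_asro.ckpt
    print(cfg.model_root)                 # .../tft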