ifieryarrows committed on
Commit
db6c149
·
verified ·
1 Parent(s): e57e9d1

Sync from GitHub (tests passed)

Browse files
Dockerfile CHANGED
@@ -22,6 +22,8 @@ COPY ./adapters /code/adapters
22
  COPY ./worker /code/worker
23
  COPY ./pipelines /code/pipelines
24
  COPY ./migrations /code/migrations
 
 
25
 
26
  # Copy pre-trained model files (from Kaggle)
27
  COPY ./data/models /data/models
 
22
  COPY ./worker /code/worker
23
  COPY ./pipelines /code/pipelines
24
  COPY ./migrations /code/migrations
25
+ COPY ./deep_learning /code/deep_learning
26
+ COPY ./backtest /code/backtest
27
 
28
  # Copy pre-trained model files (from Kaggle)
29
  COPY ./data/models /data/models
deep_learning/config.py CHANGED
@@ -3,15 +3,25 @@ Central configuration for the TFT-ASRO deep learning pipeline.
3
 
4
  All hyperparameters, feature dimensions, and training settings live here
5
  so every module draws from a single source of truth.
 
 
 
 
6
  """
7
 
8
  from __future__ import annotations
9
 
 
10
  from dataclasses import dataclass, field
11
  from pathlib import Path
12
  from typing import Optional
13
 
14
 
 
 
 
 
 
15
  @dataclass(frozen=True)
16
  class EmbeddingConfig:
17
  model_name: str = "ProsusAI/finbert"
@@ -19,7 +29,7 @@ class EmbeddingConfig:
19
  pca_dim: int = 32
20
  max_token_length: int = 512
21
  batch_size: int = 64
22
- pca_model_path: str = "models/tft/pca_finbert.joblib"
23
 
24
 
25
  @dataclass(frozen=True)
@@ -86,8 +96,9 @@ class TrainingConfig:
86
  seed: int = 42
87
  num_workers: int = 0
88
  optuna_n_trials: int = 50
89
- checkpoint_dir: str = "models/tft/checkpoints"
90
- best_model_path: str = "models/tft/best_tft_asro.ckpt"
 
91
 
92
 
93
  @dataclass(frozen=True)
@@ -116,5 +127,17 @@ class TFTASROConfig:
116
 
117
 
118
  def get_tft_config() -> TFTASROConfig:
119
- """Return the default TFT-ASRO configuration."""
120
- return TFTASROConfig()
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  All hyperparameters, feature dimensions, and training settings live here
5
  so every module draws from a single source of truth.
6
+
7
+ Model paths honour the MODEL_DIR environment variable so they work both
8
+ locally (``data/models``) and inside the HF Space container
9
+ (``/data/models``).
10
  """
11
 
12
  from __future__ import annotations
13
 
14
+ import os
15
  from dataclasses import dataclass, field
16
  from pathlib import Path
17
  from typing import Optional
18
 
19
 
20
+ def _model_dir() -> str:
21
+ """Resolve the base model directory from env (same as app.settings)."""
22
+ return os.environ.get("MODEL_DIR", "/data/models")
23
+
24
+
25
  @dataclass(frozen=True)
26
  class EmbeddingConfig:
27
  model_name: str = "ProsusAI/finbert"
 
29
  pca_dim: int = 32
30
  max_token_length: int = 512
31
  batch_size: int = 64
32
+ pca_model_path: str = ""
33
 
34
 
35
  @dataclass(frozen=True)
 
96
  seed: int = 42
97
  num_workers: int = 0
98
  optuna_n_trials: int = 50
99
+ checkpoint_dir: str = ""
100
+ best_model_path: str = ""
101
+ hf_model_repo: str = "ifieryarrows/copper-mind-tft"
102
 
103
 
104
  @dataclass(frozen=True)
 
127
 
128
 
129
  def get_tft_config() -> TFTASROConfig:
130
+ """
131
+ Return the default TFT-ASRO configuration with paths resolved from
132
+ MODEL_DIR (``/data/models`` on HF Space, configurable locally).
133
+ """
134
+ base = Path(_model_dir()) / "tft"
135
+ return TFTASROConfig(
136
+ embedding=EmbeddingConfig(
137
+ pca_model_path=str(base / "pca_finbert.joblib"),
138
+ ),
139
+ training=TrainingConfig(
140
+ checkpoint_dir=str(base / "checkpoints"),
141
+ best_model_path=str(base / "best_tft_asro.ckpt"),
142
+ ),
143
+ )
deep_learning/inference/predictor.py CHANGED
@@ -42,10 +42,40 @@ class TFTPredictor:
42
  self._checkpoint_path = checkpoint_path or self.cfg.training.best_model_path
43
  self._model = None
44
  self._pca = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  @property
47
  def model(self):
48
  if self._model is None:
 
 
 
 
 
49
  from deep_learning.models.tft_copper import load_tft_model
50
  self._model = load_tft_model(self._checkpoint_path)
51
  return self._model
@@ -53,6 +83,7 @@ class TFTPredictor:
53
  @property
54
  def pca(self):
55
  if self._pca is None:
 
56
  pca_path = self.cfg.embedding.pca_model_path
57
  if Path(pca_path).exists():
58
  from deep_learning.data.embeddings import load_pca
 
42
  self._checkpoint_path = checkpoint_path or self.cfg.training.best_model_path
43
  self._model = None
44
  self._pca = None
45
+ self._hub_checked = False
46
+
47
+ def _ensure_local_artifacts(self) -> None:
48
+ """Download checkpoint from HF Hub if not present locally."""
49
+ if self._hub_checked:
50
+ return
51
+ self._hub_checked = True
52
+
53
+ if Path(self._checkpoint_path).exists():
54
+ return
55
+
56
+ try:
57
+ from deep_learning.models.hub import download_tft_artifacts
58
+
59
+ tft_dir = Path(self._checkpoint_path).parent
60
+ downloaded = download_tft_artifacts(
61
+ local_dir=tft_dir,
62
+ repo_id=self.cfg.training.hf_model_repo,
63
+ )
64
+ if downloaded:
65
+ logger.info("TFT checkpoint downloaded from HF Hub")
66
+ else:
67
+ logger.warning("TFT checkpoint not available on HF Hub")
68
+ except Exception as exc:
69
+ logger.warning("HF Hub download attempt failed: %s", exc)
70
 
71
  @property
72
  def model(self):
73
  if self._model is None:
74
+ self._ensure_local_artifacts()
75
+ if not Path(self._checkpoint_path).exists():
76
+ raise FileNotFoundError(
77
+ f"TFT checkpoint not found: {self._checkpoint_path}"
78
+ )
79
  from deep_learning.models.tft_copper import load_tft_model
80
  self._model = load_tft_model(self._checkpoint_path)
81
  return self._model
 
83
  @property
84
  def pca(self):
85
  if self._pca is None:
86
+ self._ensure_local_artifacts()
87
  pca_path = self.cfg.embedding.pca_model_path
88
  if Path(pca_path).exists():
89
  from deep_learning.data.embeddings import load_pca
deep_learning/models/hub.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ HuggingFace Hub integration for TFT-ASRO model persistence.
3
+
4
+ Solves the ephemeral storage problem on HF Spaces: after training,
5
+ checkpoints are uploaded to a dedicated HF model repo; before inference,
6
+ they are downloaded if not present locally.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import logging
12
+ import os
13
+ from pathlib import Path
14
+ from typing import Optional
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+ _HF_TOKEN_ENV = "HF_TOKEN"
19
+
20
+ _ARTIFACTS = [
21
+ "best_tft_asro.ckpt",
22
+ "pca_finbert.joblib",
23
+ ]
24
+
25
+
26
+ def _get_token() -> Optional[str]:
27
+ return os.environ.get(_HF_TOKEN_ENV)
28
+
29
+
30
+ def upload_tft_artifacts(
31
+ local_dir: str | Path,
32
+ repo_id: str,
33
+ commit_message: str = "Update TFT-ASRO checkpoint",
34
+ ) -> bool:
35
+ """
36
+ Upload all TFT artifacts from *local_dir* to a HuggingFace model repo.
37
+
38
+ Returns True on success, False if upload fails or token is missing.
39
+ """
40
+ token = _get_token()
41
+ if not token:
42
+ logger.warning("HF_TOKEN not set – skipping model upload to Hub")
43
+ return False
44
+
45
+ local_dir = Path(local_dir)
46
+ files_to_upload = [
47
+ local_dir / name
48
+ for name in _ARTIFACTS
49
+ if (local_dir / name).exists()
50
+ ]
51
+
52
+ if not files_to_upload:
53
+ logger.warning("No TFT artifacts found in %s", local_dir)
54
+ return False
55
+
56
+ try:
57
+ from huggingface_hub import HfApi
58
+
59
+ api = HfApi(token=token)
60
+ api.create_repo(repo_id, repo_type="model", exist_ok=True, private=True)
61
+
62
+ for fpath in files_to_upload:
63
+ api.upload_file(
64
+ path_or_fileobj=str(fpath),
65
+ path_in_repo=fpath.name,
66
+ repo_id=repo_id,
67
+ repo_type="model",
68
+ commit_message=commit_message,
69
+ )
70
+ logger.info("Uploaded %s → %s/%s", fpath.name, repo_id, fpath.name)
71
+
72
+ return True
73
+
74
+ except Exception as exc:
75
+ logger.error("HF Hub upload failed: %s", exc)
76
+ return False
77
+
78
+
79
+ def download_tft_artifacts(
80
+ local_dir: str | Path,
81
+ repo_id: str,
82
+ ) -> bool:
83
+ """
84
+ Download TFT artifacts from HuggingFace Hub to *local_dir*.
85
+
86
+ Skips files that already exist locally.
87
+ Returns True if at least the checkpoint was retrieved.
88
+ """
89
+ token = _get_token()
90
+ local_dir = Path(local_dir)
91
+ local_dir.mkdir(parents=True, exist_ok=True)
92
+
93
+ ckpt_path = local_dir / "best_tft_asro.ckpt"
94
+ if ckpt_path.exists():
95
+ logger.debug("TFT checkpoint already present locally: %s", ckpt_path)
96
+ return True
97
+
98
+ try:
99
+ from huggingface_hub import hf_hub_download
100
+
101
+ for name in _ARTIFACTS:
102
+ dest = local_dir / name
103
+ if dest.exists():
104
+ continue
105
+ try:
106
+ hf_hub_download(
107
+ repo_id=repo_id,
108
+ filename=name,
109
+ local_dir=str(local_dir),
110
+ token=token,
111
+ )
112
+ logger.info("Downloaded %s/%s → %s", repo_id, name, dest)
113
+ except Exception:
114
+ logger.debug("Artifact %s not found in %s (may not exist yet)", name, repo_id)
115
+
116
+ return ckpt_path.exists()
117
+
118
+ except ImportError:
119
+ logger.warning("huggingface_hub not installed – cannot download model")
120
+ return False
121
+ except Exception as exc:
122
+ logger.warning("HF Hub download failed: %s", exc)
123
+ return False
deep_learning/training/trainer.py CHANGED
@@ -183,6 +183,23 @@ def train_tft_model(
183
 
184
  _persist_tft_metadata(cfg.feature_store.target_symbol, result)
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  return result
187
 
188
 
 
183
 
184
  _persist_tft_metadata(cfg.feature_store.target_symbol, result)
185
 
186
+ # ---- 10. Upload to HF Hub (for persistence across HF Space rebuilds) ----
187
+ try:
188
+ from deep_learning.models.hub import upload_tft_artifacts
189
+
190
+ tft_dir = final_path.parent
191
+ uploaded = upload_tft_artifacts(
192
+ local_dir=tft_dir,
193
+ repo_id=cfg.training.hf_model_repo,
194
+ commit_message=f"TFT-ASRO checkpoint (val_loss={trainer.checkpoint_callback.best_model_score:.4f})"
195
+ if trainer.checkpoint_callback.best_model_score
196
+ else "TFT-ASRO checkpoint",
197
+ )
198
+ result["hub_uploaded"] = uploaded
199
+ except Exception as exc:
200
+ logger.warning("HF Hub upload skipped: %s", exc)
201
+ result["hub_uploaded"] = False
202
+
203
  return result
204
 
205
 
worker/tasks.py CHANGED
@@ -575,9 +575,10 @@ async def _execute_pipeline_stages_v2(
575
  logger.info(f"[run_id={run_id}] Stage 5.5: TFT-ASRO snapshot")
576
  try:
577
  from deep_learning.inference.predictor import generate_tft_analysis
 
578
  from pathlib import Path
579
 
580
- ckpt = Path("models/tft/best_tft_asro.ckpt")
581
  if ckpt.exists():
582
  tft_report = generate_tft_analysis(session, "HG=F")
583
 
 
575
  logger.info(f"[run_id={run_id}] Stage 5.5: TFT-ASRO snapshot")
576
  try:
577
  from deep_learning.inference.predictor import generate_tft_analysis
578
+ from deep_learning.config import get_tft_config
579
  from pathlib import Path
580
 
581
+ ckpt = Path(get_tft_config().training.best_model_path)
582
  if ckpt.exists():
583
  tft_report = generate_tft_analysis(session, "HG=F")
584