# P2SAMAPA
# Update train_and_push.yml
# accc7f8 unverified
# .github/workflows/train_and_push.yml
# Triggers and the jobs they run:
# 1. train — Mon-Fri 7am EST (start_year=2016) + manual single-year
# 2. sweep_YYYY — Mon-Fri 8pm EST + manual consensus sweep (parallel jobs, missing years only)
# 3. cleanup — daily midnight EST, wipes single-year outputs only
# 4. wipe_sweep_cache — Mon-Fri 7pm EST, deletes stale dated sweep files
name: Daily TFT Training

# Triggers. Each job below selects itself by comparing `github.event.schedule`
# with one of these cron strings, so the strings here and in the job `if:`
# conditions must stay byte-identical.
on:
  schedule:
    # Cron is evaluated in UTC; the EST times in the comments assume UTC-5.
    - cron: "0 12 * * 1-5"  # 12:00 UTC = 7am EST Mon-Fri (single-year default)
    - cron: "0 5 * * *"  # 05:00 UTC = midnight EST daily cleanup
    - cron: "0 1 * * 2-6"  # 01:00 UTC = 8pm EST Mon-Fri (daily consensus sweep)
    - cron: "0 0 * * 2-6"  # 00:00 UTC = 7pm EST Mon-Fri (wipe previous day's sweep)
  workflow_dispatch:
    inputs:
      # Used by the `train` job only; the sweep jobs hard-code their year.
      start_year:
        description: "Start year for single-year run"
        required: false
        default: "2016"
        type: string
      # Blank => `train` runs; otherwise every sweep_YYYY job whose year
      # appears in this string runs instead.
      sweep_mode:
        description: "Comma-separated sweep years e.g. 2008,2014,2016,2019,2021 (leave blank for single-year)"
        required: false
        default: ""
        type: string
      # Boolean input: the default is a real boolean, not the string "false".
      force_refresh:
        description: "Force full dataset rebuild"
        required: false
        default: false
        type: boolean
jobs:
# ── 7pm EST: wipe previous day sweep cache from HF dataset ─────────────────
  wipe_sweep_cache:
    # Runs only for the 00:00 UTC cron entry (string must match `on.schedule`).
    if: >
      github.event_name == 'schedule' &&
      github.event.schedule == '0 0 * * 2-6'
    runs-on: ubuntu-latest
    timeout-minutes: 10
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    steps:
      - name: Delete stale dated sweep files from HF dataset
        run: |
          pip install huggingface_hub -q
          python3 - <<'PYEOF'
          import os
          import re
          from datetime import datetime, timezone, timedelta
          from huggingface_hub import HfApi

          api = HfApi()
          token = os.environ["HF_TOKEN"]
          repo = "P2SAMAPA/p2-etf-tft-outputs"

          # Current date on a fixed UTC-5 clock, in the same %Y%m%d format the
          # sweep jobs pass as --sweep-date.
          today = (datetime.now(timezone.utc) - timedelta(hours=5)).strftime("%Y%m%d")

          # Three-part names only: signals_<part>_<8 digits>.json. Requiring an
          # 8-digit last part keeps non-dated files (e.g. signals_x_meta.json)
          # from being deleted.
          # NOTE(review): assumes sweep outputs always end in the %Y%m%d sweep
          # date -- confirm against train_pipeline.py.
          dated_sweep = re.compile(r"signals_[^_]+_(\d{8})\.json")

          # Best effort: a failed listing is reported and treated as "nothing
          # to delete" rather than failing the job.
          try:
              files = api.list_repo_files(repo_id=repo, repo_type="dataset", token=token)
          except Exception as e:
              print(f"Could not list files: {e}")
              files = []

          deleted = 0
          for f in files:
              match = dated_sweep.fullmatch(f)
              # Skip anything that is not a dated sweep file, and keep today's.
              if match is None or match.group(1) == today:
                  continue
              try:
                  api.delete_file(path_in_repo=f, repo_id=repo,
                                  repo_type="dataset", token=token,
                                  commit_message=f"Wipe stale sweep: {f}")
                  print(f"Deleted {f}")
                  deleted += 1
              except Exception as e:
                  print(f"Could not delete {f}: {e}")
          print(f"Wiped {deleted} stale sweep files")
          PYEOF
# ── Midnight cleanup — wipes single-year outputs only ────────────────────────
  cleanup:
    # Runs only for the 05:00 UTC cron entry (string must match `on.schedule`).
    if: >
      github.event_name == 'schedule' &&
      github.event.schedule == '0 5 * * *'
    runs-on: ubuntu-latest
    timeout-minutes: 10
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
    steps:
      - name: Wipe single-year outputs (preserve sweep cache)
        run: |
          pip install huggingface_hub -q
          python3 - <<'PYEOF'
          import os
          from huggingface_hub import HfApi

          api = HfApi()
          token = os.environ["HF_TOKEN"]
          repo_id = "P2SAMAPA/p2-etf-tft-outputs"

          # Only these three fixed names are removed; dated signals_* sweep
          # files are never touched by this job.
          for filename in ["model_outputs.npz", "signals.json", "training_meta.json"]:
              try:
                  api.delete_file(path_in_repo=filename, repo_id=repo_id,
                                  repo_type="dataset", token=token,
                                  commit_message=f"Midnight cleanup: {filename}")
                  print(f"Deleted {filename}")
              except Exception as e:
                  # Best effort: any failure (missing file, auth, network) is
                  # reported and the loop continues with the next file.
                  print(f"Could not delete {filename}: {e}")
          print("Cleanup done")
          PYEOF
# ── Single-year train — daily default + manual (no sweep_mode) ──────────────
  train:
    # Runs for the 12:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input is blank/absent.
    if: >
      (github.event_name == 'schedule' &&
      github.event.schedule == '0 12 * * 1-5') ||
      (github.event_name == 'workflow_dispatch' &&
      (github.event.inputs.sweep_mode == '' || github.event.inputs.sweep_mode == null))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train
        # Dispatch inputs go through env vars instead of being expanded into
        # the script text, so their content cannot be interpreted as shell.
        env:
          START_YEAR_INPUT: ${{ github.event.inputs.start_year }}
          FORCE_REFRESH_INPUT: ${{ github.event.inputs.force_refresh }}
        run: |
          # Scheduled runs have no inputs, so fall back to 2016.
          START_YEAR="${START_YEAR_INPUT:-2016}"
          ARGS=(--start-year "$START_YEAR")
          if [ "$FORCE_REFRESH_INPUT" = "true" ]; then
            ARGS+=(--force-refresh)
          fi
          python train_pipeline.py "${ARGS[@]}"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-single-${{ github.run_id }}
          path: "*.log"
          retention-days: 7
# ── Sweep 2008 ───────────────────────────────────────────────────────────────
  sweep_2008:
    # Runs for the 01:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input contains the substring "2008".
    if: >
      (github.event_name == 'schedule' && github.event.schedule == '0 1 * * 2-6') ||
      (github.event_name == 'workflow_dispatch' && contains(github.event.inputs.sweep_mode, '2008'))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train sweep (start year 2008)
        run: |
          # Sweep date = current date on a fixed UTC-5 clock; falls back to
          # the plain UTC date if `date -d` is not supported.
          SWEEP_DATE="$(date -u -d "-5 hours" +%Y%m%d 2>/dev/null || date -u +%Y%m%d)"
          python train_pipeline.py --start-year 2008 --sweep-date "$SWEEP_DATE"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-sweep-2008-${{ github.run_id }}
          path: "*.log"
          retention-days: 7
# ── Sweep 2014 ───────────────────────────────────────────────────────────────
  sweep_2014:
    # Runs for the 01:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input contains the substring "2014".
    if: >
      (github.event_name == 'schedule' && github.event.schedule == '0 1 * * 2-6') ||
      (github.event_name == 'workflow_dispatch' && contains(github.event.inputs.sweep_mode, '2014'))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train sweep (start year 2014)
        run: |
          # Sweep date = current date on a fixed UTC-5 clock; falls back to
          # the plain UTC date if `date -d` is not supported.
          SWEEP_DATE="$(date -u -d "-5 hours" +%Y%m%d 2>/dev/null || date -u +%Y%m%d)"
          python train_pipeline.py --start-year 2014 --sweep-date "$SWEEP_DATE"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-sweep-2014-${{ github.run_id }}
          path: "*.log"
          retention-days: 7
# ── Sweep 2016 ───────────────────────────────────────────────────────────────
  sweep_2016:
    # Runs for the 01:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input contains the substring "2016".
    if: >
      (github.event_name == 'schedule' && github.event.schedule == '0 1 * * 2-6') ||
      (github.event_name == 'workflow_dispatch' && contains(github.event.inputs.sweep_mode, '2016'))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train sweep (start year 2016)
        run: |
          # Sweep date = current date on a fixed UTC-5 clock; falls back to
          # the plain UTC date if `date -d` is not supported.
          SWEEP_DATE="$(date -u -d "-5 hours" +%Y%m%d 2>/dev/null || date -u +%Y%m%d)"
          python train_pipeline.py --start-year 2016 --sweep-date "$SWEEP_DATE"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-sweep-2016-${{ github.run_id }}
          path: "*.log"
          retention-days: 7
# ── Sweep 2019 ───────────────────────────────────────────────────────────────
  sweep_2019:
    # Runs for the 01:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input contains the substring "2019".
    if: >
      (github.event_name == 'schedule' && github.event.schedule == '0 1 * * 2-6') ||
      (github.event_name == 'workflow_dispatch' && contains(github.event.inputs.sweep_mode, '2019'))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train sweep (start year 2019)
        run: |
          # Sweep date = current date on a fixed UTC-5 clock; falls back to
          # the plain UTC date if `date -d` is not supported.
          SWEEP_DATE="$(date -u -d "-5 hours" +%Y%m%d 2>/dev/null || date -u +%Y%m%d)"
          python train_pipeline.py --start-year 2019 --sweep-date "$SWEEP_DATE"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-sweep-2019-${{ github.run_id }}
          path: "*.log"
          retention-days: 7
# ── Sweep 2021 ───────────────────────────────────────────────────────────────
  sweep_2021:
    # Runs for the 01:00 UTC cron entry, or for a manual dispatch whose
    # sweep_mode input contains the substring "2021".
    if: >
      (github.event_name == 'schedule' && github.event.schedule == '0 1 * * 2-6') ||
      (github.event_name == 'workflow_dispatch' && contains(github.event.inputs.sweep_mode, '2021'))
    runs-on: ubuntu-latest
    timeout-minutes: 180
    env:
      HF_TOKEN: ${{ secrets.HF_TOKEN }}
      FRED_API_KEY: ${{ secrets.FRED_API_KEY }}
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.11"
          cache: "pip"
      - name: Install dependencies
        # No --no-cache-dir: it would bypass the pip cache that
        # setup-python's `cache: "pip"` restores and saves.
        run: pip install -r requirements.txt
      - name: Train sweep (start year 2021)
        run: |
          # Sweep date = current date on a fixed UTC-5 clock; falls back to
          # the plain UTC date if `date -d` is not supported.
          SWEEP_DATE="$(date -u -d "-5 hours" +%Y%m%d 2>/dev/null || date -u +%Y%m%d)"
          python train_pipeline.py --start-year 2021 --sweep-date "$SWEEP_DATE"
      - name: Upload logs
        # Logs are uploaded even when training fails.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: logs-sweep-2021-${{ github.run_id }}
          path: "*.log"
          retention-days: 7