feat(wofo): add prices, research, backtest, and Phase-1 agent layers
End-to-end plumbing for the wofo Phase-1 (research-only) loop.
- wofo/prices/: PriceSource protocol with synthetic (deterministic, for
tests) and Stooq (free daily CSV) adapters. Production feeds plug in
behind the same protocol.
- wofo/research/: follow-the-filer strategy turns a 13F panel into a
dated TargetWeights series with provenance. Effective-date is the
filing date (not period-of-report) to avoid look-ahead bias.
Issuer-name -> ticker mapping is heuristic with manual overrides
since CUSIP -> ticker is licensed (CGS).
- wofo/backtest/: minimal portfolio backtester (target weights x prices
-> daily NAV) with CAGR / Sharpe / max-drawdown metrics. No numpy
dependency. Unknown tickers are skipped to cash.
- wofo/agent/: Phase-1 agent skeleton. Defines read-only research tools
(list_local_filings, summarize_panel, top_holdings, qoq_activity)
and a Claude tool-use runner. Explicitly NO order-entry tools — that
requires Phase-2 promotion + counsel sign-off.
- tests/: 18 tests pass against committed sample data, no network.
- pytest.ini + confcutdir scope test discovery away from the legacy
Zuup app at the repo root.
- wofo/agent/demo_e2e.py: end-to-end plumbing demo (panel -> strategy
-> synthetic backtest), runs offline.
https://claude.ai/code/session_01C97VcztNaYLWwesHfWn6iE
- pytest.ini +4 -0
- tests/test_agent_tools.py +41 -0
- tests/test_backtest.py +88 -0
- tests/test_research.py +58 -0
- tests/test_thirteenf.py +56 -0
- wofo/README.md +62 -0
- wofo/agent/__init__.py +16 -0
- wofo/agent/demo_e2e.py +105 -0
- wofo/agent/runner.py +98 -0
- wofo/agent/tools.py +156 -0
- wofo/backtest/__init__.py +12 -0
- wofo/backtest/metrics.py +67 -0
- wofo/backtest/portfolio.py +166 -0
- wofo/prices/__init__.py +16 -0
- wofo/prices/source.py +33 -0
- wofo/prices/stooq.py +82 -0
- wofo/prices/synthetic.py +57 -0
- wofo/research/__init__.py +15 -0
- wofo/research/follow_the_filer.py +193 -0
- wofo/research/issuer_map.py +127 -0
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[pytest]
|
| 2 |
+
testpaths = tests
|
| 3 |
+
addopts = --import-mode=importlib --confcutdir=tests --rootdir=tests
|
| 4 |
+
pythonpath = .
|
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test the research tools without invoking the model."""
|
| 2 |
+
from wofo.agent import dispatch_tool, TOOLS
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def test_tools_registry_shape():
    """The registry exposes exactly the four research tools, each with an object schema."""
    expected_names = {"list_local_filings", "summarize_panel", "top_holdings", "qoq_activity"}
    assert {spec["name"] for spec in TOOLS} == expected_names
    # Every tool definition must carry an object-typed input schema.
    for spec in TOOLS:
        assert "input_schema" in spec
        assert spec["input_schema"]["type"] == "object"
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def test_list_local_filings_returns_periods():
    """Listing with no arguments succeeds and reports at least five periods."""
    result = dispatch_tool("list_local_filings", {})
    assert result.ok
    assert "periods" in result.content
    periods = result.content["periods"]
    assert len(periods) >= 5
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def test_summarize_panel():
    """The panel summary names the sample manager and covers five or more periods."""
    outcome = dispatch_tool("summarize_panel", {})
    assert outcome.ok
    payload = outcome.content
    assert payload["manager"] == "Situational Awareness LP"
    assert payload["cik"] == "0002045724"
    assert len(payload["periods"]) >= 5
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_top_holdings_known_period():
    """Asking for the top three holdings of a known period returns three rows."""
    request = {"period": "2025-12-31", "n": 3}
    outcome = dispatch_tool("top_holdings", request)
    assert outcome.ok
    holdings = outcome.content["holdings"]
    issuer_names = [row["issuer"] for row in holdings]
    assert "COREWEAVE INC" in issuer_names
    assert len(holdings) == 3
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_unknown_period_errors_cleanly():
    """A period that is not on disk yields a failed result, not an exception."""
    outcome = dispatch_tool("top_holdings", {"period": "1999-12-31"})
    assert not outcome.ok
    assert "unknown period" in outcome.error
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def test_unknown_tool_errors_cleanly():
    """Dispatching a tool name that is not registered yields a failed result."""
    outcome = dispatch_tool("place_trade", {})
    assert not outcome.ok
    assert "unknown tool" in outcome.error
|
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Backtester tests using the synthetic price source."""
|
| 2 |
+
from datetime import date
|
| 3 |
+
|
| 4 |
+
from wofo.backtest import run_backtest, summary, max_drawdown, sharpe
|
| 5 |
+
from wofo.prices import SyntheticPriceSource
|
| 6 |
+
from wofo.research.follow_the_filer import TargetWeights, Snapshot
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def _two_snapshot_strategy() -> TargetWeights:
    """Build a fixed strategy with two dated snapshots for the backtest tests.

    The first snapshot holds AAA/BBB; the second adds CCC six months later.
    """
    january = Snapshot(
        effective_date=date(2024, 1, 15),
        period_of_report=date(2023, 12, 31),
        weights={"AAA": 0.6, "BBB": 0.4},
        unmapped_value_share=0.0,
        provenance={"test": "snap1"},
    )
    july = Snapshot(
        effective_date=date(2024, 7, 15),
        period_of_report=date(2024, 6, 30),
        weights={"AAA": 0.3, "BBB": 0.3, "CCC": 0.4},
        unmapped_value_share=0.0,
        provenance={"test": "snap2"},
    )
    return TargetWeights(
        manager_cik="0000000000",
        manager_name="Test Manager",
        snapshots=[january, july],
    )
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def test_backtest_runs_and_produces_nav():
    """A two-snapshot strategy over synthetic prices yields a usable NAV series."""
    import math  # local import so this block does not depend on file-level imports

    src = SyntheticPriceSource(drift=0.0, vol=0.01)
    res = run_backtest(_two_snapshot_strategy(), src, start_cash=1_000_000.0, end_date=date(2024, 12, 31))
    # One NAV point per date, and more than 100 of them.
    assert len(res.nav) == len(res.dates) > 100
    # NAV must be strictly positive and finite. math.isfinite rejects NaN and
    # +/-inf; the previous `v == v` check only caught NaN and let +inf through.
    assert all(math.isfinite(v) and v > 0 for v in res.nav)
    # Two snapshots -> at least two rebalances.
    assert len(res.rebalance_dates) >= 2
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def test_backtest_metrics_make_sense():
    """Summary metrics for a low-volatility synthetic run are well-formed."""
    import math  # local import so this block does not depend on file-level imports

    src = SyntheticPriceSource(drift=0.0, vol=0.005)
    res = run_backtest(_two_snapshot_strategy(), src, end_date=date(2024, 12, 31))
    s = summary(res.dates, res.nav)
    assert s["n_days"] == len(res.dates)
    # Metrics should be finite numbers. NaN and inf are both `float`, so the
    # type check alone does not establish finiteness.
    assert isinstance(s["sharpe"], float)
    assert math.isfinite(s["sharpe"])
    assert 0.0 <= s["max_drawdown"] <= 1.0
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def test_unknown_ticker_skipped_not_crashed(monkeypatch):
    """A ticker the price source cannot resolve is left in cash rather than crashing.

    Note: the ``monkeypatch`` fixture is requested but not used in the body.
    """
    from wofo.prices import NotFound

    # The synthetic source prices any string, so wrap it in a source that
    # raises NotFound for one specific ticker.
    class _MissingTickerSource:
        def __init__(self, inner):
            self.inner = inner

        def daily(self, ticker, start, end):
            if ticker != "MISSING":
                return self.inner.daily(ticker, start, end)
            raise NotFound(ticker)

    half_missing = Snapshot(
        effective_date=date(2024, 1, 15),
        period_of_report=date(2023, 12, 31),
        weights={"AAA": 0.5, "MISSING": 0.5},
        unmapped_value_share=0.0,
        provenance={},
    )
    strategy = TargetWeights(manager_cik="0", manager_name="t", snapshots=[half_missing])
    price_source = _MissingTickerSource(SyntheticPriceSource())

    res = run_backtest(strategy, price_source, end_date=date(2024, 6, 30))
    # The portfolio should still run; cash share should be ~50% because half the target was unbuyable.
    peak_cash_share = max(res.cash_share[10:])
    assert peak_cash_share >= 0.4
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def test_max_drawdown_known_series():
    """max_drawdown matches a hand-computed peak-to-trough decline."""
    series = [100, 110, 105, 90, 95, 120]
    # peak 110 -> trough 90 -> mdd = (110-90)/110 = 0.1818...
    expected = (110 - 90) / 110
    observed = max_drawdown(series)
    assert abs(observed - expected) < 1e-9
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def test_sharpe_zero_for_flat_series():
    """A constant NAV series has a Sharpe ratio of exactly 0.0."""
    flat_nav = [100.0] * 200
    assert sharpe(flat_nav) == 0.0
|
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for the follow-the-filer strategy + issuer mapping."""
|
| 2 |
+
from datetime import date
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
from wofo.research import follow_the_filer, IssuerOverride
|
| 6 |
+
from wofo.research.follow_the_filer import load_filing_refs
|
| 7 |
+
from wofo.research.issuer_map import _norm
|
| 8 |
+
from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def test_norm_strips_corporate_suffixes():
    """_norm lowercases names and drops corporate suffixes such as Corp/Inc/Hldgs."""
    short_form = _norm("Constellation Energy Corp")
    long_form = _norm("Constellation Energy Corporation")
    assert short_form == long_form

    expected_by_raw = {
        "BLOOM ENERGY CORP": "bloom energy",
        "Lumentum Hldgs Inc": "lumentum",
    }
    for raw_name, expected in expected_by_raw.items():
        assert _norm(raw_name) == expected
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def test_follow_the_filer_with_overrides():
    """follow_the_filer builds one snapshot per sample filing from a hand-made ticker map."""
    # Manual map covers what we need without hitting the network.
    overrides = IssuerOverride(by_issuer={
        "CONSTELLATION ENERGY CORP": "CEG",
        "MARVELL TECHNOLOGY INC": "MRVL",
        "MODINE MFG CO": "MOD",
        "ANTERIX INC": "ATEX",
        "CIPHER MINING INC": "CIFR",
        "VISTRA CORP": "VST",
    })

    quarter_dirs = sorted(entry for entry in RAW.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    refs = load_filing_refs(RAW)

    # Resolve every panel CUSIP through the overrides only; issuers without an
    # override map to None, so no SEC fetch is needed.
    cusip_to_ticker: dict[str, str | None] = {
        cusip: overrides.by_issuer.get(issuer_name)
        for cusip, issuer_name in panel["issuers"].items()
    }

    tw = follow_the_filer(
        panel,
        filing_refs=refs,
        cusip_to_ticker=cusip_to_ticker,
        manager_cik="0002045724",
        manager_name="Situational Awareness LP",
    )
    assert tw.manager_cik == "0002045724"
    assert len(tw.snapshots) == 5

    # First snapshot: most positions will be unmapped because we only added 6 overrides.
    first = tw.snapshots[0]
    # Mapped weights + unmapped share == 1.
    accounted_for = sum(first.weights.values()) + first.unmapped_value_share
    assert abs(accounted_for - 1.0) < 1e-6

    # Effective date should be on or after period-of-report.
    for snap in tw.snapshots:
        assert snap.effective_date >= snap.period_of_report
|
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tests for the 13F pipeline using committed sample data."""
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
|
| 5 |
+
|
| 6 |
+
RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def test_parse_2024q4_meta():
    """The 2024Q4 primary document parses to the expected filer metadata."""
    meta = parse_primary_doc(RAW / "2024Q4" / "primary_doc.xml")
    expected_fields = {
        "cik": "0002045724",
        "manager_name": "Situational Awareness LP",
        "period_iso": "2024-12-31",
        "crd_number": "000333011",
        "sec_file_number": "801-132039",
    }
    for attr, expected in expected_fields.items():
        assert getattr(meta, attr) == expected
    assert meta.is_amendment is False
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def test_parse_2024q4_holdings_match_summary():
    """Parsed 2024Q4 holdings agree with the entry count and value total in the cover page."""
    quarter = RAW / "2024Q4"
    meta = parse_primary_doc(quarter / "primary_doc.xml")
    holdings = parse_infotable(quarter / "infotable.xml")
    assert len(holdings) == meta.table_entry_total
    total_value = sum(row.value_usd for row in holdings)
    assert total_value == meta.table_value_total
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def test_panel_periods_sorted_and_unique():
    """The panel lists its five sample periods in sorted order without duplicates."""
    quarter_dirs = sorted(entry for entry in RAW.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    periods = build_panel(pairs)["periods"]
    assert periods == sorted(set(periods))
    assert len(periods) == 5
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def test_qoq_initial_period():
    """Every position in the earliest period is reported with action INITIAL."""
    quarter_dirs = sorted(entry for entry in RAW.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    deltas = qoq_changes(panel)

    first_period = panel["periods"][0]
    first_period_deltas = [d for d in deltas if d["period"] == first_period]
    assert all(d["action"] == "INITIAL" for d in first_period_deltas)

    # Every position in the first quarter should be classified.
    held_cusips = {cusip for (period, cusip) in panel["rows"] if period == first_period}
    classified_cusips = {d["cusip"] for d in first_period_deltas}
    assert classified_cusips == held_cusips
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def test_concentration_monotonic_aum():
    """Reported total value rises strictly from each sample period to the next."""
    quarter_dirs = sorted(entry for entry in RAW.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    conc = concentration(panel)
    aums = [conc[period]["total_value_usd"] for period in panel["periods"]]
    # SA LP grew every quarter in the sample window; if this changes when re-pulled,
    # this test should be updated, not silenced.
    strictly_increasing = all(later > earlier for earlier, later in zip(aums, aums[1:]))
    assert strictly_increasing, aums
|
|
@@ -14,6 +14,20 @@ wofo/
|
|
| 14 |
│ ├── parse.py # XML → dataclasses
|
| 15 |
│ ├── analyze.py # panel + qoq + concentration
|
| 16 |
│ └── cli.py # `python -m wofo.thirteenf.cli {pull,analyze}`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
└── data/
|
| 18 |
└── 13f/
|
| 19 |
├── raw/ # one dir per quarter, primary_doc + infotable
|
|
@@ -51,6 +65,54 @@ python -m wofo.thirteenf.cli analyze
|
|
| 51 |
The analyze step prints a per-quarter summary and writes JSON +
|
| 52 |
`REPORT.md` to `wofo/data/13f/processed/`.
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
## What `wofo` will not do
|
| 55 |
|
| 56 |
- File legal or tax documents on your behalf.
|
|
|
|
| 14 |
│ ├── parse.py # XML → dataclasses
|
| 15 |
│ ├── analyze.py # panel + qoq + concentration
|
| 16 |
│ └── cli.py # `python -m wofo.thirteenf.cli {pull,analyze}`
|
| 17 |
+
├── prices/ # Pluggable price data sources
|
| 18 |
+
│ ├── source.py # PriceSource protocol
|
| 19 |
+
│ ├── synthetic.py # Deterministic random walk (tests / offline)
|
| 20 |
+
│ └── stooq.py # Free Stooq daily-CSV adapter (prototyping)
|
| 21 |
+
├── research/ # Strategy generators
|
| 22 |
+
│ ├── follow_the_filer.py # 13F panel → dated target weights
|
| 23 |
+
│ └── issuer_map.py # Issuer name → ticker (heuristic + overrides)
|
| 24 |
+
├── backtest/ # Minimal portfolio backtester
|
| 25 |
+
│ ├── portfolio.py # Target weights × prices → daily NAV
|
| 26 |
+
│ └── metrics.py # CAGR / Sharpe / max drawdown
|
| 27 |
+
├── agent/ # Phase-1 (research-only) agent
|
| 28 |
+
│ ├── tools.py # Read-only tools the agent may call
|
| 29 |
+
│ ├── runner.py # Claude tool-use loop
|
| 30 |
+
│ └── demo_e2e.py # Plumbing demo (no API key required)
|
| 31 |
└── data/
|
| 32 |
└── 13f/
|
| 33 |
├── raw/ # one dir per quarter, primary_doc + infotable
|
|
|
|
| 65 |
The analyze step prints a per-quarter summary and writes JSON +
|
| 66 |
`REPORT.md` to `wofo/data/13f/processed/`.
|
| 67 |
|
| 68 |
+
## Quick start: end-to-end strategy → backtest demo
|
| 69 |
+
|
| 70 |
+
The demo wires panel → follow-the-filer → synthetic backtest. Synthetic
|
| 71 |
+
prices are NOT real returns — this is a plumbing check.
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
python -m wofo.agent.demo_e2e
|
| 75 |
+
```
|
| 76 |
+
|
| 77 |
+
To run with real prices, swap `SyntheticPriceSource` for a real adapter:
|
| 78 |
+
|
| 79 |
+
```python
|
| 80 |
+
from wofo.prices.stooq import StooqPriceSource
|
| 81 |
+
src = StooqPriceSource()
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
Or implement your own adapter against the `wofo.prices.PriceSource`
|
| 85 |
+
protocol — Polygon, Tiingo, IBKR historical, etc.
|
| 86 |
+
|
| 87 |
+
## Quick start: agent loop (Phase 1, research only)
|
| 88 |
+
|
| 89 |
+
```bash
|
| 90 |
+
pip install anthropic
|
| 91 |
+
export ANTHROPIC_API_KEY=sk-ant-...
|
| 92 |
+
|
| 93 |
+
python - <<'PY'
|
| 94 |
+
from wofo.agent import run_research_loop
|
| 95 |
+
out = run_research_loop(
|
| 96 |
+
"Summarize Situational Awareness LP's Q4 2025 portfolio "
|
| 97 |
+
"and the largest position changes from Q3 to Q4."
|
| 98 |
+
)
|
| 99 |
+
print(out["final_text"])
|
| 100 |
+
PY
|
| 101 |
+
```
|
| 102 |
+
|
| 103 |
+
The agent has access only to read-only research tools
|
| 104 |
+
(`list_local_filings`, `summarize_panel`, `top_holdings`,
|
| 105 |
+
`qoq_activity`). It cannot place orders, transfer funds, or modify any
|
| 106 |
+
account. See `wofo/agent/tools.py` for the tool schemas.
|
| 107 |
+
|
| 108 |
+
## Tests
|
| 109 |
+
|
| 110 |
+
```bash
|
| 111 |
+
python -m pytest
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
All tests run against committed sample data (no network required).
|
| 115 |
+
|
| 116 |
## What `wofo` will not do
|
| 117 |
|
| 118 |
- File legal or tax documents on your behalf.
|
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""wofo agent — Phase 1 (research only).
|
| 2 |
+
|
| 3 |
+
This module defines the *tools* the wofo agent is allowed to call and
|
| 4 |
+
the orchestration loop that drives it. By construction, Phase 1
|
| 5 |
+
exposes only **read-only research tools** — no execution, no order
|
| 6 |
+
entry, no broker connectivity. Phase 2 / 3 will live in separate
|
| 7 |
+
modules and require a deliberate code change (and counsel sign-off)
|
| 8 |
+
to enable.
|
| 9 |
+
|
| 10 |
+
The agent uses the Anthropic Python SDK if available; if it is not
|
| 11 |
+
installed, the tools can still be invoked directly from Python.
|
| 12 |
+
"""
|
| 13 |
+
from .tools import TOOLS, dispatch_tool, ToolResult
|
| 14 |
+
from .runner import run_research_loop
|
| 15 |
+
|
| 16 |
+
__all__ = ["TOOLS", "dispatch_tool", "ToolResult", "run_research_loop"]
|
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""End-to-end demo: 13F panel -> follow-the-filer strategy -> synthetic backtest.
|
| 2 |
+
|
| 3 |
+
Runs entirely offline using `SyntheticPriceSource`, so the resulting
|
| 4 |
+
NAV is **not** a real return — it is a sanity check that the plumbing
|
| 5 |
+
works. Replace the price source with a real one (Stooq, Polygon, etc.)
|
| 6 |
+
to get meaningful numbers.
|
| 7 |
+
|
| 8 |
+
python -m wofo.agent.demo_e2e
|
| 9 |
+
"""
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
from datetime import date
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
|
| 15 |
+
from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
|
| 16 |
+
from wofo.research import IssuerOverride, follow_the_filer
|
| 17 |
+
from wofo.research.follow_the_filer import load_filing_refs
|
| 18 |
+
from wofo.backtest import run_backtest, summary
|
| 19 |
+
from wofo.prices import SyntheticPriceSource
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# Hand-curated overrides covering most names that appear in SA LP filings.
|
| 23 |
+
# Update as needed; this is a documentation artifact, not a complete map.
|
| 24 |
+
# Keys are issuer names; main() looks each panel issuer name up here with an
# exact-string `.get(name)`, so a name absent from this map resolves to None,
# the same as an explicit None entry.
DEFAULT_OVERRIDES = IssuerOverride(by_issuer={
    "COREWEAVE INC": "CRWV",
    "BLOOM ENERGY CORP": "BE",
    "INTEL CORP": "INTC",
    "LUMENTUM HLDGS INC": "LITE",
    "CORE SCIENTIFIC INC NEW": "CORZ",
    "IREN LIMITED": "IREN",
    "APPLIED DIGITAL CORP": "APLD",
    "SANDISK CORP": "SNDK",
    "EQT CORP": "EQT",
    "CIPHER MINING INC": "CIFR",
    "COHERENT CORP": "COHR",
    "CONSTELLATION ENERGY CORP": "CEG",
    "MARVELL TECHNOLOGY INC": "MRVL",
    "MODINE MFG CO": "MOD",
    "ANTERIX INC": "ATEX",
    "VISTRA CORP": "VST",
    "NVIDIA CORPORATION": "NVDA",
    "BROADCOM INC": "AVGO",
    "TAIWAN SEMICONDUCTOR MFG LTD": "TSM",
    "MICRON TECHNOLOGY INC": "MU",
    "WESTERN DIGITAL CORP": "WDC",
    "SEAGATE TECHNOLOGY HLDNGS PL": "STX",
    "GALAXY DIGITAL INC.": "GLXY",
    "VANECK ETF TRUST": None,  # ETF; mapping is ambiguous without ticker
    "CLEANSPARK INC": "CLSK",
    "BITFARMS LTD": "BITF",
    "LIBERTY ENERGY INC": "LBRT",
    "INFOSYS LTD": "INFY",
    "PROPETRO HLDG CORP": "PUMP",
    "BABCOCK & WILCOX ENTERPRISES": "BW",
    "POWER SOLUTIONS INTL INC": "PSIX",
    "WHITEFIBER INC": "WYFI",
    "KILROY RLTY CORP": "KRC",
})
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def main() -> None:
    """Run the offline demo: load sample filings, derive weights, backtest, print results."""
    raw = Path(__file__).resolve().parents[2] / "wofo" / "data" / "13f" / "raw"
    quarter_dirs = sorted(entry for entry in raw.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    refs = load_filing_refs(raw)

    # Issuers with no override entry resolve to None (unmapped).
    cusip_to_ticker: dict[str, str | None] = {
        cusip: DEFAULT_OVERRIDES.by_issuer.get(issuer_name)
        for cusip, issuer_name in panel["issuers"].items()
    }

    tw = follow_the_filer(
        panel,
        filing_refs=refs,
        cusip_to_ticker=cusip_to_ticker,
        manager_cik="0002045724",
        manager_name="Situational Awareness LP",
        run_id="demo_e2e",
    )

    print(f"Manager: {tw.manager_name}")
    for snap in tw.snapshots:
        mapped_count = len(snap.weights)
        weight_total = sum(snap.weights.values())
        print(
            f" effective {snap.effective_date} report {snap.period_of_report} "
            f"mapped={mapped_count} weight_total={weight_total:.1%} "
            f"unmapped={snap.unmapped_value_share:.1%}"
        )

    # Synthetic backtest is a plumbing check, not a real return.
    price_source = SyntheticPriceSource(drift=0.0004, vol=0.02)
    res = run_backtest(tw, price_source, start_cash=1_000_000.0, end_date=date(2026, 4, 30))
    stats = summary(res.dates, res.nav)
    print()
    print("Synthetic backtest summary (NOT real returns):")
    for key, value in stats.items():
        # Floats get fixed-width numeric formatting; everything else prints as-is.
        line = f" {key:<14} {value:>12,.4f}" if isinstance(value, float) else f" {key:<14} {value}"
        print(line)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
if __name__ == "__main__":
|
| 105 |
+
main()
|
|
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Research-loop runner.
|
| 2 |
+
|
| 3 |
+
Drives a Claude model through a tool-use loop using only the read-only
|
| 4 |
+
research tools defined in `wofo.agent.tools`. Requires the `anthropic`
|
| 5 |
+
SDK at runtime; if it is not installed the module still imports so the
|
| 6 |
+
tools can be used without a model.
|
| 7 |
+
"""
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import os
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
from .tools import TOOLS, dispatch_tool
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
SYSTEM_PROMPT = """You are wofo, the Wooden Family Office research agent.
|
| 17 |
+
|
| 18 |
+
You operate in Phase 1 — research only. You have NO ability to place
|
| 19 |
+
orders, transfer funds, or modify any account. You may only call the
|
| 20 |
+
read-only research tools provided.
|
| 21 |
+
|
| 22 |
+
Your job is to produce well-sourced, dated research notes:
|
| 23 |
+
- Cite filings by accession number when relevant.
|
| 24 |
+
- Be explicit about staleness (13F is delayed by up to 45 days).
|
| 25 |
+
- When you don't know, say so. Never fabricate tickers or numbers.
|
| 26 |
+
- Distinguish "the manager held X" (fact) from "X is a good buy"
|
| 27 |
+
(opinion that needs justification).
|
| 28 |
+
|
| 29 |
+
When you finish, return a research note in markdown.
|
| 30 |
+
""".strip()
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def run_research_loop(
    user_prompt: str,
    *,
    model: str = "claude-opus-4-7",
    max_iterations: int = 8,
    max_tokens: int = 4096,
) -> dict:
    """Run a single research-task loop and return the final transcript.

    Each iteration sends the conversation to the model. If the model stops
    for any reason other than ``tool_use``, its text blocks are joined and
    returned. Otherwise every ``tool_use`` block in the turn is dispatched
    and the results are sent back as the next user turn. The function does
    not stream.

    Args:
        user_prompt: The research question; becomes the first user message.
        model: Model identifier passed to the Messages API.
        max_iterations: Upper bound on the number of model calls.
        max_tokens: ``max_tokens`` passed to each model call.

    Returns:
        A dict with ``final_text``, ``messages`` (full transcript), and
        ``tool_calls`` (audit log of name/input/ok/error per dispatched
        tool). ``final_text`` is ``"(max iterations reached)"`` when the
        iteration budget runs out before the model finishes.

    Raises:
        RuntimeError: If the ``anthropic`` SDK is not installed or
            ``ANTHROPIC_API_KEY`` is not set.
    """
    try:
        import anthropic  # type: ignore
    except ImportError as e:
        raise RuntimeError(
            "anthropic SDK is required to run the agent loop; "
            "`pip install anthropic` and set ANTHROPIC_API_KEY."
        ) from e

    if not os.environ.get("ANTHROPIC_API_KEY"):
        raise RuntimeError("ANTHROPIC_API_KEY is not set.")

    client = anthropic.Anthropic()
    messages: list[dict[str, Any]] = [{"role": "user", "content": user_prompt}]
    tool_calls: list[dict[str, Any]] = []

    for _ in range(max_iterations):
        resp = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages,
        )
        # Append assistant turn.
        messages.append({"role": "assistant", "content": resp.content})

        if resp.stop_reason != "tool_use":
            text = "".join(b.text for b in resp.content if getattr(b, "type", None) == "text")
            return {"final_text": text, "messages": messages, "tool_calls": tool_calls}

        # Run every tool call in the assistant turn.
        tool_results: list[dict[str, Any]] = []
        for block in resp.content:
            if getattr(block, "type", None) != "tool_use":
                continue
            tr = dispatch_tool(block.name, block.input)
            tool_calls.append(
                {"name": block.name, "input": block.input, "ok": tr.ok, "error": tr.error}
            )
            # Build the message once: to_message() serializes the payload, so
            # calling it per field would do that work twice.
            payload = tr.to_message()
            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": payload["content"],
                    "is_error": payload["is_error"],
                }
            )
        messages.append({"role": "user", "content": tool_results})

    return {
        "final_text": "(max iterations reached)",
        "messages": messages,
        "tool_calls": tool_calls,
    }
|
|
@@ -0,0 +1,156 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tools the wofo Phase-1 agent can call.
|
| 2 |
+
|
| 3 |
+
Each tool is a pure-Python function with a JSON-Schema-style signature.
|
| 4 |
+
The signatures double as the Anthropic tool-use definitions when the
|
| 5 |
+
agent is wired to Claude.
|
| 6 |
+
|
| 7 |
+
**No tool in this module places orders, transfers funds, or modifies
|
| 8 |
+
any account.** Adding such a tool requires:
|
| 9 |
+
1. Counsel sign-off (see docs/family-office-counsel-packet.md).
|
| 10 |
+
2. Phase-2 promotion in docs/wofo-architecture.md.
|
| 11 |
+
3. Hard guardrails (per-trade caps, daily caps, kill switch).
|
| 12 |
+
"""
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Any, Callable
|
| 19 |
+
|
| 20 |
+
from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
REPO_ROOT = Path(__file__).resolve().parents[2]
|
| 24 |
+
RAW_DIR = REPO_ROOT / "wofo" / "data" / "13f" / "raw"
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class ToolResult:
    """Outcome of one tool invocation, convertible to a tool_result message."""

    name: str  # tool name
    ok: bool  # True on success
    content: Any  # payload; JSON-serialized by to_message() when ok is True
    error: str | None = None  # error text used as the message content when ok is False

    def to_message(self) -> dict:
        """Return the ``tool_result`` dict for this outcome.

        On success the content is the JSON-encoded payload; on failure it is
        the error text, falling back to ``"error"`` when that is empty or None.
        """
        if self.ok:
            body = json.dumps(self.content)
        else:
            body = self.error or "error"
        return {"type": "tool_result", "content": body, "is_error": not self.ok}
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# --- Tool implementations ---------------------------------------------------
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def t_list_local_filings(manager_dir: str = "") -> dict:
    """List 13F filing periods we have on disk.

    Args:
        manager_dir: optional directory, relative to the repository root,
            whose sub-directories are the filing periods. Empty means
            `RAW_DIR`.

    Returns:
        `{"periods": [...], "base": "<directory>"}` where `periods` is the
        sorted list of sub-directory names. `periods` is empty when the
        directory is missing or is not a directory.

    Raises:
        ValueError: if `manager_dir` resolves outside the repository root.
    """
    if not manager_dir:
        base = RAW_DIR
    else:
        base = (REPO_ROOT / manager_dir).resolve()
        # `manager_dir` is supplied by the model: an absolute path or `..`
        # segments must not let this tool browse the wider filesystem.
        try:
            base.relative_to(REPO_ROOT)
        except ValueError:
            raise ValueError(
                f"manager_dir must stay inside the repository: {manager_dir!r}"
            ) from None
    # `is_dir` (rather than `exists`) also covers a path that is a plain file,
    # on which `iterdir` would raise.
    if not base.is_dir():
        return {"periods": [], "base": str(base)}
    periods = sorted(p.name for p in base.iterdir() if p.is_dir())
    return {"periods": periods, "base": str(base)}
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def t_summarize_panel() -> dict:
    """Build a panel from every local 13F filing and summarize it.

    Each sub-directory of `RAW_DIR` is read as one filing
    (`primary_doc.xml` + `infotable.xml`), in sorted directory order.
    Manager name and CIK are taken from the last filing in that order.
    """
    quarter_dirs = sorted(entry for entry in RAW_DIR.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    conc = concentration(panel)

    if pairs:
        latest_meta = pairs[-1][0]
        manager, cik = latest_meta.manager_name, latest_meta.cik
    else:
        manager = cik = None

    return {
        "manager": manager,
        "cik": cik,
        "periods": panel["periods"],
        "concentration": conc,
        "totals": panel["totals"],
    }
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def t_top_holdings(period: str, n: int = 10) -> dict:
    """Top N holdings, by reported value, for a given period.

    Args:
        period: period key as it appears in the panel's `periods` list.
        n: maximum number of holdings to return; must be non-negative.

    Returns:
        `{"period": period, "holdings": [...]}` with holdings sorted by
        `value_usd`, largest first.

    Raises:
        ValueError: if `n` is negative or `period` is not in the panel.
    """
    # A negative slice bound would silently drop the *smallest* holdings
    # rather than limit the result, so reject it up front.
    if n < 0:
        raise ValueError(f"n must be non-negative; got {n}")

    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")

    rows = [
        {"cusip": c, "issuer": panel["issuers"].get(c, ""), "value_usd": r["value_usd"], "shares": r["shares"]}
        for (p, c), r in panel["rows"].items()
        if p == period
    ]
    rows.sort(key=lambda r: r["value_usd"], reverse=True)
    return {"period": period, "holdings": rows[:n]}
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def t_qoq_activity(period: str) -> dict:
    """Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.

    Args:
        period: period key as it appears in the panel's `periods` list.

    Returns:
        `{"period": period, "rows": [...]}` containing the `qoq_changes`
        entries whose `period` equals the requested one.

    Raises:
        ValueError: if `period` is not in the panel. Without this check a
            mistyped period would be indistinguishable from a quarter with
            no activity.
    """
    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    # Same validation and message as `t_top_holdings`, so the model gets the
    # list of valid periods and can retry.
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")
    deltas = qoq_changes(panel)
    return {"period": period, "rows": [d for d in deltas if d["period"] == period]}
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
# --- Registry ---------------------------------------------------------------
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def _schema(name: str, description: str, props: dict, required: list[str]) -> dict:
    """Assemble one tool definition with an object-typed `input_schema`."""
    input_schema = {"type": "object", "properties": props, "required": required}
    return dict(name=name, description=description, input_schema=input_schema)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
# Tool definitions advertised to the model; each name must have a matching
# entry in the dispatch table.
TOOLS: list[dict] = [
    _schema(
        name="list_local_filings",
        description="List 13F filing periods available on local disk.",
        props={
            "manager_dir": {
                "type": "string",
                "description": "optional path; default = the SA LP raw dir",
            },
        },
        required=[],
    ),
    _schema(
        name="summarize_panel",
        description="Summarize the panel of 13F filings on disk: manager, periods, totals, concentration.",
        props={},
        required=[],
    ),
    _schema(
        name="top_holdings",
        description="Return the top-N holdings (by reported value) for a given period (YYYY-MM-DD).",
        props={
            "period": {
                "type": "string",
                "description": "Period of report, e.g. 2025-12-31",
            },
            "n": {
                "type": "integer",
                "description": "How many top holdings to return",
                "default": 10,
            },
        },
        required=["period"],
    ),
    _schema(
        name="qoq_activity",
        description="Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.",
        props={
            "period": {
                "type": "string",
                "description": "Period of report, e.g. 2025-12-31",
            },
        },
        required=["period"],
    ),
]
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
# Tool name (as advertised in TOOLS) -> implementing function.
_DISPATCH: dict[str, Callable[..., dict]] = dict(
    list_local_filings=t_list_local_filings,
    summarize_panel=t_summarize_panel,
    top_holdings=t_top_holdings,
    qoq_activity=t_qoq_activity,
)
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def dispatch_tool(name: str, args: dict | None) -> ToolResult:
    """Run the registered tool `name` with keyword arguments `args`.

    Tool failures never propagate: an unknown tool name and any exception
    raised while calling the tool are both reported as a failed `ToolResult`.
    """
    if name not in _DISPATCH:
        return ToolResult(name=name, ok=False, content=None, error=f"unknown tool: {name}")

    handler = _DISPATCH[name]
    kwargs = args if args else {}
    try:
        payload = handler(**kwargs)
    except Exception as exc:  # surface errors to the model so it can recover
        return ToolResult(name=name, ok=False, content=None, error=f"{type(exc).__name__}: {exc}")
    return ToolResult(name=name, ok=True, content=payload)
|
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Minimal portfolio backtester.
|
| 2 |
+
|
| 3 |
+
Takes a `TargetWeights` series and a `PriceSource`, simulates a
|
| 4 |
+
periodically rebalanced long-only portfolio, and reports daily NAV +
|
| 5 |
+
summary metrics. Intentionally simple — fancier features (transaction
|
| 6 |
+
costs beyond a flat bps, slippage models, partial fills) belong in a
|
| 7 |
+
dedicated backtest engine; see `docs/repos.md`.
|
| 8 |
+
"""
|
| 9 |
+
from .portfolio import run_backtest, BacktestResult
|
| 10 |
+
from .metrics import summary, sharpe, max_drawdown, cagr
|
| 11 |
+
|
| 12 |
+
__all__ = ["run_backtest", "BacktestResult", "summary", "sharpe", "max_drawdown", "cagr"]
|
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Backtest summary metrics.
|
| 2 |
+
|
| 3 |
+
No numpy dependency. The math is straightforward and the input series
|
| 4 |
+
are short (daily bars over a few years).
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import math
|
| 9 |
+
from datetime import date
|
| 10 |
+
from typing import Sequence
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def _returns(nav: Sequence[float]) -> list[float]:
    """Simple period-over-period returns of a NAV series.

    A falsy (zero) previous value yields a 0.0 return instead of dividing
    by zero. The result has one element fewer than `nav`.
    """
    return [
        (cur - prev) / prev if prev else 0.0
        for prev, cur in zip(nav, nav[1:])
    ]
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def cagr(dates: Sequence[date], nav: Sequence[float]) -> float:
    """Compound annual growth rate of `nav` between the first and last date.

    Args:
        dates: observation dates; only the first and last are used.
        nav: portfolio values; only the first and last are used.

    Returns:
        The annualized growth rate as a fraction (a year is 365.25 days).
        Special cases:
          - 0.0 when there are fewer than two observations or the starting
            NAV is not positive;
          - -1.0 when the ending NAV is zero or negative;
          - `math.inf` when the span is so short that annualizing a gain
            overflows a float.
    """
    if len(nav) < 2 or len(dates) < 2 or nav[0] <= 0:
        return 0.0
    growth = nav[-1] / nav[0]
    if growth <= 0:
        # A fractional power of a negative number yields a complex result in
        # Python; report a total loss instead.
        return -1.0
    # The floor keeps the exponent finite when both dates coincide.
    years = max((dates[-1] - dates[0]).days / 365.25, 1e-9)
    try:
        return growth ** (1 / years) - 1
    except OverflowError:
        # Very short spans make the exponent huge; report unbounded growth
        # rather than crashing the caller.
        return math.inf
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def sharpe(nav: Sequence[float], *, rf_annual: float = 0.0, periods_per_year: int = 252) -> float:
    """Annualized Sharpe ratio of the period returns implied by `nav`.

    The annual risk-free rate is spread evenly over `periods_per_year`, and
    the standard deviation is the sample (n - 1) estimate. Returns 0.0 when
    there are fewer than two returns or the returns have no dispersion.
    """
    period_returns = _returns(nav)
    count = len(period_returns)
    if count < 2:
        return 0.0

    rf_per = rf_annual / periods_per_year
    excess = [r - rf_per for r in period_returns]
    mean = sum(excess) / count
    squared_dev = sum((x - mean) ** 2 for x in excess)
    sd = math.sqrt(squared_dev / (count - 1))
    if sd == 0:
        return 0.0
    return math.sqrt(periods_per_year) * (mean / sd)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def max_drawdown(nav: Sequence[float]) -> float:
    """Largest peak-to-trough decline, as a positive fraction of the peak.

    For example 0.25 means a 25% fall from the running high. Drawdowns are
    only measured while the running peak is positive.
    """
    running_peak = -math.inf
    worst = 0.0
    for value in nav:
        running_peak = max(running_peak, value)
        if running_peak > 0:
            worst = max(worst, (running_peak - value) / running_peak)
    return worst
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def summary(dates: Sequence[date], nav: Sequence[float]) -> dict:
    """Collect headline statistics for a backtest's NAV series.

    Empty inputs produce `None` dates and zeroed values rather than raising.
    """
    first_day = dates[0].isoformat() if dates else None
    last_day = dates[-1].isoformat() if dates else None
    opening = nav[0] if nav else 0.0
    closing = nav[-1] if nav else 0.0
    if nav and nav[0]:
        total_return = nav[-1] / nav[0] - 1
    else:
        # No observations, or a zero starting value that cannot be divided by.
        total_return = 0.0
    return {
        "start_date": first_day,
        "end_date": last_day,
        "n_days": len(dates),
        "start_nav": opening,
        "end_nav": closing,
        "total_return": total_return,
        "cagr": cagr(dates, nav),
        "sharpe": sharpe(nav),
        "max_drawdown": max_drawdown(nav),
    }
|
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Portfolio backtester: target weights + price source -> daily NAV."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from dataclasses import dataclass, field
|
| 5 |
+
from datetime import date, timedelta
|
| 6 |
+
from typing import Sequence
|
| 7 |
+
|
| 8 |
+
from wofo.prices import PriceSource, NotFound
|
| 9 |
+
from wofo.research import TargetWeights, Snapshot
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
class BacktestResult:
    """Per-day output series of one backtest run.

    `dates`, `nav`, `weights_history` and `cash_share` are appended to
    together, one entry per simulated day.
    """

    dates: list[date]
    # Portfolio value indexed to start_cash.
    nav: list[float]
    # Per-day actual weights, keyed by ticker.
    weights_history: list[dict[str, float]]
    rebalance_dates: list[date]
    # Day -> target tickers that could not be traded on that day.
    skipped_tickers: dict[date, list[str]] = field(default_factory=dict)
    cash_share: list[float] = field(default_factory=list)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def _build_price_panel(
    tickers: set[str],
    start: date,
    end: date,
    source: PriceSource,
) -> tuple[dict[date, dict[str, float]], dict[str, list]]:
    """Fetch daily closes per ticker and pivot them into a date-keyed panel.

    Returns `(panel, meta)` where `panel` maps each date seen in any series
    to `{ticker: close}` for the tickers that have a bar that day, and
    `meta["missing_tickers"]` lists tickers for which the source raised
    `NotFound`.
    """
    closes_by_ticker: dict[str, dict[date, float]] = {}
    unknown: list[str] = []
    for ticker in sorted(tickers):
        try:
            bars = source.daily(ticker, start, end)
        except NotFound:
            unknown.append(ticker)
        else:
            closes_by_ticker[ticker] = {bar.d: bar.close for bar in bars}

    # Trading-day axis = union of dates seen in any series.
    trading_days = sorted({d for closes in closes_by_ticker.values() for d in closes})
    panel: dict[date, dict[str, float]] = {
        day: {t: closes[day] for t, closes in closes_by_ticker.items() if day in closes}
        for day in trading_days
    }
    return panel, {"missing_tickers": unknown}
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _active_snapshot(snapshots: Sequence[Snapshot], d: date) -> Snapshot | None:
    """Return the latest snapshot in effect on `d`, or None if none is yet.

    Scans in the given order and stops at the first snapshot whose
    `effective_date` is after `d`, so `snapshots` is expected to be sorted
    by `effective_date`.
    """
    chosen: Snapshot | None = None
    for candidate in snapshots:
        if candidate.effective_date > d:
            break
        chosen = candidate
    return chosen
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def run_backtest(
    target_weights: TargetWeights,
    price_source: PriceSource,
    *,
    start_cash: float = 1_000_000.0,
    rebalance_threshold_bps: float = 50.0,  # rebalance if any weight drifts > this many bps from target
    cost_bps: float = 5.0,  # charged on the traded notional of every sell and every buy
    end_date: date | None = None,
) -> BacktestResult:
    """Simulate a long-only portfolio that tracks `target_weights`.

    Behavior:
      - Start with `start_cash` in cash on the first effective_date.
      - The first priced day on which a snapshot becomes active triggers a
        rebalance to that snapshot.
      - Between snapshots, rebalance only if any weight drifts beyond
        `rebalance_threshold_bps` from its target.
      - A rebalance liquidates every holding and re-buys the targets,
        charging `cost_bps` on both legs.
      - Target tickers that the price source doesn't know, or that have no
        bar on the day, are *skipped*; their weight stays in cash (the
        remaining targets are NOT renormalized to 1).

    Args:
        target_weights: snapshots to follow; sorted here by effective_date.
        price_source: source of daily closes for every target ticker.
        start_cash: initial cash balance.
        rebalance_threshold_bps: drift, in basis points of NAV, that triggers
            a rebalance between snapshots.
        cost_bps: transaction cost in basis points of traded notional.
        end_date: last date to request prices for; defaults to 365 days
            after the last snapshot's effective_date.

    Returns:
        A `BacktestResult` with one entry per date present in the price panel.

    Raises:
        ValueError: if `target_weights` has no snapshots.
        RuntimeError: if the price source returned no data for any ticker.
    """
    if not target_weights.snapshots:
        raise ValueError("no snapshots")

    snapshots = sorted(target_weights.snapshots, key=lambda s: s.effective_date)
    start = snapshots[0].effective_date
    end = end_date or (snapshots[-1].effective_date + timedelta(days=365))
    all_tickers = {t for s in snapshots for t in s.weights}

    panel, meta = _build_price_panel(all_tickers, start, end, price_source)
    missing = set(meta["missing_tickers"])

    if not panel:
        raise RuntimeError("price source returned no data for any ticker")

    cost_rate = cost_bps / 10_000
    cash = start_cash
    holdings: dict[str, float] = {}  # ticker -> shares
    out = BacktestResult(dates=[], nav=[], weights_history=[], rebalance_dates=[])

    def _price(ticker: str, prices: dict[str, float], d: date) -> float:
        # Today's close when there is one; only fall back to the walk-back
        # lookup on a miss so it is not evaluated for every priced holding.
        if ticker in prices:
            return prices[ticker]
        return _last_close(panel, ticker, d)

    def _weights(values: dict[str, float], nav_val: float) -> dict[str, float]:
        # Guard the division the same way the cash share is guarded: a zero
        # NAV with open positions must not raise ZeroDivisionError.
        return {t: (v / nav_val if nav_val else 0.0) for t, v in values.items()}

    last_target_id: int | None = None
    for d in sorted(panel):
        prices = panel[d]

        # Mark to market.
        values = {t: sh * _price(t, prices, d) for t, sh in holdings.items()}
        nav_val = cash + sum(values.values())

        snap = _active_snapshot(snapshots, d)
        if snap is None:
            out.dates.append(d)
            out.nav.append(nav_val)
            out.weights_history.append({})
            out.cash_share.append(1.0)
            continue

        # Keep only targets the source knows AND that have a price today.
        # Weights are not renormalized: the skipped share stays in cash.
        tradable_targets = {t: w for t, w in snap.weights.items() if t in prices and t not in missing}
        skipped_today = sorted(set(snap.weights) - set(tradable_targets))
        if skipped_today:
            out.skipped_tickers[d] = skipped_today

        # Snapshots are tracked by object identity; the sorted list above
        # keeps every snapshot alive for the whole run.
        snap_id = id(snap)
        is_new_snapshot = snap_id != last_target_id
        last_target_id = snap_id

        # Largest absolute gap between current and target weight (ex-cash).
        current_w = _weights(values, nav_val)
        max_drift_bps = max(
            (abs(current_w.get(t, 0.0) - tradable_targets.get(t, 0.0)) for t in set(current_w) | set(tradable_targets)),
            default=0.0,
        ) * 10_000

        if is_new_snapshot or max_drift_bps > rebalance_threshold_bps:
            # Liquidate everything to cash, then buy targets.
            for v in values.values():
                cash += v
                cash -= abs(v) * cost_rate
            holdings = {}
            for t, w in tradable_targets.items():
                # Sized off the pre-trade NAV; costs are paid from cash on top.
                # NOTE(review): with targets summing to ~1 this leaves cash
                # slightly negative by the trading costs - confirm intended.
                target_dollars = nav_val * w
                holdings[t] = target_dollars / prices[t]
                cash -= target_dollars + abs(target_dollars) * cost_rate
            out.rebalance_dates.append(d)
            # Recompute NAV after costs.
            values = {t: sh * prices[t] for t, sh in holdings.items()}
            nav_val = cash + sum(values.values())

        out.dates.append(d)
        out.nav.append(nav_val)
        out.weights_history.append(_weights(values, nav_val))
        out.cash_share.append(cash / nav_val if nav_val else 1.0)

    return out
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def _last_close(
    panel: dict[date, dict[str, float]],
    ticker: str,
    d: date,
    *,
    max_lookback_days: int = 10,
) -> float:
    """Most-recent close for `ticker` strictly before `d`.

    The bar on `d` itself is never consulted; callers look that one up
    directly and use this only as a fallback.

    Args:
        panel: `{date: {ticker: close}}` price panel.
        ticker: symbol to look up.
        d: reference date.
        max_lookback_days: how many calendar days to walk back. The default
            of 10 is enough for weekends/holidays.

    Returns:
        The close found, or 0.0 if the ticker has no bar within the lookback
        window (treated as fully out).
    """
    for days_back in range(1, max_lookback_days + 1):
        day_prices = panel.get(d - timedelta(days=days_back))
        if day_prices is not None and ticker in day_prices:
            return day_prices[ticker]
    return 0.0
|
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pluggable price-data sources.
|
| 2 |
+
|
| 3 |
+
The agent must work with multiple data vendors over time. Calling code
|
| 4 |
+
depends on the `PriceSource` protocol; concrete adapters live alongside.
|
| 5 |
+
|
| 6 |
+
Built-in adapters:
|
| 7 |
+
- `synthetic.SyntheticPriceSource` — deterministic random walk; for tests.
|
| 8 |
+
- `stooq.StooqPriceSource` — free daily CSV from stooq.com; for prototyping.
|
| 9 |
+
|
| 10 |
+
Production deployments should swap in a paid feed (Polygon, Tiingo, etc.)
|
| 11 |
+
behind the same protocol.
|
| 12 |
+
"""
|
| 13 |
+
from .source import PriceSource, PriceBar, NotFound
|
| 14 |
+
from .synthetic import SyntheticPriceSource
|
| 15 |
+
|
| 16 |
+
__all__ = ["PriceSource", "PriceBar", "NotFound", "SyntheticPriceSource"]
|
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Price-source protocol shared by all adapters."""
|
| 2 |
+
from __future__ import annotations
|
| 3 |
+
|
| 4 |
+
from dataclasses import dataclass
|
| 5 |
+
from datetime import date
|
| 6 |
+
from typing import Protocol, runtime_checkable
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class NotFound(LookupError):
    """Signals that a price source does not know the requested ticker."""
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass(frozen=True)
class PriceBar:
    """One immutable daily OHLCV bar."""

    d: date  # day the bar belongs to
    open: float
    high: float
    low: float
    close: float
    volume: int
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
@runtime_checkable
class PriceSource(Protocol):
    """Structural interface for anything that can serve daily price bars.

    Contract for implementations:
      - results must be deterministic for a given (ticker, start, end);
      - an unknown ticker should raise `NotFound` (not return an empty
        list), so callers can distinguish "no data" from "delisted on this
        date."
    """

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Return the daily bars for `ticker` for the `start`..`end` request."""
        ...
|
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Stooq.com daily-CSV adapter.
|
| 2 |
+
|
| 3 |
+
Free, no API key. Reasonable for prototyping; not appropriate for
|
| 4 |
+
production trading. Stooq's coverage and corporate-action handling are
|
| 5 |
+
not as clean as paid feeds.
|
| 6 |
+
|
| 7 |
+
URL pattern:
|
| 8 |
+
https://stooq.com/q/d/l/?s={symbol}&i=d&d1=YYYYMMDD&d2=YYYYMMDD
|
| 9 |
+
|
| 10 |
+
US tickers need a `.us` suffix on stooq.
|
| 11 |
+
"""
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import csv
|
| 15 |
+
import io
|
| 16 |
+
import os
|
| 17 |
+
import time
|
| 18 |
+
from datetime import date, datetime
|
| 19 |
+
from urllib.error import HTTPError, URLError
|
| 20 |
+
from urllib.request import Request, urlopen
|
| 21 |
+
|
| 22 |
+
from .source import NotFound, PriceBar
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class StooqPriceSource:
    """Daily bars from stooq.com's CSV download endpoint.

    Requests are paced so that consecutive fetches from one instance start
    at least `request_interval_s` seconds apart.
    """

    BASE = "https://stooq.com/q/d/l/"

    def __init__(self, *, suffix: str = ".us", request_interval_s: float = 0.25, timeout_s: float = 30.0):
        """Configure the adapter.

        Args:
            suffix: appended to symbols that contain no "." (market suffix).
            request_interval_s: minimum spacing between requests, in seconds.
            timeout_s: per-request timeout passed to `urlopen`.
        """
        self.suffix = suffix
        self.request_interval_s = request_interval_s
        self.timeout_s = timeout_s
        self._last_request: float = 0.0

    def _pace(self) -> None:
        """Sleep just long enough to respect `request_interval_s`."""
        elapsed = time.monotonic() - self._last_request
        if elapsed < self.request_interval_s:
            time.sleep(self.request_interval_s - elapsed)
        self._last_request = time.monotonic()

    def _fetch(self, url: str) -> str:
        """GET `url` and return the body as text.

        Raises:
            NotFound: on HTTP 404.
            HTTPError: on any other HTTP error status (re-raised unchanged).
            RuntimeError: on other URL/transport failures.
        """
        self._pace()
        ua = os.environ.get("WOFO_HTTP_UA", "wofo-research/0.1")
        req = Request(url, headers={"User-Agent": ua})
        try:
            with urlopen(req, timeout=self.timeout_s) as resp:
                return resp.read().decode("utf-8", errors="replace")
        except HTTPError as e:
            # HTTPError subclasses URLError, so it must be handled first.
            if e.code == 404:
                raise NotFound(url) from e
            raise
        except URLError as e:
            raise RuntimeError(f"stooq fetch failed: {e}") from e

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Fetch daily bars for `ticker` between `start` and `end`.

        Rows that cannot be parsed (missing column, non-numeric field) are
        skipped rather than failing the whole request.

        Raises:
            NotFound: if the response is empty, starts with "No data", or
                lacks the CSV header.
        """
        from urllib.parse import urlencode

        symbol = ticker.lower()
        if "." not in symbol:
            symbol += self.suffix
        # Encode the query so symbols containing reserved characters (for
        # example "^" or a space) still yield a well-formed URL.
        query = urlencode(
            {
                "s": symbol,
                "i": "d",
                "d1": start.strftime("%Y%m%d"),
                "d2": end.strftime("%Y%m%d"),
            }
        )
        body = self._fetch(f"{self.BASE}?{query}")
        # Stooq returns "No data" or empty for unknown / out-of-range queries.
        if not body or body.startswith("No data") or "Date,Open" not in body:
            raise NotFound(f"stooq: no data for {ticker} {start}..{end}")
        bars: list[PriceBar] = []
        for row in csv.DictReader(io.StringIO(body)):
            try:
                bars.append(
                    PriceBar(
                        d=datetime.strptime(row["Date"], "%Y-%m-%d").date(),
                        open=float(row["Open"]),
                        high=float(row["High"]),
                        low=float(row["Low"]),
                        close=float(row["Close"]),
                        # Volume may be absent or blank; treat that as 0.
                        volume=int(float(row.get("Volume") or 0)),
                    )
                )
            except (ValueError, KeyError):
                continue
        return bars
|
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Deterministic synthetic price source for tests / offline development.
|
| 2 |
+
|
| 3 |
+
The price path is a seeded random walk. Same (ticker, start, end) always
|
| 4 |
+
returns the same series, which lets backtests be reproducible without
|
| 5 |
+
network access.
|
| 6 |
+
"""
|
| 7 |
+
from __future__ import annotations
|
| 8 |
+
|
| 9 |
+
import hashlib
|
| 10 |
+
import random
|
| 11 |
+
from datetime import date, timedelta
|
| 12 |
+
|
| 13 |
+
from .source import PriceBar, PriceSource
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class SyntheticPriceSource:
    """Seeded random-walk price generator; every ticker is accepted."""

    def __init__(self, *, drift: float = 0.0003, vol: float = 0.02, start_price: float = 100.0):
        self.drift, self.vol, self.start_price = drift, vol, start_price

    def _seed(self, ticker: str) -> int:
        """Derive a stable integer seed from the ticker's SHA-256 digest."""
        digest = hashlib.sha256(ticker.encode()).hexdigest()
        return int(digest[:12], 16)

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Generate weekday bars from `start` through `end`, inclusive.

        The walk restarts from `start_price` at `start` on every call.

        Raises:
            ValueError: if `end` is before `start`.
        """
        if end < start:
            raise ValueError("end < start")

        rng = random.Random(self._seed(ticker))
        wick_sd = self.vol / 4
        price = self.start_price
        bars: list[PriceBar] = []
        for offset in range((end - start).days + 1):
            day = start + timedelta(days=offset)
            # Skip weekends to mimic NYSE calendar (rough — does not skip holidays).
            if day.weekday() >= 5:
                continue
            # The order of the RNG draws below fixes the generated series.
            price = max(0.01, price * (1 + rng.gauss(self.drift, self.vol)))
            hi = price * (1 + abs(rng.gauss(0, wick_sd)))
            lo = price * (1 - abs(rng.gauss(0, wick_sd)))
            op = lo + (hi - lo) * rng.random()
            bars.append(
                PriceBar(
                    d=day,
                    open=round(op, 4),
                    high=round(hi, 4),
                    low=round(lo, 4),
                    close=round(price, 4),
                    volume=rng.randint(100_000, 10_000_000),
                )
            )
        return bars
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Static self-check: lets a type checker verify that this class satisfies the PriceSource protocol.
|
| 57 |
+
_: PriceSource = SyntheticPriceSource()
|
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Research strategy modules.
|
| 2 |
+
|
| 3 |
+
Strategies turn parsed filing/data panels into dated, provenance-stamped
|
| 4 |
+
target-weight series that backtest and paper-trade modules consume.
|
| 5 |
+
"""
|
| 6 |
+
from .follow_the_filer import follow_the_filer, TargetWeights, Snapshot
|
| 7 |
+
from .issuer_map import resolve_tickers, IssuerOverride
|
| 8 |
+
|
| 9 |
+
__all__ = [
|
| 10 |
+
"follow_the_filer",
|
| 11 |
+
"TargetWeights",
|
| 12 |
+
"Snapshot",
|
| 13 |
+
"resolve_tickers",
|
| 14 |
+
"IssuerOverride",
|
| 15 |
+
]
|
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Follow-the-filer strategy: mirror a 13F manager's reported long book.
|
| 2 |
+
|
| 3 |
+
Important caveats — read before using:
|
| 4 |
+
|
| 5 |
+
1. **Stale by construction.** 13F filings are due 45 days after quarter
|
| 6 |
+
end. The earliest a follower can act on a filing is the filing date,
|
| 7 |
+
not the period-of-report date. We use `effective_date = file_date`
|
| 8 |
+
for that reason. Backtests using `period_of_report` as the trade
|
| 9 |
+
date are look-ahead-biased.
|
| 10 |
+
2. **Long-only.** 13Fs report long positions in 13F-eligible securities
|
| 11 |
+
only. The manager's true exposure (shorts, swaps, non-US, cash,
|
| 12 |
+
options on non-13F names) is invisible.
|
| 13 |
+
3. **Cap-structure ambiguity.** Same issuer can appear under different
|
| 14 |
+
CUSIPs (common vs. converts). We aggregate to issuer level when the
|
| 15 |
+
caller requests it; otherwise CUSIP-level weights are reported.
|
| 16 |
+
4. **Survivorship/relisting.** Tickers can change. The mapping from
|
| 17 |
+
13F issuer -> ticker uses SEC's free file and is heuristic.
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
from dataclasses import dataclass
|
| 22 |
+
from datetime import date, datetime
|
| 23 |
+
from typing import Iterable
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass(frozen=True)
class Snapshot:
    """A set of target weights that takes effect on `effective_date`.

    Construction fails unless the weights plus `unmapped_value_share` add up
    to 1 within a 1% tolerance; a snapshot with no weights is exempt.
    """

    # Date the follower can first act on this filing.
    effective_date: date
    # What the manager held at this date.
    period_of_report: date
    # ticker -> weight (sums ~= 1 over mapped names)
    weights: dict[str, float]
    # Share of the manager's reported value whose CUSIPs we couldn't map to
    # a ticker.
    unmapped_value_share: float
    # Filing refs, source, run timestamp.
    provenance: dict

    def __post_init__(self):
        if not self.weights:
            return
        accounted = sum(self.weights.values()) + self.unmapped_value_share
        if not 0.99 <= accounted <= 1.01:
            raise ValueError(f"weights+unmapped should sum to 1; got {accounted}")
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
@dataclass(frozen=True)
class TargetWeights:
    """All target-weight snapshots produced by one strategy run for a manager."""

    manager_cik: str
    manager_name: str
    snapshots: list[Snapshot]

    def as_dict(self) -> dict:
        """Return a plain-dict view with both snapshot dates as ISO strings."""
        rendered = []
        for snap in self.snapshots:
            rendered.append(
                {
                    "effective_date": snap.effective_date.isoformat(),
                    "period_of_report": snap.period_of_report.isoformat(),
                    "weights": snap.weights,
                    "unmapped_value_share": snap.unmapped_value_share,
                    "provenance": snap.provenance,
                }
            )
        return {
            "manager_cik": self.manager_cik,
            "manager_name": self.manager_name,
            "snapshots": rendered,
        }
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def follow_the_filer(
    panel: dict,
    filing_refs: dict,  # period_iso -> {cik, accession, file_date, form}
    cusip_to_ticker: dict[str, str | None],
    *,
    manager_cik: str,
    manager_name: str,
    top_n: int | None = None,
    run_id: str | None = None,
) -> TargetWeights:
    """Build target-weight snapshots from a 13F panel.

    Args:
        panel: output of `wofo.thirteenf.analyze.build_panel`. This function
            reads `panel["periods"]` (period ISO strings) and `panel["rows"]`
            (a dict keyed by `(period, cusip)` whose values carry
            `"value_usd"`).
        filing_refs: per-period filing metadata. Must include `file_date`
            (YYYY-MM-DD) so we can set `effective_date` correctly.
        cusip_to_ticker: mapping from `wofo.research.resolve_tickers`.
            CUSIPs that are missing or map to a falsy ticker count as
            unmapped.
        manager_cik: recorded on the result and in each snapshot's provenance.
        manager_name: recorded on the result.
        top_n: if set, keep only the largest N positions per snapshot.
        run_id: optional tag for provenance (e.g. a git SHA).

    Returns:
        A `TargetWeights` whose snapshots are sorted by `effective_date`.

    Raises:
        ValueError: if a period has no filing ref or the ref lacks
            `file_date`.

    Each weight is the mapped position's share of the total reported value
    of the positions kept for that period (after `top_n`), so the weights
    are NOT rescaled to sum to 1 over mapped names. The value share that
    came from unmapped CUSIPs is reported as `unmapped_value_share`, and
    weights plus that share add up to 1, so callers can decide how to
    handle it (skip the snapshot, hold cash, etc.).
    """
    from collections import defaultdict
    from datetime import timezone

    # Aware UTC clock instead of the deprecated datetime.utcnow(); the
    # rendered string keeps the same "YYYY-MM-DDTHH:MM:SSZ" shape.
    run_ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Group rows by period once rather than rescanning every row per period.
    # Rows keep their original relative order so value ties sort as before.
    rows_by_period: dict = defaultdict(list)
    for (row_period, cusip), row in panel["rows"].items():
        rows_by_period[row_period].append((cusip, row))

    snapshots: list[Snapshot] = []
    for period in panel["periods"]:
        ref = filing_refs.get(period)
        if not ref or "file_date" not in ref:
            raise ValueError(f"missing file_date for period {period}")
        effective = _parse_iso(ref["file_date"])
        report = _parse_iso(period)

        period_rows = [
            (cusip, row["value_usd"]) for cusip, row in rows_by_period.get(period, ())
        ]
        period_rows.sort(key=lambda x: x[1], reverse=True)
        if top_n:
            period_rows = period_rows[:top_n]

        # `or 1` keeps the divisions below defined when the period has no
        # rows or all values are zero.
        total = sum(v for _, v in period_rows) or 1
        mapped_value = 0
        weights: dict[str, float] = {}
        for cusip, val in period_rows:
            ticker = cusip_to_ticker.get(cusip)
            if not ticker:
                continue
            mapped_value += val
            # Several CUSIPs may resolve to one ticker; accumulate them.
            weights[ticker] = weights.get(ticker, 0.0) + val / total

        unmapped_share = (total - mapped_value) / total
        snapshots.append(
            Snapshot(
                effective_date=effective,
                period_of_report=report,
                weights=weights,
                unmapped_value_share=unmapped_share,
                provenance={
                    "manager_cik": manager_cik,
                    "filing_ref": ref,
                    "run_id": run_id,
                    "run_ts_utc": run_ts,
                },
            )
        )

    snapshots.sort(key=lambda s: s.effective_date)
    return TargetWeights(manager_cik=manager_cik, manager_name=manager_name, snapshots=snapshots)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def load_filing_refs(raw_dir) -> dict:
    """Collect per-period filing metadata from the sub-directories of `raw_dir`.

    Sub-directories are visited in sorted order; non-directories are ignored.
    When a directory contains `filing_ref.json` (as written by
    `wofo.thirteenf.fetch.fetch_filing`), its decoded content is stored under
    the quarter-end date derived from the directory name.

    Otherwise (e.g., for filings downloaded by hand before fetch_filing
    existed) the entry is synthesized from `primary_doc.xml` and stored under
    the period found in that document.
    """
    import json
    from datetime import timedelta
    from pathlib import Path
    from wofo.thirteenf.parse import parse_primary_doc

    collected: dict = {}
    for quarter_dir in sorted(Path(raw_dir).iterdir()):
        if not quarter_dir.is_dir():
            continue

        saved_ref = quarter_dir / "filing_ref.json"
        if saved_ref.exists():
            payload = json.loads(saved_ref.read_text())
            collected[_quarter_to_iso(quarter_dir.name)] = payload
            continue

        # Synthesize from primary_doc.xml. file_date is unknown here, so
        # we approximate it as period + 45 days (the regulatory deadline).
        # Callers should re-pull via fetch_filing for accurate file_date.
        meta = parse_primary_doc(quarter_dir / "primary_doc.xml")
        estimated_file_date = _parse_iso(meta.period_iso) + timedelta(days=45)
        collected[meta.period_iso] = {
            "cik": meta.cik,
            "accession": None,
            "period_ending": meta.period_iso,
            "file_date": estimated_file_date.isoformat(),
            "form": meta.report_type,
            # Marks the file_date above as an estimate, not the real one.
            "approximate_file_date": True,
        }
    return collected
|
| 178 |
+
|
| 179 |
+
|
| 180 |
+
def _parse_iso(s: str) -> date:
    """Parse a `YYYY-MM-DD` string into a `date`; raises ValueError otherwise."""
    parsed = datetime.strptime(s, "%Y-%m-%d")
    return parsed.date()
|
| 182 |
+
|
| 183 |
+
|
| 184 |
+
def _quarter_to_iso(label: str) -> str:
    """Turn a quarter label into its quarter-end date, e.g. "2024Q4" -> "2024-12-31".

    The year is the first four characters and the quarter is the final
    character; a final character other than 1-4 raises KeyError.
    """
    quarter_ends = {"1": "03-31", "2": "06-30", "3": "09-30", "4": "12-31"}
    year, quarter = label[:4], label[-1]
    month_day = quarter_ends[quarter]
    return f"{year}-{month_day}"
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def _iter_holdings(holdings: Iterable) -> Iterable:  # pragma: no cover - placeholder
    """Placeholder hook: hands back `holdings` unchanged (same object)."""
    return holdings
|
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Best-effort 13F issuer-name -> ticker mapping.
|
| 2 |
+
|
| 3 |
+
CUSIP -> ticker is a licensed mapping (CGS) and we will not embed one.
|
| 4 |
+
What we can do, for free:
|
| 5 |
+
|
| 6 |
+
1. Pull SEC's `company_tickers.json` (CIK <-> ticker, public).
|
| 7 |
+
2. Normalize 13F `nameOfIssuer` strings and fuzzy-match against company
|
| 8 |
+
names from that file.
|
| 9 |
+
3. Allow callers to provide an `IssuerOverride` map for cases where the
|
| 10 |
+
heuristic is wrong or the issuer is a non-CIK security (ADR, ETF,
|
| 11 |
+
foreign listing).
|
| 12 |
+
|
| 13 |
+
This is intentionally conservative: when in doubt, the function returns
|
| 14 |
+
`None` for that issuer rather than guessing. The backtester treats
|
| 15 |
+
`None` tickers as "skip".
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import json
|
| 20 |
+
import os
|
| 21 |
+
import re
|
| 22 |
+
from dataclasses import dataclass, field
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from urllib.request import Request, urlopen
|
| 25 |
+
|
| 26 |
+
SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class IssuerOverride:
    """Caller-supplied ticker overrides, keyed by CUSIP or by issuer name.

    Lookups ignore case. If an issuer could be matched through either map,
    the CUSIP entry wins over the issuer-name entry.
    """

    # CUSIP -> ticker
    by_cusip: dict[str, str] = field(default_factory=dict)
    # issuer name -> ticker
    by_issuer: dict[str, str] = field(default_factory=dict)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
# Whole-word corporate / share-class boilerplate that is dropped from issuer
# names before comparison (matched regardless of case).
_TRIM_TOKENS = re.compile(
    r"\b(CORP|CORPORATION|INC|INCORPORATED|LTD|LIMITED|LLC|PLC|HLDGS?|HOLDINGS?|"
    r"GROUP|CO|COMPANY|CL|CLASS|COM|COMMON|NEW|THE|TR|TRUST|REIT)\b",
    flags=re.IGNORECASE,
)
# Any character that is neither a word character nor whitespace.
_PUNCT = re.compile(r"[^\w\s]")


def _norm(s: str) -> str:
    """Reduce an issuer or company name to a lower-case comparison key.

    Punctuation becomes a space, boilerplate tokens are removed, and the
    remaining words are lower-cased and joined by single spaces. A name made
    up only of boilerplate tokens yields an empty string.
    """
    without_punct = _PUNCT.sub(" ", s)
    without_boilerplate = _TRIM_TOKENS.sub(" ", without_punct)
    words = without_boilerplate.lower().split()
    return " ".join(words)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _load_sec_tickers(cache_path: Path | None = None) -> dict:
    """Return the decoded `company_tickers.json` document from SEC.

    Args:
        cache_path: optional location of a local copy. If the file exists it
            is returned without any network access; otherwise the document
            is downloaded and, when `cache_path` is given, saved there.

    Returns:
        The parsed JSON document.

    Raises:
        RuntimeError: if a download is needed and the `WOFO_SEC_UA`
            environment variable is unset or empty.
    """
    if cache_path and cache_path.exists():
        # Read raw bytes: the cache is written with write_bytes() below, and
        # json.loads() detects the UTF encoding itself. read_text() would
        # decode with the locale's default encoding instead.
        return json.loads(cache_path.read_bytes())

    ua = os.environ.get("WOFO_SEC_UA")
    if not ua:
        raise RuntimeError(
            "WOFO_SEC_UA env var required to fetch SEC company_tickers.json"
        )
    req = Request(SEC_TICKERS_URL, headers={"User-Agent": ua})
    with urlopen(req, timeout=30) as resp:
        body = resp.read()

    # Parse before caching so an undecodable response is never persisted.
    data = json.loads(body)
    if cache_path:
        cache_path.parent.mkdir(parents=True, exist_ok=True)
        cache_path.write_bytes(body)
    return data
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def resolve_tickers(
    issuers: dict[str, str],  # cusip -> issuer name
    *,
    overrides: IssuerOverride | None = None,
    sec_tickers_cache: Path | None = None,
) -> tuple[dict[str, str | None], dict[str, str]]:
    """Resolve {cusip: issuer_name} -> ({cusip: ticker_or_None}, {cusip: source}).

    `source` is one of: 'override-cusip', 'override-issuer', 'sec-exact',
    'sec-prefix', 'unmapped'.

    Args:
        issuers: CUSIP -> issuer name as reported in the filing.
        overrides: manual overrides, checked before any SEC data. CUSIP
            overrides are checked before issuer-name overrides.
        sec_tickers_cache: optional cache file handed to `_load_sec_tickers`.

    The SEC ticker file is only loaded when at least one issuer is not
    settled by an override. A name that normalizes to an empty string (it
    consisted only of boilerplate tokens) carries no identifying information
    and is reported as 'unmapped' rather than matched against anything.
    """
    overrides = overrides or IssuerOverride()
    over_cusip = {k.upper(): v for k, v in overrides.by_cusip.items()}
    over_issuer = {_norm(k): v for k, v in overrides.by_issuer.items()}
    # An override key that normalizes to "" would match every issuer whose
    # name also normalizes to ""; drop it instead of guessing.
    over_issuer.pop("", None)

    # Normalize each name once. Names of CUSIP-overridden issuers are never
    # inspected.
    normalized = {
        cusip: _norm(name)
        for cusip, name in issuers.items()
        if cusip.upper() not in over_cusip
    }

    # Only touch the SEC file if some issuer could actually use it.
    needs_sec = any(n and n not in over_issuer for n in normalized.values())
    sec = _load_sec_tickers(sec_tickers_cache) if needs_sec else {}

    sec_index: dict[str, str] = {}
    for entry in sec.values() if isinstance(sec, dict) else []:
        title = entry.get("title", "")
        ticker = entry.get("ticker", "")
        if not (title and ticker):
            continue
        key = _norm(title)
        if key:  # skip titles made only of boilerplate tokens
            sec_index[key] = ticker.upper()

    out: dict[str, str | None] = {}
    src: dict[str, str] = {}
    for cusip in issuers:
        cusip_key = cusip.upper()
        if cusip_key in over_cusip:
            out[cusip] = over_cusip[cusip_key].upper()
            src[cusip] = "override-cusip"
            continue
        n = normalized[cusip]
        if n and n in over_issuer:
            out[cusip] = over_issuer[n].upper()
            src[cusip] = "override-issuer"
            continue
        if n and n in sec_index:
            out[cusip] = sec_index[n]
            src[cusip] = "sec-exact"
            continue
        # Prefix match: issuer name starts with a known company name (or vice
        # versa). Conservative: require >=2 tokens on both sides to avoid
        # false positives, and accept only an unambiguous single candidate.
        if len(n.split()) >= 2:
            candidates = [
                t for k, t in sec_index.items()
                if (k.startswith(n) or n.startswith(k)) and len(k.split()) >= 2
            ]
            if len(candidates) == 1:
                out[cusip] = candidates[0]
                src[cusip] = "sec-prefix"
                continue
        out[cusip] = None
        src[cusip] = "unmapped"
    return out, src
|