Claude committed on
Commit
7ea334b
·
unverified ·
1 Parent(s): 4825c21

feat(wofo): add prices, research, backtest, and Phase-1 agent layers

Browse files

End-to-end plumbing for the wofo Phase-1 (research-only) loop.

- wofo/prices/: PriceSource protocol with synthetic (deterministic, for
tests) and Stooq (free daily CSV) adapters. Production feeds plug in
behind the same protocol.
- wofo/research/: follow-the-filer strategy turns a 13F panel into a
dated TargetWeights series with provenance. Effective-date is the
filing date (not period-of-report) to avoid look-ahead bias.
Issuer-name -> ticker mapping is heuristic with manual overrides
since CUSIP -> ticker is licensed (CGS).
- wofo/backtest/: minimal portfolio backtester (target weights x prices
-> daily NAV) with CAGR / Sharpe / max-drawdown metrics. No numpy
dependency. Unknown tickers are skipped to cash.
- wofo/agent/: Phase-1 agent skeleton. Defines read-only research tools
(list_local_filings, summarize_panel, top_holdings, qoq_activity)
and a Claude tool-use runner. Explicitly NO order-entry tools — that
requires Phase-2 promotion + counsel sign-off.
- tests/: 18 tests pass against committed sample data, no network.
- pytest.ini + confcutdir scope test discovery away from the legacy
Zuup app at the repo root.
- wofo/agent/demo_e2e.py: end-to-end plumbing demo (panel -> strategy
-> synthetic backtest), runs offline.

https://claude.ai/code/session_01C97VcztNaYLWwesHfWn6iE

pytest.ini ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [pytest]
2
+ testpaths = tests
3
+ addopts = --import-mode=importlib --confcutdir=tests --rootdir=tests
4
+ pythonpath = .
tests/test_agent_tools.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test the research tools without invoking the model."""
2
+ from wofo.agent import dispatch_tool, TOOLS
3
+
4
+
5
+ def test_tools_registry_shape():
6
+ names = {t["name"] for t in TOOLS}
7
+ assert names == {"list_local_filings", "summarize_panel", "top_holdings", "qoq_activity"}
8
+ for t in TOOLS:
9
+ assert "input_schema" in t and t["input_schema"]["type"] == "object"
10
+
11
+
12
+ def test_list_local_filings_returns_periods():
13
+ r = dispatch_tool("list_local_filings", {})
14
+ assert r.ok and "periods" in r.content
15
+ assert len(r.content["periods"]) >= 5
16
+
17
+
18
+ def test_summarize_panel():
19
+ r = dispatch_tool("summarize_panel", {})
20
+ assert r.ok
21
+ assert r.content["manager"] == "Situational Awareness LP"
22
+ assert r.content["cik"] == "0002045724"
23
+ assert len(r.content["periods"]) >= 5
24
+
25
+
26
+ def test_top_holdings_known_period():
27
+ r = dispatch_tool("top_holdings", {"period": "2025-12-31", "n": 3})
28
+ assert r.ok
29
+ issuers = [h["issuer"] for h in r.content["holdings"]]
30
+ assert "COREWEAVE INC" in issuers
31
+ assert len(r.content["holdings"]) == 3
32
+
33
+
34
+ def test_unknown_period_errors_cleanly():
35
+ r = dispatch_tool("top_holdings", {"period": "1999-12-31"})
36
+ assert not r.ok and "unknown period" in r.error
37
+
38
+
39
+ def test_unknown_tool_errors_cleanly():
40
+ r = dispatch_tool("place_trade", {})
41
+ assert not r.ok and "unknown tool" in r.error
tests/test_backtest.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backtester tests using the synthetic price source."""
2
+ from datetime import date
3
+
4
+ from wofo.backtest import run_backtest, summary, max_drawdown, sharpe
5
+ from wofo.prices import SyntheticPriceSource
6
+ from wofo.research.follow_the_filer import TargetWeights, Snapshot
7
+
8
+
9
+ def _two_snapshot_strategy() -> TargetWeights:
10
+ return TargetWeights(
11
+ manager_cik="0000000000",
12
+ manager_name="Test Manager",
13
+ snapshots=[
14
+ Snapshot(
15
+ effective_date=date(2024, 1, 15),
16
+ period_of_report=date(2023, 12, 31),
17
+ weights={"AAA": 0.6, "BBB": 0.4},
18
+ unmapped_value_share=0.0,
19
+ provenance={"test": "snap1"},
20
+ ),
21
+ Snapshot(
22
+ effective_date=date(2024, 7, 15),
23
+ period_of_report=date(2024, 6, 30),
24
+ weights={"AAA": 0.3, "BBB": 0.3, "CCC": 0.4},
25
+ unmapped_value_share=0.0,
26
+ provenance={"test": "snap2"},
27
+ ),
28
+ ],
29
+ )
30
+
31
+
32
def test_backtest_runs_and_produces_nav():
    """The backtest yields a date-aligned, positive, finite NAV series with rebalances."""
    import math

    src = SyntheticPriceSource(drift=0.0, vol=0.01)
    res = run_backtest(_two_snapshot_strategy(), src, start_cash=1_000_000.0, end_date=date(2024, 12, 31))
    assert len(res.nav) == len(res.dates) > 100
    # NAV must be strictly positive and finite. `v == v` alone only rules out
    # NaN and would let +inf through, so use math.isfinite.
    assert all(v > 0 and math.isfinite(v) for v in res.nav)
    # Two snapshots -> at least two rebalances.
    assert len(res.rebalance_dates) >= 2
40
+
41
+
42
+ def test_backtest_metrics_make_sense():
43
+ src = SyntheticPriceSource(drift=0.0, vol=0.005)
44
+ res = run_backtest(_two_snapshot_strategy(), src, end_date=date(2024, 12, 31))
45
+ s = summary(res.dates, res.nav)
46
+ assert s["n_days"] == len(res.dates)
47
+ # Metrics should be finite numbers.
48
+ assert isinstance(s["sharpe"], float)
49
+ assert 0.0 <= s["max_drawdown"] <= 1.0
50
+
51
+
52
def test_unknown_ticker_skipped_not_crashed():
    """A ticker the price source cannot serve is left as cash rather than crashing the run."""
    from wofo.prices import NotFound

    class FlakySource:
        """Wrap a price source and raise NotFound for the ticker "MISSING"."""

        def __init__(self, inner):
            self.inner = inner

        def daily(self, ticker, start, end):
            if ticker == "MISSING":
                raise NotFound(ticker)
            return self.inner.daily(ticker, start, end)

    # The synthetic source returns data for any string, so a wrapper is needed
    # to force a NotFound for one of the target tickers.
    src = SyntheticPriceSource()
    tw = TargetWeights(
        manager_cik="0", manager_name="t",
        snapshots=[Snapshot(
            effective_date=date(2024, 1, 15),
            period_of_report=date(2023, 12, 31),
            weights={"AAA": 0.5, "MISSING": 0.5},
            unmapped_value_share=0.0,
            provenance={},
        )],
    )
    res = run_backtest(tw, FlakySource(src), end_date=date(2024, 6, 30))
    # The portfolio should still run; cash share should be ~50% because half the target was unbuyable.
    assert max(res.cash_share[10:]) >= 0.4
79
+
80
+
81
+ def test_max_drawdown_known_series():
82
+ nav = [100, 110, 105, 90, 95, 120]
83
+ # peak 110 -> trough 90 -> mdd = (110-90)/110 = 0.1818...
84
+ assert abs(max_drawdown(nav) - (110 - 90) / 110) < 1e-9
85
+
86
+
87
+ def test_sharpe_zero_for_flat_series():
88
+ assert sharpe([100.0] * 200) == 0.0
tests/test_research.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the follow-the-filer strategy + issuer mapping."""
2
+ from datetime import date
3
+ from pathlib import Path
4
+
5
+ from wofo.research import follow_the_filer, IssuerOverride
6
+ from wofo.research.follow_the_filer import load_filing_refs
7
+ from wofo.research.issuer_map import _norm
8
+ from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
9
+
10
+
11
+ RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
12
+
13
+
14
+ def test_norm_strips_corporate_suffixes():
15
+ assert _norm("Constellation Energy Corp") == _norm("Constellation Energy Corporation")
16
+ assert _norm("BLOOM ENERGY CORP") == "bloom energy"
17
+ assert _norm("Lumentum Hldgs Inc") == "lumentum"
18
+
19
+
20
+ def test_follow_the_filer_with_overrides():
21
+ # Manual map covers what we need without hitting the network.
22
+ overrides = IssuerOverride(by_issuer={
23
+ "CONSTELLATION ENERGY CORP": "CEG",
24
+ "MARVELL TECHNOLOGY INC": "MRVL",
25
+ "MODINE MFG CO": "MOD",
26
+ "ANTERIX INC": "ATEX",
27
+ "CIPHER MINING INC": "CIFR",
28
+ "VISTRA CORP": "VST",
29
+ })
30
+
31
+ pairs = []
32
+ for q in sorted(p for p in RAW.iterdir() if p.is_dir()):
33
+ pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
34
+ panel = build_panel(pairs)
35
+ refs = load_filing_refs(RAW)
36
+
37
+ # Build cusip -> name from panel issuers, then resolve via overrides only
38
+ # (skip SEC fetch by pre-populating the mapping ourselves).
39
+ cusip_to_ticker: dict[str, str | None] = {}
40
+ for cusip, name in panel["issuers"].items():
41
+ cusip_to_ticker[cusip] = overrides.by_issuer.get(name)
42
+
43
+ tw = follow_the_filer(
44
+ panel,
45
+ filing_refs=refs,
46
+ cusip_to_ticker=cusip_to_ticker,
47
+ manager_cik="0002045724",
48
+ manager_name="Situational Awareness LP",
49
+ )
50
+ assert tw.manager_cik == "0002045724"
51
+ assert len(tw.snapshots) == 5
52
+ # First snapshot: most positions will be unmapped because we only added 6 overrides.
53
+ s0 = tw.snapshots[0]
54
+ # Mapped weights + unmapped share == 1.
55
+ assert abs(sum(s0.weights.values()) + s0.unmapped_value_share - 1.0) < 1e-6
56
+ # Effective date should be on or after period-of-report.
57
+ for s in tw.snapshots:
58
+ assert s.effective_date >= s.period_of_report
tests/test_thirteenf.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for the 13F pipeline using committed sample data."""
2
+ from pathlib import Path
3
+
4
+ from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
5
+
6
+ RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
7
+
8
+
9
+ def test_parse_2024q4_meta():
10
+ m = parse_primary_doc(RAW / "2024Q4" / "primary_doc.xml")
11
+ assert m.cik == "0002045724"
12
+ assert m.manager_name == "Situational Awareness LP"
13
+ assert m.period_iso == "2024-12-31"
14
+ assert m.crd_number == "000333011"
15
+ assert m.sec_file_number == "801-132039"
16
+ assert m.is_amendment is False
17
+
18
+
19
+ def test_parse_2024q4_holdings_match_summary():
20
+ m = parse_primary_doc(RAW / "2024Q4" / "primary_doc.xml")
21
+ h = parse_infotable(RAW / "2024Q4" / "infotable.xml")
22
+ assert len(h) == m.table_entry_total
23
+ assert sum(x.value_usd for x in h) == m.table_value_total
24
+
25
+
26
+ def test_panel_periods_sorted_and_unique():
27
+ pairs = []
28
+ for q in sorted(p for p in RAW.iterdir() if p.is_dir()):
29
+ pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
30
+ panel = build_panel(pairs)
31
+ assert panel["periods"] == sorted(set(panel["periods"]))
32
+ assert len(panel["periods"]) == 5
33
+
34
+
35
+ def test_qoq_initial_period():
36
+ pairs = []
37
+ for q in sorted(p for p in RAW.iterdir() if p.is_dir()):
38
+ pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
39
+ panel = build_panel(pairs)
40
+ deltas = qoq_changes(panel)
41
+ initials = [d for d in deltas if d["period"] == panel["periods"][0]]
42
+ assert all(d["action"] == "INITIAL" for d in initials)
43
+ # Every position in the first quarter should be classified.
44
+ assert {d["cusip"] for d in initials} == {c for (p, c) in panel["rows"] if p == panel["periods"][0]}
45
+
46
+
47
+ def test_concentration_monotonic_aum():
48
+ pairs = []
49
+ for q in sorted(p for p in RAW.iterdir() if p.is_dir()):
50
+ pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
51
+ panel = build_panel(pairs)
52
+ conc = concentration(panel)
53
+ aums = [conc[p]["total_value_usd"] for p in panel["periods"]]
54
+ # SA LP grew every quarter in the sample window; if this changes when re-pulled,
55
+ # this test should be updated, not silenced.
56
+ assert all(b > a for a, b in zip(aums, aums[1:])), aums
wofo/README.md CHANGED
@@ -14,6 +14,20 @@ wofo/
14
  │ ├── parse.py # XML → dataclasses
15
  │ ├── analyze.py # panel + qoq + concentration
16
  │ └── cli.py # `python -m wofo.thirteenf.cli {pull,analyze}`
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  └── data/
18
  └── 13f/
19
  ├── raw/ # one dir per quarter, primary_doc + infotable
@@ -51,6 +65,54 @@ python -m wofo.thirteenf.cli analyze
51
  The analyze step prints a per-quarter summary and writes JSON +
52
  `REPORT.md` to `wofo/data/13f/processed/`.
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  ## What `wofo` will not do
55
 
56
  - File legal or tax documents on your behalf.
 
14
  │ ├── parse.py # XML → dataclasses
15
  │ ├── analyze.py # panel + qoq + concentration
16
  │ └── cli.py # `python -m wofo.thirteenf.cli {pull,analyze}`
17
+ ├── prices/ # Pluggable price data sources
18
+ │ ├── source.py # PriceSource protocol
19
+ │ ├── synthetic.py # Deterministic random walk (tests / offline)
20
+ │ └── stooq.py # Free Stooq daily-CSV adapter (prototyping)
21
+ ├── research/ # Strategy generators
22
+ │ ├── follow_the_filer.py # 13F panel → dated target weights
23
+ │ └── issuer_map.py # Issuer name → ticker (heuristic + overrides)
24
+ ├── backtest/ # Minimal portfolio backtester
25
+ │ ├── portfolio.py # Target weights × prices → daily NAV
26
+ │ └── metrics.py # CAGR / Sharpe / max drawdown
27
+ ├── agent/ # Phase-1 (research-only) agent
28
+ │ ├── tools.py # Read-only tools the agent may call
29
+ │ ├── runner.py # Claude tool-use loop
30
+ │ └── demo_e2e.py # Plumbing demo (no API key required)
31
  └── data/
32
  └── 13f/
33
  ├── raw/ # one dir per quarter, primary_doc + infotable
 
65
  The analyze step prints a per-quarter summary and writes JSON +
66
  `REPORT.md` to `wofo/data/13f/processed/`.
67
 
68
+ ## Quick start: end-to-end strategy → backtest demo
69
+
70
+ The demo wires panel → follow-the-filer → synthetic backtest. Synthetic
71
+ prices are NOT real returns — this is a plumbing check.
72
+
73
+ ```bash
74
+ python -m wofo.agent.demo_e2e
75
+ ```
76
+
77
+ To run with real prices, swap `SyntheticPriceSource` for a real adapter:
78
+
79
+ ```python
80
+ from wofo.prices.stooq import StooqPriceSource
81
+ src = StooqPriceSource()
82
+ ```
83
+
84
+ Or implement your own adapter against the `wofo.prices.PriceSource`
85
+ protocol — Polygon, Tiingo, IBKR historical, etc.
86
+
87
+ ## Quick start: agent loop (Phase 1, research only)
88
+
89
+ ```bash
90
+ pip install anthropic
91
+ export ANTHROPIC_API_KEY=sk-ant-...
92
+
93
+ python - <<'PY'
94
+ from wofo.agent import run_research_loop
95
+ out = run_research_loop(
96
+ "Summarize Situational Awareness LP's Q4 2025 portfolio "
97
+ "and the largest position changes from Q3 to Q4."
98
+ )
99
+ print(out["final_text"])
100
+ PY
101
+ ```
102
+
103
+ The agent has access only to read-only research tools
104
+ (`list_local_filings`, `summarize_panel`, `top_holdings`,
105
+ `qoq_activity`). It cannot place orders, transfer funds, or modify any
106
+ account. See `wofo/agent/tools.py` for the tool schemas.
107
+
108
+ ## Tests
109
+
110
+ ```bash
111
+ python -m pytest
112
+ ```
113
+
114
+ All tests run against committed sample data (no network required).
115
+
116
  ## What `wofo` will not do
117
 
118
  - File legal or tax documents on your behalf.
wofo/agent/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """wofo agent — Phase 1 (research only).
2
+
3
+ This module defines the *tools* the wofo agent is allowed to call and
4
+ the orchestration loop that drives it. By construction, Phase 1
5
+ exposes only **read-only research tools** — no execution, no order
6
+ entry, no broker connectivity. Phase 2 / 3 will live in separate
7
+ modules and require a deliberate code change (and counsel sign-off)
8
+ to enable.
9
+
10
+ The agent uses the Anthropic Python SDK if available; if it is not
11
+ installed, the tools can still be invoked directly from Python.
12
+ """
13
+ from .tools import TOOLS, dispatch_tool, ToolResult
14
+ from .runner import run_research_loop
15
+
16
+ __all__ = ["TOOLS", "dispatch_tool", "ToolResult", "run_research_loop"]
wofo/agent/demo_e2e.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """End-to-end demo: 13F panel -> follow-the-filer strategy -> synthetic backtest.
2
+
3
+ Runs entirely offline using `SyntheticPriceSource`, so the resulting
4
+ NAV is **not** a real return — it is a sanity check that the plumbing
5
+ works. Replace the price source with a real one (Stooq, Polygon, etc.)
6
+ to get meaningful numbers.
7
+
8
+ python -m wofo.agent.demo_e2e
9
+ """
10
+ from __future__ import annotations
11
+
12
+ from datetime import date
13
+ from pathlib import Path
14
+
15
+ from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
16
+ from wofo.research import IssuerOverride, follow_the_filer
17
+ from wofo.research.follow_the_filer import load_filing_refs
18
+ from wofo.backtest import run_backtest, summary
19
+ from wofo.prices import SyntheticPriceSource
20
+
21
+
22
+ # Hand-curated overrides covering most names that appear in SA LP filings.
23
+ # Update as needed; this is a documentation artifact, not a complete map.
24
+ DEFAULT_OVERRIDES = IssuerOverride(by_issuer={
25
+ "COREWEAVE INC": "CRWV",
26
+ "BLOOM ENERGY CORP": "BE",
27
+ "INTEL CORP": "INTC",
28
+ "LUMENTUM HLDGS INC": "LITE",
29
+ "CORE SCIENTIFIC INC NEW": "CORZ",
30
+ "IREN LIMITED": "IREN",
31
+ "APPLIED DIGITAL CORP": "APLD",
32
+ "SANDISK CORP": "SNDK",
33
+ "EQT CORP": "EQT",
34
+ "CIPHER MINING INC": "CIFR",
35
+ "COHERENT CORP": "COHR",
36
+ "CONSTELLATION ENERGY CORP": "CEG",
37
+ "MARVELL TECHNOLOGY INC": "MRVL",
38
+ "MODINE MFG CO": "MOD",
39
+ "ANTERIX INC": "ATEX",
40
+ "VISTRA CORP": "VST",
41
+ "NVIDIA CORPORATION": "NVDA",
42
+ "BROADCOM INC": "AVGO",
43
+ "TAIWAN SEMICONDUCTOR MFG LTD": "TSM",
44
+ "MICRON TECHNOLOGY INC": "MU",
45
+ "WESTERN DIGITAL CORP": "WDC",
46
+ "SEAGATE TECHNOLOGY HLDNGS PL": "STX",
47
+ "GALAXY DIGITAL INC.": "GLXY",
48
+ "VANECK ETF TRUST": None, # ETF; mapping is ambiguous without ticker
49
+ "CLEANSPARK INC": "CLSK",
50
+ "BITFARMS LTD": "BITF",
51
+ "LIBERTY ENERGY INC": "LBRT",
52
+ "INFOSYS LTD": "INFY",
53
+ "PROPETRO HLDG CORP": "PUMP",
54
+ "BABCOCK & WILCOX ENTERPRISES": "BW",
55
+ "POWER SOLUTIONS INTL INC": "PSIX",
56
+ "WHITEFIBER INC": "WYFI",
57
+ "KILROY RLTY CORP": "KRC",
58
+ })
59
+
60
+
61
def main() -> None:
    """Run the offline demo: parse local 13F filings, build targets, backtest, print.

    Reads every quarter directory under the committed raw 13F folder, maps
    issuers to tickers via DEFAULT_OVERRIDES only, and backtests the resulting
    target weights against synthetic prices. All output goes to stdout.
    """
    raw = Path(__file__).resolve().parents[2] / "wofo" / "data" / "13f" / "raw"
    quarter_dirs = sorted(entry for entry in raw.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    refs = load_filing_refs(raw)

    # Issuers without an override map to None.
    cusip_to_ticker: dict[str, str | None] = {
        cusip: DEFAULT_OVERRIDES.by_issuer.get(name)
        for cusip, name in panel["issuers"].items()
    }

    tw = follow_the_filer(
        panel,
        filing_refs=refs,
        cusip_to_ticker=cusip_to_ticker,
        manager_cik="0002045724",
        manager_name="Situational Awareness LP",
        run_id="demo_e2e",
    )

    print(f"Manager: {tw.manager_name}")
    for snap in tw.snapshots:
        line = (
            f" effective {snap.effective_date} report {snap.period_of_report} "
            f"mapped={len(snap.weights)} weight_total={sum(snap.weights.values()):.1%} "
            f"unmapped={snap.unmapped_value_share:.1%}"
        )
        print(line)

    # Synthetic backtest is a plumbing check, not a real return.
    src = SyntheticPriceSource(drift=0.0004, vol=0.02)
    res = run_backtest(tw, src, start_cash=1_000_000.0, end_date=date(2026, 4, 30))
    stats = summary(res.dates, res.nav)
    print()
    print("Synthetic backtest summary (NOT real returns):")
    for key, value in stats.items():
        # Floats get fixed-width numeric formatting; everything else prints as-is.
        rendered = f"{value:>12,.4f}" if isinstance(value, float) else f"{value}"
        print(f" {key:<14} {rendered}")
102
+
103
+
104
+ if __name__ == "__main__":
105
+ main()
wofo/agent/runner.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Research-loop runner.
2
+
3
+ Drives a Claude model through a tool-use loop using only the read-only
4
+ research tools defined in `wofo.agent.tools`. Requires the `anthropic`
5
+ SDK at runtime; if it is not installed the module still imports so the
6
+ tools can be used without a model.
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import os
11
+ from typing import Any
12
+
13
+ from .tools import TOOLS, dispatch_tool
14
+
15
+
16
+ SYSTEM_PROMPT = """You are wofo, the Wooden Family Office research agent.
17
+
18
+ You operate in Phase 1 — research only. You have NO ability to place
19
+ orders, transfer funds, or modify any account. You may only call the
20
+ read-only research tools provided.
21
+
22
+ Your job is to produce well-sourced, dated research notes:
23
+ - Cite filings by accession number when relevant.
24
+ - Be explicit about staleness (13F is delayed by up to 45 days).
25
+ - When you don't know, say so. Never fabricate tickers or numbers.
26
+ - Distinguish "the manager held X" (fact) from "X is a good buy"
27
+ (opinion that needs justification).
28
+
29
+ When you finish, return a research note in markdown.
30
+ """.strip()
31
+
32
+
33
def run_research_loop(
    user_prompt: str,
    *,
    model: str = "claude-opus-4-7",
    max_iterations: int = 8,
    max_tokens: int = 4096,
) -> dict:
    """Run a single research-task loop and return the final transcript.

    Each iteration sends the conversation to the model with the read-only
    tool definitions, executes every `tool_use` block via `dispatch_tool`,
    and feeds the results back as the next user turn. The loop ends when the
    model stops for any reason other than `tool_use`, or after
    `max_iterations` model calls.

    Args:
        user_prompt: The research task, sent as the first user message.
        model: Model identifier passed to the Messages API.
        max_iterations: Upper bound on model calls.
        max_tokens: Per-call output token limit.

    Returns:
        A dict with `final_text` (concatenated text blocks of the last
        assistant turn, or "(max iterations reached)"), `messages` (full
        transcript), and `tool_calls` (audit log of name/input/ok/error).
        It does not stream.

    Raises:
        RuntimeError: If the `anthropic` SDK is not installed or
            ANTHROPIC_API_KEY is not set.
    """
    try:
        import anthropic  # type: ignore
    except ImportError as e:
        raise RuntimeError(
            "anthropic SDK is required to run the agent loop; "
            "`pip install anthropic` and set ANTHROPIC_API_KEY."
        ) from e

    if not os.environ.get("ANTHROPIC_API_KEY"):
        raise RuntimeError("ANTHROPIC_API_KEY is not set.")

    client = anthropic.Anthropic()
    messages: list[dict[str, Any]] = [{"role": "user", "content": user_prompt}]
    tool_calls: list[dict[str, Any]] = []

    for _ in range(max_iterations):
        resp = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages,
        )
        # Append assistant turn.
        messages.append({"role": "assistant", "content": resp.content})

        if resp.stop_reason != "tool_use":
            text = "".join(b.text for b in resp.content if getattr(b, "type", None) == "text")
            return {"final_text": text, "messages": messages, "tool_calls": tool_calls}

        # Run every tool call in the assistant turn.
        tool_results: list[dict[str, Any]] = []
        for block in resp.content:
            if getattr(block, "type", None) != "tool_use":
                continue
            tr = dispatch_tool(block.name, block.input)
            tool_calls.append(
                {"name": block.name, "input": block.input, "ok": tr.ok, "error": tr.error}
            )
            # Serialize the result once; to_message() JSON-encodes the payload.
            payload = tr.to_message()
            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": payload["content"],
                    "is_error": payload["is_error"],
                }
            )
        messages.append({"role": "user", "content": tool_results})

    return {
        "final_text": "(max iterations reached)",
        "messages": messages,
        "tool_calls": tool_calls,
    }
wofo/agent/tools.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tools the wofo Phase-1 agent can call.
2
+
3
+ Each tool is a pure-Python function with a JSON-Schema-style signature.
4
+ The signatures double as the Anthropic tool-use definitions when the
5
+ agent is wired to Claude.
6
+
7
+ **No tool in this module places orders, transfers funds, or modifies
8
+ any account.** Adding such a tool requires:
9
+ 1. Counsel sign-off (see docs/family-office-counsel-packet.md).
10
+ 2. Phase-2 promotion in docs/wofo-architecture.md.
11
+ 3. Hard guardrails (per-trade caps, daily caps, kill switch).
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+ from typing import Any, Callable
19
+
20
+ from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
21
+
22
+
23
+ REPO_ROOT = Path(__file__).resolve().parents[2]
24
+ RAW_DIR = REPO_ROOT / "wofo" / "data" / "13f" / "raw"
25
+
26
+
27
@dataclass
class ToolResult:
    """Outcome of one tool invocation: the tool name, a success flag, and a payload or error."""

    name: str
    ok: bool
    content: Any
    error: str | None = None

    def to_message(self) -> dict:
        """Render the result as a `tool_result` dict.

        On success the content is JSON-encoded; on failure the error text is
        used, falling back to the literal "error" when none was recorded.
        """
        if self.ok:
            body = json.dumps(self.content)
        else:
            body = self.error or "error"
        return {"type": "tool_result", "content": body, "is_error": not self.ok}
40
+
41
+
42
+ # --- Tool implementations ---------------------------------------------------
43
+
44
+
45
def t_list_local_filings(manager_dir: str = "") -> dict:
    """List 13F filing periods we have on disk.

    Args:
        manager_dir: Optional directory, relative to the repository root.
            When empty, the default raw 13F directory (RAW_DIR) is used.

    Returns:
        {"periods": sorted sub-directory names, "base": directory listed}.
        `periods` is empty when the directory does not exist.

    Raises:
        ValueError: If `manager_dir` resolves to a location outside the
            repository root.
    """
    if manager_dir:
        base = (REPO_ROOT / manager_dir).resolve()
        # manager_dir is supplied by the model: an absolute path or `..`
        # segments would otherwise let it enumerate arbitrary directories.
        if base != REPO_ROOT and REPO_ROOT not in base.parents:
            raise ValueError(f"manager_dir must stay inside the repository: {manager_dir}")
    else:
        base = RAW_DIR
    if not base.is_dir():
        return {"periods": [], "base": str(base)}
    periods = sorted(p.name for p in base.iterdir() if p.is_dir())
    return {"periods": periods, "base": str(base)}
52
+
53
+
54
def t_summarize_panel() -> dict:
    """Build a panel from the local 13F filings and summarize it.

    Manager name and CIK come from the last quarter directory in sorted
    order; both are None when no quarter directories exist.
    """
    quarter_dirs = sorted(entry for entry in RAW_DIR.iterdir() if entry.is_dir())
    pairs = [
        (parse_primary_doc(qdir / "primary_doc.xml"), parse_infotable(qdir / "infotable.xml"))
        for qdir in quarter_dirs
    ]
    panel = build_panel(pairs)
    conc = concentration(panel)

    manager = cik = None
    if pairs:
        latest_meta = pairs[-1][0]
        manager = latest_meta.manager_name
        cik = latest_meta.cik

    return {
        "manager": manager,
        "cik": cik,
        "periods": panel["periods"],
        "concentration": conc,
        "totals": panel["totals"],
    }
70
+
71
+
72
def t_top_holdings(period: str, n: int = 10) -> dict:
    """Top N holdings, by reported value, for a given period.

    Args:
        period: Period of report; must be one of the panel's periods.
        n: Maximum number of holdings to return; must be non-negative.

    Returns:
        {"period": period, "holdings": [...]} where each holding has
        `cusip`, `issuer`, `value_usd` and `shares`, sorted by descending
        `value_usd`.

    Raises:
        ValueError: If `n` is negative or `period` is not in the panel.
    """
    # A negative n would silently slice from the tail (rows[:-1] drops only
    # the smallest holding), so reject it instead of returning a wrong list.
    if n < 0:
        raise ValueError(f"n must be >= 0; got {n}")
    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")
    rows = [
        {"cusip": c, "issuer": panel["issuers"].get(c, ""), "value_usd": r["value_usd"], "shares": r["shares"]}
        for (p, c), r in panel["rows"].items() if p == period
    ]
    rows.sort(key=lambda r: r["value_usd"], reverse=True)
    return {"period": period, "holdings": rows[:n]}
86
+
87
+
88
def t_qoq_activity(period: str) -> dict:
    """Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.

    Args:
        period: Period of report; must be one of the panel's periods.

    Returns:
        {"period": period, "rows": [...]} with the change records whose
        `period` equals the requested one.

    Raises:
        ValueError: If `period` is not in the panel.
    """
    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    # Match t_top_holdings: an unknown period is an error, not an empty result
    # that could be misread as "no activity that quarter".
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")
    deltas = qoq_changes(panel)
    return {"period": period, "rows": [d for d in deltas if d["period"] == period]}
96
+
97
+
98
+ # --- Registry ---------------------------------------------------------------
99
+
100
+
101
def _schema(name: str, description: str, props: dict, required: list[str]) -> dict:
    """Assemble one tool definition with an object-typed `input_schema`."""
    input_schema = {"type": "object", "properties": props, "required": required}
    return dict(name=name, description=description, input_schema=input_schema)
107
+
108
+
109
# Tool definitions handed to the model. Each entry is built by _schema and
# carries a name, a description and an object-typed input_schema. Every name
# listed here has a matching handler in _DISPATCH.
TOOLS: list[dict] = [
    _schema(
        "list_local_filings",
        "List 13F filing periods available on local disk.",
        {"manager_dir": {"type": "string", "description": "optional path; default = the SA LP raw dir"}},
        [],
    ),
    _schema(
        "summarize_panel",
        "Summarize the panel of 13F filings on disk: manager, periods, totals, concentration.",
        {},
        [],
    ),
    _schema(
        "top_holdings",
        "Return the top-N holdings (by reported value) for a given period (YYYY-MM-DD).",
        {
            "period": {"type": "string", "description": "Period of report, e.g. 2025-12-31"},
            "n": {"type": "integer", "description": "How many top holdings to return", "default": 10},
        },
        ["period"],
    ),
    _schema(
        "qoq_activity",
        "Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.",
        {"period": {"type": "string", "description": "Period of report, e.g. 2025-12-31"}},
        ["period"],
    ),
]
138
+
139
+
140
# Maps each tool name to its implementation. dispatch_tool looks names up
# here, so a name absent from this table is reported as an unknown tool.
_DISPATCH: dict[str, Callable[..., dict]] = {
    "list_local_filings": t_list_local_filings,
    "summarize_panel": t_summarize_panel,
    "top_holdings": t_top_holdings,
    "qoq_activity": t_qoq_activity,
}
146
+
147
+
148
def dispatch_tool(name: str, args: dict | None) -> ToolResult:
    """Invoke the named tool with keyword arguments and wrap the outcome.

    Never raises for tool failures: an unknown name or any exception from the
    tool comes back as a ToolResult with ok=False and a descriptive error.
    """
    handler = _DISPATCH.get(name)
    if handler is None:
        return ToolResult(name=name, ok=False, content=None, error=f"unknown tool: {name}")
    try:
        kwargs = args or {}
        payload = handler(**kwargs)
    except Exception as exc:  # surface errors to the model so it can recover
        failure = f"{type(exc).__name__}: {exc}"
        return ToolResult(name=name, ok=False, content=None, error=failure)
    return ToolResult(name=name, ok=True, content=payload)
wofo/backtest/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Minimal portfolio backtester.
2
+
3
+ Takes a `TargetWeights` series and a `PriceSource`, simulates a
4
+ periodically rebalanced long-only portfolio, and reports daily NAV +
5
+ summary metrics. Intentionally simple — fancier features (transaction
6
+ costs beyond a flat bps, slippage models, partial fills) belong in a
7
+ dedicated backtest engine; see `docs/repos.md`.
8
+ """
9
+ from .portfolio import run_backtest, BacktestResult
10
+ from .metrics import summary, sharpe, max_drawdown, cagr
11
+
12
+ __all__ = ["run_backtest", "BacktestResult", "summary", "sharpe", "max_drawdown", "cagr"]
wofo/backtest/metrics.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Backtest summary metrics.
2
+
3
+ No numpy dependency. The math is straightforward and the input series
4
+ are short (daily bars over a few years).
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import math
9
+ from datetime import date
10
+ from typing import Sequence
11
+
12
+
13
def _returns(nav: Sequence[float]) -> list[float]:
    """Simple period-over-period returns; a zero previous value yields 0.0."""
    values = iter(nav)
    try:
        previous = next(values)
    except StopIteration:
        return []
    changes: list[float] = []
    for current in values:
        if previous:
            changes.append((current - previous) / previous)
        else:
            changes.append(0.0)
        previous = current
    return changes
19
+
20
+
21
def cagr(dates: Sequence[date], nav: Sequence[float]) -> float:
    """Compound annual growth rate between the first and last observation.

    Years are measured as calendar days / 365.25 between `dates[0]` and
    `dates[-1]`.

    Returns:
        The annualized growth rate. 0.0 when there are fewer than two
        observations, the starting NAV is not positive, or no time elapsed;
        -1.0 when the ending NAV is zero or negative (total loss);
        `math.inf` if the annualized figure overflows a float.
    """
    if len(nav) < 2 or len(dates) < 2 or nav[0] <= 0:
        return 0.0
    span_days = (dates[-1] - dates[0]).days
    if span_days <= 0:
        # No elapsed time: annualizing is undefined. Clamping years to a tiny
        # positive number instead made the exponent ~1e9 and overflowed.
        return 0.0
    growth = nav[-1] / nav[0]
    if growth <= 0:
        # A negative base with a fractional exponent would yield a complex number.
        return -1.0
    years = span_days / 365.25
    try:
        return growth ** (1 / years) - 1
    except OverflowError:
        return math.inf
26
+
27
+
28
def sharpe(nav: Sequence[float], *, rf_annual: float = 0.0, periods_per_year: int = 252) -> float:
    """Annualized Sharpe ratio of a NAV series.

    Uses per-period excess returns over `rf_annual / periods_per_year`, the
    sample standard deviation (n - 1), and scales by sqrt(periods_per_year).
    Returns 0.0 with fewer than two returns or zero dispersion.
    """
    period_returns = _returns(nav)
    count = len(period_returns)
    if count < 2:
        return 0.0
    hurdle = rf_annual / periods_per_year
    excess = [r - hurdle for r in period_returns]
    average = sum(excess) / count
    squared_deviation = sum((x - average) ** 2 for x in excess)
    stdev = math.sqrt(squared_deviation / (count - 1))
    if stdev == 0:
        return 0.0
    return (average / stdev) * math.sqrt(periods_per_year)
40
+
41
+
42
def max_drawdown(nav: Sequence[float]) -> float:
    """Largest peak-to-trough decline as a positive fraction (0.25 means 25%).

    Points seen while the running peak is not positive are ignored.
    """
    running_peak = -math.inf
    worst = 0.0
    for value in nav:
        running_peak = max(running_peak, value)
        if running_peak <= 0:
            continue
        worst = max(worst, (running_peak - value) / running_peak)
    return worst
54
+
55
+
56
def summary(dates: Sequence[date], nav: Sequence[float]) -> dict:
    """Collect headline statistics for a dated NAV series into one dict.

    Empty inputs produce None dates and 0.0 NAV figures; total return is 0.0
    when the series is empty or starts at zero.
    """
    first_day = last_day = None
    if dates:
        first_day = dates[0].isoformat()
        last_day = dates[-1].isoformat()

    opening = closing = 0.0
    if nav:
        opening, closing = nav[0], nav[-1]

    if nav and opening:
        total_return = closing / opening - 1
    else:
        total_return = 0.0

    report = {
        "start_date": first_day,
        "end_date": last_day,
        "n_days": len(dates),
        "start_nav": opening,
        "end_nav": closing,
        "total_return": total_return,
    }
    report["cagr"] = cagr(dates, nav)
    report["sharpe"] = sharpe(nav)
    report["max_drawdown"] = max_drawdown(nav)
    return report
wofo/backtest/portfolio.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Portfolio backtester: target weights + price source -> daily NAV."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass, field
5
+ from datetime import date, timedelta
6
+ from typing import Sequence
7
+
8
+ from wofo.prices import PriceSource, NotFound
9
+ from wofo.research import TargetWeights, Snapshot
10
+
11
+
12
@dataclass
class BacktestResult:
    """Result of a backtest run: parallel per-day series plus rebalance bookkeeping."""

    # One entry per simulated day.
    dates: list[date]
    # Portfolio value indexed to start_cash.
    nav: list[float]
    # Per-day actual weights (ticker -> share of NAV).
    weights_history: list[dict[str, float]]
    # Days on which the book was rebalanced.
    rebalance_dates: list[date]
    # Day -> target tickers that could not be traded that day.
    skipped_tickers: dict[date, list[str]] = field(default_factory=dict)
    # Per-day share of NAV held as cash.
    cash_share: list[float] = field(default_factory=list)
20
+
21
+
22
+ def _build_price_panel(
23
+ tickers: set[str],
24
+ start: date,
25
+ end: date,
26
+ source: PriceSource,
27
+ ) -> tuple[dict[date, dict[str, float]], dict[str, list]]:
28
+ """Pull daily closes for each ticker; return {date: {ticker: close}} aligned."""
29
+ raw: dict[str, dict[date, float]] = {}
30
+ missing: list[str] = []
31
+ for t in sorted(tickers):
32
+ try:
33
+ bars = source.daily(t, start, end)
34
+ except NotFound:
35
+ missing.append(t)
36
+ continue
37
+ raw[t] = {b.d: b.close for b in bars}
38
+ # Trading-day axis = union of dates seen in any series.
39
+ all_dates = sorted({d for series in raw.values() for d in series})
40
+ panel: dict[date, dict[str, float]] = {}
41
+ for d in all_dates:
42
+ panel[d] = {}
43
+ for t, series in raw.items():
44
+ if d in series:
45
+ panel[d][t] = series[d]
46
+ return panel, {"missing_tickers": missing}
47
+
48
+
49
+ def _active_snapshot(snapshots: Sequence[Snapshot], d: date) -> Snapshot | None:
50
+ """Most recent snapshot whose effective_date <= d."""
51
+ active: Snapshot | None = None
52
+ for s in snapshots:
53
+ if s.effective_date <= d:
54
+ active = s
55
+ else:
56
+ break
57
+ return active
58
+
59
+
60
def _mark_price(
    panel: dict[date, dict[str, float]],
    prices: dict[str, float],
    ticker: str,
    d: date,
) -> float:
    """Today's close for `ticker` if present, else whatever `_last_close` finds."""
    if ticker in prices:
        return prices[ticker]
    return _last_close(panel, ticker, d)


def _position_weights(
    panel: dict[date, dict[str, float]],
    prices: dict[str, float],
    holdings: dict[str, float],
    d: date,
    nav_val: float,
) -> dict[str, float]:
    """Each holding's share of NAV; all zeros when NAV is zero."""
    if not nav_val:
        return {t: 0.0 for t in holdings}
    return {
        t: (sh * _mark_price(panel, prices, t, d)) / nav_val
        for t, sh in holdings.items()
    }


def run_backtest(
    target_weights: TargetWeights,
    price_source: PriceSource,
    *,
    start_cash: float = 1_000_000.0,
    rebalance_threshold_bps: float = 50.0,  # rebalance if any weight drifts > this many bps from target
    cost_bps: float = 5.0,  # slippage + commission, charged on each side (sell and buy)
    end_date: date | None = None,
) -> BacktestResult:
    """Run the backtest.

    Behavior:
    - Start with `start_cash` in cash on the first effective_date.
    - On each new snapshot's effective_date, mark the new target.
    - Between snapshots, rebalance only if any ticker's weight drifts beyond
      the threshold from its target.
    - Tickers in the target that the price source doesn't know, or that have
      no price on the day, are *skipped*; their target weight stays in cash
      (targets are not renormalized to 1).

    Args:
        target_weights: strategy output; its snapshots are sorted here by
            effective_date.
        price_source: daily-bar source queried once per ticker.
        start_cash: initial cash balance.
        rebalance_threshold_bps: drift trigger, in basis points of NAV.
        cost_bps: trading cost in basis points of traded notional.
        end_date: last date to request prices for; defaults to 365 days
            after the final snapshot's effective_date.

    Returns:
        A `BacktestResult` with one entry per date in the price panel.

    Raises:
        ValueError: `target_weights` has no snapshots.
        RuntimeError: the price source returned no bars for any ticker.

    NOTE: a rebalance liquidates the whole book and rebuys the targets, so
    `cost_bps` is charged on full turnover rather than on net trades.
    Purchases are sized from the pre-cost NAV, so when targets sum to ~1 the
    costs can leave a small negative cash balance.
    """
    if not target_weights.snapshots:
        raise ValueError("no snapshots")

    snapshots = sorted(target_weights.snapshots, key=lambda s: s.effective_date)
    start = snapshots[0].effective_date
    end = end_date or (snapshots[-1].effective_date + timedelta(days=365))
    all_tickers = {t for s in snapshots for t in s.weights}

    panel, meta = _build_price_panel(all_tickers, start, end, price_source)
    missing = set(meta["missing_tickers"])

    if not panel:
        raise RuntimeError("price source returned no data for any ticker")

    cost_rate = cost_bps / 10_000
    cash = start_cash
    holdings: dict[str, float] = {}  # ticker -> shares
    out = BacktestResult(dates=[], nav=[], weights_history=[], rebalance_dates=[])

    last_target_id: int | None = None
    for d in sorted(panel):
        prices = panel[d]
        # Mark to market.
        nav_val = cash + sum(sh * _mark_price(panel, prices, t, d) for t, sh in holdings.items())

        snap = _active_snapshot(snapshots, d)
        if snap is None:
            # No target in effect yet: record the day as all cash.
            out.dates.append(d)
            out.nav.append(nav_val)
            out.weights_history.append({})
            out.cash_share.append(1.0)
            continue

        # Keep only targets the source knows AND that have a price today.
        tradable_targets = {
            t: w for t, w in snap.weights.items() if t in prices and t not in missing
        }
        skipped_today = sorted(set(snap.weights) - set(tradable_targets))
        if skipped_today:
            out.skipped_tickers[d] = skipped_today

        # Snapshots are compared by identity; the sorted list keeps them alive.
        snap_id = id(snap)
        is_new_snapshot = snap_id != last_target_id
        last_target_id = snap_id

        # Current weights (excluding cash) and the worst drift from target.
        current_w = _position_weights(panel, prices, holdings, d, nav_val)
        max_drift_bps = max(
            (
                abs(current_w.get(t, 0.0) - tradable_targets.get(t, 0.0))
                for t in set(current_w) | set(tradable_targets)
            ),
            default=0.0,
        ) * 10_000

        if is_new_snapshot or max_drift_bps > rebalance_threshold_bps:
            # Liquidate everything to cash, then buy targets.
            for t, sh in holdings.items():
                px = _mark_price(panel, prices, t, d)
                cash += sh * px
                cash -= abs(sh * px) * cost_rate
            holdings = {}
            for t, w in tradable_targets.items():
                px = prices[t]
                target_dollars = nav_val * w
                cost = abs(target_dollars) * cost_rate
                holdings[t] = target_dollars / px
                cash -= target_dollars + cost
            out.rebalance_dates.append(d)
            # Recompute NAV after costs.
            nav_val = cash + sum(sh * _mark_price(panel, prices, t, d) for t, sh in holdings.items())

        out.dates.append(d)
        out.nav.append(nav_val)
        out.weights_history.append(_position_weights(panel, prices, holdings, d, nav_val))
        out.cash_share.append(cash / nav_val if nav_val else 1.0)

    return out
157
+
158
+
159
+ def _last_close(panel: dict[date, dict[str, float]], ticker: str, d: date) -> float:
160
+ """Most-recent close on or before `d`. 0 if never seen (fully out)."""
161
+ cur = d
162
+ for _ in range(10): # short walk-back; enough for weekends/holidays
163
+ cur -= timedelta(days=1)
164
+ if cur in panel and ticker in panel[cur]:
165
+ return panel[cur][ticker]
166
+ return 0.0
wofo/prices/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pluggable price-data sources.
2
+
3
+ The agent must work with multiple data vendors over time. Calling code
4
+ depends on the `PriceSource` protocol; concrete adapters live alongside.
5
+
6
+ Built-in adapters:
7
+ - `synthetic.SyntheticPriceSource` — deterministic random walk; for tests.
8
+ - `stooq.StooqPriceSource` — free daily CSV from stooq.com; for prototyping.
9
+
10
+ Production deployments should swap in a paid feed (Polygon, Tiingo, etc.)
11
+ behind the same protocol.
12
+ """
13
+ from .source import PriceSource, PriceBar, NotFound
14
+ from .synthetic import SyntheticPriceSource
15
+
16
+ __all__ = ["PriceSource", "PriceBar", "NotFound", "SyntheticPriceSource"]
wofo/prices/source.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Price-source protocol shared by all adapters."""
2
+ from __future__ import annotations
3
+
4
+ from dataclasses import dataclass
5
+ from datetime import date
6
+ from typing import Protocol, runtime_checkable
7
+
8
+
9
class NotFound(LookupError):
    """Signals that a price source does not know the requested ticker."""
11
+
12
+
13
@dataclass(frozen=True)
class PriceBar:
    """One immutable daily OHLCV bar."""

    d: date  # the day the bar describes
    open: float
    high: float
    low: float
    close: float
    volume: int
21
+
22
+
23
@runtime_checkable
class PriceSource(Protocol):
    """Structural interface for anything that can serve daily bars.

    Implementations must be deterministic for a given (ticker, start, end).
    For an unknown ticker they should raise `NotFound` instead of returning
    an empty list, so that an empty result can be read as "known ticker, no
    bars in this window" (for example, delisted).
    """

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Return the daily bars for `ticker` between `start` and `end`."""
        ...
wofo/prices/stooq.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Stooq.com daily-CSV adapter.
2
+
3
+ Free, no API key. Reasonable for prototyping; not appropriate for
4
+ production trading. Stooq's coverage and corporate-action handling are
5
+ not as clean as paid feeds.
6
+
7
+ URL pattern:
8
+ https://stooq.com/q/d/l/?s={symbol}&i=d&d1=YYYYMMDD&d2=YYYYMMDD
9
+
10
+ US tickers need a `.us` suffix on stooq.
11
+ """
12
+ from __future__ import annotations
13
+
14
+ import csv
15
+ import io
16
+ import os
17
+ import time
18
+ from datetime import date, datetime
19
+ from urllib.error import HTTPError, URLError
20
+ from urllib.request import Request, urlopen
21
+
22
+ from .source import NotFound, PriceBar
23
+
24
+
25
class StooqPriceSource:
    """Daily-bar adapter for stooq.com's CSV download endpoint.

    Requests are spaced at least `request_interval_s` seconds apart. Symbols
    without a "." get `suffix` appended (".us" by default).
    """

    BASE = "https://stooq.com/q/d/l/"

    def __init__(self, *, suffix: str = ".us", request_interval_s: float = 0.25, timeout_s: float = 30.0):
        self.suffix = suffix
        self.request_interval_s = request_interval_s
        self.timeout_s = timeout_s
        # Monotonic timestamp of the last request; 0.0 means "never".
        self._last_request: float = 0.0

    def _pace(self) -> None:
        """Sleep just long enough to honour `request_interval_s`."""
        elapsed = time.monotonic() - self._last_request
        if elapsed < self.request_interval_s:
            time.sleep(self.request_interval_s - elapsed)
        self._last_request = time.monotonic()

    def _fetch(self, url: str) -> str:
        """GET `url` and return the decoded body.

        Raises:
            NotFound: the server answered 404.
            HTTPError: any other HTTP error status (re-raised unchanged).
            RuntimeError: a transport-level failure (`URLError`).
        """
        self._pace()
        ua = os.environ.get("WOFO_HTTP_UA", "wofo-research/0.1")
        req = Request(url, headers={"User-Agent": ua})
        try:
            with urlopen(req, timeout=self.timeout_s) as resp:
                return resp.read().decode("utf-8", errors="replace")
        except HTTPError as e:
            # HTTPError subclasses URLError, so it must be handled first.
            if e.code == 404:
                raise NotFound(url) from e
            raise
        except URLError as e:
            raise RuntimeError(f"stooq fetch failed: {e}") from e

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Return daily bars for `ticker` between `start` and `end`.

        Raises:
            ValueError: `end` is before `start`.
            RuntimeError: the request quota was exhausted, or the transport
                failed.
            NotFound: the response carries no CSV data for this query.
        """
        if end < start:
            raise ValueError("end < start")
        symbol = ticker.lower()
        if "." not in symbol:
            symbol += self.suffix
        url = (
            f"{self.BASE}?s={symbol}&i=d"
            f"&d1={start.strftime('%Y%m%d')}&d2={end.strftime('%Y%m%d')}"
        )
        body = self._fetch(url)
        # NOTE(review): Stooq has been observed to answer over-quota requests
        # with a plain-text "Exceeded the daily hits limit" body; this is not
        # formally documented. Reporting it as NotFound would make callers
        # treat every ticker as unknown, so surface it as a hard failure.
        if "exceeded the daily hits limit" in body.lower():
            raise RuntimeError(f"stooq: daily hits limit exceeded while fetching {ticker}")
        # Stooq returns "No data" or empty for unknown / out-of-range queries.
        if not body or body.startswith("No data") or "Date,Open" not in body:
            raise NotFound(f"stooq: no data for {ticker} {start}..{end}")
        bars: list[PriceBar] = []
        for row in csv.DictReader(io.StringIO(body)):
            try:
                bars.append(
                    PriceBar(
                        d=datetime.strptime(row["Date"], "%Y-%m-%d").date(),
                        open=float(row["Open"]),
                        high=float(row["High"]),
                        low=float(row["Low"]),
                        close=float(row["Close"]),
                        volume=int(float(row.get("Volume") or 0)),
                    )
                )
            except (ValueError, KeyError):
                # Best effort: skip rows with missing or non-numeric fields.
                continue
        return bars
wofo/prices/synthetic.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Deterministic synthetic price source for tests / offline development.
2
+
3
+ The price path is a seeded random walk. Same (ticker, start, end) always
4
+ returns the same series, which lets backtests be reproducible without
5
+ network access.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ import random
11
+ from datetime import date, timedelta
12
+
13
+ from .source import PriceBar, PriceSource
14
+
15
+
16
class SyntheticPriceSource:
    """Seeded random-walk price generator, reproducible per ticker.

    The random stream is seeded from the ticker alone and the walk starts
    at `start_price` on `start`, so the same (ticker, start, end) always
    produces the same bars.
    """

    def __init__(self, *, drift: float = 0.0003, vol: float = 0.02, start_price: float = 100.0):
        self.drift, self.vol, self.start_price = drift, vol, start_price

    def _seed(self, ticker: str) -> int:
        """Derive a stable integer seed from the first 12 hex digits of SHA-256."""
        digest = hashlib.sha256(ticker.encode()).hexdigest()
        return int(digest[:12], 16)

    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
        """Generate one bar per weekday from `start` through `end` inclusive.

        Raises:
            ValueError: `end` is before `start`.
        """
        if end < start:
            raise ValueError("end < start")
        rng = random.Random(self._seed(ticker))
        wick_sd = self.vol / 4
        close = self.start_price
        series: list[PriceBar] = []
        for offset in range((end - start).days + 1):
            day = start + timedelta(days=offset)
            # Weekends are skipped to mimic the NYSE calendar (holidays are not).
            if day.weekday() >= 5:
                continue
            # The order of RNG draws below is part of the reproducibility contract.
            close = max(0.01, close * (1 + rng.gauss(self.drift, self.vol)))
            high = close * (1 + abs(rng.gauss(0, wick_sd)))
            low = close * (1 - abs(rng.gauss(0, wick_sd)))
            opening = low + (high - low) * rng.random()
            traded = rng.randint(100_000, 10_000_000)
            series.append(
                PriceBar(
                    d=day,
                    open=round(opening, 4),
                    high=round(high, 4),
                    low=round(low, 4),
                    close=round(close, 4),
                    volume=traded,
                )
            )
        return series
54
+
55
+
56
+ # Static self-check: the annotated assignment lets a type checker verify that SyntheticPriceSource satisfies the PriceSource protocol.
57
+ _: PriceSource = SyntheticPriceSource()
wofo/research/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Research strategy modules.
2
+
3
+ Strategies turn parsed filing/data panels into dated, provenance-stamped
4
+ target-weight series that backtest and paper-trade modules consume.
5
+ """
6
+ from .follow_the_filer import follow_the_filer, TargetWeights, Snapshot
7
+ from .issuer_map import resolve_tickers, IssuerOverride
8
+
9
+ __all__ = [
10
+ "follow_the_filer",
11
+ "TargetWeights",
12
+ "Snapshot",
13
+ "resolve_tickers",
14
+ "IssuerOverride",
15
+ ]
wofo/research/follow_the_filer.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Follow-the-filer strategy: mirror a 13F manager's reported long book.
2
+
3
+ Important caveats — read before using:
4
+
5
+ 1. **Stale by construction.** 13F filings are due 45 days after quarter
6
+ end. The earliest a follower can act on a filing is the filing date,
7
+ not the period-of-report date. We use `effective_date = file_date`
8
+ for that reason. Backtests using `period_of_report` as the trade
9
+ date are look-ahead-biased.
10
+ 2. **Long-only.** 13Fs report long positions in 13F-eligible securities
11
+ only. The manager's true exposure (shorts, swaps, non-US, cash,
12
+ options on non-13F names) is invisible.
13
+ 3. **Cap-structure ambiguity.** Same issuer can appear under different
14
+ CUSIPs (common vs. converts). We aggregate to issuer level when the
15
+ caller requests it; otherwise CUSIP-level weights are reported.
16
+ 4. **Survivorship/relisting.** Tickers can change. The mapping from
17
+ 13F issuer -> ticker uses SEC's free file and is heuristic.
18
+ """
19
+ from __future__ import annotations
20
+
21
+ from dataclasses import dataclass
22
+ from datetime import date, datetime
23
+ from typing import Iterable
24
+
25
+
26
@dataclass(frozen=True)
class Snapshot:
    """A set of target weights that takes effect on `effective_date`.

    Construction fails with ValueError when `weights` is non-empty and the
    weights plus `unmapped_value_share` do not sum to 1 within +/-0.01.
    """

    # Date the follower can first act on this filing.
    effective_date: date
    # What the manager held at this date.
    period_of_report: date
    # ticker -> weight; together with unmapped_value_share these sum to ~1.
    weights: dict[str, float]
    # Share of the manager's reported value whose CUSIPs we couldn't map to a ticker.
    unmapped_value_share: float
    # Filing refs, source, run timestamp.
    provenance: dict

    def __post_init__(self):
        weight_sum = sum(self.weights.values())
        if not self.weights:
            return
        combined = weight_sum + self.unmapped_value_share
        if not (0.99 <= combined <= 1.01):
            raise ValueError(f"weights+unmapped should sum to 1; got {combined}")
43
+
44
+
45
@dataclass(frozen=True)
class TargetWeights:
    """All target-weight snapshots produced by one strategy run for one manager."""

    manager_cik: str
    manager_name: str
    snapshots: list[Snapshot]

    def as_dict(self) -> dict:
        """Return a plain-dict view with dates rendered as ISO strings."""
        rendered = []
        for snap in self.snapshots:
            rendered.append(
                {
                    "effective_date": snap.effective_date.isoformat(),
                    "period_of_report": snap.period_of_report.isoformat(),
                    "weights": snap.weights,
                    "unmapped_value_share": snap.unmapped_value_share,
                    "provenance": snap.provenance,
                }
            )
        return {
            "manager_cik": self.manager_cik,
            "manager_name": self.manager_name,
            "snapshots": rendered,
        }
68
+
69
+
70
def follow_the_filer(
    panel: dict,
    filing_refs: dict,  # period_iso -> {cik, accession, file_date, form}
    cusip_to_ticker: dict[str, str | None],
    *,
    manager_cik: str,
    manager_name: str,
    top_n: int | None = None,
    run_id: str | None = None,
) -> TargetWeights:
    """Build target-weight snapshots from a 13F panel.

    Args:
        panel: output of `wofo.thirteenf.analyze.build_panel`. This function
            reads `panel["periods"]` and `panel["rows"]`, whose keys are
            `(period, cusip)` pairs and whose values carry `value_usd`.
        filing_refs: per-period filing metadata. Must include `file_date`
            (YYYY-MM-DD) so we can set `effective_date` correctly.
        cusip_to_ticker: mapping from `wofo.research.resolve_tickers`.
        manager_cik: recorded on the result and in each snapshot's provenance.
        manager_name: recorded on the result.
        top_n: if set, keep only the largest N positions per snapshot.
        run_id: optional tag for provenance (e.g. a git SHA).

    Returns:
        `TargetWeights` with snapshots sorted by effective date.

    Raises:
        ValueError: a period has no filing ref or its ref lacks `file_date`.

    Each weight is the position's share of the total value of the positions
    kept (after `top_n`), mapped or not. Mapped weights therefore sum to
    `1 - unmapped_value_share`; the unmapped share is reported so callers
    can decide how to handle it (skip the snapshot, hold cash, etc.).
    Positions that map to the same ticker are added together.
    """
    from datetime import timezone

    # Same "...Z" format as before, without the deprecated datetime.utcnow().
    run_ts = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    # Bucket rows by period once instead of rescanning every row per period.
    wanted = set(panel["periods"])
    rows_by_period: dict[str, list] = {}
    for (row_period, cusip), row in panel["rows"].items():
        if row_period in wanted:
            rows_by_period.setdefault(row_period, []).append((cusip, row["value_usd"]))

    snapshots: list[Snapshot] = []
    for period in panel["periods"]:
        ref = filing_refs.get(period)
        if not ref or "file_date" not in ref:
            raise ValueError(f"missing file_date for period {period}")
        effective = _parse_iso(ref["file_date"])
        report = _parse_iso(period)

        # Largest positions first; the sort is stable, so ties keep panel order.
        period_rows = sorted(rows_by_period.get(period, []), key=lambda x: x[1], reverse=True)
        if top_n:
            period_rows = period_rows[:top_n]

        # `or 1` avoids dividing by zero when the period has no reported value.
        total = sum(v for _, v in period_rows) or 1
        mapped_value = 0
        weights: dict[str, float] = {}
        for cusip, val in period_rows:
            ticker = cusip_to_ticker.get(cusip)
            if not ticker:
                continue
            mapped_value += val
            weights[ticker] = weights.get(ticker, 0.0) + val / total

        snapshots.append(
            Snapshot(
                effective_date=effective,
                period_of_report=report,
                weights=weights,
                unmapped_value_share=(total - mapped_value) / total,
                provenance={
                    "manager_cik": manager_cik,
                    "filing_ref": ref,
                    "run_id": run_id,
                    "run_ts_utc": run_ts,
                },
            )
        )

    snapshots.sort(key=lambda s: s.effective_date)
    return TargetWeights(manager_cik=manager_cik, manager_name=manager_name, snapshots=snapshots)
139
+
140
+
141
def load_filing_refs(raw_dir) -> dict:
    """Read filing_ref.json files written by `wofo.thirteenf.fetch.fetch_filing`.

    Each sub-directory of `raw_dir` is treated as one quarter. When it holds
    a `filing_ref.json`, that file's content is stored under the quarter-end
    date derived from the directory name (e.g. "2024Q4" -> "2024-12-31").

    Otherwise the entry is synthesized from `primary_doc.xml` (e.g., for
    filings downloaded by hand before fetch_filing existed). The file date
    is unknown in that case, so it is approximated as period + 45 days (the
    regulatory deadline) and the entry is flagged `approximate_file_date`.
    Callers should re-pull via fetch_filing for an accurate file_date.

    Args:
        raw_dir: directory containing one sub-directory per quarter.

    Returns:
        Mapping of period ISO date -> filing reference dict.
    """
    import json
    from datetime import timedelta
    from pathlib import Path
    from wofo.thirteenf.parse import parse_primary_doc

    refs: dict = {}
    for quarter_dir in sorted(Path(raw_dir).iterdir()):
        if not quarter_dir.is_dir():
            continue
        ref_path = quarter_dir / "filing_ref.json"
        if ref_path.exists():
            # Decode as UTF-8 explicitly rather than with the locale default.
            ref = json.loads(ref_path.read_text(encoding="utf-8"))
            refs[_quarter_to_iso(quarter_dir.name)] = ref
            continue

        meta = parse_primary_doc(quarter_dir / "primary_doc.xml")
        approx_file = _parse_iso(meta.period_iso) + timedelta(days=45)
        refs[meta.period_iso] = {
            "cik": meta.cik,
            "accession": None,
            "period_ending": meta.period_iso,
            "file_date": approx_file.isoformat(),
            "form": meta.report_type,
            "approximate_file_date": True,
        }
    return refs
178
+
179
+
180
+ def _parse_iso(s: str) -> date:
181
+ return datetime.strptime(s, "%Y-%m-%d").date()
182
+
183
+
184
+ def _quarter_to_iso(label: str) -> str:
185
+ # "2024Q4" -> "2024-12-31"
186
+ y = label[:4]
187
+ q = label[-1]
188
+ end = {"1": "03-31", "2": "06-30", "3": "09-30", "4": "12-31"}[q]
189
+ return f"{y}-{end}"
190
+
191
+
192
+ def _iter_holdings(holdings: Iterable) -> Iterable: # pragma: no cover - placeholder
193
+ return holdings
wofo/research/issuer_map.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Best-effort 13F issuer-name -> ticker mapping.
2
+
3
+ CUSIP -> ticker is a licensed mapping (CGS) and we will not embed one.
4
+ What we can do, for free:
5
+
6
+ 1. Pull SEC's `company_tickers.json` (CIK <-> ticker, public).
7
+ 2. Normalize 13F `nameOfIssuer` strings and fuzzy-match against company
8
+ names from that file.
9
+ 3. Allow callers to provide an `IssuerOverride` map for cases where the
10
+ heuristic is wrong or the issuer is a non-CIK security (ADR, ETF,
11
+ foreign listing).
12
+
13
+ This is intentionally conservative: when in doubt, the function returns
14
+ `None` for that issuer rather than guessing. `follow_the_filer` drops
15
+ `None` tickers and reports their value as `unmapped_value_share`.
16
+ """
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import os
21
+ import re
22
+ from dataclasses import dataclass, field
23
+ from pathlib import Path
24
+ from urllib.request import Request, urlopen
25
+
26
+ SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"
27
+
28
+
29
@dataclass
class IssuerOverride:
    """Hand-maintained ticker overrides, keyed by CUSIP or by issuer name.

    Keys are matched case-insensitively. When both a CUSIP key and an
    issuer-name key could match, the CUSIP key wins.
    """

    # CUSIP -> ticker.
    by_cusip: dict[str, str] = field(default_factory=dict)
    # Issuer name -> ticker.
    by_issuer: dict[str, str] = field(default_factory=dict)
39
+
40
+
41
+ _TRIM_TOKENS = re.compile(
42
+ r"\b(CORP|CORPORATION|INC|INCORPORATED|LTD|LIMITED|LLC|PLC|HLDGS?|HOLDINGS?|"
43
+ r"GROUP|CO|COMPANY|CL|CLASS|COM|COMMON|NEW|THE|TR|TRUST|REIT)\b",
44
+ re.IGNORECASE,
45
+ )
46
+ _PUNCT = re.compile(r"[^\w\s]")
47
+
48
+
49
def _norm(s: str) -> str:
    """Reduce an issuer or company name to a lowercase comparison key.

    Punctuation becomes spaces, the filler words in `_TRIM_TOKENS` are
    removed, and the remaining tokens are lowercased and joined with
    single spaces.
    """
    without_punct = _PUNCT.sub(" ", s)
    without_filler = _TRIM_TOKENS.sub(" ", without_punct)
    tokens = without_filler.lower().split()
    return " ".join(tokens)
53
+
54
+
55
+ def _load_sec_tickers(cache_path: Path | None = None) -> dict:
56
+ if cache_path and cache_path.exists():
57
+ return json.loads(cache_path.read_text())
58
+ ua = os.environ.get("WOFO_SEC_UA")
59
+ if not ua:
60
+ raise RuntimeError(
61
+ "WOFO_SEC_UA env var required to fetch SEC company_tickers.json"
62
+ )
63
+ req = Request(SEC_TICKERS_URL, headers={"User-Agent": ua})
64
+ with urlopen(req, timeout=30) as resp:
65
+ body = resp.read()
66
+ data = json.loads(body)
67
+ if cache_path:
68
+ cache_path.parent.mkdir(parents=True, exist_ok=True)
69
+ cache_path.write_bytes(body)
70
+ return data
71
+
72
+
73
def resolve_tickers(
    issuers: dict[str, str],  # cusip -> issuer name
    *,
    overrides: IssuerOverride | None = None,
    sec_tickers_cache: Path | None = None,
) -> tuple[dict[str, str | None], dict[str, str]]:
    """Resolve {cusip: issuer_name} -> ({cusip: ticker_or_None}, {cusip: source}).

    `source` is one of: 'override-cusip', 'override-issuer', 'sec-exact',
    'sec-prefix', 'unmapped'. Lookups are tried in that order.

    The SEC ticker file is only loaded when at least one issuer is not
    already covered by an override.
    """
    manual = overrides or IssuerOverride()
    cusip_overrides = {k.upper(): v for k, v in manual.by_cusip.items()}
    issuer_overrides = {_norm(k): v for k, v in manual.by_issuer.items()}

    def _overridden(cusip: str, name: str) -> bool:
        return cusip.upper() in cusip_overrides or _norm(name) in issuer_overrides

    needs_sec = any(not _overridden(c, n) for c, n in issuers.items())
    sec = _load_sec_tickers(sec_tickers_cache) if needs_sec else {}

    # Normalized company title -> ticker. When two titles normalize to the
    # same key, the later entry replaces the earlier one.
    sec_index: dict[str, str] = {}
    for entry in (sec.values() if isinstance(sec, dict) else []):
        title, symbol = entry.get("title", ""), entry.get("ticker", "")
        if title and symbol:
            sec_index[_norm(title)] = symbol.upper()

    def _by_name(key: str) -> tuple[str | None, str]:
        if key in issuer_overrides:
            return issuer_overrides[key].upper(), "override-issuer"
        if key in sec_index:
            return sec_index[key], "sec-exact"
        # Prefix match: issuer name starts with a known company name (or vice
        # versa). Conservative: both sides need >=2 tokens, and the match
        # must be unique.
        if len(key.split()) >= 2:
            hits = [
                symbol
                for known, symbol in sec_index.items()
                if (known.startswith(key) or key.startswith(known)) and len(known.split()) >= 2
            ]
            if len(hits) == 1:
                return hits[0], "sec-prefix"
        return None, "unmapped"

    resolved: dict[str, str | None] = {}
    origin: dict[str, str] = {}
    for cusip, name in issuers.items():
        cusip_key = cusip.upper()
        if cusip_key in cusip_overrides:
            resolved[cusip] = cusip_overrides[cusip_key].upper()
            origin[cusip] = "override-cusip"
        else:
            resolved[cusip], origin[cusip] = _by_name(_norm(name))
    return resolved, origin