Spaces:

zuup1
/

zuup-preference-collection

Running

Claude committed on Apr 27

Commit

7ea334b

unverified ·

1 Parent(s): 4825c21

feat(wofo): add prices, research, backtest, and Phase-1 agent layers

End-to-end plumbing for the wofo Phase-1 (research-only) loop.

- wofo/prices/: PriceSource protocol with synthetic (deterministic, for
tests) and Stooq (free daily CSV) adapters. Production feeds plug in
behind the same protocol.
- wofo/research/: follow-the-filer strategy turns a 13F panel into a
dated TargetWeights series with provenance. Effective-date is the
filing date (not period-of-report) to avoid look-ahead bias.
Issuer-name -> ticker mapping is heuristic with manual overrides
since CUSIP -> ticker is licensed (CGS).
- wofo/backtest/: minimal portfolio backtester (target weights x prices
-> daily NAV) with CAGR / Sharpe / max-drawdown metrics. No numpy
dependency. Unknown tickers are skipped to cash.
- wofo/agent/: Phase-1 agent skeleton. Defines read-only research tools
(list_local_filings, summarize_panel, top_holdings, qoq_activity)
and a Claude tool-use runner. Explicitly NO order-entry tools — that
requires Phase-2 promotion + counsel sign-off.
- tests/: 18 tests pass against committed sample data, no network.
- pytest.ini + confcutdir scope test discovery away from the legacy
Zuup app at the repo root.
- wofo/agent/demo_e2e.py: end-to-end plumbing demo (panel -> strategy
-> synthetic backtest), runs offline.

https://claude.ai/code/session_01C97VcztNaYLWwesHfWn6iE

Files changed (20) hide show

pytest.ini +4 -0
tests/test_agent_tools.py +41 -0
tests/test_backtest.py +88 -0
tests/test_research.py +58 -0
tests/test_thirteenf.py +56 -0
wofo/README.md +62 -0
wofo/agent/__init__.py +16 -0
wofo/agent/demo_e2e.py +105 -0
wofo/agent/runner.py +98 -0
wofo/agent/tools.py +156 -0
wofo/backtest/__init__.py +12 -0
wofo/backtest/metrics.py +67 -0
wofo/backtest/portfolio.py +166 -0
wofo/prices/__init__.py +16 -0
wofo/prices/source.py +33 -0
wofo/prices/stooq.py +82 -0
wofo/prices/synthetic.py +57 -0
wofo/research/__init__.py +15 -0
wofo/research/follow_the_filer.py +193 -0
wofo/research/issuer_map.py +127 -0

pytest.ini ADDED Viewed

	@@ -0,0 +1,4 @@

+[pytest]
+testpaths = tests
+addopts = --import-mode=importlib --confcutdir=tests --rootdir=tests
+pythonpath = .

tests/test_agent_tools.py ADDED Viewed

	@@ -0,0 +1,41 @@

+"""Test the research tools without invoking the model."""
+from wofo.agent import dispatch_tool, TOOLS
def test_tools_registry_shape():
    """The registry lists exactly the four research tools, each with an object-typed schema."""
    expected = {"list_local_filings", "summarize_panel", "top_holdings", "qoq_activity"}
    registered = {tool["name"] for tool in TOOLS}
    assert registered == expected
    for tool in TOOLS:
        assert "input_schema" in tool
        assert tool["input_schema"]["type"] == "object"
def test_list_local_filings_returns_periods():
    """Listing with no arguments succeeds and reports at least five periods."""
    result = dispatch_tool("list_local_filings", {})
    assert result.ok
    assert "periods" in result.content
    periods = result.content["periods"]
    assert len(periods) >= 5
def test_summarize_panel():
    """The panel summary reports the expected manager, CIK, and at least five periods."""
    result = dispatch_tool("summarize_panel", {})
    assert result.ok
    summary_payload = result.content
    assert summary_payload["manager"] == "Situational Awareness LP"
    assert summary_payload["cik"] == "0002045724"
    assert len(summary_payload["periods"]) >= 5
def test_top_holdings_known_period():
    """Asking for the top 3 of a known period returns 3 rows that include CoreWeave."""
    result = dispatch_tool("top_holdings", {"period": "2025-12-31", "n": 3})
    assert result.ok
    holdings = result.content["holdings"]
    issuer_names = [row["issuer"] for row in holdings]
    assert "COREWEAVE INC" in issuer_names
    assert len(holdings) == 3
def test_unknown_period_errors_cleanly():
    """A period that is not on disk yields a failed result mentioning 'unknown period'."""
    result = dispatch_tool("top_holdings", {"period": "1999-12-31"})
    assert not result.ok
    assert "unknown period" in result.error
def test_unknown_tool_errors_cleanly():
    """A tool name that is not registered yields a failed result mentioning 'unknown tool'."""
    result = dispatch_tool("place_trade", {})
    assert not result.ok
    assert "unknown tool" in result.error

tests/test_backtest.py ADDED Viewed

	@@ -0,0 +1,88 @@

+"""Backtester tests using the synthetic price source."""
+from datetime import date
+from wofo.backtest import run_backtest, summary, max_drawdown, sharpe
+from wofo.prices import SyntheticPriceSource
+from wofo.research.follow_the_filer import TargetWeights, Snapshot
def _two_snapshot_strategy() -> TargetWeights:
    """Build a fixture strategy with two dated snapshots (AAA/BBB, then AAA/BBB/CCC)."""
    opening = Snapshot(
        effective_date=date(2024, 1, 15),
        period_of_report=date(2023, 12, 31),
        weights={"AAA": 0.6, "BBB": 0.4},
        unmapped_value_share=0.0,
        provenance={"test": "snap1"},
    )
    mid_year = Snapshot(
        effective_date=date(2024, 7, 15),
        period_of_report=date(2024, 6, 30),
        weights={"AAA": 0.3, "BBB": 0.3, "CCC": 0.4},
        unmapped_value_share=0.0,
        provenance={"test": "snap2"},
    )
    return TargetWeights(
        manager_cik="0000000000",
        manager_name="Test Manager",
        snapshots=[opening, mid_year],
    )
def test_backtest_runs_and_produces_nav():
    """The backtest yields a NAV series of finite, positive values with >= 2 rebalances."""
    import math  # local import: this test module's import block does not provide it

    src = SyntheticPriceSource(drift=0.0, vol=0.01)
    res = run_backtest(_two_snapshot_strategy(), src, start_cash=1_000_000.0, end_date=date(2024, 12, 31))
    assert len(res.nav) == len(res.dates) > 100
    # NAV must be finite and positive. `math.isfinite` rejects NaN and +/-inf;
    # a bare `v == v` self-comparison only rejects NaN and lets +inf through.
    assert all(math.isfinite(v) and v > 0 for v in res.nav)
    # Two snapshots -> at least two rebalances.
    assert len(res.rebalance_dates) >= 2
def test_backtest_metrics_make_sense():
    """Summary metrics cover every simulated day and fall in sane ranges."""
    import math  # local import: this test module's import block does not provide it

    src = SyntheticPriceSource(drift=0.0, vol=0.005)
    res = run_backtest(_two_snapshot_strategy(), src, end_date=date(2024, 12, 31))
    s = summary(res.dates, res.nav)
    assert s["n_days"] == len(res.dates)
    # Sharpe must be a real, finite float. NaN and inf are both `float`
    # instances, so the isinstance check alone would not catch them.
    assert isinstance(s["sharpe"], float)
    assert math.isfinite(s["sharpe"])
    assert 0.0 <= s["max_drawdown"] <= 1.0
def test_unknown_ticker_skipped_not_crashed():
    """A ticker the price source cannot resolve is left in cash instead of aborting the run."""
    from wofo.prices import NotFound

    # The synthetic source prices any string, so wrap it in a source that
    # raises NotFound for one specific ticker.
    class FlakySource:
        def __init__(self, inner):
            self.inner = inner

        def daily(self, ticker, start, end):
            if ticker == "MISSING":
                raise NotFound(ticker)
            return self.inner.daily(ticker, start, end)

    src = SyntheticPriceSource()
    tw = TargetWeights(
        manager_cik="0", manager_name="t",
        snapshots=[Snapshot(
            effective_date=date(2024, 1, 15),
            period_of_report=date(2023, 12, 31),
            weights={"AAA": 0.5, "MISSING": 0.5},
            unmapped_value_share=0.0,
            provenance={},
        )],
    )
    res = run_backtest(tw, FlakySource(src), end_date=date(2024, 6, 30))
    # The portfolio should still run; cash share should be ~50% because half the target was unbuyable.
    assert max(res.cash_share[10:]) >= 0.4
def test_max_drawdown_known_series():
    """Peak 110 followed by trough 90 gives a drawdown of (110 - 90) / 110."""
    series = [100, 110, 105, 90, 95, 120]
    expected = (110 - 90) / 110  # 0.1818...
    observed = max_drawdown(series)
    assert abs(observed - expected) < 1e-9
def test_sharpe_zero_for_flat_series():
    """A constant NAV series has a Sharpe ratio of exactly zero."""
    flat_nav = [100.0 for _ in range(200)]
    assert sharpe(flat_nav) == 0.0

tests/test_research.py ADDED Viewed

	@@ -0,0 +1,58 @@

+"""Tests for the follow-the-filer strategy + issuer mapping."""
+from datetime import date
+from pathlib import Path
+from wofo.research import follow_the_filer, IssuerOverride
+from wofo.research.follow_the_filer import load_filing_refs
+from wofo.research.issuer_map import _norm
+from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
+RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
def test_norm_strips_corporate_suffixes():
    """Issuer-name normalization drops corporate suffixes and lowercases the rest."""
    short_form = _norm("Constellation Energy Corp")
    long_form = _norm("Constellation Energy Corporation")
    assert short_form == long_form
    assert _norm("BLOOM ENERGY CORP") == "bloom energy"
    assert _norm("Lumentum Hldgs Inc") == "lumentum"
def test_follow_the_filer_with_overrides():
    """Run follow-the-filer on the sample filings using only a hand-written ticker map."""
    # Manual map covers what we need without hitting the network.
    overrides = IssuerOverride(by_issuer={
        "CONSTELLATION ENERGY CORP": "CEG",
        "MARVELL TECHNOLOGY INC": "MRVL",
        "MODINE MFG CO": "MOD",
        "ANTERIX INC": "ATEX",
        "CIPHER MINING INC": "CIFR",
        "VISTRA CORP": "VST",
    })
    quarter_dirs = sorted(d for d in RAW.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    refs = load_filing_refs(RAW)

    # Resolve each CUSIP through the overrides only, pre-populating the mapping
    # ourselves so no SEC fetch is needed.
    cusip_to_ticker = {
        cusip: overrides.by_issuer.get(name)
        for cusip, name in panel["issuers"].items()
    }

    tw = follow_the_filer(
        panel,
        filing_refs=refs,
        cusip_to_ticker=cusip_to_ticker,
        manager_cik="0002045724",
        manager_name="Situational Awareness LP",
    )

    assert tw.manager_cik == "0002045724"
    assert len(tw.snapshots) == 5

    # First snapshot: most positions will be unmapped because we only added 6 overrides.
    first = tw.snapshots[0]
    # Mapped weights + unmapped share == 1.
    accounted = sum(first.weights.values()) + first.unmapped_value_share
    assert abs(accounted - 1.0) < 1e-6

    # Effective date should be on or after period-of-report.
    for snap in tw.snapshots:
        assert snap.effective_date >= snap.period_of_report

tests/test_thirteenf.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""Tests for the 13F pipeline using committed sample data."""
+from pathlib import Path
+from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
+RAW = Path(__file__).resolve().parents[1] / "wofo" / "data" / "13f" / "raw"
def test_parse_2024q4_meta():
    """The 2024Q4 primary document parses to the expected filer metadata."""
    meta = parse_primary_doc(RAW / "2024Q4" / "primary_doc.xml")
    assert meta.cik == "0002045724"
    assert meta.manager_name == "Situational Awareness LP"
    assert meta.period_iso == "2024-12-31"
    assert meta.crd_number == "000333011"
    assert meta.sec_file_number == "801-132039"
    assert meta.is_amendment is False
def test_parse_2024q4_holdings_match_summary():
    """Row count and summed value of the info table agree with the cover-page totals."""
    quarter = RAW / "2024Q4"
    meta = parse_primary_doc(quarter / "primary_doc.xml")
    holdings = parse_infotable(quarter / "infotable.xml")
    assert len(holdings) == meta.table_entry_total
    total_value = sum(row.value_usd for row in holdings)
    assert total_value == meta.table_value_total
def test_panel_periods_sorted_and_unique():
    """The panel's period list is sorted, free of duplicates, and has five entries."""
    quarter_dirs = sorted(d for d in RAW.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    periods = panel["periods"]
    assert periods == sorted(set(periods))
    assert len(periods) == 5
def test_qoq_initial_period():
    """Every position in the earliest period is classified, and classified as INITIAL."""
    quarter_dirs = sorted(d for d in RAW.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    first_period = panel["periods"][0]
    first_period_deltas = [d for d in qoq_changes(panel) if d["period"] == first_period]
    for delta in first_period_deltas:
        assert delta["action"] == "INITIAL"
    # Every position in the first quarter should be classified.
    classified = {d["cusip"] for d in first_period_deltas}
    held = {cusip for (period, cusip) in panel["rows"] if period == first_period}
    assert classified == held
def test_concentration_monotonic_aum():
    """Reported total value rises strictly from each sample quarter to the next."""
    quarter_dirs = sorted(d for d in RAW.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    by_period = concentration(panel)
    aums = [by_period[period]["total_value_usd"] for period in panel["periods"]]
    # SA LP grew every quarter in the sample window; if this changes when re-pulled,
    # this test should be updated, not silenced.
    strictly_increasing = all(later > earlier for earlier, later in zip(aums, aums[1:]))
    assert strictly_increasing, aums

wofo/README.md CHANGED Viewed

@@ -14,6 +14,20 @@ wofo/
 │   ├── parse.py      # XML → dataclasses
 │   ├── analyze.py    # panel + qoq + concentration
 │   └── cli.py        # `python -m wofo.thirteenf.cli {pull,analyze}`
 └── data/
     └── 13f/
         ├── raw/      # one dir per quarter, primary_doc + infotable
@@ -51,6 +65,54 @@ python -m wofo.thirteenf.cli analyze
 The analyze step prints a per-quarter summary and writes JSON +
 `REPORT.md` to `wofo/data/13f/processed/`.
 ## What `wofo` will not do
 - File legal or tax documents on your behalf.

 │   ├── parse.py      # XML → dataclasses
 │   ├── analyze.py    # panel + qoq + concentration
 │   └── cli.py        # `python -m wofo.thirteenf.cli {pull,analyze}`
+├── prices/           # Pluggable price data sources
+│   ├── source.py     # PriceSource protocol
+│   ├── synthetic.py  # Deterministic random walk (tests / offline)
+│   └── stooq.py      # Free Stooq daily-CSV adapter (prototyping)
+├── research/         # Strategy generators
+│   ├── follow_the_filer.py   # 13F panel → dated target weights
+│   └── issuer_map.py         # Issuer name → ticker (heuristic + overrides)
+├── backtest/         # Minimal portfolio backtester
+│   ├── portfolio.py  # Target weights × prices → daily NAV
+│   └── metrics.py    # CAGR / Sharpe / max drawdown
+├── agent/            # Phase-1 (research-only) agent
+│   ├── tools.py      # Read-only tools the agent may call
+│   ├── runner.py     # Claude tool-use loop
+│   └── demo_e2e.py   # Plumbing demo (no API key required)
 └── data/
     └── 13f/
         ├── raw/      # one dir per quarter, primary_doc + infotable
 The analyze step prints a per-quarter summary and writes JSON +
 `REPORT.md` to `wofo/data/13f/processed/`.
+## Quick start: end-to-end strategy → backtest demo
+The demo wires panel → follow-the-filer → synthetic backtest. Synthetic
+prices are NOT real returns — this is a plumbing check.
+```bash
+python -m wofo.agent.demo_e2e
+```
+To run with real prices, swap `SyntheticPriceSource` for a real adapter:
+```python
+from wofo.prices.stooq import StooqPriceSource
+src = StooqPriceSource()
+```
+Or implement your own adapter against the `wofo.prices.PriceSource`
+protocol — Polygon, Tiingo, IBKR historical, etc.
+## Quick start: agent loop (Phase 1, research only)
+```bash
+pip install anthropic
+export ANTHROPIC_API_KEY=sk-ant-...
+python - <<'PY'
+from wofo.agent import run_research_loop
+out = run_research_loop(
+    "Summarize Situational Awareness LP's Q4 2025 portfolio "
+    "and the largest position changes from Q3 to Q4."
+)
+print(out["final_text"])
+PY
+```
+The agent has access only to read-only research tools
+(`list_local_filings`, `summarize_panel`, `top_holdings`,
+`qoq_activity`). It cannot place orders, transfer funds, or modify any
+account. See `wofo/agent/tools.py` for the tool schemas.
+## Tests
+```bash
+python -m pytest
+```
+All tests run against committed sample data (no network required).
 ## What `wofo` will not do
 - File legal or tax documents on your behalf.

wofo/agent/__init__.py ADDED Viewed

	@@ -0,0 +1,16 @@

+"""wofo agent — Phase 1 (research only).
+This module defines the *tools* the wofo agent is allowed to call and
+the orchestration loop that drives it. By construction, Phase 1
+exposes only **read-only research tools** — no execution, no order
+entry, no broker connectivity. Phase 2 / 3 will live in separate
+modules and require a deliberate code change (and counsel sign-off)
+to enable.
+The agent uses the Anthropic Python SDK if available; if it is not
+installed, the tools can still be invoked directly from Python.
+"""
+from .tools import TOOLS, dispatch_tool, ToolResult
+from .runner import run_research_loop
+__all__ = ["TOOLS", "dispatch_tool", "ToolResult", "run_research_loop"]

wofo/agent/demo_e2e.py ADDED Viewed

	@@ -0,0 +1,105 @@

+"""End-to-end demo: 13F panel -> follow-the-filer strategy -> synthetic backtest.
+Runs entirely offline using `SyntheticPriceSource`, so the resulting
+NAV is **not** a real return — it is a sanity check that the plumbing
+works. Replace the price source with a real one (Stooq, Polygon, etc.)
+to get meaningful numbers.
+    python -m wofo.agent.demo_e2e
+"""
+from __future__ import annotations
+from datetime import date
+from pathlib import Path
+from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel
+from wofo.research import IssuerOverride, follow_the_filer
+from wofo.research.follow_the_filer import load_filing_refs
+from wofo.backtest import run_backtest, summary
+from wofo.prices import SyntheticPriceSource
+# Hand-curated overrides covering most names that appear in SA LP filings.
+# Update as needed; this is a documentation artifact, not a complete map.
+DEFAULT_OVERRIDES = IssuerOverride(by_issuer={
+    "COREWEAVE INC": "CRWV",
+    "BLOOM ENERGY CORP": "BE",
+    "INTEL CORP": "INTC",
+    "LUMENTUM HLDGS INC": "LITE",
+    "CORE SCIENTIFIC INC NEW": "CORZ",
+    "IREN LIMITED": "IREN",
+    "APPLIED DIGITAL CORP": "APLD",
+    "SANDISK CORP": "SNDK",
+    "EQT CORP": "EQT",
+    "CIPHER MINING INC": "CIFR",
+    "COHERENT CORP": "COHR",
+    "CONSTELLATION ENERGY CORP": "CEG",
+    "MARVELL TECHNOLOGY INC": "MRVL",
+    "MODINE MFG CO": "MOD",
+    "ANTERIX INC": "ATEX",
+    "VISTRA CORP": "VST",
+    "NVIDIA CORPORATION": "NVDA",
+    "BROADCOM INC": "AVGO",
+    "TAIWAN SEMICONDUCTOR MFG LTD": "TSM",
+    "MICRON TECHNOLOGY INC": "MU",
+    "WESTERN DIGITAL CORP": "WDC",
+    "SEAGATE TECHNOLOGY HLDNGS PL": "STX",
+    "GALAXY DIGITAL INC.": "GLXY",
+    "VANECK ETF TRUST": None,    # ETF; mapping is ambiguous without ticker
+    "CLEANSPARK INC": "CLSK",
+    "BITFARMS LTD": "BITF",
+    "LIBERTY ENERGY INC": "LBRT",
+    "INFOSYS LTD": "INFY",
+    "PROPETRO HLDG CORP": "PUMP",
+    "BABCOCK & WILCOX ENTERPRISES": "BW",
+    "POWER SOLUTIONS INTL INC": "PSIX",
+    "WHITEFIBER INC": "WYFI",
+    "KILROY RLTY CORP": "KRC",
+})
def main() -> None:
    """Run the offline demo: load sample filings, derive target weights, backtest, print."""
    raw = Path(__file__).resolve().parents[2] / "wofo" / "data" / "13f" / "raw"
    quarter_dirs = sorted(d for d in raw.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    refs = load_filing_refs(raw)

    # Tickers come from the hand-curated overrides only; unknown issuers map to None.
    cusip_to_ticker = {
        cusip: DEFAULT_OVERRIDES.by_issuer.get(name)
        for cusip, name in panel["issuers"].items()
    }

    tw = follow_the_filer(
        panel,
        filing_refs=refs,
        cusip_to_ticker=cusip_to_ticker,
        manager_cik="0002045724",
        manager_name="Situational Awareness LP",
        run_id="demo_e2e",
    )

    print(f"Manager: {tw.manager_name}")
    for snap in tw.snapshots:
        mapped_count = len(snap.weights)
        weight_total = sum(snap.weights.values())
        print(
            f"  effective {snap.effective_date}  report {snap.period_of_report}  "
            f"mapped={mapped_count} weight_total={weight_total:.1%} "
            f"unmapped={snap.unmapped_value_share:.1%}"
        )

    # Synthetic backtest is a plumbing check, not a real return.
    src = SyntheticPriceSource(drift=0.0004, vol=0.02)
    res = run_backtest(tw, src, start_cash=1_000_000.0, end_date=date(2026, 4, 30))
    stats = summary(res.dates, res.nav)

    print()
    print("Synthetic backtest summary (NOT real returns):")
    for key, value in stats.items():
        line = f"  {key:<14} {value:>12,.4f}" if isinstance(value, float) else f"  {key:<14} {value}"
        print(line)
+if __name__ == "__main__":
+    main()

wofo/agent/runner.py ADDED Viewed

	@@ -0,0 +1,98 @@

+"""Research-loop runner.
+Drives a Claude model through a tool-use loop using only the read-only
+research tools defined in `wofo.agent.tools`. Requires the `anthropic`
+SDK at runtime; if it is not installed the module still imports so the
+tools can be used without a model.
+"""
+from __future__ import annotations
+import os
+from typing import Any
+from .tools import TOOLS, dispatch_tool
+SYSTEM_PROMPT = """You are wofo, the Wooden Family Office research agent.
+You operate in Phase 1 — research only. You have NO ability to place
+orders, transfer funds, or modify any account. You may only call the
+read-only research tools provided.
+Your job is to produce well-sourced, dated research notes:
+- Cite filings by accession number when relevant.
+- Be explicit about staleness (13F is delayed by up to 45 days).
+- When you don't know, say so. Never fabricate tickers or numbers.
+- Distinguish "the manager held X" (fact) from "X is a good buy"
+  (opinion that needs justification).
+When you finish, return a research note in markdown.
+""".strip()
def run_research_loop(
    user_prompt: str,
    *,
    model: str = "claude-opus-4-7",
    max_iterations: int = 8,
    max_tokens: int = 4096,
) -> dict:
    """Run a single research-task loop and return the final transcript.

    Each iteration sends the conversation to the model. If the model stops
    for any reason other than `tool_use`, the text blocks of that turn are
    joined and returned. Otherwise every `tool_use` block is dispatched via
    `dispatch_tool` and the results are appended as the next user turn.

    Args:
        user_prompt: The research question that opens the conversation.
        model: Model identifier passed to the Messages API.
        max_iterations: Upper bound on model calls before giving up.
        max_tokens: `max_tokens` passed on each model call.

    Returns:
        A dict with `final_text`, `messages` (full transcript), and
        `tool_calls` (audit log of name / input / ok / error per call). When
        the iteration budget is exhausted, `final_text` is the literal
        "(max iterations reached)". The function does not stream.

    Raises:
        RuntimeError: If the `anthropic` SDK is not installed or
            `ANTHROPIC_API_KEY` is not set.
    """
    try:
        import anthropic  # type: ignore
    except ImportError as e:
        raise RuntimeError(
            "anthropic SDK is required to run the agent loop; "
            "`pip install anthropic` and set ANTHROPIC_API_KEY."
        ) from e

    if not os.environ.get("ANTHROPIC_API_KEY"):
        raise RuntimeError("ANTHROPIC_API_KEY is not set.")

    client = anthropic.Anthropic()
    messages: list[dict[str, Any]] = [{"role": "user", "content": user_prompt}]
    tool_calls: list[dict[str, Any]] = []

    for _ in range(max_iterations):
        resp = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT,
            tools=TOOLS,
            messages=messages,
        )
        # Append assistant turn.
        messages.append({"role": "assistant", "content": resp.content})

        if resp.stop_reason != "tool_use":
            text = "".join(b.text for b in resp.content if getattr(b, "type", None) == "text")
            return {"final_text": text, "messages": messages, "tool_calls": tool_calls}

        # Run every tool call in the assistant turn.
        tool_results: list[dict[str, Any]] = []
        for block in resp.content:
            if getattr(block, "type", None) != "tool_use":
                continue
            tr = dispatch_tool(block.name, block.input)
            tool_calls.append(
                {"name": block.name, "input": block.input, "ok": tr.ok, "error": tr.error}
            )
            # Render once: to_message() JSON-serialises the content, so calling
            # it per field would serialise the same payload twice.
            rendered = tr.to_message()
            tool_results.append(
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": rendered["content"],
                    "is_error": rendered["is_error"],
                }
            )
        messages.append({"role": "user", "content": tool_results})

    return {
        "final_text": "(max iterations reached)",
        "messages": messages,
        "tool_calls": tool_calls,
    }

wofo/agent/tools.py ADDED Viewed

	@@ -0,0 +1,156 @@

+"""Tools the wofo Phase-1 agent can call.
+Each tool is a pure-Python function with a JSON-Schema-style signature.
+The signatures double as the Anthropic tool-use definitions when the
+agent is wired to Claude.
+**No tool in this module places orders, transfers funds, or modifies
+any account.** Adding such a tool requires:
+1. Counsel sign-off (see docs/family-office-counsel-packet.md).
+2. Phase-2 promotion in docs/wofo-architecture.md.
+3. Hard guardrails (per-trade caps, daily caps, kill switch).
+"""
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Callable
+from wofo.thirteenf import parse_infotable, parse_primary_doc, build_panel, qoq_changes, concentration
+REPO_ROOT = Path(__file__).resolve().parents[2]
+RAW_DIR = REPO_ROOT / "wofo" / "data" / "13f" / "raw"
@dataclass
class ToolResult:
    """Outcome of one tool invocation.

    `content` holds the tool's return value when `ok` is true; `error` holds
    a message when it is false.
    """

    name: str
    ok: bool
    content: Any
    error: str | None = None

    def to_message(self) -> dict:
        """Render this result as a `tool_result` dict.

        On success the content is JSON-encoded; on failure the error text is
        used, falling back to the literal "error" when none was recorded.
        """
        if self.ok:
            payload = json.dumps(self.content)
        else:
            payload = self.error or "error"
        return {"type": "tool_result", "content": payload, "is_error": not self.ok}
+# --- Tool implementations ---------------------------------------------------
def t_list_local_filings(manager_dir: str = "") -> dict:
    """List 13F filing periods we have on disk.

    Args:
        manager_dir: Optional directory, relative to the repository root, to
            scan instead of the default raw-filings directory.

    Returns:
        ``{"periods": [...], "base": "<scanned directory>"}``, where
        ``periods`` is the sorted list of sub-directory names. ``periods`` is
        empty when the directory does not exist.

    Raises:
        ValueError: If ``manager_dir`` resolves to a location outside the
            repository root.
    """
    if not manager_dir:
        base = RAW_DIR
    else:
        # The argument is supplied by the model, so treat it as untrusted:
        # an absolute path or `..` segments must not let this tool enumerate
        # directories outside the repository.
        base = (REPO_ROOT / manager_dir).resolve()
        try:
            base.relative_to(REPO_ROOT)
        except ValueError:
            raise ValueError(
                f"manager_dir must stay inside the repository: {manager_dir!r}"
            ) from None
    if not base.exists():
        return {"periods": [], "base": str(base)}
    periods = sorted(p.name for p in base.iterdir() if p.is_dir())
    return {"periods": periods, "base": str(base)}
def t_summarize_panel() -> dict:
    """Build a panel from the local 13F filings and return a summary.

    Manager name and CIK are read from the last quarter directory in sorted
    order; both are None when no quarter directories are present.
    """
    quarter_dirs = sorted(d for d in RAW_DIR.iterdir() if d.is_dir())
    pairs = [
        (parse_primary_doc(d / "primary_doc.xml"), parse_infotable(d / "infotable.xml"))
        for d in quarter_dirs
    ]
    panel = build_panel(pairs)
    conc = concentration(panel)
    manager = pairs[-1][0].manager_name if pairs else None
    cik = pairs[-1][0].cik if pairs else None
    return {
        "manager": manager,
        "cik": cik,
        "periods": panel["periods"],
        "concentration": conc,
        "totals": panel["totals"],
    }
def t_top_holdings(period: str, n: int = 10) -> dict:
    """Top N holdings, by reported value, for a given period.

    Args:
        period: Period of report as it appears in the panel's period list
            (e.g. "2025-12-31").
        n: Maximum number of holdings to return; must not be negative.

    Returns:
        ``{"period": period, "holdings": [...]}`` with holdings sorted by
        ``value_usd`` in descending order and truncated to ``n`` rows.

    Raises:
        ValueError: If ``n`` is negative or ``period`` is not in the panel.
    """
    # A negative n would turn the final slice into "everything except the
    # last |n| rows", silently returning the wrong set of holdings.
    if n < 0:
        raise ValueError(f"n must be >= 0, got {n}")
    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")
    rows = [
        {"cusip": c, "issuer": panel["issuers"].get(c, ""), "value_usd": r["value_usd"], "shares": r["shares"]}
        for (p, c), r in panel["rows"].items() if p == period
    ]
    rows.sort(key=lambda r: r["value_usd"], reverse=True)
    return {"period": period, "holdings": rows[:n]}
def t_qoq_activity(period: str) -> dict:
    """Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.

    Args:
        period: Period of report as it appears in the panel's period list
            (e.g. "2025-12-31").

    Returns:
        ``{"period": period, "rows": [...]}`` with the change records whose
        ``period`` equals the requested one.

    Raises:
        ValueError: If ``period`` is not in the panel.
    """
    pairs = []
    for q in sorted(p for p in RAW_DIR.iterdir() if p.is_dir()):
        pairs.append((parse_primary_doc(q / "primary_doc.xml"), parse_infotable(q / "infotable.xml")))
    panel = build_panel(pairs)
    # Reject unknown periods explicitly, as the top-holdings tool does. An
    # empty row list for a mistyped period would read as "no activity".
    if period not in panel["periods"]:
        raise ValueError(f"unknown period {period}; have {panel['periods']}")
    deltas = qoq_changes(panel)
    return {"period": period, "rows": [d for d in deltas if d["period"] == period]}
+# --- Registry ---------------------------------------------------------------
def _schema(name: str, description: str, props: dict, required: list[str]) -> dict:
    """Assemble one tool definition whose input schema is an object with the given properties."""
    input_schema = {"type": "object", "properties": props, "required": required}
    return {"name": name, "description": description, "input_schema": input_schema}
+TOOLS: list[dict] = [
+    _schema(
+        "list_local_filings",
+        "List 13F filing periods available on local disk.",
+        {"manager_dir": {"type": "string", "description": "optional path; default = the SA LP raw dir"}},
+        [],
+    ),
+    _schema(
+        "summarize_panel",
+        "Summarize the panel of 13F filings on disk: manager, periods, totals, concentration.",
+        {},
+        [],
+    ),
+    _schema(
+        "top_holdings",
+        "Return the top-N holdings (by reported value) for a given period (YYYY-MM-DD).",
+        {
+            "period": {"type": "string", "description": "Period of report, e.g. 2025-12-31"},
+            "n": {"type": "integer", "description": "How many top holdings to return", "default": 10},
+        },
+        ["period"],
+    ),
+    _schema(
+        "qoq_activity",
+        "Quarter-over-quarter activity (NEW/EXIT/ADD/TRIM/HOLD) for a period.",
+        {"period": {"type": "string", "description": "Period of report, e.g. 2025-12-31"}},
+        ["period"],
+    ),
+]
+_DISPATCH: dict[str, Callable[..., dict]] = {
+    "list_local_filings": t_list_local_filings,
+    "summarize_panel": t_summarize_panel,
+    "top_holdings": t_top_holdings,
+    "qoq_activity": t_qoq_activity,
+}
def dispatch_tool(name: str, args: dict | None) -> ToolResult:
    """Run the named tool with `args` and wrap the outcome in a ToolResult.

    An unregistered name and any exception raised by the tool are both
    reported as a failed ToolResult rather than propagated.
    """
    try:
        handler = _DISPATCH[name]
    except KeyError:
        return ToolResult(name=name, ok=False, content=None, error=f"unknown tool: {name}")

    kwargs = args if args else {}
    try:
        outcome = handler(**kwargs)
    except Exception as exc:  # surface errors to the model so it can recover
        return ToolResult(name=name, ok=False, content=None, error=f"{type(exc).__name__}: {exc}")
    return ToolResult(name=name, ok=True, content=outcome)

wofo/backtest/__init__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+"""Minimal portfolio backtester.
+Takes a `TargetWeights` series and a `PriceSource`, simulates a
+periodically rebalanced long-only portfolio, and reports daily NAV +
+summary metrics. Intentionally simple — fancier features (transaction
+costs beyond a flat bps, slippage models, partial fills) belong in a
+dedicated backtest engine; see `docs/repos.md`.
+"""
+from .portfolio import run_backtest, BacktestResult
+from .metrics import summary, sharpe, max_drawdown, cagr
+__all__ = ["run_backtest", "BacktestResult", "summary", "sharpe", "max_drawdown", "cagr"]

wofo/backtest/metrics.py ADDED Viewed

	@@ -0,0 +1,67 @@

+"""Backtest summary metrics.
+No numpy dependency. The math is straightforward and the input series
+are short (daily bars over a few years).
+"""
+from __future__ import annotations
+import math
+from datetime import date
+from typing import Sequence
def _returns(nav: Sequence[float]) -> list[float]:
    """Simple period-over-period returns; a zero previous value contributes 0.0."""
    return [
        (current - previous) / previous if previous else 0.0
        for previous, current in zip(nav, nav[1:])
    ]
def cagr(dates: Sequence[date], nav: Sequence[float]) -> float:
    """Compound annual growth rate between the first and last observation.

    Args:
        dates: Observation dates; only the first and last are used.
        nav: Portfolio values; only the first and last are used.

    Returns:
        The annualized growth rate as a fraction (0.10 == 10% per year),
        using a 365.25-day year. Returns 0.0 when the rate is undefined:
        fewer than two points, a non-positive starting value, or a date span
        that is not positive. Returns -1.0 when the final value is zero or
        negative (total loss).
    """
    if len(nav) < 2 or len(dates) < 2 or nav[0] <= 0:
        return 0.0
    span_days = (dates[-1] - dates[0]).days
    # A zero-length (or reversed) window has no annualized rate. Clamping the
    # span to a tiny positive number would raise the growth ratio to an
    # astronomically large power and overflow.
    if span_days <= 0:
        return 0.0
    # A non-positive ending value raised to a fractional power is not a real
    # number; report it as a complete loss.
    if nav[-1] <= 0:
        return -1.0
    years = span_days / 365.25
    return (nav[-1] / nav[0]) ** (1 / years) - 1
def sharpe(nav: Sequence[float], *, rf_annual: float = 0.0, periods_per_year: int = 252) -> float:
    """Annualized Sharpe ratio of the NAV series.

    Uses the sample standard deviation (n - 1 denominator) of per-period
    excess returns. Returns 0.0 when there are fewer than two returns or
    the excess returns have zero dispersion.
    """
    period_returns = _returns(nav)
    count = len(period_returns)
    if count < 2:
        return 0.0

    rf_per_period = rf_annual / periods_per_year
    excess = [r - rf_per_period for r in period_returns]
    mean_excess = sum(excess) / count
    squared_deviation = sum((x - mean_excess) ** 2 for x in excess)
    stdev = math.sqrt(squared_deviation / (count - 1))
    if stdev == 0:
        return 0.0
    return (mean_excess / stdev) * math.sqrt(periods_per_year)
def max_drawdown(nav: Sequence[float]) -> float:
    """Largest peak-to-trough decline, as a positive fraction (0.25 == 25%).

    Drawdown is only measured while the running peak is positive; an empty
    or never-declining series yields 0.0.
    """
    worst = 0.0
    running_peak = -math.inf
    for value in nav:
        running_peak = max(running_peak, value)
        if running_peak > 0:
            worst = max(worst, (running_peak - value) / running_peak)
    return worst
def summary(dates: Sequence[date], nav: Sequence[float]) -> dict:
    """Collect headline statistics for a NAV series into a single dict.

    Date fields are ISO strings (None when `dates` is empty); NAV fields and
    total return fall back to 0.0 when `nav` is empty or starts at zero.
    """
    if dates:
        start_date = dates[0].isoformat()
        end_date = dates[-1].isoformat()
    else:
        start_date = None
        end_date = None

    if nav:
        start_nav = nav[0]
        end_nav = nav[-1]
    else:
        start_nav = 0.0
        end_nav = 0.0

    if nav and nav[0]:
        total_return = nav[-1] / nav[0] - 1
    else:
        total_return = 0.0

    return {
        "start_date": start_date,
        "end_date": end_date,
        "n_days": len(dates),
        "start_nav": start_nav,
        "end_nav": end_nav,
        "total_return": total_return,
        "cagr": cagr(dates, nav),
        "sharpe": sharpe(nav),
        "max_drawdown": max_drawdown(nav),
    }

wofo/backtest/portfolio.py ADDED Viewed

	@@ -0,0 +1,166 @@

+"""Portfolio backtester: target weights + price source -> daily NAV."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from datetime import date, timedelta
+from typing import Sequence
+from wofo.prices import PriceSource, NotFound
+from wofo.research import TargetWeights, Snapshot
+@dataclass
+class BacktestResult:  # container for the series a backtest run produces
+    dates: list[date]  # presumably one entry per simulated day, parallel to `nav` — confirm against run_backtest
+    nav: list[float]                  # portfolio value indexed to start_cash
+    weights_history: list[dict[str, float]]  # per-day actual weights
+    rebalance_dates: list[date]  # presumably the days on which a rebalance was executed — confirm
+    skipped_tickers: dict[date, list[str]] = field(default_factory=dict)  # presumably tickers that could not be priced, keyed by date — confirm
+    cash_share: list[float] = field(default_factory=list)  # presumably the per-day fraction of NAV held in cash — confirm
+def _build_price_panel(
+    tickers: set[str],
+    start: date,
+    end: date,
+    source: PriceSource,
+) -> tuple[dict[date, dict[str, float]], dict[str, list]]:
+    """Pull daily closes for each ticker; return {date: {ticker: close}} aligned."""
+    raw: dict[str, dict[date, float]] = {}
+    missing: list[str] = []
+    for t in sorted(tickers):
+        try:
+            bars = source.daily(t, start, end)
+        except NotFound:
+            missing.append(t)
+            continue
+        raw[t] = {b.d: b.close for b in bars}
+    # Trading-day axis = union of dates seen in any series.
+    all_dates = sorted({d for series in raw.values() for d in series})
+    panel: dict[date, dict[str, float]] = {}
+    for d in all_dates:
+        panel[d] = {}
+        for t, series in raw.items():
+            if d in series:
+                panel[d][t] = series[d]
+    return panel, {"missing_tickers": missing}
+def _active_snapshot(snapshots: Sequence[Snapshot], d: date) -> Snapshot | None:
+    """Most recent snapshot whose effective_date <= d."""
+    active: Snapshot | None = None
+    for s in snapshots:
+        if s.effective_date <= d:
+            active = s
+        else:
+            break
+    return active
+def run_backtest(
+    target_weights: TargetWeights,
+    price_source: PriceSource,
+    *,
+    start_cash: float = 1_000_000.0,
+    rebalance_threshold_bps: float = 50.0,  # rebalance if any weight drifts > this many bps from target
+    cost_bps: float = 5.0,                  # round-trip-ish slippage+commission per side
+    end_date: date | None = None,
+) -> BacktestResult:
+    """Run the backtest.
+    Behavior:
+    - Start with `start_cash` in cash on the first effective_date.
+    - On each new snapshot's effective_date, mark the new target.
+    - Between snapshots, rebalance only if any target ticker drifts beyond
+      the threshold from its target.
+    - Tickers in the target that the price source doesn't know are
+      *skipped*; their target weight is reallocated to cash for that
+      snapshot.
+    """
+    if not target_weights.snapshots:
+        raise ValueError("no snapshots")
+    snapshots = sorted(target_weights.snapshots, key=lambda s: s.effective_date)
+    start = snapshots[0].effective_date
+    end = end_date or (snapshots[-1].effective_date + timedelta(days=365))
+    all_tickers = {t for s in snapshots for t in s.weights}
+    panel, meta = _build_price_panel(all_tickers, start, end, price_source)
+    missing = set(meta["missing_tickers"])
+    if not panel:
+        raise RuntimeError("price source returned no data for any ticker")
+    cash = start_cash
+    holdings: dict[str, float] = {}  # ticker -> shares
+    out = BacktestResult(dates=[], nav=[], weights_history=[], rebalance_dates=[])
+    last_target_id: int | None = None
+    for d in sorted(panel):
+        prices = panel[d]
+        # Mark to market
+        nav_val = cash + sum(sh * prices.get(t, _last_close(panel, t, d)) for t, sh in holdings.items())
+        snap = _active_snapshot(snapshots, d)
+        if snap is None:
+            out.dates.append(d); out.nav.append(nav_val); out.weights_history.append({}); out.cash_share.append(1.0)
+            continue
+        # Filter snapshot to tickers we have *some* prices for and a price today.
+        tradable_targets = {t: w for t, w in snap.weights.items() if t in panel.get(d, {}) and t not in missing}
+        skipped_today = sorted(set(snap.weights) - set(tradable_targets))
+        if skipped_today:
+            out.skipped_tickers[d] = skipped_today
+        # Renormalize after dropping unknown / no-price-today tickers; balance to cash.
+        target_norm_total = sum(tradable_targets.values()) or 0
+        # Note: we do NOT renormalize to 1; missing weight stays in cash.
+        snap_id = id(snap)
+        is_new_snapshot = snap_id != last_target_id
+        last_target_id = snap_id
+        # Compute current weights (excluding cash).
+        current_w = {t: (sh * prices.get(t, _last_close(panel, t, d))) / nav_val for t, sh in holdings.items()}
+        max_drift_bps = max(
+            (abs(current_w.get(t, 0.0) - tradable_targets.get(t, 0.0)) for t in set(current_w) | set(tradable_targets)),
+            default=0.0,
+        ) * 10_000
+        should_rebalance = is_new_snapshot or max_drift_bps > rebalance_threshold_bps
+        if should_rebalance:
+            # Liquidate everything to cash, then buy targets.
+            for t, sh in holdings.items():
+                px = prices.get(t, _last_close(panel, t, d))
+                cash += sh * px
+                cash -= abs(sh * px) * (cost_bps / 10_000)
+            holdings = {}
+            for t, w in tradable_targets.items():
+                px = prices[t]
+                target_dollars = nav_val * w
+                shares = target_dollars / px
+                cost = abs(target_dollars) * (cost_bps / 10_000)
+                holdings[t] = shares
+                cash -= target_dollars + cost
+            out.rebalance_dates.append(d)
+            # Recompute NAV after costs.
+            nav_val = cash + sum(sh * prices.get(t, _last_close(panel, t, d)) for t, sh in holdings.items())
+        actual_w = {t: (sh * prices.get(t, _last_close(panel, t, d))) / nav_val for t, sh in holdings.items()}
+        cash_w = cash / nav_val if nav_val else 1.0
+        out.dates.append(d)
+        out.nav.append(nav_val)
+        out.weights_history.append(actual_w)
+        out.cash_share.append(cash_w)
+    return out
+def _last_close(panel: dict[date, dict[str, float]], ticker: str, d: date) -> float:
+    """Most-recent close on or before `d`. 0 if never seen (fully out)."""
+    cur = d
+    for _ in range(10):  # short walk-back; enough for weekends/holidays
+        cur -= timedelta(days=1)
+        if cur in panel and ticker in panel[cur]:
+            return panel[cur][ticker]
+    return 0.0

wofo/prices/__init__.py ADDED Viewed

	@@ -0,0 +1,16 @@

+"""Pluggable price-data sources.
+The agent must work with multiple data vendors over time. Calling code
+depends on the `PriceSource` protocol; concrete adapters live alongside.
+Built-in adapters:
+- `synthetic.SyntheticPriceSource` — deterministic random walk; for tests.
+- `stooq.StooqPriceSource` — free daily CSV from stooq.com; for prototyping.
+Production deployments should swap in a paid feed (Polygon, Tiingo, etc.)
+behind the same protocol.
+"""
+from .source import PriceSource, PriceBar, NotFound
+from .synthetic import SyntheticPriceSource
+__all__ = ["PriceSource", "PriceBar", "NotFound", "SyntheticPriceSource"]

wofo/prices/source.py ADDED Viewed

	@@ -0,0 +1,33 @@

+"""Price-source protocol shared by all adapters."""
+from __future__ import annotations
+from dataclasses import dataclass
+from datetime import date
+from typing import Protocol, runtime_checkable
+class NotFound(LookupError):
+    """Raised when a ticker is unknown to the source.
+    Subclasses the built-in `LookupError`, so handlers written for
+    `LookupError` catch it as well.
+    """
+@dataclass(frozen=True)
+class PriceBar:
+    """One immutable daily bar (date plus open/high/low/close and volume)."""
+    d: date        # calendar date the bar belongs to
+    open: float
+    high: float
+    low: float
+    close: float
+    volume: int    # as reported by the adapter; units are not fixed by this type
+@runtime_checkable
+class PriceSource(Protocol):
+    """Daily-bar price source.
+    This is a structural protocol: any object with a matching `daily`
+    method satisfies it, and `@runtime_checkable` permits `isinstance`
+    checks against it.
+    Implementations must be deterministic for a given (ticker, start, end).
+    They should raise `NotFound` (not return empty) for unknown tickers so
+    callers can distinguish "no data" from "delisted on this date."
+    """
+    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
+        """Return the daily bars for `ticker` in the window given by `start` and `end`."""
+        ...

wofo/prices/stooq.py ADDED Viewed

	@@ -0,0 +1,82 @@

+"""Stooq.com daily-CSV adapter.
+Free, no API key. Reasonable for prototyping; not appropriate for
+production trading. Stooq's coverage and corporate-action handling are
+not as clean as paid feeds.
+URL pattern:
+    https://stooq.com/q/d/l/?s={symbol}&i=d&d1=YYYYMMDD&d2=YYYYMMDD
+US tickers need a `.us` suffix on stooq.
+"""
+from __future__ import annotations
+import csv
+import io
+import os
+import time
+from datetime import date, datetime
+from urllib.error import HTTPError, URLError
+from urllib.request import Request, urlopen
+from .source import NotFound, PriceBar
+class StooqPriceSource:
+    BASE = "https://stooq.com/q/d/l/"
+    def __init__(self, *, suffix: str = ".us", request_interval_s: float = 0.25, timeout_s: float = 30.0):
+        self.suffix = suffix
+        self.request_interval_s = request_interval_s
+        self.timeout_s = timeout_s
+        self._last_request: float = 0.0
+    def _pace(self) -> None:
+        elapsed = time.monotonic() - self._last_request
+        if elapsed < self.request_interval_s:
+            time.sleep(self.request_interval_s - elapsed)
+        self._last_request = time.monotonic()
+    def _fetch(self, url: str) -> str:
+        self._pace()
+        ua = os.environ.get("WOFO_HTTP_UA", "wofo-research/0.1")
+        req = Request(url, headers={"User-Agent": ua})
+        try:
+            with urlopen(req, timeout=self.timeout_s) as resp:
+                return resp.read().decode("utf-8", errors="replace")
+        except HTTPError as e:
+            if e.code == 404:
+                raise NotFound(url) from e
+            raise
+        except URLError as e:
+            raise RuntimeError(f"stooq fetch failed: {e}") from e
+    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
+        symbol = ticker.lower()
+        if "." not in symbol:
+            symbol += self.suffix
+        url = (
+            f"{self.BASE}?s={symbol}&i=d"
+            f"&d1={start.strftime('%Y%m%d')}&d2={end.strftime('%Y%m%d')}"
+        )
+        body = self._fetch(url)
+        # Stooq returns "No data" or empty for unknown / out-of-range queries.
+        if not body or body.startswith("No data") or "Date,Open" not in body:
+            raise NotFound(f"stooq: no data for {ticker} {start}..{end}")
+        bars: list[PriceBar] = []
+        reader = csv.DictReader(io.StringIO(body))
+        for row in reader:
+            try:
+                bars.append(
+                    PriceBar(
+                        d=datetime.strptime(row["Date"], "%Y-%m-%d").date(),
+                        open=float(row["Open"]),
+                        high=float(row["High"]),
+                        low=float(row["Low"]),
+                        close=float(row["Close"]),
+                        volume=int(float(row.get("Volume") or 0)),
+                    )
+                )
+            except (ValueError, KeyError):
+                continue
+        return bars

wofo/prices/synthetic.py ADDED Viewed

	@@ -0,0 +1,57 @@

+"""Deterministic synthetic price source for tests / offline development.
+The price path is a seeded random walk. Same (ticker, start, end) always
+returns the same series, which lets backtests be reproducible without
+network access.
+"""
+from __future__ import annotations
+import hashlib
+import random
+from datetime import date, timedelta
+from .source import PriceBar, PriceSource
+class SyntheticPriceSource:
+    """Random-walk prices with per-ticker seeded reproducibility."""
+    def __init__(self, *, drift: float = 0.0003, vol: float = 0.02, start_price: float = 100.0):
+        self.drift = drift
+        self.vol = vol
+        self.start_price = start_price
+    def _seed(self, ticker: str) -> int:
+        return int(hashlib.sha256(ticker.encode()).hexdigest()[:12], 16)
+    def daily(self, ticker: str, start: date, end: date) -> list[PriceBar]:
+        if end < start:
+            raise ValueError("end < start")
+        rng = random.Random(self._seed(ticker))
+        bars: list[PriceBar] = []
+        price = self.start_price
+        d = start
+        while d <= end:
+            # Skip weekends to mimic NYSE calendar (rough — does not skip holidays).
+            if d.weekday() < 5:
+                shock = rng.gauss(self.drift, self.vol)
+                price = max(0.01, price * (1 + shock))
+                hi = price * (1 + abs(rng.gauss(0, self.vol / 4)))
+                lo = price * (1 - abs(rng.gauss(0, self.vol / 4)))
+                op = lo + (hi - lo) * rng.random()
+                bars.append(
+                    PriceBar(
+                        d=d,
+                        open=round(op, 4),
+                        high=round(hi, 4),
+                        low=round(lo, 4),
+                        close=round(price, 4),
+                        volume=rng.randint(100_000, 10_000_000),
+                    )
+                )
+            d += timedelta(days=1)
+        return bars
+# Static self-check that this satisfies the protocol: the annotated assignment
+# gives a type checker the chance to flag a structural mismatch. At runtime it
+# only constructs one instance at import time; the annotation itself is not
+# evaluated because this module uses `from __future__ import annotations`.
+_: PriceSource = SyntheticPriceSource()

wofo/research/__init__.py ADDED Viewed

	@@ -0,0 +1,15 @@

+"""Research strategy modules.
+Strategies turn parsed filing/data panels into dated, provenance-stamped
+target-weight series that backtest and paper-trade modules consume.
+"""
+from .follow_the_filer import follow_the_filer, TargetWeights, Snapshot
+from .issuer_map import resolve_tickers, IssuerOverride
+__all__ = [
+    "follow_the_filer",
+    "TargetWeights",
+    "Snapshot",
+    "resolve_tickers",
+    "IssuerOverride",
+]

wofo/research/follow_the_filer.py ADDED Viewed

	@@ -0,0 +1,193 @@

+"""Follow-the-filer strategy: mirror a 13F manager's reported long book.
+Important caveats — read before using:
+1. **Stale by construction.** 13F filings are due 45 days after quarter
+   end. The earliest a follower can act on a filing is the filing date,
+   not the period-of-report date. We use `effective_date = file_date`
+   for that reason. Backtests using `period_of_report` as the trade
+   date are look-ahead-biased.
+2. **Long-only.** 13Fs report long positions in 13F-eligible securities
+   only. The manager's true exposure (shorts, swaps, non-US, cash,
+   options on non-13F names) is invisible.
+3. **Cap-structure ambiguity.** Same issuer can appear under different
+   CUSIPs (common vs. converts). We aggregate to issuer level when the
+   caller requests it; otherwise CUSIP-level weights are reported.
+4. **Survivorship/relisting.** Tickers can change. The mapping from
+   13F issuer -> ticker uses SEC's free file and is heuristic.
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from datetime import date, datetime
+from typing import Iterable
+@dataclass(frozen=True)
+class Snapshot:
+    """One target-weight snapshot active starting on `effective_date`."""
+    effective_date: date          # date the follower can first act on this filing
+    period_of_report: date        # what the manager held at this date
+    weights: dict[str, float]     # ticker -> weight (sums ~= 1 over mapped names)
+    unmapped_value_share: float   # share of the manager's reported value
+                                  # whose CUSIPs we couldn't map to a ticker
+    provenance: dict              # filing refs, source, run timestamp
+    def __post_init__(self):
+        total = sum(self.weights.values())
+        if self.weights and not (0.99 <= total + self.unmapped_value_share <= 1.01):
+            raise ValueError(
+                f"weights+unmapped should sum to 1; got {total + self.unmapped_value_share}"
+            )
+@dataclass(frozen=True)
+class TargetWeights:
+    """Time series of target-weight snapshots for one strategy run."""
+    manager_cik: str
+    manager_name: str
+    snapshots: list[Snapshot]
+    def as_dict(self) -> dict:
+        return {
+            "manager_cik": self.manager_cik,
+            "manager_name": self.manager_name,
+            "snapshots": [
+                {
+                    "effective_date": s.effective_date.isoformat(),
+                    "period_of_report": s.period_of_report.isoformat(),
+                    "weights": s.weights,
+                    "unmapped_value_share": s.unmapped_value_share,
+                    "provenance": s.provenance,
+                }
+                for s in self.snapshots
+            ],
+        }
+def follow_the_filer(
+    panel: dict,
+    filing_refs: dict,           # period_iso -> {cik, accession, file_date, form}
+    cusip_to_ticker: dict[str, str | None],
+    *,
+    manager_cik: str,
+    manager_name: str,
+    top_n: int | None = None,
+    run_id: str | None = None,
+) -> TargetWeights:
+    """Build target-weight snapshots from a 13F panel.
+    Args:
+      panel: output of `wofo.thirteenf.analyze.build_panel`.
+      filing_refs: per-period filing metadata. Must include `file_date`
+        (YYYY-MM-DD) so we can set `effective_date` correctly.
+      cusip_to_ticker: mapping from `wofo.research.resolve_tickers`.
+      top_n: if set, keep only the largest N positions per snapshot.
+      run_id: optional tag for provenance (e.g. a git SHA).
+    Weights are normalized over MAPPED positions only; the share of
+    reported value that came from unmapped CUSIPs is reported as
+    `unmapped_value_share` so callers can decide how to handle it
+    (skip the snapshot, hold cash, etc.).
+    """
+    snapshots: list[Snapshot] = []
+    run_ts = datetime.utcnow().isoformat(timespec="seconds") + "Z"
+    for period in panel["periods"]:
+        ref = filing_refs.get(period)
+        if not ref or "file_date" not in ref:
+            raise ValueError(f"missing file_date for period {period}")
+        effective = _parse_iso(ref["file_date"])
+        report = _parse_iso(period)
+        period_rows = [
+            (cusip, r["value_usd"]) for (p, cusip), r in panel["rows"].items() if p == period
+        ]
+        period_rows.sort(key=lambda x: x[1], reverse=True)
+        if top_n:
+            period_rows = period_rows[:top_n]
+        total = sum(v for _, v in period_rows) or 1
+        mapped_value = 0
+        weights: dict[str, float] = {}
+        for cusip, val in period_rows:
+            ticker = cusip_to_ticker.get(cusip)
+            if not ticker:
+                continue
+            mapped_value += val
+            weights[ticker] = weights.get(ticker, 0.0) + val / total
+        unmapped_share = (total - mapped_value) / total if total else 0.0
+        snapshots.append(
+            Snapshot(
+                effective_date=effective,
+                period_of_report=report,
+                weights=weights,
+                unmapped_value_share=unmapped_share,
+                provenance={
+                    "manager_cik": manager_cik,
+                    "filing_ref": ref,
+                    "run_id": run_id,
+                    "run_ts_utc": run_ts,
+                },
+            )
+        )
+    snapshots.sort(key=lambda s: s.effective_date)
+    return TargetWeights(manager_cik=manager_cik, manager_name=manager_name, snapshots=snapshots)
+def load_filing_refs(raw_dir) -> dict:
+    """Read filing_ref.json files written by `wofo.thirteenf.fetch.fetch_filing`.
+    Falls back to parsing primary_doc.xml if filing_ref.json is missing
+    (e.g., for filings downloaded by hand before fetch_filing existed).
+    """
+    import json
+    from pathlib import Path
+    from wofo.thirteenf.parse import parse_primary_doc
+    raw_dir = Path(raw_dir)
+    refs: dict = {}
+    for q in sorted(raw_dir.iterdir()):
+        if not q.is_dir():
+            continue
+        ref_path = q / "filing_ref.json"
+        if ref_path.exists():
+            d = json.loads(ref_path.read_text())
+            period_iso = _quarter_to_iso(q.name)
+            refs[period_iso] = d
+        else:
+            # Synthesize from primary_doc.xml. file_date is unknown here, so
+            # we approximate it as period + 45 days (the regulatory deadline).
+            # Callers should re-pull via fetch_filing for accurate file_date.
+            meta = parse_primary_doc(q / "primary_doc.xml")
+            period = _parse_iso(meta.period_iso)
+            from datetime import timedelta
+            approx_file = period + timedelta(days=45)
+            refs[meta.period_iso] = {
+                "cik": meta.cik,
+                "accession": None,
+                "period_ending": meta.period_iso,
+                "file_date": approx_file.isoformat(),
+                "form": meta.report_type,
+                "approximate_file_date": True,
+            }
+    return refs
+def _parse_iso(s: str) -> date:
+    return datetime.strptime(s, "%Y-%m-%d").date()
+def _quarter_to_iso(label: str) -> str:
+    # "2024Q4" -> "2024-12-31"
+    y = label[:4]
+    q = label[-1]
+    end = {"1": "03-31", "2": "06-30", "3": "09-30", "4": "12-31"}[q]
+    return f"{y}-{end}"
+def _iter_holdings(holdings: Iterable) -> Iterable:  # pragma: no cover - placeholder
+    """Placeholder that returns `holdings` unchanged."""
+    return holdings

wofo/research/issuer_map.py ADDED Viewed

	@@ -0,0 +1,127 @@

+"""Best-effort 13F issuer-name -> ticker mapping.
+CUSIP -> ticker is a licensed mapping (CGS) and we will not embed one.
+What we can do, for free:
+1. Pull SEC's `company_tickers.json` (CIK <-> ticker, public).
+2. Normalize 13F `nameOfIssuer` strings and fuzzy-match against company
+   names from that file.
+3. Allow callers to provide an `IssuerOverride` map for cases where the
+   heuristic is wrong or the issuer is a non-CIK security (ADR, ETF,
+   foreign listing).
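+This is intentionally conservative: when in doubt, `resolve_tickers`
+returns `None` for that issuer rather than guessing. Downstream,
+`follow_the_filer` skips `None` tickers and reports their share of value
+as `unmapped_value_share` rather than passing them on as weights.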
+"""
+from __future__ import annotations
+import json
+import os
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from urllib.request import Request, urlopen
+SEC_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"
+@dataclass
+class IssuerOverride:
+    """Manual issuer-name (or CUSIP) -> ticker overrides.
+    Keys are matched case-insensitively. CUSIP keys take precedence over
+    issuer-name keys when both could match.
+    Both maps default to empty, so `IssuerOverride()` applies no overrides.
+    """
+    by_cusip: dict[str, str] = field(default_factory=dict)   # CUSIP -> ticker
+    by_issuer: dict[str, str] = field(default_factory=dict)  # issuer name -> ticker
+_TRIM_TOKENS = re.compile(
+    r"\b(CORP|CORPORATION|INC|INCORPORATED|LTD|LIMITED|LLC|PLC|HLDGS?|HOLDINGS?|"
+    r"GROUP|CO|COMPANY|CL|CLASS|COM|COMMON|NEW|THE|TR|TRUST|REIT)\b",
+    re.IGNORECASE,
+)
+_PUNCT = re.compile(r"[^\w\s]")
+def _norm(s: str) -> str:
+    s = _PUNCT.sub(" ", s)
+    s = _TRIM_TOKENS.sub(" ", s)
+    return " ".join(s.lower().split())
+def _load_sec_tickers(cache_path: Path | None = None) -> dict:
+    if cache_path and cache_path.exists():
+        return json.loads(cache_path.read_text())
+    ua = os.environ.get("WOFO_SEC_UA")
+    if not ua:
+        raise RuntimeError(
+            "WOFO_SEC_UA env var required to fetch SEC company_tickers.json"
+        )
+    req = Request(SEC_TICKERS_URL, headers={"User-Agent": ua})
+    with urlopen(req, timeout=30) as resp:
+        body = resp.read()
+    data = json.loads(body)
+    if cache_path:
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        cache_path.write_bytes(body)
+    return data
+def resolve_tickers(
+    issuers: dict[str, str],  # cusip -> issuer name
+    *,
+    overrides: IssuerOverride | None = None,
+    sec_tickers_cache: Path | None = None,
+) -> tuple[dict[str, str | None], dict[str, str]]:
+    """Resolve {cusip: issuer_name} -> ({cusip: ticker_or_None}, {cusip: source}).
+    `source` is one of: 'override-cusip', 'override-issuer', 'sec-exact',
+    'sec-prefix', 'unmapped'.
+    """
+    overrides = overrides or IssuerOverride()
+    over_cusip = {k.upper(): v for k, v in overrides.by_cusip.items()}
+    over_issuer = {_norm(k): v for k, v in overrides.by_issuer.items()}
+    sec = _load_sec_tickers(sec_tickers_cache) if (issuers and not all(
+        c.upper() in over_cusip or _norm(n) in over_issuer for c, n in issuers.items()
+    )) else {}
+    sec_index: dict[str, str] = {}
+    for entry in sec.values() if isinstance(sec, dict) else []:
+        title = entry.get("title", "")
+        ticker = entry.get("ticker", "")
+        if title and ticker:
+            sec_index[_norm(title)] = ticker.upper()
+    out: dict[str, str | None] = {}
+    src: dict[str, str] = {}
+    for cusip, name in issuers.items():
+        if cusip.upper() in over_cusip:
+            out[cusip] = over_cusip[cusip.upper()].upper()
+            src[cusip] = "override-cusip"
+            continue
+        n = _norm(name)
+        if n in over_issuer:
+            out[cusip] = over_issuer[n].upper()
+            src[cusip] = "override-issuer"
+            continue
+        if n in sec_index:
+            out[cusip] = sec_index[n]
+            src[cusip] = "sec-exact"
+            continue
+        # Prefix match: issuer name starts with a known company name (or vice
+        # versa). Conservative: require >=2 tokens to avoid false positives.
+        candidates = [
+            (k, t) for k, t in sec_index.items()
+            if (k.startswith(n) or n.startswith(k)) and len(k.split()) >= 2 and len(n.split()) >= 2
+        ]
+        if len(candidates) == 1:
+            out[cusip] = candidates[0][1]
+            src[cusip] = "sec-prefix"
+            continue
+        out[cusip] = None
+        src[cusip] = "unmapped"
+    return out, src