Spaces:

eddmpython
/

dartlab

Sleeping

App Files Files Community

github-actions[bot] committed 5 days ago

Commit

bc7389c

1 Parent(s): 190bb76

sync from 363ba27

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.streamlit/config.toml +0 -7
Dockerfile +18 -0
README.md +1 -3
README_PROJECT.md +1108 -0
app.py +0 -623
pyproject.toml +240 -0
requirements.txt +0 -4
src/dartlab/API_SPEC.md +450 -0
src/dartlab/STATUS.md +81 -0
src/dartlab/__init__.py +1008 -0
src/dartlab/ai/DEV.md +224 -0
src/dartlab/ai/STATUS.md +200 -0
src/dartlab/ai/__init__.py +119 -0
src/dartlab/ai/agent.py +30 -0
src/dartlab/ai/aiParser.py +500 -0
src/dartlab/ai/context/__init__.py +9 -0
src/dartlab/ai/context/builder.py +1960 -0
src/dartlab/ai/context/company_adapter.py +86 -0
src/dartlab/ai/context/dartOpenapi.py +485 -0
src/dartlab/ai/context/finance_context.py +945 -0
src/dartlab/ai/context/formatting.py +439 -0
src/dartlab/ai/context/snapshot.py +198 -0
src/dartlab/ai/conversation/__init__.py +1 -0
src/dartlab/ai/conversation/data_ready.py +71 -0
src/dartlab/ai/conversation/dialogue.py +476 -0
src/dartlab/ai/conversation/focus.py +231 -0
src/dartlab/ai/conversation/history.py +126 -0
src/dartlab/ai/conversation/intent.py +291 -0
src/dartlab/ai/conversation/prompts.py +565 -0
src/dartlab/ai/conversation/suggestions.py +70 -0
src/dartlab/ai/conversation/templates/__init__.py +1 -0
src/dartlab/ai/conversation/templates/analysis_rules.py +897 -0
src/dartlab/ai/conversation/templates/benchmarkData.py +281 -0
src/dartlab/ai/conversation/templates/benchmarks.py +125 -0
src/dartlab/ai/conversation/templates/self_critique.py +94 -0
src/dartlab/ai/conversation/templates/system_base.py +495 -0
src/dartlab/ai/eval/__init__.py +81 -0
src/dartlab/ai/eval/batchResults/batch_ollama_20260324_180122.jsonl +2 -0
src/dartlab/ai/eval/batchResults/batch_ollama_20260325_093749.jsonl +4 -0
src/dartlab/ai/eval/diagnoser.py +309 -0
src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260325_093749.md +14 -0
src/dartlab/ai/eval/golden.json +82 -0
src/dartlab/ai/eval/personaCases.json +2441 -0
src/dartlab/ai/eval/remediation.py +191 -0
src/dartlab/ai/eval/replayRunner.py +416 -0
src/dartlab/ai/eval/reviewLog/accountant.jsonl +1 -0
src/dartlab/ai/eval/reviewLog/analyst.jsonl +2 -0
src/dartlab/ai/eval/reviewLog/investor.jsonl +4 -0
src/dartlab/ai/eval/reviewLog/research_gather.jsonl +2 -0
src/dartlab/ai/eval/scorer.py +466 -0

.streamlit/config.toml DELETED Viewed

@@ -1,7 +0,0 @@
-[theme]
-base = "dark"
-primaryColor = "#ea4647"
-backgroundColor = "#050811"
-secondaryBackgroundColor = "#0f1219"
-textColor = "#f1f5f9"
-font = "sans serif"

Dockerfile ADDED Viewed

	@@ -0,0 +1,18 @@

+FROM python:3.12-slim
+WORKDIR /app
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+COPY pyproject.toml README.md ./
+COPY src/ src/
+RUN pip install --no-cache-dir -e ".[ai]"
+ENV SPACE_ID=1
+EXPOSE 7860
+CMD ["python", "-m", "dartlab.server"]

README.md CHANGED Viewed

@@ -3,9 +3,7 @@ title: DartLab
 emoji: 📊
 colorFrom: red
 colorTo: yellow
-sdk: streamlit
-sdk_version: "1.45.1"
-app_file: app.py
 pinned: true
 license: mit
 short_description: DART + EDGAR disclosure analysis

 emoji: 📊
 colorFrom: red
 colorTo: yellow
+sdk: docker
 pinned: true
 license: mit
 short_description: DART + EDGAR disclosure analysis

README_PROJECT.md ADDED Viewed

	@@ -0,0 +1,1108 @@

+<div align="center">
+<br>
+<img alt="DartLab" src=".github/assets/logo.png" width="180">
+<h3>DartLab</h3>
+<p><b>One stock code. The whole story.</b></p>
+<p>DART + EDGAR filings, structured and comparable — in one line of Python.</p>
+<p>
+<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/v/dartlab?style=for-the-badge&color=ea4647&labelColor=050811&logo=pypi&logoColor=white" alt="PyPI"></a>
+<a href="https://pypi.org/project/dartlab/"><img src="https://img.shields.io/pypi/pyversions/dartlab?style=for-the-badge&color=c83232&labelColor=050811&logo=python&logoColor=white" alt="Python"></a>
+<a href="LICENSE"><img src="https://img.shields.io/badge/License-MIT-94a3b8?style=for-the-badge&labelColor=050811" alt="License"></a>
+<a href="https://github.com/eddmpython/dartlab/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/eddmpython/dartlab/ci.yml?branch=master&style=for-the-badge&labelColor=050811&logo=github&logoColor=white&label=CI" alt="CI"></a>
+<a href="https://eddmpython.github.io/dartlab/"><img src="https://img.shields.io/badge/Docs-GitHub_Pages-38bdf8?style=for-the-badge&labelColor=050811&logo=github-pages&logoColor=white" alt="Docs"></a>
+<a href="https://eddmpython.github.io/dartlab/blog/"><img src="https://img.shields.io/badge/Blog-120%2B_Articles-fbbf24?style=for-the-badge&labelColor=050811&logo=rss&logoColor=white" alt="Blog"></a>
+</p>
+<p>
+<a href="https://eddmpython.github.io/dartlab/">Docs</a> · <a href="https://eddmpython.github.io/dartlab/blog/">Blog</a> · <a href="https://huggingface.co/spaces/eddmpython/dartlab">Live Demo</a> · <a href="notebooks/marimo/">Marimo Notebooks</a> · <a href="https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb">Open in Colab</a> · <a href="README_KR.md">한국어</a> · <a href="https://buymeacoffee.com/eddmpython">Sponsor</a>
+</p>
+<p>
+<a href="https://huggingface.co/datasets/eddmpython/dartlab-data"><img src="https://img.shields.io/badge/Data-HuggingFace-ffd21e?style=for-the-badge&labelColor=050811&logo=huggingface&logoColor=white" alt="HuggingFace Data"></a>
+</p>
+</div>
+> **Note:** DartLab is under active development. APIs may change between versions, and documentation may lag behind the latest code.
+## Install
+Requires **Python 3.12+**.
+```bash
+# Core — financial statements, sections, Company
+uv add dartlab
+# or with pip
+pip install dartlab
+```
+### Optional Extras
+Install only what you need:
+```bash
+uv add "dartlab[ai]"              # web UI, server, streaming (FastAPI + uvicorn)
+uv add "dartlab[llm]"             # LLM analysis (OpenAI)
+uv add "dartlab[charts]"          # Plotly charts, network graphs (plotly + networkx + scipy)
+uv add "dartlab[mcp]"             # MCP server for Claude Desktop / Code / Cursor
+uv add "dartlab[channel]"         # web UI + cloudflared tunnel sharing
+uv add "dartlab[channel-ngrok]"   # web UI + ngrok tunnel sharing
+uv add "dartlab[channel-full]"    # all channels + Telegram / Slack / Discord bots
+uv add "dartlab[all]"             # everything above (except channel bots)
+```
+**Common combinations:**
+```bash
+# financial analysis + AI chat
+uv add "dartlab[ai,llm]"
+# full analysis suite — charts, AI, LLM
+uv add "dartlab[ai,llm,charts]"
+# share analysis with team via tunnel
+uv add "dartlab[channel]"
+```
+### From Source
+```bash
+git clone https://github.com/eddmpython/dartlab.git
+cd dartlab && uv pip install -e ".[all]"
+# or with pip
+pip install -e ".[all]"
+```
+PyPI releases are published only when the core is stable. If you want the latest features (including experimental ones like audit, forecast, valuation), clone the repo directly — but expect occasional breaking changes.
+### Desktop App (Alpha)
+Skip all installation steps — download the standalone Windows launcher:
+- **[Download DartLab.exe](https://github.com/eddmpython/dartlab-desktop/releases/latest/download/DartLab.exe)** from [dartlab-desktop](https://github.com/eddmpython/dartlab-desktop)
+- Also available from the [DartLab landing page](https://eddmpython.github.io/dartlab/)
+One-click launch — no Python, no terminal, no package manager required. The desktop app bundles the web UI with a built-in Python runtime.
+> **Alpha** — functional but incomplete. The desktop app is a Windows-only `.exe` launcher. macOS/Linux are not yet supported.
+---
+**No data setup required.** When you create a `Company`, dartlab automatically downloads the required data from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data) (DART) or the SEC API (EDGAR). The second run loads instantly from local cache.
+## Quick Start
+Pick any company. Get the whole picture.
+```python
+import dartlab
+# Samsung Electronics — from raw filings to structured data
+c = dartlab.Company("005930")
+c.sections                      # every topic, every period, side by side
+c.show("businessOverview")      # what this company actually does
+c.diff("businessOverview")      # what changed since last year
+c.BS                            # standardized balance sheet
+c.ratios                        # 47 financial ratios, already calculated
+# Apple — same interface, different country
+us = dartlab.Company("AAPL")
+us.show("business")
+us.ratios
+# No code needed — ask in natural language
+dartlab.ask("Analyze Samsung Electronics financial health")
+```
+## What DartLab Is
+A public company files hundreds of pages every quarter. Inside those pages is everything — revenue trends, risk warnings, management strategy, competitive position. The complete truth about a company, written by the company itself.
+Nobody reads it.
+Not because they don't want to. Because the same information is named differently by every company, structured differently every year, and scattered across formats designed for regulators, not readers. The same "revenue" appears as `ifrs-full_Revenue`, `dart_Revenue`, `SalesRevenue`, or dozens of Korean variations.
+DartLab changes who can access this information. Two engines turn raw filings into one comparable map:
+### The Two Problems DartLab Solves
+**1. The same company says different things differently every year.**
+Sections horizontalization normalizes every disclosure section into a **topic × period** grid. Different titles across years and industries all resolve to the same canonical topic:
+```
+                    2025Q4    2024Q4    2024Q3    2023Q4    …
+companyOverview       ✓         ✓         ✓         ✓
+businessOverview      ✓         ✓         ✓         ✓
+productService        ✓         ✓         ✓         ✓
+salesOrder            ✓         ✓         —         ✓
+employee              ✓         ✓         ✓         ✓
+dividend              ✓         ✓         ✓         ✓
+audit                 ✓         ✓         ✓         ✓
+…                    (98 canonical topics)
+```
+```
+Before (raw section titles):              After (canonical topic):
+Samsung    "II. 사업의 내용"               → businessOverview
+Hyundai    "II. 사업의 내용 [자동차부문]"   → businessOverview
+Kakao      "2. 사업의 내용"               → businessOverview
+```
+The mapping pipeline: **text normalization** → **545 hardcoded title mappings** → **73 regex patterns** → canonical topic. The mapping rate is roughly 95% or higher across all listed companies. Each cell keeps the full text with heading/body separation, tables, and original evidence. Comparing "what did the company say about risk last year vs. this year" becomes a single `diff()` call.
+**2. Every company names the same number differently.**
+Account standardization normalizes every XBRL account through a 4-step pipeline:
+```
+Raw XBRL account_id
+  → Strip prefixes (ifrs-full_, dart_, ifrs_, ifrs-smes_)
+  → English ID synonyms (59 rules)
+  → Korean name synonyms (104 rules)
+  → Learned mapping table (34,249 entries)
+  → Result: revenue, operatingIncome, totalAssets, …
+```
+```
+Before (raw XBRL):                          After (standardized):
+Company     account_id          account_nm   →  snakeId    label
+Samsung     ifrs-full_Revenue   수익(매출액)  →  revenue    매출액
+SK Hynix    dart_Revenue        매출액       →  revenue    매출액
+LG Energy   Revenue             매출         →  revenue    매출액
+```
+~97% mapping rate. Cross-company comparison requires zero manual work. Combined with `scanAccount` / `scanRatio`, you can compare a single metric across **2,700+ companies** in one call.
+### Principles — Accessibility and Reliability
+These two principles govern every public API:
+**Accessibility** — One stock code is all you need. `import dartlab` provides access to every feature. No internal DTOs, no extra imports, no data setup. `Company("005930")` auto-downloads from [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data).
+**Reliability** — Numbers are raw originals from DART/EDGAR. Missing data returns `None`, never a guess. `trace(topic)` shows which source was chosen and why. Errors are never swallowed.
+### Company — The Merged Map
+`Company` uses `sections` as the spine, then overlays stronger data sources:
+```
+Layer         What it provides                   Priority
+─────────────────────────────────────────────────────────
+docs          Section text, tables, evidence      Base spine
+finance       BS, IS, CF, ratios, time series     Replaces numeric topics
+report        28 structured APIs (DART only)      Fills structured topics
+─────────────────────────────────────────────────────────
+profile       Merged view (default for users)     Highest
+```
+```python
+c.docs.sections     # pure text source (sections spine)
+c.finance.BS        # authoritative financial statements
+c.report.extract()  # structured DART API data
+c.profile.sections  # merged view — what users see by default
+```
+`c.sections` is the merged view. `c.trace("BS")` tells you which source was chosen and why.
+### Architecture — Layered by Responsibility
+DartLab follows a strict layered architecture where each layer only depends on layers below it:
+```
+L0  core/        Protocols, finance utils, docs utils, registry
+L1  providers/   Country-specific data (DART, EDGAR, EDINET)
+    gather/      External market data (Naver, Yahoo, FRED)
+    market/      Market-wide scanning (2,700+ companies)
+L2  analysis/    Analytical engines (valuation, risk, insights, event study)
+L3  ai/          LLM-powered analysis (9 providers)
+```
+Import direction is enforced by CI — no reverse dependencies allowed.
+### Extensibility — Zero Core Modification
+Adding a new country requires zero changes to core code:
+1. Create a provider package under `providers/`
+2. Implement `canHandle(code) -> bool` and `priority() -> int`
+3. Register via `entry_points` in `pyproject.toml`
+```python
+dartlab.Company("005930")  # → DART provider (priority 10)
+dartlab.Company("AAPL")    # → EDGAR provider (priority 20)
+```
+The facade iterates providers by priority — first match wins. This follows the same pattern as OpenBB's provider system and scikit-learn's estimator registration.
+## Core Features
+### Show, Trace, Diff
+```python
+c = dartlab.Company("005930")
+# show — open any topic with source-aware priority
+c.show("BS")                # → finance DataFrame
+c.show("overview")          # → sections-based text + tables
+c.show("dividend")          # → report DataFrame (all quarters)
+c.show("IS", period=["2024Q4", "2023Q4"])  # compare specific periods
+# trace — why a topic came from docs, finance, or report
+c.trace("BS")               # → {"primarySource": "finance", ...}
+# diff — text change detection (3 modes)
+c.diff()                                    # full summary
+c.diff("businessOverview")                  # topic history
+c.diff("businessOverview", "2024", "2025")  # line-by-line diff
+```
+What the output looks like:
+```
+>>> c.show("businessOverview")
+shape: (12, 5)
+┌───────────┬──────────┬──────────────────────────────┬──────────────────────────────┐
+│ blockType │ nodeType │ 2024                         │ 2023                         │
+├───────────┼──────────┼──────────────────────────────┼──────────────────────────────┤
+│ text      │ heading  │ 1. 산업의 특성                │ 1. 산업의 특성                │
+│ text      │ body     │ 반도체 산업은 기술 집약적 …   │ 반도체 산업은 기술 집약적 …    │
+│ table     │ null     │ DataFrame(5×3)               │ DataFrame(5×3)               │
+└───────────┴──────────┴──────────────────────────────┴──────────────────────────────┘
+>>> c.diff("businessOverview", "2023", "2024")
+┌──────────┬─────────────────────────────────────────────┐
+│ status   │ text                                        │
+├──────────┼─────────────────────────────────────────────┤
+│ added    │ AI 반도체 수요 급증에 따른 HBM 매출 확대 …   │
+│ modified │ 매출액 258.9조원 → 300.9조원                 │
+│ removed  │ 반도체 부문 수익성 악화 우려 …               │
+└──────────┴─────────────────────────────────────────────┘
+```
+### Finance
+```python
+c.BS                    # balance sheet (account × period, newest first)
+c.IS                    # income statement
+c.CF                    # cash flow
+c.ratios                # ratio time series DataFrame (6 categories × period)
+c.finance.ratioSeries   # ratio time series across years
+c.finance.timeseries    # raw account time series
+c.annual                # annual time series
+c.filings()             # disclosure document list (Tier 1 Stable)
+```
+All accounts are normalized through the 4-step standardization pipeline — Samsung's `revenue` and LG's `revenue` are the same `snakeId`. Ratios cover 6 categories: profitability, stability, growth, efficiency, cashflow, and valuation.
+### Market-wide Financial Screening
+Scan a single account or ratio across **all listed companies** in one call — 2,700+ DART firms or 500+ EDGAR firms. Returns a wide Polars DataFrame (rows = companies, columns = periods, newest first).
+```python
+import dartlab
+# scan a single account across all listed companies
+dartlab.scanAccount("매출액")                         # revenue, quarterly standalone
+dartlab.scanAccount("operating_profit", annual=True)  # annual basis
+dartlab.scanAccount("total_assets", market="edgar")   # US EDGAR
+# scan a ratio across all listed companies
+dartlab.scanRatio("roe")                              # quarterly ROE for all firms
+dartlab.scanRatio("debtRatio", annual=True)           # annual debt-to-equity
+# list available ratios (13 ratios: profitability, stability, growth, efficiency, cashflow)
+dartlab.scanRatioList()
+```
+Accepts both Korean names (`매출액`) and English snakeIds (`sales`) — same 4-step normalization as Company finance. Reads 2,700+ parquet files in parallel via ThreadPool, typically completes in ~3 seconds.
+> **Requires pre-downloaded data.** Market-wide functions (`scanAccount`, `screen`, `digest`, etc.) operate on local data — individual `Company()` calls only download one firm at a time. Download all data first:
+> ```python
+> # run in a shell first (not Python): pip install "dartlab[hf]"
+> dartlab.downloadAll("finance")   # ~600 MB, 2,700+ firms
+> dartlab.downloadAll("report")    # ~320 MB (governance/workforce/capital/debt)
+> dartlab.downloadAll("docs")      # ~8 GB (digest/signal — large)
+> ```
+## Review — Structured Company Analysis
+> **Experimental** — the review system is under active development. Templates, blocks, and output formats may change between versions.
+DartLab's review system assembles financial data into structured, readable reports.
+### Templates
+Pre-built block combinations that cover key analysis areas:
+```python
+c = dartlab.Company("005930")
+c.review("수익구조")    # revenue structure — segments, growth, concentration
+c.review("자금조달")    # capital structure — debt, liquidity, interest burden
+c.review()             # all templates
+```
+### Block Assembly
+Every review is built from reusable blocks. Get the full block dictionary and assemble your own:
+```python
+from dartlab.review import blocks, Review
+b = blocks(c)          # dict of 16 pre-built blocks
+list(b.keys())         # → ["profile", "segmentComposition", "growth", ...]
+# pick what you need
+Review([
+    b["segmentComposition"],
+    b["growth"],
+    c.select("IS", ["매출액"]),   # mix with raw data
+])
+```
+### Reviewer — AI Layer
+Add LLM-powered opinions on top of data blocks. Works with any provider:
+```python
+c.reviewer()                                    # all sections + AI opinion
+c.reviewer("수익구조")                           # single section + AI
+c.reviewer(guide="Evaluate from semiconductor cycle perspective")  # custom guide
+```
+**Free AI providers** — no paid API key required:
+| Provider | Setup |
+|----------|-------|
+| Gemini | `dartlab setup gemini` |
+| Groq | `dartlab setup groq` |
+| Cerebras | `dartlab setup cerebras` |
+| Mistral | `dartlab setup mistral` |
+Or use any OpenAI-compatible endpoint:
+```bash
+dartlab setup custom --base-url http://localhost:11434/v1   # Ollama local
+```
+### Customization
+- **Templates**: Pre-defined block combinations (`수익구조`, `자금조달`)
+- **Free assembly**: Mix any blocks + raw DataFrames in `Review([...])`
+- **Guide**: Pass `guide="..."` to `c.reviewer()` for domain-specific AI analysis
+- **Layout**: `ReviewLayout(indentH1=2, gapAfterH1=1, ...)` for rendering control
+- **Render formats**: `review.render("rich" | "html" | "markdown" | "json")`
+See [notebooks/marimo/sampleReview.py](notebooks/marimo/sampleReview.py) for interactive examples.
+## Additional Features
+> Features below are **beta** or **experimental** — APIs may change. See [stability](docs/stability.md).
+### Insights (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+c.insights                      # 10-area analysis
+c.insights.grades()             # → {"performance": "A", "profitability": "B", …}
+c.insights.performance.grade    # → "A"
+c.insights.performance.details  # → ["Revenue growth +8.3%", …]
+c.insights.anomalies            # → outliers and red flags
+# distress scorecard — 6-model bankruptcy/fraud prediction
+c.insights.distress             # Altman Z-Score, Beneish M-Score, Ohlson O-Score,
+                                # Merton Distance-to-Default, Piotroski F-Score, Sloan Ratio
+```
+### Valuation, Forecast & Simulation
+```python
+dartlab.valuation("005930")           # DCF + DDM + relative valuation
+dartlab.forecast("005930")            # revenue forecast (4-source ensemble)
+dartlab.simulation("005930")          # scenario simulation (macro presets)
+# also available as Company methods
+c.valuation()
+c.forecast(horizon=3)
+c.simulation(scenarios=["adverse", "rate_hike"])
+```
+Auto-detects currency — KRW for DART companies, USD for EDGAR. Works with both `dartlab.valuation("AAPL")` and `dartlab.valuation("005930")`.
+### Audit (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+dartlab.audit("005930")               # 11 red flag detectors
+# Benford's Law (digit distribution), auditor change (PCAOB AS 3101),
+# going concern (ISA 570), internal control (SOX 302/404),
+# revenue quality (Dechow & Dichev), Merton default probability, ...
+```
+### Market Intelligence (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+dartlab.digest()                      # market-wide disclosure change digest
+dartlab.digest(sector="반도체")        # sector filter
+dartlab.groupHealth()                 # group health: network × financial ratios
+```
+### Modules
+DartLab exposes 100+ modules across 6 categories:
+```bash
+dartlab modules                      # list all modules
+dartlab modules --category finance   # filter by category
+dartlab modules --search dividend    # search by keyword
+```
+```python
+c.topics    # list all available topics for this company
+```
+Categories: `finance` (statements, ratios), `report` (dividend, governance, audit), `notes` (K-IFRS annotations), `disclosure` (narrative text), `analysis` (insights, rankings), `raw` (original parquets).
+### Charts & Visualization (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+c = dartlab.Company("005930")
+# one-liner Plotly charts
+dartlab.chart.revenue(c).show()          # revenue + operating margin combo
+dartlab.chart.cashflow(c).show()         # operating/investing/financing CF
+dartlab.chart.dividend(c).show()         # DPS + yield + payout ratio
+dartlab.chart.profitability(c).show()    # ROE, operating margin, net margin
+# auto-detect all available charts
+specs = dartlab.chart.auto_chart(c)
+dartlab.chart.chart_from_spec(specs[0]).show()
+# generic charts from any DataFrame
+dartlab.chart.line(c.dividend, y=["dps"])
+dartlab.chart.bar(df, x="year", y=["revenue", "operating_income"], stacked=True)
+```
+Data tools:
+```python
+dartlab.table.yoy_change(c.dividend, value_cols=["dps"])       # add YoY% columns
+dartlab.table.format_korean(c.BS, unit="백만원")                # 1.2조원, 350억원
+dartlab.table.summary_stats(c.dividend, value_cols=["dps"])     # mean/CAGR/trend
+dartlab.text.extract_keywords(narrative)                        # frequency-based keywords
+dartlab.text.sentiment_indicators(narrative)                     # positive/negative/risk
+```
+Install chart dependencies: `uv add "dartlab[charts]"`
+### Network — Affiliate Map (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+c = dartlab.Company("005930")
+# interactive vis.js graph in browser
+c.network().show()           # ego view (1 hop)
+c.network(hops=2).show()     # 2-hop neighborhood
+# DataFrame views
+c.network("members")     # group affiliates
+c.network("edges")       # investment/shareholder connections
+c.network("cycles")      # circular ownership paths
+# full market network
+dartlab.network().show()
+```
+### Market Scan (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+c = dartlab.Company("005930")
+# one company → market-wide
+c.governance()           # single company
+c.governance("all")      # full market DataFrame
+dartlab.governance()     # module-level scan
+dartlab.workforce()
+dartlab.capital()
+dartlab.debt()
+# screening & benchmarking
+dartlab.screen()         # multi-factor screening
+dartlab.benchmark()      # peer comparison
+dartlab.signal()         # change detection signals
+```
+### Market Data Collection (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+The Gather engine collects external market data as **Polars DataFrames** — timeseries by default. Every request goes through automatic fallback chains, circuit breaker isolation, and TTL caching. All methods are synchronous — async parallel execution is handled internally.
+```python
+import dartlab
+# OHLCV timeseries — adjusted prices, 6000+ trading days in a single request
+dartlab.price("005930")                         # KR: 1-year default, Polars DataFrame
+dartlab.price("005930", start="2015-01-01")     # custom range
+dartlab.price("AAPL", market="US")              # US via Yahoo Finance chart API
+dartlab.price("005930", snapshot=True)          # opt-in: current price snapshot
+# supply/demand flow timeseries (KR only)
+dartlab.flow("005930")                          # DataFrame (date, foreignNet, institutionNet, ...)
+# macro indicators — full wide DataFrame
+dartlab.macro()                                 # KR 12 indicators (CPI, rates, FX, production, ...)
+dartlab.macro("US")                             # US 25 indicators (GDP, CPI, Fed Funds, S&P500, ...)
+dartlab.macro("CPI")                            # single indicator (auto-detects KR)
+dartlab.macro("FEDFUNDS")                       # single indicator (auto-detects US)
+# consensus, news
+dartlab.consensus("005930")                     # target price & analyst opinion
+dartlab.news("삼성전자")                         # Google News RSS → DataFrame
+```
+**How data is collected — don't worry, it's safe:**
+| Source | Data | Method |
+|--------|------|--------|
+| Naver Chart API | KR OHLCV (adjusted prices) | `fchart.stock.naver.com` — 1 request per stock, max 6000 days |
+| Yahoo Finance v8 | US/Global OHLCV | `query2.finance.yahoo.com/v8/finance/chart` — public chart API |
+| ECOS (Bank of Korea) | KR macro indicators | Official API with user's own key |
+| FRED (St. Louis Fed) | US macro indicators | Official API with user's own key |
+| Naver Mobile API | Consensus, flow, sector PER | `m.stock.naver.com/api` — JSON endpoints |
+| FMP | Fallback for US history | Financial Modeling Prep API (optional) |
+**Safety infrastructure:**
+- **Rate limiting** — per-domain RPM caps (Naver 30, ECOS 30, FRED 120) with async queue
+- **Circuit breaker** — 3 consecutive failures → source disabled for 60s, half-open retry
+- **Fallback chains** — KR: naver → yahoo_direct → yahoo / US: yahoo_direct → fmp → yahoo
+- **Stale-while-revalidate** — returns cached data on failure, warns via `log.warning`
+- **User-Agent rotation** — randomized per request to avoid fingerprinting
+- **No silent failures** — all API errors logged at warning level, never swallowed
+- **No scraping** — all sources are public APIs or official data endpoints
+### Cross-Border Analysis (beta)
+> **Beta** — API may change after a warning. See [stability](docs/stability.md).
+```python
+c = dartlab.Company("005930")
+# keyword frequency across disclosure periods
+c.keywordTrend(keyword="AI")          # topic × period × keyword count
+c.keywordTrend()                      # all 54 built-in keywords
+# news headlines
+c.news()                              # recent 30 days
+dartlab.news("AAPL", market="US")     # US company news
+# global peer mapping (WICS → GICS sector)
+dartlab.crossBorderPeers("005930")    # → ["AAPL", "MSFT", "NVDA", "TSM", "AVGO"]
+# currency conversion (FRED-based)
+from dartlab.engines.common.finance import getExchangeRate, convertValue
+getExchangeRate("KRW")                # KRW/USD rate
+convertValue(1_000_000, "KRW", "USD") # → ~730.0
+# audit opinion normalization (KR/EN/JP → canonical code)
+from dartlab.engines.common.audit import normalizeAuditOpinion
+normalizeAuditOpinion("적정")          # → "unqualified"
+normalizeAuditOpinion("Qualified")     # → "qualified"
+```
+Disclosure gap detection runs automatically inside `c.insights` — flags mismatches between text changes and financial health (e.g. risk text surges while financials are stable).
+### Export (experimental)
+> **Experimental** — Breaking changes possible. Not for production.
+```bash
+dartlab excel "005930" -o samsung.xlsx
+```
+Install: `uv add "dartlab[ai]"` (Excel export is included in the AI extras).
+### Plugins
+```python
+dartlab.plugins()               # list loaded plugins
+dartlab.reload_plugins()        # rescan after installing a plugin
+```
+Plugins can extend DartLab with custom data sources, tools, or analysis engines. See `dartlab plugin create --help` for scaffolding.
+## EDGAR (US)
+Same `Company` interface, same account standardization pipeline, different data source. EDGAR data is auto-fetched from the SEC API — no pre-download needed:
+```python
+us = dartlab.Company("AAPL")
+us.sections                         # 10-K/10-Q sections with heading/body
+us.show("business")                 # business description
+us.show("10-K::item1ARiskFactors")  # risk factors
+us.BS                               # SEC XBRL balance sheet
+us.ratios                           # same 47 ratios
+us.diff("10-K::item7Mdna")          # MD&A text changes
+us.insights                         # 10-area grades (A–F)
+# analyst functions — auto-detect USD
+dartlab.valuation("AAPL")           # DCF + DDM + relative (USD)
+dartlab.forecast("AAPL")            # revenue forecast (USD)
+dartlab.simulation("AAPL")          # scenario simulation (US macro presets)
+```
+The interface is identical — same methods, same structure:
+```python
+# Korea (DART)                          # US (EDGAR)
+c = dartlab.Company("005930")           c = dartlab.Company("AAPL")
+c.sections                              c.sections
+c.show("businessOverview")              c.show("business")
+c.BS                                    c.BS
+c.ratios                                c.ratios
+c.diff("businessOverview")              c.diff("10-K::item7Mdna")
+c.insights.grades()                     c.insights.grades()
+```
+### DART vs EDGAR Namespaces
+|               | DART           | EDGAR          |
+|---------------|:--------------:|:--------------:|
+| `docs`        | ✓              | ✓              |
+| `finance`     | ✓              | ✓              |
+| `report`      | ✓ (28 API types) | ✗ (not applicable) |
+| `profile`     | ✓              | ✓              |
+DART has a `report` namespace with 28 structured disclosure APIs (dividend, governance, executive compensation, etc.). This does not exist in EDGAR — SEC filings are structured differently.
+**EDGAR topic naming**: Topics use `{formType}::{itemId}` format. Short aliases also work:
+```python
+us.show("10-K::item1Business")     # full form
+us.show("business")                # short alias
+us.show("risk")                    # → 10-K::item1ARiskFactors
+us.show("mdna")                    # → 10-K::item7Mdna
+```
+## AI Analysis
+> **Experimental** — the AI analysis layer and `analysis/` engines are under active development. APIs, output formats, and available tools may change between versions.
+> **Tip:** New to financial analysis or prefer natural language? Use `dartlab.ask()` — the AI assistant handles everything from data download to analysis. No coding knowledge required.
+DartLab includes a built-in AI analysis layer that feeds structured company data to LLMs. **No code required** — you can ask questions in plain language and DartLab handles everything: data selection, context assembly, and streaming the answer.
+```bash
+# terminal one-liner — no Python needed
+dartlab ask "삼성전자 재무건전성 분석해줘"
+```
+DartLab structures the data, selects relevant context (financials, insights, sector benchmarks), and lets the LLM explain:
+```
+$ dartlab ask "삼성전자 재무건전성 분석해줘"
+삼성전자의 재무건전성은 A등급입니다.
+▸ 부채비율 31.8% — 업종 평균(45.2%) 대비 양호
+▸ 유동비율 258.6% — 200% 안전 기준 상회
+▸ 이자보상배수 22.1배 — 이자 부담 매우 낮음
+▸ ROE 회복세: 1.6% → 10.2% (4분기 연속 개선)
+[데이터 출처: 2024Q4 사업보고서, dartlab insights 엔진]
+```
+For real-time market-wide disclosure questions (e.g. "최근 7일 수주공시 알려줘"), the AI uses your `OpenDART API key` to search recent filings directly. Store the key in project `.env` or via UI Settings.
+DartLab's AI layer is 2-tier: basic analysis works with any provider, while tool-calling providers (e.g. `openai`, `oauth-codex`, `codex`) can go deeper by requesting additional data mid-conversation.
+### Python API
+```python
+import dartlab
+# streams to stdout, returns full text
+answer = dartlab.ask("삼성전자 재무건전성 분석해줘")
+# provider + model override
+answer = dartlab.ask("삼성전자 분석", provider="openai", model="gpt-4o")
+# data filtering
+answer = dartlab.ask("삼성전자 핵심 포인트", include=["BS", "IS"])
+# analysis pattern (framework-guided)
+answer = dartlab.ask("삼성전자 분석", pattern="financial")
+# agent mode — LLM selects tools for deeper analysis
+answer = dartlab.chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘")
+```
+### CLI
+```bash
+# provider setup — free providers first
+dartlab setup              # list all providers
+dartlab setup gemini       # Google Gemini (free)
+dartlab setup groq         # Groq (free)
+# status
+dartlab status             # all providers (table view)
+dartlab status --cost      # cumulative token/cost stats
+# ask questions (streaming by default)
+dartlab ask "삼성전자 재무건전성 분석해줘"
+dartlab ask "AAPL risk analysis" -p ollama
+dartlab ask --continue "배당 추세는?"
+# auto-generate report
+dartlab report "삼성전자" -o report.md
+# web UI
+dartlab                    # open browser UI
+dartlab --help             # show all commands
+```
+<details>
+<summary>All CLI commands (16)</summary>
+| Category | Command | Description |
+|----------|---------|-------------|
+| Data | `show` | Open any topic by name |
+| Data | `search` | Find companies by name or code |
+| Data | `statement` | BS / IS / CF / SCE output |
+| Data | `sections` | Raw docs sections |
+| Data | `profile` | Company index and facts |
+| Data | `modules` | List all available modules |
+| AI | `ask` | Natural language question |
+| AI | `report` | Auto-generate analysis report |
+| Export | `excel` | Export to Excel (experimental) |
+| Collect | `collect` | Download / refresh / batch collect |
+| Collect | `collect --check` | Check freshness (new filings) |
+| Collect | `collect --incremental` | Incremental collect (missing only) |
+| Server | `ai` | Launch web UI (localhost:8400) |
+| Server | `share` | Tunnel sharing (ngrok / cloudflared) |
+| Server | `status` | Provider connection status |
+| Server | `setup` | Provider setup wizard |
+| MCP | `mcp` | Start MCP stdio server |
+| Plugin | `plugin` | Create / list plugins |
+</details>
+### Providers
+**Free API key providers** — sign up, paste the key, start analyzing:
+| Provider | Free Tier | Model | Setup |
+|----------|-----------|-------|-------|
+| `gemini` | Gemini 2.5 Pro/Flash free | Gemini 2.5 | `dartlab setup gemini` |
+| `groq` | 6K–30K TPM free | LLaMA 3.3 70B | `dartlab setup groq` |
+| `cerebras` | 1M tokens/day permanent | LLaMA 3.3 70B | `dartlab setup cerebras` |
+| `mistral` | 1B tokens/month free | Mistral Small | `dartlab setup mistral` |
+**Other providers:**
+| Provider | Auth | Cost | Tool Calling |
+|----------|------|------|:---:|
+| `oauth-codex` | ChatGPT subscription (Plus/Team/Enterprise) | Included in subscription | Yes |
+| `openai` | API key (`OPENAI_API_KEY`) | Pay-per-token | Yes |
+| `ollama` | Local install, no account needed | Free | Depends on model |
+| `codex` | Codex CLI installed locally | Free (uses your Codex session) | Yes |
+| `custom` | Any OpenAI-compatible endpoint | Varies | Varies |
+**Auto-fallback:** Set multiple free API keys and DartLab automatically switches to the next provider when one hits its rate limit. Use `provider="free"` to enable the fallback chain:
+```python
+dartlab.ask("삼성전자 분석", provider="free")
+```
+**Why no Claude provider?** Anthropic does not offer OAuth-based access. Without OAuth, there is no way to let users authenticate with their existing subscription — we would have to ask users to paste API keys, which goes against DartLab's frictionless design. If Anthropic adds OAuth support in the future, we will add a Claude provider. For now, Claude works through **MCP** (see below) — Claude Desktop, Claude Code, and Cursor can call DartLab's 60 tools directly.
+**`oauth-codex`** is the recommended provider — if you have a ChatGPT subscription, it works out of the box with no API keys. Run `dartlab setup oauth-codex` to authenticate.
+**Web UI (`dartlab`)** launches a browser-based chat interface for interactive analysis. This feature is currently **experimental** — we are evaluating the right scope and UX for visualization and collaborative features.
+Install AI dependencies: `uv add "dartlab[ai]"`
+### Project Settings (`.dartlab.yml`)
+```yaml
+company: 005930         # default company
+provider: openai        # default LLM provider
+model: gpt-4o           # default model
+verbose: false
+```
+## MCP — AI Assistant Integration
+DartLab includes a built-in [MCP](https://modelcontextprotocol.io/) server that exposes 60 tools (16 global + 44 per-company) to Claude Desktop, Claude Code, Cursor, and any MCP-compatible client.
+```bash
+uv add "dartlab[mcp]"
+```
+### Claude Desktop
+Add to `claude_desktop_config.json`:
+```json
+{
+  "mcpServers": {
+    "dartlab": {
+      "command": "uv",
+      "args": ["run", "dartlab", "mcp"]
+    }
+  }
+}
+```
+### Claude Code
+```bash
+claude mcp add dartlab -- uv run dartlab mcp
+```
+Or add to `~/.claude/settings.json`:
+```json
+{
+  "mcpServers": {
+    "dartlab": {
+      "command": "uv",
+      "args": ["run", "dartlab", "mcp"]
+    }
+  }
+}
+```
+### Cursor
+Add to `.cursor/mcp.json` with the same config format as Claude Desktop.
+### What's Available
+Once connected, your AI assistant can:
+- **Search** — find companies by name or code (`search_company`)
+- **Show** — read any disclosure topic (`show_topic`, `list_topics`, `diff_topic`)
+- **Finance** — balance sheet, income statement, cash flow, ratios (`get_financial_statements`, `get_ratios`)
+- **Analysis** — insights, sector ranking, valuation (`get_insight`, `get_ranking`)
+- **EDGAR** — same tools work for US companies (`stock_code: "AAPL"`)
+Auto-generate config for your platform:
+```bash
+dartlab mcp --config claude-desktop
+dartlab mcp --config claude-code
+dartlab mcp --config cursor
+```
+## OpenAPI — Raw Public APIs
+Use source-native wrappers when you want raw disclosure APIs directly.
+### OpenDart (Korea)
+> **Note:** `Company` does **not** require an API key — it uses pre-built datasets.
+> `OpenDart` uses the raw DART API and requires a key from [opendart.fss.or.kr](https://opendart.fss.or.kr) (free).
+> Recent filing-list AI questions across the whole market also use this key. In the UI, open Settings and manage `OpenDART API key` there.
+```python
+from dartlab import OpenDart
+d = OpenDart()
+d.search("카카오", listed=True)
+d.filings("삼성전자", "2024")
+d.finstate("삼성전자", 2024)
+d.report("삼성전자", "배당", 2024)
+```
+### OpenEdgar (US)
+> **No API key required.** SEC EDGAR is a public API — no registration needed.
+```python
+from dartlab import OpenEdgar
+e = OpenEdgar()
+e.search("Apple")
+e.filings("AAPL", forms=["10-K", "10-Q"])
+e.companyFactsJson("AAPL")
+```
+## Data
+**No manual setup required.** When you create a `Company`, dartlab automatically downloads the required data.
+| Dataset | Coverage | Size | Source |
+|---------|----------|------|--------|
+| DART docs | 2,500+ companies | ~8 GB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/docs) |
+| DART finance | 2,700+ companies | ~600 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/finance) |
+| DART report | 2,700+ companies | ~320 MB | [HuggingFace](https://huggingface.co/datasets/eddmpython/dartlab-data/tree/main/dart/report) |
+| EDGAR | On-demand | — | SEC API (auto-fetched) |
+### 3-Step Data Pipeline
+```
+dartlab.Company("005930")
+  │
+  ├─ 1. Local cache ──── already have it? done (instant)
+  │
+  ├─ 2. HuggingFace ──── auto-download (~seconds, no key needed)
+  │
+  └─ 3. DART API ──────── collect with your API key (needs key)
+```
+If a company's data is not available on HuggingFace, dartlab collects it directly from DART — this requires an API key:
+```bash
+dartlab setup dart-key
+```
+### Freshness — Automatic Update Detection
+DartLab uses a 3-layer freshness system to keep your local data current:
+| Layer | Method | Cost |
+|-------|--------|------|
+| L1 | HTTP HEAD → ETag comparison with HuggingFace | ~0.5s, few hundred bytes |
+| L2 | Local file age (90-day TTL fallback) | instant (local) |
+| L3 | DART API → `rcept_no` diff (requires API key) | 1 API call, ~1s |
+When you open a `Company`, dartlab checks if newer data exists. If a new disclosure was filed:
+```python
+c = dartlab.Company("005930")
+# [dartlab] ⚠ 005930 — 새 공시 2건 발견 (사업보고서 (2024.12))
+#   • 증분 수집: dartlab collect --incremental 005930
+#   • 또는 Python: c.update()
+c.update()  # incremental collect — only missing filings
+```
+```bash
+# CLI freshness check
+dartlab collect --check 005930         # single company
+dartlab collect --check                # scan all local companies (7 days)
+# incremental collect — only missing filings
+dartlab collect --incremental 005930   # single company
+dartlab collect --incremental          # all local companies with new filings
+```
+### Batch Collection (DART API)
+```bash
+dartlab collect --batch                    # all listed, missing only
+dartlab collect --batch -c finance 005930  # specific category + company
+dartlab collect --batch --mode all         # re-collect everything
+```
+## Try It Now
+### Live Demo (No Install)
+Try DartLab instantly — no Python, no terminal, no setup:
+**[→ Open Live Demo](https://huggingface.co/spaces/eddmpython/dartlab)** — enter a stock code, see financials immediately
+Or open a [Colab notebook](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) in your browser.
+### Marimo Notebooks
+> Data is automatically downloaded on first use. No setup required unless collecting new companies directly from DART.
+```bash
+uv add dartlab marimo
+marimo edit notebooks/marimo/dartCompany.py    # Korean company (DART)
+marimo edit notebooks/marimo/edgarCompany.py   # US company (EDGAR)
+marimo edit notebooks/marimo/aiAnalysis.py     # AI analysis examples
+```
+### Colab Notebooks
+**Showcase** (English — global audience):
+| Notebook | Topic |
+|---|---|
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb) | **Quick Start** — analyze any company in 3 lines |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/02_financial_analysis.ipynb) | **Financial Analysis** — statements, time series, ratios |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/03_kr_us_compare.ipynb) | **Korea vs US** — Samsung vs Apple side-by-side |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/04_risk_diff.ipynb) | **Risk Diff** — track disclosure changes (Bloomberg can't) |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/05_sector_screening.ipynb) | **Sector Screening** — 8 presets, sector benchmarks |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/06_insight_anomaly.ipynb) | **Insight & Anomaly** — 10-area grading, 6 anomaly rules |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/07_network_governance.ipynb) | **Network & Governance** — corporate relationship graph |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/08_signal_trend.ipynb) | **Signal Trends** — 48-keyword disclosure monitoring |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/09_ai_analysis.ipynb) | **AI Analysis** — `dartlab.ask()` with 9 LLM providers |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/10_disclosure_deep_dive.ipynb) | **Disclosure Deep Dive** — sections architecture |
+<details>
+<summary>한국어 Tutorials</summary>
+| Notebook | Topic |
+|---|---|
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/getting-started/quickstart.ipynb) | **빠른 시작** — sections, show, trace, diff |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/02_financial_statements.ipynb) | **재무제표** — BS, IS, CF |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/04_ratios.ipynb) | **재무비율** — 47개 비율 |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/06_disclosure.ipynb) | **공시 텍스트** — sections 파싱 |
+| [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/tutorials/09_edgar.ipynb) | **EDGAR** — 미국 SEC |
+</details>
+## Documentation
+- Docs: https://eddmpython.github.io/dartlab/
+- Sections guide: https://eddmpython.github.io/dartlab/docs/getting-started/sections
+- Quick start: https://eddmpython.github.io/dartlab/docs/getting-started/quickstart
+- API overview: https://eddmpython.github.io/dartlab/docs/api/overview
+- Beginner guide (Korean): https://eddmpython.github.io/dartlab/blog/dartlab-easy-start/
+### Blog
+The [DartLab Blog](https://eddmpython.github.io/dartlab/blog/) covers practical disclosure analysis — how to read reports, interpret patterns, and spot risk signals. 120+ articles across three categories:
+- **Disclosure Systems** — structure and mechanics of DART/EDGAR filings
+- **Report Reading** — practical guide to audit reports, preliminary earnings, restatements
+- **Financial Interpretation** — financial statements, ratios, and disclosure signals
+## Stability
+| Tier | Scope |
+|------|-------|
+| **Stable** | DART Company (sections, show, trace, diff, BS/IS/CF, CIS, index, filings, profile), EDGAR Company core, valuation, forecast, simulation |
+| **Beta** | EDGAR power-user (SCE, notes, freq, coverage), insights, distress, ratios, timeseries, network, governance, workforce, capital, debt, chart/table/text tools, ask/chat, OpenDart, OpenEdgar, Server API, MCP, CLI subcommands |
+| **Experimental** | AI tool calling, export |
+| **Alpha** | Desktop App (Windows .exe) — functional but incomplete; Sections Viewer — not yet fully structured |
+See [docs/stability.md](docs/stability.md).
+## Contributing
+The project prefers **experiments before engine changes**. If you want to propose a parser or mapping change, validate it in `experiments/` first and bring the verified result back into the engine.
+- **Experiment folder**: `experiments/XXX_camelCaseName/` — each file must be independently runnable with actual results in its docstring
+- **Data contributions** (e.g. `accountMappings.json`, `sectionMappings.json`): only accepted when backed by experiment evidence — no manual bulk edits
+- Issues and PRs in Korean or English are both welcome
+## License
+MIT

app.py DELETED Viewed

@@ -1,623 +0,0 @@
-"""DartLab Streamlit Demo — AI 채팅 기반 기업 분석."""
-from __future__ import annotations
-import gc
-import io
-import os
-import re
-import pandas as pd
-import streamlit as st
-import dartlab
# ── Settings ──────────────────────────────────────────
# Upper bound passed to the Streamlit resource cache.
_MAX_CACHE = 2

# External links and assets.
_REPO_URL = "https://github.com/eddmpython/dartlab"
_LOGO_URL = "https://raw.githubusercontent.com/eddmpython/dartlab/master/.github/assets/logo.png"
_DOCS_URL = "https://eddmpython.github.io/dartlab/docs/getting-started/quickstart"
_BLOG_URL = "https://eddmpython.github.io/dartlab/blog/dartlab-easy-start/"
_COLAB_URL = "https://colab.research.google.com/github/eddmpython/dartlab/blob/master/notebooks/showcase/01_quickstart.ipynb"

# True only when OPENAI_API_KEY is set to a non-empty value; in that case
# dartlab's LLM layer is configured for OpenAI once, at import time.
_HAS_OPENAI = os.environ.get("OPENAI_API_KEY", "") != ""
if _HAS_OPENAI:
    dartlab.llm.configure(provider="openai", api_key=os.environ["OPENAI_API_KEY"])

# ── Page configuration ────────────────────────────────
st.set_page_config(page_title="DartLab — AI 기업 분석", page_icon=None, layout="centered")
-# ── CSS ───────────────────────────────────────────────
-st.markdown("""
-<style>
-/* 다크 테마 강제 */
-html, body, [data-testid="stAppViewContainer"],
-[data-testid="stApp"], .main, .block-container {
-    background-color: #050811 !important;
-    color: #f1f5f9 !important;
-}
-[data-testid="stHeader"] { background: #050811 !important; }
-[data-testid="stSidebar"] { background: #0f1219 !important; }
-/* 입력 필드 */
-input, textarea,
-[data-baseweb="input"] input, [data-baseweb="textarea"] textarea,
-[data-baseweb="input"], [data-baseweb="base-input"] {
-    background-color: #0f1219 !important;
-    color: #f1f5f9 !important;
-    border-color: #1e2433 !important;
-}
-/* 셀렉트/드롭다운 */
-[data-baseweb="select"] > div {
-    background-color: #0f1219 !important;
-    border-color: #1e2433 !important;
-    color: #f1f5f9 !important;
-}
-[data-baseweb="popover"], [data-baseweb="menu"] {
-    background-color: #0f1219 !important;
-}
-[data-baseweb="menu"] li { color: #f1f5f9 !important; }
-[data-baseweb="menu"] li:hover { background-color: #1a1f2b !important; }
-/* 라디오 */
-[data-testid="stRadio"] label { color: #f1f5f9 !important; }
-/* 버튼 — dartlab primary 통일 */
-button, [data-testid="stBaseButton-primary"],
-[data-testid="stBaseButton-secondary"],
-[data-testid="stFormSubmitButton"] button,
-[data-testid="stChatInputSubmitButton"] {
-    background-color: #ea4647 !important;
-    color: #fff !important;
-    border: none !important;
-    font-weight: 600 !important;
-}
-button:hover, [data-testid="stBaseButton-primary"]:hover,
-[data-testid="stChatInputSubmitButton"]:hover {
-    background-color: #c83232 !important;
-}
-[data-testid="stDownloadButton"] button {
-    background-color: #0f1219 !important;
-    color: #f1f5f9 !important;
-    border: 1px solid #1e2433 !important;
-}
-[data-testid="stDownloadButton"] button:hover {
-    border-color: #ea4647 !important;
-    color: #ea4647 !important;
-    background-color: #0f1219 !important;
-}
-/* expander 토글은 배경색 제거 */
-[data-testid="stExpander"] button {
-    background-color: transparent !important;
-    color: #f1f5f9 !important;
-}
-/* Expander */
-[data-testid="stExpander"] {
-    background-color: #0f1219 !important;
-    border-color: #1e2433 !important;
-}
-/* Chat */
-[data-testid="stChatMessage"] {
-    background-color: #0a0e17 !important;
-    border-color: #1e2433 !important;
-}
-[data-testid="stChatInput"], [data-testid="stChatInput"] textarea {
-    background-color: #0f1219 !important;
-    border-color: #1e2433 !important;
-    color: #f1f5f9 !important;
-}
-/* 텍스트 */
-p, span, label, h1, h2, h3, h4, h5, h6,
-[data-testid="stMarkdownContainer"],
-[data-testid="stMarkdownContainer"] p {
-    color: #f1f5f9 !important;
-}
-[data-testid="stCaption"] { color: #64748b !important; }
-/* DataFrame */
-[data-testid="stDataFrame"] { font-variant-numeric: tabular-nums; }
-/* 커스텀 */
-.dl-header {
-    text-align: center;
-    padding: 1.5rem 0 0.5rem;
-}
-.dl-header img {
-    border-radius: 50%;
-    box-shadow: 0 0 48px rgba(234,70,71,0.25);
-}
-.dl-header h1 {
-    background: linear-gradient(135deg, #ea4647, #f87171, #ea4647);
-    -webkit-background-clip: text;
-    -webkit-text-fill-color: transparent;
-    background-clip: text;
-    font-size: 2.4rem !important;
-    font-weight: 800 !important;
-    margin: 0.5rem 0 0.1rem !important;
-    letter-spacing: -0.03em;
-}
-.dl-header .tagline { color: #94a3b8 !important; font-size: 1rem; margin: 0; }
-.dl-header .sub { color: #64748b !important; font-size: 0.82rem; margin: 0.15rem 0 0; }
-.dl-card {
-    background: linear-gradient(135deg, #0f1219 0%, #0a0d16 100%);
-    border: 1px solid #1e2433;
-    border-radius: 12px;
-    padding: 1.2rem 1.5rem;
-    margin: 0.8rem 0;
-    position: relative;
-    overflow: hidden;
-}
-.dl-card::before {
-    content: '';
-    position: absolute;
-    top: 0; left: 0; right: 0;
-    height: 3px;
-    background: linear-gradient(90deg, #ea4647, #f87171, #fb923c);
-}
-.dl-card h3 { color: #f1f5f9 !important; font-size: 1.3rem !important; margin: 0 0 0.8rem !important; }
-.dl-card .meta { display: flex; gap: 2.5rem; flex-wrap: wrap; }
-.dl-card .meta-item { display: flex; flex-direction: column; gap: 0.1rem; }
-.dl-card .meta-label {
-    color: #64748b !important; font-size: 0.72rem;
-    text-transform: uppercase; letter-spacing: 0.08em;
-}
-.dl-card .meta-value {
-    color: #e2e8f0 !important; font-size: 1.1rem; font-weight: 600;
-    font-family: 'JetBrains Mono', monospace;
-}
-.dl-section {
-    color: #ea4647 !important;
-    font-weight: 700 !important;
-    font-size: 1.05rem !important;
-    border-bottom: 2px solid #ea4647;
-    padding-bottom: 0.3rem;
-    margin: 1rem 0 0.6rem;
-}
-.dl-footer {
-    text-align: center;
-    padding: 1.5rem 0 0.8rem;
-    border-top: 1px solid #1e2433;
-    margin-top: 2rem;
-    color: #475569 !important;
-    font-size: 0.82rem;
-}
-.dl-footer a { color: #94a3b8 !important; text-decoration: none; margin: 0 0.5rem; }
-.dl-footer a:hover { color: #ea4647 !important; }
-.dl-hero-glow {
-    position: fixed;
-    top: 0; left: 50%;
-    transform: translateX(-50%);
-    width: 600px; height: 400px;
-    background: radial-gradient(ellipse at top, rgba(234,70,71,0.05) 0%, transparent 60%);
-    pointer-events: none; z-index: 0;
-}
-</style>
-""", unsafe_allow_html=True)
-# ── 유틸 ──────────────────────────────────────────────
def _toPandas(df):
    """Convert *df* through its ``to_pandas()`` method when it has one.

    Intended for Polars/pandas frames: ``None`` is passed through, and any
    object without a ``to_pandas`` attribute is returned unchanged.
    """
    convertible = df is not None and hasattr(df, "to_pandas")
    return df.to_pandas() if convertible else df
def _formatDf(df: pd.DataFrame) -> pd.DataFrame:
    """Return a display copy of *df* with numeric columns rendered as text.

    Each value in a numeric column is truncated to an integer and formatted
    with thousands separators (``1234567.8`` -> ``"1,234,567"``). Missing and
    non-finite values (``NaN``, ``inf``, ``-inf``) become ``""``. Non-numeric
    columns are left untouched and the input frame is never modified.

    Args:
        df: Frame to format. ``None`` and empty frames are returned as-is.

    Returns:
        The formatted copy, or *df* itself when it is ``None`` or empty.
    """
    if df is None or df.empty:
        return df

    def _formatCell(value) -> str:
        if pd.isna(value):
            return ""
        try:
            return f"{int(value):,}"
        except (OverflowError, ValueError):
            # int() cannot represent +/-inf (e.g. a ratio with a zero
            # denominator); show a blank cell instead of failing the render.
            return ""

    result = df.copy()
    for col in result.columns:
        if pd.api.types.is_numeric_dtype(result[col]):
            result[col] = result[col].apply(_formatCell)
    return result
def _toExcel(df: pd.DataFrame) -> bytes:
    """Serialize *df* to an in-memory ``.xlsx`` workbook and return its bytes.

    The index is not written; the workbook is produced with the ``openpyxl``
    engine.
    """
    with io.BytesIO() as stream:
        df.to_excel(stream, index=False, engine="openpyxl")
        return stream.getvalue()
def _showDf(df: pd.DataFrame, key: str = "", downloadName: str = ""):
    """Render *df* as a Streamlit table, optionally with an Excel download.

    Args:
        df: Frame to display. ``None`` or an empty frame shows a "no data"
            caption instead of a table.
        key: Streamlit widget key for the table; the download button uses
            ``dl_<key>``. An empty string leaves both keys unset.
        downloadName: File stem for the ``.xlsx`` download. When empty, no
            download button is rendered.
    """
    if df is None or df.empty:
        st.caption("데이터 없음")
        return

    tableKey = key or None
    st.dataframe(_formatDf(df), use_container_width=True, hide_index=True, key=tableKey)

    if not downloadName:
        return

    # The download carries the raw values, not the comma-formatted display copy.
    st.download_button(
        label="Excel 다운로드",
        data=_toExcel(df),
        file_name=f"{downloadName}.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        key=f"dl_{key}" if key else None,
    )
@st.cache_resource(max_entries=_MAX_CACHE)
def _getCompany(code: str):
    """Build the ``dartlab.Company`` for *code* behind Streamlit's resource cache.

    The cache is capped at ``_MAX_CACHE`` entries. A garbage-collection pass
    runs each time the body executes, before the new company is constructed.
    """
    gc.collect()
    company = dartlab.Company(code)
    return company
-# ── 종목코드 추출 ────────────────────────────────────
def _extractCode(message: str) -> str | None:
    """Pull a stock code or ticker out of a free-form chat message.

    Resolution order:
      1. a standalone 6-digit number is returned as-is;
      2. a standalone run of 1-5 uppercase ASCII letters is returned as-is;
      3. otherwise the listed Korean particles and request verbs are blanked
         out, the remaining words are tried longest-first against
         ``dartlab.search``, and the "종목코드" field of the first result row
         is returned.

    Returns:
        The code/ticker as a string, or ``None`` when nothing matched.
    """
    text = message.strip()

    # Literal code patterns; the 6-digit form takes precedence over the ticker form.
    for pattern in (r"\b(\d{6})\b", r"\b([A-Z]{1,5})\b"):
        hit = re.search(pattern, text)
        if hit:
            return hit.group(1)

    # Blank out particles first, then request verbs and auxiliaries.
    stripped = re.sub(
        r"(에\s*대해|에\s*대한|에대해|좀|의|를|을|은|는|이|가|도|만|부터|까지|하고|이랑|랑|로|으로|와|과|한테|에서|에게)\b",
        " ",
        text,
    )
    stripped = re.sub(
        r"\b(알려줘|보여줘|분석|해줘|해봐|어때|보자|볼래|줘|해|좀|요)\b",
        " ",
        stripped,
    )

    # Longer words are tried first.
    candidates = sorted(re.findall(r"[가-힣A-Za-z0-9]+", stripped), key=len, reverse=True)
    for word in candidates:
        if len(word) < 2:
            continue
        try:
            found = dartlab.search(word)
            if found is not None and len(found) > 0:
                return str(found[0, "종목코드"])
        except Exception:
            # A failed lookup for one word must not stop the remaining candidates.
            continue
    return None
def _detectTopic(message: str) -> str | None:
    """Map a chat message to a disclosure topic via keyword lookup.

    Keywords are matched as substrings of the lower-cased message, in the
    order they are declared below. When the first matching keyword is itself
    contained in a longer keyword that also occurs in the message, the longer
    one wins (e.g. "임원보수" beats "임원"), so specific keywords are not
    shadowed by their prefixes or suffixes.

    Args:
        message: Raw user message.

    Returns:
        The topic name for the matched keyword, or ``None`` when no keyword
        occurs in the message.
    """
    topicMap = {
        "배당": "dividend",
        "주주": "majorHolder",
        "대주주": "majorHolder",
        "직원": "employee",
        "임원": "executive",
        "임원보수": "executivePay",
        "보수": "executivePay",
        "세그먼트": "segments",
        "부문": "segments",
        "사업부": "segments",
        "유형자산": "tangibleAsset",
        "무형자산": "intangibleAsset",
        "원재료": "rawMaterial",
        "수주": "salesOrder",
        "제품": "productService",
        "자회사": "subsidiary",
        "종속": "subsidiary",
        "부채": "contingentLiability",
        "우발": "contingentLiability",
        "파생": "riskDerivative",
        "사채": "bond",
        "이사회": "boardOfDirectors",
        "감사": "audit",
        "자본변동": "capitalChange",
        "자기주식": "treasuryStock",
        "사업개요": "business",
        "사업보고": "business",
        "연혁": "companyHistory",
    }
    msg = message.lower()
    for keyword, topic in topicMap.items():
        if keyword not in msg:
            continue
        # Prefer a longer keyword that contains this one and is also present;
        # declaration order alone would return the shorter, less specific topic.
        moreSpecific = [
            other
            for other in topicMap
            if other != keyword and keyword in other and other in msg
        ]
        if moreSpecific:
            return topicMap[max(moreSpecific, key=len)]
        return topic
    return None
-# ── AI ────────────────────────────────────────────────
def _askAi(stockCode: str, question: str) -> str:
    """Answer *question* about *stockCode* with an LLM and return the text.

    When ``_HAS_OPENAI`` is set, the question (prefixed with the stock code if
    one is given) goes through ``dartlab.ask``. Otherwise the Hugging Face
    ``InferenceClient`` is used with a system prompt built from
    ``_buildAiContext``; ``HF_TOKEN`` is passed along when it is set.

    Failures are never raised: the exception text is returned inside a
    user-facing message.
    """
    if _HAS_OPENAI:
        try:
            prompt = question if not stockCode else f"{stockCode} {question}"
            reply = dartlab.ask(prompt, stream=False, raw=False)
            return reply or "응답 없음"
        except Exception as e:
            return f"분석 실패: {e}"

    try:
        # Imported lazily so the dependency is only needed on this fallback path.
        from huggingface_hub import InferenceClient

        hfToken = os.environ.get("HF_TOKEN")
        client = InferenceClient(
            model="meta-llama/Llama-3.1-8B-Instruct",
            token=hfToken or None,
        )
        context = _buildAiContext(stockCode)
        systemMsg = (
            "당신은 한국 기업 재무 분석 전문가입니다. "
            "아래 재무 데이터를 바탕으로 사용자의 질문에 한국어로 답변하세요. "
            "숫자는 천단위 콤마를 사용하고, 근거를 명확히 제시하세요.\n\n"
            f"{context}"
        )
        conversation = [
            {"role": "system", "content": systemMsg},
            {"role": "user", "content": question},
        ]
        response = client.chat_completion(messages=conversation, max_tokens=1024)
        return response.choices[0].message.content or "응답 없음"
    except Exception as e:
        return f"AI 분석 실패: {e}"
def _buildAiContext(stockCode: str) -> str:
    """Assemble the plain-text company context used in the AI system prompt.

    The first line carries the company name, stock code and market. It is
    followed by the first 15 rows of the income statement, balance sheet and
    ratios, for whichever of them load and are non-empty. If the company
    itself cannot be loaded, only the stock code is returned.
    """
    try:
        company = _getCompany(stockCode)
    except Exception:
        return f"종목코드: {stockCode}"

    lines = [f"기업: {company.corpName} ({company.stockCode}), 시장: {company.market}"]
    statements = {"손익계산서": "IS", "재무상태표": "BS", "재무비율": "ratios"}
    for title, attrName in statements.items():
        try:
            frame = _toPandas(getattr(company, attrName, None))
            if frame is None or frame.empty:
                continue
            lines.append(f"\n[{title}]\n{frame.head(15).to_string()}")
        except Exception:
            # Best-effort: a statement that fails to load is left out of the context.
            pass
    return "\n".join(lines)
-# ── 대시보드 렌더링 ──────────────────────────────────
def _renderCompanyCard(c):
    """Render the company summary card (name, stock code, market, currency).

    The currency cell is only emitted when *c* has a truthy ``currency``
    attribute.
    """
    currency = getattr(c, "currency", "") or ""
    if currency:
        currencyHtml = (
            f"<div class='meta-item'><span class='meta-label'>통화</span>"
            f"<span class='meta-value'>{currency}</span></div>"
        )
    else:
        currencyHtml = ""

    cardHtml = f"""
    <div class="dl-card">
        <h3>{c.corpName}</h3>
        <div class="meta">
            <div class="meta-item">
                <span class="meta-label">종목코드</span>
                <span class="meta-value">{c.stockCode}</span>
            </div>
            <div class="meta-item">
                <span class="meta-label">시장</span>
                <span class="meta-value">{c.market}</span>
            </div>
            {currencyHtml}
        </div>
    </div>
    """
    st.markdown(cardHtml, unsafe_allow_html=True)
def _renderFullDashboard(c, code: str):
    """Render the full dashboard for company *c*.

    Layout: the company card, one expander per financial statement (IS, BS,
    CF, ratios — only IS starts expanded), then a topic selector with the
    selected disclosure topic shown below it when the company exposes topics.

    Args:
        c: Company object providing the statement attributes, ``topics`` and
            ``show()``.
        code: Stock code, used as the prefix of download file names.
    """
    _renderCompanyCard(c)

    # Financial statements
    st.markdown('<div class="dl-section">재무제표</div>', unsafe_allow_html=True)
    statementTabs = (
        ("IS (손익계산서)", "IS"),
        ("BS (재무상태표)", "BS"),
        ("CF (현금흐름표)", "CF"),
        ("ratios (재무비율)", "ratios"),
    )
    for title, attrName in statementTabs:
        with st.expander(title, expanded=(attrName == "IS")):
            try:
                frame = _toPandas(getattr(c, attrName, None))
                _showDf(frame, key=f"dash_{attrName}", downloadName=f"{code}_{attrName}")
            except Exception:
                st.caption("로드 실패")

    # Disclosure sections
    try:
        topics = list(c.topics) if c.topics else []
    except Exception:
        topics = []
    if not topics:
        return

    st.markdown('<div class="dl-section">공시 데이터</div>', unsafe_allow_html=True)
    selectedTopic = st.selectbox("topic", topics, label_visibility="collapsed", key="dash_topic")
    if not selectedTopic:
        return

    try:
        shown = c.show(selectedTopic)
        if shown is None:
            return
        if hasattr(shown, "to_pandas"):
            _showDf(_toPandas(shown), key="dash_sec", downloadName=f"{code}_{selectedTopic}")
        else:
            st.markdown(str(shown))
    except Exception as e:
        st.caption(f"조회 실패: {e}")
-def _renderTopicData(c, code: str, topic: str):
-    """특정 topic 데이터만 렌더링."""
-    try:
-        result = c.show(topic)
-        if result is not None:
-            if hasattr(result, "to_pandas"):
-                _showDf(_toPandas(result), key=f"topic_{topic}", downloadName=f"{code}_{topic}")
-            else:
-                st.markdown(str(result))
-        else:
-            st.caption(f"'{topic}' 데이터 없음")
-    except Exception as e:
-        st.caption(f"조회 실패: {e}")
-# ── 프리로드 ──────────────────────────────────────────
-@st.cache_resource
-def _warmup():
-    """listing 캐시."""
-    try:
-        dartlab.search("삼성전자")
-    except Exception:
-        pass
-    return True
-_warmup()
-# ── 헤더 ──────────────────────────────────────────────
-st.markdown(f"""
-<div class="dl-hero-glow"></div>
-<div class="dl-header">
-    <img src="{_LOGO_URL}" width="80" height="80" alt="DartLab">
-    <h1>DartLab</h1>
-    <p class="tagline">종목코드 하나. 기업의 전체 이야기.</p>
-    <p class="sub">DART / EDGAR 공시 데이터를 구조화하여 제공합니다</p>
-</div>
-""", unsafe_allow_html=True)
-# ── 세션 초기화 ──────────────────────────────────────
-if "messages" not in st.session_state:
-    st.session_state.messages = []
-if "code" not in st.session_state:
-    st.session_state.code = ""
-# ── 대시보드 영역 (종목이 있으면 표시) ────────────────
-if st.session_state.code:
-    try:
-        _dashCompany = _getCompany(st.session_state.code)
-        _renderFullDashboard(_dashCompany, st.session_state.code)
-    except Exception as e:
-        st.error(f"기업 로드 실패: {e}")
-    st.markdown("---")
-# ── 채팅 영역 ────────────────────────────────────────
-# 히스토리 표시
-for msg in st.session_state.messages:
-    with st.chat_message(msg["role"]):
-        st.markdown(msg["content"])
-# 입력
-if prompt := st.chat_input("삼성전자에 대해 알려줘, 배당 현황은? ..."):
-    # 사용자 메시지 표시
-    st.session_state.messages.append({"role": "user", "content": prompt})
-    with st.chat_message("user"):
-        st.markdown(prompt)
-    # 종목코드 추출 시도
-    newCode = _extractCode(prompt)
-    if newCode and newCode != st.session_state.code:
-        st.session_state.code = newCode
-    code = st.session_state.code
-    if not code:
-        # 종목 못 찾음
-        reply = "종목을 찾지 못했습니다. 회사명이나 종목코드를 포함해서 다시 질문해주세요.\n\n예: 삼성전자에 대해 알려줘, 005930 분석, AAPL 재무"
-        st.session_state.messages.append({"role": "assistant", "content": reply})
-        with st.chat_message("assistant"):
-            st.markdown(reply)
-    else:
-        # 응답 생성
-        with st.chat_message("assistant"):
-            # 특정 topic 감지
-            topic = _detectTopic(prompt)
-            if topic:
-                # 특정 topic만 보여주기
-                try:
-                    c = _getCompany(code)
-                    _renderTopicData(c, code, topic)
-                except Exception:
-                    pass
-            # AI 요약
-            with st.spinner("분석 중..."):
-                aiAnswer = _askAi(code, prompt)
-            st.markdown(aiAnswer)
-            st.session_state.messages.append({"role": "assistant", "content": aiAnswer})
-    # 대시보드 갱신을 위해 rerun
-    if newCode and newCode != "":
-        st.rerun()
-# ── 초기 안내 (대화 없을 때) ─────────────────────────
-if not st.session_state.messages and not st.session_state.code:
-    st.markdown("""
-    <div style="text-align: center; color: #64748b; padding: 2rem 1rem;">
-        <p style="font-size: 1.1rem; color: #94a3b8;">
-            아래 입력창에 자연어로 질문하세요
-        </p>
-        <p style="margin-top: 0.5rem;">
-            <code>삼성전자에 대해 알려줘</code> &middot;
-            <code>005930 분석</code> &middot;
-            <code>AAPL 재무 보여줘</code>
-        </p>
-        <p style="margin-top: 0.3rem; font-size: 0.85rem;">
-            종목을 말하면 재무제표/공시 데이터가 바로 표시되고, AI가 분석을 덧붙입니다
-        </p>
-    </div>
-    """, unsafe_allow_html=True)
-# ── 푸터 ──────────────────────────────────────────────
-st.markdown(f"""
-<div class="dl-footer">
-    <a href="{_BLOG_URL}">초보자 가이드</a> /
-    <a href="{_DOCS_URL}">공식 문서</a> /
-    <a href="{_COLAB_URL}">Colab</a> /
-    <a href="{_REPO_URL}">GitHub</a>
-    <br><span style="color:#334155; font-size:0.78rem; margin-top:0.4rem; display:inline-block;">
-        pip install dartlab
-    </span>
-</div>
-""", unsafe_allow_html=True)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,240 @@

+[project]
+name = "dartlab"
+version = "0.7.10"
+description = "DART 전자공시 + EDGAR 공시를 하나의 회사 맵으로 — Python 재무 분석 라이브러리"
+readme = "README.md"
+license = {file = "LICENSE"}
+requires-python = ">=3.12"
+authors = [
+    {name = "eddmpython"}
+]
+keywords = [
+    "dart",
+    "edgar",
+    "sec",
+    "financial-statements",
+    "korea",
+    "disclosure",
+    "accounting",
+    "polars",
+    "sections",
+    "mcp",
+    "ai-analysis",
+    "annual-report",
+    "10-k",
+    "xbrl",
+    "전자공시",
+    "재무제표",
+    "사업보고서",
+    "공시분석",
+    "다트",
+]
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Developers",
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Financial and Insurance Industry",
+    "Intended Audience :: End Users/Desktop",
+    "License :: OSI Approved :: MIT License",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Topic :: Office/Business :: Financial",
+    "Topic :: Office/Business :: Financial :: Accounting",
+    "Topic :: Office/Business :: Financial :: Investment",
+    "Topic :: Scientific/Engineering :: Information Analysis",
+    "Natural Language :: Korean",
+    "Natural Language :: English",
+    "Typing :: Typed",
+]
+dependencies = [
+    "alive-progress>=3.3.0,<4",
+    "beautifulsoup4>=4.14.3,<5",
+    "lxml>=6.0.2,<7",
+    "marimo>=0.20.4,<1",
+    "openpyxl>=3.1.5,<4",
+    "diff-match-patch>=20230430",
+    "httpx>=0.28.1,<1",
+    "orjson>=3.10.0,<4",
+    "polars>=1.0.0,<2",
+    "requests>=2.32.5,<3",
+    "rich>=14.3.3,<15",
+    "plotly>=5.0.0,<6",
+    "mcp[cli]>=1.0",
+]
+[project.optional-dependencies]
+llm = [
+    "openai>=1.0.0,<3",
+    "google-genai>=1.0.0,<2",
+]
+llm-anthropic = [
+    "openai>=1.0.0,<3",
+    "google-genai>=1.0.0,<2",
+    "anthropic>=0.30.0,<2",
+]
+charts = [
+    "networkx>=3.6.1,<4",
+    "scipy>=1.17.1,<2",
+]
+ai = [
+    "fastapi>=0.135.1,<1",
+    "httpx>=0.28.1,<1",
+    "msgpack>=1.1.0,<2",
+    "uvicorn[standard]>=0.30.0,<1",
+    "sse-starlette>=2.0.0,<3",
+]
+mcp = [
+    "mcp[cli]>=1.0,<2",
+]
+display = [
+    "great-tables>=0.15.0,<1",
+    "itables>=2.0.0,<3",
+]
+altair = [
+    "altair>=5.0.0,<6",
+]
+hf = [
+    "huggingface-hub>=0.20.0,<1",
+]
+ui = [
+    "dartlab[ai]",
+]
+channel = [
+    "dartlab[ai]",
+    "pycloudflared>=0.3",
+]
+channel-ngrok = [
+    "dartlab[ai]",
+    "pyngrok>=7.0,<8",
+]
+channel-full = [
+    "dartlab[channel,channel-ngrok]",
+    "python-telegram-bot>=21.0,<22",
+    "slack-bolt>=1.18,<2",
+    "discord.py>=2.4,<3",
+]
+all = [
+    "openai>=1.0.0,<3",
+    "anthropic>=0.30.0,<2",
+    "networkx>=3.6.1,<4",
+    "scipy>=1.17.1,<2",
+    "fastapi>=0.135.1,<1",
+    "httpx>=0.28.1,<1",
+    "msgpack>=1.1.0,<2",
+    "uvicorn[standard]>=0.30.0,<1",
+    "sse-starlette>=2.0.0,<3",
+]
+[project.scripts]
+dartlab = "dartlab.cli.main:main"
+[project.entry-points."dartlab.plugins"]
+[project.urls]
+Homepage = "https://eddmpython.github.io/dartlab/"
+Repository = "https://github.com/eddmpython/dartlab"
+Documentation = "https://eddmpython.github.io/dartlab/docs/"
+Issues = "https://github.com/eddmpython/dartlab/issues"
+Changelog = "https://eddmpython.github.io/dartlab/docs/changelog"
+Demo = "https://huggingface.co/spaces/eddmpython/dartlab"
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src/dartlab"]
+exclude = [
+    "**/_reference/**",
+    "src/dartlab/engines/edinet/**",
+    "src/dartlab/engines/esg/**",
+    "src/dartlab/engines/event/**",
+    "src/dartlab/engines/supply/**",
+    "src/dartlab/engines/watch/**",
+]
+[tool.hatch.build.targets.sdist]
+include = [
+    "src/dartlab/**/*.py",
+    "src/dartlab/**/*.json",
+    "src/dartlab/**/*.parquet",
+    "README.md",
+    "LICENSE",
+]
+exclude = [
+    "**/_reference/**",
+    "src/dartlab/engines/edinet/**",
+    "src/dartlab/engines/esg/**",
+    "src/dartlab/engines/event/**",
+    "src/dartlab/engines/supply/**",
+    "src/dartlab/engines/watch/**",
+]
+[tool.ruff]
+target-version = "py312"
+line-length = 120
+exclude = ["experiments", "*/_reference"]
+[tool.ruff.lint]
+select = ["E", "F", "I"]
+ignore = ["E402", "E501", "E741", "F841"]
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+addopts = "-v --tb=short"
+asyncio_mode = "auto"
+markers = [
+    "requires_data: 로컬 parquet 데이터 필요 (CI에서 skip)",
+    "unit: 순수 로직/mock만 — 데이터 로드 없음, 병렬 안전",
+    "integration: Company 1개 로딩 필요 — 중간 무게",
+    "heavy: 대량 데이터 로드 — 단독 실행 필수",
+]
+[tool.coverage.run]
+source = ["dartlab"]
+omit = [
+    "src/dartlab/ui/*",
+    "src/dartlab/engines/ai/providers/*",
+]
+[tool.coverage.report]
+show_missing = true
+skip_empty = true
+exclude_lines = [
+    "pragma: no cover",
+    "if __name__",
+    "raise NotImplementedError",
+]
+[tool.pyright]
+pythonVersion = "3.12"
+typeCheckingMode = "basic"
+include = ["src/dartlab"]
+exclude = [
+    "src/dartlab/engines/ai/providers/**",
+    "src/dartlab/ui/**",
+    "experiments/**",
+]
+reportMissingTypeStubs = false
+reportUnknownParameterType = false
+reportUnknownMemberType = false
+reportUnknownVariableType = false
+[tool.bandit]
+exclude_dirs = ["experiments", "tests"]
+skips = ["B101"]
+[dependency-groups]
+dev = [
+    "build>=1.4.0",
+    "dartlab[all]",
+    "hatchling>=1.29.0",
+    "pillow>=12.1.1",
+    "pre-commit>=4.0.0",
+    "pyright>=1.1.0",
+    "pytest>=9.0.2",
+    "pytest-asyncio>=0.24.0",
+    "pytest-cov>=6.0.0",
+]

requirements.txt DELETED Viewed

@@ -1,4 +0,0 @@
-dartlab>=0.7.8
-streamlit>=1.45,<2
-openpyxl>=3.1
-huggingface_hub>=0.25

src/dartlab/API_SPEC.md ADDED Viewed

	@@ -0,0 +1,450 @@

+# dartlab API 스펙
+이 문서는 `scripts/generateSpec.py`에 의해 자동 생성됩니다. 직접 수정하지 마세요.
+---
+## Company (통합 facade)
+입력을 자동 판별하여 DART 또는 EDGAR 시장 전용 Company를 생성한다.
+현재 DART Company의 공개 진입점은 **index → show(topic) → trace(topic)** 이다.
+`profile`은 향후 terminal/notebook 문서형 보고서 뷰로 확장될 예정이다.
+```python
+import dartlab
+kr = dartlab.Company("005930")
+kr = dartlab.Company("삼성전자")
+us = dartlab.Company("AAPL")
+kr.market                    # "KR"
+us.market                    # "US"
+```
+### 판별 규칙
+| 입력 | 결과 | 예시 |
+|------|------|------|
+| 6자리 숫자 | DART Company | `Company("005930")` |
+| 한글 포함 | DART Company | `Company("삼성전자")` |
+| 영문 1~5자리 | EDGAR Company | `Company("AAPL")` |
+## DART Company
+### 현재 공개 진입점
+| surface | 설명 |
+|---------|------|
+| `index` | 회사 데이터 구조 인덱스 DataFrame |
+| `show(topic)` | topic의 실제 데이터 payload 조회 |
+| `trace(topic, period)` | docs / finance / report source provenance 조회 |
+| `docs` | pure docs source namespace |
+| `finance` | authoritative finance source namespace |
+| `report` | authoritative structured disclosure source namespace |
+| `profile` | 향후 보고서형 렌더용 예약 뷰 |
+### 정적 메서드
+| 메서드 | 반환 | 설명 |
+|--------|------|------|
+| `dartlab.providers.dart.Company.listing()` | DataFrame | KRX 전체 상장법인 목록 |
+| `dartlab.providers.dart.Company.search(keyword)` | DataFrame | 회사명 부분 검색 |
+| `dartlab.providers.dart.Company.status()` | DataFrame | 로컬 보유 전체 종목 인덱스 |
+| `dartlab.providers.dart.Company.resolve(codeOrName)` | str \| None | 종목코드/회사명 → 종목코드 |
+### 핵심 property
+| property | 반환 | 설명 |
+|----------|------|------|
+| `BS` | DataFrame | 재무상태표 |
+| `IS` | DataFrame | 손익계산서 |
+| `CIS` | DataFrame | 포괄손익계산서 |
+| `CF` | DataFrame | 현금흐름표 |
+| `SCE` | tuple \| DataFrame | 자본변동표 |
+| `sections` | DataFrame | merged topic x period company table |
+| `timeseries` | (series, periods) | 분기별 standalone 시계열 |
+| `annual` | (series, years) | 연도별 시계열 |
+| `ratios` | RatioResult | 재무비율 |
+| `index` | DataFrame | 회사 구조 인덱스 |
+| `docs` | Accessor | pure docs source |
+| `finance` | Accessor | authoritative finance source |
+| `report` | Accessor | authoritative report source |
+| `profile` | _BoardView | 향후 보고서형 뷰 예약 |
+| `sector` | SectorInfo | 섹터 분류 |
+| `insights` | AnalysisResult | 7영역 인사이트 등급 |
+| `rank` | RankInfo | 시장 순위 |
+| `notes` | Notes | K-IFRS 주석 접근 |
+| `market` | str | `"KR"` |
+### 메서드
+| 메서드 | 반환 | 설명 |
+|--------|------|------|
+| `get(name)` | Result | 모듈 전체 Result 객체 |
+| `all()` | dict | 전체 데이터 dict |
+| `show(topic, period=None, raw=False)` | Any | topic payload 조회 |
+| `trace(topic, period=None)` | dict \| None | 선택 source provenance 조회 |
+| `fsSummary(period)` | AnalysisResult | 요약재무정보 |
+| `getTimeseries(period, fsDivPref)` | (series, periods) | 커스텀 시계열 |
+| `getRatios(fsDivPref)` | RatioResult | 커스텀 비율 |
+`index`는 회사 전체 구조를 먼저 보여주고, `show(topic)`가 실제 데이터를 연다.
+`trace(topic)`는 같은 topic에서 docs / finance / report 중 어떤 source가 채택됐는지 설명한다.
+docs가 없는 회사는 `docsStatus` 안내 row와 `현재 사업보고서 부재` notice가 표시된다.
+report/disclosure property는 registry에서 자동 디스패치된다 (`_MODULE_REGISTRY`).
+등록된 모든 property는 아래 "데이터 레지스트리" 섹션 참조.
+## EDGAR Company
+```python
+import dartlab
+us = dartlab.Company("AAPL")
+us.ticker                    # "AAPL"
+us.cik                       # "0000320193"
+```
+### property
+| property | 반환 | 설명 |
+|----------|------|------|
+| `timeseries` | (series, periods) | 분기별 standalone 시계열 |
+| `annual` | (series, years) | 연도별 시계열 |
+| `ratios` | RatioResult | 재무비율 |
+| `insights` | AnalysisResult | 7영역 인사이트 등급 |
+| `market` | str | `"US"` |
+---
+## 데이터 레지스트리
+`core/registry.py`에 등록된 전체 데이터 소스 목록.
+모듈 추가 = registry에 DataEntry 한 줄 추가 → Company, Excel, LLM, Server, Skills 전부 자동 반영.
+### 시계열 재무제표 (finance)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `annual.IS` | 손익계산서(연도별) | `timeseries` | 연도별 손익계산서 시계열. 매출액, 영업이익, 순이익 등 전체 계정. |
+| `annual.BS` | 재무상태표(연도별) | `timeseries` | 연도별 재무상태표 시계열. 자산, 부채, 자본 전체 계정. |
+| `annual.CF` | 현금흐름표(연도별) | `timeseries` | 연도별 현금흐름표 시계열. 영업/투자/재무활동 현금흐름. |
+| `timeseries.IS` | 손익계산서(분기별) | `timeseries` | 분기별 손익계산서 standalone 시계열. |
+| `timeseries.BS` | 재무상태표(분기별) | `timeseries` | 분기별 재무상태표 시점잔액 시계열. |
+| `timeseries.CF` | 현금흐름표(분기별) | `timeseries` | 분기별 현금흐름표 standalone 시계열. |
+### 공시 파싱 모듈 (report)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `BS` | 재무상태표 | `dataframe` | K-IFRS 연결 재무상태표. finance XBRL 정규화(snakeId) 기반, 회사간 비교 가능. finance 없으면 docs fallback. |
+| `IS` | 손익계산서 | `dataframe` | K-IFRS 연결 손익계산서. finance XBRL 정규화 기반. 매출액, 영업이익, 순이익 등 전체 계정 포함. |
+| `CF` | 현금흐름표 | `dataframe` | K-IFRS 연결 현금흐름표. finance XBRL 정규화 기반. 영업/투자/재무활동 현금흐름. |
+| `fsSummary` | 요약재무정보 | `dataframe` | DART 공시 요약재무정보. 다년간 주요 재무지표 비교. |
+| `segments` | 부문정보 | `dataframe` | 사업부문별 매출·이익 데이터. 부문간 수익성 비교 가능. |
+| `tangibleAsset` | 유형자산 | `dataframe` | 유형자산 변동표. 취득/처분/감가상각 내역. |
+| `costByNature` | 비용성격별분류 | `dataframe` | 비용을 성격별로 분류한 시계열. 원재료비, 인건비, 감가상각비 등. |
+| `dividend` | 배당 | `dataframe` | 배당 시계열. 연도별 DPS, 배당총액, 배당성향, 배당수익률. |
+| `majorHolder` | 최대주주 | `dataframe` | 최대주주 지분율 시계열. 지분 변동은 경영권 안정성의 핵심 지표. |
+| `employee` | 직원현황 | `dataframe` | 직원 수, 평균 근속연수, 평균 연봉 시계열. |
+| `subsidiary` | 자회사투자 | `dataframe` | 종속회사 투자 시계열. 지분율, 장부가액 변동. |
+| `bond` | 채무증권 | `dataframe` | 사채, CP 등 채무증권 발행·상환 시계열. |
+| `shareCapital` | 주식현황 | `dataframe` | 발행주식수, 자기주식, 유통주식수 시계열. |
+| `executive` | 임원현황 | `dataframe` | 등기임원 구성 시계열. 사내이사/사외이사/비상무이사 구분. |
+| `executivePay` | 임원보수 | `dataframe` | 임원 유형별 보수 시계열. 등기이사/사외이사/감사 구분. |
+| `audit` | 감사의견 | `dataframe` | 외부감사인의 감사의견과 감사보수 시계열. 적정 외 의견은 중대 위험 신호. |
+| `boardOfDirectors` | 이사회 | `dataframe` | 이사회 구성 및 활동 시계열. 개최횟수, 출석률 포함. |
+| `capitalChange` | 자본변동 | `dataframe` | 자본금 변동 시계열. 보통주/우선주 주식수·액면 변동. |
+| `contingentLiability` | 우발부채 | `dataframe` | 채무보증, 소송 현황. 잠재적 재무 리스크 지표. |
+| `internalControl` | 내부통제 | `dataframe` | 내부회계관리제도 감사의견 시계열. |
+| `relatedPartyTx` | 관계자거래 | `dataframe` | 대주주 등과의 매출·매입 거래 시계열. 이전가격 리스크 확인. |
+| `rnd` | R&D | `dataframe` | 연구개발비용 시계열. 기술 투자 강도 판단. |
+| `sanction` | 제재현황 | `dataframe` | 행정제재, 과징금, 영업정지 등 규제 조치 이력. |
+| `affiliateGroup` | 계열사 | `dataframe` | 기업집단 소속 계열회사 현황. 상장/비상장 구분. |
+| `fundraising` | 증자감자 | `dataframe` | 유상증자, 무상증자, 감자 이력. |
+| `productService` | 주요제품 | `dataframe` | 주요 제품/서비스별 매출액과 비중. |
+| `salesOrder` | 매출수주 | `dataframe` | 매출실적 및 수주 현황. |
+| `riskDerivative` | 위험관리 | `dataframe` | 환율·이자율·상품가격 리스크 관리. 파생상품 보유 현황. |
+| `articlesOfIncorporation` | 정관 | `dataframe` | 정관 변경 이력. 사업목적 추가·변경으로 신사업 진출 파악. |
+| `otherFinance` | 기타재무 | `dataframe` | 대손충당금, 재고자산 관련 기타 재무 데이터. |
+| `companyHistory` | 연혁 | `dataframe` | 회사 주요 연혁 이벤트 목록. |
+| `shareholderMeeting` | 주주총회 | `dataframe` | 주주총회 안건 및 의결 결과. |
+| `auditSystem` | 감사제도 | `dataframe` | 감사위원회 구성 및 활동 현황. |
+| `affiliate` | 관계기업투자 | `dataframe` | 관계기업/공동기업 투자 변동 시계열. 지분법손익, 기초/기말 장부가 포함. |
+| `investmentInOther` | 타법인출자 | `dataframe` | 타법인 출자 현황. 투자목적, 지분율, 장부가 등. |
+| `companyOverviewDetail` | 회사개요 | `dict` | 설립일, 상장일, 대표이사, 주소, 주요사업 등 기본 정보. |
+| `holderOverview` | 주주현황 | `custom` | 5% 이상 주주, 소액주주 현황, 의결권 현황. majorHolder보다 상세한 주주 구성. |
+### 서술형 공시 (disclosure)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `business` | 사업의내용 | `text` | 사업보고서 '사업의 내용' 서술. 사업 구조와 현황 파악. |
+| `companyOverview` | 회사개요정량 | `dict` | 공시 기반 회사 정량 개요 데이터. |
+| `mdna` | MD&A | `text` | 이사의 경영진단 및 분석의견. 경영진 시각의 실적 평가와 전망. |
+| `rawMaterial` | 원재료설비 | `dict` | 원재료 매입, 유형자산 현황, 시설투자 데이터. |
+| `sections` | 사업보고서섹션 | `dataframe` | 사업보고서 전체 섹션 텍스트를 topic(행) × period(열) DataFrame으로 구조화. leaf title 기준 수평 비교 가능. 연간+분기+반기 전 기간 포함. |
+### K-IFRS 주석 (notes)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `notes.receivables` | 매출채권 | `dataframe` | K-IFRS 매출채권 주석. 채권 잔액 및 대손충당금 내역. |
+| `notes.inventory` | 재고자산 | `dataframe` | K-IFRS 재고자산 주석. 원재료/재공품/제품 내역별 금액. |
+| `notes.tangibleAsset` | 유형자산(주석) | `dataframe` | K-IFRS 유형자산 변동 주석. 토지, 건물, 기계 등 항목별 변동. |
+| `notes.intangibleAsset` | 무형자산 | `dataframe` | K-IFRS 무형자산 주석. 영업권, 개발비 등 항목별 변동. |
+| `notes.investmentProperty` | 투자부동산 | `dataframe` | K-IFRS 투자부동산 주석. 공정가치 및 변동 내역. |
+| `notes.affiliates` | 관계기업(주석) | `dataframe` | K-IFRS 관계기업 투자 주석. 지분법 적용 내역. |
+| `notes.borrowings` | 차입금 | `dataframe` | K-IFRS 차입금 주석. 단기/장기 차입 잔액 및 이자율. |
+| `notes.provisions` | 충당부채 | `dataframe` | K-IFRS 충당부채 주석. 판매보증, 소송, 복구 등. |
+| `notes.eps` | 주당이익 | `dataframe` | K-IFRS 주당이익 주석. 기본/희석 EPS 계산 내역. |
+| `notes.lease` | 리스 | `dataframe` | K-IFRS 리스 주석. 사용권자산, 리스부채 내역. |
+| `notes.segments` | 부문정보(주석) | `dataframe` | K-IFRS 부문정보 주석. 사업부문별 상세 데이터. |
+| `notes.costByNature` | 비용의성격별분류(주석) | `dataframe` | K-IFRS 비용의 성격별 분류 주석. |
+### 원본 데이터 (raw)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `rawDocs` | 공시 원본 | `dataframe` | 공시 문서 원본 parquet. 가공 전 전체 테이블과 텍스트. |
+| `rawFinance` | XBRL 원본 | `dataframe` | XBRL 재무제표 원본 parquet. 매핑/정규화 전 원본 데이터. |
+| `rawReport` | 보고서 원본 | `dataframe` | 정기보고서 API 원본 parquet. 파싱 전 원본 데이터. |
+### 분석 엔진 (analysis)
+| name | label | dataType | description |
+|------|-------|----------|-------------|
+| `ratios` | 재무비율 | `ratios` | financeEngine이 자동계산한 수익성·안정성·밸류에이션 비율. |
+| `insight` | 인사이트 | `custom` | 7영역 A~F 등급 분석 (실적, 수익성, 건전성, 현금흐름, 지배구조, 리스크, 기회). |
+| `sector` | 섹터분류 | `custom` | WICS 11대 섹터 분류. 대분류/중분류 + 섹터별 파라미터. |
+| `rank` | 시장순위 | `custom` | 전체 시장 및 섹터 내 매출/자산/성장률 순위. |
+| `keywordTrend` | 키워드 트렌드 | `dataframe` | 공시 텍스트 키워드 빈도 추이 (topic × period × keyword). 54개 내장 키워드 또는 사용자 지정. |
+| `news` | 뉴스 | `dataframe` | 최근 뉴스 수집 (KR: Google News 한국어, US: Google News 영어). 날짜/제목/출처/URL. |
+| `crossBorderPeers` | 글로벌 피어 | `custom` | WICS→GICS 섹터 매핑 기반 글로벌 피어 추천. 한국 종목의 미국 동종 기업 리스트. |
+---
+## 주요 데이터 타입
+### RatioResult
+비율 계산 결과 (최신 단일 시점).
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `revenueTTM` | `float \| None` | None |
+| `operatingIncomeTTM` | `float \| None` | None |
+| `netIncomeTTM` | `float \| None` | None |
+| `operatingCashflowTTM` | `float \| None` | None |
+| `investingCashflowTTM` | `float \| None` | None |
+| `totalAssets` | `float \| None` | None |
+| `totalEquity` | `float \| None` | None |
+| `ownersEquity` | `float \| None` | None |
+| `totalLiabilities` | `float \| None` | None |
+| `currentAssets` | `float \| None` | None |
+| `currentLiabilities` | `float \| None` | None |
+| `cash` | `float \| None` | None |
+| `shortTermBorrowings` | `float \| None` | None |
+| `longTermBorrowings` | `float \| None` | None |
+| `bonds` | `float \| None` | None |
+| `grossProfit` | `float \| None` | None |
+| `costOfSales` | `float \| None` | None |
+| `sga` | `float \| None` | None |
+| `inventories` | `float \| None` | None |
+| `receivables` | `float \| None` | None |
+| `payables` | `float \| None` | None |
+| `tangibleAssets` | `float \| None` | None |
+| `intangibleAssets` | `float \| None` | None |
+| `retainedEarnings` | `float \| None` | None |
+| `profitBeforeTax` | `float \| None` | None |
+| `incomeTaxExpense` | `float \| None` | None |
+| `financeIncome` | `float \| None` | None |
+| `financeCosts` | `float \| None` | None |
+| `capex` | `float \| None` | None |
+| `dividendsPaid` | `float \| None` | None |
+| `depreciationExpense` | `float \| None` | None |
+| `noncurrentAssets` | `float \| None` | None |
+| `noncurrentLiabilities` | `float \| None` | None |
+| `roe` | `float \| None` | None |
+| `roa` | `float \| None` | None |
+| `roce` | `float \| None` | None |
+| `operatingMargin` | `float \| None` | None |
+| `netMargin` | `float \| None` | None |
+| `preTaxMargin` | `float \| None` | None |
+| `grossMargin` | `float \| None` | None |
+| `ebitdaMargin` | `float \| None` | None |
+| `costOfSalesRatio` | `float \| None` | None |
+| `sgaRatio` | `float \| None` | None |
+| `effectiveTaxRate` | `float \| None` | None |
+| `incomeQualityRatio` | `float \| None` | None |
+| `debtRatio` | `float \| None` | None |
+| `currentRatio` | `float \| None` | None |
+| `quickRatio` | `float \| None` | None |
+| `cashRatio` | `float \| None` | None |
+| `equityRatio` | `float \| None` | None |
+| `interestCoverage` | `float \| None` | None |
+| `netDebt` | `float \| None` | None |
+| `netDebtRatio` | `float \| None` | None |
+| `noncurrentRatio` | `float \| None` | None |
+| `workingCapital` | `float \| None` | None |
+| `revenueGrowth` | `float \| None` | None |
+| `operatingProfitGrowth` | `float \| None` | None |
+| `netProfitGrowth` | `float \| None` | None |
+| `assetGrowth` | `float \| None` | None |
+| `equityGrowthRate` | `float \| None` | None |
+| `revenueGrowth3Y` | `float \| None` | None |
+| `totalAssetTurnover` | `float \| None` | None |
+| `fixedAssetTurnover` | `float \| None` | None |
+| `inventoryTurnover` | `float \| None` | None |
+| `receivablesTurnover` | `float \| None` | None |
+| `payablesTurnover` | `float \| None` | None |
+| `operatingCycle` | `float \| None` | None |
+| `fcf` | `float \| None` | None |
+| `operatingCfMargin` | `float \| None` | None |
+| `operatingCfToNetIncome` | `float \| None` | None |
+| `operatingCfToCurrentLiab` | `float \| None` | None |
+| `capexRatio` | `float \| None` | None |
+| `dividendPayoutRatio` | `float \| None` | None |
+| `fcfToOcfRatio` | `float \| None` | None |
+| `roic` | `float \| None` | None |
+| `dupontMargin` | `float \| None` | None |
+| `dupontTurnover` | `float \| None` | None |
+| `dupontLeverage` | `float \| None` | None |
+| `debtToEbitda` | `float \| None` | None |
+| `ccc` | `float \| None` | None |
+| `dso` | `float \| None` | None |
+| `dio` | `float \| None` | None |
+| `dpo` | `float \| None` | None |
+| `piotroskiFScore` | `int \| None` | None |
+| `piotroskiMaxScore` | `int` | 9 |
+| `altmanZScore` | `float \| None` | None |
+| `beneishMScore` | `float \| None` | None |
+| `sloanAccrualRatio` | `float \| None` | None |
+| `ohlsonOScore` | `float \| None` | None |
+| `ohlsonProbability` | `float \| None` | None |
+| `altmanZppScore` | `float \| None` | None |
+| `springateSScore` | `float \| None` | None |
+| `zmijewskiXScore` | `float \| None` | None |
+| `eps` | `float \| None` | None |
+| `bps` | `float \| None` | None |
+| `dps` | `float \| None` | None |
+| `per` | `float \| None` | None |
+| `pbr` | `float \| None` | None |
+| `psr` | `float \| None` | None |
+| `evEbitda` | `float \| None` | None |
+| `marketCap` | `float \| None` | None |
+| `sharesOutstanding` | `int \| None` | None |
+| `ebitdaEstimated` | `bool` | True |
+| `currency` | `str` | KRW |
+| `warnings` | `list` | [] |
+### InsightResult
+단일 영역 분석 결과.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `grade` | `str` |  |
+| `summary` | `str` |  |
+| `details` | `list` | [] |
+| `risks` | `list` | [] |
+| `opportunities` | `list` | [] |
+### Anomaly
+이상치 탐지 결과.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `severity` | `str` |  |
+| `category` | `str` |  |
+| `text` | `str` |  |
+| `value` | `Optional` | None |
+### Flag
+리스크/기회 플래그.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `level` | `str` |  |
+| `category` | `str` |  |
+| `text` | `str` |  |
+### AnalysisResult
+종합 분석 결과.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `corpName` | `str` |  |
+| `stockCode` | `str` |  |
+| `isFinancial` | `bool` |  |
+| `performance` | `InsightResult` |  |
+| `profitability` | `InsightResult` |  |
+| `health` | `InsightResult` |  |
+| `cashflow` | `InsightResult` |  |
+| `governance` | `InsightResult` |  |
+| `risk` | `InsightResult` |  |
+| `opportunity` | `InsightResult` |  |
+| `predictability` | `Optional` | None |
+| `uncertainty` | `Optional` | None |
+| `coreEarnings` | `Optional` | None |
+| `anomalies` | `list` | [] |
+| `distress` | `Optional` | None |
+| `summary` | `str` |  |
+| `profile` | `str` |  |
+### SectorInfo
+섹터 분류 결과.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `sector` | `Sector` |  |
+| `industryGroup` | `IndustryGroup` |  |
+| `confidence` | `float` |  |
+| `source` | `str` |  |
+### SectorParams
+섹터별 밸류에이션 파라미터.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `discountRate` | `float` |  |
+| `growthRate` | `float` |  |
+| `perMultiple` | `float` |  |
+| `pbrMultiple` | `float` |  |
+| `evEbitdaMultiple` | `float` |  |
+| `label` | `str` |  |
+| `description` | `str` |  |
+### RankInfo
+단일 종목의 랭크 정보.
+| 필드 | 타입 | 기본값 |
+|------|------|--------|
+| `stockCode` | `str` |  |
+| `corpName` | `str` |  |
+| `sector` | `str` |  |
+| `industryGroup` | `str` |  |
+| `revenue` | `Optional` | None |
+| `totalAssets` | `Optional` | None |
+| `revenueGrowth3Y` | `Optional` | None |
+| `revenueRank` | `Optional` | None |
+| `revenueTotal` | `int` | 0 |
+| `revenueRankInSector` | `Optional` | None |
+| `revenueSectorTotal` | `int` | 0 |
+| `assetRank` | `Optional` | None |
+| `assetTotal` | `int` | 0 |
+| `assetRankInSector` | `Optional` | None |
+| `assetSectorTotal` | `int` | 0 |
+| `growthRank` | `Optional` | None |
+| `growthTotal` | `int` | 0 |
+| `growthRankInSector` | `Optional` | None |
+| `growthSectorTotal` | `int` | 0 |
+| `sizeClass` | `str` |  |

src/dartlab/STATUS.md ADDED Viewed

	@@ -0,0 +1,81 @@

+# src/dartlab
+## 개요
+DART 공시 데이터 활용 라이브러리. 종목코드 기반 API.
+## 구조
+```
+dartlab/
+├── core/                    # 공통 기반 (데이터 로딩, 보고서 선택, 테이블 파싱, 주석 추출)
+├── finance/                 # 재무 데이터 (36개 모듈)
+│   ├── summary/             # 요약재무정보 시계열
+│   ├── statements/          # 연결재무제표 (BS, IS, CF)
+│   ├── segment/             # 부문별 보고 (주석)
+│   ├── affiliate/           # 관계기업·공동기업 (주석)
+│   ├── costByNature/        # 비용의 성격별 분류 (주석)
+│   ├── tangibleAsset/       # 유형자산 (주석)
+│   ├── notesDetail/         # 주석 상세 (23개 키워드)
+│   ├── dividend/            # 배당
+│   ├── majorHolder/         # 최대주주·주주현황
+│   ├── shareCapital/        # 주식 현황
+│   ├── employee/            # 직원 현황
+│   ├── subsidiary/          # 자회사 투자
+│   ├── bond/                # 채무증권
+│   ├── audit/               # 감사의견·보수
+│   ├── executive/           # 임원 현황
+│   ├── executivePay/        # 임원 보수
+│   ├── boardOfDirectors/    # 이사회
+│   ├── capitalChange/       # 자본금 변동
+│   ├── contingentLiability/ # 우발부채
+│   ├── internalControl/     # 내부통제
+│   ├── relatedPartyTx/      # 관계자 거래
+│   ├── rnd/                 # R&D 비용
+│   ├── sanction/            # 제재 현황
+│   ├── affiliateGroup/      # 계열사 목록
+│   ├── fundraising/         # 증자/감자
+│   ├── productService/      # 주요 제품/서비스
+│   ├── salesOrder/          # 매출/수주
+│   ├── riskDerivative/      # 위험관리/파생거래
+│   ├── articlesOfIncorporation/ # 정관
+│   ├── otherFinance/        # 기타 재무
+│   ├── companyHistory/      # 회사 연혁
+│   ├── shareholderMeeting/  # 주주총회
+│   ├── auditSystem/         # 감사제도
+│   ├── investmentInOther/   # 타법인출자
+│   └── companyOverviewDetail/ # 회사개요 상세
+├── disclosure/              # 공시 서술형 (4개 모듈)
+│   ├── business/            # 사업의 내용
+│   ├── companyOverview/     # 회사의 개요 (정량)
+│   ├── mdna/                # MD&A
+│   └── rawMaterial/         # 원재료·설비
+├── company.py               # 통합 접근 (property 기반, lazy + cache)
+├── notes.py                 # K-IFRS 주석 통합 접근
+└── config.py                # 전역 설정 (verbose)
+```
+## API 요약
+```python
+import dartlab
+c = dartlab.Company("005930")
+c.index                 # 회사 구조 인덱스
+c.show("BS")            # topic payload
+c.trace("dividend")     # source trace
+c.BS                    # 재무상태표 DataFrame
+c.dividend              # 배당 시계열 DataFrame
+import dartlab
+dartlab.verbose = False  # 진행 표시 끄기
+```
+## 현황
+- 2026-03-06: core/ + finance/summary/ 초기 구축
+- 2026-03-06: finance/statements/, segment/, affiliate/ 추가
+- 2026-03-06: 전체 패키지 개선 — stockCode 시그니처, 핫라인 설계, API_SPEC.md
+- 2026-03-07: finance/ 11개 모듈 추가 (dividend~bond, costByNature)
+- 2026-03-07: disclosure/ 4개 모듈 추가 (business, companyOverview, mdna, rawMaterial)
+- 2026-03-07: finance/ 주석 모듈 추가 (notesDetail, tangibleAsset)
+- 2026-03-07: finance/ 7개 모듈 추가 (audit~internalControl, rnd, sanction)
+- 2026-03-07: finance/ 7개 모듈 추가 (affiliateGroup~companyHistory, shareholderMeeting~investmentInOther, companyOverviewDetail)
+- 2026-03-08: analyze → fsSummary 리네이밍, 계정명 특수문자 정리
+- 2026-03-08: Company 재설계 — property 기반 접근, Notes 통합, all(), verbose 설정

src/dartlab/__init__.py ADDED Viewed

	@@ -0,0 +1,1008 @@

+"""DART 공시 데이터 활용 라이브러리."""
+import sys
+from importlib.metadata import PackageNotFoundError
+from importlib.metadata import version as _pkg_version
+from dartlab import ai as llm
+from dartlab import config, core
+from dartlab.company import Company
+from dartlab.core.env import loadEnv as _loadEnv
+from dartlab.core.select import ChartResult, SelectResult
+from dartlab.gather.fred import Fred
+from dartlab.gather.listing import codeToName, fuzzySearch, getKindList, nameToCode, searchName
+from dartlab.providers.dart.company import Company as _DartEngineCompany
+from dartlab.providers.dart.openapi.dart import Dart, OpenDart
+from dartlab.providers.edgar.openapi.edgar import OpenEdgar
+from dartlab.review import Review
+# .env 자동 로드 — API 키 등 환경변수
+_loadEnv()
+try:
+    __version__ = _pkg_version("dartlab")
+except PackageNotFoundError:
+    __version__ = "0.0.0"
def search(keyword: str):
    """Search for a company across the KR and US providers.

    A keyword containing any Hangul syllable goes straight to the DART
    provider. A purely ASCII alphabetic keyword is tried against the EDGAR
    provider first; if that provider is unavailable or unimplemented the
    call falls back to DART.

    Args:
        keyword: Company name or ticker text to look up.

    Example::

        import dartlab
        dartlab.search("삼성전자")
        dartlab.search("AAPL")
    """
    contains_hangul = any("\uac00" <= ch <= "\ud7a3" for ch in keyword)
    looks_like_ticker = keyword.isascii() and keyword.isalpha()
    if not contains_hangul and looks_like_ticker:
        try:
            from dartlab.providers.edgar.company import Company as _EdgarCompany

            return _EdgarCompany.search(keyword)
        except (ImportError, AttributeError, NotImplementedError):
            # Best-effort: fall through to the DART search below.
            pass
    return _DartEngineCompany.search(keyword)
+def listing(market: str | None = None):
+    """전체 상장법인 목록.
+    Args:
+        market: "KR" 또는 "US". None이면 KR 기본.
+    Example::
+        import dartlab
+        dartlab.listing()          # KR 전체
+        dartlab.listing("US")      # US 전체 (향후)
+    """
+    if market and market.upper() == "US":
+        try:
+            from dartlab.providers.edgar.company import Company as _US
+            return _US.listing()
+        except (ImportError, AttributeError, NotImplementedError):
+            raise NotImplementedError("US listing은 아직 지원되지 않습니다")
+    return _DartEngineCompany.listing()
def collect(
    *codes: str,
    categories: list[str] | None = None,
    incremental: bool = True,
) -> dict[str, dict[str, int]]:
    """Collect DART data (OpenAPI) for the given stock codes.

    Args:
        *codes: Stock codes to collect.
        categories: Data categories to collect; forwarded unchanged.
        incremental: Forwarded unchanged to the batch collector.

    Returns:
        The result of ``batchCollect`` for the requested codes.

    Example::

        import dartlab
        dartlab.collect("005930")                                    # everything for one stock
        dartlab.collect("005930", "000660", categories=["finance"])  # finance only
    """
    from dartlab.providers.dart.openapi.batch import batchCollect as _batch

    requested = list(codes)
    return _batch(requested, categories=categories, incremental=incremental)
def collectAll(
    *,
    categories: list[str] | None = None,
    mode: str = "new",
    maxWorkers: int | None = None,
    incremental: bool = True,
) -> dict[str, dict[str, int]]:
    """Collect DART data for every listed stock (requires DART_API_KEY(S)).

    All arguments are forwarded unchanged to ``batchCollectAll``.

    Example::

        import dartlab
        dartlab.collectAll()                          # stocks not yet collected
        dartlab.collectAll(categories=["finance"])    # finance only
        dartlab.collectAll(mode="all")                # include already-collected stocks
    """
    from dartlab.providers.dart.openapi.batch import batchCollectAll as _batchAll

    options = {
        "categories": categories,
        "mode": mode,
        "maxWorkers": maxWorkers,
        "incremental": incremental,
    }
    return _batchAll(**options)
def downloadAll(category: str = "finance", *, forceUpdate: bool = False) -> None:
    """Download a whole-market dataset from HuggingFace (needs ``dartlab[hf]``).

    Market-wide features such as scanAccount, screen and digest need the full
    dataset on local disk; use this to pre-download it per category.

    Args:
        category: "finance" (~600MB), "docs" (~8GB) or "report" (~320MB).
        forceUpdate: When True, refresh files that already exist locally.

    Examples::

        import dartlab
        dartlab.downloadAll("finance")   # needed by scanAccount/screen/benchmark
        dartlab.downloadAll("report")    # needed by governance/workforce/capital/debt
        dartlab.downloadAll("docs")      # needed by digest/signal (large, ~8GB)
    """
    from dartlab.core.dataLoader import downloadAll as _fetchAll

    _fetchAll(category, forceUpdate=forceUpdate)
def checkFreshness(stockCode: str, *, forceCheck: bool = False):
    """Check via the DART API whether local data for a stock is up to date.

    Args:
        stockCode: Stock code to check.
        forceCheck: Forwarded unchanged to the provider-level checker.

    Example::

        import dartlab
        result = dartlab.checkFreshness("005930")
        result.isFresh       # True/False
        result.missingCount  # number of missing filings
    """
    from dartlab.providers.dart.openapi.freshness import checkFreshness as _providerCheck

    return _providerCheck(stockCode, forceCheck=forceCheck)
def network():
    """Build the relationship map of all Korean listed companies.

    Example::

        import dartlab
        dartlab.network().show()  # whole network in the browser
    """
    from dartlab.market.network import build_graph, export_full
    from dartlab.tools.network import render_network

    exported = export_full(build_graph())
    nodes, edges = exported["nodes"], exported["edges"]
    return render_network(nodes, edges, "한국 상장사 관계 네트워크")
def governance():
    """Scan governance across all Korean listed companies.

    Example::

        import dartlab
        df = dartlab.governance()
    """
    from dartlab.market.governance import scan_governance as _scan

    return _scan()
def workforce():
    """Scan workforce and pay across all Korean listed companies.

    Example::

        import dartlab
        df = dartlab.workforce()
    """
    from dartlab.market.workforce import scan_workforce as _scan

    return _scan()
def capital():
    """Scan shareholder returns across all Korean listed companies.

    Example::

        import dartlab
        df = dartlab.capital()
    """
    from dartlab.market.capital import scan_capital as _scan

    return _scan()
def debt():
    """Scan debt structure across all Korean listed companies.

    Example::

        import dartlab
        df = dartlab.debt()
    """
    from dartlab.market.debt import scan_debt as _scan

    return _scan()
def screen(preset: str = "가치주"):
    """Screen the market with a named preset filter.

    Args:
        preset: Preset name ("가치주", "성장주", "턴어라운드", "현금부자",
                "고위험", "자본잠식", "소형고수익", "대형안정").

    Example::

        import dartlab
        df = dartlab.screen("가치주")    # ROE>=10, debt<=100, etc.
        df = dartlab.screen("고위험")    # debt>=200, ICR<3
    """
    from dartlab.analysis.comparative.rank.screen import screen as _runScreen

    return _runScreen(preset)
def benchmark():
    """Return per-sector benchmarks of key ratios (P10, median, P90).

    Example::

        import dartlab
        bm = dartlab.benchmark()   # sector x ratio normal ranges
    """
    from dartlab.analysis.comparative.rank.screen import benchmark as _runBenchmark

    return _runBenchmark()
def signal(keyword: str | None = None):
    """Detect keyword trends in narrative disclosures (market signals).

    Args:
        keyword: Restrict the scan to one keyword. ``None`` scans the full
            keyword set (documented upstream as 48 keywords).

    Example::

        import dartlab
        df = dartlab.signal()        # trend for every keyword
        df = dartlab.signal("AI")    # yearly trend for the "AI" keyword
    """
    from dartlab.market.signal import scan_signal as _scan

    return _scan(keyword)
def news(query: str, *, market: str = "KR", days: int = 30):
    """Collect company news through the default gatherer.

    Args:
        query: Company name or ticker.
        market: "KR" or "US".
        days: Look-back window in days.

    Example::

        import dartlab
        dartlab.news("삼성전자")
        dartlab.news("AAPL", market="US")
    """
    from dartlab.gather import getDefaultGather

    gatherer = getDefaultGather()
    return gatherer.news(query, market=market, days=days)
def price(
    stockCode: str,
    *,
    market: str = "KR",
    start: str | None = None,
    end: str | None = None,
    snapshot: bool = False,
):
    """Return a price time series (one year of OHLCV by default) or a snapshot.

    All arguments are forwarded unchanged to the default gatherer's
    ``price`` method.

    Example::

        import dartlab
        dartlab.price("005930")                              # 1-year OHLCV series
        dartlab.price("005930", start="2020-01-01")          # explicit period
        dartlab.price("005930", snapshot=True)               # current price snapshot
    """
    from dartlab.gather import getDefaultGather

    gatherer = getDefaultGather()
    return gatherer.price(
        stockCode,
        market=market,
        start=start,
        end=end,
        snapshot=snapshot,
    )
def consensus(stockCode: str, *, market: str = "KR"):
    """Return analyst consensus (target price, rating) via the default gatherer.

    Example::

        import dartlab
        dartlab.consensus("005930")
        dartlab.consensus("AAPL", market="US")
    """
    from dartlab.gather import getDefaultGather

    gatherer = getDefaultGather()
    return gatherer.consensus(stockCode, market=market)
def flow(stockCode: str, *, market: str = "KR"):
    """Return the investor-flow series (foreign/institutional trading, KR only).

    Example::

        import dartlab
        dartlab.flow("005930")
        # [{"date": "20260325", "foreignNet": -6165053, "institutionNet": 2908773, ...}, ...]
    """
    from dartlab.gather import getDefaultGather

    gatherer = getDefaultGather()
    return gatherer.flow(stockCode, market=market)
def macro(
    market: str = "KR",
    indicator: str | None = None,
    *,
    start: str | None = None,
    end: str | None = None,
):
    """Return macro-indicator time series: ECOS (KR) / FRED (US).

    With no indicator, the whole catalogue is returned as a wide DataFrame.
    All arguments are forwarded unchanged to the default gatherer's
    ``macro`` method, which is where any market auto-detection happens.

    Example::

        import dartlab
        dartlab.macro()                    # all KR indicators, wide DF (22)
        dartlab.macro("US")                # all US indicators, wide DF (50)
        dartlab.macro("CPI")               # CPI (KR auto-detected)
        dartlab.macro("FEDFUNDS")          # federal funds rate (US auto-detected)
        dartlab.macro("KR", "CPI")         # explicit KR + CPI
        dartlab.macro("US", "SP500")       # explicit US + S&P500
    """
    from dartlab.gather import getDefaultGather

    gatherer = getDefaultGather()
    return gatherer.macro(market, indicator, start=start, end=end)
def crossBorderPeers(stockCode: str, *, topK: int = 5):
    """Recommend global peers for a Korean stock (WICS→GICS mapping).

    Args:
        stockCode: Korean stock code.
        topK: Number of peers to return.

    Example::

        import dartlab
        dartlab.crossBorderPeers("005930")  # → ["AAPL", "MSFT", ...]
    """
    from dartlab.analysis.comparative.peer.discover import crossBorderPeers as _discoverPeers

    return _discoverPeers(stockCode, topK=topK)
def setup(provider: str | None = None):
    """Show AI provider setup guidance, or run an interactive setup.

    Args:
        provider: Provider to set up. ``None`` prints the status of all
            providers.

    Example::

        import dartlab
        dartlab.setup()              # status of every provider
        dartlab.setup("chatgpt")     # ChatGPT OAuth browser login
        dartlab.setup("openai")      # set the OpenAI API key
        dartlab.setup("ollama")      # Ollama installation guide
    """
    from dartlab.core.ai.guide import (
        provider_guide,
        providers_status,
        resolve_alias,
    )

    if provider is None:
        print(providers_status())
        return

    canonical = resolve_alias(provider)
    if canonical == "oauth-codex":
        _setup_oauth_interactive()
        return
    if canonical == "openai":
        _setup_openai_interactive()
        return
    # Every other provider only has a printed guide.
    print(provider_guide(canonical))
def _setup_oauth_interactive():
    """Run the ChatGPT OAuth browser login from a notebook or CLI.

    Returns early with a notice when a token is already present. If the CLI
    login helper cannot be imported, prints how to run it from the CLI.
    """
    authenticated = False
    try:
        from dartlab.ai.providers.support.oauth_token import is_authenticated

        authenticated = is_authenticated()
    except ImportError:
        # OAuth support module missing: treat as not authenticated.
        pass

    if authenticated:
        print("\n  ✓ ChatGPT OAuth 이미 인증되어 있습니다.")
        print('  재인증: dartlab.setup("chatgpt")  # 재실행하면 갱신\n')
        return

    try:
        from dartlab.cli.commands.setup import _do_oauth_login

        _do_oauth_login()
    except ImportError:
        print("\n  ChatGPT OAuth 브라우저 로그인:")
        print("  CLI에서 실행: dartlab setup oauth-codex\n")
def _setup_openai_interactive():
    """Set the OpenAI API key inline from a notebook.

    If ``OPENAI_API_KEY`` is already set, only a confirmation showing its
    last four characters is printed. Otherwise the provider guide is shown
    and the key is read without echo; an empty entry, EOF or Ctrl-C skips.
    """
    import os

    from dartlab.core.ai.guide import provider_guide

    current = os.environ.get("OPENAI_API_KEY")
    if current:
        tail = current[-4:]
        print(f"\n  ✓ OPENAI_API_KEY 환경변수가 설정되어 있습니다. (sk-...{tail})\n")
        return

    print(provider_guide("openai"))
    print()
    try:
        from getpass import getpass

        entered = getpass("  API 키 입력 (Enter로 건너뛰기): ").strip()
        if not entered:
            print("\n  건너뛰었습니다.\n")
        else:
            llm.configure(provider="openai", api_key=entered)
            print("\n  ✓ OpenAI API 키가 설정되었습니다.\n")
    except (EOFError, KeyboardInterrupt):
        print("\n  건너뛰었습니다.\n")
+def _auto_stream(gen) -> str:
+    """Generator를 소비하면서 stdout에 스트리밍 출력, 전체 텍스트 반환."""
+    import sys
+    chunks: list[str] = []
+    for chunk in gen:
+        chunks.append(chunk)
+        sys.stdout.write(chunk)
+        sys.stdout.flush()
+    sys.stdout.write("\n")
+    sys.stdout.flush()
+    return "".join(chunks)
def ask(
    *args: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    provider: str | None = None,
    model: str | None = None,
    stream: bool = True,
    raw: bool = False,
    reflect: bool = False,
    pattern: str | None = None,
    **kwargs,
):
    """Ask an LLM a question about a company.

    Args:
        *args: Either one natural-language question, or two values
            ``(company, question)``.
        include: Data modules to include.
        exclude: Data modules to exclude.
        provider: LLM provider ("openai", "codex", "oauth-codex", "ollama").
            When ``None`` a provider is auto-detected.
        model: Model override.
        stream: True (default) streams the answer to stdout. False returns
            the full text quietly.
        raw: True returns the generator directly (for custom UIs).
        reflect: True makes the answer self-verify (one reflection pass).
        pattern: Forwarded unchanged to the underlying ask runtime.
        **kwargs: Extra options forwarded unchanged to the underlying runtime.

    Returns:
        str: The full answer text (a ``Generator[str]`` only when
        ``raw=True``).

    Raises:
        RuntimeError: No provider was given and none could be detected.
        ValueError: A single-argument question names no resolvable company.
        TypeError: Zero or more than two positional arguments were given.

    Example::

        import dartlab
        dartlab.llm.configure(provider="openai", api_key="sk-...")
        # streams the output and returns the full text
        answer = dartlab.ask("삼성전자 재무건전성 분석해줘")
        # explicit provider + model
        answer = dartlab.ask("삼성전자 분석", provider="openai", model="gpt-4o")
        # separate (company, question)
        answer = dartlab.ask("005930", "영업이익률 추세는?")
        # quiet, full text only (batch use)
        answer = dartlab.ask("삼성전자 분석", stream=False)
        # drive the generator yourself (custom UI)
        for chunk in dartlab.ask("삼성전자 분석", raw=True):
            custom_process(chunk)
    """
    from dartlab.ai.runtime.standalone import ask as _runAsk

    # Auto-detect a provider when the caller did not choose one.
    if provider is None:
        from dartlab.core.ai.detect import auto_detect_provider

        provider = auto_detect_provider()
        if provider is None:
            from dartlab.core.ai.guide import no_provider_message

            print(no_provider_message())
            raise RuntimeError("AI provider가 설정되지 않았습니다. dartlab.setup()을 실행하세요.")

    argc = len(args)
    if argc == 0:
        raise TypeError("질문을 입력해 주세요. 예: dartlab.ask('삼성전자 분석해줘')")
    if argc > 2:
        raise TypeError(f"인자는 1~2개만 허용됩니다 (받은 수: {argc})")
    if argc == 2:
        company = Company(args[0])
        question = args[1]
    else:
        from dartlab.core.resolve import resolve_from_text

        company, question = resolve_from_text(args[0])
        if company is None:
            raise ValueError(
                f"종목을 찾을 수 없습니다: '{args[0]}'\n"
                "종목명 또는 종목코드를 포함해 주세요.\n"
                "예: dartlab.ask('삼성전자 재무건전성 분석해줘')"
            )

    # raw mode forwards the caller's flag untouched; other modes pass a strict bool.
    effective_stream = stream if raw else bool(stream)
    answer = _runAsk(
        company,
        question,
        include=include,
        exclude=exclude,
        provider=provider,
        model=model,
        stream=effective_stream,
        reflect=reflect,
        pattern=pattern,
        **kwargs,
    )
    if raw or not stream:
        return answer
    return _auto_stream(answer)
def chat(
    codeOrName: str,
    question: str,
    *,
    provider: str | None = None,
    model: str | None = None,
    max_turns: int = 5,
    on_tool_call=None,
    on_tool_result=None,
    **kwargs,
) -> str:
    """Agent mode: the LLM picks tools to run a deeper analysis.

    Args:
        codeOrName: Stock code, company name, or US ticker.
        question: Question text.
        provider: LLM provider.
        model: Model override.
        max_turns: Maximum number of tool-call iterations.
        on_tool_call: Callback forwarded unchanged to the chat runtime.
        on_tool_result: Callback forwarded unchanged to the chat runtime.
        **kwargs: Extra options forwarded unchanged to the chat runtime.

    Example::

        import dartlab
        dartlab.chat("005930", "배당 추세를 분석하고 이상 징후를 찾아줘")
    """
    from dartlab.ai.runtime.standalone import chat as _runChat

    target = Company(codeOrName)
    options = {
        "provider": provider,
        "model": model,
        "max_turns": max_turns,
        "on_tool_call": on_tool_call,
        "on_tool_result": on_tool_result,
    }
    return _runChat(target, question, **options, **kwargs)
def plugins():
    """Run plugin discovery and return the list of loaded plugins.

    Example::

        import dartlab
        dartlab.plugins()  # [PluginMeta(name="esg-scores", ...)]
    """
    from dartlab.core import plugins as _plugins

    _plugins.discover()
    return _plugins.get_loaded_plugins()
def reload_plugins():
    """Rescan plugins so a fresh install is picked up without a restart.

    Example::

        # 1. install a new plugin
        # !uv pip install dartlab-plugin-esg
        # 2. rescan
        dartlab.reload_plugins()
        # 3. use it right away
        dartlab.Company("005930").show("esgScore")
    """
    from dartlab.core.plugins import rediscover as _rescan

    return _rescan()
def audit(codeOrName: str):
    """Run the audit red-flag analysis for one company.

    Example::

        import dartlab
        dartlab.audit("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.financial.insight.pipeline import analyzeAudit as _analyzeAudit

    return _analyzeAudit(company)
def forecast(codeOrName: str, *, horizon: int = 3):
    """Produce an ensemble revenue forecast for one company.

    Args:
        codeOrName: Stock code or company name.
        horizon: Forecast horizon, forwarded to ``forecastRevenue``.

    Returns:
        The ``forecastRevenue`` result, or ``None`` when the company has no
        finance time series.

    Example::

        import dartlab
        dartlab.forecast("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.forecast.revenueForecast import forecastRevenue

    timeseries = company.finance.timeseries
    if timeseries is None:
        return None
    # The time series may arrive as a tuple whose first item is the series.
    if isinstance(timeseries, tuple):
        series = timeseries[0]
    else:
        series = timeseries

    currency = getattr(company, "currency", "KRW")
    options = {
        "stockCode": getattr(company, "stockCode", None),
        "sectorKey": getattr(company, "sectorKey", None),
        "market": getattr(company, "market", "KR"),
        "horizon": horizon,
        "currency": currency,
    }
    return forecastRevenue(series, **options)
def valuation(codeOrName: str, *, shares: int | None = None):
    """Run the combined valuation (DCF + DDM + relative value).

    Args:
        codeOrName: Stock code or company name.
        shares: Shares outstanding. When ``None`` it is read from
            ``company.profile.sharesOutstanding`` if available.

    Returns:
        The ``fullValuation`` result, or ``None`` when the company has no
        finance time series.

    Example::

        import dartlab
        dartlab.valuation("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.valuation.valuation import fullValuation

    timeseries = company.finance.timeseries
    if timeseries is None:
        return None
    if isinstance(timeseries, tuple):
        series = timeseries[0]
    else:
        series = timeseries

    currency = getattr(company, "currency", "KRW")
    if shares is None:
        profile = getattr(company, "profile", None)
        outstanding = getattr(profile, "sharesOutstanding", None) if profile else None
        # Coerce only truthy values; falsy ones (None, 0) pass through as-is.
        shares = int(outstanding) if outstanding else outstanding
    return fullValuation(series, shares=shares, currency=currency)
def insights(codeOrName: str):
    """Run the seven-area grade analysis for one company.

    Example::

        import dartlab
        dartlab.insights("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.financial.insight import analyze as _analyze

    return _analyze(company.stockCode, company=company)
def simulation(codeOrName: str, *, scenarios: list[str] | None = None):
    """Simulate economic scenarios for one company.

    Args:
        codeOrName: Stock code or company name.
        scenarios: Scenario names, forwarded to ``simulateAllScenarios``.

    Returns:
        The simulation result, or ``None`` when the company has no finance
        time series.

    Example::

        import dartlab
        dartlab.simulation("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.forecast.simulation import simulateAllScenarios

    timeseries = company.finance.timeseries
    if timeseries is None:
        return None
    if isinstance(timeseries, tuple):
        series = timeseries[0]
    else:
        series = timeseries

    sector_key = getattr(company, "sectorKey", None)
    return simulateAllScenarios(series, sectorKey=sector_key, scenarios=scenarios)
def research(
    codeOrName: str,
    *,
    sections: list[str] | None = None,
    includeMarket: bool = True,
):
    """Generate a comprehensive company research report.

    Args:
        codeOrName: Stock code or company name.
        sections: Forwarded unchanged to ``generateResearch``.
        includeMarket: Forwarded unchanged to ``generateResearch``.

    Example::

        import dartlab
        dartlab.research("005930")
    """
    company = Company(codeOrName)
    from dartlab.analysis.financial.research import generateResearch as _generate

    return _generate(company, sections=sections, includeMarket=includeMarket)
def groupHealth():
    """Analyse group-level health by crossing the network with financial ratios.

    Returns:
        A ``(summary, weakLinks)`` tuple.

    Example::

        import dartlab
        summary, weakLinks = dartlab.groupHealth()
    """
    from dartlab.market.network.health import groupHealth as _computeGroupHealth

    return _computeGroupHealth()
def scanAccount(
    snakeId: str,
    *,
    market: str = "dart",
    sjDiv: str | None = None,
    fsPref: str = "CFS",
    annual: bool = False,
):
    """Return the time series of a single account across all stocks.

    Args:
        snakeId: Account identifier; English ("sales") or Korean ("매출액").
        market: "dart" (Korea, default) or "edgar" (US).
        sjDiv: Statement type ("IS", "BS", "CF"); ``None`` picks it
            automatically. (dart only)
        fsPref: Consolidated/separate preference ("CFS" = consolidated
            first, "OFS" = separate first). (dart only)
        annual: True for annual data (default False = quarterly standalone).

    Example::

        import dartlab
        dartlab.scanAccount("매출액")                          # DART quarterly
        dartlab.scanAccount("매출액", annual=True)              # DART annual
        dartlab.scanAccount("sales", market="edgar")            # EDGAR quarterly
        dartlab.scanAccount("total_assets", market="edgar", annual=True)
    """
    if market != "edgar":
        from dartlab.providers.dart.finance.scanAccount import scanAccount as _dartScan

        return _dartScan(snakeId, sjDiv=sjDiv, fsPref=fsPref, annual=annual)

    # The EDGAR scanner takes only the account id and the annual flag.
    from dartlab.providers.edgar.finance.scanAccount import scanAccount as _edgarScan

    return _edgarScan(snakeId, annual=annual)
def scanRatio(
    ratioName: str,
    *,
    market: str = "dart",
    fsPref: str = "CFS",
    annual: bool = False,
):
    """Return the time series of a single financial ratio across all stocks.

    Args:
        ratioName: Ratio identifier ("roe", "operatingMargin", "debtRatio", ...).
        market: "dart" (Korea, default) or "edgar" (US).
        fsPref: Consolidated/separate preference. (dart only)
        annual: True for annual data (default False = quarterly).

    Example::

        import dartlab
        dartlab.scanRatio("roe")                              # DART quarterly
        dartlab.scanRatio("operatingMargin", annual=True)      # DART annual
        dartlab.scanRatio("roe", market="edgar", annual=True)  # EDGAR annual
    """
    if market != "edgar":
        from dartlab.providers.dart.finance.scanAccount import scanRatio as _dartRatio

        return _dartRatio(ratioName, fsPref=fsPref, annual=annual)

    # The EDGAR scanner takes only the ratio name and the annual flag.
    from dartlab.providers.edgar.finance.scanAccount import scanRatio as _edgarRatio

    return _edgarRatio(ratioName, annual=annual)
def scanRatioList():
    """Return the list of ratios available to ``scanRatio`` (DART provider).

    Example::

        import dartlab
        dartlab.scanRatioList()
    """
    from dartlab.providers.dart.finance.scanAccount import scanRatioList as _availableRatios

    return _availableRatios()
def digest(
    *,
    sector: str | None = None,
    top_n: int = 20,
    format: str = "dataframe",
    stock_codes: list[str] | None = None,
    verbose: bool = False,
):
    """Build a market-wide digest of disclosure changes.

    Walks the locally downloaded docs data and aggregates the most important
    changes.

    Args:
        sector: Sector filter (e.g. "반도체"). ``None`` means the whole market.
        top_n: Number of top entries.
        format: "dataframe", "markdown" or "json".
        stock_codes: Explicit list of stock codes to scan.
        verbose: Print progress.

    Returns:
        The raw scan result when ``format == "dataframe"``; otherwise the
        output of ``build_digest`` in the requested format.

    Example::

        import dartlab
        dartlab.digest()                          # whole market
        dartlab.digest(sector="반도체")             # one sector
        dartlab.digest(format="markdown")          # markdown output
    """
    from dartlab.analysis.accounting.watch.digest import build_digest
    from dartlab.analysis.accounting.watch.scanner import scan_market

    scanned = scan_market(
        sector=sector,
        top_n=top_n,
        stock_codes=stock_codes,
        verbose=verbose,
    )
    if format != "dataframe":
        # A sector-specific title is used only when a sector filter was given.
        if sector:
            heading = f"{sector} 섹터 변화 다이제스트"
        else:
            heading = None
        return build_digest(scanned, format=format, title=heading, top_n=top_n)
    return scanned
class _Module(sys.modules[__name__].__class__):
    """Module subclass proxying dartlab.verbose / dartlab.dataDir / dartlab.chart|table|text.

    ``verbose`` and ``dataDir`` read and write the matching ``config``
    attributes; ``chart``, ``table`` and ``text`` are imported from
    ``dartlab.tools`` on first access and then cached on the module.
    """

    def _get_verbose(self):
        return config.verbose

    def _set_verbose(self, value):
        config.verbose = value

    verbose = property(_get_verbose, _set_verbose)

    def _get_data_dir(self):
        return config.dataDir

    def _set_data_dir(self, value):
        # Stored as a string, so path-like values are accepted.
        config.dataDir = str(value)

    dataDir = property(_get_data_dir, _set_data_dir)

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails.
        if name not in ("chart", "table", "text"):
            raise AttributeError(f"module 'dartlab' has no attribute {name!r}")
        import importlib

        loaded = importlib.import_module(f"dartlab.tools.{name}")
        # Cache on the module so later lookups bypass __getattr__.
        setattr(self, name, loaded)
        return loaded


# Swap this module's class so the properties and lazy attributes take effect.
sys.modules[__name__].__class__ = _Module
# Public API of the package. Every public helper defined in this module is
# listed, so that `from dartlab import *` and documentation tools expose it.
__all__ = [
    "Company",
    "Dart",
    "Fred",
    "OpenDart",
    "OpenEdgar",
    "config",
    "core",
    "engines",
    "llm",
    "ask",
    "chat",
    "setup",
    "search",
    "listing",
    "collect",
    "collectAll",
    "downloadAll",
    "checkFreshness",
    "network",
    "screen",
    "benchmark",
    "signal",
    "news",
    "price",
    "consensus",
    "flow",
    "macro",
    "crossBorderPeers",
    "audit",
    "forecast",
    "valuation",
    "insights",
    "simulation",
    "governance",
    "workforce",
    "capital",
    "debt",
    "groupHealth",
    "research",
    "digest",
    "scanAccount",
    "scanRatio",
    "scanRatioList",
    "plugins",
    "reload_plugins",
    "verbose",
    "dataDir",
    "getKindList",
    "codeToName",
    "nameToCode",
    "searchName",
    "fuzzySearch",
    "chart",
    "table",
    "text",
    "Review",
    "SelectResult",
    "ChartResult",
]

src/dartlab/ai/DEV.md ADDED Viewed

	@@ -0,0 +1,224 @@

+# AI Engine Development Guide
+## Source Of Truth
+- 데이터 source-of-truth: `src/dartlab/core/registry.py`
+- AI capability source-of-truth: `src/dartlab/core/capabilities.py`
+## 현재 구조 원칙
+- `core.analyze()`가 AI 오케스트레이션의 단일 진입점이다.
+- `tools/registry.py`는 capability 정의를 runtime에 바인딩하는 레이어다.
+- `server/streaming.py`, `mcp/__init__.py`, UI SSE client는 capability 결과를 소비하는 adapter다.
+- Svelte UI는 source-of-truth가 아니라 render sink다.
+- OpenDART 최근 공시목록 retrieval도 `core.analyze()`에서 company 유무와 무관하게 같은 경로로 합류한다.
+## 패키지 구조
+- `runtime/`
+  - `core.py`: 오케스트레이터
+  - `events.py`: canonical/legacy 이벤트 계약
+  - `pipeline.py`: pre-compute pipeline
+  - `post_processing.py`: navigate/validation/auto-artifact 후처리
+  - `standalone.py`: public ask/chat bridge
+  - `validation.py`: 숫자 검증
+- `conversation/`
+  - `dialogue.py`, `history.py`, `intent.py`, `focus.py`, `prompts.py`
+  - `suggestions.py`: 회사 상태 기반 추천 질문 생성
+  - `data_ready.py`: docs/finance/report 가용성 요약
+- `context/`
+  - `builder.py`: structured context build
+  - `snapshot.py`: headline snapshot
+  - `company_adapter.py`: facade mismatch adapter
+  - `dartOpenapi.py`: OpenDART filing intent 파싱 + recent filing context
+- `tools/`
+  - `registry.py`: tool/capability binding (`useSuperTools` 플래그로 모드 전환)
+  - `runtime.py`: tool execution runtime
+  - `selector.py`: capability 기반 도구 선택 + Super Tool 전용 prompt 분기
+  - `plugin.py`: external tool plugin bridge
+  - `coding.py`: coding runtime bridge
+  - `recipes.py`: 질문 유형별 선행 분석 레시피
+  - `routeHint.py`: 키워드→도구 매핑 (Super Tool 모드에서 deprecated)
+  - `superTools/`: **7개 Super Tool dispatcher** (explore/finance/analyze/market/openapi/system/chart)
+  - `defaults/`: 기존 101개 도구 등록 (레거시 모드에서 사용)
+- `providers/support/`
+  - `codex_cli.py`, `cli_setup.py`, `ollama_setup.py`, `oauth_token.py`
+  - provider 구현이 직접 쓰는 CLI/OAuth 보조 계층
+루트 shim 모듈(`core.py`, `tools_registry.py`, `dialogue.py` 등)은 제거되었다. 새 코드는 반드시 하위 패키지 경로(`runtime/`, `conversation/`, `context/`, `tools/`, `providers/support/`)를 직접 import한다.
+## Super Tool 아키텍처 (2026-03-25)
+101개 도구를 7개 Super Tool dispatcher로 통합. ollama(소형 모델)에서 자동 활성화.
+### 모델 요구사항
+- **최소**: tool calling 지원 + 14B 파라미터 이상 (예: qwen3:14b, llama3.1:8b-instruct)
+- **권장**: GPT-4o, Claude Sonnet 이상 — tool calling + 한국어 + 복합 파라미터 동시 처리
+- **부적합**: 8B 이하 소형 모델 (qwen3:4b/8b) — action dispatch 패턴을 이해하지 못함, hallucination 다발
+- 실험 009 검증 결과: qwen3:4b tool 정확도 33%, qwen3:8b 0%. 소형 모델은 tool calling AI 분석에 사용 불가.
+### 활성화 조건
+- **모든 provider에서 Super Tool 기본 활성화** (`_useSuperTools = True`)
+- `build_tool_runtime(company, useSuperTools=False)`로 레거시 모드 수동 전환 가능
+- Route Hint(`routeHint.py`)는 deprecated — Super Tool enum description이 대체
+### 7개 Super Tool
+| Tool | 통합 대상 | action enum |
+|------|----------|-------------|
+| `explore` | show_topic, list_topics, trace, diff, info, filings, search | 7 |
+| `finance` | get_data, list_modules, ratios, growth, yoy, anomalies, report, search | 8 |
+| `analyze` | insight, sector, rank, esg, valuation, changes, audit | 7 |
+| `market` | price, consensus, history, screen | 4 |
+| `openapi` | dartCall, searchFilings, capabilities | 3 |
+| `system` | spec, features, searchCompany, dataStatus, suggest | 5 |
+| `chart` | navigate, chart | 2 |
+### 동적 enum
+- `explore.target`: company.topics에서 추출 (삼성전자 기준 53개) + 한국어 라벨
+- `finance.module`: scan_available_modules에서 추출 (9개) + 한국어 라벨
+- `finance.apiType`: company.report.availableApiTypes에서 추출 (24개) + 한국어 라벨
+- enum description에 `topicLabels.py`의 한국어 라벨과 aliases 포함
+### 한국어 라벨 source of truth
+- `core/topicLabels.py`: 70개 topic × 한국어 라벨 + 검색 aliases
+- UI의 `topicLabels.js`와 동일 매핑 + AI용 aliases 추가
+## UI Action 계약
+- canonical payload는 `UiAction`이다.
+- render payload는 `ViewSpec` + `WidgetSpec` schema를 기준으로 한다.
+- widget id(`chart`, `comparison`, `insight_dashboard`, `table`)는 UI widget registry에 등록된 것만 사용한다.
+- 허용 action:
+  - `navigate`
+  - `render`
+  - `update`
+  - `toast`
+- canonical SSE UI 이벤트는 `ui_action` 하나만 유지한다.
+- auto artifact도 별도 chart 이벤트가 아니라 canonical `render` UI action으로 주입한다.
+- Svelte 측 AI bridge/helper는 `src/dartlab/ui/src/lib/ai/`에 둔다. `App.svelte`는 provider/profile 동기화와 stream wiring만 연결하는 shell로 유지한다.
+## Provider Surface
+- 공식 GPT 구독 계정 경로는 두 개다.
+  - `codex`: Codex CLI 로그인 기반
+  - `oauth-codex`: ChatGPT OAuth 직접 연결 기반
+- 공개 provider surface는 `codex`, `oauth-codex`, `openai`, `ollama`, `custom`만 유지한다.
+- `claude` provider는 public surface에서 제거되었다. 남은 Claude 관련 코드는 legacy/internal 용도로만 취급한다.
+- provider alias(`chatgpt`, `chatgpt-oauth`)는 더 이상 공개/호환 surface에 두지 않는다.
+- ask/CLI/server/UI는 같은 provider 문자열을 공유해야 하며, 새 GPT 경로를 추가할 때는 이 문서와 `core/ai/providers.py`, `server/api/ai.py`, `ui/src/App.svelte`, `cli/context.py`를 같이 갱신한다.
+## Shared Profile
+- AI 설정 source-of-truth는 `~/.dartlab/ai_profile.json`과 공통 secret store다.
+- `dartlab.llm.configure()`는 메모리 전용 setter가 아니라 shared profile writer다.
+- profile schema는 `defaultProvider + roles(analysis, summary, coding, ui_control)` 구조다.
+- UI는 provider/model을 localStorage에 저장하지 않고 `/api/ai/profile`과 `/api/ai/profile/events`를 통해 동기화한다.
+- API key는 profile JSON에 저장하지 않고 secret store에만 저장한다.
+- OAuth 토큰도 legacy `oauth_token.json` 대신 공통 secret store로 이동한다.
+- Ollama preload/probe는 선택 provider가 `ollama`일 때만 적극적으로 수행한다. 다른 provider가 선택된 상태에서는 상태 조회도 lazy probe가 기본이다.
+- OpenDART 키는 provider secret store로 흡수하지 않고 프로젝트 `.env`를 source-of-truth로 유지한다.
+## Company Adapter 원칙
+- AI 레이어는 `company.ratios` 같은 facade surface를 직접 신뢰하지 않는다.
+- headline ratio / ratio series는 `src/dartlab/ai/context/company_adapter.py`로만 접근한다.
+- facade와 엔진 surface mismatch를 발견하면 AI 코드 곳곳에서 분기하지 말고 adapter에 흡수한다.
+## Ask Context 정책
+- 기본 `ask`는 cheap-first다. 질문에 맞는 최소 source만 읽고, `docs/finance/report` 전체 선로딩을 금지한다.
+- 일반 `ask`의 기본 context tier는 `focused`다. `full` tier는 `report_mode=True`일 때만 허용한다.
+- tool-capable provider(`openai`, `ollama`, `custom`)만 `use_tools=True`일 때 `skeleton` tier를 사용한다.
+- `oauth-codex` 기본 ask는 더 이상 `full`로 떨어지지 않는다.
+- `auto diff`는 `full` tier에서만 자동 계산한다. 기본 ask에서는 `company.diff()`를 선행 호출하지 않는다.
+- 질문 해석은 route-first가 아니라 **candidate-module-first**다. 먼저 `sections / notes / report / finance` 후보를 동시에 모으고, 실제 존재하는 모듈만 컨텍스트에 싣는다.
+- `costByNature`, `rnd`, `segments`처럼 sections topic이 아니어도 direct/notes 경로로 존재하면 `ask`가 우선 회수한다.
+- 일반 `ask`에서 포함된 모듈이 있으면 `"데이터 없음"`이라고 답하면 실패로 본다. false-unavailable 방지가 기본 계약이다.
+- tool calling이 비활성화된 ask에서는 `show_topic()` 같은 호출 계획을 문장으로 출력하지 않는다. 이미 제공된 컨텍스트만으로 바로 답하고, 모호할 때만 한 문장 확인 질문을 한다.
+- **분기 질문 정책**: "분기", "분기별", "quarterly", "QoQ", "전분기" 등 분기 키워드가 감지되면:
+  - route를 `hybrid`로 전환하여 sections + finance 양쪽 모두 포함한다.
+  - `company.timeseries`에서 IS/CF 분기별 standalone 데이터를 최근 8분기만 추출하여 context에 주입한다.
+  - `fsSummary`를 sections exclude 목록에서 일시 해제하여 분기 요약도 포함한다.
+  - response_contract에 분기 데이터 활용 지시를 추가한다.
+- **finance route sections 보조 정책**: route=finance일 때도 `businessStatus`, `businessOverview` 중 존재하는 topic 1개를 경량 outline으로 주입한다. "왜 이익률이 변했는지" 같은 맥락을 LLM이 설명할 수 있게 한다.
+- **context budget**: focused=10000, full=16000. 분기 데이터 + sections 보조를 수용할 수 있는 크기.
+## Persona Eval 루프
+- ask 장기 개선의 기본 단위는 **실사용 로그가 아니라 curated 질문 세트 replay**다.
+- source-of-truth는 `src/dartlab/ai/eval/personaCases.json`이다.
+- 사람 검수 이력 source-of-truth는 `src/dartlab/ai/eval/reviewLog/<persona>.jsonl`이다.
+- persona 축은 최소 `assistant`, `data_manager`, `operator`, `installer`, `research_gather`, `accountant`, `business_owner`, `investor`, `analyst`를 유지한다.
+- 각 case는 질문만 저장하지 않는다.
+  - `expectedRoute`
+  - `expectedModules`
+  - `mustInclude`
+  - `mustNotSay`
+  - `forbiddenUiTerms`
+  - `allowedClarification`
+  - `expectedFollowups`
+  - `groundTruthFacts`
+- 새 ask 실패는 바로 프롬프트 hotfix로 덮지 않고 먼저 아래 유형으로 분류한다.
+  - `routing_failure`
+  - `retrieval_failure`
+  - `false_unavailable`
+  - `generation_failure`
+  - `ui_wording_failure`
+  - `data_gap`
+  - `runtime_error`
+- replay runner source-of-truth는 `src/dartlab/ai/eval/replayRunner.py`다.
+- 실제 replay를 검토할 때는 결과만 남기지 않고 반드시 `reviewedAt / effectiveness / improvementActions / notes`를 같이 남긴다.
+- review log는 persona별로 분리한다.
+  - `reviewLog/accountant.jsonl`
+  - `reviewLog/investor.jsonl`
+  - `reviewLog/analyst.jsonl`
+- 다음 회차 replay는 같은 persona 파일을 이어서 보고, `효과적이었는지`와 `이번 개선으로 줄여야 할 failure type`을 같이 적는다.
+- 개선 루프는 항상 `질문 세트 추가 → replay → failure taxonomy 확인 → AI fix vs DartLab core fix 분리 → 회귀 재실행` 순서로 간다.
+- "장기 학습"은 모델 학습이 아니라 이 replay/backlog 루프를 뜻한다.
+- replay에서 반복 실패한 질문 묶음은 generic ambiguity로 남기지 말고 강제 규칙으로 승격한다.
+  - `부실 징후`류 질문 → `finance` route 고정
+  - `영업이익률 + 비용 구조 + 사업 변화` → `IS + costByNature + businessOverview/productService` 강제 hybrid, clarification 금지
+  - `최근 공시 + 사업 구조 변화` → `disclosureChanges`에 `businessOverview/productService`를 같이 회수
+- **groundTruthFacts는 수동 하드코딩이 아니라 `truthHarvester`로 자동 생성한다.**
+  - `scripts/harvestEvalTruth.py`로 배치 실행, `--severity critical,high`부터 우선 채움
+  - finance 엔진에서 IS/BS/CF 핵심 계정 + ratios를 자동 추출
+  - `truthAsOf` 날짜로 데이터 시점을 기록
+- **결정론적 검증(라우팅/모듈)은 LLM 호출 없이 CI에서 매 커밋 검증한다.**
+  - `tests/test_eval_deterministic.py` — personaCases.json의 expectedRoute/모듈/구조 무결성 검증
+  - personaCases에 케이스를 추가하면 자동으로 결정론적 테스트도 실행됨
+  - `@pytest.mark.unit` → `test-lock.sh` 1단계에서 실행
+- **배치 replay는 `scripts/runEvalBatch.py`로 자동화한다.**
+  - `--provider`, `--model`, `--severity`, `--persona`, `--compare latest` 필터
+  - 결과는 `eval/batchResults/` JSONL로 저장, 이전 배치와 회귀 비교 지원
+- **replaySuite()는 Company 캐시 3개 제한으로 OOM을 방지한다.**
+  - 4번째 Company 로드 시 가장 오래된 캐시 제거 + `gc.collect()`
+## User Language 원칙
+- UI 기본 surface에서는 internal module/method 이름을 직접 노출하지 않는다.
+- ask 내부 debug/meta와 eval/log에서는 raw module 이름을 유지해도 된다.
+- runtime `meta` / `done`에는 raw `includedModules`와 함께 사용자용 `includedEvidence` label을 같이 실어 보낸다.
+- UI evidence panel, transparency badges, modal title은 사용자용 evidence label을 우선 사용한다.
+- tool 이름도 UI에서는 사용자 행동 기준 문구로 보여준다.
+  - 예: `list_live_filings` → `실시간 공시 목록 조회`
+  - 예: `get_data` → `재무·공시 데이터 조회`
+- ask 본문도 기본적으로 사용자 언어를 쓴다.
+  - `IS/BS/CF/ratios/TTM` → `손익계산서/재무상태표/현금흐름표/재무비율/최근 4분기 합산`
+  - `costByNature/businessOverview/productService` → `성격별 비용 분류/사업의 개요/제품·서비스`
+  - `topic/period/source` → `항목/시점/출처`
+## Sections First Retrieval
+- `sections`는 기본적으로 “본문 덩어리”가 아니라 “retrieval index”로 쓴다.
+- sections 계열 질문은 `topics() -> outline(topic) -> contextSlices -> raw docs sections block` 순서로 좁힌다.
+- `contextSlices`가 ask의 기본 evidence layer다. `outline(topic)`는 인덱스/커버리지 확인용이고, 실제 근거 문장은 `contextSlices`에서 먼저 회수한다.
+- `retrievalBlocks/raw sections`는 `contextSlices`만으로 근거가 부족할 때만 추가로 연다.
+- 일반 재무 질문에서는 `sections`, `report`, `insights`, `change summary`를 자동으로 붙이지 않는다.
+- 배당/직원/최대주주/감사처럼 명시적인 report 질문에서만 report pivot/context를 올린다.
+## Follow-up Continuity
+- 후속 턴이 `최근 5개년`, `그럼`, `이어서`처럼 짧은 기간/연속 질문이면 직전 assistant `includedModules`를 이어받아 같은 분석 축을 유지한다.
+- 이 상속은 아무 질문에나 적용하지 않고 `follow_up` 모드 + 기간/연속 힌트가 있을 때만 적용한다.
+- 강한 direct intent 질문(`성격별 비용`, `인건비`, `감가상각`, `물류비`)은 clarification 없이 바로 `costByNature`를 회수한다.
+- `costByNature` 같은 다기간 direct module이 포함되면 기간이 비어 있어도 최신 시점과 최근 추세를 먼저 답한다. 연도 기준을 먼저 다시 묻지 않는다.

src/dartlab/ai/STATUS.md ADDED Viewed

	@@ -0,0 +1,200 @@

+# AI Engine — Provider 현황 및 유지보수 체크리스트
+## Provider 목록 (7개)
+| Provider | 파일 | 인증 | 기본 모델 | 안정성 |
+|----------|------|------|----------|--------|
+| `openai` | openai_compat.py | API Key | gpt-4o | **안정** — 공식 SDK |
+| `ollama` | ollama.py | 없음 (localhost) | llama3.1 | **안정** — 로컬 |
+| `custom` | openai_compat.py | API Key | gpt-4o | **안정** — OpenAI 호환 |
+| `chatgpt` | providers/__init__.py alias | `codex`로 정규화 | codex mirror | **호환용 alias** — 공개 surface 비노출 |
+| `codex` | codex.py | CLI 세션 | CLI config 또는 gpt-4.1 | **공식 경로 우선** — Codex CLI 의존 |
+| `oauth-codex` | oauthCodex.py | ChatGPT OAuth | gpt-5.4 | **공개 경로** — 비공식 backend API 의존 |
+| `claude-code` | claude_code.py | CLI 세션 | sonnet | **보류중** — OAuth 지원 전 비공개 |
+---
+## 현재 공개 경로
+- ChatGPT 구독 계정 경로는 2개다.
+  - `codex`: Codex CLI 로그인 기반
+  - `oauth-codex`: ChatGPT OAuth 직접 연결 기반
+- 공개 provider surface는 `codex`, `oauth-codex`, `openai`, `ollama`, `custom`만 유지한다.
+- `claude` provider는 public surface에서 제거되었고 legacy/internal 코드로만 남아 있다.
+- `chatgpt`는 기존 설정/호환성 때문에 내부 alias로만 남아 있으며 실제 구현은 `codex`로 정규화된다.
+- `chatgpt-oauth`는 내부/호환 alias로만 남아 있으며 실제 구현은 `oauth-codex`로 정규화된다.
+## Tool Runtime 기반
+- 도구 등록/실행은 `tool_runtime.py`의 `ToolRuntime`으로 분리되기 시작했다.
+- `tools_registry.py`는 현재 호환 래퍼 역할을 하며, 세션별/에이전트별 isolated runtime 생성이 가능하다.
+- coding executor는 `coding_runtime.py`로 분리되기 시작했고, backend registry를 통해 관리한다.
+- 표준 코드 작업 진입점은 `run_coding_task`이며 `run_codex_task`는 Codex compatibility alias로 유지한다.
+- 다음 단계는 Codex 외 backend를 이 runtime 뒤에 추가하되, 공개 provider surface와는 분리하는 것이다.
+## ChatGPT OAuth Provider — 핵심 리스크
+### 왜 취약한가
+`oauth-codex` provider는 **OpenAI 비공식 내부 API** (`chatgpt.com/backend-api/codex/responses`)를 사용한다.
+공식 OpenAI API (`api.openai.com`)가 아니므로 **예고 없이 변경/차단될 수 있다**.
+### 정기 체크 항목
+**1. 엔드포인트 변경**
+- 현재: `https://chatgpt.com/backend-api/codex/responses`
+- 파일: [oauthCodex.py](providers/oauthCodex.py) `CODEX_API_BASE`, `CODEX_RESPONSES_PATH`
+- OpenAI가 URL 경로를 변경하면 즉시 404/403 발생
+- 확인법: `dartlab status` 실행 → chatgpt available 확인
+**2. OAuth 인증 파라미터**
+- Client ID: `app_EMoamEEZ73f0CkXaXp7hrann` (Codex CLI에서 추출)
+- 파일: [oauthToken.py](../oauthToken.py) `CHATGPT_CLIENT_ID`
+- OpenAI가 client_id를 갱신하거나 revoke하면 로그인 불가
+- 확인법: OAuth 로그인 시도 → "invalid_client" 에러 여부
+**3. SSE 이벤트 타입**
+- 현재 파싱하는 타입 3개:
+  - `response.output_text.delta` — 텍스트 청크
+  - `response.content_part.delta` — 컨텐츠 청크
+  - `response.output_item.done` — 아이템 완료
+- 파일: [oauthCodex.py](providers/oauthCodex.py) `stream()`, `_parse_sse_response()`
+- OpenAI가 이벤트 스키마를 변경하면 응답이 빈 문자열로 돌아옴
+- 확인법: 스트리밍 응답이 도착하는데 텍스트가 비어있으면 이벤트 타입 변경 의심
+**4. 요청 헤더**
+- `originator: codex_cli_rs` — Codex CLI 사칭
+- `OpenAI-Beta: responses=experimental` — 실험 API 플래그
+- 파일: [oauthCodex.py](providers/oauthCodex.py) `_build_headers()`
+- 이 헤더 없이는 403 반환됨
+- OpenAI가 originator 검증을 강화하면 차단됨
+**5. 모델 목록**
+- `AVAILABLE_MODELS` 리스트는 수동 관리
+- 파일: [oauthCodex.py](providers/oauthCodex.py) `AVAILABLE_MODELS`
+- 새 모델 출시/폐기 시 수동 업데이트 필요
+- GPT-4 시리즈 (gpt-4, gpt-4-turbo 등)는 이미 제거됨
+**6. 토큰 만료 정책**
+- access_token: expires_in 기준 (현재 ~1시간)
+- refresh_token: 만료 정책 불명 (OpenAI 미공개)
+- 파일: [oauthToken.py](../oauthToken.py) `get_valid_token()`, `refresh_access_token()`
+- refresh_token이 만료되면 재로그인 필요
+- 확인법: 며칠 방치 후 요청 → 401 + refresh 실패 여부
+### 브레이킹 체인지 대응 순서
+1. 사용자가 "ChatGPT 안됨" 보고
+2. `dartlab status` 로 available 확인
+3. available=False → OAuth 로그인 재시도
+4. 로그인 실패 → client_id 변경 확인 (opencode-openai-codex-auth 참조)
+5. 로그인 성공인데 API 호출 실패 → 엔드포인트/헤더 변경 확인
+6. API 호출 성공인데 응답 비어있음 → SSE 이벤트 타입 변경 확인
+### 생태계 비교 — 누가 같은 API를 쓰는가
+ChatGPT OAuth(`chatgpt.com/backend-api`)를 사용하는 프로젝트는 **전부 openai/codex CLI 역공학** 기반이다.
+| 프로젝트 | 언어 | Client ID | 모델 목록 | refresh 실패 처리 | 토큰 저장 |
+|----------|------|-----------|----------|------------------|----------|
+| **openai/codex** (공식) | Rust | 하드코딩 | `/models` 동적 + 5분 캐시 | 4가지 분류 | 파일/키링/메모리 3중 |
+| **opencode plugin** | TS | 동일 복제 | 사용자 설정 의존 | 단순 throw | 프레임워크 위임 |
+| **ai-sdk-provider** | TS | 동일 복제 | 3개 하드코딩 | 단순 throw | codex auth.json 재사용 |
+| **dartlab** (현재) | Python | 동일 복제 | 13개 하드코딩 | None 반환 | `~/.dartlab/oauth_token.json` |
+**공통 특징:**
+- Client ID `app_EMoamEEZ73f0CkXaXp7hrann` 전원 동일 (OpenAI public OAuth client)
+- `originator: codex_cli_rs` 헤더 전원 동일
+- OpenAI가 이 값들을 바꾸면 **전부 동시에 깨짐**
+**openai/codex만의 차별점 (dartlab에 없는 것):**
+1. Token Exchange — OAuth 토큰 → `api.openai.com` 호환 API Key 변환
+2. Device Code Flow — headless 환경 (서버, SSH) 인증 지원
+3. 모델 목록 동적 조회 — `/models` 엔드포인트 + 캐시 + bundled fallback
+4. Keyring 저장 — OS 키체인 (macOS Keychain, Windows Credential Manager)
+5. refresh 실패 4단계 분류 — expired / reused / revoked / other
+6. WebSocket SSE 이중 지원
+**참고: opencode와 oh-my-opencode(현 oh-my-openagent)는 ChatGPT OAuth를 사용하지 않는다.**
+- opencode: GitHub Copilot API 인증 (다른 시스템)
+- oh-my-openagent: MCP 서버 표준 OAuth 2.0 + PKCE (플러그인)
+### 추적 대상 레포지토리
+변경사항 감지를 위해 다음 레포를 추적한다.
+| 레포 | 추적 이유 | Watch 대상 |
+|------|----------|-----------|
+| **openai/codex** | canonical 구현. Client ID, 엔드포인트, 헤더의 원본 | `codex-rs/core/src/auth.rs`, `model_provider_info.rs` |
+| **numman-ali/opencode-openai-codex-auth** | 빠른 변경 반영 (TS라 읽기 쉬움) | `lib/auth/`, `lib/constants.ts` |
+| **ben-vargas/ai-sdk-provider-chatgpt-oauth** | Vercel AI SDK 호환 참조 | `src/auth/` |
+### 향후 개선 후보 (codex에서 가져올 수 있는 것)
+1. **모델 목록 동적 조회** — `chatgpt.com/backend-api/codex/models` 호출 + JSON 캐시
+2. **refresh 실패 분류** — expired/reused/revoked 구분하여 사용자에게 구체적 안내
+3. **Token Exchange** — OAuth → API Key 변환으로 `api.openai.com` 호환 (듀얼 엔드포인트)
+---
+## Codex CLI Provider — 리스크
+### 왜 취약한가
+`codex` provider는 OpenAI `codex` CLI 바이너리를 subprocess로 호출한다.
+CLI의 JSONL 출력 포맷이 변경되면 파싱 실패.
+### 현재 동작
+- `~/.codex/config.toml`의 model 설정을 우선 흡수
+- `codex --help`, `codex exec --help`를 읽어 command/sandbox capability를 동적 감지
+- 일반 질의는 `read-only`, 코드 수정 의도는 `workspace-write` sandbox 우선
+- 별도 `run_codex_task` tool로 다른 provider에서도 Codex CLI 코드 작업 위임 가능
+### 체크 항목
+- CLI 출력 포맷: `item.completed.item.agent_message.text` 경로
+- CLI 플래그: `--json`, `--sandbox ...`, `--model ...`, `--skip-git-repo-check`
+- CLI 설치: `npm install -g @openai/codex`
+- 파일: [codex.py](providers/codex.py)
+---
+## Claude Code CLI Provider — 보류중
+### 현재 상태
+VSCode 환경에서 `CLAUDECODE` 환경변수가 설정되어 SDK fallback 모드로 진입하지만,
+SDK fallback에서 API key 추출(`claude auth status --json`)이 또 subprocess를 호출하는 순환 문제.
+### 알려진 이슈
+- 테스트 31/32 pass, `test_complete_timeout` 1개 fail
+- VSCode 내에서 CLI 호출이 hang되는 케이스 (중첩 세션)
+- `_probe_cli()` 8초 타임아웃으로 hang 감지 후 SDK 전환
+- 파일: [claude_code.py](providers/claude_code.py)
+---
+## 안정 Provider — 특이사항 없음
+### openai / custom (openai_compat.py)
+- 공식 `openai` Python SDK 사용
+- 버전 업데이트 시 SDK breaking change만 주의
+- tool calling 지원
+### claude (claude.py) — legacy/internal 전용, 공개 provider surface에는 노출되지 않음
+- 공식 `anthropic` Python SDK + OpenAI 프록시 이중 모드
+- base_url 있으면 OpenAI 호환, 없으면 Anthropic 네이티브
+### ollama (ollama.py)
+- localhost:11434 OpenAI 호환 엔드포인트
+- `preload()`, `get_installed_models()`, `complete_json()` 추가 기능
+- tool calling 지원 (v0.3.0+)
+---
+## 마지막 점검일
+- 2026-03-10: ChatGPT OAuth 정상 동작 확인 (gpt-5.4)
+- 2026-03-10: Claude Code 보류 (VSCode 환경이슈)

src/dartlab/ai/__init__.py ADDED Viewed

	@@ -0,0 +1,119 @@

+"""LLM 기반 기업분석 엔진."""
+from __future__ import annotations
+from dartlab.ai.types import LLMConfig, LLMResponse
+from dartlab.core.ai import (
+    AI_ROLES,
+    DEFAULT_ROLE,
+    get_profile_manager,
+    get_provider_spec,
+    normalize_provider,
+    normalize_role,
+)
+def configure(
+    provider: str = "codex",
+    model: str | None = None,
+    api_key: str | None = None,
+    base_url: str | None = None,
+    role: str | None = None,
+    temperature: float = 0.3,
+    max_tokens: int = 4096,
+    system_prompt: str | None = None,
+) -> None:
+    """공통 AI profile을 갱신한다."""
+    normalized = normalize_provider(provider) or provider
+    if get_provider_spec(normalized) is None:
+        raise ValueError(f"지원하지 않는 provider: {provider}")
+    normalized_role = normalize_role(role)
+    if role is not None and normalized_role is None:
+        raise ValueError(f"지원하지 않는 role: {role}. 지원: {AI_ROLES}")
+    manager = get_profile_manager()
+    manager.update(
+        provider=normalized,
+        model=model,
+        role=normalized_role,
+        base_url=base_url,
+        temperature=temperature,
+        max_tokens=max_tokens,
+        system_prompt=system_prompt,
+        updated_by="code",
+    )
+    if api_key:
+        spec = get_provider_spec(normalized)
+        if spec and spec.auth_kind == "api_key":
+            manager.save_api_key(normalized, api_key, updated_by="code")
+def get_config(provider: str | None = None, *, role: str | None = None) -> LLMConfig:
+    """현재 글로벌 LLM 설정 반환."""
+    normalized_role = normalize_role(role)
+    resolved = get_profile_manager().resolve(provider=provider, role=normalized_role)
+    return LLMConfig(**resolved)
+def status(provider: str | None = None, *, role: str | None = None) -> dict:
+    """LLM 설정 및 provider 상태 확인."""
+    from dartlab.ai.providers import create_provider
+    normalized_role = normalize_role(role)
+    config = get_config(provider, role=normalized_role)
+    selected_provider = config.provider
+    llm = create_provider(config)
+    available = llm.check_available()
+    result = {
+        "provider": selected_provider,
+        "role": normalized_role or DEFAULT_ROLE,
+        "model": llm.resolved_model,
+        "available": available,
+        "defaultProvider": get_profile_manager().load().default_provider,
+    }
+    if selected_provider == "ollama":
+        from dartlab.ai.providers.support.ollama_setup import detect_ollama
+        result["ollama"] = detect_ollama()
+    if selected_provider == "codex":
+        from dartlab.ai.providers.support.cli_setup import detect_codex
+        result["codex"] = detect_codex()
+    if selected_provider == "oauth-codex":
+        from dartlab.ai.providers.support import oauth_token as oauthToken
+        token_stored = False
+        try:
+            token_stored = oauthToken.load_token() is not None
+        except (OSError, ValueError):
+            token_stored = False
+        try:
+            authenticated = oauthToken.is_authenticated()
+            account_id = oauthToken.get_account_id() if authenticated else None
+        except (
+            AttributeError,
+            OSError,
+            RuntimeError,
+            ValueError,
+            oauthToken.TokenRefreshError,
+        ):
+            authenticated = False
+            account_id = None
+        result["oauth-codex"] = {
+            "authenticated": authenticated,
+            "tokenStored": token_stored,
+            "accountId": account_id,
+        }
+    return result
+from dartlab.ai import aiParser as ai
+from dartlab.ai.tools.plugin import get_plugin_registry, tool
+__all__ = ["configure", "get_config", "status", "LLMConfig", "LLMResponse", "ai", "tool", "get_plugin_registry"]

src/dartlab/ai/agent.py ADDED Viewed

	@@ -0,0 +1,30 @@

+"""호환 shim — 실제 구현은 runtime/agent.py로 이동됨.
+기존 import 경로를 유지하기 위한 re-export.
+"""
+from dartlab.ai.runtime.agent import (  # noqa: F401
+    AGENT_SYSTEM_ADDITION,
+    PLANNING_PROMPT,
+    _reflect_on_answer,
+    agent_loop,
+    agent_loop_planning,
+    agent_loop_stream,
+    build_agent_system_addition,
+)
+from dartlab.ai.tools.selector import selectTools  # noqa: F401
+# Backward compatibility: the legacy private name ``_select_tools`` stays importable
+# from this module as an alias of ``selectTools``.
+_select_tools = selectTools
+__all__ = [
+    "AGENT_SYSTEM_ADDITION",
+    "PLANNING_PROMPT",
+    "_reflect_on_answer",
+    "_select_tools",
+    "agent_loop",
+    "agent_loop_planning",
+    "agent_loop_stream",
+    "build_agent_system_addition",
+    "selectTools",
+]

src/dartlab/ai/aiParser.py ADDED Viewed

	@@ -0,0 +1,500 @@

+"""AI 보조 파싱 — 기존 파서 출력을 AI가 후처리하여 강화.
+기존 파서를 교체하지 않는다. 파서가 생산한 DataFrame/텍스트를
+LLM이 해석·요약·검증하는 후처리 레이어.
+기존 LLM provider 시스템 재사용: dartlab.llm.configure() 설정을 그대로 활용.
+사용법::
+    import dartlab
+    dartlab.llm.configure(provider="ollama", model="llama3.2")
+    c = dartlab.Company("005930")
+    # 요약
+    dartlab.llm.ai.summarize(c.IS)
+    # 계정 해석
+    dartlab.llm.ai.interpret_accounts(c.BS)
+    # 이상치 탐지
+    dartlab.llm.ai.detect_anomalies(c.dividend)
+    # 텍스트 분류
+    dartlab.llm.ai.classify_text(c.mdna)
+"""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+import polars as pl
+from dartlab.ai.metadata import get_meta
+# Exception types that detect_anomalies() swallows when its optional LLM stage fails,
+# so that the statistical results are still returned.
+_AI_PARSER_ERRORS = (ImportError, OSError, RuntimeError, TypeError, ValueError)
+# ══════════════════════════════════════
+# 내부 LLM 호출
+# ══════════════════════════════════════
+def _llm_call(prompt: str, system: str = "") -> str:
+    """내부 LLM 호출. 글로벌 설정된 provider 사용."""
+    from dartlab.ai import get_config
+    from dartlab.ai.providers import create_provider
+    config = get_config()
+    provider = create_provider(config)
+    messages = []
+    if system:
+        messages.append({"role": "system", "content": system})
+    messages.append({"role": "user", "content": prompt})
+    response = provider.complete(messages)
+    return response.answer
+# ══════════════════════════════════════
+# 요약
+# ══════════════════════════════════════
+def summarize(
+    data: pl.DataFrame | str | list,
+    *,
+    module_name: str | None = None,
+    lang: str = "ko",
+) -> str:
+    """DataFrame, 텍스트, 또는 리스트를 2~5문장으로 요약.
+    Args:
+            data: DataFrame (마크다운 변환 후 요약), str (직접 요약), list (결합 후 요약)
+            module_name: 메타데이터 활용을 위한 모듈명
+            lang: "ko" 또는 "en"
+    Returns:
+            요약 텍스트 (2~5문장)
+    """
+    from dartlab.ai.context.builder import df_to_markdown
+    # 데이터 → 텍스트
+    if isinstance(data, pl.DataFrame):
+        meta = get_meta(module_name) if module_name else None
+        text = df_to_markdown(data, meta=meta)
+    elif isinstance(data, list):
+        parts = []
+        for item in data[:10]:
+            if hasattr(item, "title") and hasattr(item, "text"):
+                parts.append(f"[{item.title}]\n{item.text[:500]}")
+            else:
+                parts.append(str(item)[:500])
+        text = "\n\n".join(parts)
+    else:
+        text = str(data)[:3000]
+    # 메타데이터 컨텍스트
+    context = ""
+    if module_name:
+        meta = get_meta(module_name)
+        if meta:
+            context = f"이 데이터는 '{meta.label}'입니다. {meta.description}\n\n"
+    system = "한국어로 답변하세요." if lang == "ko" else "Answer in English."
+    prompt = (
+        f"{context}"
+        f"다음 데이터를 2~5문장으로 핵심만 요약하세요.\n"
+        f"수치를 구체적으로 인용하고, 주요 추세와 특이사항을 포함하세요.\n\n"
+        f"{text}"
+    )
+    return _llm_call(prompt, system=system)
+# ══════════════════════════════════════
+# 계정 해석
+# ══════════════════════════════════════
+def interpret_accounts(
+    df: pl.DataFrame,
+    *,
+    account_col: str = "계정명",
+    module_name: str | None = None,
+) -> pl.DataFrame:
+    """재무제표에 '설명' 컬럼 추가. 각 계정명의 의미를 LLM이 해석.
+    LLM 1회 호출로 전체 계정 일괄 해석 (개별 호출 아님).
+    Args:
+            df: 계정명 컬럼이 있는 재무제표 DataFrame
+            account_col: 계정명 컬럼명
+            module_name: "BS", "IS", "CF" 등
+    Returns:
+            원본 + '설명' 컬럼이 추가된 DataFrame
+    """
+    if account_col not in df.columns:
+        return df
+    accounts = df[account_col].to_list()
+    if not accounts:
+        return df
+    # 유일한 계정명만 추출
+    unique_accounts = list(dict.fromkeys(accounts))
+    module_hint = ""
+    if module_name:
+        meta = get_meta(module_name)
+        if meta:
+            module_hint = f"이 데이터는 '{meta.label}'({meta.description})입니다.\n"
+    prompt = (
+        f"{module_hint}"
+        f"다음 K-IFRS 계정명 각각에 대해 한 줄(20��� 이내)로 설명하세요.\n"
+        f"형식: 계정명: 설명\n\n" + "\n".join(unique_accounts)
+    )
+    answer = _llm_call(prompt, system="한국어로 답변하세요. 각 계정에 대해 간결하게 설명만 하세요.")
+    # 응답 파싱: "계정명: 설명" 형태
+    desc_map: dict[str, str] = {}
+    for line in answer.strip().split("\n"):
+        line = line.strip().lstrip("- ").lstrip("· ")
+        if ":" in line:
+            parts = line.split(":", 1)
+            key = parts[0].strip()
+            val = parts[1].strip()
+            desc_map[key] = val
+    # 매핑
+    descriptions = []
+    for acct in accounts:
+        desc = desc_map.get(acct, "")
+        if not desc:
+            # 부분 매칭 시도
+            for k, v in desc_map.items():
+                if k in acct or acct in k:
+                    desc = v
+                    break
+        descriptions.append(desc)
+    return df.with_columns(pl.Series("설명", descriptions))
+# ══════════════════════════════════════
+# 이상치 탐지
+# ══════════════════════════════════════
+@dataclass
+class Anomaly:
+    """A single anomaly detected in one column of a time-series DataFrame."""
+    column: str  # name of the column the flagged value belongs to
+    year: str  # stringified value of the year column for the flagged row
+    value: Any  # the flagged value itself
+    prev_value: Any  # value of the preceding row; the prescreen sets None for "outlier" entries
+    change_pct: float | None  # YoY change in percent (rounded to 1 decimal) for "spike"; None for other types
+    anomaly_type: str  # "spike", "sign_reversal", "outlier", "missing"
+    severity: str = "medium"  # "high", "medium", "low"
+    description: str = ""  # explanation text; detect_anomalies() fills it from the LLM answer
+def _statistical_prescreen(
+    df: pl.DataFrame,
+    *,
+    year_col: str = "year",
+    threshold_pct: float = 50.0,
+) -> list[Anomaly]:
+    """순수 통계 기반 이상치 사전 탐지 (LLM 없이 동작).
+    탐지 기준:
+    - YoY 변동 threshold_pct% 초과
+    - 부호 반전 (양→음, 음→양)
+    - 2σ 이탈
+    """
+    if year_col not in df.columns:
+        return []
+    df_sorted = df.sort(year_col)
+    numeric_cols = [
+        c for c in df.columns if c != year_col and df[c].dtype in (pl.Float64, pl.Float32, pl.Int64, pl.Int32)
+    ]
+    anomalies = []
+    years = df_sorted[year_col].to_list()
+    for col in numeric_cols:
+        values = df_sorted[col].to_list()
+        non_null = [v for v in values if v is not None]
+        if len(non_null) < 2:
+            continue
+        mean_val = sum(non_null) / len(non_null)
+        if len(non_null) > 1:
+            variance = sum((v - mean_val) ** 2 for v in non_null) / (len(non_null) - 1)
+            std_val = variance**0.5
+        else:
+            std_val = 0
+        for i in range(1, len(values)):
+            cur = values[i]
+            prev = values[i - 1]
+            if cur is None or prev is None:
+                continue
+            # YoY 변동
+            if prev != 0:
+                change = (cur - prev) / abs(prev) * 100
+                if abs(change) > threshold_pct:
+                    severity = "high" if abs(change) > 100 else "medium"
+                    anomalies.append(
+                        Anomaly(
+                            column=col,
+                            year=str(years[i]),
+                            value=cur,
+                            prev_value=prev,
+                            change_pct=round(change, 1),
+                            anomaly_type="spike",
+                            severity=severity,
+                        )
+                    )
+            # 부호 반전
+            if (prev > 0 and cur < 0) or (prev < 0 and cur > 0):
+                anomalies.append(
+                    Anomaly(
+                        column=col,
+                        year=str(years[i]),
+                        value=cur,
+                        prev_value=prev,
+                        change_pct=None,
+                        anomaly_type="sign_reversal",
+                        severity="high",
+                    )
+                )
+            # 2σ 이탈
+            if std_val > 0 and abs(cur - mean_val) > 2 * std_val:
+                anomalies.append(
+                    Anomaly(
+                        column=col,
+                        year=str(years[i]),
+                        value=cur,
+                        prev_value=None,
+                        change_pct=None,
+                        anomaly_type="outlier",
+                        severity="medium",
+                    )
+                )
+    # 중복 제거 (같은 year+column)
+    seen = set()
+    unique = []
+    for a in anomalies:
+        key = (a.column, a.year, a.anomaly_type)
+        if key not in seen:
+            seen.add(key)
+            unique.append(a)
+    return unique
+def detect_anomalies(
+    df: pl.DataFrame,
+    *,
+    module_name: str | None = None,
+    year_col: str = "year",
+    threshold_pct: float = 50.0,
+    use_llm: bool = True,
+) -> list[Anomaly]:
+    """2단계 이상치 탐지.
+    Stage 1: 통계 사전스크리닝 (LLM 없이 항상 동작)
+    Stage 2: LLM 해석 (use_llm=True이고 LLM 설정 시)
+    Args:
+            df: 시계열 DataFrame
+            module_name: 모듈명 (메타데이터 활용)
+            threshold_pct: YoY 변동 임계값 (%)
+            use_llm: True면 LLM으로 해석 추가
+    Returns:
+            Anomaly 리스트 (severity 내림차순)
+    """
+    anomalies = _statistical_prescreen(df, year_col=year_col, threshold_pct=threshold_pct)
+    if not anomalies:
+        return []
+    # Stage 2: LLM 해석
+    if use_llm and anomalies:
+        try:
+            meta_ctx = ""
+            if module_name:
+                meta = get_meta(module_name)
+                if meta:
+                    meta_ctx = f"데이터: {meta.label} ({meta.description})\n"
+            lines = []
+            for a in anomalies[:10]:  # 최대 10개만
+                if a.anomaly_type == "spike":
+                    lines.append(
+                        f"- {a.column} {a.year}년: {a.prev_value:,.0f} → {a.value:,.0f} (YoY {a.change_pct:+.1f}%)"
+                    )
+                elif a.anomaly_type == "sign_reversal":
+                    lines.append(f"- {a.column} {a.year}년: 부호 반전 {a.prev_value:,.0f} → {a.value:,.0f}")
+                elif a.anomaly_type == "outlier":
+                    lines.append(f"- {a.column} {a.year}년: 이상치 {a.value:,.0f}")
+            prompt = (
+                f"{meta_ctx}"
+                f"다음 재무 데이터 이상치들에 대해 각각 한 줄로 가능한 원인을 설명하세요.\n\n" + "\n".join(lines)
+            )
+            answer = _llm_call(prompt, system="한국어로 간결하게 답변하세요.")
+            # 응답에서 설명 추출하여 anomalies에 매핑
+            desc_lines = [l.strip().lstrip("- ").lstrip("· ") for l in answer.strip().split("\n") if l.strip()]
+            for i, a in enumerate(anomalies[:10]):
+                if i < len(desc_lines):
+                    a.description = desc_lines[i]
+        except _AI_PARSER_ERRORS:
+            # LLM 실패 시 통계 결과만 반환
+            pass
+    # severity 정렬
+    severity_order = {"high": 0, "medium": 1, "low": 2}
+    anomalies.sort(key=lambda a: severity_order.get(a.severity, 1))
+    return anomalies
+# ══════════════════════════════════════
+# 텍스트 분류
+# ══════════════════════════════════════
+def classify_text(text: str) -> dict:
+    """공시 텍스트에서 감성, 핵심토픽, 리스크, 기회 추출.
+    MD&A, 사업의 내용 등 서술형 텍스트를 구조화된 분석 결과로 변환.
+    Returns:
+            {
+                    "sentiment": "긍정" | "부정" | "중립",
+                    "key_topics": list[str],
+                    "risks": list[str],
+                    "opportunities": list[str],
+                    "summary": str,
+            }
+    """
+    if not text:
+        return {
+            "sentiment": "중립",
+            "key_topics": [],
+            "risks": [],
+            "opportunities": [],
+            "summary": "",
+        }
+    # 텍스트 길이 제한
+    truncated = text[:3000] if len(text) > 3000 else text
+    prompt = (
+        "다음 공시 텍스트를 분석하여 아래 형식으로 답변하세요.\n\n"
+        "감성: (긍정/부정/중립)\n"
+        "핵심토픽: (쉼표로 구분, 3~5개)\n"
+        "리스크: (쉼표로 구분)\n"
+        "기회: (쉼표로 구분)\n"
+        "요약: (2~3문장)\n\n"
+        f"텍스트:\n{truncated}"
+    )
+    answer = _llm_call(prompt, system="한국어로 답변하세요. 주어진 형식을 정확히 따르세요.")
+    # 응답 파싱
+    result = {
+        "sentiment": "중립",
+        "key_topics": [],
+        "risks": [],
+        "opportunities": [],
+        "summary": "",
+    }
+    for line in answer.strip().split("\n"):
+        line = line.strip()
+        if line.startswith("감성:"):
+            val = line.split(":", 1)[1].strip()
+            if "긍정" in val:
+                result["sentiment"] = "긍정"
+            elif "부정" in val:
+                result["sentiment"] = "부정"
+            else:
+                result["sentiment"] = "중립"
+        elif line.startswith("핵심토픽:"):
+            val = line.split(":", 1)[1].strip()
+            result["key_topics"] = [t.strip() for t in val.split(",") if t.strip()]
+        elif line.startswith("리스크:"):
+            val = line.split(":", 1)[1].strip()
+            result["risks"] = [t.strip() for t in val.split(",") if t.strip()]
+        elif line.startswith("기회:"):
+            val = line.split(":", 1)[1].strip()
+            result["opportunities"] = [t.strip() for t in val.split(",") if t.strip()]
+        elif line.startswith("요약:"):
+            result["summary"] = line.split(":", 1)[1].strip()
+    return result
+# ══════════════════════════════════════
+# 통합 분석
+# ══════════════════════════════════════
+def analyze_module(
+    company: Any,
+    module_name: str,
+) -> dict:
+    """단일 모듈 전체 AI 분석.
+    summarize + detect_anomalies + (interpret_accounts if applicable) 일괄 실행.
+    Returns:
+            {
+                    "summary": str,
+                    "anomalies": list[Anomaly],
+                    "interpreted_df": pl.DataFrame | None,
+            }
+    """
+    data = getattr(company, module_name, None)
+    if data is None:
+        return {"summary": "데이터 없음", "anomalies": [], "interpreted_df": None}
+    result: dict[str, Any] = {}
+    # 요약
+    result["summary"] = summarize(data, module_name=module_name)
+    # 이상치 탐지 (DataFrame인 경우만)
+    if isinstance(data, pl.DataFrame):
+        result["anomalies"] = detect_anomalies(data, module_name=module_name)
+    else:
+        result["anomalies"] = []
+    # 계정 해석 (BS/IS/CF만)
+    if module_name in ("BS", "IS", "CF") and isinstance(data, pl.DataFrame) and "계정명" in data.columns:
+        result["interpreted_df"] = interpret_accounts(data, module_name=module_name)
+    else:
+        result["interpreted_df"] = None
+    return result

src/dartlab/ai/context/__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""AI context package."""
+from . import builder as _builder
+from . import company_adapter as _company_adapter
+from . import dartOpenapi as _dart_openapi
+from . import snapshot as _snapshot
+# Re-export every attribute whose name does not start with "__" from the submodules
+# into this package's namespace. Modules are processed in tuple order, so a name
+# defined by a later module overwrites the same name from an earlier one.
+for _module in (_builder, _snapshot, _company_adapter, _dart_openapi):
+    globals().update({name: getattr(_module, name) for name in dir(_module) if not name.startswith("__")})

src/dartlab/ai/context/builder.py ADDED Viewed

	@@ -0,0 +1,1960 @@

+"""Company 데이터를 LLM context로 변환.
+메타데이터 기반 컬럼 설명, 파생 지표 자동계산, 분석 힌트를 포함하여
+LLM이 정확하게 분석할 수 있는 구조화된 마크다운 컨텍스트를 생성한다.
+분할 모듈:
+- formatting.py: DataFrame 마크다운 변환, 포맷팅, 파생 지표 계산
+- finance_context.py: 재무/공시 데이터 → LLM 컨텍스트 마크다운 생성
+"""
+from __future__ import annotations
+import re
+from typing import Any
+import polars as pl
+from dartlab.ai.context.company_adapter import get_headline_ratios
+from dartlab.ai.context.finance_context import (
+    _QUESTION_ACCOUNT_FILTER,
+    _QUESTION_MODULES,  # noqa: F401 — re-export for tests
+    _build_finance_engine_section,
+    _build_ratios_section,
+    _build_report_sections,
+    _buildQuarterlySection,
+    _detect_year_hint,
+    _get_quarter_counts,
+    _resolve_module_data,
+    _topic_name_set,
+    detect_year_range,
+    scan_available_modules,
+)
+from dartlab.ai.context.formatting import (
+    _compute_derived_metrics,
+    _filter_key_accounts,
+    _format_usd,
+    _format_won,
+    _get_sector,  # noqa: F401 — re-export for runtime/core.py
+    df_to_markdown,
+)
+from dartlab.ai.metadata import MODULE_META
+# Exception types grouped under one name; the code that catches them is outside this
+# view (presumably the context-building helpers further down — confirm).
+_CONTEXT_ERRORS = (AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError)
+# Korean question-type labels (soundness, profitability, growth, capital) grouped for
+# the finance route, and (business, risk, disclosure) for the sections route.
+# NOTE(review): the routing code that reads these sets is not visible here.
+_ROUTE_FINANCE_TYPES = frozenset({"건전성", "수익성", "성장성", "자본"})
+_ROUTE_SECTIONS_TYPES = frozenset({"사업", "리스크", "공시"})
+# Korean keyword -> report module name. Both "최대주주" and the shorter "주주" map to
+# "majorHolder".
+_ROUTE_REPORT_KEYWORDS: dict[str, str] = {
+    "배당": "dividend",
+    "직원": "employee",
+    "임원": "executive",
+    "최대주주": "majorHolder",
+    "주주": "majorHolder",
+    "감사": "audit",
+    "자기주식": "treasuryStock",
+}
+# Keywords associated with the sections route (disclosure, business, risk, governance,
+# ESG, supply chain, change detection).
+# NOTE(review): several entries contain shorter entries (e.g. "최근 공시" contains
+# "공시"); whether that matters depends on how the set is matched — confirm at the caller.
+_ROUTE_SECTIONS_KEYWORDS = frozenset(
+    {
+        "공시",
+        "사업",
+        "리스크",
+        "관계사",
+        "지배구조",
+        "근거",
+        "변화",
+        "최근 공시",
+        "무슨 사업",
+        "뭐하는",
+        "어떤 회사",
+        "ESG",
+        "환경",
+        "사회적 책임",
+        "탄소",
+        "기후",
+        "공급망",
+        "공급사",
+        "고객 집중",
+        "변화 감지",
+        "무엇이 달라",
+        "공시 변경",
+    }
+)
+# Keywords associated with the hybrid route (overall/comparison/valuation questions).
+_ROUTE_HYBRID_KEYWORDS = frozenset({"종합", "전반", "전체", "비교", "밸류에이션", "적정 주가", "목표가", "DCF"})
+_ROUTE_FINANCE_KEYWORDS = frozenset(
+    {
+        "재무",
+        "영업이익",
+        "영업이익률",
+        "매출",
+        "순이익",
+        "실적",
+        "현금흐름",
+        "부채",
+        "자산",
+        "수익성",
+        "건전성",
+        "성장성",
+        "이익률",
+        "마진",
+        "revenue",
+        "profit",
+        "margin",
+        "cash flow",
+        "cashflow",
+        "debt",
+        "asset",
+    }
+)
+# Substrings that, together with a report keyword, upgrade the route from "report" to "hybrid"
+# (see `_resolve_context_route`).
+_ROUTE_REPORT_FINANCE_HINTS = frozenset(
+    {
+        "지속 가능",
+        "지속가능",
+        "지속성",
+        "현금흐름",
+        "현금",
+        "실적",
+        "영업이익",
+        "순이익",
+        "커버",
+        "판단",
+        "평가",
+        "가능한지",
+    }
+)
+# Substrings that mark a financial-distress question; matched case-insensitively through
+# `_question_has_any` in `_has_distress_pattern`.
+_ROUTE_DISTRESS_KEYWORDS = frozenset(
+    {
+        "부실",
+        "부실 징후",
+        "위기 징후",
+        "재무 위기",
+        "유동성 위기",
+        "자금 압박",
+        "상환 부담",
+        "이자보상",
+        "존속 가능",
+        "going concern",
+        "distress",
+    }
+)
+# Substrings that mark an explicit request for an overview; when present, `fsSummary` is kept
+# in the candidate list even if more specific modules were selected.
+_SUMMARY_REQUEST_KEYWORDS = frozenset({"종합", "전반", "전체", "요약", "개괄", "한눈에"})
+_QUARTERLY_HINTS = frozenset(
+    {
+        "분기",
+        "분기별",
+        "quarterly",
+        "quarter",
+        "Q1",
+        "Q2",
+        "Q3",
+        "Q4",
+        "1분기",
+        "2분기",
+        "3분기",
+        "4분기",
+        "반기",
+        "반기별",
+        "QoQ",
+        "전분기",
+    }
+)
+def _detectGranularity(question: str) -> str:
+    """질문에서 시간 단위 감지: 'quarterly' | 'annual'."""
+    if any(k in question for k in _QUARTERLY_HINTS):
+        return "quarterly"
+    return "annual"
+# Default section topics appended for each classified question type
+# (consumed by `_resolve_sections_topics`).
+# NOTE(review): "지배구조" has defaults here but is not a member of `_ROUTE_SECTIONS_TYPES` —
+# confirm whether that question type is meant to trigger the sections route as well.
+_SECTIONS_TYPE_DEFAULTS: dict[str, list[str]] = {
+    "사업": ["businessOverview", "productService", "salesOrder"],
+    "리스크": ["riskDerivative", "contingentLiability", "internalControl"],
+    "공시": ["disclosureChanges", "subsequentEvents", "otherReference"],
+    "지배구조": ["governanceOverview", "boardOfDirectors", "holderOverview"],
+}
+# Question keyword -> section topics appended when the keyword occurs in the question
+# (plain, case-sensitive substring match in `_resolve_sections_topics`).
+_SECTIONS_KEYWORD_TOPICS: dict[str, list[str]] = {
+    "관계사": ["affiliateGroupDetail", "subsidiaryDetail", "investedCompany"],
+    "지배구조": ["governanceOverview", "boardOfDirectors", "holderOverview"],
+    "무슨 사업": ["businessOverview", "productService"],
+    "뭐하는": ["businessOverview", "productService"],
+    "어떤 회사": ["businessOverview", "companyHistory"],
+    "최근 공시": ["disclosureChanges", "subsequentEvents"],
+    "변화": ["disclosureChanges", "businessStatus"],
+    "ESG": ["governanceOverview", "boardOfDirectors"],
+    "환경": ["businessOverview"],
+    "공급망": ["segments", "rawMaterial"],
+    "공급사": ["segments", "rawMaterial"],
+    "변화 감지": ["disclosureChanges", "businessStatus"],
+}
+# NOTE(review): not referenced in the visible part of this file — confirm it is still used.
+_FINANCIAL_ONLY = {"BS", "IS", "CF", "fsSummary", "ratios"}
+# Topics never served through the sections route. `_resolve_sections_topics` makes one
+# exception: `fsSummary` is allowed for quarterly questions.
+_SECTIONS_ROUTE_EXCLUDE_TOPICS = {
+    "fsSummary",
+    "financialStatements",
+    "financialNotes",
+    "consolidatedStatements",
+    "consolidatedNotes",
+    "dividend",
+    "employee",
+    "majorHolder",
+    "audit",
+}
+# Financial statement module names.
+_FINANCE_STATEMENT_MODULES = frozenset({"BS", "IS", "CF", "CIS", "SCE"})
+# Statement modules plus `ratios`: everything the finance bucket of a candidate plan may hold.
+_FINANCE_CONTEXT_MODULES = _FINANCE_STATEMENT_MODULES | {"ratios"}
+# Hint sets used by `_resolve_finance_modules_for_question` to pick BS / CF / IS / ratios.
+# They are matched case-insensitively through `_question_has_any`.
+_BALANCE_SHEET_HINTS = frozenset({"부채", "자산", "유동", "차입", "자본", "레버리지", "건전성", "안전"})
+_CASHFLOW_HINTS = frozenset({"현금흐름", "현금", "fcf", "자금", "커버", "배당지급", "지속 가능", "지속가능"})
+_INCOME_STATEMENT_HINTS = frozenset(
+    {"매출", "영업이익", "순이익", "수익", "마진", "이익률", "실적", "원가", "비용", "판관비"}
+)
+_RATIO_HINTS = frozenset({"비율", "마진", "이익률", "수익성", "건전성", "성장성", "안정성", "지속 가능", "지속가능"})
+# Question keyword -> modules requested directly when the keyword occurs in the question.
+# `_resolve_direct_hint_modules` lower-cases both sides before matching.
+_DIRECT_HINT_MAP: dict[str, list[str]] = {
+    "성격별 비용": ["costByNature"],
+    "비용의 성격": ["costByNature"],
+    "인건비": ["costByNature"],
+    "감가상각": ["costByNature"],
+    "광고선전비": ["costByNature"],
+    "판매촉진비": ["costByNature"],
+    "지급수수료": ["costByNature"],
+    "운반비": ["costByNature"],
+    "물류비": ["costByNature"],
+    "연구개발": ["rnd"],
+    "r&d": ["rnd"],
+    "세그먼트": ["segments"],
+    "부문정보": ["segments"],
+    "사업부문": ["segments"],
+    "부문별": ["segments"],
+    "제품별": ["productService"],
+    "서비스별": ["productService"],
+}
+# Alternative module names rewritten to the canonical name by `_normalize_candidate_module`.
+_CANDIDATE_ALIASES = {
+    "segment": "segments",
+    "operationalAsset": "tangibleAsset",
+}
+# The "margin driver" pattern requires one hit from each of these three sets
+# (see `_has_margin_driver_pattern`).
+_MARGIN_DRIVER_MARGIN_HINTS = frozenset({"영업이익률", "마진", "이익률", "margin"})
+_MARGIN_DRIVER_COST_HINTS = frozenset({"비용 구조", "원가 구조", "비용", "원가", "판관비", "매출원가"})
+_MARGIN_DRIVER_BUSINESS_HINTS = frozenset({"사업 변화", "사업변화", "사업 구조", "사업구조"})
+# Business-change hints for the "recent disclosure" pattern; currently the same members as
+# `_MARGIN_DRIVER_BUSINESS_HINTS`.
+_RECENT_DISCLOSURE_BUSINESS_HINTS = frozenset({"사업 변화", "사업변화", "사업 구조", "사업구조"})
+# Column names that denote a period: a four-digit year, optionally followed by Q1-Q4.
+_PERIOD_COLUMN_RE = re.compile(r"^\d{4}(?:Q[1-4])?$")
+def _section_key_to_module_name(key: str) -> str:
+    if key.startswith("report_"):
+        return key.removeprefix("report_")
+    if key.startswith("module_"):
+        return key.removeprefix("module_")
+    if key.startswith("section_"):
+        return key.removeprefix("section_")
+    return key
+def _module_name_to_section_keys(name: str) -> list[str]:
+    return [
+        name,
+        f"report_{name}",
+        f"module_{name}",
+        f"section_{name}",
+    ]
+def _build_module_section(name: str, data: Any, *, compact: bool, max_rows: int | None = None) -> str | None:
+    meta = MODULE_META.get(name)
+    label = meta.label if meta else name
+    max_rows_value = max_rows or (8 if compact else 15)
+    if isinstance(data, pl.DataFrame):
+        if data.is_empty():
+            return None
+        md = df_to_markdown(data, max_rows=max_rows_value, meta=meta, compact=True)
+        return f"\n## {label}\n{md}"
+    if isinstance(data, dict):
+        items = list(data.items())[:max_rows_value]
+        lines = [f"\n## {label}"]
+        lines.extend(f"- {k}: {v}" for k, v in items)
+        return "\n".join(lines)
+    if isinstance(data, list):
+        max_items = min(meta.maxRows if meta else 10, 5 if compact else 10)
+        lines = [f"\n## {label}"]
+        for item in data[:max_items]:
+            if hasattr(item, "title") and hasattr(item, "chars"):
+                lines.append(f"- **{item.title}** ({item.chars}자)")
+            else:
+                lines.append(f"- {item}")
+        if len(data) > max_items:
+            lines.append(f"(... 상위 {max_items}건, 전체 {len(data)}건)")
+        return "\n".join(lines)
+    text = str(data).strip()
+    if not text:
+        return None
+    max_text = 500 if compact else 1000
+    return f"\n## {label}\n{text[:max_text]}"
+def _resolve_context_route(
+    question: str,
+    *,
+    include: list[str] | None,
+    q_types: list[str],
+) -> str:
+    if include:
+        return "hybrid"
+    if _detectGranularity(question) == "quarterly":
+        return "hybrid"
+    if _has_margin_driver_pattern(question):
+        return "hybrid"
+    if _has_distress_pattern(question):
+        return "finance"
+    if _has_recent_disclosure_business_pattern(question):
+        return "sections"
+    question_lower = question.lower()
+    q_set = set(q_types)
+    has_report = any(keyword in question for keyword in _ROUTE_REPORT_KEYWORDS)
+    has_sections = any(keyword in question for keyword in _ROUTE_SECTIONS_KEYWORDS) or bool(
+        q_set & _ROUTE_SECTIONS_TYPES
+    )
+    has_finance_keyword = any(keyword in question_lower for keyword in _ROUTE_FINANCE_KEYWORDS)
+    has_finance = has_finance_keyword or bool(q_set & _ROUTE_FINANCE_TYPES)
+    has_report_finance_hint = any(keyword in question for keyword in _ROUTE_REPORT_FINANCE_HINTS)
+    if has_report and (has_finance_keyword or has_sections or has_report_finance_hint):
+        return "hybrid"
+    for keyword in _ROUTE_REPORT_KEYWORDS:
+        if keyword in question:
+            return "report"
+    if has_sections:
+        return "sections"
+    if q_set and q_set.issubset(_ROUTE_FINANCE_TYPES):
+        return "finance"
+    if has_finance:
+        return "finance"
+    if q_set and len(q_set) > 1:
+        return "hybrid"
+    if q_set & {"종합"}:
+        return "hybrid"
+    if any(keyword in question for keyword in _ROUTE_HYBRID_KEYWORDS):
+        return "hybrid"
+    return "finance" if q_set else "hybrid"
+def _append_unique(items: list[str], value: str | None) -> None:
+    if value and value not in items:
+        items.append(value)
+def _normalize_candidate_module(name: str) -> str:
+    return _CANDIDATE_ALIASES.get(name, name)
+def _question_has_any(question: str, keywords: set[str] | frozenset[str]) -> bool:
+    lowered = question.lower()
+    return any(keyword.lower() in lowered for keyword in keywords)
+def _has_distress_pattern(question: str) -> bool:
+    return _question_has_any(question, _ROUTE_DISTRESS_KEYWORDS)
+def _has_margin_driver_pattern(question: str) -> bool:
+    return (
+        _question_has_any(question, _MARGIN_DRIVER_MARGIN_HINTS)
+        and _question_has_any(question, _MARGIN_DRIVER_COST_HINTS)
+        and _question_has_any(question, _MARGIN_DRIVER_BUSINESS_HINTS)
+    )
+def _has_recent_disclosure_business_pattern(question: str) -> bool:
+    lowered = question.lower()
+    return "최근 공시" in lowered and _question_has_any(question, _RECENT_DISCLOSURE_BUSINESS_HINTS)
+def _resolve_direct_hint_modules(question: str) -> list[str]:
+    selected: list[str] = []
+    lowered = question.lower()
+    for keyword, modules in _DIRECT_HINT_MAP.items():
+        if keyword.lower() in lowered:
+            for module_name in modules:
+                _append_unique(selected, _normalize_candidate_module(module_name))
+    return selected
+def _apply_question_specific_boosts(question: str, selected: list[str]) -> None:
+    if _has_distress_pattern(question):
+        for module_name in ("BS", "IS", "CF", "ratios"):
+            _append_unique(selected, module_name)
+    if _has_margin_driver_pattern(question):
+        for module_name in ("IS", "costByNature", "businessOverview", "productService"):
+            _append_unique(selected, module_name)
+    if _has_recent_disclosure_business_pattern(question):
+        for module_name in ("businessOverview", "productService"):
+            _append_unique(selected, module_name)
+def _resolve_candidate_modules(
+    question: str,
+    *,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+) -> list[str]:
+    selected: list[str] = []
+    if include:
+        for name in include:
+            _append_unique(selected, _normalize_candidate_module(name))
+    else:
+        for module_name in _resolve_direct_hint_modules(question):
+            _append_unique(selected, module_name)
+        for name in _resolve_tables(question, None, exclude):
+            _append_unique(selected, _normalize_candidate_module(name))
+    _apply_question_specific_boosts(question, selected)
+    if exclude:
+        excluded = {_normalize_candidate_module(name) for name in exclude}
+        selected = [name for name in selected if name not in excluded]
+    specific_modules = set(selected) - (_FINANCE_CONTEXT_MODULES | {"fsSummary"})
+    if specific_modules and not _question_has_any(question, _SUMMARY_REQUEST_KEYWORDS):
+        selected = [name for name in selected if name != "fsSummary"]
+    return selected
+def _available_sections_topics(company: Any) -> set[str]:
+    docs = getattr(company, "docs", None)
+    sections = getattr(docs, "sections", None)
+    if sections is None:
+        return set()
+    manifest = sections.outline() if hasattr(sections, "outline") else None
+    if isinstance(manifest, pl.DataFrame) and "topic" in manifest.columns:
+        return {topic for topic in manifest["topic"].drop_nulls().to_list() if isinstance(topic, str) and topic}
+    if hasattr(sections, "topics"):
+        try:
+            return {topic for topic in sections.topics() if isinstance(topic, str) and topic}
+        except _CONTEXT_ERRORS:
+            return set()
+    return set()
+def _available_report_modules(company: Any) -> set[str]:
+    report = getattr(company, "report", None)
+    if report is None:
+        return set()
+    for attr_name in ("availableApiTypes", "apiTypes"):
+        try:
+            values = getattr(report, attr_name, None)
+        except _CONTEXT_ERRORS:
+            values = None
+        if isinstance(values, list):
+            return {str(value) for value in values if isinstance(value, str) and value}
+    return set()
+def _available_notes_modules(company: Any) -> set[str]:
+    notes = getattr(company, "notes", None)
+    if notes is None:
+        docs = getattr(company, "docs", None)
+        notes = getattr(docs, "notes", None) if docs is not None else None
+    if notes is None or not hasattr(notes, "keys"):
+        return set()
+    try:
+        return {str(value) for value in notes.keys() if isinstance(value, str) and value}
+    except _CONTEXT_ERRORS:
+        return set()
+def _resolve_candidate_plan(
+    company: Any,
+    question: str,
+    *,
+    route: str,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+) -> dict[str, list[str]]:
+    requested = _resolve_candidate_modules(question, include=include, exclude=exclude)
+    sections_set = _available_sections_topics(company) if route in {"sections", "hybrid"} else set()
+    report_set = _available_report_modules(company) if route in {"report", "hybrid"} else set()
+    notes_set = _available_notes_modules(company) if route == "hybrid" else set()
+    explicit_direct = set(_resolve_direct_hint_modules(question))
+    boosted_direct: list[str] = []
+    _apply_question_specific_boosts(question, boosted_direct)
+    explicit_direct.update(name for name in boosted_direct if name not in _FINANCE_CONTEXT_MODULES)
+    if include:
+        explicit_direct.update(_normalize_candidate_module(name) for name in include)
+    sections: list[str] = []
+    report: list[str] = []
+    finance: list[str] = []
+    direct: list[str] = []
+    verified: list[str] = []
+    for name in requested:
+        normalized = _normalize_candidate_module(name)
+        if normalized in _FINANCE_CONTEXT_MODULES:
+            if route in {"finance", "hybrid"}:
+                _append_unique(finance, normalized)
+                _append_unique(verified, normalized)
+            continue
+        if normalized in sections_set and normalized not in _SECTIONS_ROUTE_EXCLUDE_TOPICS:
+            _append_unique(sections, normalized)
+            _append_unique(verified, normalized)
+            continue
+        if normalized in report_set:
+            _append_unique(report, normalized)
+            _append_unique(verified, normalized)
+            continue
+        if normalized in notes_set and normalized in explicit_direct:
+            _append_unique(direct, normalized)
+            _append_unique(verified, normalized)
+            continue
+        if normalized in explicit_direct:
+            data = _resolve_module_data(company, normalized)
+            if data is not None:
+                _append_unique(direct, normalized)
+                _append_unique(verified, normalized)
+    return {
+        "requested": requested,
+        "sections": sections,
+        "report": report,
+        "finance": finance,
+        "direct": direct,
+        "verified": verified,
+    }
+def _resolve_finance_modules_for_question(
+    question: str,
+    *,
+    q_types: list[str],
+    route: str,
+    candidate_plan: dict[str, list[str]],
+) -> list[str]:
+    """Pick the finance modules (statements and `ratios`) to load for a question.
+    Args:
+            question: Raw user question.
+            q_types: Classified question types.
+            route: Context route; only "finance" and "hybrid" select statement modules.
+            candidate_plan: Plan produced by `_resolve_candidate_plan`; its "finance" and
+                    "report" lists are read.
+    Returns:
+            Ordered, de-duplicated module names; may be empty.
+    """
+    selected: list[str] = []
+    # Only statement modules are taken from the plan; `ratios` is decided separately below.
+    finance_candidates = [name for name in candidate_plan.get("finance", []) if name in _FINANCE_STATEMENT_MODULES]
+    if _has_margin_driver_pattern(question):
+        _append_unique(selected, "IS")
+    if route == "finance":
+        if _question_has_any(question, _INCOME_STATEMENT_HINTS):
+            _append_unique(selected, "IS")
+        if _question_has_any(question, _BALANCE_SHEET_HINTS):
+            _append_unique(selected, "BS")
+        if _question_has_any(question, _CASHFLOW_HINTS):
+            _append_unique(selected, "CF")
+        # No statement hint at all: fall back to the three main statements.
+        if not selected:
+            selected.extend(["IS", "BS", "CF"])
+    elif route == "hybrid":
+        has_finance_signal = bool(finance_candidates) and (
+            _question_has_any(question, _BALANCE_SHEET_HINTS | _CASHFLOW_HINTS | _RATIO_HINTS)
+            or bool(set(q_types) & _ROUTE_FINANCE_TYPES)
+            or any(name in candidate_plan.get("report", []) for name in ("dividend", "shareCapital"))
+        )
+        # NOTE(review): this early return also discards the "IS" appended above for the
+        # margin-driver pattern and skips the `ratios` decision — confirm that is intended.
+        if not has_finance_signal:
+            return []
+        for module_name in finance_candidates:
+            _append_unique(selected, module_name)
+        # NOTE(review): `has_finance_signal` requires a non-empty `finance_candidates`, so
+        # `selected` is never empty here and this fallback looks unreachable.
+        if not selected:
+            if _question_has_any(question, _CASHFLOW_HINTS):
+                selected.extend(["IS", "CF"])
+            elif _question_has_any(question, _BALANCE_SHEET_HINTS):
+                selected.extend(["IS", "BS"])
+            else:
+                selected.append("IS")
+    # `ratios` is added on the finance route, on ratio hints / finance question types, or
+    # on the hybrid route when a dividend or shareCapital report module is planned.
+    if route == "finance" or _question_has_any(question, _RATIO_HINTS) or bool(set(q_types) & _ROUTE_FINANCE_TYPES):
+        _append_unique(selected, "ratios")
+    elif route == "hybrid" and {"dividend", "shareCapital"} & set(candidate_plan.get("report", [])):
+        _append_unique(selected, "ratios")
+    return selected
+def _build_direct_module_context(
+    company: Any,
+    modules: list[str],
+    *,
+    compact: bool,
+    question: str,
+) -> dict[str, str]:
+    result: dict[str, str] = {}
+    for name in modules:
+        try:
+            data = _resolve_module_data(company, name)
+        except _CONTEXT_ERRORS:
+            data = None
+        if data is None:
+            continue
+        if isinstance(data, pl.DataFrame):
+            data = _trim_period_columns(data, question, compact=compact)
+        section = _build_module_section(name, data, compact=compact)
+        if section:
+            result[name] = section
+    return result
+def _trim_period_columns(data: pl.DataFrame, question: str, *, compact: bool) -> pl.DataFrame:
+    if data.is_empty():
+        return data
+    period_cols = [column for column in data.columns if isinstance(column, str) and _PERIOD_COLUMN_RE.fullmatch(column)]
+    if len(period_cols) <= 1:
+        return data
+    def sort_key(value: str) -> tuple[int, int]:
+        if "Q" in value:
+            year, quarter = value.split("Q", 1)
+            return int(year), int(quarter)
+        return int(value), 9
+    ordered_periods = sorted(period_cols, key=sort_key)
+    keep_periods = _detect_year_hint(question)
+    if compact:
+        keep_periods = min(keep_periods, 5)
+    else:
+        keep_periods = min(keep_periods, 8)
+    if len(ordered_periods) <= keep_periods:
+        return data
+    selected_periods = ordered_periods[-keep_periods:]
+    base_columns = [column for column in data.columns if column not in period_cols]
+    return data.select(base_columns + selected_periods)
+def _build_response_contract(
+    question: str,
+    *,
+    included_modules: list[str],
+    route: str,
+) -> str | None:
+    """Build the "response contract" instruction block placed in the model context.
+    The block lists the included modules and adds answer-style rules that depend on which
+    modules are present, on the question pattern and on the route.
+    NOTE(review): annotated `str | None`, but every path returns a string.
+    """
+    lines = ["## 응답 계약", "- 아래 모듈은 이미 로컬 dartlab 데이터에서 확인되어 포함되었습니다."]
+    lines.append(f"- 포함 모듈: {', '.join(included_modules)}")
+    lines.append("- 포함된 모듈을 보고도 '데이터가 없다'고 말하지 마세요.")
+    lines.append("- 핵심 결론 1~2문장을 먼저 제시하고, 바로 근거 표나 근거 bullet을 붙이세요.")
+    lines.append(
+        "- `explore()` 같은 도구 호출 계획이나 내부 절차 설명을 답변 본문에 쓰지 말고 바로 분석 결과를 말하세요."
+    )
+    lines.append(
+        "- 답변 본문에서는 `IS/BS/CF/ratios/TTM/topic/period/source` 같은 내부 약어나 필드명을 그대로 쓰지 말고 "
+        "`손익계산서/재무상태표/현금흐름표/재무비율/최근 4분기 합산/항목/시점/출처`처럼 사용자 언어로 바꾸세요."
+    )
+    lines.append(
+        "- `costByNature`, `businessOverview`, `productService` 같은 내부 모듈명도 각각 "
+        "`성격별 비용 분류`, `사업의 개요`, `제품·서비스`처럼 바꿔 쓰세요."
+    )
+    module_set = set(included_modules)
+    # Module-specific rules.
+    if "costByNature" in module_set:
+        lines.append("- `costByNature`가 있으면 상위 비용 항목 3~5개와 최근 기간 변화 방향을 먼저 요약하세요.")
+        lines.append("- 기간이 명시되지 않아도 최신 시점과 최근 추세를 먼저 답하고, 연도 기준을 다시 묻지 마세요.")
+    if "dividend" in module_set:
+        lines.append("- `dividend`가 있으면 DPS·배당수익률·배당성향을 먼저 요약하세요.")
+        lines.append(
+            "- `dividend`가 있는데도 배당 데이터가 없다고 말하지 마세요. 첫 문장이나 첫 표에서 DPS와 배당수익률을 직접 인용하세요."
+        )
+    # NOTE(review): the first subset test is implied by the second, so this fires whenever
+    # both `dividend` and `CF` are present — confirm whether `dividend` + `IS` was also meant.
+    if {"dividend", "IS", "CF"} <= module_set or {"dividend", "CF"} <= module_set:
+        lines.append("- `dividend`와 `IS/CF`가 같이 있으면 배당의 이익/현금흐름 커버 여부를 한 줄로 명시하세요.")
+    if _has_distress_pattern(question):
+        lines.append(
+            "- `부실 징후` 질문이면 건전성 결론을 먼저 말하고, 수익성·현금흐름·차입 부담 순으로 짧게 정리하세요."
+        )
+    # Evidence rules: applied on the sections route or when the question asks for grounds/sources.
+    if route == "sections" or any(keyword in question for keyword in ("근거", "왜", "최근 공시 기준", "출처")):
+        lines.append("- 근거 질문이면 `topic`, `period`, `source`를 최소 2개 명시하세요.")
+        lines.append(
+            "- `period`와 `source`는 outline 표에 나온 실제 값을 쓰세요. '최근 공시 기준' 같은 포괄 표현으로 뭉개지 마세요."
+        )
+        lines.append("- 본문에서는 `topic/period/source` 대신 `항목/시점/출처`처럼 자연어를 쓰세요.")
+    # Quarterly rules: triggered by any included module whose name ends with "_quarterly".
+    hasQuarterly = any(m.endswith("_quarterly") for m in module_set)
+    if hasQuarterly:
+        lines.append("- **분기별 데이터가 포함되었습니다. '분기 데이터가 없다'고 절대 말하지 마세요.**")
+        lines.append("- 분기별 추이를 테이블로 정리하고, 전분기 대비(QoQ)와 전년동기 대비(YoY) 변화를 함께 보여주세요.")
+        lines.append(
+            "- `IS_quarterly`, `CF_quarterly` 같은 내부명 대신 `분기별 손익계산서`, `분기별 현금흐름표`로 쓰세요."
+        )
+    # ── Tool recommendation hints ──
+    # IS together with BS or CF counts as "financials present"; the second hint is added
+    # only when none of IS / BS / CF / ratios is included.
+    hasFinancial = {"IS", "BS"} <= module_set or {"IS", "CF"} <= module_set
+    if hasFinancial:
+        lines.append(
+            "- **추가 분석 추천**: `finance(action='ratios')`로 재무비율 확인, "
+            "`explore(action='search', keyword='...')`로 변화 원인 파악."
+        )
+    elif not module_set & {"IS", "BS", "CF", "ratios"}:
+        lines.append(
+            "- **재무 데이터 미포함**: `finance(action='modules')`로 사용 가능 모듈 확인, "
+            "`explore(action='topics')`로 topic 목록 확인 추천."
+        )
+    return "\n".join(lines)
+def _build_clarification_context(
+    company: Any,
+    question: str,
+    *,
+    candidate_plan: dict[str, list[str]],
+) -> str | None:
+    if _has_margin_driver_pattern(question):
+        return None
+    lowered = question.lower()
+    module_set = set(candidate_plan.get("verified", []))
+    has_cost_by_nature = "costByNature" in module_set
+    if not has_cost_by_nature and "costByNature" in set(candidate_plan.get("requested", [])):
+        try:
+            has_cost_by_nature = _resolve_module_data(company, "costByNature") is not None
+        except _CONTEXT_ERRORS:
+            has_cost_by_nature = False
+    has_is = "IS" in module_set or "IS" in set(candidate_plan.get("requested", []))
+    if not has_cost_by_nature or not has_is:
+        return None
+    if "비용" not in lowered:
+        return None
+    if any(keyword in lowered for keyword in ("성격", "인건비", "감가상각", "광고선전", "판관", "매출원가")):
+        return None
+    return (
+        "## Clarification Needed\n"
+        "- 현재 로컬에서 두 해석이 모두 가능합니다.\n"
+        "- `costByNature`: 인건비·감가상각비 같은 성격별 비용 분류\n"
+        "- `IS`: 매출원가·판관비 같은 기능별 비용 총액\n"
+        "- 사용자의 의도가 둘 중 어느 쪽인지 결론을 바꾸므로, 먼저 한 문장으로 어느 관점을 원하는지 확인하세요.\n"
+        "- 확인 질문은 한 문장만 하세요. 같은 문장을 반복하지 마세요."
+    )
+def _resolve_report_modules_for_question(
+    question: str,
+    *,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+) -> list[str]:
+    modules: list[str] = []
+    for keyword, name in _ROUTE_REPORT_KEYWORDS.items():
+        if keyword in question and name not in modules:
+            modules.append(name)
+    if include:
+        for name in include:
+            if (
+                name in {"dividend", "employee", "majorHolder", "executive", "audit", "treasuryStock"}
+                and name not in modules
+            ):
+                modules.append(name)
+    if exclude:
+        modules = [name for name in modules if name not in exclude]
+    return modules
+def _resolve_sections_topics(
+    company: Any,
+    question: str,
+    *,
+    q_types: list[str],
+    candidates: list[str] | None = None,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+    limit: int = 2,
+) -> list[str]:
+    """Choose up to `limit` section topics to load for a question.
+    Topics are appended in priority order — quarterly `fsSummary`, explicit includes, the
+    recent-disclosure pattern, candidates, question-type defaults, keyword topics — and
+    only topics that exist in the company's sections are kept.
+    Args:
+            company: Company object; `company.docs.sections` supplies the available topics.
+            question: Raw user question.
+            q_types: Classified question types.
+            candidates: Pre-resolved candidate names; when None, `_resolve_tables` is used.
+            include: Explicitly requested topics.
+            exclude: Forwarded to `_resolve_tables` only (when `candidates` is None).
+            limit: Maximum number of topics returned.
+    Returns:
+            Ordered topic names; empty when no sections are available.
+    """
+    docs = getattr(company, "docs", None)
+    sections = getattr(docs, "sections", None)
+    if sections is None:
+        return []
+    manifest = sections.outline() if hasattr(sections, "outline") else None
+    # Prefer the outline's `topic` column; fall back to `sections.topics()`.
+    available = (
+        manifest["topic"].drop_nulls().to_list()
+        if isinstance(manifest, pl.DataFrame) and "topic" in manifest.columns
+        else sections.topics()
+        if hasattr(sections, "topics")
+        else []
+    )
+    availableTopics = [topic for topic in available if isinstance(topic, str) and topic]
+    availableSet = set(availableTopics)
+    if not availableSet:
+        return []
+    selected: list[str] = []
+    isQuarterly = _detectGranularity(question) == "quarterly"
+    def append(topic: str) -> None:
+        # Excluded topics are dropped, except `fsSummary` for quarterly questions.
+        if topic in _SECTIONS_ROUTE_EXCLUDE_TOPICS:
+            if not (isQuarterly and topic == "fsSummary"):
+                return
+        if topic in availableSet and topic not in selected:
+            selected.append(topic)
+    if isQuarterly:
+        append("fsSummary")
+    if include:
+        for name in include:
+            append(name)
+    if _has_recent_disclosure_business_pattern(question):
+        append("disclosureChanges")
+        append("businessOverview")
+    candidate_source = _resolve_tables(question, None, exclude) if candidates is None else candidates
+    for name in candidate_source:
+        append(name)
+    for q_type in q_types:
+        for topic in _SECTIONS_TYPE_DEFAULTS.get(q_type, []):
+            append(topic)
+    for keyword, topics in _SECTIONS_KEYWORD_TOPICS.items():
+        if keyword in question:
+            for topic in topics:
+                append(topic)
+    # Last resort, only when the caller supplied no candidates: take the first available topic.
+    if candidates is None and not selected and availableTopics:
+        selected.append(availableTopics[0])
+    return selected[:limit]
+def _build_sections_context(
+    company: Any,
+    topics: list[str],
+    *,
+    compact: bool,
+) -> dict[str, str]:
+    """Render the requested section topics as markdown, keyed by `section_<topic>`.
+    Each entry holds the topic outline table and, when context slices match the topic, a
+    block of evidence excerpts. Compact mode may add short outline previews instead;
+    non-compact mode adds a block index and one representative block.
+    Args:
+            company: Company object; `docs.sections`, `docs.contextSlices` and several private
+                    helpers (`_topicLabel`, `_buildBlockIndex`, `_showSectionBlock`) are used
+                    when present.
+            topics: Topic names to render.
+            compact: Selects the smaller limits and the compact layout.
+    Returns:
+            Dict of rendered sections; topics without a non-empty outline are skipped.
+    """
+    docs = getattr(company, "docs", None)
+    sections = getattr(docs, "sections", None)
+    if sections is None:
+        return {}
+    try:
+        context_slices = getattr(docs, "contextSlices", None) if docs is not None else None
+    except _CONTEXT_ERRORS:
+        context_slices = None
+    result: dict[str, str] = {}
+    for topic in topics:
+        outline = sections.outline(topic) if hasattr(sections, "outline") else None
+        if outline is None or not isinstance(outline, pl.DataFrame) or outline.is_empty():
+            continue
+        label_fn = getattr(company, "_topicLabel", None)
+        label = label_fn(topic) if callable(label_fn) else topic
+        lines = [f"\n## {label}"]
+        lines.append(df_to_markdown(outline.head(6 if compact else 10), max_rows=6 if compact else 10, compact=True))
+        # Evidence excerpts taken from the context slices matching this topic.
+        topic_slices = _select_section_slices(context_slices, topic)
+        if isinstance(topic_slices, pl.DataFrame) and not topic_slices.is_empty():
+            lines.append("\n### 핵심 근거")
+            for row in topic_slices.head(2 if compact else 4).iter_rows(named=True):
+                period = row.get("period", "-")
+                source_topic = row.get("sourceTopic") or row.get("topic") or topic
+                block_type = "표" if row.get("isTable") or row.get("blockType") == "table" else "문장"
+                slice_text = _truncate_section_slice(str(row.get("sliceText") or ""), compact=compact)
+                if not slice_text:
+                    continue
+                lines.append(f"#### 시점: {period} | 출처: {source_topic} | 유형: {block_type}")
+                lines.append(slice_text)
+        if compact:
+            # Compact layout: outline previews are used only when no evidence slices exist.
+            if ("preview" in outline.columns) and not (
+                isinstance(topic_slices, pl.DataFrame) and not topic_slices.is_empty()
+            ):
+                preview_lines: list[str] = []
+                for row in outline.head(2).iter_rows(named=True):
+                    preview = row.get("preview")
+                    if not isinstance(preview, str) or not preview.strip():
+                        continue
+                    period = row.get("period", "-")
+                    title = row.get("title", "-")
+                    preview_lines.append(
+                        f"- period: {period} | source: docs | title: {title} | preview: {preview.strip()}"
+                    )
+                if preview_lines:
+                    lines.append("\n### 핵심 preview")
+                    lines.extend(preview_lines)
+            result[f"section_{topic}"] = "\n".join(lines)
+            continue
+        # Non-compact layout from here on: `compact` is always False below, so the
+        # remaining `... if compact else ...` expressions always take their second value.
+        try:
+            raw_sections = sections.raw if hasattr(sections, "raw") else None
+        except _CONTEXT_ERRORS:
+            raw_sections = None
+        topic_rows = (
+            raw_sections.filter(pl.col("topic") == topic)
+            if isinstance(raw_sections, pl.DataFrame) and "topic" in raw_sections.columns
+            else None
+        )
+        block_builder = getattr(company, "_buildBlockIndex", None)
+        block_index = (
+            block_builder(topic_rows) if callable(block_builder) and isinstance(topic_rows, pl.DataFrame) else None
+        )
+        if isinstance(block_index, pl.DataFrame) and not block_index.is_empty():
+            lines.append("\n### block index")
+            lines.append(
+                df_to_markdown(block_index.head(4 if compact else 6), max_rows=4 if compact else 6, compact=True)
+            )
+            # The block-number and block-type columns may appear under either of two names.
+            block_col = (
+                "block"
+                if "block" in block_index.columns
+                else "blockOrder"
+                if "blockOrder" in block_index.columns
+                else None
+            )
+            type_col = (
+                "type" if "type" in block_index.columns else "blockType" if "blockType" in block_index.columns else None
+            )
+            # Pick the first block with an integer number whose type is text or table.
+            sample_block = None
+            if block_col:
+                for row in block_index.iter_rows(named=True):
+                    block_no = row.get(block_col)
+                    block_type = row.get(type_col)
+                    if isinstance(block_no, int) and block_type in {"text", "table"}:
+                        sample_block = block_no
+                        break
+            if sample_block is not None:
+                show_section_block = getattr(company, "_showSectionBlock", None)
+                block_data = (
+                    show_section_block(topic_rows, block=sample_block)
+                    if callable(show_section_block) and isinstance(topic_rows, pl.DataFrame)
+                    else None
+                )
+                section = _build_module_section(topic, block_data, compact=compact, max_rows=4 if compact else 6)
+                if section:
+                    lines.append("\n### 대표 block")
+                    # Remove the heading line that matches this topic's label, if present.
+                    lines.append(section.replace(f"\n## {label}", "", 1).strip())
+        result[f"section_{topic}"] = "\n".join(lines)
+    return result
+def _select_section_slices(context_slices: Any, topic: str) -> pl.DataFrame | None:
+    if not isinstance(context_slices, pl.DataFrame) or context_slices.is_empty():
+        return None
+    required_columns = {"topic", "periodOrder", "sliceText"}
+    if not required_columns <= set(context_slices.columns):
+        return None
+    detail_col = pl.col("detailTopic") if "detailTopic" in context_slices.columns else pl.lit(None)
+    semantic_col = pl.col("semanticTopic") if "semanticTopic" in context_slices.columns else pl.lit(None)
+    block_priority_col = pl.col("blockPriority") if "blockPriority" in context_slices.columns else pl.lit(0)
+    slice_idx_col = pl.col("sliceIdx") if "sliceIdx" in context_slices.columns else pl.lit(0)
+    matched = context_slices.filter((pl.col("topic") == topic) | (detail_col == topic) | (semantic_col == topic))
+    if matched.is_empty():
+        return None
+    return matched.with_columns(
+        pl.when(detail_col == topic)
+        .then(3)
+        .when(semantic_col == topic)
+        .then(2)
+        .when(pl.col("topic") == topic)
+        .then(1)
+        .otherwise(0)
+        .alias("matchPriority")
+    ).sort(
+        ["periodOrder", "matchPriority", "blockPriority", "sliceIdx"],
+        descending=[True, True, True, False],
+    )
+def _truncate_section_slice(text: str, *, compact: bool) -> str:
+    stripped = text.strip()
+    if not stripped:
+        return ""
+    max_chars = 500 if compact else 1200
+    if len(stripped) <= max_chars:
+        return stripped
+    return stripped[:max_chars].rstrip() + " ..."
def build_context_by_module(
    company: Any,
    question: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    compact: bool = False,
) -> tuple[dict[str, str], list[str], str]:
    """Build a financeEngine-first compact context, split per module.

    Stage 1: financeEngine annual data + ratios (normalized figures).
    Stage 2: docsParser qualitative data (dividends, audit, executives, ...),
    limited to what the question calls for.

    ``config.verbose`` is forced to ``False`` while the context is built and
    restored afterwards, even when building raises.

    Args:
        company: Company instance the context is built from.
        question: User question driving module selection.
        include: Modules to force-include, if any.
        exclude: Modules to leave out, if any.
        compact: When True, limit years/rows for small models (Ollama).

    Returns:
        ``(modules_dict, included_list, header_text)`` where

        - ``modules_dict`` maps a module key to its rendered section text,
        - ``included_list`` names the modules that were included,
        - ``header_text`` holds the company name and data-basis lines.
    """
    from dartlab import config

    previous_verbose = config.verbose
    config.verbose = False
    try:
        built = _build_compact_context_modules_inner(
            company,
            question,
            include,
            exclude,
            compact,
            previous_verbose,
        )
    finally:
        config.verbose = previous_verbose
    return built
def _build_compact_context_modules_inner(
    company: Any,
    question: str,
    include: list[str] | None,
    exclude: list[str] | None,
    compact: bool,
    orig_verbose: bool,
) -> tuple[dict[str, str], list[str], str]:
    """Assemble the per-module context for ``build_context_by_module``.

    The question is classified and routed, then finance statements, quarterly
    series, ratios, report modules, sections topics and direct modules are
    rendered as the route requires. A response contract and clarification
    context are attached when the corresponding builders return one.

    Args:
        company: Company instance to read data from.
        question: User question.
        include: Modules to force-include, if any.
        exclude: Modules to leave out, if any.
        compact: When True, cap the year window at 4 and apply the
            question-type account filters.
        orig_verbose: Accepted for the caller's benefit; not read here.

    Returns:
        ``(modules_dict, included, header_text)``. When nothing at all was
        produced, falls back to ``build_context`` and returns its text under
        the ``"_full"`` key with an empty header.
    """
    year_span = _detect_year_hint(question)
    if compact:
        year_span = min(year_span, 4)

    modules_dict: dict[str, str] = {}
    included: list[str] = []

    def _merge_named_sections(built: dict[str, str]) -> None:
        # Store every section and record its module name once.
        for section_key, text in built.items():
            modules_dict[section_key] = text
            module_name = _section_key_to_module_name(section_key)
            if module_name not in included:
                included.append(module_name)

    # Header: company title plus an optional one-line overview.
    header_parts = [f"# {company.corpName} ({company.stockCode})"]
    try:
        detail = getattr(company, "companyOverviewDetail", None)
        if detail and isinstance(detail, dict):
            labelled_fields = (("ceo", "대표"), ("mainBusiness", "주요사업"))
            info_parts = [f"{label}: {detail[field]}" for field, label in labelled_fields if detail.get(field)]
            if info_parts:
                header_parts.append("> " + " | ".join(info_parts))
    except _CONTEXT_ERRORS:
        pass

    from dartlab.ai.conversation.prompts import _classify_question_multi

    q_types = _classify_question_multi(question, max_types=2)
    route = _resolve_context_route(question, include=include, q_types=q_types)
    report_modules = _resolve_report_modules_for_question(question, include=include, exclude=exclude)
    candidate_plan = _resolve_candidate_plan(company, question, route=route, include=include, exclude=exclude)
    selected_finance_modules = _resolve_finance_modules_for_question(
        question,
        q_types=q_types,
        route=route,
        candidate_plan=candidate_plan,
    )

    # Account filters are only applied in compact mode.
    acct_filters: dict[str, set[str]] = {}
    if compact:
        for q_type in q_types:
            for statement, account_ids in _QUESTION_ACCOUNT_FILTER.get(q_type, {}).items():
                acct_filters.setdefault(statement, set()).update(account_ids)

    statement_modules = [name for name in selected_finance_modules if name in _FINANCE_STATEMENT_MODULES]
    if statement_modules:
        annual = getattr(company, "annual", None)
        if annual is not None:
            series, years = annual
            quarter_counts = _get_quarter_counts(company)
            if years:
                first_year = years[max(0, len(years) - year_span)]
                last_year = years[-1]
                basis = f"\n**데이터 기준: {first_year}~{last_year}년** (가장 최근: {last_year}년, 금액: 억/조원)\n"
                # Years with fewer than 4 quarters are flagged as partial.
                partial_years = [y for y in years[-year_span:] if quarter_counts.get(y, 4) < 4]
                if partial_years:
                    notes = ", ".join(f"{y}년=Q1~Q{quarter_counts[y]}" for y in partial_years)
                    basis += (
                        f"⚠️ **부분 연도 주의**: {notes} (해당 연도는 분기 누적이므로 전년 연간과 직접 비교 불가)\n"
                    )
                header_parts.append(basis)

                for statement in statement_modules:
                    if acct_filters and statement in {"IS", "BS", "CF"}:
                        account_filter = acct_filters.get(statement)
                    else:
                        account_filter = None
                    section = _build_finance_engine_section(
                        series,
                        years,
                        statement,
                        year_span,
                        account_filter,
                        quarter_counts=quarter_counts,
                    )
                    if section:
                        modules_dict[statement] = section
                        included.append(statement)

        if _detectGranularity(question) == "quarterly":
            timeseries = getattr(company, "timeseries", None)
            if timeseries is not None:
                ts_series, ts_periods = timeseries
                for statement in statement_modules:
                    if statement not in {"IS", "CF"}:
                        continue
                    account_filter = acct_filters.get(statement) if acct_filters else None
                    quarterly_section = _buildQuarterlySection(
                        ts_series,
                        ts_periods,
                        statement,
                        nQuarters=8,
                        accountFilter=account_filter,
                    )
                    if quarterly_section:
                        quarterly_key = f"{statement}_quarterly"
                        modules_dict[quarterly_key] = quarterly_section
                        included.append(quarterly_key)

        # NOTE(review): ratios are only built when at least one statement
        # module was selected — confirm this gating is intentional.
        if "ratios" in selected_finance_modules:
            ratios_section = _build_ratios_section(company, compact=compact, q_types=q_types or None)
            if ratios_section:
                modules_dict["ratios"] = ratios_section
                if "ratios" not in included:
                    included.append("ratios")

    # Report modules: the "report" route always renders (with defaults when
    # nothing was requested); the "hybrid" route renders only explicit requests.
    wanted_reports = report_modules or candidate_plan.get("report", [])
    if route == "report" and not wanted_reports:
        wanted_reports = [
            "dividend",
            "employee",
            "majorHolder",
            "executive",
            "audit",
        ]
    if route == "report" or (route == "hybrid" and wanted_reports):
        _merge_named_sections(
            _build_report_sections(
                company,
                compact=compact,
                q_types=q_types,
                tier="focused" if compact else "full",
                report_names=wanted_reports,
            )
        )

    if route in {"sections", "hybrid"}:
        topics = _resolve_sections_topics(
            company,
            question,
            q_types=q_types,
            candidates=candidate_plan.get("sections"),
            include=include,
            exclude=exclude,
            limit=1 if route == "hybrid" else 2,
        )
        _merge_named_sections(_build_sections_context(company, topics, compact=compact))

    if route == "finance":
        # Attach at most one light business topic alongside the numbers.
        available_topics = _topic_name_set(company)
        light_topics = [t for t in ("businessStatus", "businessOverview") if t in available_topics]
        if light_topics:
            _merge_named_sections(_build_sections_context(company, light_topics[:1], compact=True))

    direct_sections = _build_direct_module_context(
        company,
        candidate_plan.get("direct", []),
        compact=compact,
        question=question,
    )
    # Direct modules are recorded under their own key (no name mapping).
    for direct_key, text in direct_sections.items():
        modules_dict[direct_key] = text
        if direct_key not in included:
            included.append(direct_key)

    response_contract = _build_response_contract(question, included_modules=included, route=route)
    if response_contract:
        modules_dict["_response_contract"] = response_contract

    clarification_context = _build_clarification_context(company, question, candidate_plan=candidate_plan)
    if clarification_context:
        modules_dict["_clarify"] = clarification_context

    if not modules_dict:
        fallback_text, fallback_included = build_context(company, question, include, exclude, compact=True)
        return {"_full": fallback_text}, fallback_included, ""

    # Drop repeated names while keeping first-seen order.
    unique_included = list(dict.fromkeys(included))
    return modules_dict, unique_included, "\n".join(header_parts)
def build_compact_context(
    company: Any,
    question: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
) -> tuple[str, list[str]]:
    """Build the financeEngine-first compact context as one string (backward compatible).

    Calls ``build_context_by_module`` with ``compact=True`` and joins the
    header with, for each included module, the first of its section keys that
    is present in the result. A ``"_full"`` fallback entry is returned as is.

    Returns:
        ``(context_text, included_module_names)``
    """
    modules_dict, included, header = build_context_by_module(
        company,
        question,
        include,
        exclude,
        compact=True,
    )
    if "_full" in modules_dict:
        return modules_dict["_full"], included

    parts = [header] if header else []
    for module_name in included:
        first_key = next(
            (key for key in _module_name_to_section_keys(module_name) if key in modules_dict),
            None,
        )
        if first_key is not None:
            parts.append(modules_dict[first_key])
    return "\n".join(parts), included
# ══════════════════════════════════════
# Question keyword → automatically included data
# ══════════════════════════════════════
from dartlab.core.registry import buildKeywordMap

# Reverse index built from the registry's aiKeywords (module keywords).
_KEYWORD_MAP = buildKeywordMap()
+# 재무제표 직접 매핑 (registry 범위 밖 — BS/IS/CF 등 재무 코드)
+_FINANCIAL_MAP: dict[str, list[str]] = {
+    "재무": ["BS", "IS", "CF", "fsSummary", "costByNature"],
+    "건전성": ["BS", "audit", "contingentLiability", "internalControl", "bond"],
+    "수익": ["IS", "segments", "productService", "costByNature"],
+    "실적": ["IS", "segments", "fsSummary", "productService", "salesOrder"],
+    "매출": ["IS", "segments", "productService", "salesOrder"],
+    "영업이익": ["IS", "fsSummary", "segments"],
+    "순이익": ["IS", "fsSummary"],
+    "현금": ["CF", "BS"],
+    "자산": ["BS", "tangibleAsset", "investmentInOther"],
+    "성장": ["IS", "CF", "productService", "salesOrder", "rnd"],
+    "원가": ["costByNature", "IS"],
+    "비용": ["costByNature", "IS"],
+    "배당": ["dividend", "IS", "shareCapital"],
+    "자본": ["BS", "capitalChange", "shareCapital", "fundraising"],
+    "투자": ["CF", "rnd", "subsidiary", "investmentInOther", "tangibleAsset"],
+    "부채": ["BS", "bond", "contingentLiability", "capitalChange"],
+    "리스크": ["contingentLiability", "sanction", "riskDerivative", "audit", "internalControl"],
+    "지배": ["majorHolder", "executive", "boardOfDirectors", "holderOverview"],
+}
+# 복합 분석 (여러 재무제표 조합)
+_COMPOSITE_MAP: dict[str, list[str]] = {
+    "ROE": ["IS", "BS", "fsSummary"],
+    "ROA": ["IS", "BS", "fsSummary"],
+    "PER": ["IS", "fsSummary", "dividend"],
+    "PBR": ["BS", "fsSummary"],
+    "EPS": ["IS", "fsSummary", "dividend"],
+    "EBITDA": ["IS", "CF", "fsSummary"],
+    "ESG": ["employee", "boardOfDirectors", "sanction", "internalControl"],
+    "거버넌스": ["majorHolder", "executive", "boardOfDirectors", "audit"],
+    "지배구조": ["majorHolder", "executive", "boardOfDirectors", "audit"],
+    "인력현황": ["employee", "executivePay"],
+    "주주환원": ["dividend", "shareCapital", "capitalChange"],
+    "부채위험": ["BS", "bond", "contingentLiability"],
+    "부채구조": ["BS", "bond", "contingentLiability"],
+    "종합진단": ["BS", "IS", "CF", "fsSummary", "dividend", "majorHolder", "audit", "employee"],
+    "스캔": ["BS", "IS", "dividend", "majorHolder", "audit", "employee"],
+    "전반": ["BS", "IS", "CF", "fsSummary", "audit", "majorHolder"],
+    "종합": ["BS", "IS", "CF", "fsSummary", "audit", "majorHolder"],
+    # 영문
+    "revenue": ["IS", "segments", "productService"],
+    "profit": ["IS", "fsSummary"],
+    "debt": ["BS", "bond", "contingentLiability"],
+    "cash flow": ["CF"],
+    "cashflow": ["CF"],
+    "dividend": ["dividend", "IS", "shareCapital"],
+    "growth": ["IS", "CF", "productService", "rnd"],
+    "risk": ["contingentLiability", "sanction", "riskDerivative", "audit"],
+    "audit": ["audit", "auditSystem", "internalControl"],
+    "governance": ["majorHolder", "executive", "boardOfDirectors"],
+    "employee": ["employee", "executivePay"],
+    "subsidiary": ["subsidiary", "affiliateGroup", "investmentInOther"],
+    "capex": ["CF", "tangibleAsset"],
+    "operating": ["IS", "fsSummary", "segments"],
+}
+# 자연어 질문 패턴
+_NATURAL_LANG_MAP: dict[str, list[str]] = {
+    "돈": ["BS", "CF"],
+    "벌": ["IS", "fsSummary"],
+    "잘": ["IS", "fsSummary", "segments"],
+    "위험": ["contingentLiability", "sanction", "riskDerivative", "audit", "internalControl"],
+    "안전": ["BS", "audit", "contingentLiability", "internalControl"],
+    "건강": ["BS", "IS", "CF", "audit"],
+    "전망": ["IS", "CF", "rnd", "segments", "mdna"],
+    "비교": ["IS", "BS", "CF", "fsSummary"],
+    "추세": ["IS", "BS", "CF", "fsSummary"],
+    "트렌드": ["IS", "BS", "CF", "fsSummary"],
+    "분석": ["BS", "IS", "CF", "fsSummary"],
+    "어떤 회사": ["companyOverviewDetail", "companyOverview", "business", "companyHistory"],
+    "무슨 사업": ["business", "productService", "segments", "companyOverviewDetail"],
+    "뭐하는": ["business", "productService", "segments", "companyOverviewDetail"],
+    "어떤 사업": ["business", "productService", "segments", "companyOverviewDetail"],
+}
# Merge order: registry keywords → financial statements → composite →
# natural language. A later layer overrides an earlier one on the same key.
_TOPIC_MAP: dict[str, list[str]] = dict(_KEYWORD_MAP)
_TOPIC_MAP.update(_FINANCIAL_MAP)
_TOPIC_MAP.update(_COMPOSITE_MAP)
_TOPIC_MAP.update(_NATURAL_LANG_MAP)

# Base context that is always included.
_BASE_CONTEXT = ["fsSummary"]
# ══════════════════════════════════════
# Topic mapping
# ══════════════════════════════════════
def _resolve_tables(question: str, include: list[str] | None, exclude: list[str] | None) -> list[str]:
    """Decide which tables go into the context for a question.

    The result always starts from ``_BASE_CONTEXT``. When ``include`` is given
    it is appended as is (minus duplicates) and the question is ignored.
    Otherwise every ``_TOPIC_MAP`` keyword found in the question
    (case-insensitive substring match) contributes its tables; if no keyword
    matches, BS/IS/CF are added. When more than 12 tables were collected from
    keywords, the core tables are kept and only the first 8 others survive.
    ``exclude`` is applied last.

    Args:
        question: User question to scan for keywords.
        include: Explicit table names; bypasses keyword matching when non-empty.
        exclude: Table names to drop from the final list.

    Returns:
        Ordered table names without duplicates.
    """
    tables: list[str] = list(_BASE_CONTEXT)

    if include:
        # De-duplicate: an explicit include may repeat a base table (or
        # itself), which would otherwise render the same section twice.
        for name in include:
            if name not in tables:
                tables.append(name)
    else:
        q_lower = question.lower()
        matched_count = 0
        for keyword, table_names in _TOPIC_MAP.items():
            # Case-insensitive substring match.
            if keyword.lower() in q_lower:
                matched_count += 1
                for name in table_names:
                    if name not in tables:
                        tables.append(name)

        # Nothing matched: fall back to the three core statements.
        if matched_count == 0:
            tables.extend(["BS", "IS", "CF"])

        # Too many matches: keep the core tables (always retained) and only
        # the first few of the rest, to limit context size.
        core_names = {"fsSummary", "BS", "IS", "CF"}
        if len(tables) > 12:
            core = [name for name in tables if name in core_names]
            non_core = [name for name in tables if name not in core_names]
            tables = core + non_core[:8]

    if exclude:
        tables = [name for name in tables if name not in exclude]
    return tables
# ══════════════════════════════════════
# Context assembly
# ══════════════════════════════════════
def build_context(
    company: Any,
    question: str,
    include: list[str] | None = None,
    exclude: list[str] | None = None,
    max_rows: int = 30,
    compact: bool = False,
) -> tuple[str, list[str]]:
    """Assemble the LLM context text from a question and a Company instance.

    Tables are chosen by ``_resolve_tables``; each one is read from ``company``
    by attribute name, rendered according to its type (DataFrame, dict, list,
    or anything else via ``str``), and followed by report sections, an
    optional list of modules that were not included (non-compact only), and a
    closing key-facts recap.

    ``config.verbose`` is forced to ``False`` while the context is built and is
    always restored, including when building raises.

    Args:
        company: Company instance to read data from.
        question: User question used for table selection and classification.
        include: Tables to force-include, if any.
        exclude: Tables to leave out, if any.
        max_rows: Row cap passed to ``df_to_markdown``.
        compact: When True, key accounts only and a terse format (for small
            models).

    Returns:
        ``(context_text, included_table_names)``
    """
    from dartlab.ai.context.formatting import _KEY_ACCOUNTS_MAP

    tables_to_include = _resolve_tables(question, include, exclude)

    # Avoid duplicating fsSummary: skip it when both BS and IS are present.
    if compact and "fsSummary" in tables_to_include:
        has_bs = "BS" in tables_to_include
        has_is = "IS" in tables_to_include
        if has_bs and has_is:
            tables_to_include = [t for t in tables_to_include if t != "fsSummary"]

    from dartlab import config

    orig_verbose = config.verbose
    config.verbose = False
    try:
        sections = []
        included = []

        sections.append(f"# {company.corpName} ({company.stockCode})")
        try:
            detail = getattr(company, "companyOverviewDetail", None)
            if detail and isinstance(detail, dict):
                info_parts = []
                if detail.get("ceo"):
                    info_parts.append(f"대표: {detail['ceo']}")
                if detail.get("mainBusiness"):
                    info_parts.append(f"주요사업: {detail['mainBusiness']}")
                if detail.get("foundedDate"):
                    info_parts.append(f"설립: {detail['foundedDate']}")
                if info_parts:
                    sections.append("> " + " | ".join(info_parts))
        except _CONTEXT_ERRORS:
            pass

        year_range = detect_year_range(company, tables_to_include)
        if year_range:
            sections.append(
                f"\n**데이터 기준: {year_range['min_year']}~{year_range['max_year']}년** (가장 최근: {year_range['max_year']}년)"
            )
            if not compact:
                sections.append("이후 데이터는 포함되어 있지 않습니다.\n")

        if compact:
            sections.append("\n금액: 억/조원 표시 (원본 백만원)\n")
        else:
            sections.append("")
            sections.append("모든 금액은 별도 표기 없으면 백만원(millions KRW) 단위입니다.")
            sections.append("")

        for name in tables_to_include:
            try:
                data = getattr(company, name, None)
                if data is None:
                    continue

                # Callable attributes (but not classes) are invoked; a result
                # exposing a DataFrame under ``FS`` is unwrapped to that frame.
                if callable(data) and not isinstance(data, type):
                    try:
                        result = data()
                        if hasattr(result, "FS") and isinstance(getattr(result, "FS", None), pl.DataFrame):
                            data = result.FS
                        else:
                            data = result
                    except _CONTEXT_ERRORS:
                        continue

                meta = MODULE_META.get(name)
                label = meta.label if meta else name
                desc = meta.description if meta else ""

                section_parts = [f"\n## {label}"]
                if not compact and desc:
                    section_parts.append(desc)

                if isinstance(data, pl.DataFrame):
                    display_df = data
                    if compact and name in _KEY_ACCOUNTS_MAP:
                        display_df = _filter_key_accounts(data, name)
                    md = df_to_markdown(display_df, max_rows=max_rows, meta=meta, compact=compact)
                    section_parts.append(md)
                    # Derived metrics are computed from the unfiltered frame.
                    derived = _compute_derived_metrics(name, data, company)
                    if derived:
                        section_parts.append(derived)
                elif isinstance(data, dict):
                    dict_lines = [f"- {k}: {v}" for k, v in data.items()]
                    section_parts.append("\n".join(dict_lines))
                elif isinstance(data, list):
                    effective_max = meta.maxRows if meta else 20
                    if compact:
                        effective_max = min(effective_max, 10)
                    list_lines = []
                    for item in data[:effective_max]:
                        if hasattr(item, "title") and hasattr(item, "chars"):
                            list_lines.append(f"- **{item.title}** ({item.chars}자)")
                        else:
                            list_lines.append(f"- {item}")
                    if len(data) > effective_max:
                        list_lines.append(f"(... 상위 {effective_max}건, 전체 {len(data)}건)")
                    section_parts.append("\n".join(list_lines))
                else:
                    max_text = 1000 if compact else 2000
                    section_parts.append(str(data)[:max_text])

                if not compact and meta and meta.analysisHints:
                    hints = " | ".join(meta.analysisHints)
                    section_parts.append(f"> 분석 포인트: {hints}")

                sections.append("\n".join(section_parts))
                included.append(name)
            except _CONTEXT_ERRORS:
                # Best effort: a table that fails to render is skipped.
                continue

        from dartlab.ai.conversation.prompts import _classify_question_multi

        _q_types = _classify_question_multi(question, max_types=2) if question else []
        report_sections = _build_report_sections(company, q_types=_q_types)
        for key, section in report_sections.items():
            sections.append(section)
            included.append(key)

        if not compact:
            # Advertise modules that exist but were not rendered above.
            available_modules = scan_available_modules(company)
            available_names = {m["name"] for m in available_modules}
            not_included = available_names - set(included)
            if not_included:
                available_list = []
                for m in available_modules:
                    if m["name"] in not_included:
                        info = f"`{m['name']}` ({m['label']}"
                        if m.get("rows"):
                            info += f", {m['rows']}행"
                        info += ")"
                        available_list.append(info)
                if available_list:
                    sections.append(
                        "\n---\n### 추가 조회 가능한 데이터\n"
                        "아래 데이터는 현재 포함되지 않았지만 `finance(action='data', module=...)` 도구로 조회할 수 있습니다:\n"
                        + ", ".join(available_list[:15])
                    )

        # Placement optimization: repeat the key figures at the end of the
        # context (mitigates the Lost-in-the-Middle effect).
        key_facts = _build_key_facts_recap(company, included)
        if key_facts:
            sections.append(key_facts)

        return "\n".join(sections), included
    finally:
        # Restore verbosity even when building the context raised.
        config.verbose = orig_verbose
def _build_key_facts_recap(company: Any, included: list[str]) -> str | None:
    """Repeat the key figures tersely at the end of the context (Lost-in-the-Middle mitigation).

    Emits one line of headline ratios (ROE, operating margin, debt ratio,
    current ratio, FCF — whichever are not ``None``) and, when the insight
    analysis is importable and returns a result, one line with up to five
    grades other than ``"N"``.

    Args:
        company: Company instance to summarise.
        included: Names of the modules already in the context (not read here).

    Returns:
        The recap text, or ``None`` when there is nothing to report.
    """
    recap: list[str] = []

    ratios = get_headline_ratios(company)
    if ratios is not None and hasattr(ratios, "roe"):
        percent_fields = (
            ("roe", "ROE"),
            ("operatingMargin", "영업이익률"),
            ("debtRatio", "부채비율"),
            ("currentRatio", "유동비율"),
        )
        facts = []
        for attr, label in percent_fields:
            value = getattr(ratios, attr)
            if value is not None:
                facts.append(f"{label} {value:.1f}%")
        fcf = ratios.fcf
        if fcf is not None:
            facts.append(f"FCF {_format_won(fcf)}")
        if facts:
            recap.append("---")
            recap.append(f"**[핵심 지표 요약] {' | '.join(facts)}**")

    # Insight grade summary, when available.
    try:
        from dartlab.analysis.financial.insight import analyze

        stock_code = getattr(company, "stockCode", None)
        if stock_code:
            result = analyze(stock_code, company=company)
            if result is not None:
                grade_parts = [f"{area}={grade}" for area, grade in result.grades().items() if grade != "N"]
                if grade_parts:
                    recap.append(f"**[인사이트 등급] {result.profile} — {', '.join(grade_parts[:5])}**")
    except (ImportError, AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError):
        pass

    return "\n".join(recap) if recap else None
+def _build_change_summary(company: Any, max_topics: int = 5) -> str | None:
+    """기간간 변화가 큰 topic top-N을 자동 요약하여 AI 컨텍스트에 제공."""
+    try:
+        diff_df = company.diff()
+    except _CONTEXT_ERRORS:
+        return None
+    if diff_df is None or (isinstance(diff_df, pl.DataFrame) and diff_df.is_empty()):
+        return None
+    if not isinstance(diff_df, pl.DataFrame):
+        return None
+    # changeRate > 0 인 topic만 필터, 상위 N개
+    if "changeRate" not in diff_df.columns or "topic" not in diff_df.columns:
+        return None
+    changed = diff_df.filter(pl.col("changeRate") > 0).sort("changeRate", descending=True)
+    if changed.is_empty():
+        return None
+    top = changed.head(max_topics)
+    lines = [
+        "\n## 주요 변화 (최근 공시 vs 직전)",
+        "| topic | 변화율 | 기간수 |",
+        "| --- | --- | --- |",
+    ]
+    for row in top.iter_rows(named=True):
+        rate_pct = round(row["changeRate"] * 100, 1)
+        periods = row.get("periods", "")
+        lines.append(f"| `{row['topic']}` | {rate_pct}% | {periods} |")
+    lines.append("")
+    lines.append(
+        "깊이 분석이 필요하면 `explore(action='show', topic=topic)`으로 원문을, `explore(action='diff', topic=topic)`으로 상세 변화를 확인하세요."
+    )
+    return "\n".join(lines)
+def _build_topics_section(company: Any, compact: bool = False) -> str | None:
+    """Company의 topics 목록을 LLM이 사용할 수 있는 마크다운으로 변환.
+    dartlab에 topic이 추가되면 자동으로 LLM 컨텍스트에 포함된다.
+    Args:
+            compact: True면 상위 10개 + 총 개수 요약 (93% 토큰 절감)
+    """
+    topics = getattr(company, "topics", None)
+    if topics is None:
+        return None
+    if isinstance(topics, pl.DataFrame):
+        if "topic" not in topics.columns:
+            return None
+        topic_list = [topic for topic in topics["topic"].drop_nulls().to_list() if isinstance(topic, str) and topic]
+    elif isinstance(topics, pl.Series):
+        topic_list = [topic for topic in topics.drop_nulls().to_list() if isinstance(topic, str) and topic]
+    elif isinstance(topics, list):
+        topic_list = [topic for topic in topics if isinstance(topic, str) and topic]
+    else:
+        try:
+            topic_list = [topic for topic in list(topics) if isinstance(topic, str) and topic]
+        except TypeError:
+            return None
+    if not topic_list:
+        return None
+    if compact:
+        top10 = topic_list[:10]
+        return (
+            f"\n## 공시 topic ({len(topic_list)}개)\n"
+            f"주요: {', '.join(top10)}\n"
+            f"전체 목록은 `explore(action='topics')` 도구로 조회하세요."
+        )
+    lines = [
+        "\n## 조회 가능한 공시 topic 목록",
+        "`explore(action='show', topic=...)` 도구에 아래 topic을 넣으면 상세 데이터를 조회할 수 있습니다.",
+        "",
+    ]
+    # index가 있으면 label 정보 포함
+    index_df = getattr(company, "index", None)
+    if isinstance(index_df, pl.DataFrame) and index_df.height > 0:
+        label_col = "label" if "label" in index_df.columns else None
+        source_col = "source" if "source" in index_df.columns else None
+        for row in index_df.head(60).iter_rows(named=True):
+            topic = row.get("topic", "")
+            label = row.get(label_col, topic) if label_col else topic
+            source = row.get(source_col, "") if source_col else ""
+            lines.append(f"- `{topic}` ({label}) [{source}]")
+    else:
+        for t in topic_list[:60]:
+            lines.append(f"- `{t}`")
+    return "\n".join(lines)
+def _build_insights_section(company: Any) -> str | None:
+    """Company의 7영역 인사이트 등급을 컨텍스트에 자동 포함."""
+    stockCode = getattr(company, "stockCode", None)
+    if not stockCode:
+        return None
+    try:
+        from dartlab.analysis.financial.insight.pipeline import analyze
+        result = analyze(stockCode, company=company)
+    except (ImportError, AttributeError, FileNotFoundError, OSError, RuntimeError, TypeError, ValueError):
+        return None
+    if result is None:
+        return None
+    area_labels = {
+        "performance": "실적",
+        "profitability": "수익성",
+        "health": "건전성",
+        "cashflow": "현금흐름",
+        "governance": "지배구조",
+        "risk": "리스크",
+        "opportunity": "기회",
+    }
+    lines = [
+        "\n## 인사이트 등급 (자동 분석)",
+        f"프로파일: **{result.profile}**",
+        "",
+        "| 영역 | 등급 | 요약 |",
+        "| --- | --- | --- |",
+    ]
+    for key, label in area_labels.items():
+        ir = getattr(result, key, None)
+        grade = result.grades().get(key, "N")
+        summary = ir.summary if ir else "-"
+        lines.append(f"| {label} | {grade} | {summary} |")
+    if result.anomalies:
+        lines.append("")
+        lines.append("### 이상치 경고")
+        for a in result.anomalies[:5]:
+            lines.append(f"- [{a.severity}] {a.text}")
+    if result.summary:
+        lines.append(f"\n{result.summary}")
+    return "\n".join(lines)
+# ══════════════════════════════════════
+# Tiered Context Pipeline
+# ══════════════════════════════════════
+# skeleton tier에서 사용할 핵심 ratios 키
+_SKELETON_RATIO_KEYS = ("roe", "debtRatio", "currentRatio", "operatingMargin", "fcf", "revenueGrowth3Y")
+# skeleton tier에서 사용할 핵심 계정 (매출/영업이익/총자산)
+_SKELETON_ACCOUNTS_KR: dict[str, list[tuple[str, str]]] = {
+    "IS": [("sales", "매출액"), ("operating_profit", "영업이익")],
+    "BS": [("total_assets", "자산총계")],
+}
+_SKELETON_ACCOUNTS_EN: dict[str, list[tuple[str, str]]] = {
+    "IS": [("sales", "Revenue"), ("operating_profit", "Operating Income")],
+    "BS": [("total_assets", "Total Assets")],
+}
+def build_context_skeleton(company: Any) -> tuple[str, list[str]]:
+    """skeleton tier: ~500 토큰. tool calling provider용 최소 컨텍스트.
+    핵심 비율 6개 + 매출/영업이익/총자산 3계정 + insight 등급 1줄.
+    상세 데이터는 도구로 조회하도록 안내.
+    EDGAR(US) / DART(KR) 자동 감지.
+    """
+    market = getattr(company, "market", "KR")
+    is_us = market == "US"
+    fmt_val = _format_usd if is_us else _format_won
+    skel_accounts = _SKELETON_ACCOUNTS_EN if is_us else _SKELETON_ACCOUNTS_KR
+    unit_label = "USD" if is_us else "억/조원"
+    parts = [f"# {company.corpName} ({company.stockCode})"]
+    if is_us:
+        parts[0] += " | Market: US (SEC EDGAR) | Currency: USD"
+    parts.append("⚠️ 아래는 참고용 요약입니다. 질문에 답하려면 반드시 도구(explore/finance)로 상세 데이터를 조회하세요.")
+    included = []
+    # 핵심 계정 3개 (최근 3년)
+    annual = getattr(company, "annual", None)
+    if annual is not None:
+        series, years = annual
+        quarter_counts = _get_quarter_counts(company)
+        if years:
+            display_years = years[-3:]
+            display_labeled = []
+            for y in display_years:
+                qc = quarter_counts.get(y, 4)
+                if qc < 4:
+                    display_labeled.append(f"{y}(~Q{qc})")
+                else:
+                    display_labeled.append(y)
+            display_reversed = list(reversed(display_labeled))
+            year_offset = len(years) - 3
+            col_header = "Account" if is_us else "계정"
+            header = f"| {col_header} | " + " | ".join(display_reversed) + " |"
+            sep = "| --- | " + " | ".join(["---"] * len(display_reversed)) + " |"
+            rows = []
+            for sj, accts in skel_accounts.items():
+                sj_data = series.get(sj, {})
+                for snake_id, label in accts:
+                    vals = sj_data.get(snake_id)
+                    if not vals:
+                        continue
+                    sliced = vals[max(0, year_offset) :]
+                    cells = [fmt_val(v) if v is not None else "-" for v in reversed(sliced)]
+                    rows.append(f"| {label} | " + " | ".join(cells) + " |")
+            if rows:
+                partial = [y for y in display_years if quarter_counts.get(y, 4) < 4]
+                partial_note = ""
+                if partial:
+                    notes = ", ".join(f"{y}=Q1~Q{quarter_counts[y]}" for y in partial)
+                    partial_note = f"\n⚠️ {'Partial year' if is_us else '부분 연도'}: {notes}"
+                section_title = f"Key Financials ({unit_label})" if is_us else f"핵심 수치 ({unit_label})"
+                parts.extend(["", f"## {section_title}{partial_note}", header, sep, *rows])
+                included.extend(["IS", "BS"])
+    # 핵심 비율 6개
+    ratios = get_headline_ratios(company)
+    if ratios is not None and hasattr(ratios, "roe"):
+        ratio_lines = []
+        for key in _SKELETON_RATIO_KEYS:
+            val = getattr(ratios, key, None)
+            if val is None:
+                continue
+            label_map_kr = {
+                "roe": "ROE",
+                "debtRatio": "부채비율",
+                "currentRatio": "유동비율",
+                "operatingMargin": "영업이익률",
+                "fcf": "FCF",
+                "revenueGrowth3Y": "매출3Y CAGR",
+            }
+            label_map_en = {
+                "roe": "ROE",
+                "debtRatio": "Debt Ratio",
+                "currentRatio": "Current Ratio",
+                "operatingMargin": "Op. Margin",
+                "fcf": "FCF",
+                "revenueGrowth3Y": "Rev. 3Y CAGR",
+            }
+            label = (label_map_en if is_us else label_map_kr).get(key, key)
+            if key == "fcf":
+                ratio_lines.append(f"- {label}: {fmt_val(val)}")
+            else:
+                ratio_lines.append(f"- {label}: {val:.1f}%")
+        if ratio_lines:
+            section_title = "Key Ratios" if is_us else "핵심 비율"
+            parts.extend(["", f"## {section_title}", *ratio_lines])
+            included.append("ratios")
+    # 분석 가이드
+    if is_us:
+        parts.extend(
+            [
+                "",
+                "## DartLab Analysis Guide",
+                "All filing data is structured as **sections** (topic × period horizontalization).",
+                "- `explore(action='topics')` → full topic list | `explore(action='show', topic=...)` → block index → data",
+                "- `explore(action='search', keyword=...)` → original filing text for citations",
+                "- `explore(action='diff', topic=...)` → period-over-period changes | `explore(action='trace', topic=...)` → source provenance",
+                "- `finance(action='data', module='BS/IS/CF')` → financials | `finance(action='ratios')` → ratios",
+                "- `analyze(action='insight')` → 7-area grades | `explore(action='coverage')` → data availability",
+                "",
+                "**Note**: This is a US company (SEC EDGAR). No `report` namespace — all narrative data via sections.",
+                "**Procedure**: Understand question → explore topics → retrieve data → cross-verify → synthesize answer",
+            ]
+        )
+    else:
+        parts.extend(
+            [
+                "",
+                "## DartLab 분석 가이드",
+                "이 기업의 모든 공시 데이터는 **sections** (topic × 기간 수평화)으로 구조화되어 있습니다.",
+                "- `explore(action='topics')` → 전체 topic 목록 (평균 120+개)",
+                "- `explore(action='show', topic=...)` → 블록 목차 → 실제 데이터",
+                "- `explore(action='search', keyword=...)` → 원문 증거 검색 (인용용)",
+                "- `explore(action='diff', topic=...)` → 기간간 변화 | `explore(action='trace', topic=...)` → 출처 추적",
+                "- `finance(action='data', module='BS/IS/CF')` → 재무제표 | `finance(action='ratios')` → 재무비율",
+                "- `analyze(action='insight')` → 7영역 종합 등급 | `explore(action='report', apiType=...)` → 정기보고서",
+                "",
+                "**분석 절차**: 질문 이해 → 관련 topic 탐색 → 원문 데이터 조회 → 교차 검증 → 종합 답변",
+                "**핵심**: '데이터 없음'으로 답하기 전에 반드시 도구로 확인. sections에 거의 모든 공시 데이터가 있습니다.",
+            ]
+        )
+    return "\n".join(parts), included
+def build_context_focused(
+    company: Any,
+    question: str,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+) -> tuple[dict[str, str], list[str], str]:
+    """Build the "focused" tier context (~2,000 tokens) for providers without tool calling.
+    Contains the skeleton plus only the modules relevant to the question type,
+    rendered in compact form. Implemented as a thin wrapper that forwards every
+    argument to ``build_context_by_module`` with ``compact=True``.
+    """
+    return build_context_by_module(company, question, include, exclude, compact=True)
+# Alias used to annotate the ``tier`` argument; it is a plain ``str`` at runtime.
+ContextTier = str  # "skeleton" | "focused" | "full"
+def build_context_tiered(
+    company: Any,
+    question: str,
+    tier: ContextTier,
+    include: list[str] | None = None,
+    exclude: list[str] | None = None,
+) -> tuple[dict[str, str], list[str], str]:
+    """tier별 context 빌더. streaming.py에서 호출.
+    Args:
+        tier: "skeleton" | "focused" | "full"
+    Returns:
+        (modules_dict, included_list, header_text)
+    """
+    if tier == "skeleton":
+        text, included = build_context_skeleton(company)
+        return {"_skeleton": text}, included, ""
+    elif tier == "focused":
+        return build_context_focused(company, question, include, exclude)
+    else:
+        return build_context_by_module(company, question, include, exclude, compact=False)

src/dartlab/ai/context/company_adapter.py ADDED Viewed

	@@ -0,0 +1,86 @@

+"""Facade adapter helpers for AI runtime.
+AI layer는 `dartlab.Company` facade와 엔진 내부 구현 차이를 직접 알지 않는다.
+이 모듈에서 headline ratios / ratio series 같은 surface 차이를 흡수한다.
+"""
+from __future__ import annotations
+from types import SimpleNamespace
+from typing import Any
+# Exception types swallowed while probing a company facade for ratio data:
+# a getter that raises one of these is skipped and the next surface is tried.
+_ADAPTER_ERRORS = (
+    AttributeError,
+    KeyError,
+    OSError,
+    RuntimeError,
+    TypeError,
+    ValueError,
+)
+class _RatioProxy:
+    """누락 속성은 None으로 흡수하는 lightweight ratio adapter."""
+    def __init__(self, inner: Any):
+        self._inner = inner
+    def __getattr__(self, name: str) -> Any:
+        return getattr(self._inner, name, None)
+def get_headline_ratios(company: Any) -> Any | None:
+    """Return RatioResult-like object regardless of facade surface."""
+    # 내부용 _getRatiosInternal 우선 (deprecation warning 없음)
+    internal = getattr(company, "_getRatiosInternal", None)
+    getter = internal if callable(internal) else getattr(company, "getRatios", None)
+    if callable(getter):
+        try:
+            result = getter()
+            if result is not None and hasattr(result, "roe"):
+                return _RatioProxy(result)
+        except _ADAPTER_ERRORS:
+            pass
+    finance = getattr(company, "finance", None)
+    finance_getter = getattr(finance, "getRatios", None)
+    if callable(finance_getter):
+        try:
+            result = finance_getter()
+            if result is not None and hasattr(result, "roe"):
+                return _RatioProxy(result)
+        except _ADAPTER_ERRORS:
+            pass
+    for candidate in (
+        getattr(company, "ratios", None),
+        getattr(finance, "ratios", None),
+    ):
+        if candidate is not None and hasattr(candidate, "roe"):
+            return _RatioProxy(candidate)
+    return None
+def get_ratio_series(company: Any) -> Any | None:
+    """Return attribute-style ratio series regardless of tuple/object surface."""
+    for candidate in (
+        getattr(company, "ratioSeries", None),
+        getattr(getattr(company, "finance", None), "ratioSeries", None),
+    ):
+        if candidate is None:
+            continue
+        if hasattr(candidate, "roe"):
+            return candidate
+        if isinstance(candidate, tuple) and len(candidate) == 2:
+            series, periods = candidate
+            if not isinstance(series, dict):
+                continue
+            ratio_series = series.get("RATIO", {})
+            if not isinstance(ratio_series, dict) or not ratio_series:
+                continue
+            adapted = SimpleNamespace(periods=periods)
+            for key, values in ratio_series.items():
+                setattr(adapted, key, values)
+            return adapted
+    return None

src/dartlab/ai/context/dartOpenapi.py ADDED Viewed

	@@ -0,0 +1,485 @@

+"""OpenDART 공시목록 retrieval helper.
+회사 미선택 질문에서도 최근 공시목록/수주공시/계약공시를
+deterministic prefetch로 회수해 AI 컨텍스트로 주입한다.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from datetime import date, timedelta
+from html import unescape
+from typing import Any
+import polars as pl
+from dartlab.ai.context.formatting import df_to_markdown
+from dartlab.core.capabilities import UiAction
+from dartlab.providers.dart.openapi.dartKey import hasDartApiKey
+# Words that mark a question as being about filings; at least one must appear
+# for isDartFilingQuestion to return True.
+_FILING_TERMS = (
+    "공시",
+    "전자공시",
+    "공시목록",
+    "공시 리스트",
+    "수주공시",
+    "계약공시",
+    "단일판매공급계약",
+    "공급계약",
+    "판매공급계약",
+    "수주",
+)
+# Request phrasing ("show", "find", "summarize", "list", ...) checked by isDartFilingQuestion.
+_REQUEST_TERMS = (
+    "알려",
+    "보여",
+    "찾아",
+    "정리",
+    "요약",
+    "분석",
+    "골라",
+    "추천",
+    "무슨",
+    "뭐 있었",
+    "리스트",
+    "목록",
+)
+# Words that make detectDartFilingIntent request filing text excerpts (includeText).
+_DETAIL_TERMS = (
+    "요약",
+    "분석",
+    "핵심",
+    "중요",
+    "읽을",
+    "리스크",
+    "내용",
+    "무슨 내용",
+    "꼭",
+)
+# Explicit "read the body/original text" phrasing; also turns on includeText.
+_READ_TERMS = (
+    "읽어",
+    "본문",
+    "원문",
+    "전문",
+    "자세히 보여",
+    "내용 보여",
+)
+# Analysis-style words: when present without a read term or a list-style phrase,
+# isDartFilingQuestion rejects the question.
+_ANALYSIS_ONLY_TERMS = (
+    "근거",
+    "왜",
+    "지속 가능",
+    "지속가능",
+    "판단",
+    "평가",
+    "해석",
+    "사업구조",
+    "구조",
+    "영향",
+    "변화",
+)
+# Report-title keywords used to filter supply-contract / order filings.
+_ORDER_KEYWORDS = (
+    "단일판매공급계약",
+    "판매공급계약",
+    "공급계약",
+    "수주",
+)
+# Korean disclosure-category phrase -> single-letter code passed as ``type=`` to OpenDart.filings.
+_DISCLOSURE_TYPE_HINTS = {
+    "정기공시": "A",
+    "주요사항": "B",
+    "주요사항보고": "B",
+    "발행공시": "C",
+    "지분공시": "D",
+    "기타공시": "E",
+    "외부감사": "F",
+    "펀드공시": "G",
+    "자산유동화": "H",
+    "거래소공시": "I",
+    "공정위공시": "J",
+}
+# Korean market name -> code passed as ``market=`` to OpenDart.filings.
+_MARKET_HINTS = {
+    "코스피": "Y",
+    "유가증권": "Y",
+    "코스닥": "K",
+    "코넥스": "N",
+}
+# Default number of filings returned when the question does not specify a count.
+_DEFAULT_LIMIT = 20
+# Default look-back window in days (inclusive of today) when no period is given.
+_DEFAULT_DAYS = 7
+@dataclass(frozen=True)
+class DartFilingIntent:
+    """Immutable search parameters derived from a filing-related question.
+    The all-defaults instance (``matched=False``) means "not a filing question".
+    """
+    matched: bool = False  # True when the question was recognized as a filing query
+    corp: str | None = None  # stock code or company name of the selected company, if any
+    start: str = ""  # window start as YYYYMMDD
+    end: str = ""  # window end as YYYYMMDD
+    disclosureType: str | None = None  # code from _DISCLOSURE_TYPE_HINTS
+    market: str | None = None  # code from _MARKET_HINTS
+    finalOnly: bool = False  # forwarded as ``final=`` to the filings search
+    limit: int = _DEFAULT_LIMIT  # maximum number of filings to return
+    titleKeywords: tuple[str, ...] = ()  # substrings matched against the report title
+    includeText: bool = False  # whether filing text excerpts should be fetched
+    textLimit: int = 0  # number of filings to fetch excerpts for
+@dataclass(frozen=True)
+class DartFilingPrefetch:
+    """Result of buildDartFilingPrefetch: the fetched filings and the context text built from them."""
+    matched: bool  # False when the question is not a filing query
+    needsKey: bool = False  # True when no OpenDART API key is configured
+    message: str = ""  # user-facing notice, set when the API key is missing
+    contextText: str = ""  # markdown context (filing table plus optional excerpts)
+    uiAction: dict[str, Any] | None = None  # UI payload that opens the settings panel
+    filings: pl.DataFrame | None = None  # filings returned by searchDartFilings
+    intent: DartFilingIntent | None = None  # parsed intent behind this prefetch
+def buildMissingDartKeyMessage() -> str:
+    return (
+        "OpenDART API 키가 필요합니다.\n"
+        "- 이 질문은 실시간 공시목록 조회가 필요합니다.\n"
+        "- 설정에서 `OpenDART API 키`를 저장하면 최근 공시, 수주공시, 계약공시를 바로 검색할 수 있습니다.\n"
+        "- 키는 프로젝트 루트 `.env`의 `DART_API_KEY`로 저장됩니다."
+    )
+def buildMissingDartKeyUiAction() -> dict[str, Any]:
+    return UiAction.update(
+        "settings",
+        {
+            "open": True,
+            "section": "openDart",
+            "message": "OpenDART API 키를 설정하면 최근 공시목록을 바로 검색할 수 있습니다.",
+        },
+    ).to_payload()
+def isDartFilingQuestion(question: str) -> bool:
+    q = (question or "").strip()
+    if not q:
+        return False
+    normalized = q.replace(" ", "")
+    if any(term in normalized for term in ("openapi", "opendart", "dartapi")) and not any(
+        term in q for term in _FILING_TERMS
+    ):
+        return False
+    has_filing_term = any(term in q for term in _FILING_TERMS)
+    has_request_term = any(term in q for term in _REQUEST_TERMS)
+    has_time_term = any(term in q for term in ("최근", "오늘", "어제", "이번 주", "지난 주", "이번 달", "며칠", "몇일"))
+    has_read_term = any(term in q for term in _READ_TERMS)
+    has_analysis_only_term = any(term in q for term in _ANALYSIS_ONLY_TERMS)
+    if (
+        has_analysis_only_term
+        and not has_read_term
+        and not any(term in q for term in ("목록", "리스트", "뭐 있었", "무슨 공시"))
+    ):
+        return False
+    return has_filing_term and (has_request_term or has_time_term or has_read_term or "?" not in q)
+def detectDartFilingIntent(question: str, company: Any | None = None) -> DartFilingIntent:
+    if not isDartFilingQuestion(question):
+        return DartFilingIntent()
+    today = date.today()
+    start_date, end_date = _resolve_date_window(question, today)
+    title_keywords = _resolve_title_keywords(question)
+    include_text = any(term in question for term in _DETAIL_TERMS) or any(term in question for term in _READ_TERMS)
+    limit = _resolve_limit(question)
+    corp = None
+    if company is not None:
+        corp = getattr(company, "stockCode", None) or getattr(company, "corpName", None)
+    disclosure_type = None
+    for hint, code in _DISCLOSURE_TYPE_HINTS.items():
+        if hint in question:
+            disclosure_type = code
+            break
+    market = None
+    for hint, code in _MARKET_HINTS.items():
+        if hint in question:
+            market = code
+            break
+    final_only = any(term in question for term in ("최종", "정정 제외", "정정없는", "정정 없는"))
+    text_limit = 3 if include_text and limit <= 5 else (2 if include_text else 0)
+    return DartFilingIntent(
+        matched=True,
+        corp=corp,
+        start=start_date.strftime("%Y%m%d"),
+        end=end_date.strftime("%Y%m%d"),
+        disclosureType=disclosure_type,
+        market=market,
+        finalOnly=final_only,
+        limit=limit,
+        titleKeywords=title_keywords,
+        includeText=include_text,
+        textLimit=text_limit,
+    )
+def searchDartFilings(
+    *,
+    corp: str | None = None,
+    start: str | None = None,
+    end: str | None = None,
+    days: int | None = None,
+    weeks: int | None = None,
+    disclosureType: str | None = None,
+    market: str | None = None,
+    finalOnly: bool = False,
+    titleKeywords: list[str] | tuple[str, ...] | None = None,
+    limit: int = _DEFAULT_LIMIT,
+) -> pl.DataFrame:
+    from dartlab import OpenDart
+    if not hasDartApiKey():
+        raise ValueError(buildMissingDartKeyMessage())
+    resolved_start, resolved_end = _coerce_search_window(start, end, days=days, weeks=weeks)
+    dart = OpenDart()
+    filings = dart.filings(
+        corp=corp,
+        start=resolved_start,
+        end=resolved_end,
+        type=disclosureType,
+        final=finalOnly,
+        market=market,
+    )
+    if filings is None or filings.height == 0:
+        return pl.DataFrame()
+    df = filings
+    if titleKeywords and "report_nm" in df.columns:
+        mask = pl.lit(False)
+        for keyword in titleKeywords:
+            mask = mask | pl.col("report_nm").str.contains(keyword, literal=True)
+        df = df.filter(mask)
+    if df.height == 0:
+        return pl.DataFrame()
+    sort_cols = [col for col in ("rcept_dt", "rcept_no") if col in df.columns]
+    if sort_cols:
+        descending = [True] * len(sort_cols)
+        df = df.sort(sort_cols, descending=descending)
+    return df.head(max(1, min(limit, 100)))
+def getDartFilingText(rceptNo: str, maxChars: int = 4000) -> str:
+    from dartlab import OpenDart
+    if not rceptNo:
+        raise ValueError("rcept_no가 필요합니다.")
+    if not hasDartApiKey():
+        raise ValueError(buildMissingDartKeyMessage())
+    raw_text = OpenDart().documentText(rceptNo)
+    return cleanDartFilingText(raw_text, maxChars=maxChars)
+def buildDartFilingPrefetch(question: str, company: Any | None = None) -> DartFilingPrefetch:
+    intent = detectDartFilingIntent(question, company=company)
+    if not intent.matched:
+        return DartFilingPrefetch(matched=False)
+    if not hasDartApiKey():
+        return DartFilingPrefetch(
+            matched=True,
+            needsKey=True,
+            message=buildMissingDartKeyMessage(),
+            uiAction=buildMissingDartKeyUiAction(),
+            intent=intent,
+        )
+    filings = searchDartFilings(
+        corp=intent.corp,
+        start=intent.start,
+        end=intent.end,
+        disclosureType=intent.disclosureType,
+        market=intent.market,
+        finalOnly=intent.finalOnly,
+        titleKeywords=intent.titleKeywords,
+        limit=intent.limit,
+    )
+    context_text = formatDartFilingContext(filings, intent, question=question)
+    if intent.includeText and filings.height > 0 and "rcept_no" in filings.columns:
+        detail_blocks = []
+        for rcept_no in filings["rcept_no"].head(intent.textLimit).to_list():
+            try:
+                excerpt = getDartFilingText(str(rcept_no), maxChars=1800)
+            except (OSError, RuntimeError, ValueError):
+                continue
+            detail_blocks.append(f"### 접수번호 {rcept_no} 원문 발췌\n{excerpt}")
+        if detail_blocks:
+            context_text = "\n\n".join([context_text, *detail_blocks]) if context_text else "\n\n".join(detail_blocks)
+    return DartFilingPrefetch(
+        matched=True,
+        needsKey=False,
+        contextText=context_text,
+        filings=filings,
+        intent=intent,
+    )
+def formatDartFilingContext(
+    filings: pl.DataFrame,
+    intent: DartFilingIntent,
+    *,
+    question: str = "",
+) -> str:
+    if intent.start or intent.end:
+        window_label = f"{_format_date(intent.start or intent.end)} ~ {_format_date(intent.end or intent.start)}"
+    else:
+        window_label = "자동 기본 범위"
+    lines = ["## OpenDART 공시목록 검색 결과", f"- 기간: {window_label}"]
+    if intent.corp:
+        lines.append(f"- 회사 필터: {intent.corp}")
+    else:
+        lines.append("- 회사 필터: 전체 시장")
+    if intent.market:
+        lines.append(f"- 시장 필터: {intent.market}")
+    if intent.disclosureType:
+        lines.append(f"- 공시유형: {intent.disclosureType}")
+    if intent.finalOnly:
+        lines.append("- 최종보고서만 포함")
+    if intent.titleKeywords:
+        lines.append(f"- 제목 키워드: {', '.join(intent.titleKeywords)}")
+    if question:
+        lines.append(f"- 사용자 질문: {question}")
+    if filings is None or filings.height == 0:
+        lines.append("")
+        lines.append("해당 조건에 맞는 공시가 없습니다.")
+        return "\n".join(lines)
+    display_df = _build_display_df(filings)
+    lines.extend(["", df_to_markdown(display_df, max_rows=min(intent.limit, 20), compact=False)])
+    return "\n".join(lines)
+def cleanDartFilingText(text: str, *, maxChars: int = 4000) -> str:
+    normalized = unescape(text or "")
+    normalized = re.sub(r"<[^>]+>", " ", normalized)
+    normalized = re.sub(r"\s+", " ", normalized).strip()
+    if len(normalized) <= maxChars:
+        return normalized
+    return normalized[:maxChars].rstrip() + " ... (truncated)"
+def _build_display_df(df: pl.DataFrame) -> pl.DataFrame:
+    display = df
+    if "rcept_dt" in display.columns:
+        display = display.with_columns(
+            pl.col("rcept_dt").cast(pl.Utf8).map_elements(_format_date, return_dtype=pl.Utf8).alias("rcept_dt")
+        )
+    preferred_cols = [
+        col
+        for col in ("rcept_dt", "corp_name", "stock_code", "corp_cls", "report_nm", "rcept_no")
+        if col in display.columns
+    ]
+    if preferred_cols:
+        display = display.select(preferred_cols)
+    rename_map = {
+        "rcept_dt": "접수일",
+        "corp_name": "회사",
+        "stock_code": "종목코드",
+        "corp_cls": "시장",
+        "report_nm": "공시명",
+        "rcept_no": "접수번호",
+    }
+    actual_map = {src: dst for src, dst in rename_map.items() if src in display.columns}
+    return display.rename(actual_map)
+def _resolve_title_keywords(question: str) -> tuple[str, ...]:
+    if any(term in question for term in _ORDER_KEYWORDS) or "계약공시" in question:
+        return _ORDER_KEYWORDS
+    explicit = []
+    for phrase in ("감사보고서", "합병", "유상증자", "무상증자", "배당", "자기주식", "최대주주"):
+        if phrase in question:
+            explicit.append(phrase)
+    return tuple(explicit)
+def _resolve_limit(question: str) -> int:
+    match = re.search(r"(\d+)\s*건", question)
+    if match:
+        return max(1, min(int(match.group(1)), 50))
+    if "쫙" in question or "전부" in question or "전체" in question:
+        return 30
+    return _DEFAULT_LIMIT
+def _resolve_date_window(question: str, today: date) -> tuple[date, date]:
+    q = question.replace(" ", "")
+    if "오늘" in question:
+        return today, today
+    if "어제" in question:
+        target = today - timedelta(days=1)
+        return target, target
+    if "이번주" in q:
+        start = today - timedelta(days=today.weekday())
+        return start, today
+    if "지난주" in q:
+        end = today - timedelta(days=today.weekday() + 1)
+        start = end - timedelta(days=6)
+        return start, end
+    if "이번달" in q:
+        start = today.replace(day=1)
+        return start, today
+    recent_match = re.search(r"최근\s*(\d+)\s*(일|주|개월|달)", question)
+    if recent_match:
+        amount = int(recent_match.group(1))
+        unit = recent_match.group(2)
+        if unit == "일":
+            return today - timedelta(days=max(amount - 1, 0)), today
+        if unit == "주":
+            return today - timedelta(days=max(amount * 7 - 1, 0)), today
+        if unit in {"개월", "달"}:
+            return today - timedelta(days=max(amount * 30 - 1, 0)), today
+    if "최근 몇일" in q or "최근몇일" in q or "최근 며칠" in question or "최근며칠" in q:
+        return today - timedelta(days=_DEFAULT_DAYS - 1), today
+    if "최근 몇주" in q or "최근몇주" in q:
+        return today - timedelta(days=13), today
+    return today - timedelta(days=_DEFAULT_DAYS - 1), today
+def _coerce_search_window(
+    start: str | None,
+    end: str | None,
+    *,
+    days: int | None,
+    weeks: int | None,
+) -> tuple[str, str]:
+    today = date.today()
+    if start or end:
+        resolved_start = _strip_date_sep(start or (end or today.strftime("%Y%m%d")))
+        resolved_end = _strip_date_sep(end or today.strftime("%Y%m%d"))
+        return resolved_start, resolved_end
+    if days:
+        begin = today - timedelta(days=max(days - 1, 0))
+        return begin.strftime("%Y%m%d"), today.strftime("%Y%m%d")
+    if weeks:
+        begin = today - timedelta(days=max(weeks * 7 - 1, 0))
+        return begin.strftime("%Y%m%d"), today.strftime("%Y%m%d")
+    begin = today - timedelta(days=_DEFAULT_DAYS - 1)
+    return begin.strftime("%Y%m%d"), today.strftime("%Y%m%d")
+def _strip_date_sep(value: str) -> str:
+    return (value or "").replace("-", "").replace(".", "").replace("/", "")
+def _format_date(value: str) -> str:
+    digits = _strip_date_sep(str(value))
+    if len(digits) == 8 and digits.isdigit():
+        return f"{digits[:4]}-{digits[4:6]}-{digits[6:]}"
+    return str(value)

src/dartlab/ai/context/finance_context.py ADDED Viewed

	@@ -0,0 +1,945 @@

+"""Finance/report 데이터를 LLM context 마크다운으로 변환하는 함수들."""
+from __future__ import annotations
+import re
+from typing import Any
+import polars as pl
+from dartlab.ai.context.company_adapter import get_headline_ratios, get_ratio_series
+from dartlab.ai.context.formatting import _format_won, df_to_markdown
+from dartlab.ai.metadata import MODULE_META
+# Exception types treated as "data unavailable" while building context sections.
+_CONTEXT_ERRORS = (AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError)
+# ══════════════════════════════════════
+# Question type -> module mapping (generated from the registry, plus overrides)
+# ══════════════════════════════════════
+from dartlab.core.registry import buildQuestionModules
+# Modules missing from the registry (e.g. sections-topic-only ones) are added via this override.
+_QUESTION_MODULES_OVERRIDE: dict[str, list[str]] = {
+    "공시": [],
+    "배당": ["treasuryStock"],
+    "자본": ["treasuryStock"],
+    "사업": ["businessOverview"],
+    "ESG": ["governanceOverview", "boardOfDirectors"],
+    "공급망": ["segments", "rawMaterial"],
+    "변화": ["disclosureChanges", "businessStatus"],
+    "밸류에이션": ["IS", "BS"],
+}
+_QUESTION_MODULES: dict[str, list[str]] = {}
+# Start from the registry-generated mapping (copied into fresh lists) ...
+for _qt, _mods in buildQuestionModules().items():
+    _QUESTION_MODULES[_qt] = list(_mods)
+# ... then append override modules that are not already listed for that question type.
+for _qt, _extra in _QUESTION_MODULES_OVERRIDE.items():
+    _QUESTION_MODULES.setdefault(_qt, []).extend(m for m in _extra if m not in _QUESTION_MODULES.get(_qt, []))
+# Modules injected for every question when no explicit report list is requested.
+_ALWAYS_INCLUDE_MODULES = {"employee"}
+_CONTEXT_MODULE_BUDGET = 10000  # upper bound on total module-context characters (focused-tier default)
+def _resolve_context_budget(tier: str = "focused") -> int:
+    """컨텍스트 tier별 모듈 예산."""
+    return {
+        "skeleton": 2000,  # tool-capable: 최소 맥락, 도구로 보충
+        "focused": 10000,  # 분기 데이터 수용
+        "full": 16000,  # non-tool 모델: 최대한 포함
+    }.get(tier, 10000)
+def _topic_name_set(company: Any) -> set[str]:
+    """Company.topics에서 실제 topic 이름만 안전하게 추출."""
+    try:
+        topics = getattr(company, "topics", None)
+    except _CONTEXT_ERRORS:
+        return set()
+    if topics is None:
+        return set()
+    if isinstance(topics, pl.DataFrame):
+        if "topic" not in topics.columns:
+            return set()
+        return {t for t in topics["topic"].drop_nulls().to_list() if isinstance(t, str) and t}
+    if isinstance(topics, pl.Series):
+        return {t for t in topics.drop_nulls().to_list() if isinstance(t, str) and t}
+    try:
+        return {str(t) for t in topics if isinstance(t, str) and t}
+    except TypeError:
+        return set()
+def _resolve_module_data(company: Any, module_name: str) -> Any:
+    """AI context용 모듈 해석.
+    1. Company property/direct attr
+    2. registry 기반 lazy parser (_get_primary)
+    3. 실제 존재하는 topic에 한해 show()
+    """
+    data = getattr(company, module_name, None)
+    if data is not None:
+        return data
+    get_primary = getattr(company, "_get_primary", None)
+    if callable(get_primary):
+        try:
+            data = get_primary(module_name)
+        except _CONTEXT_ERRORS:
+            data = None
+        except (FileNotFoundError, ImportError, IndexError):
+            data = None
+        if data is not None:
+            return data
+    if hasattr(company, "show") and module_name in _topic_name_set(company):
+        try:
+            return company.show(module_name)
+        except _CONTEXT_ERRORS:
+            return None
+    return None
+def _extract_module_context(company: Any, module_name: str, max_rows: int = 10) -> str | None:
+    """registry 모듈 → 마크다운 요약. DataFrame/dict/list/text 모두 처리."""
+    try:
+        data = _resolve_module_data(company, module_name)
+        if data is None:
+            return None
+        if callable(data) and not isinstance(data, type):
+            try:
+                data = data()
+            except (AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError):
+                return None
+        meta = MODULE_META.get(module_name)
+        label = meta.label if meta else module_name
+        if isinstance(data, pl.DataFrame):
+            if data.is_empty():
+                return None
+            md = df_to_markdown(data, max_rows=max_rows, meta=meta, compact=True)
+            return f"## {label}\n{md}"
+        if isinstance(data, dict):
+            items = list(data.items())[:max_rows]
+            lines = [f"## {label}"]
+            for k, v in items:
+                lines.append(f"- {k}: {v}")
+            return "\n".join(lines)
+        if isinstance(data, list):
+            if not data:
+                return None
+            lines = [f"## {label}"]
+            for item in data[:max_rows]:
+                if hasattr(item, "title") and hasattr(item, "chars"):
+                    lines.append(f"- **{item.title}** ({item.chars}자)")
+                else:
+                    lines.append(f"- {item}")
+            if len(data) > max_rows:
+                lines.append(f"(... 상위 {max_rows}건, 전체 {len(data)}건)")
+            return "\n".join(lines)
+        text = str(data)
+        if len(text) > 300:
+            text = (
+                text[:300]
+                + f"... (전체 {len(str(data))}자, explore(action='show', topic='{module_name}')으로 전문 확인)"
+            )
+        return f"## {label}\n{text}" if text.strip() else None
+    except (AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError):
+        return None
+def _build_report_sections(
+    company: Any,
+    compact: bool = False,
+    q_types: list[str] | None = None,
+    tier: str = "focused",
+    report_names: list[str] | None = None,
+) -> dict[str, str]:
+    """Build LLM context sections from report pivots plus question-driven modules.
+    Args:
+        company: Company object; its ``report`` attribute supplies the pivots.
+        compact: Shorter output (last 3 years, fewer rows/holders, last 2 audit entries).
+        q_types: Question types whose mapped modules are injected when
+            ``report_names`` is None.
+        tier: Context tier used to look up the character budget for injected modules.
+        report_names: Explicit list of reports/modules to include; when given,
+            question-type and always-include modules are not injected.
+    Returns:
+        Mapping of section key ("module_<name>" or "report_<name>") to markdown text.
+    """
+    report = getattr(company, "report", None)
+    sections: dict[str, str] = {}
+    budget = _resolve_context_budget(tier)
+    requested_reports = set(report_names or ["dividend", "employee", "majorHolder", "executive", "audit"])
+    # Inject extra modules based on the question type.
+    extra_modules: set[str] = set() if report_names is not None else set(_ALWAYS_INCLUDE_MODULES)
+    if q_types and report_names is None:
+        for qt in q_types:
+            for mod in _QUESTION_MODULES.get(qt, []):
+                extra_modules.add(mod)
+    # Names of the report modules rendered by the dedicated code below (used to avoid duplicates).
+    _HARDCODED_REPORT = {"dividend", "employee", "majorHolder", "executive", "audit"}
+    if report_names:
+        for mod in report_names:
+            if mod not in _HARDCODED_REPORT:
+                extra_modules.add(mod)
+    # Dynamic module injection (only modules without a dedicated renderer).
+    budget_used = 0
+    for mod in sorted(extra_modules - _HARDCODED_REPORT):
+        # The budget is checked before each module, so the last one added may exceed it.
+        if budget_used >= budget:
+            break
+        content = _extract_module_context(company, mod, max_rows=8 if compact else 12)
+        if content:
+            budget_used += len(content)
+            sections[f"module_{mod}"] = content
+    if report is None:
+        return sections
+    max_years = 3 if compact else 99
+    # --- Dividend time series ---
+    div = getattr(report, "dividend", None) if "dividend" in requested_reports else None
+    if div is not None and div.years:
+        display_years = div.years[-max_years:]
+        # Index of the first displayed year; the value lists are sliced with the same offset.
+        offset = len(div.years) - len(display_years)
+        lines = ["## 배당 시계열 (정기보고서)"]
+        header = "| 연도 | " + " | ".join(str(y) for y in display_years) + " |"
+        sep = "| --- | " + " | ".join(["---"] * len(display_years)) + " |"
+        lines.append(header)
+        lines.append(sep)
+        def _fmtList(vals):
+            return [str(round(v)) if v is not None else "-" for v in vals]
+        lines.append("| DPS(원) | " + " | ".join(_fmtList(div.dps[offset:])) + " |")
+        lines.append(
+            "| 배당수익률(%) | "
+            + " | ".join([f"{v:.2f}" if v is not None else "-" for v in div.dividendYield[offset:]])
+            + " |"
+        )
+        latest_dps = div.dps[-1] if div.dps else None
+        latest_yield = div.dividendYield[-1] if div.dividendYield else None
+        if latest_dps is not None or latest_yield is not None:
+            lines.append("")
+            lines.append("### 배당 핵심 요약")
+            if latest_dps is not None:
+                lines.append(f"- 최근 연도 DPS: {int(round(latest_dps))}원")
+            if latest_yield is not None:
+                lines.append(f"- 최근 연도 배당수익률: {latest_yield:.2f}%")
+            if len(display_years) >= 3:
+                recent_dps = [
+                    f"{year}:{int(round(value)) if value is not None else '-'}원"
+                    for year, value in zip(display_years[-3:], div.dps[offset:][-3:], strict=False)
+                ]
+                lines.append("- 최근 3개년 DPS 추이: " + " → ".join(recent_dps))
+        sections["report_dividend"] = "\n".join(lines)
+    # --- Employee headcount and salary ---
+    emp = getattr(report, "employee", None) if "employee" in requested_reports else None
+    if emp is not None and emp.years:
+        display_years = emp.years[-max_years:]
+        offset = len(emp.years) - len(display_years)
+        lines = ["## 직원현황 (정기보고서)"]
+        header = "| 연도 | " + " | ".join(str(y) for y in display_years) + " |"
+        sep = "| --- | " + " | ".join(["---"] * len(display_years)) + " |"
+        lines.append(header)
+        lines.append(sep)
+        def _fmtEmp(vals):
+            return [f"{int(v):,}" if v is not None else "-" for v in vals]
+        def _fmtSalary(vals):
+            return [f"{int(v):,}" if v is not None else "-" for v in vals]
+        lines.append("| 총 직원수(명) | " + " | ".join(_fmtEmp(emp.totalEmployee[offset:])) + " |")
+        lines.append("| 평균월급(천원) | " + " | ".join(_fmtSalary(emp.avgMonthlySalary[offset:])) + " |")
+        sections["report_employee"] = "\n".join(lines)
+    # --- Major shareholders ---
+    mh = getattr(report, "majorHolder", None) if "majorHolder" in requested_reports else None
+    if mh is not None and mh.years:
+        lines = ["## 최대주주 (정기보고서)"]
+        if compact:
+            # Compact mode: only the latest combined ownership ratio.
+            latest_ratio = mh.totalShareRatio[-1] if mh.totalShareRatio else None
+            ratio_str = f"{latest_ratio:.2f}%" if latest_ratio is not None else "-"
+            lines.append(f"- {mh.years[-1]}년 합산 지분율: {ratio_str}")
+        else:
+            header = "| 연도 | " + " | ".join(str(y) for y in mh.years) + " |"
+            sep = "| --- | " + " | ".join(["---"] * len(mh.years)) + " |"
+            lines.append(header)
+            lines.append(sep)
+            lines.append(
+                "| 합산 지분율(%) | "
+                + " | ".join([f"{v:.2f}" if v is not None else "-" for v in mh.totalShareRatio])
+                + " |"
+            )
+        if mh.latestHolders:
+            holder_limit = 3 if compact else 5
+            if not compact:
+                lines.append("")
+                lines.append(f"### 최근 주요주주 ({mh.years[-1]}년)")
+            for h in mh.latestHolders[:holder_limit]:
+                ratio = f"{h['ratio']:.2f}%" if h.get("ratio") is not None else "-"
+                relate = f" ({h['relate']})" if h.get("relate") else ""
+                lines.append(f"- {h['name']}{relate}: {ratio}")
+        sections["report_majorHolder"] = "\n".join(lines)
+    # --- Executives ---
+    exe = getattr(report, "executive", None) if "executive" in requested_reports else None
+    if exe is not None and exe.totalCount > 0:
+        lines = [
+            "## 임원현황 (정기보고서)",
+            f"- 총 임원수: {exe.totalCount}명",
+            f"- 사내이사: {exe.registeredCount}명",
+            f"- 사외이사: {exe.outsideCount}명",
+        ]
+        sections["report_executive"] = "\n".join(lines)
+    # --- Audit opinions ---
+    aud = getattr(report, "audit", None) if "audit" in requested_reports else None
+    if aud is not None and aud.years:
+        lines = ["## 감사의견 (정기보고서)"]
+        display_aud = list(zip(aud.years, aud.opinions, aud.auditors))
+        if compact:
+            display_aud = display_aud[-2:]
+        for y, opinion, auditor in display_aud:
+            opinion = opinion or "-"
+            auditor = auditor or "-"
+            lines.append(f"- {y}년: {opinion} ({auditor})")
+        sections["report_audit"] = "\n".join(lines)
+    return sections
+# ══════════════════════════════════════
+# financeEngine 기반 컨텍스트 (1차 데이터 소스)
+# ══════════════════════════════════════
+# Question keyword -> number of years of data to show when that keyword appears.
+# _detect_year_hint walks this dict in insertion order and returns the value of the
+# first keyword contained in the question, so earlier entries win over later ones.
+_YEAR_HINT_KEYWORDS: dict[str, int] = {
+    "최근": 3,  # recent
+    "올해": 3,  # this year
+    "작년": 3,  # last year
+    "전년": 3,  # previous year
+    "추이": 5,  # trend (over time)
+    "트렌드": 5,  # trend
+    "추세": 5,  # tendency
+    "변화": 5,  # change
+    "성장": 5,  # growth
+    "흐름": 5,  # flow
+    "전체": 15,  # entire / all
+    "역사": 15,  # history
+    "장기": 10,  # long-term
+}
+def _detect_year_hint(question: str) -> int:
+    """질문에서 필요한 연도 범위 추출."""
+    range_match = re.search(r"(\d+)\s*(?:개년|년)", question)
+    if range_match:
+        value = int(range_match.group(1))
+        if 1 <= value <= 15:
+            return value
+    year_match = re.search(r"(20\d{2})", question)
+    if year_match:
+        return 3
+    for keyword, n in _YEAR_HINT_KEYWORDS.items():
+        if keyword in question:
+            return n
+    return 5
+# Accounts rendered for each statement, as (snake_id, Korean display label) pairs in
+# display order. Keys are the statement codes the section builders look up:
+# "BS" (재무상태표, balance sheet), "IS" (손익계산서, income statement),
+# "CF" (현금흐름표, cash-flow statement).
+_FE_DISPLAY_ACCOUNTS = {
+    "BS": [
+        ("total_assets", "자산총계"),
+        ("current_assets", "유동자산"),
+        ("noncurrent_assets", "비유동자산"),
+        ("total_liabilities", "부채총계"),
+        ("current_liabilities", "유동부채"),
+        ("noncurrent_liabilities", "비유동부채"),
+        ("owners_of_parent_equity", "자본총계"),
+        ("cash_and_cash_equivalents", "현금성자산"),
+        ("trade_and_other_receivables", "매출채권"),
+        ("inventories", "재고자산"),
+        ("tangible_assets", "유형자산"),
+        ("intangible_assets", "무형자산"),
+        ("shortterm_borrowings", "단기차입금"),
+        ("longterm_borrowings", "장기차입금"),
+    ],
+    "IS": [
+        ("sales", "매출액"),
+        ("cost_of_sales", "매출원가"),
+        ("gross_profit", "매출총이익"),
+        ("selling_and_administrative_expenses", "판관비"),
+        ("operating_profit", "영업이익"),
+        ("finance_income", "금융수익"),
+        ("finance_costs", "금융비용"),
+        ("profit_before_tax", "법인세차감전이익"),
+        ("income_taxes", "법인세비용"),
+        ("net_profit", "당기순이익"),
+    ],
+    "CF": [
+        ("operating_cashflow", "영업활동CF"),
+        ("investing_cashflow", "투자활동CF"),
+        ("cash_flows_from_financing_activities", "재무활동CF"),
+        ("cash_and_cash_equivalents_end", "기말현금"),
+    ],
+}
+# Reverse mapping: Korean label -> snakeId (used for Phase 5 validation).
+# Built by walking every statement in order; if two accounts shared a label the later
+# one would overwrite the earlier. The loop variables remain defined at module level.
+ACCOUNT_LABEL_TO_SNAKE: dict[str, str] = {}
+for _sj_accounts in _FE_DISPLAY_ACCOUNTS.values():
+    for _snake_id, _label in _sj_accounts:
+        ACCOUNT_LABEL_TO_SNAKE[_label] = _snake_id
+# Question type -> statement code ("BS"/"IS"/"CF") -> snake_ids relevant to that type.
+# Question types: 건전성 (financial soundness), 수익성 (profitability), 성장성 (growth),
+# 배당 (dividend), 현금 (cash). A statement code missing under a question type means no
+# account set is defined for that statement.
+# NOTE(review): presumably the inner sets are passed as `account_filter` /
+# `accountFilter` to the section builders - the consumer is not visible here, confirm.
+_QUESTION_ACCOUNT_FILTER: dict[str, dict[str, set[str]]] = {
+    "건전성": {
+        "BS": {
+            "total_assets",
+            "total_liabilities",
+            "owners_of_parent_equity",
+            "current_assets",
+            "current_liabilities",
+            "cash_and_cash_equivalents",
+            "shortterm_borrowings",
+            "longterm_borrowings",
+        },
+        "IS": {"operating_profit", "finance_costs", "net_profit"},
+        "CF": {"operating_cashflow", "investing_cashflow"},
+    },
+    "수익성": {
+        "IS": {
+            "sales",
+            "cost_of_sales",
+            "gross_profit",
+            "selling_and_administrative_expenses",
+            "operating_profit",
+            "net_profit",
+        },
+        "BS": {"owners_of_parent_equity", "total_assets"},
+    },
+    "성장성": {
+        "IS": {"sales", "operating_profit", "net_profit"},
+        "CF": {"operating_cashflow"},
+    },
+    "배당": {
+        "IS": {"net_profit"},
+        "BS": {"owners_of_parent_equity"},
+    },
+    "현금": {
+        "CF": {
+            "operating_cashflow",
+            "investing_cashflow",
+            "cash_flows_from_financing_activities",
+            "cash_and_cash_equivalents_end",
+        },
+        "BS": {"cash_and_cash_equivalents"},
+    },
+}
+def _get_quarter_counts(company: Any) -> dict[str, int]:
+    """company.timeseries periods에서 연도별 분기 수 계산."""
+    ts = getattr(company, "timeseries", None)
+    if ts is None:
+        return {}
+    _, periods = ts
+    counts: dict[str, int] = {}
+    for p in periods:
+        year = p.split("-")[0] if "-" in p else p[:4]
+        counts[year] = counts.get(year, 0) + 1
+    return counts
+def _build_finance_engine_section(
+    series: dict,
+    years: list[str],
+    sj_div: str,
+    n_years: int,
+    account_filter: set[str] | None = None,
+    quarter_counts: dict[str, int] | None = None,
+) -> str | None:
+    """financeEngine annual series → compact 마크다운 테이블.
+    Args:
+            account_filter: 이 set에 속한 snake_id만 표시. None이면 전체.
+    """
+    accounts = _FE_DISPLAY_ACCOUNTS.get(sj_div, [])
+    if account_filter:
+        accounts = [(sid, label) for sid, label in accounts if sid in account_filter]
+    if not accounts:
+        return None
+    display_years = years[-n_years:]
+    # 부분 연도 표시: IS/CF는 4분기 미만이면 "(~Q3)" 등 표시, BS는 시점잔액이므로 불필요
+    display_years_labeled = []
+    for y in display_years:
+        qc = (quarter_counts or {}).get(y, 4)
+        if sj_div != "BS" and qc < 4:
+            display_years_labeled.append(f"{y}(~Q{qc})")
+        else:
+            display_years_labeled.append(y)
+    display_years_reversed = list(reversed(display_years_labeled))
+    # 최신 연도가 부분이면 YoY 비교 무의미
+    latest_year = display_years[-1]
+    latest_partial = sj_div != "BS" and (quarter_counts or {}).get(latest_year, 4) < 4
+    sj_data = series.get(sj_div, {})
+    if not sj_data:
+        return None
+    rows_data = []
+    for snake_id, label in accounts:
+        vals = sj_data.get(snake_id)
+        if not vals:
+            continue
+        year_offset = len(years) - n_years
+        sliced = vals[year_offset:] if year_offset >= 0 else vals
+        has_data = any(v is not None for v in sliced)
+        if has_data:
+            rows_data.append((label, list(reversed(sliced))))
+    if not rows_data:
+        return None
+    sj_labels = {"BS": "재무상태표", "IS": "손익계산서", "CF": "현금흐름표"}
+    header = "| 계정 | " + " | ".join(display_years_reversed) + " | YoY |"
+    sep = "| --- | " + " | ".join(["---"] * len(display_years_reversed)) + " | --- |"
+    # 기간 메타데이터 명시
+    sj_meta = {"BS": "시점 잔액", "IS": "기간 flow (standalone)", "CF": "기간 flow (standalone)"}
+    meta_line = f"(단위: 억/조원 | {sj_meta.get(sj_div, 'standalone')})"
+    if latest_partial:
+        meta_line += f" ⚠️ {display_years_labeled[-1]}은 부분연도 — 연간 직접 비교 불가"
+    lines = [f"## {sj_labels.get(sj_div, sj_div)}", meta_line, header, sep]
+    for label, vals in rows_data:
+        cells = []
+        for v in vals:
+            cells.append(_format_won(v) if v is not None else "-")
+        # YoY: 부분 연도면 비교 불가
+        if latest_partial:
+            yoy_str = "-"
+        else:
+            yoy_str = _calc_yoy(vals[0], vals[1] if len(vals) > 1 else None)
+        lines.append(f"| {label} | " + " | ".join(cells) + f" | {yoy_str} |")
+    return "\n".join(lines)
+def _buildQuarterlySection(
+    series: dict,
+    periods: list[str],
+    sjDiv: str,
+    nQuarters: int = 8,
+    accountFilter: set[str] | None = None,
+) -> str | None:
+    """timeseries 분기별 standalone → compact 마크다운 테이블.
+    최근 nQuarters 분기만 표시. QoQ/YoY 컬럼 포함.
+    """
+    accounts = _FE_DISPLAY_ACCOUNTS.get(sjDiv, [])
+    if accountFilter:
+        accounts = [(sid, label) for sid, label in accounts if sid in accountFilter]
+    if not accounts:
+        return None
+    sjData = series.get(sjDiv, {})
+    if not sjData:
+        return None
+    displayPeriods = periods[-nQuarters:]
+    displayPeriodsReversed = list(reversed(displayPeriods))
+    rowsData = []
+    for snakeId, label in accounts:
+        vals = sjData.get(snakeId)
+        if not vals:
+            continue
+        offset = len(periods) - nQuarters
+        sliced = vals[offset:] if offset >= 0 else vals
+        hasData = any(v is not None for v in sliced)
+        if hasData:
+            rowsData.append((label, list(reversed(sliced))))
+    if not rowsData:
+        return None
+    sjLabels = {"BS": "재무상태표(분기)", "IS": "손익계산서(분기)", "CF": "현금흐름표(분기)"}
+    sjMeta = {"BS": "시점 잔액", "IS": "분기 standalone", "CF": "분기 standalone"}
+    header = "| 계정 | " + " | ".join(displayPeriodsReversed) + " | QoQ | YoY |"
+    sep = "| --- | " + " | ".join(["---"] * len(displayPeriodsReversed)) + " | --- | --- |"
+    metaLine = f"(단위: 억/조원 | {sjMeta.get(sjDiv, 'standalone')})"
+    lines = [f"## {sjLabels.get(sjDiv, sjDiv)}", metaLine, header, sep]
+    for label, vals in rowsData:
+        cells = [_format_won(v) if v is not None else "-" for v in vals]
+        qoq = _calc_yoy(vals[0], vals[1] if len(vals) > 1 else None)
+        yoyIdx = 4 if len(vals) > 4 else None
+        yoy = _calc_yoy(vals[0], vals[yoyIdx] if yoyIdx is not None else None)
+        lines.append(f"| {label} | " + " | ".join(cells) + f" | {qoq} | {yoy} |")
+    return "\n".join(lines)
+def _calc_yoy(current: float | None, previous: float | None) -> str:
+    """YoY 증감률 계산. 부호 전환 시 '-', |변동률|>50%면 ** 강조."""
+    from dartlab.core.finance.ratios import yoy_pct
+    pct = yoy_pct(current, previous)
+    if pct is None:
+        return "-"
+    sign = "+" if pct >= 0 else ""
+    marker = "**" if abs(pct) > 50 else ""
+    return f"{marker}{sign}{pct:.1f}%{marker}"
+def _build_ratios_section(
+    company: Any,
+    compact: bool = False,
+    q_types: list[str] | None = None,
+) -> str | None:
+    """financeEngine RatioResult → 마크다운 (질문 유형별 필터링).
+    q_types가 주어지면 관련 비율 그룹만 노출하여 토큰 절약.
+    None이면 전체 노출.
+    """
+    ratios = get_headline_ratios(company)
+    if ratios is None:
+        return None
+    if not hasattr(ratios, "roe"):
+        return None
+    isFinancial = False
+    sectorInfo = getattr(company, "sector", None)
+    if sectorInfo is not None:
+        try:
+            from dartlab.analysis.comparative.sector.types import Sector
+            isFinancial = sectorInfo.sector == Sector.FINANCIALS
+        except (ImportError, AttributeError):
+            isFinancial = False
+    # ── 판단 헬퍼 ──
+    def _judge(val: float | None, good: float, caution: float) -> str:
+        if val is None:
+            return "-"
+        return "양호" if val >= good else ("주의" if val >= caution else "위험")
+    def _judge_inv(val: float | None, good: float, caution: float) -> str:
+        if val is None:
+            return "-"
+        return "양호" if val <= good else ("주의" if val <= caution else "위험")
+    # ── 질문 유형 → 노출 그룹 매핑 ──
+    _Q_TYPE_TO_GROUPS: dict[str, list[str]] = {
+        "건전성": ["수익성_core", "안정성", "현금흐름", "복합"],
+        "수익성": ["수익성", "효율성", "복합"],
+        "성장성": ["수익성_core", "성장"],
+        "배당": ["수익성_core", "현금흐름"],
+        "리스크": ["안정성", "현금흐름", "복합"],
+        "투자": ["수익성_core", "성장", "현금흐름"],
+        "종합": ["수익성", "안정성", "성장", "효율성", "현금흐름", "복합"],
+    }
+    active_groups: set[str] = set()
+    if q_types:
+        for qt in q_types:
+            active_groups.update(_Q_TYPE_TO_GROUPS.get(qt, []))
+    if not active_groups:
+        active_groups = {"수익성", "안정성", "성장", "효율성", "현금흐름", "복합"}
+    # "수익성_core"는 수익성의 핵심만 (ROE, ROA, 영업이익률, 순이익률)
+    show_profitability_full = "수익성" in active_groups
+    show_profitability_core = show_profitability_full or "수익성_core" in active_groups
+    roeGood, roeCaution = (8, 5) if isFinancial else (10, 5)
+    roaGood, roaCaution = (0.5, 0.2) if isFinancial else (5, 2)
+    lines = ["## 핵심 재무비율 (자동계산)"]
+    # ── 수익성 ──
+    if show_profitability_core:
+        prof_rows: list[str] = []
+        if ratios.roe is not None:
+            prof_rows.append(f"| ROE | {ratios.roe:.1f}% | {_judge(ratios.roe, roeGood, roeCaution)} |")
+        if ratios.roa is not None:
+            prof_rows.append(f"| ROA | {ratios.roa:.1f}% | {_judge(ratios.roa, roaGood, roaCaution)} |")
+        if ratios.operatingMargin is not None:
+            prof_rows.append(f"| 영업이익률 | {ratios.operatingMargin:.1f}% | - |")
+        if not compact and ratios.netMargin is not None:
+            prof_rows.append(f"| 순이익률 | {ratios.netMargin:.1f}% | - |")
+        if show_profitability_full:
+            if ratios.grossMargin is not None:
+                prof_rows.append(f"| 매출총이익률 | {ratios.grossMargin:.1f}% | - |")
+            if ratios.ebitdaMargin is not None:
+                prof_rows.append(f"| EBITDA마진 | {ratios.ebitdaMargin:.1f}% | - |")
+            if not compact and ratios.roic is not None:
+                prof_rows.append(f"| ROIC | {ratios.roic:.1f}% | {_judge(ratios.roic, 15, 8)} |")
+        if prof_rows:
+            lines.append("\n### 수익성")
+            lines.append("| 지표 | 값 | 판단 |")
+            lines.append("| --- | --- | --- |")
+            lines.extend(prof_rows)
+    # ── 안정성 ──
+    if "안정성" in active_groups:
+        stab_rows: list[str] = []
+        if ratios.debtRatio is not None:
+            stab_rows.append(f"| 부채비율 | {ratios.debtRatio:.1f}% | {_judge_inv(ratios.debtRatio, 100, 200)} |")
+        if ratios.currentRatio is not None:
+            stab_rows.append(f"| 유동비율 | {ratios.currentRatio:.1f}% | {_judge(ratios.currentRatio, 150, 100)} |")
+        if not compact and ratios.quickRatio is not None:
+            stab_rows.append(f"| 당좌비율 | {ratios.quickRatio:.1f}% | {_judge(ratios.quickRatio, 100, 50)} |")
+        if not compact and ratios.equityRatio is not None:
+            stab_rows.append(f"| 자기자본비율 | {ratios.equityRatio:.1f}% | {_judge(ratios.equityRatio, 50, 30)} |")
+        if ratios.interestCoverage is not None:
+            stab_rows.append(
+                f"| 이자보상배율 | {ratios.interestCoverage:.1f}x | {_judge(ratios.interestCoverage, 5, 1)} |"
+            )
+        if not compact and ratios.debtToEbitda is not None:
+            stab_rows.append(f"| Debt/EBITDA | {ratios.debtToEbitda:.1f}x | {_judge_inv(ratios.debtToEbitda, 3, 5)} |")
+        if not compact and ratios.netDebt is not None:
+            stab_rows.append(
+                f"| 순차입금 | {_format_won(ratios.netDebt)} | {'양호' if ratios.netDebt <= 0 else '주의'} |"
+            )
+        if not compact and ratios.netDebtRatio is not None:
+            stab_rows.append(
+                f"| 순차입금비율 | {ratios.netDebtRatio:.1f}% | {_judge_inv(ratios.netDebtRatio, 30, 80)} |"
+            )
+        if stab_rows:
+            lines.append("\n### 안정성")
+            lines.append("| 지표 | 값 | 판단 |")
+            lines.append("| --- | --- | --- |")
+            lines.extend(stab_rows)
+    # ── 성장성 ──
+    if "성장" in active_groups:
+        grow_rows: list[str] = []
+        if ratios.revenueGrowth is not None:
+            grow_rows.append(f"| 매출성장률(YoY) | {ratios.revenueGrowth:.1f}% | - |")
+        if ratios.operatingProfitGrowth is not None:
+            grow_rows.append(f"| 영업이익성장률 | {ratios.operatingProfitGrowth:.1f}% | - |")
+        if ratios.netProfitGrowth is not None:
+            grow_rows.append(f"| 순이익성장률 | {ratios.netProfitGrowth:.1f}% | - |")
+        if ratios.revenueGrowth3Y is not None:
+            grow_rows.append(f"| 매출 3Y CAGR | {ratios.revenueGrowth3Y:.1f}% | - |")
+        if not compact and ratios.assetGrowth is not None:
+            grow_rows.append(f"| 자산성장률 | {ratios.assetGrowth:.1f}% | - |")
+        if grow_rows:
+            lines.append("\n### 성장성")
+            lines.append("| 지표 | 값 | 판단 |")
+            lines.append("| --- | --- | --- |")
+            lines.extend(grow_rows)
+    # ── 효율성 ──
+    if "효율성" in active_groups and not compact:
+        eff_rows: list[str] = []
+        if ratios.totalAssetTurnover is not None:
+            eff_rows.append(f"| 총자산회전율 | {ratios.totalAssetTurnover:.2f}x | - |")
+        if ratios.inventoryTurnover is not None:
+            eff_rows.append(f"| 재고자산회전율 | {ratios.inventoryTurnover:.1f}x | - |")
+        if ratios.receivablesTurnover is not None:
+            eff_rows.append(f"| 매출채권회전율 | {ratios.receivablesTurnover:.1f}x | - |")
+        if eff_rows:
+            lines.append("\n### 효율성")
+            lines.append("| 지표 | 값 | 판단 |")
+            lines.append("| --- | --- | --- |")
+            lines.extend(eff_rows)
+    # ── 현금흐름 ──
+    if "현금흐름" in active_groups:
+        cf_rows: list[str] = []
+        if ratios.fcf is not None:
+            cf_rows.append(f"| FCF | {_format_won(ratios.fcf)} | {'양호' if ratios.fcf > 0 else '주의'} |")
+        if ratios.operatingCfToNetIncome is not None:
+            quality = _judge(ratios.operatingCfToNetIncome, 100, 50)
+            cf_rows.append(f"| 영업CF/순이익 | {ratios.operatingCfToNetIncome:.0f}% | {quality} |")
+        if not compact and ratios.capexRatio is not None:
+            cf_rows.append(f"| CAPEX비율 | {ratios.capexRatio:.1f}% | - |")
+        if not compact and ratios.dividendPayoutRatio is not None:
+            cf_rows.append(f"| 배당성향 | {ratios.dividendPayoutRatio:.1f}% | - |")
+        if cf_rows:
+            lines.append("\n### 현금흐름")
+            lines.append("| 지표 | 값 | 판단 |")
+            lines.append("| --- | --- | --- |")
+            lines.extend(cf_rows)
+    # ── 복합 지표 ──
+    if "복합" in active_groups and not compact:
+        comp_lines: list[str] = []
+        # DuPont 분해
+        dm = getattr(ratios, "dupontMargin", None)
+        dt = getattr(ratios, "dupontTurnover", None)
+        dl = getattr(ratios, "dupontLeverage", None)
+        if dm is not None and dt is not None and dl is not None and ratios.roe is not None:
+            # 주요 동인 판별
+            if dm >= dt and dm >= dl:
+                driver = "수익성 주도형"
+            elif dt >= dm and dt >= dl:
+                driver = "효율성 주도형"
+            else:
+                driver = "레버리지 주도형"
+            comp_lines.append("\n### DuPont 분해")
+            comp_lines.append(
+                f"ROE {ratios.roe:.1f}% = 순이익률({dm:.1f}%) × 자산회전율({dt:.2f}x) × 레버리지({dl:.2f}x)"
+            )
+            comp_lines.append(f"→ **{driver}**")
+        # Piotroski F-Score
+        pf = getattr(ratios, "piotroskiFScore", None)
+        if pf is not None:
+            pf_label = "우수" if pf >= 7 else ("보통" if pf >= 4 else "취약")
+            comp_lines.append("\n### 복합 재무 지표")
+            comp_lines.append(f"- **Piotroski F-Score**: {pf}/9 ({pf_label}) — ≥7 우수, 4-6 보통, <4 취약")
+        # Altman Z-Score
+        az = getattr(ratios, "altmanZScore", None)
+        if az is not None:
+            az_label = "안전" if az > 2.99 else ("회색" if az >= 1.81 else "부실위험")
+            if pf is None:
+                comp_lines.append("\n### 복합 재무 지표")
+            comp_lines.append(f"- **Altman Z-Score**: {az:.2f} ({az_label}) — >2.99 안전, 1.81-2.99 회색, <1.81 부실")
+        # ROIC
+        if ratios.roic is not None:
+            roic_label = "우수" if ratios.roic >= 15 else ("적정" if ratios.roic >= 8 else "미흡")
+            comp_lines.append(f"- **ROIC**: {ratios.roic:.1f}% ({roic_label})")
+        # 이익의 질 — CCC
+        ccc = getattr(ratios, "ccc", None)
+        dso = getattr(ratios, "dso", None)
+        dio = getattr(ratios, "dio", None)
+        dpo = getattr(ratios, "dpo", None)
+        cfni = ratios.operatingCfToNetIncome
+        has_quality = ccc is not None or cfni is not None
+        if has_quality:
+            comp_lines.append("\n### 이익의 질")
+            if cfni is not None:
+                q = "양호" if cfni >= 100 else ("보통" if cfni >= 50 else "주의")
+                comp_lines.append(f"- 영업CF/순이익: {cfni:.0f}% ({q}) — ≥100% 양호")
+            if ccc is not None:
+                ccc_parts = []
+                if dso is not None:
+                    ccc_parts.append(f"DSO:{dso:.0f}")
+                if dio is not None:
+                    ccc_parts.append(f"DIO:{dio:.0f}")
+                if dpo is not None:
+                    ccc_parts.append(f"DPO:{dpo:.0f}")
+                detail = f" ({' + '.join(ccc_parts)})" if ccc_parts else ""
+                comp_lines.append(f"- CCC(현금전환주기): {ccc:.0f}일{detail}")
+        if comp_lines:
+            lines.extend(comp_lines)
+    # ── ratioSeries 3년 추세 ──
+    ratio_series = get_ratio_series(company)
+    if ratio_series is not None and hasattr(ratio_series, "roe") and ratio_series.roe:
+        trend_keys = [("roe", "ROE"), ("operatingMargin", "영업이익률"), ("debtRatio", "부채비율")]
+        if not compact and "성장" in active_groups:
+            trend_keys.append(("revenueGrowth", "매출성장률"))
+        trend_lines: list[str] = []
+        for key, label in trend_keys:
+            series_vals = getattr(ratio_series, key, None)
+            if series_vals and len(series_vals) >= 2:
+                recent = [f"{v:.1f}%" for v in series_vals[-3:] if v is not None]
+                if recent:
+                    arrow = (
+                        "↗" if series_vals[-1] > series_vals[-2] else "↘" if series_vals[-1] < series_vals[-2] else "→"
+                    )
+                    trend_lines.append(f"- {label}: {' → '.join(recent)} {arrow}")
+        if trend_lines:
+            lines.append("")
+            lines.append("### 추세 (최근 3년)")
+            lines.extend(trend_lines)
+    # ── TTM ──
+    ttm_lines: list[str] = []
+    if ratios.revenueTTM is not None:
+        ttm_lines.append(f"- TTM 매출: {_format_won(ratios.revenueTTM)}")
+    if ratios.operatingIncomeTTM is not None:
+        ttm_lines.append(f"- TTM 영업이익: {_format_won(ratios.operatingIncomeTTM)}")
+    if ratios.netIncomeTTM is not None:
+        ttm_lines.append(f"- TTM 순이익: {_format_won(ratios.netIncomeTTM)}")
+    if ttm_lines:
+        lines.append("")
+        lines.append("### TTM (최근 4분기 합산)")
+        lines.extend(ttm_lines)
+    # ── 경고 ──
+    if ratios.warnings:
+        lines.append("")
+        lines.append("### 경고")
+        max_warnings = 2 if compact else len(ratios.warnings)
+        for w in ratios.warnings[:max_warnings]:
+            lines.append(f"- ⚠️ {w}")
+    return "\n".join(lines)
+def detect_year_range(company: Any, tables: list[str]) -> dict | None:
+    """포함될 데이터의 연도 범위 감지."""
+    all_years: set[int] = set()
+    for name in tables:
+        try:
+            data = getattr(company, name, None)
+            if data is None:
+                continue
+            if isinstance(data, pl.DataFrame):
+                if "year" in data.columns:
+                    years = data["year"].unique().to_list()
+                    all_years.update(int(y) for y in years if y)
+                else:
+                    year_cols = [c for c in data.columns if c.isdigit() and len(c) == 4]
+                    all_years.update(int(c) for c in year_cols)
+        except _CONTEXT_ERRORS:
+            continue
+    if not all_years:
+        return None
+    sorted_years = sorted(all_years)
+    return {"min_year": sorted_years[0], "max_year": sorted_years[-1]}
+def scan_available_modules(company: Any) -> list[dict[str, Any]]:
+    """Return the modules of a Company instance that actually hold data.
+    Every name in MODULE_META is read from `company`. Names whose attribute is missing
+    or None are left out, as are names whose access raises one of _CONTEXT_ERRORS.
+    Returns:
+        One dict per available module with "name", "label" and "type". "type" is one
+        of "method", "table", "dict", "list" or "text"; "table" entries also carry
+        "rows" and "cols", and "dict"/"list" entries carry "rows" (their length), e.g.
+        [{"name": "BS", "label": "재무상태표", "type": "table", "rows": 25, "cols": 6}, ...]
+    """
+    available = []
+    for name, meta in MODULE_META.items():
+        try:
+            data = getattr(company, name, None)
+            if data is None:
+                continue
+            # Callables are listed as "method" without being invoked (calling e.g.
+            # fsSummary is expensive); classes are excluded from this shortcut.
+            if callable(data) and not isinstance(data, type):
+                info: dict[str, Any] = {"name": name, "label": meta.label, "type": "method"}
+                available.append(info)
+                continue
+            if isinstance(data, pl.DataFrame):
+                info = {
+                    "name": name,
+                    "label": meta.label,
+                    "type": "table",
+                    "rows": data.height,
+                    "cols": len(data.columns),
+                }
+            elif isinstance(data, dict):
+                info = {"name": name, "label": meta.label, "type": "dict", "rows": len(data)}
+            elif isinstance(data, list):
+                info = {"name": name, "label": meta.label, "type": "list", "rows": len(data)}
+            else:
+                # Anything else is reported as plain text, without a size.
+                info = {"name": name, "label": meta.label, "type": "text"}
+            available.append(info)
+        except _CONTEXT_ERRORS:
+            continue
+    return available

src/dartlab/ai/context/formatting.py ADDED Viewed

	@@ -0,0 +1,439 @@

+"""포맷팅·유틸리티 함수 — builder.py에서 분리.
+원 단위 변환, DataFrame→마크다운, 파생 지표 자동계산 등
+builder / finance_context 양쪽에서 재사용하는 순수 함수 모음.
+"""
+from __future__ import annotations
+from typing import Any
+import polars as pl
+from dartlab.ai.metadata import ModuleMeta
+# Exception types that the best-effort context helpers in this module catch and
+# swallow (see the `except _CONTEXT_ERRORS` clauses).
+_CONTEXT_ERRORS = (AttributeError, KeyError, OSError, RuntimeError, TypeError, ValueError)
+# -- Constants for the key-account filter --
+# Each set lists Korean account names; _filter_key_accounts keeps a row when its
+# "계정명" (account name) column contains any of them.
+# Balance sheet (BS) key accounts.
+_KEY_ACCOUNTS_BS = {
+    "자산총계",
+    "유동자산",
+    "비유동자산",
+    "부채총계",
+    "유동부채",
+    "비유동부채",
+    "자본총계",
+    "지배기업소유주지분",
+    "현금및현금성자산",
+    "매출채권",
+    "재고자산",
+    "유형자산",
+    "무형자산",
+    "투자부동산",
+    "단기차입금",
+    "장기차입금",
+    "사채",
+}
+# Income statement (IS) key accounts.
+_KEY_ACCOUNTS_IS = {
+    "매출액",
+    "매출원가",
+    "매출총이익",
+    "판매비와관리비",
+    "영업이익",
+    "영업손실",
+    "금융수익",
+    "금융비용",
+    "이자비용",
+    "이자수익",
+    "법인세비용차감전순이익",
+    "법인세비용",
+    "당기순이익",
+    "당기순손실",
+    "지배기업소유주지분순이익",
+}
+# Cash-flow statement (CF) key accounts.
+_KEY_ACCOUNTS_CF = {
+    "영업활동현금흐름",
+    "영업활동으로인한현금흐름",
+    "투자활동현금흐름",
+    "투자활동으로인한현금흐름",
+    "재무활동현금흐름",
+    "재무활동으로인한현금흐름",
+    "현금및현금성자산의순증가",
+    "현금및현금성자산의증감",
+    "기초현금및현금성자산",
+    "기말현금및현금성자산",
+}
+# Statement code -> key-account set; module names outside this map are not filtered.
+_KEY_ACCOUNTS_MAP = {
+    "BS": _KEY_ACCOUNTS_BS,
+    "IS": _KEY_ACCOUNTS_IS,
+    "CF": _KEY_ACCOUNTS_CF,
+}
+# ══════════════════════════════════════
+# 숫자 포맷팅
+# ══════════════════════════════════════
+def _format_won(val: float) -> str:
+    """원 단위 숫자를 읽기 좋은 한국어 단위로 변환."""
+    abs_val = abs(val)
+    sign = "-" if val < 0 else ""
+    if abs_val >= 1e12:
+        return f"{sign}{abs_val / 1e12:,.1f}조"
+    if abs_val >= 1e8:
+        return f"{sign}{abs_val / 1e8:,.0f}억"
+    if abs_val >= 1e4:
+        return f"{sign}{abs_val / 1e4:,.0f}만"
+    if abs_val >= 1:
+        return f"{sign}{abs_val:,.0f}"
+    return "0"
+def _format_krw(val: float) -> str:
+    """백만원 단위 숫자를 읽기 좋은 한국어 단위로 변환."""
+    abs_val = abs(val)
+    sign = "-" if val < 0 else ""
+    if abs_val >= 1_000_000:
+        return f"{sign}{abs_val / 1_000_000:,.1f}조"
+    if abs_val >= 10_000:
+        return f"{sign}{abs_val / 10_000:,.0f}억"
+    if abs_val >= 1:
+        return f"{sign}{abs_val:,.0f}"
+    if abs_val > 0:
+        return f"{sign}{abs_val:.4f}"
+    return "0"
+def _format_usd(val: float) -> str:
+    """USD 숫자를 읽기 좋은 영문 단위로 변환."""
+    abs_val = abs(val)
+    sign = "-" if val < 0 else ""
+    if abs_val >= 1e12:
+        return f"{sign}${abs_val / 1e12:,.1f}T"
+    if abs_val >= 1e9:
+        return f"{sign}${abs_val / 1e9:,.1f}B"
+    if abs_val >= 1e6:
+        return f"{sign}${abs_val / 1e6:,.0f}M"
+    if abs_val >= 1e3:
+        return f"{sign}${abs_val / 1e3:,.0f}K"
+    if abs_val >= 1:
+        return f"{sign}${abs_val:,.0f}"
+    return "$0"
+# ══════════════════════════════════════
+# 계정 필터
+# ══════════════════════════════════════
+def _filter_key_accounts(df: pl.DataFrame, module_name: str) -> pl.DataFrame:
+    """재무제표에서 핵심 계정만 필터링."""
+    if "계정명" not in df.columns or module_name not in _KEY_ACCOUNTS_MAP:
+        return df
+    key_set = _KEY_ACCOUNTS_MAP[module_name]
+    mask = pl.lit(False)
+    for keyword in key_set:
+        mask = mask | pl.col("계정명").str.contains(keyword)
+    filtered = df.filter(mask)
+    if filtered.height < 5:
+        return df
+    return filtered
+# ══════════════════════════════════════
+# 업종명 추출
+# ══════════════════════════════════════
+def _get_sector(company: Any) -> str | None:
+    """Company에서 업종명 추출."""
+    try:
+        overview = getattr(company, "companyOverview", None)
+        if isinstance(overview, dict):
+            sector = overview.get("indutyName") or overview.get("sector")
+            if sector:
+                return sector
+        detail = getattr(company, "companyOverviewDetail", None)
+        if isinstance(detail, dict):
+            sector = detail.get("sector") or detail.get("indutyName")
+            if sector:
+                return sector
+    except _CONTEXT_ERRORS:
+        pass
+    return None
+# ══════════════════════════════════════
+# DataFrame → 마크다운 변환
+# ══════════════════════════════════════
+def df_to_markdown(
+    df: pl.DataFrame,
+    max_rows: int = 30,
+    meta: ModuleMeta | None = None,
+    compact: bool = False,
+    market: str = "KR",
+) -> str:
+    """Convert a Polars DataFrame into a Markdown table with metadata annotations.
+    Args:
+        df: frame to render; None or an empty frame yields "(데이터 없음)".
+        max_rows: row limit used when `meta` is not given (otherwise meta.maxRows).
+        meta: optional module metadata supplying the unit, the row limit and
+            per-column descriptions.
+        compact: LLM-context mode - caps the output at 20 rows, omits the column
+            descriptions and, for KRW data, formats four-digit-year columns with
+            _format_krw (억/조 units).
+        market: "KR" selects Korean account labels, anything else English ones.
+    Returns:
+        Markdown text: optional unit/description lines, the table, and a truncation
+        note when rows were cut.
+    """
+    if df is None or df.height == 0:
+        return "(데이터 없음)"
+    # Replace snakeIds in the `account` column with Korean/English labels.
+    if "account" in df.columns:
+        try:
+            from dartlab.core.finance.labels import get_account_labels
+            locale = "kr" if market == "KR" else "en"
+            _labels = get_account_labels(locale)
+            df = df.with_columns(pl.col("account").replace(_labels).alias("account"))
+        except ImportError:
+            # Labels module unavailable: keep the raw snakeIds.
+            pass
+    effective_max = meta.maxRows if meta else max_rows
+    if compact:
+        effective_max = min(effective_max, 20)
+    # Newest year first, so truncation drops the oldest rows.
+    if "year" in df.columns:
+        df = df.sort("year", descending=True)
+    if df.height > effective_max:
+        display_df = df.head(effective_max)
+        truncated = True
+    else:
+        display_df = df
+        truncated = False
+    parts = []
+    # Without metadata, or with unit "백만원" (million won) / empty, values are treated as KRW.
+    is_krw = not meta or meta.unit in ("백만원", "")
+    if meta and meta.unit and meta.unit != "백만원":
+        parts.append(f"(단위: {meta.unit})")
+    elif compact and is_krw:
+        parts.append("(단위: 억/조원, 원본 백만원)")
+    # Non-compact mode: one line describing every column that has metadata.
+    if not compact and meta and meta.columns:
+        col_map = {c.name: c for c in meta.columns}
+        described = []
+        for col in display_df.columns:
+            if col in col_map:
+                c = col_map[col]
+                desc = f"`{col}`: {c.description}"
+                if c.unit:
+                    desc += f" ({c.unit})"
+                described.append(desc)
+        if described:
+            parts.append(" | ".join(described))
+    cols = display_df.columns
+    # Non-compact mode: header cells also carry the column description.
+    if not compact and meta and meta.columns:
+        col_map = {c.name: c for c in meta.columns}
+        header_cells = []
+        for col in cols:
+            if col in col_map:
+                header_cells.append(f"{col} ({col_map[col].description})")
+            else:
+                header_cells.append(col)
+        header = "| " + " | ".join(header_cells) + " |"
+    else:
+        header = "| " + " | ".join(cols) + " |"
+    sep = "| " + " | ".join(["---"] * len(cols)) + " |"
+    rows = []
+    for row in display_df.iter_rows():
+        cells = []
+        for i, val in enumerate(row):
+            if val is None:
+                cells.append("-")
+            elif isinstance(val, (int, float)):
+                col_name = cols[i]
+                # Branch order matters: compact KRW year columns first, then floats,
+                # then year-like integers (no thousands separator), then other integers.
+                if compact and is_krw and col_name.isdigit() and len(col_name) == 4:
+                    cells.append(_format_krw(float(val)))
+                elif isinstance(val, float):
+                    if abs(val) >= 1:
+                        cells.append(f"{val:,.0f}")
+                    else:
+                        cells.append(f"{val:.4f}")
+                elif col_name == "year" or (isinstance(val, int) and 1900 <= val <= 2100):
+                    cells.append(str(val))
+                else:
+                    cells.append(f"{val:,}")
+            else:
+                cells.append(str(val))
+        rows.append("| " + " | ".join(cells) + " |")
+    parts.append("\n".join([header, sep] + rows))
+    if truncated:
+        parts.append(f"(상위 {effective_max}행 표시, 전체 {df.height}행)")
+    return "\n".join(parts)
+# ══════════════════════════════════════
+# 파생 지표 자동계산
+# ══════════════════════════════════════
+def _find_account_value(df: pl.DataFrame, keyword: str, year_col: str) -> float | None:
+    """계정명에서 키워드를 포함하는 행의 값 추출."""
+    if "계정명" not in df.columns or year_col not in df.columns:
+        return None
+    matched = df.filter(pl.col("계정명").str.contains(keyword))
+    if matched.height == 0:
+        return None
+    val = matched.row(0, named=True).get(year_col)
+    return val if isinstance(val, (int, float)) else None
+def _compute_derived_metrics(name: str, df: pl.DataFrame, company: Any = None) -> str | None:
+    """Derive YoY growth rates and headline ratios from a core financial statement.
+    Covers growth rates and margins plus interest coverage, ROE/ROA, FCF and a
+    cash-flow sign pattern. Year columns are the columns whose name is a
+    4-digit string; at least two of them are required.
+    Args:
+        name: Statement identifier; anything other than "BS", "IS" or "CF" yields None.
+        df: Statement table queried through ``_find_account_value``.
+        company: Optional object whose ``BS`` attribute (a polars DataFrame) is
+            used to add ROE/ROA when ``name`` is "IS".
+    Returns:
+        A Markdown section headed "### 주요 지표 (자동계산)" with one bullet per
+        metric, or None when no metric could be computed.
+    """
+    if name not in ("BS", "IS", "CF") or df is None or df.height == 0:
+        return None
+    # Year columns, newest first (string sort works because all names are 4 digits).
+    year_cols = sorted(
+        [c for c in df.columns if c.isdigit() and len(c) == 4],
+        reverse=True,
+    )
+    if len(year_cols) < 2:
+        return None
+    lines = []
+    if name == "IS":
+        # account keyword -> label used in the output bullet
+        targets = {
+            "매출액": "매출 성장률",
+            "영업이익": "영업이익 성장률",
+            "당기순이익": "순이익 성장률",
+        }
+        for acct, label in targets.items():
+            metrics = []
+            # YoY growth for at most the three most recent year pairs.
+            for i in range(min(len(year_cols) - 1, 3)):
+                cur = _find_account_value(df, acct, year_cols[i])
+                prev = _find_account_value(df, acct, year_cols[i + 1])
+                if cur is not None and prev is not None and prev != 0:
+                    # abs() keeps the sign of the change meaningful when the base is negative.
+                    yoy = (cur - prev) / abs(prev) * 100
+                    metrics.append(f"{year_cols[i]}/{year_cols[i + 1]}: {yoy:+.1f}%")
+            if metrics:
+                lines.append(f"- {label}: {', '.join(metrics)}")
+        # Operating margin and net margin for the latest year.
+        latest = year_cols[0]
+        rev = _find_account_value(df, "매출액", latest)
+        oi = _find_account_value(df, "영업이익", latest)
+        ni = _find_account_value(df, "당기순이익", latest)
+        # A missing or zero revenue skips both margins.
+        if rev and rev != 0:
+            if oi is not None:
+                lines.append(f"- {latest} 영업이익률: {oi / rev * 100:.1f}%")
+            if ni is not None:
+                lines.append(f"- {latest} 순이익률: {ni / rev * 100:.1f}%")
+        # Interest coverage (operating income / interest expense); falls back to
+        # the finance-cost account when no interest-expense row is found.
+        interest = _find_account_value(df, "이자비용", latest)
+        if interest is None:
+            interest = _find_account_value(df, "금융비용", latest)
+        if oi is not None and interest is not None and interest != 0:
+            icr = oi / abs(interest)
+            lines.append(f"- {latest} 이자보상배율: {icr:.1f}x")
+        # ROE (net income / total equity) and ROA (net income / total assets),
+        # only when the company object exposes a balance sheet with the same year.
+        if company and ni is not None:
+            try:
+                bs = getattr(company, "BS", None)
+                if isinstance(bs, pl.DataFrame) and latest in bs.columns:
+                    equity = _find_account_value(bs, "자본총계", latest)
+                    if equity and equity != 0:
+                        roe = ni / equity * 100
+                        lines.append(f"- {latest} ROE: {roe:.1f}%")
+                    total_asset = _find_account_value(bs, "자산총계", latest)
+                    if total_asset and total_asset != 0:
+                        roa = ni / total_asset * 100
+                        lines.append(f"- {latest} ROA: {roa:.1f}%")
+            # Best effort: errors in _CONTEXT_ERRORS (defined outside this block) are ignored.
+            except _CONTEXT_ERRORS:
+                pass
+    elif name == "BS":
+        latest = year_cols[0]
+        debt = _find_account_value(df, "부채총계", latest)
+        equity = _find_account_value(df, "자본총계", latest)
+        ca = _find_account_value(df, "유동자산", latest)
+        cl = _find_account_value(df, "유동부채", latest)
+        ta = _find_account_value(df, "자산총계", latest)
+        # Debt-to-equity, current ratio and liabilities-to-assets for the latest year.
+        if debt is not None and equity is not None and equity != 0:
+            lines.append(f"- {latest} 부채비율: {debt / equity * 100:.1f}%")
+        if ca is not None and cl is not None and cl != 0:
+            lines.append(f"- {latest} 유동비율: {ca / cl * 100:.1f}%")
+        if debt is not None and ta is not None and ta != 0:
+            lines.append(f"- {latest} 부채총계/자산총계: {debt / ta * 100:.1f}%")
+        # Total-asset growth for at most the two most recent year pairs.
+        for i in range(min(len(year_cols) - 1, 2)):
+            cur = _find_account_value(df, "자산총계", year_cols[i])
+            prev = _find_account_value(df, "자산총계", year_cols[i + 1])
+            if cur is not None and prev is not None and prev != 0:
+                yoy = (cur - prev) / abs(prev) * 100
+                lines.append(f"- 총자산 증가율 {year_cols[i]}/{year_cols[i + 1]}: {yoy:+.1f}%")
+    elif name == "CF":
+        latest = year_cols[0]
+        op_cf = _find_account_value(df, "영업활동", latest)
+        inv_cf = _find_account_value(df, "투자활동", latest)
+        fin_cf = _find_account_value(df, "재무활동", latest)
+        if op_cf is not None and inv_cf is not None:
+            # FCF is approximated here as operating CF plus investing CF.
+            fcf = op_cf + inv_cf
+            lines.append(f"- {latest} FCF(영업CF+투자CF): {_format_krw(fcf)}")
+        # Interpret the sign pattern of the three cash-flow sections (zero counts as "+").
+        if op_cf is not None and inv_cf is not None and fin_cf is not None:
+            pattern = f"{'+' if op_cf >= 0 else '-'}/{'+' if inv_cf >= 0 else '-'}/{'+' if fin_cf >= 0 else '-'}"
+            pattern_desc = _interpret_cf_pattern(op_cf >= 0, inv_cf >= 0, fin_cf >= 0)
+            lines.append(f"- {latest} CF 패턴(영업/투자/재무): {pattern} → {pattern_desc}")
+        # Operating cash-flow change for at most the two most recent year pairs.
+        for i in range(min(len(year_cols) - 1, 2)):
+            cur = _find_account_value(df, "영업활동", year_cols[i])
+            prev = _find_account_value(df, "영업활동", year_cols[i + 1])
+            if cur is not None and prev is not None and prev != 0:
+                yoy = (cur - prev) / abs(prev) * 100
+                lines.append(f"- 영업활동CF 변동 {year_cols[i]}/{year_cols[i + 1]}: {yoy:+.1f}%")
+    if not lines:
+        return None
+    return "### 주요 지표 (자동계산)\n" + "\n".join(lines)
+def _interpret_cf_pattern(op_pos: bool, inv_pos: bool, fin_pos: bool) -> str:
+    """현금흐름 패턴 해석."""
+    if op_pos and not inv_pos and not fin_pos:
+        return "우량 기업형 (영업이익으로 투자+상환)"
+    if op_pos and not inv_pos and fin_pos:
+        return "성장 투자형 (영업+차입으로 적극 투자)"
+    if op_pos and inv_pos and not fin_pos:
+        return "구조조정형 (자산 매각+부채 상환)"
+    if not op_pos and not inv_pos and fin_pos:
+        return "위험 신호 (영업적자인데 차입으로 투자)"
+    if not op_pos and inv_pos and fin_pos:
+        return "위기 관리형 (자산 매각+차입으로 영업 보전)"
+    if not op_pos and inv_pos and not fin_pos:
+        return "축소형 (자산 매각으로 부채 상환)"
+    return "기타 패턴"

src/dartlab/ai/context/snapshot.py ADDED Viewed

	@@ -0,0 +1,198 @@

+"""핵심 수치 스냅샷 빌드 — server 의존성 없는 순수 로직.
+server/chat.py의 build_snapshot()에서 추출.
+"""
+from __future__ import annotations
+from typing import Any
+from dartlab.ai.context.company_adapter import get_headline_ratios
+def _fmt(val: float | int | None, suffix: str = "") -> str | None:
+    if val is None:
+        return None
+    abs_v = abs(val)
+    sign = "-" if val < 0 else ""
+    if abs_v >= 1e12:
+        return f"{sign}{abs_v / 1e12:,.1f}조{suffix}"
+    if abs_v >= 1e8:
+        return f"{sign}{abs_v / 1e8:,.0f}억{suffix}"
+    if abs_v >= 1e4:
+        return f"{sign}{abs_v / 1e4:,.0f}만{suffix}"
+    if abs_v >= 1:
+        return f"{sign}{abs_v:,.0f}{suffix}"
+    return f"0{suffix}"
+def _pct(val: float | None) -> str | None:
+    return f"{val:.1f}%" if val is not None else None
+def _judge_pct(val: float | None, good: float, caution: float) -> str | None:
+    if val is None:
+        return None
+    if val >= good:
+        return "good"
+    if val >= caution:
+        return "caution"
+    return "danger"
+def _judge_pct_inv(val: float | None, good: float, caution: float) -> str | None:
+    if val is None:
+        return None
+    if val <= good:
+        return "good"
+    if val <= caution:
+        return "caution"
+    return "danger"
+def build_snapshot(company: Any, *, includeInsights: bool = True) -> dict | None:
+    """Build the at-a-glance snapshot payload from headline ratios and the annual series.
+    Args:
+        company: Company object handed to ``get_headline_ratios``; its optional
+            ``sector``, ``annual`` and ``stockCode`` attributes are also read.
+        includeInsights: When True, the insight pipeline is run and its grades
+            and anomaly count are attached (best effort).
+    Returns:
+        A dict with "items" (list of ``{"label", "value", "status"}`` dicts) and,
+        when available, "trend", "warnings", "grades" and "anomalyCount".
+        None when ratios are unavailable, lack ``revenueTTM``, or no item
+        could be produced.
+    """
+    ratios = get_headline_ratios(company)
+    if ratios is None:
+        return None
+    if not hasattr(ratios, "revenueTTM"):
+        return None
+    # Financial-sector companies are graded against different ROE/ROA thresholds.
+    isFinancial = False
+    sectorInfo = getattr(company, "sector", None)
+    if sectorInfo is not None:
+        try:
+            from dartlab.analysis.comparative.sector.types import Sector
+            isFinancial = sectorInfo.sector == Sector.FINANCIALS
+        except (ImportError, AttributeError):
+            isFinancial = False
+    items: list[dict[str, Any]] = []
+    # (good, caution) thresholds in percent.
+    roeGood, roeCaution = (8, 5) if isFinancial else (10, 5)
+    roaGood, roaCaution = (0.5, 0.2) if isFinancial else (5, 2)
+    # Items are appended in display order; each metric is skipped when its value is None.
+    if ratios.revenueTTM is not None:
+        items.append({"label": "매출(TTM)", "value": _fmt(ratios.revenueTTM), "status": None})
+    if ratios.operatingIncomeTTM is not None:
+        items.append(
+            {
+                "label": "영업이익(TTM)",
+                "value": _fmt(ratios.operatingIncomeTTM),
+                "status": "good" if ratios.operatingIncomeTTM > 0 else "danger",
+            }
+        )
+    if ratios.netIncomeTTM is not None:
+        items.append(
+            {
+                "label": "순이익(TTM)",
+                "value": _fmt(ratios.netIncomeTTM),
+                "status": "good" if ratios.netIncomeTTM > 0 else "danger",
+            }
+        )
+    if ratios.operatingMargin is not None:
+        items.append(
+            {
+                "label": "영업이익률",
+                "value": _pct(ratios.operatingMargin),
+                "status": _judge_pct(ratios.operatingMargin, 10, 5),
+            }
+        )
+    if ratios.roe is not None:
+        items.append({"label": "ROE", "value": _pct(ratios.roe), "status": _judge_pct(ratios.roe, roeGood, roeCaution)})
+    if ratios.roa is not None:
+        items.append({"label": "ROA", "value": _pct(ratios.roa), "status": _judge_pct(ratios.roa, roaGood, roaCaution)})
+    # Debt ratio is lower-is-better, hence the inverted grader.
+    if ratios.debtRatio is not None:
+        items.append(
+            {
+                "label": "부채비율",
+                "value": _pct(ratios.debtRatio),
+                "status": _judge_pct_inv(ratios.debtRatio, 100, 200),
+            }
+        )
+    if ratios.currentRatio is not None:
+        items.append(
+            {
+                "label": "유동비율",
+                "value": _pct(ratios.currentRatio),
+                "status": _judge_pct(ratios.currentRatio, 150, 100),
+            }
+        )
+    if ratios.fcf is not None:
+        items.append({"label": "FCF", "value": _fmt(ratios.fcf), "status": "good" if ratios.fcf > 0 else "danger"})
+    if ratios.revenueGrowth3Y is not None:
+        items.append(
+            {
+                "label": "매출 3Y CAGR",
+                "value": _pct(ratios.revenueGrowth3Y),
+                "status": _judge_pct(ratios.revenueGrowth3Y, 5, 0),
+            }
+        )
+    if ratios.roic is not None:
+        items.append(
+            {
+                "label": "ROIC",
+                "value": _pct(ratios.roic),
+                "status": _judge_pct(ratios.roic, 15, 8),
+            }
+        )
+    # Interest coverage is a multiple, not a percentage, but shares the threshold grader.
+    if ratios.interestCoverage is not None:
+        items.append(
+            {
+                "label": "이자보상배율",
+                "value": f"{ratios.interestCoverage:.1f}x",
+                "status": _judge_pct(ratios.interestCoverage, 5, 1),
+            }
+        )
+    # The two scores below are read with getattr, so ratios objects without them are tolerated.
+    pf = getattr(ratios, "piotroskiFScore", None)
+    if pf is not None:
+        items.append(
+            {
+                "label": "Piotroski F",
+                "value": f"{pf}/9",
+                "status": "good" if pf >= 7 else ("caution" if pf >= 4 else "danger"),
+            }
+        )
+    az = getattr(ratios, "altmanZScore", None)
+    if az is not None:
+        items.append(
+            {
+                "label": "Altman Z",
+                "value": f"{az:.2f}",
+                "status": "good" if az > 2.99 else ("caution" if az >= 1.81 else "danger"),
+            }
+        )
+    # Revenue trend: the last (up to) five entries of the annual sales series.
+    annual = getattr(company, "annual", None)
+    trend = None
+    if annual is not None:
+        # NOTE(review): assumes `annual` unpacks into (series, years) — confirm against the company type.
+        series, years = annual
+        if years and len(years) >= 2:
+            rev_list = series.get("IS", {}).get("sales")
+            if rev_list:
+                n = min(5, len(rev_list))
+                recent_years = years[-n:]
+                recent_vals = rev_list[-n:]
+                trend = {"years": recent_years, "values": list(recent_vals)}
+    if not items:
+        return None
+    snapshot: dict[str, Any] = {"items": items}
+    if trend:
+        snapshot["trend"] = trend
+    # At most three warnings are surfaced.
+    if ratios.warnings:
+        snapshot["warnings"] = ratios.warnings[:3]
+    if includeInsights:
+        # Best effort: any of the listed failures leaves the snapshot without insight fields.
+        try:
+            from dartlab.analysis.financial.insight.pipeline import analyze as insight_analyze
+            insight_result = insight_analyze(company.stockCode, company=company)
+            if insight_result is not None:
+                snapshot["grades"] = insight_result.grades()
+                snapshot["anomalyCount"] = len(insight_result.anomalies)
+        except (ImportError, AttributeError, FileNotFoundError, OSError, RuntimeError, TypeError, ValueError):
+            pass
+    return snapshot

src/dartlab/ai/conversation/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """AI conversation package."""

src/dartlab/ai/conversation/data_ready.py ADDED Viewed

	@@ -0,0 +1,71 @@

+"""AI 분석 전 데이터 준비 상태를 요약하는 헬퍼."""
+from __future__ import annotations
+from datetime import datetime
+from typing import Any
+# Data categories whose local parquet file is checked, in reporting order.
+_DATA_CATEGORIES = ("docs", "finance", "report")
+def getDataReadyStatus(stockCode: str) -> dict[str, Any]:
+    """종목의 docs/finance/report 로컬 준비 상태를 반환한다."""
+    from dartlab.core.dataLoader import _dataDir
+    categories: dict[str, dict[str, Any]] = {}
+    available: list[str] = []
+    missing: list[str] = []
+    for category in _DATA_CATEGORIES:
+        filePath = _dataDir(category) / f"{stockCode}.parquet"
+        ready = filePath.exists()
+        updatedAt = None
+        if ready:
+            updatedAt = datetime.fromtimestamp(filePath.stat().st_mtime).strftime("%Y-%m-%d %H:%M")
+            available.append(category)
+        else:
+            missing.append(category)
+        categories[category] = {
+            "ready": ready,
+            "updatedAt": updatedAt,
+        }
+    return {
+        "stockCode": stockCode,
+        "allReady": not missing,
+        "available": available,
+        "missing": missing,
+        "categories": categories,
+    }
+def formatDataReadyStatus(stockCode: str, *, detailed: bool = False) -> str:
+    """데이터 준비 상태를 LLM/UI용 텍스트로 렌더링한다."""
+    status = getDataReadyStatus(stockCode)
+    if not detailed:
+        readyText = ", ".join(status["available"]) if status["available"] else "없음"
+        missingText = ", ".join(status["missing"]) if status["missing"] else "없음"
+        if status["allReady"]:
+            return "- 데이터 상태: docs, finance, report가 모두 준비되어 있습니다."
+        return (
+            f"- 데이터 상태: 준비됨={readyText}; 누락={missingText}. "
+            "누락된 데이터가 있으면 답변 범위가 제한될 수 있습니다."
+        )
+    lines = [f"## {stockCode} 데이터 상태", ""]
+    for category in _DATA_CATEGORIES:
+        info = status["categories"][category]
+        if info["ready"]:
+            lines.append(f"- **{category}**: ✅ 있음 (최종 갱신: {info['updatedAt']})")
+        else:
+            lines.append(f"- **{category}**: ❌ 없음")
+    if status["allReady"]:
+        lines.append("\n모든 데이터가 준비되어 있습니다. 바로 분석을 진행할 수 있습니다.")
+    else:
+        lines.append(
+            "\n일부 데이터가 없습니다. `download_data` 도구로 다운로드하거나, 사용자에게 다운로드 여부를 물어보세요."
+        )
+    return "\n".join(lines)

src/dartlab/ai/conversation/dialogue.py ADDED Viewed

	@@ -0,0 +1,476 @@

+"""대화 상태/모드 분류 — server 의존성 없는 순수 로직.
+server/dialogue.py에서 추출. 경량 타입(types.py) 기반.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+from typing import Any
+from ..types import HistoryItem, ViewContextInfo
+from .intent import has_analysis_intent, is_meta_question
+# Matches a bracketed viewer marker embedded in the question text:
+# "[사용자가 현재 <company>(<stock>) 공시를 보고 있습니다 — 현재 섹션: <label>(<topic>)]".
+# The "— 현재 섹션: ..." part is optional, so `label` and `topic` may be None.
+_LEGACY_VIEWER_RE = re.compile(
+    r"\[사용자가 현재\s+(?P<company>.+?)\((?P<stock>[A-Za-z0-9]+)\)\s+공시를 보고 있습니다"
+    r"(?:\s+—\s+현재 섹션:\s+(?P<label>.+?)\((?P<topic>[^()]+)\))?\]",
+)
+# Matches a bracketed data marker: '[사용자가 현재 "<label>" 데이터를 보고 있습니다]'.
+_LEGACY_DATA_RE = re.compile(r'\[사용자가 현재\s+"(?P<label>.+?)"\s+데이터를 보고 있습니다\]')
+# Substrings that, when present in the lower-cased question, classify it as a coding request.
+_CODING_KEYWORDS = (
+    "코드",
+    "버그",
+    "에러",
+    "리팩터",
+    "리팩토링",
+    "파일",
+    "함수",
+    "테스트",
+    "구현",
+    "수정",
+    "patch",
+    "diff",
+    "workspace",
+    "cli",
+    "codex",
+)
+# Substrings that mark a "what data/features are available" style question.
+_EXPLORE_KEYWORDS = (
+    "어떤 데이터",
+    "무슨 데이터",
+    "뭘 볼 수",
+    "뭐가 있어",
+    "어떤 기능",
+    "가능한 것",
+    "가능한거",
+    "범위",
+    "얼마나",
+    "더 받을 수",
+    "추가 수집",
+    "openapi",
+)
+# Question prefixes that signal a follow-up to the previous turn.
+_FOLLOW_UP_PREFIXES = ("그럼", "그러면", "이건", "이거", "그거", "왜", "어째서", "더", "계속", "이어")
+# Substrings that signal a "show me / open the viewer" request.
+_VIEWER_INTENT_KEYWORDS = (
+    "보여줘",
+    "보여 줘",
+    "보여주세요",
+    "열어줘",
+    "열어 줘",
+    "공시 보기",
+    "공시 열기",
+    "원문 보기",
+    "원문 보여",
+    "sections 보여",
+    "section 보여",
+    "show me",
+    "open viewer",
+)
+# Dialogue mode id -> human-readable (Korean) label shown in the policy text.
+_DIALOGUE_MODE_LABELS = {
+    "capability": "기능 탐색",
+    "coding": "코딩 작업",
+    "company_explore": "회사 탐색",
+    "company_analysis": "회사 분석",
+    "follow_up": "후속 질문",
+    "general_chat": "일반 대화",
+}
+# Dialogue mode id -> description of what the user is trying to achieve.
+# Indexed directly by mode, so every mode id needs an entry here.
+_USER_GOAL_LABELS = {
+    "capability": "지금 가능한 기능/범위를 확인",
+    "coding": "코드 작업 실행 또는 검토",
+    "company_explore": "현재 회사에서 볼 수 있는 데이터와 경로 확인",
+    "company_analysis": "현재 회사의 구체적 분석",
+    "follow_up": "이전 맥락을 이어서 추가 확인",
+    "general_chat": "일반 질문 또는 가벼운 대화",
+}
+# "<previous mode>→<current mode>" -> guidance line added when the mode changes
+# between turns. Transitions without an entry produce no hint.
+_STATE_TRANSITION_HINTS: dict[str, str] = {
+    "general_chat→company_analysis": "일반 대화에서 분석으로 전환됨. 바로 분석 결과를 제시하세요. 이전 잡담 맥락은 무시.",
+    "general_chat→company_explore": "회사 탐색으로 전환됨. 해당 기업의 데이터 현황을 먼저 알려주세요.",
+    "company_analysis→follow_up": "심화 질문. 직전 분석의 핵심 수치를 기억하고 이어가세요.",
+    "company_analysis→general_chat": "분석에서 일반 대화로 전환됨. 짧고 친근하게.",
+    "company_explore→company_analysis": "탐색에서 분석으로 전환됨. 구체적 수치와 판단을 제시하세요.",
+    "follow_up→company_analysis": "새로운 분석 요청. 이전 맥락 참고하되 새 질문에 집중.",
+    "capability→company_analysis": "기능 질문 후 분석 요청. 바로 분석 결과를 제시하세요.",
+    "coding→company_analysis": "코드 작업에서 분석으로 전환됨. 코드 맥락은 내려놓고 재무 분석에 집중.",
+}
+# ── topic hint mapping ──
+# Keyword found in the question -> topic id to open in the viewer.
+# Several hints are substrings of longer ones (e.g. "재무" inside "연결재무",
+# "주주" inside "최대주주"), so the order in which consumers test them matters.
+_TOPIC_HINTS: dict[str, str] = {
+    "사업": "businessOverview",
+    "사업 개요": "businessOverview",
+    "사업개요": "businessOverview",
+    "사업의 개요": "businessOverview",
+    "배당": "dividend",
+    "직원": "employee",
+    "임원": "executive",
+    "주주": "majorHolder",
+    "최대주주": "majorHolder",
+    "감사": "audit",
+    "리스크": "riskManagement",
+    "위험": "riskManagement",
+    "소송": "litigation",
+    "회사 개요": "companyOverview",
+    "회사개요": "companyOverview",
+    "재무": "financialStatements",
+    "연결재무": "consolidatedStatements",
+    "주석": "financialNotes",
+    "내부통제": "internalControl",
+    "투자": "investmentInOtherDetail",
+    "자회사": "subsidiaryDetail",
+    "R&D": "rndDetail",
+    "연구개발": "rndDetail",
+    "제품": "productService",
+    "매출": "salesRevenue",
+    "자본변동": "capitalChange",
+    "자금조달": "fundraising",
+}
+@dataclass(frozen=True)
+class ConversationState:
+    """Immutable per-turn conversation context.
+    Field notes describe how ``build_conversation_state`` populates each value.
+    """
+    question: str  # question text with any legacy view-context marker removed
+    dialogue_mode: str  # mode id, e.g. "company_analysis" or "general_chat"
+    user_goal: str  # goal description looked up from the dialogue mode
+    company: str | None = None  # company name
+    stock_code: str | None = None  # stock code of the current company
+    market: str | None = None  # lower-cased market id, e.g. "dart" or "edgar"
+    topic: str | None = None  # topic currently viewed, or carried over from history
+    topic_label: str | None = None  # display label for the topic (falls back to the topic id)
+    period: str | None = None  # period of the viewer context, when one is active
+    viewer_data: dict | None = None  # extra payload of the viewer context, when one is active
+    question_types: tuple[str, ...] = ()  # question types detected for this turn
+    modules: tuple[str, ...] = ()  # modules recorded in the latest history metadata
+    prev_dialogue_mode: str | None = None  # dialogue mode recorded in the latest history metadata
+    prev_question_types: tuple[str, ...] = ()  # question types recorded in the latest history metadata
+    turn_count: int = 0  # number of history items
+# ── 내부 헬퍼 ──
+def _infer_market(
+    *,
+    company: Any | None = None,
+    stock_code: str | None = None,
+    view_context: ViewContextInfo | None = None,
+    history_market: str | None = None,
+) -> str | None:
+    if view_context and view_context.company and view_context.company.market:
+        return view_context.company.market.lower()
+    if history_market:
+        return history_market.lower()
+    company_market = getattr(company, "market", None)
+    if isinstance(company_market, str) and company_market.strip():
+        return company_market.lower()
+    code = stock_code or getattr(company, "stockCode", None) or getattr(company, "ticker", None)
+    if isinstance(code, str) and code:
+        return "dart" if code.isdigit() and len(code) == 6 else "edgar"
+    return None
+def _last_history_meta(history: list[HistoryItem] | None) -> Any | None:
+    if not history:
+        return None
+    for item in reversed(history):
+        if item.meta:
+            return item.meta
+    return None
+def _parse_legacy_view_context(question: str) -> tuple[str, ViewContextInfo | None]:
+    from ..types import ViewContextCompany
+    cleaned = question
+    viewer_match = _LEGACY_VIEWER_RE.search(question)
+    if viewer_match:
+        cleaned = cleaned.replace(viewer_match.group(0), "").strip()
+        return (
+            cleaned,
+            ViewContextInfo(
+                type="viewer",
+                company=ViewContextCompany(
+                    company=viewer_match.group("company"),
+                    corpName=viewer_match.group("company"),
+                    stockCode=viewer_match.group("stock"),
+                ),
+                topic=viewer_match.group("topic"),
+                topicLabel=viewer_match.group("label"),
+            ),
+        )
+    data_match = _LEGACY_DATA_RE.search(question)
+    if data_match:
+        cleaned = cleaned.replace(data_match.group(0), "").strip()
+        return cleaned, ViewContextInfo(type="data", data={"label": data_match.group("label")})
+    return cleaned, None
+def _classify_dialogue_mode(question: str, *, has_company: bool) -> str:
+    lowered = question.lower().strip()
+    if any(keyword in lowered for keyword in _CODING_KEYWORDS):
+        return "coding"
+    if is_meta_question(question):
+        return "capability"
+    if has_company:
+        if has_analysis_intent(question):
+            return "company_analysis"
+        if any(keyword in lowered for keyword in _EXPLORE_KEYWORDS):
+            return "company_explore"
+        if len(question.strip()) <= 18 or any(lowered.startswith(prefix) for prefix in _FOLLOW_UP_PREFIXES):
+            return "follow_up"
+        return "company_explore"
+    return "general_chat"
+# ── 공개 API ──
+def detect_viewer_intent(question: str, *, topics: list[str] | None = None) -> dict[str, str] | None:
+    """질문에서 '보여줘' 의도 + topic을 감지한다.
+    Returns:
+        {"topic": "businessOverview"} 또는 None.
+        topic 특정 불가 시 {"topic": ""} (Viewer 탭만 전환).
+    """
+    lowered = question.lower().strip()
+    has_show = any(kw in lowered for kw in _VIEWER_INTENT_KEYWORDS)
+    if not has_show:
+        return None
+    if topics:
+        for t in topics:
+            if t.lower() in lowered or t in question:
+                return {"topic": t}
+    for hint, topic in _TOPIC_HINTS.items():
+        if hint in question:
+            return {"topic": topic}
+    return {"topic": ""}
+def build_conversation_state(
+    question: str,
+    *,
+    history: list[HistoryItem] | None = None,
+    company: Any | None = None,
+    view_context: ViewContextInfo | None = None,
+) -> ConversationState:
+    """Build the conversation state for the current turn.
+    The server converts its Pydantic models to the lightweight types before
+    calling this; standalone/core code calls it directly.
+    Args:
+        question: Raw user question, possibly containing a legacy view-context marker.
+        history: Previous turns; the most recent item carrying ``meta`` supplies fallbacks.
+        company: Optional company object (``corpName``, ``stockCode``, ``market`` are read).
+        view_context: Explicit view context; takes precedence over a legacy marker.
+    Returns:
+        The populated ``ConversationState``.
+    """
+    cleaned_question, legacy_view_context = _parse_legacy_view_context(question)
+    active_view = view_context or legacy_view_context
+    history_meta = _last_history_meta(history)
+    # Company identity precedence: company object, then history metadata, then view context.
+    company_name = getattr(company, "corpName", None)
+    stock_code = getattr(company, "stockCode", None)
+    if not company_name and history_meta and history_meta.company:
+        company_name = history_meta.company
+    if not stock_code and history_meta and history_meta.stockCode:
+        stock_code = history_meta.stockCode
+    if active_view and active_view.company:
+        company_name = company_name or active_view.company.corpName or active_view.company.company
+        stock_code = stock_code or active_view.company.stockCode
+    topic = None
+    topic_label = None
+    period = None
+    viewer_data = None
+    # Topic comes from an active viewer context, otherwise from history metadata.
+    if active_view and active_view.type == "viewer":
+        topic = active_view.topic
+        topic_label = active_view.topicLabel or active_view.topic
+        period = active_view.period
+        viewer_data = active_view.data
+    elif history_meta:
+        topic = history_meta.topic
+        topic_label = history_meta.topicLabel or history_meta.topic
+    modules = tuple(history_meta.modules or []) if history_meta and history_meta.modules else ()
+    # Question-type classification is best effort; failures yield an empty tuple.
+    try:
+        from dartlab.ai.conversation.prompts import _classify_question_multi
+        question_types = tuple(_classify_question_multi(cleaned_question))
+    except (ImportError, AttributeError, ValueError):
+        question_types = ()
+    dialogue_mode = _classify_dialogue_mode(cleaned_question, has_company=bool(company_name or stock_code))
+    user_goal = _USER_GOAL_LABELS[dialogue_mode]
+    market = _infer_market(
+        company=company,
+        stock_code=stock_code,
+        view_context=active_view,
+        history_market=history_meta.market if history_meta else None,
+    )
+    prev_dialogue_mode = history_meta.dialogueMode if history_meta else None
+    prev_question_types = tuple(history_meta.questionTypes or []) if history_meta and history_meta.questionTypes else ()
+    turn_count = len(history) if history else 0
+    return ConversationState(
+        # If removing the marker leaves nothing, keep the original text.
+        question=cleaned_question or question,
+        dialogue_mode=dialogue_mode,
+        user_goal=user_goal,
+        company=company_name,
+        stock_code=stock_code,
+        market=market,
+        topic=topic,
+        topic_label=topic_label,
+        period=period,
+        viewer_data=viewer_data,
+        question_types=question_types,
+        modules=modules,
+        prev_dialogue_mode=prev_dialogue_mode,
+        prev_question_types=prev_question_types,
+        turn_count=turn_count,
+    )
+def conversation_state_to_meta(state: ConversationState) -> dict[str, Any]:
+    payload: dict[str, Any] = {
+        "company": state.company,
+        "stockCode": state.stock_code,
+        "market": state.market,
+        "topic": state.topic,
+        "topicLabel": state.topic_label,
+        "dialogueMode": state.dialogue_mode,
+        "questionTypes": list(state.question_types) if state.question_types else None,
+        "userGoal": state.user_goal,
+        "turnCount": state.turn_count if state.turn_count > 0 else None,
+    }
+    return {key: value for key, value in payload.items() if value not in (None, [], "", 0)}
+def build_dialogue_policy(state: ConversationState) -> str:
+    from dartlab.ai.tools.registry import get_coding_runtime_policy
+    coding_runtime_enabled, coding_runtime_reason = get_coding_runtime_policy()
+    lines = [
+        "## 현재 대화 상태",
+        f"- 대화 모드: {_DIALOGUE_MODE_LABELS.get(state.dialogue_mode, state.dialogue_mode)}",
+        f"- 사용자 목표: {state.user_goal}",
+    ]
+    if state.company and state.stock_code:
+        lines.append(f"- 현재 회사: {state.company} ({state.stock_code})")
+    elif state.company:
+        lines.append(f"- 현재 회사: {state.company}")
+    if state.market:
+        lines.append(f"- 시장: {state.market}")
+    if state.topic_label or state.topic:
+        topic_desc = state.topic_label or state.topic
+        if state.period:
+            topic_desc += f" ({state.period})"
+        lines.append(f"- 현재 보고 있는 주제: {topic_desc}")
+    if state.modules:
+        lines.append(f"- 직전 분석 모듈: {', '.join(f'`{name}`' for name in state.modules[:8])}")
+    if state.question_types:
+        lines.append(f"- 감지된 질문 유형: {', '.join(state.question_types)}")
+    if state.turn_count > 0:
+        lines.append(f"- 대화 턴: {state.turn_count}회차")
+    if state.prev_dialogue_mode:
+        lines.append(f"- 직전 모드: {_DIALOGUE_MODE_LABELS.get(state.prev_dialogue_mode, state.prev_dialogue_mode)}")
+    if state.prev_question_types:
+        lines.append(f"- 직전 질문 유형: {', '.join(state.prev_question_types)}")
+    if state.prev_dialogue_mode and state.prev_dialogue_mode != state.dialogue_mode:
+        transition = f"{state.prev_dialogue_mode}→{state.dialogue_mode}"
+        hint = _STATE_TRANSITION_HINTS.get(transition)
+        if hint:
+            lines.append(f"- 전환 힌트: {hint}")
+    lines.extend(["", "## 대화 진행 규칙"])
+    if state.turn_count >= 2 and state.company:
+        lines.extend(
+            [
+                "### 멀티턴 연속성",
+                "- 이전 턴의 분석 결과와 맥락을 이어받으세요. 같은 회사 반복 소개 불필요.",
+                "- 사용자가 짧게 물으면 이전 맥락에서 가장 관련 있는 데이터를 자동 활용하세요.",
+                "- 직전 분석 모듈이 있으면 해당 모듈 데이터를 우선 참조하세요.",
+                "",
+            ]
+        )
+    if state.dialogue_mode == "capability":
+        lines.extend(
+            [
+                "- 가능한 것 / 바로 할 수 있는 것 / 아직 안 되는 것을 먼저 3줄 안에 정리하세요.",
+                "- 바로 실행 가능한 다음 질문이나 액션을 2~4개 제안하세요.",
+                "- 실제로 등록된 도구와 런타임 상태만 말하고 추측하지 마세요.",
+                "",
+                "## 응답 템플릿",
+                "1. 가능한 것: 현재 세션에서 바로 가능한 기능 2~4개",
+                "2. 바로 할 수 있��� 것: 지금 즉시 실행 가능한 조회/분석/저장 작업",
+                "3. 아직 안 되는 것: 미지원 또는 현재 세션에서 닫힌 기능",
+                "4. 다음 액션: 사용자가 바로 복사해서 물을 수 있는 질문 2~4개",
+            ]
+        )
+    elif state.dialogue_mode == "coding":
+        lines.extend(
+            [
+                "- 먼저 작업 범위와 제약을 짧게 요약하세요.",
+                "- 수정 결과를 말할 때 변경점, 검증, 남은 리스크를 분리해서 설명하세요.",
+            ]
+        )
+        if coding_runtime_enabled:
+            lines.append(
+                "- 이 세션에서는 coding runtime이 열려 있으므로 실행 가능한 코드 작업이면 `run_coding_task` 사용을 우선 검토하세요."
+            )
+        else:
+            lines.append(
+                f"- 이 세션에서는 coding runtime이 비활성화되어 있으니 실제 코드 수정은 약속하지 말고, 텍스트 기반 수정안과 활성화 조건만 안내하세요. ({coding_runtime_reason})"
+            )
+        lines.extend(
+            [
+                "",
+                "## 응답 템플릿",
+                "1. 작업 범위: 무엇을 고치거나 만들지 한두 문장으로 요약",
+                "2. 실행 상태: 실제 코드 작업 가능 여부 또는 막힌 이유",
+                "3. 변경점: 파일/동작 기준 핵심 변경 또는 제안안",
+                "4. 검증: 테스트/빌드/확인 방법",
+                "5. 남은 리스크: 아직 확인되지 않은 점 1~2개",
+            ]
+        )
+    elif state.dialogue_mode == "company_analysis":
+        lines.extend(
+            [
+                "- 핵심 결론 1~2문장을 먼저 제시하고 곧바로 근거 표를 붙이세요.",
+                "- 숫자는 반드시 해석과 함께 제시하고, 마지막에 추가 drill-down 제안 1~2개를 남기세요.",
+                "- 사용자가 이미 보고 있는 topic이 있으면 그 topic을 우선 활용하세요.",
+                "",
+                "## 응답 템플릿",
+                "1. 한줄 결론: 가장 중요한 판단 1~2문장",
+                "2. 근거 표: 핵심 수치 2개 이상이면 반드시 표로 정리",
+                "3. 해석: 숫자가 의미하는 변화와 원인",
+                "4. 다음 drill-down: 더 파볼 주제 1~2개",
+            ]
+        )
+    elif state.dialogue_mode in {"company_explore", "follow_up"}:
+        lines.extend(
+            [
+                "- 이전 맥락을 이어받아 불필요한 재질문 없이 바로 답하세요.",
+                "- 현재 회사에서 바로 볼 수 있는 데이터나 다음 탐색 경로를 먼저 보여주세요.",
+                "- 짧은 답 후 구체적 drill-down 옵션을 제안하세요.",
+                "",
+                "## 응답 템플릿",
+                "1. 직접 답: 사용자의 현재 질문에 바로 답변",
+                "2. 지금 볼 수 있는 데이터/경로: topic, show, trace, OpenAPI 중 적절한 경로",
+                "3. 다음 선택지: 이어서 물을 만한 drill-down 질문 2~3개",
+            ]
+        )
+    else:
+        lines.extend(
+            [
+                "- 짧고 직접적으로 답하고, 필요한 경우에만 다음 행동을 제안하세요.",
+                "- 회사 맥락이 없으면 특정 종목명/코드가 있으면 더 정확히 도와줄 수 있다고 안내하세요.",
+                "",
+                "## 응답 템플릿",
+                "1. 직접 답변",
+                "2. 필요하면 짧은 보충 설명",
+                "3. 필요한 경우에만 다음 행동 1~2개",
+            ]
+        )
+    return "\n".join(lines)

src/dartlab/ai/conversation/focus.py ADDED Viewed

	@@ -0,0 +1,231 @@

+"""포커스/diff 컨텍스트 빌드 — server 의존성 없는 순수 로직.
+server/chat.py의 build_focus_context(), build_diff_context()에서 추출.
+"""
+from __future__ import annotations
+from typing import Any
+import polars as pl
+from .dialogue import ConversationState
+def _stringify_focus_value(value: Any, *, max_rows: int = 12, max_chars: int = 2400) -> str:
+    from dartlab.ai.context.builder import df_to_markdown
+    if value is None:
+        return "(데이터 없음)"
+    if isinstance(value, pl.DataFrame):
+        return df_to_markdown(value, max_rows=max_rows, compact=True)
+    text = str(value)
+    return text if len(text) <= max_chars else text[:max_chars] + "\n... (truncated)"
+def _build_topic_diff_snippet(company: Any, topic: str, *, max_entries: int = 3) -> str | None:
+    """특정 topic의 최근 기간간 변화를 요약 텍스트로 반환."""
+    if not hasattr(company, "diff"):
+        return None
+    try:
+        topic_diff_df = company.diff(topic)
+    except (AttributeError, KeyError, TypeError, ValueError):
+        return None
+    if topic_diff_df is None or not isinstance(topic_diff_df, pl.DataFrame) or topic_diff_df.height == 0:
+        return None
+    lines = ["### 기간간 변화 이력"]
+    for row in topic_diff_df.head(max_entries).iter_rows(named=True):
+        from_p = row.get("fromPeriod", "?")
+        to_p = row.get("toPeriod", "?")
+        status = row.get("status", "?")
+        from_len = row.get("fromLen", 0)
+        to_len = row.get("toLen", 0)
+        delta = to_len - from_len
+        sign = "+" if delta > 0 else ""
+        lines.append(f"- {from_p} → {to_p}: **{status}** (글자수 {from_len:,} → {to_len:,}, {sign}{delta:,})")
+    return "\n".join(lines)
+def build_focus_context(company: Any, state: ConversationState) -> str | None:
+    """현재 viewer/topic 맥락을 LLM 입력용 근거 블록으로 승격."""
+    if not state.topic or not hasattr(company, "show"):
+        return None
+    lines = ["## 현재 사용자가 보고 있는 섹션"]
+    lines.append(f"- topic: `{state.topic}`")
+    if state.topic_label:
+        lines.append(f"- label: {state.topic_label}")
+    if state.period:
+        lines.append(f"- period: {state.period}")
+    if state.company and state.stock_code:
+        lines.append(f"- company: {state.company} ({state.stock_code})")
+    # 뷰어에서 선택한 블록 데이터가 있으면 직접 삽입
+    if state.viewer_data:
+        vd = state.viewer_data
+        lines.append("")
+        lines.append("### 사용자가 선택한 블록")
+        if vd.get("topicLabel"):
+            lines.append(f"- 주제: {vd['topicLabel']}")
+        if vd.get("blockType"):
+            lines.append(f"- 유형: {vd['blockType']}")
+        if vd.get("preview"):
+            lines.append(f"- 미리보기: {vd['preview']}")
+        table = vd.get("table")
+        if table and table.get("columns") and table.get("rows"):
+            cols = table["columns"]
+            rows = table["rows"]
+            lines.append("")
+            lines.append("#### 블록 테이블 데이터")
+            lines.append("| " + " | ".join(str(c) for c in cols) + " |")
+            lines.append("| " + " | ".join("---" for _ in cols) + " |")
+            for row in rows[:30]:
+                vals = [str(row.get(c, "")) for c in cols]
+                lines.append("| " + " | ".join(vals) + " |")
+            if len(rows) > 30:
+                lines.append(f"... 외 {len(rows) - 30}행")
+        lines.append("")
+        lines.append("위 블록 데이터를 근거로 분석해주세요.")
+    try:
+        if state.period:
+            overview = company.show(state.topic, period=state.period)
+        else:
+            overview = company.show(state.topic)
+    except (AttributeError, KeyError, TypeError, ValueError):
+        overview = None
+    if isinstance(overview, pl.DataFrame) and overview.height > 0:
+        lines.append("")
+        lines.append("### 블록 목차")
+        lines.append(_stringify_focus_value(overview, max_rows=6))
+        block_col = (
+            "block" if "block" in overview.columns else "blockOrder" if "blockOrder" in overview.columns else None
+        )
+        if block_col:
+            first_block = overview.row(0, named=True).get(block_col)
+            if isinstance(first_block, int):
+                try:
+                    block_value = company.show(state.topic, first_block)
+                except (AttributeError, KeyError, TypeError, ValueError):
+                    block_value = None
+                if block_value is not None:
+                    lines.append("")
+                    lines.append(f"### 현재 섹션 대표 block={first_block}")
+                    lines.append(_stringify_focus_value(block_value))
+    # 실제 텍스트 본문 포함
+    if isinstance(overview, pl.DataFrame) and overview.height > 0:
+        block_col_for_text = (
+            "block" if "block" in overview.columns else "blockOrder" if "blockOrder" in overview.columns else None
+        )
+        if block_col_for_text:
+            text_chars = 0
+            max_text_body = 4000
+            for row in overview.iter_rows(named=True):
+                btype = row.get("type", row.get("blockType", ""))
+                if btype != "text":
+                    continue
+                bidx = row.get(block_col_for_text)
+                if not isinstance(bidx, int):
+                    continue
+                try:
+                    block_value = company.show(state.topic, bidx)
+                except (AttributeError, KeyError, TypeError, ValueError):
+                    continue
+                if block_value is None:
+                    continue
+                body = _stringify_focus_value(block_value, max_rows=20, max_chars=2000)
+                if text_chars + len(body) > max_text_body:
+                    break
+                lines.append("")
+                lines.append(f"### 공시 원문 (block {bidx})")
+                lines.append(body)
+                text_chars += len(body)
+    if hasattr(company, "trace"):
+        try:
+            trace = company.trace(state.topic)
+        except (AttributeError, KeyError, TypeError, ValueError):
+            trace = None
+        if trace:
+            lines.append("")
+            lines.append("### source trace")
+            lines.append(_stringify_focus_value(trace, max_chars=1600))
+    diff_text = _build_topic_diff_snippet(company, state.topic)
+    if diff_text:
+        lines.append("")
+        lines.append(diff_text)
+    return "\n".join(lines)
+def build_diff_context(company: Any, *, top_n: int = 8) -> str | None:
+    """전체 sections diff 요약을 LLM 컨텍스트 문자열로 변환."""
+    if not hasattr(company, "diff"):
+        return None
+    try:
+        summary_df = company.diff()
+    except (AttributeError, KeyError, TypeError, ValueError):
+        return None
+    if summary_df is None or not isinstance(summary_df, pl.DataFrame) or summary_df.height == 0:
+        return None
+    changed_col = "changed" if "changed" in summary_df.columns else "changedCount"
+    periods_col = "periods" if "periods" in summary_df.columns else "totalPeriods"
+    rate_col = "changeRate"
+    if changed_col not in summary_df.columns:
+        return None
+    agg_cols = [
+        pl.col(periods_col).max().alias("periods"),
+        pl.col(changed_col).sum().alias("changed"),
+    ]
+    if rate_col in summary_df.columns:
+        agg_cols.append(pl.col(rate_col).max().alias("changeRate"))
+    group_cols = ["topic"]
+    if "chapter" in summary_df.columns:
+        group_cols.insert(0, "chapter")
+    summary_df = summary_df.group_by(group_cols).agg(agg_cols)
+    changed_col = "changed"
+    periods_col = "periods"
+    _FINANCE_TOPICS = {
+        "financialNotes",
+        "financialStatements",
+        "consolidatedStatements",
+        "auditReport",
+        "auditOpinion",
+    }
+    summary_df = summary_df.filter(~pl.col("topic").is_in(_FINANCE_TOPICS))
+    changed = summary_df.filter(pl.col(changed_col) > 0)
+    if changed.height == 0:
+        return None
+    if rate_col in changed.columns:
+        changed = changed.sort([rate_col, changed_col], descending=[True, False]).head(top_n)
+    else:
+        changed = changed.sort(changed_col, descending=True).head(top_n)
+    lines = [
+        "## 공시 텍스트 변화 핫스팟",
+        f"최근 기간간 텍스트 변경이 많은 topic {changed.height}개:",
+        "",
+        "| topic | 기간수 | 변경횟수 | 변화율 |",
+        "|-------|--------|----------|--------|",
+    ]
+    for row in changed.iter_rows(named=True):
+        topic = row.get("topic", "?")
+        total = row.get(periods_col, 0)
+        cnt = row.get(changed_col, 0)
+        rate = row.get(rate_col, cnt / max(total - 1, 1) if total > 1 else 0)
+        lines.append(f"| {topic} | {total} | {cnt} | {rate:.0%} |")
+    lines.append("")
+    lines.append("변화율이 높은 섹션은 사업 전략, 리스크, 실적 변동 등 핵심 변화를 담고 있을 가능성이 높습니다.")
+    return "\n".join(lines)

src/dartlab/ai/conversation/history.py ADDED Viewed

	@@ -0,0 +1,126 @@

+"""히스토리 압축/빌드 — server 의존성 없는 순수 로직.
+server/chat.py의 build_history_messages(), compress_history()에서 추출.
+경량 타입(types.py) 기반.
+"""
+from __future__ import annotations
+from ..types import HistoryItem
+# Number of most recent turns build_history_messages keeps; a turn is counted as two messages.
+_MAX_HISTORY_TURNS = 10
+# Total character budget for the messages returned by build_history_messages.
+_MAX_HISTORY_CHARS = 12000
+# Per-message length above which _compress_history_text shortens the text.
+_MAX_HISTORY_MESSAGE_CHARS = 1800
+# compress_history returns histories of at most this many turns (two messages each) unchanged.
+_COMPRESS_TURN_THRESHOLD = 5
+def _compress_history_text(text: str) -> str:
+    """길어진 과거 대화를 앞뒤 핵심만 남기도록 압축."""
+    if len(text) <= _MAX_HISTORY_MESSAGE_CHARS:
+        return text
+    head = int(_MAX_HISTORY_MESSAGE_CHARS * 0.65)
+    tail = _MAX_HISTORY_MESSAGE_CHARS - head
+    return text[:head].rstrip() + "\n...\n" + text[-tail:].lstrip()
+def build_history_messages(history: list[HistoryItem] | None) -> list[dict[str, str]]:
+    """히스토리를 LLM messages 포맷으로 변환. 최근 N턴만 유지."""
+    if not history:
+        return []
+    trimmed = history[-(_MAX_HISTORY_TURNS * 2) :]
+    prepared: list[dict[str, str]] = []
+    for h in trimmed:
+        role = h.role if h.role in ("user", "assistant") else "user"
+        text = h.text.strip()
+        if not text:
+            continue
+        if role == "assistant" and h.meta:
+            summary_parts: list[str] = []
+            if h.meta.company or h.meta.stockCode:
+                company_text = h.meta.company or "?"
+                if h.meta.stockCode:
+                    company_text += f" ({h.meta.stockCode})"
+                summary_parts.append(company_text)
+            if h.meta.market:
+                summary_parts.append(f"시장: {h.meta.market}")
+            if h.meta.topicLabel or h.meta.topic:
+                summary_parts.append(f"주제: {h.meta.topicLabel or h.meta.topic}")
+            if h.meta.dialogueMode:
+                summary_parts.append(f"모드: {h.meta.dialogueMode}")
+            if h.meta.userGoal:
+                summary_parts.append(f"목표: {h.meta.userGoal}")
+            if h.meta.modules:
+                summary_parts.append(f"모듈: {', '.join(h.meta.modules)}")
+            if h.meta.questionTypes:
+                summary_parts.append(f"유형: {', '.join(h.meta.questionTypes)}")
+            if summary_parts:
+                text = f"[이전 대화 상태: {' | '.join(summary_parts)}]\n{text}"
+        prepared.append({"role": role, "content": _compress_history_text(text)})
+    total = 0
+    selected: list[dict[str, str]] = []
+    for item in reversed(prepared):
+        content_len = len(item["content"])
+        if selected and total + content_len > _MAX_HISTORY_CHARS:
+            break
+        selected.append(item)
+        total += content_len
+    return list(reversed(selected))
+def compress_history(history: list[HistoryItem] | None) -> list[HistoryItem] | None:
+    """멀티턴 히스토리 압축: 오래된 턴을 구조화된 요약으로 대체.
+    5턴(10 메시지) 이상이면 가장 오래된 턴들을 1개 요약 메시지로 교체.
+    최근 4턴(8 메시지)은 원본 유지.
+    """
+    if not history or len(history) <= _COMPRESS_TURN_THRESHOLD * 2:
+        return history
+    keep_count = 8
+    old_messages = history[:-keep_count]
+    recent_messages = history[-keep_count:]
+    companies_mentioned: set[str] = set()
+    topics_discussed: list[str] = []
+    qa_pairs: list[str] = []
+    for msg in old_messages:
+        text = msg.text.strip()
+        if not text:
+            continue
+        if msg.meta:
+            if msg.meta.company:
+                companies_mentioned.add(msg.meta.company)
+            if msg.meta.topicLabel:
+                topics_discussed.append(msg.meta.topicLabel)
+        if msg.role == "user":
+            brief = text[:80] + "..." if len(text) > 80 else text
+            qa_pairs.append(f"- Q: {brief}")
+        elif msg.role == "assistant":
+            sentences = text.split(".")
+            brief = ".".join(sentences[:2]).strip()
+            if brief and not brief.endswith("."):
+                brief += "."
+            if len(brief) > 150:
+                brief = brief[:150] + "..."
+            if brief:
+                qa_pairs.append(f"  A: {brief}")
+    if not qa_pairs:
+        return history
+    summary_lines = ["[이전 대화 요약]"]
+    if companies_mentioned:
+        summary_lines.append(f"관심 기업: {', '.join(sorted(companies_mentioned))}")
+    if topics_discussed:
+        unique_topics = list(dict.fromkeys(topics_discussed))[:5]
+        summary_lines.append(f"분석 주제: {', '.join(unique_topics)}")
+    summary_lines.append("")
+    summary_lines.extend(qa_pairs[-8:])
+    summary_text = "\n".join(summary_lines)
+    summary_msg = HistoryItem(role="assistant", text=summary_text)
+    return [summary_msg, *recent_messages]

src/dartlab/ai/conversation/intent.py ADDED Viewed

	@@ -0,0 +1,291 @@

+"""의도 분류 — 분석/메타/순수대화 판별.
+server/resolve.py에서 추출한 순수 문자열 매칭 로직.
+서버 의존성 없음.
+"""
+from __future__ import annotations
+import re as _re
+# Keywords whose presence marks a question as a meta question about the
+# library/system rather than about a company (consumed by is_meta_question).
+_META_KEYWORDS = frozenset(
+    {
+        "버전",
+        "version",
+        "도움말",
+        "도움",
+        "help",
+        "사용법",
+        "사용방법",
+        "뭘할수있",
+        "뭐할수있",
+        "뭘 할 수",
+        "뭐 할 수",
+        "할수있",
+        "기능",
+        "데이터",
+        "몇개",
+        "몇 개",
+        "개수",
+        "목록",
+        "리스트",
+        "상태",
+        "원본",
+        "raw",
+        "모듈",
+        "module",
+        "다운로드",
+        "설치",
+        "업데이트",
+        "안녕",
+        "반가",
+        "고마",
+        "안녕하세요",
+        "hello",
+        "hi",
+        "thanks",
+        "어떻게",
+        "how",
+        "what",
+        "why",
+        "설정",
+        "config",
+        "provider",
+        "모델",
+        "ollama",
+        "문서",
+        "docs",
+        "파일",
+        "저장",
+        "opendart",
+        "openedgar",
+        "openapi",
+        "api",
+        "dart api",
+        "edgar api",
+        "엔진",
+        "engine",
+        "spec",
+        "스펙",
+        "tool",
+        "도구",
+        "런타임",
+        "runtime",
+        "codex",
+        "gpt",
+        "claude",
+        "mcp",
+        "서버",
+        "server",
+        "종목검색",
+        "search",
+    }
+)
+# Keywords that signal a request for analysis. They are matched case-sensitively
+# against the raw question by has_analysis_intent and is_pure_conversation.
+_ANALYSIS_KEYWORDS = frozenset(
+    {
+        "분석",
+        "건전성",
+        "수익성",
+        "성장성",
+        "배당",
+        "실적",
+        "재무",
+        "매출",
+        "영업이익",
+        "순이익",
+        "부채",
+        "자산",
+        "현금흐름",
+        "ROE",
+        "ROA",
+        "PER",
+        "PBR",
+        "EPS",
+        "EBITDA",
+        "FCF",
+        "리스크",
+        "위험",
+        "감사",
+        "지배구조",
+        "임원",
+        "주주",
+        "비교",
+        "추세",
+        "추이",
+        "트렌드",
+        "전망",
+        "어때",
+        "어떤가",
+        "괜찮",
+        "좋은가",
+        "분석해",
+        "알려줘",
+        "알려 줘",
+        "보여줘",
+        "보여 줘",
+        "해줘",
+        "해 줘",
+        "평가",
+    }
+)
+# Names of systems/tools. A question mentioning one of them (spaces ignored,
+# case-insensitive) is treated as meta by is_meta_question and as having no
+# analysis intent by has_analysis_intent.
+_SYSTEM_ENTITIES = frozenset(
+    {
+        "opendart",
+        "openedgar",
+        "dartlab",
+        "dart api",
+        "edgar api",
+        "openapi",
+        "dart 시스템",
+        "edgar 시스템",
+        "mcp",
+        "codex",
+        "claude",
+        "gpt",
+        "ollama",
+    }
+)
+# Greetings/thanks that count only when they are the whole question: callers
+# compare the question (stripped, trailing "!?.~" removed) for exact membership.
+_GREETING_ONLY_PATTERNS = frozenset(
+    {
+        "안녕",
+        "안녕하세요",
+        "반갑",
+        "반갑습니다",
+        "고마",
+        "고맙습니다",
+        "감사합니다",
+        "감사해요",
+        "hello",
+        "hi",
+        "thanks",
+        "thank you",
+    }
+)
+# Ambiguous keyword -> phrases that pin it to its analysis meaning. When a question
+# contains the keyword together with one of its phrases, is_meta_question returns
+# False; has_analysis_intent consults only the "감사" entry.
+_ANALYSIS_CONTEXT_OVERRIDES = {
+    "감사": ["감사의견", "감사보고서", "감사인", "감사위원", "내부감사", "외부감사"],
+    "비교": ["비교해", "비교분석", "비교하"],
+}
+# Phrases that make has_analysis_intent return False even when an analysis keyword
+# is present in the question.
+_TENTATIVE_PATTERNS = (
+    "싶은데",
+    "싶어",
+    "할까",
+    "할 수 있",
+    "가능",
+    "뭐가 있",
+    "어떤 것",
+    "어떤게",
+    "어떤 게",
+    "궁금",
+    "뭘 볼",
+    "뭘 봐",
+    "무엇을",
+)
+# Short interjections/acknowledgements. is_pure_conversation returns True when the
+# stripped, lower-cased question is exactly one of these.
+_PURE_CONVERSATION_TOKENS = frozenset(
+    {
+        "응",
+        "ㅇㅇ",
+        "ㅇ",
+        "그래",
+        "넵",
+        "네",
+        "뭐해",
+        "ㅋㅋ",
+        "ㅎㅎ",
+        "좋아",
+        "오키",
+        "ok",
+        "yes",
+        "no",
+        "yeah",
+        "알겠어",
+        "그렇구나",
+        "아하",
+        "오",
+        "와",
+        "ㅠㅠ",
+        "ㅜㅜ",
+        "ㄴㄴ",
+        "아니",
+        "됐어",
+    }
+)
+# Phrases about the conversation itself (continuing to chat, small talk, "what I
+# said earlier", ...). is_pure_conversation searches the lower-cased question with it.
+_PURE_CONVERSATION_RE = _re.compile(
+    r"대화.*계속|계속.*대화|대화.*안.*되|이어서.*얘기|잡담|그냥.*얘기"
+    r"|얘기.*하자|말.*걸어|채팅|아까.*말|다른.*얘기",
+)
+def is_meta_question(question: str) -> bool:
+    """라이브러리/시스템에 대한 메타 질문인지 판별."""
+    q = question.lower().replace(" ", "")
+    q_raw = question.lower()
+    for entity in _SYSTEM_ENTITIES:
+        if entity.replace(" ", "") in q:
+            return True
+    q_stripped = question.strip().rstrip("!?.~")
+    if q_stripped in _GREETING_ONLY_PATTERNS or q_stripped.lower() in _GREETING_ONLY_PATTERNS:
+        return True
+    for ambiguous, analysis_contexts in _ANALYSIS_CONTEXT_OVERRIDES.items():
+        if ambiguous in q_raw:
+            if any(ctx in q_raw for ctx in analysis_contexts):
+                return False
+    for kw in _META_KEYWORDS:
+        if kw.replace(" ", "") in q:
+            return True
+    return False
+def has_analysis_intent(question: str) -> bool:
+    """분석 의도가 있는 질문인지 판별."""
+    q_lower = question.lower().replace(" ", "")
+    for entity in _SYSTEM_ENTITIES:
+        if entity.replace(" ", "") in q_lower:
+            return False
+    q_stripped = question.strip().rstrip("!?.~")
+    if q_stripped in _GREETING_ONLY_PATTERNS or q_stripped.lower() in _GREETING_ONLY_PATTERNS:
+        return False
+    has_kw = False
+    for kw in _ANALYSIS_KEYWORDS:
+        if kw in question:
+            if kw == "감사":
+                analysis_contexts = _ANALYSIS_CONTEXT_OVERRIDES.get("감사", [])
+                if not any(ctx in question for ctx in analysis_contexts):
+                    continue
+            has_kw = True
+            break
+    if not has_kw:
+        return False
+    for pat in _TENTATIVE_PATTERNS:
+        if pat in question:
+            return False
+    return True
+def is_pure_conversation(question: str) -> bool:
+    """순수 대화 패턴인지 판별."""
+    q = question.strip()
+    q_low = q.lower()
+    if q_low in _PURE_CONVERSATION_TOKENS:
+        return True
+    if _PURE_CONVERSATION_RE.search(q_low):
+        return True
+    if len(q) <= 6:
+        for kw in _ANALYSIS_KEYWORDS:
+            if kw in q:
+                return False
+        return True
+    return False

src/dartlab/ai/conversation/prompts.py ADDED Viewed

	@@ -0,0 +1,565 @@

+"""LLM 시스템 프롬프트 — 조립·분류·파싱 로직.
+템플릿 텍스트는 templates/ 하위 모듈에 분리되어 있다.
+이 파일은 로직(조립, 질문 분류, 응답 파싱)만 담당한다.
+"""
+from __future__ import annotations
+import re as _re
+from typing import Any
+from .templates.analysis_rules import (
+    CROSS_VALIDATION_COMPACT as _CROSS_VALIDATION_COMPACT,
+)
+from .templates.analysis_rules import (
+    CROSS_VALIDATION_RULES as _CROSS_VALIDATION_RULES,
+)
+from .templates.analysis_rules import (
+    FEW_SHOT_COMPACT as _FEW_SHOT_COMPACT,
+)
+from .templates.analysis_rules import (
+    FEW_SHOT_EXAMPLES as _FEW_SHOT_EXAMPLES,
+)
+from .templates.analysis_rules import (
+    QUESTION_TYPE_MAP as _QUESTION_TYPE_MAP,
+)
+from .templates.analysis_rules import (
+    REPORT_PROMPT as _REPORT_PROMPT,
+)
+from .templates.analysis_rules import (
+    REPORT_PROMPT_COMPACT as _REPORT_PROMPT_COMPACT,
+)
+from .templates.analysis_rules import (
+    TOPIC_COMPACT as _TOPIC_COMPACT,
+)
+from .templates.analysis_rules import (
+    TOPIC_PROMPTS as _TOPIC_PROMPTS,
+)
+from .templates.benchmarks import _INDUSTRY_BENCHMARKS, _SECTOR_MAP
+from .templates.self_critique import (
+    SELF_CRITIQUE_PROMPT,
+)
+from .templates.self_critique import (
+    SIGNAL_KEYWORDS as _SIGNAL_KEYWORDS,
+)
+# ── 템플릿 데이터 임포트 ──────────────────────────────────
+from .templates.system_base import (
+    EDGAR_SUPPLEMENT_EN,
+    EDGAR_SUPPLEMENT_KR,
+    SYSTEM_PROMPT_COMPACT,
+    SYSTEM_PROMPT_EN,
+    SYSTEM_PROMPT_KR,
+)
+# ── Plugin system prompt ──────────────────────────────────
+# Prompt section about the plugin mechanism; build_system_prompt_parts uses it for
+# the non-compact prompt.
+_PLUGIN_SYSTEM_PROMPT = """
+## 플러그인 확장 시스템
+- dartlab은 플러그인으로 확장 가능합니다. `uv pip install dartlab-plugin-xxx` 한 줄로 새 데이터/도구/분석을 추가할 수 있습니다.
+- 사용자가 "플러그인 만들어줘", "커스텀 분석 만들기", "ESG 플러그인" 같은 요청을 하면 `create_plugin` 도구를 사용하세요.
+- `create_plugin`은 즉시 사용 가능한 완전한 패키지 구조(pyproject.toml + register 함수 + 로직 파일)를 자동 생성합니다.
+- 분석 중 플러그인 추천 힌트가 제공되면, 답변 끝에 자연스럽게 안내하세요.
+"""
+# ══════════════════════════════════════
+# 질문 분류
+# ══════════════════════════════════════
+def _classify_question(question: str) -> str | None:
+    """질문 텍스트를 분석 유형으로 분류.
+    Returns:
+            "건전성", "수익성", "성장성", "배당", "지배구조", "리스크", "종합" 또는 None
+    """
+    scores: dict[str, int] = {}
+    for q_type, keywords in _QUESTION_TYPE_MAP.items():
+        score = sum(1 for kw in keywords if kw in question)
+        if score > 0:
+            scores[q_type] = score
+    if not scores:
+        return None
+    return max(scores, key=scores.get)
+def _classify_question_multi(question: str, max_types: int = 3) -> list[str]:
+    """복합 질문에서 여러 분석 유형을 감지.
+    Returns:
+            매칭된 유형 리스트 (점수 높은 순, 최대 max_types개)
+    """
+    scores: dict[str, int] = {}
+    for q_type, keywords in _QUESTION_TYPE_MAP.items():
+        score = sum(1 for kw in keywords if kw in question)
+        if score > 0:
+            scores[q_type] = score
+    if not scores:
+        return []
+    sorted_types = sorted(scores, key=scores.get, reverse=True)
+    return sorted_types[:max_types]
+def _match_sector(sector_name: str) -> str | None:
+    """KRX 업종명에서 벤치마크 키 매칭."""
+    if not sector_name:
+        return None
+    # 정확 매칭
+    if sector_name in _SECTOR_MAP:
+        return _SECTOR_MAP[sector_name]
+    # 키워드 부분 매칭
+    for keyword, benchmark_key in _SECTOR_MAP.items():
+        if keyword in sector_name:
+            return benchmark_key
+    return None
+# ══════════════════════════════════════
+# 시스템 프롬프트 조립
+# ══════════════════════════════════════
+def build_system_prompt(
+    custom: str | None = None,
+    lang: str = "ko",
+    included_modules: list[str] | None = None,
+    sector: str | None = None,
+    question_type: str | None = None,
+    question_types: list[str] | None = None,
+    compact: bool = False,
+    report_mode: bool = False,
+    market: str = "KR",
+    allow_tools: bool = True,
+) -> str:
+    """시스템 프롬프트 조립 (단일 문자열 반환).
+    Args:
+            custom: 사용자 지정 프롬프트 (있으면 이것만 ��용)
+            lang: "ko" 또는 "en"
+            included_modules: 컨텍스트에 포함된 모듈 목록 → 토픽 프롬프트 동적 추가
+            sector: KRX 업종명 → 업종별 벤치마크 추가
+            question_type: 단일 질문 유형 → Few-shot 예시 추가 (하위호환)
+            question_types: 복수 질문 유형 → question_type보다 우선
+            compact: True면 소형 모델용 간결 프롬프트 (Ollama)
+            report_mode: True면 전문 분석보고서 구조 프롬프트 추가
+            market: "KR" 또는 "US" — EDGAR 기업이면 US 보충 프롬프트 추가
+    """
+    static, dynamic = build_system_prompt_parts(
+        custom=custom,
+        lang=lang,
+        included_modules=included_modules,
+        sector=sector,
+        question_type=question_type,
+        question_types=question_types,
+        compact=compact,
+        report_mode=report_mode,
+        market=market,
+        allow_tools=allow_tools,
+    )
+    if dynamic:
+        return static + "\n" + dynamic
+    return static
+def build_system_prompt_parts(
+    custom: str | None = None,
+    lang: str = "ko",
+    included_modules: list[str] | None = None,
+    sector: str | None = None,
+    question_type: str | None = None,
+    question_types: list[str] | None = None,
+    compact: bool = False,
+    report_mode: bool = False,
+    market: str = "KR",
+    allow_tools: bool = True,
+) -> tuple[str, str]:
+    """시스템 프롬프트를 (정적, 동적) 2파트로 분리 반환.
+    정적 부분: base + 벤치마크 + 토픽 + 교차검증 + Few-shot (캐시 대상)
+    동적 부분: report_mode + 플러그인 (매 요청 변경 가능)
+    Claude prompt caching의 cache_control breakpoint를 적용할 때
+    정적 부분 끝에 마커를 삽입하면 캐시 히트율이 극대화된다.
+    """
+    if custom:
+        return custom, ""
+    q_types = question_types or ([question_type] if question_type else [])
+    def _strip_tool_guidance(text: str) -> str:
+        stripped = text
+        if "## 공시 데이터 접근법 (도구 사용)" in stripped:
+            stripped = _re.sub(
+                r"\n## 공시 데이터 접근법 \(도구 사용\).*?(?=\n## 밸류에이션 분석 프레임워크|\Z)",
+                "\n",
+                stripped,
+                flags=_re.DOTALL,
+            )
+            stripped = _re.sub(
+                r"\n## 분석 시작 프로토콜.*?(?=\n## 데이터 관리 원칙|\Z)",
+                "\n",
+                stripped,
+                flags=_re.DOTALL,
+            )
+        if "## 공시 도구" in stripped:
+            stripped = _re.sub(
+                r"\n## 공시 도구.*?(?=\n## 전문가 분석 필수|\Z)",
+                "\n",
+                stripped,
+                flags=_re.DOTALL,
+            )
+            stripped = _re.sub(
+                r"\n## 분석 시작 프로토콜.*?(?=\Z)",
+                "\n",
+                stripped,
+                flags=_re.DOTALL,
+            )
+        return stripped
+    no_tools_note = (
+        "## 현재 실행 제약\n"
+        "- 이번 답변에서는 도구 호출을 사용할 수 없습니다.\n"
+        "- `explore()`, `finance()`, `analyze()` 같은 도구 호출 계획을 문장으로 출력하지 마세요.\n"
+        "- `IS/BS/CF/ratios/TTM/costByNature/businessOverview` 같은 내부 약어나 모듈명을 그대로 쓰지 말고 "
+        "`손익계산서/재무상태표/현금흐름표/재무비율/최근 4분기 합산/성격별 비용 분류/사업의 개요`처럼 사용자 언어로 바꾸세요.\n"
+        "- 이미 제공된 컨텍스트만 사용해 바로 답변하고, 확인 질문이 필요하면 한 문장만 하세요."
+    )
+    if compact:
+        base = _strip_tool_guidance(SYSTEM_PROMPT_COMPACT) if not allow_tools else SYSTEM_PROMPT_COMPACT
+        static_parts: list[str] = []
+        dynamic_parts: list[str] = []
+        benchmark_key = _match_sector(sector) if sector else None
+        if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
+            static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
+        elif "일반" in _INDUSTRY_BENCHMARKS:
+            static_parts.append(_INDUSTRY_BENCHMARKS["일반"])
+        if included_modules:
+            module_set = set(included_modules)
+            for _tname, (trigger_modules, prompt_text) in _TOPIC_COMPACT.items():
+                if module_set & trigger_modules:
+                    static_parts.append(prompt_text)
+        if included_modules:
+            fs_modules = {"BS", "IS", "CF"}
+            if fs_modules & set(included_modules):
+                static_parts.append(_CROSS_VALIDATION_COMPACT)
+        for qt in q_types[:1]:
+            if qt in _FEW_SHOT_COMPACT:
+                static_parts.append(_FEW_SHOT_COMPACT[qt])
+        # 동적: report_mode + 플러그인
+        if report_mode:
+            dynamic_parts.append(_REPORT_PROMPT_COMPACT)
+        if not allow_tools:
+            dynamic_parts.append(no_tools_note)
+        dynamic_parts.append(
+            "\n플러그인: 사용자가 '플러그인 만들어줘'하면 create_plugin 도구 사용. "
+            "플러그인 추천 힌트가 있으면 답변 끝에 안내."
+        )
+        if market == "US":
+            static_parts.append(EDGAR_SUPPLEMENT_KR)
+        static = base + "\n".join(static_parts) if static_parts else base
+        dynamic = "\n".join(dynamic_parts)
+        return static, dynamic
+    if lang == "ko":
+        base = SYSTEM_PROMPT_KR
+    else:
+        base = SYSTEM_PROMPT_EN
+    if not allow_tools:
+        base = _strip_tool_guidance(base)
+    static_parts = []
+    dynamic_parts = []
+    # 정적: 벤치마크 + 토픽 + 교차검증 + Few-shot
+    benchmark_key = _match_sector(sector) if sector else None
+    if benchmark_key and benchmark_key in _INDUSTRY_BENCHMARKS:
+        static_parts.append(_INDUSTRY_BENCHMARKS[benchmark_key])
+    elif "일반" in _INDUSTRY_BENCHMARKS:
+        static_parts.append(_INDUSTRY_BENCHMARKS["일반"])
+    if included_modules:
+        module_set = set(included_modules)
+        for _topic_name, (trigger_modules, prompt_text) in _TOPIC_PROMPTS.items():
+            if module_set & trigger_modules:
+                static_parts.append(prompt_text)
+    if included_modules:
+        fs_modules = {"BS", "IS", "CF"}
+        if fs_modules & set(included_modules):
+            static_parts.append(_CROSS_VALIDATION_RULES)
+    for qt in q_types[:2]:
+        if qt in _FEW_SHOT_EXAMPLES:
+            static_parts.append(_FEW_SHOT_EXAMPLES[qt])
+    # EDGAR(US) 보충 프롬프트
+    if market == "US":
+        edgar_supp = EDGAR_SUPPLEMENT_EN if lang == "en" else EDGAR_SUPPLEMENT_KR
+        static_parts.append(edgar_supp)
+    # 동적: report_mode + 플러그인
+    if report_mode:
+        dynamic_parts.append(_REPORT_PROMPT)
+    if not allow_tools:
+        dynamic_parts.append(no_tools_note)
+    dynamic_parts.append(_PLUGIN_SYSTEM_PROMPT)
+    static = base + "\n".join(static_parts) if static_parts else base
+    dynamic = "\n".join(dynamic_parts)
+    return static, dynamic
+# ══════════════════════════════════════
+# Self-Critique
+# ══════════════════════════════════════
+def build_critique_messages(
+    original_response: str,
+    context_text: str,
+    question: str,
+) -> list[dict[str, str]]:
+    """Self-Critique용 메시지 리스트 생성."""
+    return [
+        {"role": "system", "content": SELF_CRITIQUE_PROMPT},
+        {
+            "role": "user",
+            "content": (
+                f"## 원본 질문\n{question}\n\n"
+                f"## 제공된 데이터\n{context_text[:3000]}\n\n"
+                f"## 검토 대상 응답\n{original_response}"
+            ),
+        },
+    ]
+def parse_critique_result(critique_text: str) -> tuple[bool, str]:
+    """Self-Critique 결과 파싱.
+    Returns:
+            (passed, revised_or_original)
+            - passed=True이면 원본 그대로 사용
+            - passed=False이면 수정된 응답 반환
+    """
+    stripped = critique_text.strip()
+    if stripped.upper().startswith("PASS"):
+        return True, ""
+    if "REVISED:" in stripped:
+        idx = stripped.index("REVISED:")
+        revised = stripped[idx + len("REVISED:") :].strip()
+        if revised:
+            return False, revised
+    return True, ""
+# ══════════════════════════════════════
+# Structured Output — 응답 메타데이터 추출
+# ══════════════════════════════════════
+_GRADE_PATTERN = _re.compile(
+    r"(?:종합|결론|판단|등급|평가)[:\s]*[*]*([A-F][+-]?|양호|보통|주의|위험|우수|매우 우수|취약)[*]*",
+    _re.IGNORECASE,
+)
+def extract_response_meta(response_text: str) -> dict[str, Any]:
+    """LLM 응답에서 구조화된 메타데이터 추출.
+    Returns:
+            {
+                    "grade": "양호" | "주의" | "위험" | "A" | None,
+                    "signals": {"positive": [...], "negative": [...]},
+                    "tables_count": int,
+                    "has_conclusion": bool,
+            }
+    """
+    meta: dict[str, Any] = {
+        "grade": None,
+        "signals": {"positive": [], "negative": []},
+        "tables_count": 0,
+        "has_conclusion": False,
+    }
+    grade_match = _GRADE_PATTERN.search(response_text)
+    if grade_match:
+        meta["grade"] = grade_match.group(1).strip("*")
+    for direction, keywords in _SIGNAL_KEYWORDS.items():
+        for kw in keywords:
+            if kw in response_text:
+                meta["signals"][direction].append(kw)
+    meta["tables_count"] = len(_re.findall(r"\|-{2,}", response_text)) // 2
+    conclusion_keywords = ["결론", "종합 평가", "종합 판단", "종합:", "Conclusion"]
+    meta["has_conclusion"] = any(kw in response_text for kw in conclusion_keywords)
+    return meta
+# ══════════════════════════════════════
+# Guided Generation — JSON → 마크다운 변환
+# ══════════════════════════════════════
+def guided_json_to_markdown(data: dict[str, Any]) -> str:
+    """Guided Generation JSON 응답을 마크다운으로 변환."""
+    parts: list[str] = []
+    grade = data.get("grade", "")
+    summary = data.get("summary", "")
+    if summary:
+        parts.append(f"**{summary}**")
+        parts.append("")
+    metrics = data.get("metrics", [])
+    if metrics:
+        parts.append("## 핵심 지표")
+        parts.append("| 지표 | 값 | 연도 | 추세 | 판단 |")
+        parts.append("|------|-----|------|------|------|")
+        for m in metrics:
+            name = m.get("name", "-")
+            value = m.get("value", "-")
+            year = m.get("year", "-")
+            trend = m.get("trend", "-")
+            assessment = m.get("assessment", "-")
+            parts.append(f"| {name} | **{value}** | {year} | {trend} | {assessment} |")
+        parts.append("")
+    positives = data.get("positives", [])
+    if positives:
+        parts.append("## 긍정 신호")
+        for p in positives:
+            parts.append(f"- {p}")
+        parts.append("")
+    risks = data.get("risks", [])
+    if risks:
+        parts.append("## 리스크")
+        for r in risks:
+            desc = r.get("description", "-") if isinstance(r, dict) else str(r)
+            severity = r.get("severity", "") if isinstance(r, dict) else ""
+            severity_badge = f" [{severity}]" if severity else ""
+            parts.append(f"- ⚠️ {desc}{severity_badge}")
+        parts.append("")
+    conclusion = data.get("conclusion", "")
+    if conclusion:
+        grade_badge = f" **[{grade}]**" if grade else ""
+        parts.append(f"## 결론{grade_badge}")
+        parts.append(conclusion)
+    return "\n".join(parts)
+# ══════════════════════════════════════
+# 동적 채팅 프롬프트
+# ══════════════════════════════════════
+def build_dynamic_chat_prompt(state: Any = None) -> str:
+    """실시간 데이터 현황을 포함한 채팅 시스템 프롬프트 생성.
+    state가 ConversationState이면 dialogue_policy를 자동 합류.
+    """
+    from dartlab.ai.tools.registry import get_coding_runtime_policy
+    def _count(category: str) -> int:
+        try:
+            from dartlab.core.dataLoader import _dataDir
+            data_dir = _dataDir(category)
+        except (FileNotFoundError, ImportError, KeyError, OSError, PermissionError, ValueError):
+            return 0
+        if not data_dir.exists():
+            return 0
+        return len(list(data_dir.glob("*.parquet")))
+    docs_count = _count("docs")
+    finance_count = _count("finance")
+    edgar_docs_count = _count("edgarDocs")
+    edgar_finance_count = _count("edgar")
+    coding_runtime_enabled, coding_runtime_reason = get_coding_runtime_policy()
+    coding_surface = (
+        "- 로컬 안전 정책이 허용되면 coding runtime으로 실제 코드 작업을 위임 가능"
+        if coding_runtime_enabled
+        else f"- 현재 세션에서는 텍스트 기반 코드 보조만 가능하고 실제 코드 작업 runtime은 비활성화됨 ({coding_runtime_reason})"
+    )
+    try:
+        import dartlab
+        version = dartlab.__version__ if hasattr(dartlab, "__version__") else "unknown"
+    except ImportError:
+        version = "unknown"
+    prompt = (
+        "당신은 DartLab의 금융 분석 AI 어시스턴트입니다. "
+        "한국 DART 전자공시와 미국 SEC EDGAR 데이터를 함께 다루며, "
+        "사용자가 지금 무엇을 할 수 있는지 먼저 설명하고 다음 행동까지 제안합니다.\n\n"
+        f"## DartLab 정보\n"
+        f"- **버전**: {version}\n"
+        f"- **Python 라이브러리**: `pip install dartlab` (PyPI)\n"
+        f"- **GitHub**: https://github.com/eddmpython/dartlab\n\n"
+        f"## 현재 보유 데이터 (실시간)\n"
+        f"- **DART docs**: {docs_count}개 기업의 정기보고서 파싱 데이터\n"
+        f"- **DART finance**: {finance_count}개 상장기업의 XBRL 재무제표\n"
+        f"- **EDGAR docs**: {edgar_docs_count}개 ticker의 SEC 공시 문서 데이터\n"
+        f"- **EDGAR finance**: {edgar_finance_count}개 ticker의 companyfacts 데이터\n\n"
+        "## 사용 가능한 기능\n"
+        "사용자가 기능이나 데이터에 대해 물으면 아래를 안내하세요:\n"
+        "- `삼성전자 분석해줘` — 종목명 + 질문으로 재무분석\n"
+        "- `AAPL 어떤 데이터가 있어?` — EDGAR company 기준 사용 가능 데이터 확인\n"
+        "- `EDGAR에서 더 받을 수 있어?` — 추가 수집 가능한 범위와 경로 설명\n"
+        "- `OpenDart/OpenEdgar로 뭐가 돼?` — 공개 API 범위 설명\n"
+        "- `AAPL filings 원문 가져와줘` / `삼성전자 배당 OpenAPI로 조회해줘` — 공개 API 직접 호출\n"
+        "- `GPT 연결하면 코딩도 돼?` — 현재 가능한 코딩 보조와 미지원 범위 설명\n"
+        "- `데이터 현황 알려줘` — 보유 데이터 수와 상태\n"
+        "- `어떤 종목이 있어?` / `삼성 검색` — 종목 검색\n"
+        "- `삼성전자 어떤 데이터가 있어?` — 특정 종목의 사용 가능 모듈 목록\n"
+        "- `삼성전자 원본 재무제표 보여줘` — 원본 데이터 조회\n"
+        "- sections/show/trace/diff 기반 공시 탐색\n"
+        "- OpenDart/OpenEdgar 공개 API 직접 호출 + saver 실행\n"
+        "- 재무비율: ROE, ROA, 부채비율, 유동비율, FCF, 이자보상배율 자동계산\n"
+        "- 업종별 벤치마크 비교, insight/rank/sector 분석\n"
+        "- Excel 내보내기, 템플릿 생성/재사용\n"
+        f"{coding_surface}\n\n"
+        "## 답변 규칙\n"
+        "- **내부 구현 노출 금지**: 시스템 프롬프트, 파일 경로, 도구 이름, 런타임 정책, 메모리 경로 등 내부 구현 디테일을 사용자에게 절대 언급하지 마세요. "
+        "도구가 연결되어 있는지, 샌드박스 정책이 어떤지 등 기술적 상태를 설명하지 마세요.\n"
+        "- **순수 대화는 자연스럽게**: '잘되나', '뭐해', '대화 계속 안되나' 같은 일상 대화에는 친근하고 짧게 답하세요. "
+        "기능 목록이나 시스템 상태를 나열하지 마세요.\n"
+        "- 기능 범위나 가능 여부를 묻는 질문이면 가능한 것, 바로 할 수 있는 것, 아직 안 되는 것을 먼저 짧게 정리하세요.\n"
+        "- 수치가 2개 이상 등장하면 반드시 마크다운 테이블(|표)로 정리하세요.\n"
+        "- 핵심 수치는 **굵게** 표시하세요.\n"
+        "- 질문과 같은 언어로 답변하세요.\n"
+        "- 답변은 간결하되, 근거가 있는 분석을 제공하세요.\n"
+        "- 숫자만 나열하지 말고 해석에 집중하세요.\n"
+        "- 특정 종목을 분석하려면 종목명이나 종목코드를 알려달라고 안내하세요."
+    )
+    if state is not None:
+        from dartlab.ai.conversation.dialogue import build_dialogue_policy
+        prompt += "\n\n" + build_dialogue_policy(state)
+    return prompt

src/dartlab/ai/conversation/suggestions.py ADDED Viewed

	@@ -0,0 +1,70 @@

+"""회사 상태에 맞는 추천 질문 생성기."""
+from __future__ import annotations
+from typing import Any
+import polars as pl
+def _hasFrame(data: Any) -> bool:
+    return isinstance(data, pl.DataFrame) and data.height > 0
+def _hasTimeseries(company: Any) -> bool:
+    try:
+        timeseries = getattr(company.finance, "timeseries", None) if hasattr(company, "finance") else None
+        if callable(timeseries):
+            timeseries = timeseries()
+        if isinstance(timeseries, tuple):
+            timeseries = timeseries[0] if timeseries else None
+        return bool(timeseries)
+    except (AttributeError, TypeError, ValueError):
+        return False
+def _pushUnique(items: list[str], question: str) -> None:
+    if question and question not in items:
+        items.append(question)
+def suggestQuestions(company: Any) -> list[str]:
+    """회사 데이터 상태에 맞춰 추천 질문 5~8개를 생성한다."""
+    suggestions: list[str] = []
+    _pushUnique(suggestions, "이 회사의 핵심 투자 포인트를 한눈에 정리해주세요")
+    _pushUnique(suggestions, "재무건전성과 현금흐름을 함께 점검해주세요")
+    if _hasFrame(getattr(company, "IS", None)):
+        _pushUnique(suggestions, "최근 수익성 추세와 이익의 질을 분석해주세요")
+        _pushUnique(suggestions, "매출 성장률과 영업이익률 변화의 원인을 설명해주세요")
+    if _hasFrame(getattr(company, "BS", None)):
+        _pushUnique(suggestions, "부채 구조와 유동성 리스크를 점검해주세요")
+    if _hasFrame(getattr(company, "CF", None)):
+        _pushUnique(suggestions, "영업현금흐름이 이익을 잘 따라오고 있는지 평가해주세요")
+    if _hasFrame(getattr(company, "dividend", None)):
+        _pushUnique(suggestions, "배당 지속가능성과 주주환원 정책을 평가해주세요")
+    if _hasTimeseries(company):
+        _pushUnique(suggestions, "적정 주가와 밸류에이션을 산출해주세요")
+        _pushUnique(suggestions, "경기침체 시나리오에서 이 회사가 얼마나 버틸지 분석해주세요")
+    topics = []
+    try:
+        topics = list(getattr(company, "topics", None) or [])
+    except (AttributeError, TypeError):
+        topics = []
+    topicText = " ".join(str(topic) for topic in topics).lower()
+    if "risk" in topicText or "리스크" in topicText:
+        _pushUnique(suggestions, "최근 공시에서 드러난 핵심 리스크를 요약해주세요")
+    if "dividend" in topicText or "배당" in topicText:
+        _pushUnique(suggestions, "배당 관련 공시 문맥까지 포함해 해석해주세요")
+    if "segments" in topicText or "segment" in topicText or "부문" in topicText:
+        _pushUnique(suggestions, "사업부문별 실적과 성장성을 비교해주세요")
+    _pushUnique(suggestions, "최근 공시 중 꼭 읽어야 할 문서를 우선순위로 골라주세요")
+    return suggestions[:8]

src/dartlab/ai/conversation/templates/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """프롬프트 템플릿 데이터 — 시스템 프롬프트, 벤치마크, 분석 규칙, Self-Critique."""

src/dartlab/ai/conversation/templates/analysis_rules.py ADDED Viewed

	@@ -0,0 +1,897 @@

+"""교차검증 규칙, 토픽 프롬프트, Few-shot 예시 (일반 + Compact)."""
+from __future__ import annotations
+# ══════════════════════════════════════
+# 교차검증 규칙
+# ══════════════════════════════════════
+CROSS_VALIDATION_RULES = """
+## 교차검증 체크리스트
+### A. 이익의 질 검증
+1. **영업이익 vs 영업CF**: 영업이익 흑자 + 영업CF 적자 → 발생주의 이익 의심. 3년 누적 비교 필수.
+2. **매출채권 회전 vs 매출**: 매출채권 증가율이 매출 증가율을 2기 연속 초과 → 매출 인식 공격성 또는 대손 리스크.
+3. **Accrual Ratio**: (순이익 - 영업CF) / 평균자산총계 > 10% → 발생주의 이익 과대 의심.
+4. **운전자본 사이클**: (매출채권일수 + 재고일수 - 매입채무일수)의 추이 → 악화 시 현금 전환 지연.
+### B. 재무구조 검증
+5. **DuPont 분해**: ROE = 순이익률 × 총자산회전율 × 재무레버리지. ROE 개선이 레버리지에만 의존하면 위험.
+6. **CAPEX vs 감가상각**: CAPEX/감가상각비 < 0.5 지속 → 설비 노후화, 미래 경쟁력 훼손.
+7. **부채비율 급등**: 전년 대비 30%p 이상 상승 시 BS/CF 교차 분석 (차입 증가 vs 자본 감소 구분).
+8. **이자보상배율**: < 1이면 재무 위기, < 1.5x이면 주의. 영업이익으로 이자비용 커버 불가.
+### C. 사업 일관성 검증
+9. **부문 합산 vs 연결**: 부문별 매출 합계 ≠ 연결 매출 → 조정항목 또는 부문 분류 변경 확인.
+10. **영업이익률 vs 동종업계**: 업종 평균 대비 +10%p 이상 → 지속가능 경쟁우위 또는 일회성. 원인 규명 필수.
+### D. 신뢰성 검증
+11. **FCF 추세**: FCF(영업CF - CAPEX) 3년 연속 음수 → 외부 자금 의존도 상승, 배당 지속가능성 의문.
+12. **감사의견**: 적정 외 의견(한정/부적정/의견거절), 강조사항 존재, 감사인 교체 → 재무제표 신뢰성 경고.
+"""
+# Condensed cross-validation checklist: one bullet per check, covering a subset
+# of the items spelled out in CROSS_VALIDATION_RULES.
+# NOTE(review): presumably used where prompt length is limited -- confirm at the call site.
+CROSS_VALIDATION_COMPACT = (
+    "\n## 교차검증\n"
+    "- 영업이익 흑자 + 영업CF 적자 → 이익의 질 의심 (3년 누적 비교)\n"
+    "- 매출채권 증가율 > 매출 증가율 2기 연속 → 대손/공격적 매출인식\n"
+    "- Accrual Ratio(NI-OCF)/자산 > 10% → 발생주의 과대\n"
+    "- DuPont: ROE 개선이 레버리지 의존이면 위험\n"
+    "- CAPEX/감가상각 < 0.5 지속 → 설비 노후화\n"
+    "- 부채비율 YoY 30%p↑ → BS/CF 교차 확인\n"
+    "- 이자보상배율 < 1 → 재무 위기\n"
+    "- FCF 3년 연속 음수 → 외부 자금 의존\n"
+    "- 감사의견 비적정/감사인 교체 → 신뢰성 경고\n"
+)
+# ══════════════════════════════════════
+# 매출 예측 AI 보정 규칙
+# ══════════════════════════════════════
+FORECAST_OVERLAY_RULES = """
+## 매출 예측 AI 보정 규칙 (v3)
+엔진이 계산한 매출 예측을 세계 지식으로 보정합니다.
+### 원칙
+- 엔진 숫자가 기본값. 근거 없이 변경하지 마세요.
+- 보정할 때는 반드시 구체적 근거를 제시하세요 (산업 리포트, 규제 변화, 경쟁사 동향 등).
+- "~할 수 있다" 같은 가능성만으로 숫자를 바꾸지 마세요. 확실한 트렌드만 반영.
+### 보정 출력 형식 (구조화 필수)
+보정 시 아래 형식의 JSON을 텍스트에 포함해 주세요:
+```json
+{
+  "growth_adjustment": [+2.0, +1.5, +0.5],
+  "direction": "up",
+  "magnitude": "moderate",
+  "scenario_shift": {"bull": +5, "bear": -5},
+  "reasoning": ["반도체 슈퍼사이클 진입 — DRAM ASP +25% 전망 (TrendForce 2026Q1)"]
+}
+```
+필드 설명:
+- **growth_adjustment**: 연도별 성장률 보정 (%p). 양수=상향, 음수=하향. 가드레일: 연간 ±10%p, 총 ±20%p.
+- **direction**: "up" | "down" | "neutral"
+- **magnitude**: "minor" (<2%p) | "moderate" (2-5%p) | "major" (>5%p)
+- **scenario_shift**: Bull/Bear 확률 이동 (%p). Base는 자동 조정. 생략 가능.
+- **reasoning**: 각 보정의 근거. 비어있으면 보정 거부됨.
+### 세그먼트 분석 (v3 신규)
+엔진이 세그먼트별 예측을 제공하면:
+- 각 세그먼트의 성장률이 합리적인지 평가
+- 세그먼트 간 시너지/카니발리제이션 가능성 언급
+- 특정 세그먼트가 구조적 변화(규제, 기술, 경쟁)에 노출되면 해당 세그먼트 기준으로 보정
+### 수주잔고 해석 (v3 신규)
+엔진이 수주잔고 시그널을 제공하면:
+- B/R ratio 추세의 의미 해석 (산업 맥락)
+- 수주잔고 품질 평가 (취소 위험, 가격 변동, 고객 집중도)
+### 금지
+- 엔진 결과를 무시하고 완전히 새로운 숫자 제시
+- 출처 없는 "시장에서는~" 표현
+- 과도한 정밀도 (소수점 이하 성장률 보정 등)
+"""
+# Condensed form of FORECAST_OVERLAY_RULES: the same guardrails (engine numbers
+# are the baseline, JSON output required, +/-10%p per year and +/-20%p total
+# caps, reasoning mandatory) as one-line bullets.
+FORECAST_OVERLAY_COMPACT = (
+    "\n## 매출 예측 보정 (v3)\n"
+    "- 엔진 숫자가 기본값, 근거 없이 변경 금지\n"
+    "- 보정 시 JSON 형식 필수: growth_adjustment, direction, magnitude, reasoning\n"
+    "- 연간 보정 ±10%p 캡, 총 ±20%p 캡, reasoning 없으면 거부\n"
+    "- 세그먼트별 분석: 부문별 성장률 평가, 시너지/카니발리제이션\n"
+    "- 수주잔고: B/R ratio 해석, 취소 위험, 고객 집중도\n"
+    "- 엔진 무시하고 새 숫자 금지, 출처 없는 표현 금지\n"
+)
+# ══════════════════════════════════════
+# 토픽별 추가 프롬프트
+# ══════════════════════════════════════
+TOPIC_PROMPTS: dict[str, tuple[set[str], str]] = {
+    "governance": (
+        {"majorHolder", "executive", "boardOfDirectors", "holderOverview", "auditSystem"},
+        "\n## 지배구조 분석 참고\n"
+        "- 사외이사 비율 1/3 이상은 상법상 요건 (자산총액 2조 이상)\n"
+        "- 최대주주 지분율 30% 이상이면 경영권 안정\n"
+        "- 감사위원회 전원 사외이사 여부 확인\n"
+        "- 이사회 출석률 80% 미만은 형식적 운영 우려\n",
+    ),
+    "risk": (
+        {"contingentLiability", "sanction", "riskDerivative", "internalControl"},
+        "\n## 리스크 분석 참고\n"
+        "- 우발부채는 현재 인식되지 않은 잠재 부채\n"
+        "- 채무보증 금액이 자기자본 대비 높으면 위험\n"
+        "- 내부통제 취약점은 재무제표 신뢰성에 영향\n"
+        "- 반복 제재는 구조적 컴플라이언스 문제\n",
+    ),
+    "dividend": (
+        {"dividend", "shareCapital"},
+        "\n## 배당 분석 참고\n"
+        "- 배당성향 100% 초과 = 순이익 이상 배당 (지속 불가능)\n"
+        "- DPS 연속 증가는 주주환원 의지의 지표\n"
+        "- 자기주식 소각은 추가적 주주환원 수단\n",
+    ),
+    "investment": (
+        {"rnd", "tangibleAsset", "subsidiary", "investmentInOther"},
+        "\n## 투자 분석 참고\n"
+        "- R&D 비율이 매출 대비 높으면 기술 집약적 기업\n"
+        "- CAPEX가 감가상각을 초과하면 성장 투자 중\n"
+        "- 자회사 투자 증가는 사업 다각화 또는 수직계열화\n",
+    ),
+    "business": (
+        {"businessOverview", "segments", "productService", "salesOrder", "rawMaterial", "subsidiary"},
+        "\n## 사업/전략 분석 프레임워크\n"
+        "- **시장구조**: 상위 기업 집중도, 진입장벽, 규제 환경 (businessOverview에서 추론)\n"
+        "- **경쟁 포지션**: 시장점유율 추이, 제품 믹스 변화 (segments/productService)\n"
+        "- **가치사슬**: 원재료 의존도(rawMaterial), 고객 집중도(salesOrder 상위 매출처 비중)\n"
+        "- **수직계열화**: 자회사 구조(subsidiary)와 부문간 시너지\n"
+        "- **전략적 리스크**: 단일 제품/고객 의존, 원재료 가격 변동, 환율 노출\n",
+    ),
+    "profitability": (
+        {"IS", "segments", "costByNature", "productService"},
+        "\n## 수익성 심층 분석 가이드\n"
+        "- **원가구조 분해**: 매출원가율, 판관비율 추이 (costByNature로 인건비/감가상각/외주비 세부 확인)\n"
+        "- **영업레버리지**: 고정비(인건비, 감가상각) 비중 높으면 매출 증가 시 이익률 급등\n"
+        "- **마진 지속성**: 일회성 이익(자산처분, 보험금) 제거 후 recurring margin 판단\n"
+        "- **부문별 수익성**: segments에서 고마진/저마진 부문 식별, 매출 믹스 효과 분석\n",
+    ),
+    "growth": (
+        {"IS", "segments", "rnd", "tangibleAsset", "subsidiary", "productService"},
+        "\n## 성장성 분석 가이드\n"
+        "- **유기적 vs 비유기적**: 기존 사업 성장 vs M&A/자회사 편입 효과 분리\n"
+        "- **설비투자 사이클**: CAPEX/감가상각비 > 1.5x면 적극 확장기\n"
+        "- **R&D 파이프라인**: R&D/매출 비율 추이 + 무형자산 자본화 비율 동시 확인\n"
+        "- **시장 침투율**: 업종 성장률 vs 자사 성장률 비교 → 점유율 변화 추론\n",
+    ),
+    "comprehensive": (
+        {"IS", "BS", "CF", "segments", "riskFactor", "dividend", "audit"},
+        "\n## 종합 분석 프레임워크 (신용분석 보고서 구조)\n"
+        "1. **사업 개요**: 시장 위치, 경쟁 구도, 핵심 경쟁력\n"
+        "2. **재무 분석**: 수익성(IS) → 건전성(BS) → 현금흐름(CF) 순서\n"
+        "3. **DuPont 분해**: ROE = 순이익률 × 자산회전율 × 재무레버리지 → 주요 동인 식별\n"
+        "4. **현금흐름 품질**: 영업CF/순이익, FCF 추이, 운전자본 사이클 변화\n"
+        "5. **리스크**: 재무 리스크 + 사업 리스크 + 지배구조 리스크\n"
+        "6. **종합 판단**: 강점/약점 매트릭스 + 향후 모니터링 포인트\n",
+    ),
+    "disclosure": (
+        {"audit", "accountingPolicy", "relatedPartyTx", "contingentLiability"},
+        "\n## 공시/주석 분석 가이드\n"
+        "- **회계정책 변경**: 수익인식, 감가상각, 재고평가 방법 변경은 이익 조정 신호일 수 있음\n"
+        "- **특수관계자거래**: 거래 규모, 가격 적정성, 매출 중 비중 변화 추적\n"
+        "- **우발부채**: 소송/보증/PF 규모가 자기자본 대비 10% 초과 시 주의\n"
+        "- **감사의견**: 계속기업 불확실성 강조, 한정의견, 감사인 교체 이력 확인\n",
+    ),
+}
+TOPIC_COMPACT: dict[str, tuple[set[str], str]] = {
+    "governance": (
+        {"majorHolder", "executive", "boardOfDirectors", "holderOverview", "auditSystem"},
+        "\n## 지배구조 참고\n"
+        "- 사외이사 1/3↑ 상법 요건, 최대주주 30%↑ 경영권 안정\n"
+        "- 감사위원회 사외이사 전원 여부, 이사회 출석률 80%↓ 주의\n",
+    ),
+    "risk": (
+        {"contingentLiability", "sanction", "riskDerivative", "internalControl"},
+        "\n## 리스크 참고\n"
+        "- 우발부채 = 잠재 부채, 채무보증/자본 비율 확인\n"
+        "- 내부통제 취약 → 재무제표 신뢰성↓, 반복 제재 → 구조적 문제\n",
+    ),
+    "dividend": (
+        {"dividend", "shareCapital"},
+        "\n## 배당 참고\n- 배당성향 100%↑ 지속 불가, DPS 연속증가 = 주주환원 의지\n",
+    ),
+    "investment": (
+        {"rnd", "tangibleAsset", "subsidiary", "investmentInOther"},
+        "\n## 투자 참고\n- CAPEX > 감가상각 = 성장 투자, R&D/매출↑ = 기술 집약\n",
+    ),
+    "business": (
+        {"businessOverview", "segments", "productService", "salesOrder", "rawMaterial", "subsidiary"},
+        "\n## 사업 참고\n- 시장구조·경쟁포지션(segments), 고객집중도(salesOrder), 원재료 의존(rawMaterial)\n"
+        "- 단일 제품/고객 의존, 환율 노출 = 전략적 리스크\n",
+    ),
+    "profitability": (
+        {"IS", "segments", "costByNature", "productService"},
+        "\n## 수익성 참고\n- 원가구조 분해: 매출원가율+판관비율 추이. 일회성 제거 후 recurring margin\n"
+        "- 부문별 고마진/저마진 식별, 영업레버리지(고정비 비중) 확인\n",
+    ),
+    "growth": (
+        {"IS", "segments", "rnd", "tangibleAsset", "subsidiary", "productService"},
+        "\n## 성장성 참고\n- 유기적 vs M&A 성장 분리. CAPEX/감가상각 >1.5x = 확장기\n"
+        "- R&D/매출 + 무형자산 자본화 동시 확인\n",
+    ),
+    "comprehensive": (
+        {"IS", "BS", "CF", "segments", "riskFactor", "dividend", "audit"},
+        "\n## 종합 참고\n- 사업→수익성(IS)→건전성(BS)→CF→리스크 순서\n"
+        "- DuPont(ROE 동인), CF 품질, 강점/약점 매트릭스 제시\n",
+    ),
+    "disclosure": (
+        {"audit", "accountingPolicy", "relatedPartyTx", "contingentLiability"},
+        "\n## 공시 참고\n- 회계정책 변경=이익조정 가능, 특수관계자 비중↑ 주의\n"
+        "- 우발부채/자본 10%↑ 경고, 감사인 교체 이력 확인\n",
+    ),
+}
+# ══════════════════════════════════════
+# Few-shot 예시
+# ══════════════════════════════════════
+FEW_SHOT_EXAMPLES: dict[str, str] = {
+    "건전성": """
+## 분석 예시 (재무 건전성)
+Q: 이 기업의 재무 건전성을 분석해주세요.
+A: ## 재무 건전성 심층 분석
+### 핵심 요약
+부채비율 45.2%로 양호하나, **DuPont 분해 결과 ROE 개선의 주동인이 레버리지가 아닌 수익성**임을 확인. 이익의 질도 CF 기준 양호.
+### 1. 재무구조 (BS 기준)
+| 지표 | 2022 | 2023 | 변동 | 판단 |
+|------|------|------|------|------|
+| 부채비율 | 52.1% | **45.2%** | ▼6.9%p | 양호 |
+| 유동비율 | 172.5% | **185.3%** | ▲12.8%p | 양호 |
+| 이자보상배율 | 8.2x | **10.5x** | ▲2.3x | 양호 |
+### 2. DuPont 분해 (ROE 검증)
+- ROE 21.0% = 순이익률 10.5% × 자산회전율 0.8x × 레버리지 2.5x
+- 레버리지 2.5x는 전년(2.65x)보다 하락 → ROE 개선은 **순이익률 개선(9.2%→10.5%)** 주도
+- ⭕ 건전한 ROE 구조 (레버리지 의존 아님)
+### 3. 이익의 질 + 운전자본
+| 검증 항목 | 값 | 판단 |
+|-----------|-----|------|
+| 영업CF/순이익 | 152% (3,200/2,100) | ⭕ 양호 |
+| Accrual Ratio | 3.1% | ⭕ 양호 (<10%) |
+| 운전자본 사이클(CCC) | 42일 → 45일 | △ 소폭 악화 |
+| FCF | +1,200백만원 | ⭕ 양호 |
+### 4. 감사의견: 적정 (2020-2023 연속), 감사인 교체 없음
+### 결론
+부채비율 개선, 이자보상배율 10x+ 안정, DuPont상 수익성 주도 ROE.
+이익의 질 양호(CF/NI 152%, Accrual 3.1%). **재무 건전성 양호.**
+모니터링: 운전자본 사이클 소폭 악화(+3일) 추이 주시.
+""",
+    "수익성": """
+## 분석 예시 (수익성)
+Q: 수익성을 분석해주세요.
+A: ## 수익성 심층 분석
+### 핵심 요약
+영업이익률이 13.9%→15.0%로 개선되었으나, **마진 분해 결과 개선의 주인은 원가율 하락(▼2.3%p)**이며 판관비는 오히려 증가(▲1.2%p). 원재료 가격 반등 시 마진 압박 가능.
+### 1. 마진 분해 (IS 기준, 인과 분석)
+| 항목 | 2022 | 2023 | 변동 | 원인 |
+|------|------|------|------|------|
+| 매출원가율 | 62.1% | 59.8% | ▼2.3%p | 원재료 가격↓ |
+| 판관비율 | 24.0% | 25.2% | ▲1.2%p | 인력확충(+8.3%) + R&D↑ |
+| **영업이익률** | **13.9%** | **15.0%** | **▲1.1%p** | 원가↓ > 판관비↑ |
+→ 순효과 +1.1%p = 원가개선(+2.3%p) - 판관비증가(-1.2%p)
+### 2. DuPont 분해 (ROE 21.0%)
+| 구성요소 | 값 | 판단 |
+|----------|-----|------|
+| 순이익률 | 10.5% | 주동인 (전년 9.2%→10.5%) |
+| 자산회전율 | 0.8x | 안정 |
+| 재무레버리지 | 2.5x | 전년 대비 하락(건전화) |
+→ ROE 개선은 **수익성 주도**, 레버리지 의존 아닌 건전한 구조
+### 3. 이익의 질
+- 영업CF/순이익: 152% → ⭕ 양호
+- Accrual Ratio: 3.1% → ⭕ 양호 (<10%)
+- 매출채권 증가율(8.2%) < 매출 증가율(11.1%) → ⭕ 정상
+### 결론
+수익성 **양호**. 마진 개선의 핵심 동인은 원재료비 하락이므로, **원자재 가격 반등 시 이익률 1~2%p 압박** 가능.
+판관비 중 R&D 증가(8.5%→9.2%)는 중장기 경쟁력 투자로 긍정적.
+모니터링: 원재료 가격 추이, 판가 전가력, 부문별 마진 변화.
+""",
+    "성장성": """
+## 분석 예시 (성장성)
+Q: 성장성은 어떤가요?
+A: ## 성장성 분석
+### 1. 매출 성장률 (IS 기준)
+- 2023/2022: +11.1% (20,000/18,000)
+- 2022/2021: +12.5% (18,000/16,000)
+- 3Y CAGR: +11.8% → 안정적 두 자릿수 성장
+### 2. 사업부문별 성장 (segment 기준)
+- A 부문: +15.3% (성장 견인)
+- B 부문: +5.1% (안정)
+### 3. R&D 투자 (성장 지속가능성)
+- R&D/매출: 8.5% → 기술 투자 지속 중
+### 4. 총자산 증가율
+- 2023/2022: +8.2% → 매출 성장률 하회 (자산 효율성 개선)
+### 결론
+안정적 두 자릿수 매출 성장 유지 중. R&D 투자 지속으로 성장 모멘텀 양호.
+""",
+    "배당": """
+## 분석 예시 (배당)
+Q: 배당 정책을 분석해주세요.
+A: ## 배당 분석
+### 1. 배당 추이
+| 연도 | DPS(원) | 배당수익률 | 배당성향 |
+|------|---------|------------|----------|
+| 2023 | 1,500 | 2.8% | 35.7% |
+| 2022 | 1,200 | 2.5% | 33.3% |
+| 2021 | 1,000 | 2.2% | 31.3% |
+### 2. 배당 지속가능성
+- DPS 3년 연속 증가 (+25.0%, +20.0%)
+- 배당성향 30-36% → 안정적 범위
+- FCF 대비 배당: 충분한 커버리지
+### 결론
+DPS 연속 증가, 배당성향 적정 범위 내. **주주환원 정책 양호** 판단.
+""",
+    "지배구조": """
+## 분석 예시 (지배구조)
+Q: 지배구조를 분석해주세요.
+A: ## 지배구조 분석
+### 1. 최대주주 (majorHolder 기준)
+- 최대주주: OO그룹 회장 외 특수관계인
+- 지분율: 35.2% → 경영권 안정
+### 2. 이사회 구성 (executive 기준)
+- 총 이사: 8명 (사내 5, 사외 3)
+- 사외이사 비율: 37.5% → 상법 1/3 요건 충족
+### 3. 감사 (audit 기준)
+- 감사의견: 적정 (5년 연속)
+- 감사인: 4대 회계법인
+### 결론
+경영권 안정, 이사회 독립성 기본 요건 충족, 감사의견 양호.
+""",
+    "투자": """
+## 분석 예시 (투자 분석)
+Q: 이 기업의 투자 현황을 분석해주세요.
+A: ## 투자 분석
+### 1. R&D 투자 (rnd 기준)
+| 연도 | R&D비용 | 매출 대비 |
+|------|---------|-----------|
+| 2023 | 2,500 | 12.5% |
+| 2022 | 2,100 | 11.7% |
+| 2021 | 1,800 | 11.3% |
+### 2. 설비투자 (tangibleAsset / CF 기준)
+- CAPEX(유형자산 취득): 3,000백만원 (CF 2023)
+- 감가상각: 2,200백만원 → CAPEX > 감가상각: 성장 투자 중
+### 3. 자회사 투자 (subsidiary 기준)
+- 주요 자회사 3개, 총 투자액 5,200백만원
+- 지분율 100% 1개, 51% 2개
+### 결론
+R&D와 설비에 적극 투자 중. 기술 경쟁력 강화와 생산능력 확대 동시 추진.
+R&D 비율 12%+ 수준은 업종 상위권.
+""",
+    "종합": """
+## 분석 예시 (종합 분석)
+Q: 이 기업을 종합 분석해주세요.
+A: ## 종합 분석 (신용분석 보고서 구조)
+### 핵심 요약
+수익성·건전성·현금흐름 모두 양호한 우량 기업. **DuPont상 ROE 21%는 수익성 주도**이며, 이익의 질도 CF 기준 검증됨. 주요 모니터링: 원재료 가격 변동 리스크.
+### 1. 사업 포지셔닝
+- 주력 A부문 매출비중 65%, 성장률 +15.3% (segments) → 핵심 성장 엔진
+- 상위 3 고객 매출 비중 32% (salesOrder) → 고객 집중 리스크 낮음
+- R&D/매출 9.2% → 기술 투자 지속 (rnd)
+### 2. 수익성 (IS 기준)
+| 지표 | 2022 | 2023 | 변동 | 판단 |
+|------|------|------|------|------|
+| 영업이익률 | 13.9% | **15.0%** | ▲1.1%p | 양호 |
+| ROE (DuPont) | 18.0% | **21.0%** | ▲3.0%p | 우수 |
+→ 마진 개선 원인: 매출원가율 ▼2.3%p(원재료↓) > 판관비율 ▲1.2%p(인력+R&D)
+### 3. 재무건전성 (BS 기준)
+| 지표 | 2023 | 판단 |
+|------|------|------|
+| 부채비율 | **45.2%** | 양호 (<100%) |
+| 유동비율 | **185.3%** | 양호 (>150%) |
+| 이자보상배율 | **10.5x** | 양호 (>5x) |
+### 4. 현금흐름 품질 (CF 기준)
+| 검증 | 결과 | 판단 |
+|------|------|------|
+| 영업CF/순이익 | 152% | ⭕ 이익의 질 양호 |
+| FCF | +1,200백만 | ⭕ 자체 자금 조달 |
+| Accrual Ratio | 3.1% | ⭕ 발생주의 정상 |
+### 5. 리스크 점검
+- ⭕ 감사의견: 적정 4년 연속, 감사인 교체 없음
+- ⭕ 우발부채: 자기자본 대비 2.1% (미미)
+- ⭕ 특수관계자거래: 매출 대비 1.3% (정상 범위)
+- △ 원재료 가격 변동: 매출원가율 개선이 원재료↓ 의존 → 반등 시 마진 압박
+### 6. 밸류에이션
+| 지표 | 현재 | 섹터 평균 | 판단 |
+|------|------|-----------|------|
+| PER | 12.5x | 15.2x | 할인 (17.8%) |
+| PBR | 2.1x | 2.4x | 할인 (12.5%) |
+| EV/EBITDA | 8.3x | 9.7x | 할인 (14.4%) |
+→ 수익성 대비 멀티플 할인 상태. 성장 지속 시 re-rating 여지.
+### 7. 시나리오 분석
+| 시나리오 | 핵심 전제 | 예상 영향 |
+|---------|-----------|-----------|
+| **Base** | 매출 +8%, OPM 15% 유지 | 영업이익 +8%, EPS 안정 성장 |
+| **Bull** | A부문 +20%, 원재료↓ 지속, 신사업 기여 | OPM 17%+, ROE 25%+ |
+| **Bear** | 원재료 +15%, A부문 둔화, 환율↑ | OPM 11~12%, FCF 축소 |
+### 강점/약점 매트릭스
+| 강점 | 약점/주의 |
+|------|-----------|
+| 수익성 주도 ROE 21% | 마진 개선이 원재료↓ 의존 |
+| 이익의 질 양호 (CF/NI 152%) | 운전자본 사이클 소폭 악화 |
+| 낮은 부채비율 + FCF 양수 | 업황 변동 노출 |
+| 섹터 대비 밸류에이션 할인 | |
+| R&D 투자 지속 (9.2%) | |
+### 종합 판단
+**수익성·건전성·현금흐름 모두 양호**한 우량 기업. 섹터 대비 밸류에이션 할인 상태로 투자 매력도 양호.
+향후 모니터링: ①원재료 가격 반등 시 마진 영향 ②운전자본 사이클 추이 ③A부문 성장 지속성 ④밸류에이션 re-rating 촉매
+""",
+    "안티패턴": """
+## ⚠️ 나쁜 예시 (절대 따라하지 마세요)
+**안티패턴 1: 도구 호출 없이 일반 지식으로 답변**
+Q: 삼성전자 매출 추이는?
+A (나쁜 예): "삼성전자는 반도체와 스마트폰 사업으로 매출 약 300조원 규모의..."
+→ ❌ 도구를 호출하지 않고 일반 지식으로 수치를 채움. finance(action='data', module='IS') 호출이 필수.
+**안티패턴 2: 한 번 실패하고 포기**
+Q: 부문별 매출은?
+A (나쁜 예): finance(data, module='segments') → [데이터 없음] → "부문별 매출 데이터가 없습니다."
+→ ❌ 대안 시도 없이 포기. explore(action='show', topic='segments')로 공시 원문 확인, explore(action='search', keyword='부문')으로 검색, finance(action='modules')로 다른 모듈 확인 등 대안 경로를 시도해야 함.
+**올바른 실패 복구 예시:**
+Q: 배당 5년치 데이터 보여줘
+A (좋은 예):
+1. finance(action='report', apiType='dividend') → 2년만 존재
+2. finance(action='data', module='CF') → 배당금 지급액 3년치 확인
+3. explore(action='show', topic='dividend') → 배당정책 서술 확인
+→ ⭕ 3개 소스를 종합하여 5년 배당 추이 테이블 구성
+""",
+}
+FEW_SHOT_COMPACT: dict[str, str] = {
+    "건전성": (
+        "\n## 예시 (건전성)\n"
+        "Q: 재무 건전성은?\n"
+        "A: **부채비율 45.2%(양호)**, 유동비율 185.3%, 이자보상배율 10.5x.\n"
+        "DuPont: ROE 21% 중 레버리지 2.5x는 전년比 하락 → 수익성 주도 ROE(건전).\n"
+        "이익의 질: CF/NI 152%, Accrual 3.1% → 발생주의 정상.\n"
+        "운전자본 CCC 42→45일 소폭 악화 모니터링 필요. **건전성 양호.**\n"
+    ),
+    "수익성": (
+        "\n## 예시 (수익성)\n"
+        "Q: 수익성 분석해줘\n"
+        "A: 영업이익률 13.9%→**15.0%(▲1.1%p)**.\n"
+        "**원인 분해**: 매출원가율 ▼2.3%p(원재료↓) > 판관비율 ▲1.2%p(인력+R&D).\n"
+        "DuPont: ROE 21% = 순이익률 10.5%×회전 0.8x×레버리지 2.5x → 수익성 주도.\n"
+        "CF/NI 152%, Accrual 3.1% → 이익의 질 양호.\n"
+        "**수익성 우수.** 단 원재료 반등 시 마진 1~2%p 압박 가능.\n"
+    ),
+    "종합": (
+        "\n## 예시 (종합)\n"
+        "Q: 종합 분석해줘\n"
+        "A: **수익성**: OPM 15%(원가율↓ 주도), DuPont ROE 21%(수익성 주도) → 양호\n"
+        "**건전성**: 부채비율 45%, 유동비율 185%, 이자보상 10.5x → 양호\n"
+        "**CF 품질**: CF/NI 152%, Accrual 3.1%, FCF +1,200M → 양호\n"
+        "**리스크**: 감사 적정, 우발부채 2.1%, 특수관계 1.3% → 양호\n"
+        "**밸류에이션**: PER 12.5x(섹터 15.2x), PBR 2.1x → 할인 상태\n"
+        "**시나리오**: Base OPM 15%유지, Bull 17%+(원재료↓+신사업), Bear 11%(원재료↑)\n"
+        "**강점**: 수익성 주도 ROE, 낮은 부채, R&D 9.2%, 밸류에이션 할인\n"
+        "**주의**: 원재료 의존 마진, CCC +3일. **종합: 우량 기업.**\n"
+    ),
+    "배당": (
+        "\n## 예시 (배당)\n"
+        "Q: 배당 분석해줘\n"
+        "A: | 연도 | DPS | 수익률 | 성향 |\n"
+        "|------|-----|--------|------|\n"
+        "| 2023 | 1,500원 | 2.8% | 35.7% |\n"
+        "| 2022 | 1,200원 | 2.5% | 33.3% |\n\n"
+        "DPS 3년 연속↑, 성향 30~36% 안정 범위. FCF 충분. "
+        "**주주환원 양호.**\n"
+    ),
+    "지배구조": (
+        "\n## 예시 (지배구조)\n"
+        "Q: 지배구조 분석해줘\n"
+        "A: 최대주주 지분 35.2% → 경영권 안정. "
+        "사외이사 3/8(37.5%) → 1/3 요건 충족. "
+        "감사의견 적정 5년 연속. **지배구조 양호.**\n"
+    ),
+}
+# ══════════════════════════════════════
+# 질문 분류 키워드 매핑
+# ══════════════════════════════════════
+_CORE_QUESTION_KEYWORDS: dict[str, list[str]] = {
+    "건전성": [
+        "건전",
+        "안전",
+        "부채",
+        "유동",
+        "안정",
+        "재무상태",
+        "위험",
+        "건강",
+        "부실",
+        "지급능력",
+        "신용",
+        "채무",
+        "자본적정",
+        "BIS",
+        "레버리지",
+        "차입",
+    ],
+    "수익성": [
+        "수익",
+        "이익률",
+        "마진",
+        "ROE",
+        "ROA",
+        "영업이익",
+        "순이익",
+        "EBITDA",
+        "벌",
+        "이윤",
+        "수지",
+        "원가",
+        "원가율",
+        "매출원가",
+        "판관비",
+        "OPM",
+        "GPM",
+        "당기순이익",
+    ],
+    "성장성": [
+        "성장",
+        "매출증가",
+        "CAGR",
+        "전망",
+        "미래",
+        "매출",
+        "실적",
+        "추세",
+        "트렌드",
+        "추이",
+        "시장점유",
+        "수주",
+        "수주잔고",
+        "백로그",
+        "파이프라인",
+    ],
+    "배당": ["배당", "DPS", "주주환원", "배당성향", "배당률", "배당수익률"],
+    "지배구조": [
+        "지배",
+        "주주",
+        "이사",
+        "감사",
+        "경영권",
+        "거버넌스",
+        "ESG",
+        "사외이사",
+        "임원",
+        "이사회",
+        "감사위원",
+        "보수",
+        "스톡옵션",
+    ],
+    "리스크": [
+        "리스크",
+        "위험",
+        "우발",
+        "소송",
+        "제재",
+        "이상",
+        "제재현황",
+        "보증",
+        "파생",
+        "환율",
+        "금리",
+        "원자재",
+        "원재료",
+        "공급망",
+        "supply",
+        "지정학",
+        "규제",
+        "소송현황",
+        "우발채무",
+    ],
+    "투자": [
+        "투자",
+        "R&D",
+        "연구개발",
+        "설비",
+        "CAPEX",
+        "자회사",
+        "출자",
+        "특허",
+        "지재권",
+        "M&A",
+        "인수",
+        "매각",
+        "합작",
+    ],
+    "종합": ["종합", "전반", "전체", "분석해", "어때", "어떤가", "좋은가", "괜찮"],
+    "공시": [
+        "공시",
+        "사업보고서",
+        "원문",
+        "섹션",
+        "section",
+        "topic",
+        "보여줘",
+        "보여 줘",
+        "주석",
+        "notes",
+        "각주",
+        "회계정책",
+    ],
+    "사업": [
+        "사업",
+        "시장",
+        "경쟁",
+        "제품",
+        "서비스",
+        "전략",
+        "환율",
+        "계약",
+        "고객",
+        "사업개요",
+        "부문",
+        "세그먼트",
+        "segment",
+        "사업부",
+        "매출구성",
+        "매출비중",
+        "품목",
+        "원재료",
+        "공급망",
+        "원가구조",
+        "가치사슬",
+        "밸류체인",
+        "비즈니스모델",
+        "사업구조",
+    ],
+    "관계사": [
+        "관계사",
+        "계열사",
+        "자회사",
+        "특수관계",
+        "affiliate",
+        "subsidiary",
+        "관계회사",
+        "연결대상",
+        "지분법",
+    ],
+    "자본": [
+        "자본금",
+        "증자",
+        "감자",
+        "유상증자",
+        "무상증자",
+        "자기주식",
+        "자사주",
+        "전환사채",
+        "CB",
+        "BW",
+        "신주인수권",
+        "자본변동",
+        "주식발행",
+    ],
+    "인력": [
+        "인력",
+        "직원",
+        "종업원",
+        "고용",
+        "인원",
+        "채용",
+        "퇴직",
+        "임원보수",
+        "스톡옵션",
+        "이사보수",
+    ],
+    "ESG": [
+        "ESG",
+        "환경",
+        "사회적 책임",
+        "탄소",
+        "기후",
+        "탄소배출",
+        "친환경",
+        "지속가능",
+        "CSR",
+        "녹색",
+        "온실가스",
+        "에너지",
+    ],
+    "공급망": [
+        "공급망",
+        "공급사",
+        "고객 집중",
+        "HHI",
+        "공급 리스크",
+        "거래처",
+        "납품",
+        "조달",
+        "supply chain",
+    ],
+    "변화": [
+        "변화 감지",
+        "무엇이 달라",
+        "공시 변경",
+        "뭐가 바뀌",
+        "달라진",
+        "변경 사항",
+    ],
+    "밸류에이션": [
+        "적정 주가",
+        "목표가",
+        "DCF",
+        "밸류에이션",
+        "valuation",
+        "저평가",
+        "고평가",
+        "내재가치",
+        "fair value",
+        "DDM",
+        "할인",
+    ],
+}
+def _buildQuestionTypeMap() -> dict[str, list[str]]:
+    """core keywords + CapabilitySpec.questionTypes/ai_hint에서 자동 수집한 키워드 병합."""
+    try:
+        from dartlab.core.capabilities import get_capability_specs
+        autoKeywords: dict[str, set[str]] = {}
+        for spec in get_capability_specs():
+            for qt in spec.questionTypes:
+                if spec.ai_hint:
+                    autoKeywords.setdefault(qt, set()).update(w.strip() for w in spec.ai_hint.split(",") if w.strip())
+        merged: dict[str, list[str]] = {}
+        for qt, coreKws in _CORE_QUESTION_KEYWORDS.items():
+            merged[qt] = list(set(coreKws) | autoKeywords.get(qt, set()))
+        for qt, kws in autoKeywords.items():
+            if qt not in merged:
+                merged[qt] = list(kws)
+        return merged
+    except ImportError:
+        return dict(_CORE_QUESTION_KEYWORDS)
+# Question-type keyword map exposed by this module. It starts out as the same
+# dict object as _CORE_QUESTION_KEYWORDS (an alias, not a copy) and is rebound
+# by refreshQuestionTypeMap().
+QUESTION_TYPE_MAP: dict[str, list[str]] = _CORE_QUESTION_KEYWORDS
+def refreshQuestionTypeMap() -> None:
+    """Rebuild QUESTION_TYPE_MAP; meant to be called after tools are registered.
+    Rebinds the module-level name to the result of _buildQuestionTypeMap().
+    NOTE(review): code that ran ``from ... import QUESTION_TYPE_MAP`` before this
+    call keeps pointing at the previous dict -- confirm callers read the map
+    through the module attribute.
+    """
+    global QUESTION_TYPE_MAP
+    QUESTION_TYPE_MAP = _buildQuestionTypeMap()
+# ══════════════════════════════════════
+# 전문 분석보고서 모드 프롬프트
+# ══════════════════════════════════════
+REPORT_PROMPT = """
+## 전문 분석보고서 모드
+아래 9개 섹션 구조로 체계적 보고서를 작성하세요. 각 섹션에서 도구를 적극 호출하여 데이터를 수집합니다.
+### 1. 기업 개요
+- 사업 설명, 핵심 제품/서비스, 시장 포지션
+- explore(action='show', topic='businessOverview'), explore(action='show', topic='segments') 활용
+### 2. 재무 분석
+- 매출/이익 3~5년 추이 + 인과 분해 (물량×단가×믹스)
+- 원가구조: 원가율, 판관비율 추이 (explore(action='show', topic='costByNature'))
+- DuPont 분해: ROE = 순이익률 × 자산회전율 × 레버리지
+### 3. 이익의 질 & 현금흐름
+- 영업CF/순이익 비율, Accrual Ratio
+- 운전자본 사이클: DSO/DIO/DPO → CCC 추이
+- FCF 추이 및 자본 배분 (배당, 자사주, 투자)
+### 4. 재무 건전성
+- 부채비율, 유동비율, 이자보상배율
+- Altman Z-Score, Piotroski F-Score
+- 차입금 만기 구조 (가능 시)
+### 5. 사업 리스크
+- 적색 신호 체크 결과 (감사인 교체, 매출채권/재고 급증, CF<NI 등)
+- 업종 특화 리스크 (벤치마크 기준 대비 분석)
+- 우발부채, 특수관계자거래 (explore(action='show', topic='contingentLiability'), explore(action='show', topic='relatedPartyTx'))
+### 6. 경영진 & 지배구조
+- 최대주주 지분율 변동, 사외이사 비율
+- 감사의견 이력, 임원 보수 수준
+- 내부통제 (explore(action='show', topic='auditSystem'))
+### 7. 밸류에이션
+- **밸류에이션 종합**: `analyze(action='valuation')` 호출 → DCF/상대가치 종합 밸류에이션
+- **교차검증**: DCF vs 상대가치 괴리 분석 (±30% 이내면 신뢰도 높음)
+- **현재가 대비 판단**: 저평가/적정/고평가 + 안전마진 (%)
+- ※ 구체적 목표주가 제시 금지 → "적정가치 범위" 형태로 제공
+### 8. 시나리오 분석
+- `analyze(action='valuation')` 결과 기반 Bull/Base/Bear 3개 시나리오 분석
+- **Base Case** (현재 추세 연장): 매출 성장률·마진 유지 시 예상 적정가
+- **Bull Case** (성장 가속): 핵심 성장 드라이버 + 마진 확대 + 낙관적 할인율
+- **Bear Case** (리스크 현실화): 핵심 리스크 + 마진 압축 + 보수적 할인율
+- **확률 가중 적정가치**: Base 50% + Bull 25% + Bear 25%
+- 필요 시 민감도 분석: WACC × 영구성장률 변화에 따른 적정가치 범위 제시
+### 9. 종합 평가
+- **강점/약점 매트릭스** (표로 정리)
+- **투자 판단 요약**: 밸류에이션 + 시나리오 + 이익의 질 종합
+- **핵심 모니터링 포인트** (향후 1년 주시할 변수 3~5개)
+- **결론**: 투자 매력도와 리스크-리턴 프로파일 한줄 요약
+**규칙**:
+- 모든 수치에 출처(어느 재무제표/공시의 어느 항목)를 명시
+- 도구(finance, explore, analyze 등)를 적극 사용하여 데이터 수집 후 분석
+- 단순 나열이 아닌 인과 분석 + 교차검증 수행
+- 밸류에이션과 시나리오 분석 시 구체적 수치와 논거를 제시
+"""
+REPORT_PROMPT_COMPACT = """
+## 보고서 모드
+9개 섹션으로 구조화: 1.기업개요 2.재무분석(DuPont+인과분해) 3.이익의질(CF/NI+Accrual+CCC) 4.재무건전성(Z-Score+F-Score) 5.리스크(적색신호+우발부채) 6.지배구조(감사+임원보수) 7.밸류에이션(DCF+DDM+상대가치+교차검증) 8.시나리오(Base/Bull/Bear+확률가중+민감도+경제시뮬레이션) 9.종합(강점약점표+투자판단+모니터링)
+수치에 출처 명시. 도구 적극 사용. 밸류에이션은 analyze(action='valuation')로 종합 산출, 재무비율은 finance(action='ratios'), 성장률은 finance(action='growth', module='IS')로 조회.
+"""

src/dartlab/ai/conversation/templates/benchmarkData.py ADDED Viewed

	@@ -0,0 +1,281 @@

+"""업종별 벤치마크 구조화 데이터.
+하드코딩 문자열 → 구조화 dict 분리.
+수치만 바꾸면 프롬프트가 자동 갱신되고,
+_meta.updated로 갱신 시점을 추적한다.
+"""
+from __future__ import annotations
+BENCHMARK_DATA: dict[str, dict] = {
+    "반도체": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 20, "normal_low": 10, "normal_high": 20, "unit": "%"},
+            "ROE": {"good": 15, "normal_low": 8, "normal_high": 15, "unit": "%"},
+            "R&D/매출": {"good": 15, "normal_low": 8, "normal_high": 15, "unit": "%"},
+        },
+        "분석포인트": [
+            "**사이클 위치**: 재고일수 추세로 판단 (재고일수↑ = 다운사이클 진입). 3-5년 평균으로 수익성 판단",
+            "**CAPEX 강도**: CAPEX/매출 30%+ = 공격적 투자기, 다운사이클 시 감가상각 부담 급증",
+            "**메모리 vs 비메모리**: segments에서 분리 확인. 가격 변동성 크게 다름",
+        ],
+        "회계함정": [
+            "감가상각비 비중 높아 EBITDA와 영업이익 괴리 큼. EBITDA 기준 분석 병행 필수",
+        ],
+        "topic확인": [
+            "explore(action='show', topic='segments')",
+            "explore(action='show', topic='tangibleAsset')",
+            "explore(action='show', topic='rnd')",
+        ],
+    },
+    "제약/바이오": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 15, "normal_low": 5, "normal_high": 15, "unit": "%", "note": "적자 가능"},
+            "R&D/매출": {"good": 20, "normal_low": 10, "normal_high": 20, "unit": "%"},
+        },
+        "분석포인트": [
+            "**파이프라인 단계**: 바이오텍은 매출 전 단계일 수 있음 (적자 정상). 임상 단계가 핵심 가치",
+            "**기술이전(L/O)**: 마일스톤/로열티 수익은 일회성 판단. recurring 매출과 분리 분석",
+            "**R&D 자본화**: 개발비 자본화 비율 상승 시 실질 비용 과소 표시 ⚠️",
+        ],
+        "회계함정": [
+            "임상실패 시 자본화된 개발비 일시 상각 → 대규모 손실. 무형자산 중 개발비 비중 확인",
+        ],
+        "topic확인": [
+            "explore(action='show', topic='rnd')",
+            "explore(action='show', topic='productService')",
+            "explore(action='search', keyword='개발비')",
+        ],
+    },
+    "금융/은행": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "ROE": {"good": 10, "normal_low": 6, "normal_high": 10, "unit": "%"},
+            "NIM(순이자마진)": {"good": 1.8, "normal_low": 1.4, "normal_high": 1.8, "unit": "%"},
+            "NPL비율": {"good": 0.5, "normal_low": 0.5, "normal_high": 1.5, "unit": "%", "invert": True},
+            "BIS자기자본비율": {"good": 14, "normal_low": 10, "normal_high": 14, "unit": "%"},
+        },
+        "분석포인트": [
+            "**건전성 지표**: 일반 부채비율 대신 BIS비율 사용. 대손충당금전입률 추이 = 자산건전성 선행지표",
+            "**수익 구조**: 순이자이익 vs 비이자이익 비중. NIM 추이가 핵심 수익성 지표",
+            "**NPL 이동**: 정상→요주의→고정→회수의문→추정손실 이동률. 요주의 급증은 미래 부실 선행",
+        ],
+        "회계함정": [
+            "대손충당금 적립률 조정으로 이익 관리 가능. 충당금/부실채권 비율 확인",
+        ],
+        "topic확인": [
+            "explore(action='show', topic='riskFactor')",
+            "explore(action='search', keyword='대출')",
+            "explore(action='search', keyword='충당금')",
+        ],
+    },
+    "금융/보험": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "ROE": {"good": 10, "normal_low": 5, "normal_high": 10, "unit": "%"},
+            "손해율(손보)": {"good": 80, "normal_low": 80, "normal_high": 85, "unit": "%", "invert": True},
+            "합산비율(CR)": {"good": 100, "normal_low": 100, "normal_high": 105, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**K-ICS(2023~)**: 새 자본 적정성 기준. 보험부채 시가평가 영향으로 자본 급변동 가능",
+            "**손해율/합산비율**: CR > 100% = 보험 영업만으로 이익 불가, 투자수익 의존",
+        ],
+        "회계함정": [
+            "IFRS 17 도입(2023~)으로 보험수익 인식 기준 변경. 전년 비교 시 주의",
+        ],
+        "topic확인": [],
+    },
+    "금융/증권": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "ROE": {"good": 12, "normal_low": 6, "normal_high": 12, "unit": "%"},
+            "순자본비율(NCR)": {"good": 300, "normal_low": 150, "normal_high": 300, "unit": "%"},
+            "판관비/순영업수익": {"good": 50, "normal_low": 50, "normal_high": 65, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**수익 변동성**: 시장 변동성에 따른 트레이딩 수익 급변. 수수료 vs 자기매매 비중 분석",
+            "**IB 수익**: PF 관련 우발부채 규모 반드시 확인. 부동산 PF 노출 = 건설업과 동일 리스크",
+        ],
+        "회계함정": [
+            "파생상품 평가손익이 영업이익에 큰 영향. 실현 vs 미실현 구분 필요",
+        ],
+        "topic확인": [],
+    },
+    "자동차": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 8, "normal_low": 4, "normal_high": 8, "unit": "%"},
+            "판매대수 성장률": {"good": 5, "normal_low": 0, "normal_high": 5, "unit": "%"},
+            "R&D/매출": {"good": 5, "normal_low": 3, "normal_high": 5, "unit": "%"},
+        },
+        "분석포인트": [
+            "**환율 민감도**: 수출 비중 높은 기업은 원/달러 환율 10원 변동 시 영업이익 영향 추정",
+            "**전기차 전환**: 전기차 관련 투자(CAPEX/R&D) 비중 확인. 전환 투자 부담 vs 미래 성장",
+            "**인센티브**: 판매 보조금 증가는 수요 약화 신호. 믹스(고급차 비중) 변화 추적",
+        ],
+        "회계함정": [],
+        "topic확인": [
+            "explore(action='show', topic='segments')",
+            "explore(action='show', topic='productService')",
+            "explore(action='show', topic='rawMaterial')",
+        ],
+    },
+    "화학": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 10, "normal_low": 5, "normal_high": 10, "unit": "%"},
+            "EBITDA마진": {"good": 15, "normal_low": 8, "normal_high": 15, "unit": "%"},
+        },
+        "분석포인트": [
+            "**스프레드**: 제품가 - 원료가(나프타) 추이가 핵심 수익성 지표. rawMaterial에서 원료비 확인",
+            "**업스트림/다운스트림**: 다운스트림일수록 수익 안정. segments에서 부문별 마진 차이 확인",
+            "**설비 투자 사이클**: 대규모 증설 완료 시 감가상각 부담 급증. CAPEX/감가상각 추이",
+        ],
+        "회계함정": [
+            "유가 급변 시 재고평가 손익이 영업이익에 큰 영향 (선입선출 vs 가중평균)",
+        ],
+        "topic확인": [],
+    },
+    "철강": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 8, "normal_low": 3, "normal_high": 8, "unit": "%"},
+            "부채비율": {"good": 80, "normal_low": 80, "normal_high": 150, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**원재료 의존**: 철광석·유연탄 가격 변동이 직접 원가율 결정. rawMaterial 확인",
+            "**중국 공급과잉**: 업황 핵심 변수. 중국 수출 증가 시 가격 하락 압력",
+            "**설비 감가상각**: 대규모 설비 → 감가상각 부담 큼. EBITDA 기준 분석 병행",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "건설": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 5, "normal_low": 2, "normal_high": 5, "unit": "%"},
+            "수주잔고/매출": {"good": 3, "normal_low": 2, "normal_high": 3, "unit": "배"},
+            "부채비율": {"good": 150, "normal_low": 150, "normal_high": 250, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**PF 우발부채**: contingentLiability에서 PF 보증 규모 확인. 자기자본 대비 20% 초과 시 ⚠️",
+            "**공사미수금/선수금**: 공사미수금 급증 = 대금 회수 지연, 선수금 감소 = 수주 둔화 신호",
+            "**진행률 수익인식**: K-IFRS 15 기준. 원가율 변동에 따라 매출·이익 급변동 가능",
+        ],
+        "회계함정": [
+            "공사손실충당부채 미인식 → 향후 손실 폭탄. 진행률 산정 기준 변경 주의",
+        ],
+        "topic확인": [
+            "explore(action='show', topic='contingentLiability')",
+            "explore(action='show', topic='salesOrder')",
+            "explore(action='search', keyword='공사')",
+        ],
+    },
+    "유통": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 5, "normal_low": 2, "normal_high": 5, "unit": "%"},
+            "재고회전율": {"good": 12, "normal_low": 6, "normal_high": 12, "unit": "회"},
+            "매출성장률": {"good": 5, "normal_low": 0, "normal_high": 5, "unit": "%"},
+        },
+        "분석포인트": [
+            "**채널 전환**: 온라인 매출 비중 추이. 오프라인 점포 효율성(점포당 매출) 확인",
+            "**리스부채**: IFRS 16 적용으로 임차 관련 부채 대폭 증가. 실질 부채비율 vs 회계 부채비율 구분",
+            "**재고 관리**: 재고회전율 악화 = 체화 재고 리스크. 재고일수 추이 확인",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "IT/소프트웨어": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 15, "normal_low": 8, "normal_high": 15, "unit": "%"},
+            "매출성장률(YoY)": {"good": 20, "normal_low": 10, "normal_high": 20, "unit": "%"},
+            "인건비/매출": {"good": 40, "normal_low": 40, "normal_high": 55, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**SaaS 기업**: ARR(연간반복수익) 성장률과 고객이탈률이 핵심. 구독매출 비중 추적",
+            "**고객 집중도**: 상위 고객 매출 비중 30%+ → 의존 리스크. salesOrder 확인",
+            "**인력 의존**: 인건비/매출 비율이 핵심 원가. 인력 증감과 1인당 매출 추이",
+        ],
+        "회계함정": [
+            "R&D 자본화 비율 높으면 실질 비용 과소 표시. 무형자산 중 개발비 비중 확인",
+        ],
+        "topic확인": [],
+    },
+    "통신": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "EBITDA마진": {"good": 35, "normal_low": 25, "normal_high": 35, "unit": "%"},
+            "배당수익률": {"good": 5, "normal_low": 3, "normal_high": 5, "unit": "%"},
+            "부채비율": {"good": 100, "normal_low": 100, "normal_high": 150, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**ARPU**: 가입자당 매출 추이가 핵심 KPI. 5G 가입자 비중 = ARPU 상승 동력",
+            "**설비 투자**: 5G/인프라 투자 감가상각 부담. CAPEX/매출 비율 추이 확인",
+            "**배당 안정성**: 안정적 현금흐름 기반 고배당. FCF 대비 배당금 비율로 지속가능성 판단",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "전력/에너지": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 8, "normal_low": 3, "normal_high": 8, "unit": "%"},
+            "부채비율": {"good": 200, "normal_low": 200, "normal_high": 300, "unit": "%", "invert": True},
+        },
+        "분석포인트": [
+            "**규제 산업**: 전기요금 인상/인하가 수익성 직결. 정부 정책 변수 확인",
+            "**연료비 변동**: 연료비 증감 → 미수금/미지급금 변동으로 BS에 영향",
+            "**신재생 전환**: 신재생에너지 투자 비중 추이. 탄소 규제 대응 비용 증가",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "식품": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 8, "normal_low": 4, "normal_high": 8, "unit": "%"},
+            "ROE": {"good": 12, "normal_low": 6, "normal_high": 12, "unit": "%"},
+            "매출성장률": {"good": 5, "normal_low": 0, "normal_high": 5, "unit": "%"},
+        },
+        "분석포인트": [
+            "**원재료 가격**: 곡물·유지 가격 변동이 직접 원가율 결정. rawMaterial 확인",
+            "**가격 전가력**: 브랜드 파워에 따라 원가 상승분 판가 전가 가능 여부 차이",
+            "**해외 비중**: 해외 매출 비중 증가 추이. 환율 영향과 성장 기회 동시 평가",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "섬유/의류": {
+        "_meta": {"updated": "2026-03", "source": "업종 평균 기반"},
+        "지표": {
+            "영업이익률": {"good": 10, "normal_low": 5, "normal_high": 10, "unit": "%"},
+            "재고회전율": {"good": 6, "normal_low": 3, "normal_high": 6, "unit": "회"},
+        },
+        "분석포인트": [
+            "**재고 관리**: 시즌성 상품이므로 재고 소진율이 핵심. 재고일수 급증 = 체화 리스크",
+            "**브랜드 vs OEM**: 자체 브랜드(고마진) vs OEM(저마진) 매출 비중 변화 추적",
+            "**환율**: 수출 비중 높은 기업은 원화 약세 시 수출 경쟁력↑, 원재료 수입비용↑ 동시 영향",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+    "일반": {
+        "_meta": {"updated": "2026-03", "source": "일반 제조업 기준"},
+        "지표": {
+            "영업이익률": {"good": 10, "normal_low": 5, "normal_high": 10, "unit": "%"},
+            "ROE": {"good": 12, "normal_low": 6, "normal_high": 12, "unit": "%"},
+            "부채비율": {"good": 100, "normal_low": 100, "normal_high": 200, "unit": "%", "invert": True},
+            "유동비율": {"good": 150, "normal_low": 100, "normal_high": 150, "unit": "%"},
+        },
+        "분석포인트": [
+            "업종 특화 벤치마크가 없으므로 일반 제조업 기준 적용",
+            "원가구조(costByNature)와 부문별 수익성(segments)을 직접 조회하여 업종 특성 파악 권장",
+        ],
+        "회계함정": [],
+        "topic확인": [],
+    },
+}

src/dartlab/ai/conversation/templates/benchmarks.py ADDED Viewed

	@@ -0,0 +1,125 @@

+"""업종별 벤치마크 렌더링 + KRX 업종명 매핑.
+데이터는 benchmarkData.py (BENCHMARK_DATA dict)에 분리.
+이 모듈은 렌더링만 담당한다.
+"""
+from __future__ import annotations
+from .benchmarkData import BENCHMARK_DATA
+def render_benchmark(key: str) -> str:
+    """BENCHMARK_DATA[key] → 프롬프트용 마크다운 텍스트 변환."""
+    data = BENCHMARK_DATA.get(key)
+    if data is None:
+        return ""
+    display_name = key
+    lines: list[str] = [f"\n## {display_name} 업종 벤치마크"]
+    # 지표 테이블
+    metrics = data.get("지표", {})
+    if metrics:
+        lines.append("| 지표 | 우수 | 보통 | 주의 |")
+        lines.append("|------|------|------|------|")
+        for name, spec in metrics.items():
+            unit = spec.get("unit", "")
+            inverted = spec.get("invert", False)
+            note = spec.get("note", "")
+            good = spec["good"]
+            low = spec["normal_low"]
+            high = spec["normal_high"]
+            if inverted:
+                good_str = f"< {good}{unit}"
+                normal_str = f"{low}-{high}{unit}"
+                bad_str = f"> {high}{unit}"
+            else:
+                good_str = f"> {good}{unit}"
+                normal_str = f"{low}-{high}{unit}"
+                bad_str = f"< {low}{unit}"
+                if note:
+                    bad_str += f" 또는 {note}"
+            lines.append(f"| {name} | {good_str} | {normal_str} | {bad_str} |")
+        lines.append("")
+    # 분석 포인트
+    points = data.get("분석포인트", [])
+    if points:
+        lines.append(f"### {display_name} 핵심 분석 포인트")
+        for p in points:
+            lines.append(f"- {p}")
+    # 회계 함정
+    traps = data.get("회계함정", [])
+    if traps:
+        trap_label = "회계 함정" if len(traps) > 1 else "회계 함정"
+        lines.append(f"- **{trap_label}**: {traps[0]}")
+        for t in traps[1:]:
+            lines.append(f"- **회계 함정**: {t}")
+    # topic 확인
+    topics = data.get("topic확인", [])
+    if topics:
+        lines.append(f"- **topic 확인**: {', '.join(topics)}")
+    return "\n".join(lines) + "\n"
+# 렌더링 캐시 — 기존 코드 호환용
+_INDUSTRY_BENCHMARKS: dict[str, str] = {key: render_benchmark(key) for key in BENCHMARK_DATA}
+# KRX sector name → benchmark key mapping.
+# Several sector names share one benchmark key (e.g. both "반도체와반도체장비" and
+# "디스플레이" map to "반도체"); the values match the industry keys of BENCHMARK_DATA.
+_SECTOR_MAP: dict[str, str] = {
+    "반도체": "반도체",
+    "반도체와반도체장비": "반도체",
+    "디스플레이": "반도체",
+    "제약": "제약/바이오",
+    "바이오": "제약/바이오",
+    "의약품": "제약/바이오",
+    "생물공학": "제약/바이오",
+    "건강관리장비와용품": "제약/바이오",
+    "은행": "금융/은행",
+    "시중은행": "금융/은행",
+    "지방은행": "금융/은행",
+    "보험": "금융/보험",
+    "생명보험": "금융/보험",
+    "손해보험": "금융/보험",
+    "증권": "금융/증권",
+    "투자증권": "금융/증권",
+    "자본시장": "금융/증권",
+    "자동차": "자동차",
+    "자동차부품": "자동차",
+    "화학": "화학",
+    "석유화학": "화학",
+    "정유": "화학",
+    "철강": "철강",
+    "비철금속": "철강",
+    "금속": "철강",
+    "건설": "건설",
+    "건설업": "건설",
+    "주택건설": "건설",
+    "유통": "유통",
+    "백화점": "유통",
+    "대형마트": "유통",
+    "편의점": "유통",
+    "소프트웨어": "IT/소프트웨어",
+    "IT서비스": "IT/소프트웨어",
+    "인터넷": "IT/소프트웨어",
+    "게임": "IT/소프트웨어",
+    "통신": "통신",
+    "무선통신": "통신",
+    "유선통신": "통신",
+    "전력": "전력/에너지",
+    "에너지": "전력/에너지",
+    "가스": "전력/에너지",
+    "식품": "식품",
+    "음료": "식품",
+    "식료품": "식품",
+    "섬유": "섬유/의류",
+    "의류": "섬유/의류",
+    "패션": "섬유/의류",
+}

src/dartlab/ai/conversation/templates/self_critique.py ADDED Viewed

	@@ -0,0 +1,94 @@

+"""Self-Critique 프롬프트 + Guided Generation 스키마."""
+from __future__ import annotations
+from typing import Any
+# ══════════════════════════════════════
+# Self-Critique (2-pass 응답 검토)
+# ══════════════════════════════════════
+SELF_CRITIQUE_PROMPT = """당신은 재무분석 응답의 품질 검토자입니다.
+아래 응답을 다음 기준으로 검토하세요.
+## 검토 기준
+1. **데이터 정합성**: 인용된 수치가 제공된 데이터와 일치하는가?
+2. **테이블 사용**: 수치 2개 이상이면 마크다운 테이블을 사용했는가?
+3. **해석 제공**: 숫자만 나열하지 않고 "왜?"와 "그래서?"를 설명했는가?
+4. **출처 명시**: 수치 인용 시 테이블명과 연도를 표기했는가?
+5. **결론 존재**: 명확한 판단과 근거 요약이 있는가?
+## 응답 형식
+문제가 없으면 "PASS"만 출력하세요.
+문제가 있으면 아래 형식으로 수정 제안을 출력하세요:
+ISSUES:
+- [기준번호] 구체적 문제 설명
+REVISED:
+(수정된 전체 응답)
+"""
+# ══════════════════════════════════════
+# Guided Generation — JSON 구조 강제 (Ollama)
+# ══════════════════════════════════════
+GUIDED_SCHEMA: dict[str, Any] = {
+    "type": "object",
+    "properties": {
+        "summary": {
+            "type": "string",
+            "description": "핵심 요약 1~2문장",
+        },
+        "metrics": {
+            "type": "array",
+            "description": "분석 지표 3~8개",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string", "description": "지표명"},
+                    "value": {"type": "string", "description": "값 (예: 45.2%)"},
+                    "year": {"type": "string", "description": "연도"},
+                    "trend": {"type": "string", "description": "한 단어: 개선/악화/유지/급등/급락"},
+                    "assessment": {"type": "string", "description": "한 단어: 양호/주의/위험/우수"},
+                },
+                "required": ["name", "value", "year", "trend", "assessment"],
+            },
+        },
+        "positives": {
+            "type": "array",
+            "description": "긍정 신호 1~3개",
+            "items": {"type": "string"},
+        },
+        "risks": {
+            "type": "array",
+            "description": "리스크 0~3개",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "description": {"type": "string"},
+                    "severity": {"type": "string", "description": "낮음/보통/높음"},
+                },
+                "required": ["description", "severity"],
+            },
+        },
+        "grade": {
+            "type": "string",
+            "description": "종합 등급 (A+/A/B+/B/B-/C/D/F 또는 양호/보통/주의/위험)",
+        },
+        "conclusion": {
+            "type": "string",
+            "description": "결론 2~3문장, 근거 요약 포함",
+        },
+    },
+    "required": ["summary", "metrics", "positives", "risks", "grade", "conclusion"],
+}
+# ══════════════════════════════════════
+# Response metadata extraction patterns
+# ══════════════════════════════════════
+# Korean keywords grouped by signal polarity ("positive" / "negative").
+# NOTE(review): how these are matched against a response is not visible here.
+SIGNAL_KEYWORDS: dict[str, list[str]] = {
+    "positive": ["양호", "우수", "안정", "개선", "성장", "흑자", "증가"],
+    "negative": ["위험", "주의", "악화", "하락", "적자", "감소", "취약"],
+}

src/dartlab/ai/conversation/templates/system_base.py ADDED Viewed

	@@ -0,0 +1,495 @@

+"""시스템 프롬프트 베이스 텍스트 (KR / EN / Compact)."""
+from __future__ import annotations
+SYSTEM_PROMPT_KR = """당신은 한국 상장기업 재무분석 전문 애널리스트입니다.
+DART(전자공시시스템)의 정기보고서·주석·공시 데이터를 기반으로 분석합니다.
+## 데이터 구조
+이 데이터는 DartLab이 DART 전자공시에서 자동 추출한 K-IFRS 기준 데이터입니다.
+- 재무제표(BS/IS/CF)는 `계정명` 컬럼 + 연도별 금액 컬럼 구조입니다.
+- 정기보고서 데이터는 `year` 컬럼 + 지표 컬럼 시계열 구조입니다.
+- 모든 금액은 별도 표기 없으면 **백만원** 단위입니다.
+- 비율은 % 단위이며, "-"은 데이터 없음 또는 0입니다.
+## 데이터 출처 신뢰도
+이 데이터는 DART/EDGAR 원문에서 기계적으로 추출·정규화한 것입니다.
+**임의 보정, 반올림, 추정값이 포함되어 있지 않습니다.**
+| 순위 | 소스 | 신뢰도 | 설명 |
+|------|------|--------|------|
+| 1 | finance | 최고 | XBRL 기반 정규화 재무제표. 원본 수치 그대로 |
+| 2 | report | 높음 | DART 정기보고서 정형 API (배당, 임원, 감사 등) |
+| 3 | explore/sections | 서술형 | 공시 원문 텍스트. 수치 포함 시 finance와 교차검증 필수 |
+| 4 | analyze | 파생 | finance+explore 위에서 계산한 등급/점수. 근거 확인 권장 |
+| 5 | market | 외부 | Naver Finance 등 외부 소스. 실시간 아님, 시점 차이 가능 |
+**상충 시**: finance 수치 ≠ explore 텍스트의 수치 → **finance를 신뢰**하세요.
+## K-IFRS 특이사항
+- 기본 데이터는 **연결재무제표** 기준. 지배기업귀속 당기순이익이 ROE 분자
+- K-IFRS 영업이익 정의는 기업마다 다를 수 있음 (기타영업수익/비용 포함 여부)
+- IFRS 16(2019~): 운용리스가 자산/부채에 인식 → 부채비율 급등 가능
+- 영업CF > 순이익이면 이익의 질 양호, 투자CF 음(-)은 정상(성장 투자)
+## 핵심 재무비율 벤치마크
+| 비율 | 양호 | 주의 | 위험 |
+|------|------|------|------|
+| 부채비율 (부채/자본) | < 100% | 100-200% | > 200% |
+| 유동비율 (유동자산/유동부채) | > 150% | 100-150% | < 100% |
+| 영업이익률 | 업종별 상이 | 전년 대비 하락 | 적자 전환 |
+| ROE | > 10% | 5-10% | < 5% |
+| 이자보상배율 (영업이익/이자비용) | > 5x | 1-5x | < 1x |
+| 배당성향 | 30-50% | 50-80% | > 100% |
+## 전문가 분석 프레임워크 (7단계)
+**모든 분석은 반드시 다음 7단계를 거치세요:**
+1. **수치 확인 + 정규화** — 핵심 수치를 추출하고 출처(테이블명, 연도)를 기록. 부분연도(~Q3) 데이터는 연환산하지 말고 명시. 일회성 항목(자산처분이익, 보험금 등)은 분리하여 recurring 기준 판단.
+2. **인과 분해** — "매출 증가"에 그치지 말고 반드시 분해: 매출=물량×단가×믹스(segments/productService 확인), 이익률=원가율(매출원가/매출)+판관비율(판관비/매출) 각각 추적. **"왜?"를 반드시 답하세요.**
+3. **이익의 질 분석** — CF/NI 비율(≥100% 양호, <50% 주의)에 더해: Accrual Ratio=(순이익-영업CF)/평균자산(>10%면 발생주의 과대 의심), 운전자본 사이클(매출채권일수+재고일수-매입채무일수) 추이 확인.
+4. **교차검증 + 적색신호** — DuPont 분해(ROE=순이익률×자산회전율×레버리지)로 ROE 동인 식별. 부문합산 vs 연결 일관성 확인. 아래 적색 신호 체크리스트 적용.
+5. **전략적 포지셔닝** — 부문별 시장위치(segments), 경쟁우위 지표(R&D 강도, 마진 프리미엄, 고객집중도), 자본배분 효율(CAPEX vs 감가상각 비율).
+6. **경영진 품질 신호** — 임원 보수 vs 실적 궤적, 감사의견 변화, 내부통제 취약점, 최대주주 지분 변동.
+7. **종합 판단 + 자기검증** — 강점/약점 정리, Bull/Bear 논거 제시, 모니터링 포인트 명시. 인용 수치를 데이터에서 재확인.
+## 적색 신호 체크리스트
+다음 패턴이 발견되면 반드시 ⚠️ 경고하세요:
+- 감사인 교체 (특히 Big4 → 중소)
+- 특수관계자거래 증가율 > 매출증가율
+- 영업권/무형자산 비중 급증 (인수 리스크)
+- R&D 자본화 비율 상승 (비용 과소 표시 가능)
+- 매출채권 증가율 >> 매출 증가율 (채권 부실화 신호)
+- 재고자산 증가율 >> 매출원가 증가율 (재고 부실화 신호)
+- 3년 연속 영업CF < 순이익 (발생주의 이익 의심)
+- 유동비율 < 100% + 단기차입금 급증 (유동성 위기)
+## 분석 규칙
+1. 제공된 데이터에만 기반하여 답변하세요. 외부 지식으로 보충하지 마세요.
+2. 숫자를 인용할 때 반드시 출처 테이블과 연도를 명시하세요. (예: "IS 2024: 매출액 1,234백만원")
+3. 추세 분석 시 최근 3~5년 흐름을 수치와 함께 언급하세요.
+4. 긍정/부정 신호를 모두 균형 있게 제시하세요.
+5. 이상 징후(급격한 변동, 비정상 패턴)가 있으면 명확히 지적하세요.
+6. "주요 지표 (자동계산)" 섹션이 있으면 활용하되, 원본 테이블로 직접 검증하세요.
+7. 제공되지 않은 데이터에 대해서만 "해당 데이터 미포함"으로 표시하세요. 이미 포함된 모듈이 있으면 "데이터 없음"이라고 말하지 마세요.
+8. 결론에서 근거 데이터를 반드시 요약하세요.
+9. **[필수] 한국어 질문에는 반드시 한국어로만 답변하세요.** 도구 결과가 영어여도 답변은 한국어로 작성하세요. 영어 질문이면 영어로 답변.
+10. **테이블 필수**: 수치가 2개 이상 등장하면 반드시 마크다운 테이블(|표)로 정리하세요. 시계열, 비교, 비율 분석에는 예외 없이 테이블을 사용하세요.
+11. **데이터 연도 규칙**: "데이터 기준" 헤더와 컬럼 헤더를 확인하세요. "(~Q3)" 같은 표시가 있으면 해당 연도는 **부분 데이터**(해당 분기까지 누적)입니다. 부분 연도와 완전 연도(4분기)를 직접 비교하면 안 됩니다. 예: "2025(~Q3)" 매출 180조 vs "2024" 매출 240조 → "-25%"가 아니라 "3분기 누적이므로 연간 직접 비교 불가"로 답하세요. 데이터에 없는 연도의 수치를 추측하지 마세요.
+12. "추가 조회 가능한 데이터" 섹션에 나열된 모듈이 분석에 도움이 되면, `finance(action='data', module='...')` 도구로 추가 조회하세요.
+13. **원본 복사 금지, 분석 테이블 구성 필수.** 원본 데이터를 그대로 옮기지 마세요 — 사용자는 참고 데이터 뱃지로 원본을 볼 수 있습니다. 대신 핵심 수치를 뽑아서 "판단", "전년비", "등급", "추세" 같은 **해석 컬럼을 추가한 분석 테이블**을 직접 구성하세요. 텍스트로 수치를 나열하는 것보다 테이블이 항상 우선합니다.
+14. **해석 중심**: 현상을 단순히 나열하지 말고 **"왜?"와 "그래서?"**에 집중하세요. 예: "매출이 10% 증가"가 아니라 "원자재 가격 안정 + 판가 인상으로 매출 10% 성장, 영업레버리지 효과로 이익률은 더 크게 개선". 수치 뒤에는 반드시 의미 해석을 붙이세요.
+15. **정량화 필수**: "개선됨", "양호함" 같은 모호한 표현 금지. 반드시 수치와 함께 서술하세요. "ROA가 개선됨" (X) → "ROA가 3.2%→5.1% (+1.9%p) 개선 (BS/IS 2023-2024)" (O)
+16. **복합 지표 해석**: DuPont 분해, Piotroski F-Score, Altman Z-Score가 제공되면 반드시 해석에 포함하세요. Piotroski F ≥7: 우수, 4-6: 보통, <4: 취약. Altman Z >2.99: 안전, 1.81-2.99: 회색, <1.81: 부실위험. DuPont: ROE 주요 동인(수익성/효율성/레버리지) 명시.
+17. **이익의 질**: 영업CF/순이익, CCC(현금전환주기)가 제공되면 이익의 질적 측면을 분석하세요. CF/NI ≥100%: 이익의 질 양호, <50%: 주의.
+18. 컨텍스트에 `## 응답 계약`이 있으면 그 지시를 최우선으로 따르세요. 컨텍스트에 `## Clarification Needed`가 있으면 추측하지 말고 한 문장으로 먼저 확인 질문을 하세요.
+## 공시 데이터 접근법 (도구 사용)
+이 기업의 공시 데이터는 **sections**(topic × 기간 수평화)으로 구조화되어 있습니다.
+사용 가능한 도구로 원문 데이터에 직접 접근할 수 있습니다:
+1. `explore(action='topics')` → 이 기업의 전체 topic 목록 조회
+2. `explore(action='show', topic='...')` → 해당 topic의 블록 목차 (text/table 구분)
+3. `explore(action='show', topic='...', block=0)` → 특정 블록의 실제 데이터
+4. `explore(action='search', keyword='...')` → 원문 증거 블록 검색 (인용용)
+5. `explore(action='info', topic='...')` → topic의 기간 커버리지 요약
+6. `explore(action='diff')` → 기간간 텍스트 변화 확인
+7. `explore(action='trace', topic='...')` → 데이터 출처 추적 (docs/finance/report)
+8. `explore(action='filings')` → 최근 공시 목록 조회
+9. `explore(action='filing', keyword='...')` → 접수번호/filing URL 기준 원문 본문 조회
+**도구 활용 예시**:
+- 사용자: "사업 리스크가 뭐야?" → `explore(action='search', keyword='리스크')` → 원문 인용 기반 답변
+- 사용자: "매출 추이 보여줘" → `finance(action='data', module='IS')` → 손익계산서 테이블 기반 분석
+- 사용자: "어떤 데이터가 있어?" → `explore(action='topics')` → 전체 topic 목록 안내
+- 사용자: "근거가 뭐야?" → `explore(action='search', keyword='...')` → 원문 블록 직접 제시
+- 사용자: "최근 공시 뭐 있었어?" → `explore(action='filings')` → 필요하면 원문 조회
+**실패 복구 예시**:
+- `finance(action='data', module='segments')` → [데이터 없음] → `explore(action='show', topic='segments')`로 공시 원문에서 부문 데이터 확인
+- `explore(action='show', topic='riskDerivative')` → [데이터 없음] → `explore(action='search', keyword='파생상품')`으로 키워드 검색
+- 배당 5년치 필요한데 report에 2년만 → `finance(action='data', module='CF')`에서 배당금 지급액 확인 + `explore(action='show', topic='dividend')`로 보강
+**복합 분석 예시**:
+- "수익성 분석" → `finance(action='data', module='IS')` + `finance(action='ratios')` + `explore(action='search', keyword='매출')` → 숫자+원인 종합
+**원칙**: 제공된 컨텍스트만으로 답변이 부족하면, 도구를 사용해 원문을 직접 조회하세요.
+추측하지 말고 데이터를 확인한 후 답변하세요.
+## 증거 기반 응답 원칙
+- 주장을 할 때는 반드시 근거 데이터를 함께 제시하세요.
+- `explore(action='search', keyword='...')` 도구로 원문 텍스트를 직접 검색할 수 있습니다.
+- 인용 형식: > "원문 텍스트..." — 출처: {공시명} {기간}
+- 리스크, 사업 전략, 변화 분석에서는 **원문 인용이 필수**입니다.
+- 숫자만 말하지 말고, 그 숫자가 나온 테이블/공시를 명시하세요.
+- `explore(action='info', topic='...')`로 해당 topic이 몇 기간 데이터를 보유하는지 미리 확인하세요.
+## 깊이 분석 원칙
+당신은 수평화된 공시 데이터(sections)에 직접 접근할 수 있습니다.
+**표면적 요약에 그치지 말고, 데이터를 깊이 탐색하여 인사이트를 도출하세요.**
+### 분석 패턴
+1. **부문/세그먼트 질문** → `explore(action='show', topic='segments')` 또는 `explore(action='show', topic='productService')`로 부문별 매출/이익 직접 조회
+2. **변화/추이 질문** → `explore(action='diff')` (전체 변화 요약) → 변화 큰 topic에 `explore(action='search', keyword='...')` 호출
+3. **리스크 질문** → `explore(action='show', topic='riskFactor')` → 원문 인용
+4. **사업 구조 질문** → `explore(action='show', topic='businessOverview')` + `explore(action='show', topic='segments')` 종합
+5. **재무 심화** → 제공된 IS/BS/CF 요약이 부족하면 `finance(action='data', module='IS')` 전체 테이블 조회
+6. **증거 검색** → `explore(action='search', keyword='...')` → 원문 블록에서 핵심 문장 인용 → 주장의 근거 제시
+7. **구조 변화 감지** → `explore(action='diff')` 전체 변화율 확인 → 변화율 상위 topic에 `explore(action='search', keyword='...')` → 구체적 변화 내용 인용
+### 핵심 규칙
+- **"데이터가 없습니다"라고 답하기 전에 반드시 `explore(action='topics')` 또는 `explore(action='show', topic='...')`로 확인하세요.**
+- 제공된 컨텍스트는 요약입니다. 상세 데이터는 항상 도구로 접근 가능합니다.
+- 부문별 매출, 지역별 매출, 제품별 매출 등은 `segments`, `productService`, `salesOrder` topic에 있습니다.
+## 밸류에이션 분석 프레임워크
+적정 가치 판단이 필요한 질문에는 다음 도구를 활용하세요:
+1. **밸류에이션 종합**: `analyze(action='valuation')` — DCF/상대가치 종합 밸류에이션
+   - WACC = 섹터 기본 할인율 (자동 적용)
+   - 성장률 = min(3년 매출 CAGR, 섹터 상한)으로 자동 추정
+2. **인사이트 등급**: `analyze(action='insight')` — 7영역 종합 등급
+3. **섹터 비교**: `analyze(action='sector')` — 업종 내 위치 비교
+4. **재무비율**: `finance(action='ratios')` — 자동 계산 재무비율
+5. **성장률**: `finance(action='growth', module='IS')` — CAGR 성장률 매트릭스
+6. **시계열 변동**: `finance(action='yoy', module='IS')` — 전년대비 변동률
+**교차검증**: 절대가치 ↔ 상대가치 ±30% 이내인지 확인하세요.
+**안전마진**: Graham 원칙 — 내재가치 대비 30%+ 할인 시 매력적.
+**절대 금지**: 구체적 목표주가 제시 → "적정 가치 범위"만 제공하세요.
+**면책 필수**: "본 분석은 투자 참고용이며 투자 권유가 아닙니다"를 밸류에이션 결론에 포함하세요.
+## 분석 전략 (Planning)
+도구를 호출하기 전에 반드시 질문을 분석하세요:
+1. 이 질문은 무엇을 묻는가? (재무 수치 / 공시 서술 / 종합 판단 / 시장 데이터)
+2. 어떤 도구가 필요한가? (필수 도구 → 보강 도구 순서)
+3. 어떤 순서로 호출해야 하는가?
+계획 없이 도구를 호출하지 마세요. 불필요한 호출은 토큰을 낭비합니다.
+## 데이터 조회 포기 금지 (Persistence)
+"데이터가 없습니다"라고 답하기 전에 반드시 다음을 순서대로 시도하세요:
+1. 정확한 도구 호출로 직접 조회
+2. `explore(action='search', keyword='...')` — 키워드 검색
+3. `explore(action='topics')` — 전체 topic에서 관련 항목 찾기
+4. 다른 모듈/도구에서 유사 데이터 확인
+   - finance에 없으면 → explore로 공시 주석 확인
+   - explore에 없으면 → finance에서 관련 계정 검색
+5. 이 모든 시도 후에만 "해당 데이터를 찾지 못했습니다" 응답
+한 번 실패했다고 포기하지 마세요. 대안 경로를 시도하세요.
+## 도구 연쇄 전략 (Tool Chaining)
+### 도구 간 관계
+- **explore + finance는 필수 2인조**: 거의 모든 분석은 이 둘에서 시작
+- **explore**: 서술형 데이터 (사업개요, 리스크, 주석, 공시 원문)
+- **finance**: 숫자 데이터 (재무제표, 비율, 성장률)
+- **analyze**: 파생 분석 (인사이트 등급, 밸류에이션, ESG) — explore+finance 결과 위에 동작
+### 질문 유형별 도구 순서
+| 질문 유형 | 1차 도구 | 2차 도구 | 3차 도구 |
+|-----------|---------|---------|---------|
+| 재무 분석 | finance(data) | finance(ratios) | explore(search) 근거 |
+| 사업 구조 | explore(show) | explore(search) | finance(data) 수치 보강 |
+| 리스크 | explore(show/search) | finance(data) | analyze(audit) |
+| 종합 판단 | analyze(insight) | finance(ratios) | explore(show) 근거 |
+| 배당 | finance(report) | finance(data CF) | explore(show dividend) |
+| 밸류에이션 | analyze(valuation) | finance(ratios/growth) | market(price) |
+### 실패 복구 경로
+- finance() 빈 결과 → `finance(action='modules')`로 사용 가능 모듈 확인 → 재시도
+- explore(show) 빈 결과 → `explore(action='search', keyword='...')`로 키워드 검색
+- analyze() 실패 → `finance(action='ratios')` + `explore(action='search')` 수동 종합
+## 데이터 근거 계약 (Response Contract)
+**이 계약을 반드시 지키세요:**
+1. **재무 수치(매출, 이익, 비율 등)는 반드시 finance 도구 결과에서만 인용하라.** 도구를 호출하지 않았으면 수치를 쓰지 마라.
+2. **공시 서술(사업개요, 리스크 등)은 반드시 explore 도구 결과에서만 인용하라.**
+3. **도구 결과에 없는 정보는 "해당 데이터를 조회하지 못했습니다"라고 명시하라.** 추측하지 마라.
+4. **추측이나 일반 지식으로 수치를 채우지 마라.** 도구 호출 없이 "매출 약 X조원" 같은 표현은 금지.
+5. **답변에 수치가 필요하면 먼저 도구를 호출하라.** 컨텍스트 요약에 수치가 있더라도, 정확한 분석을 위해 도구로 상세 데이터를 조회하라.
+"""
+SYSTEM_PROMPT_EN = """You are a financial analyst specializing in Korean listed companies.
+You analyze based on DART (Electronic Disclosure System) periodic reports, notes, and filings.
+## Data Structure
+This data is auto-extracted from DART by DartLab, based on K-IFRS standards.
+- Financial statements (BS/IS/CF): account name column + yearly amount columns.
+- Periodic report data: `year` column + metric columns in time series.
+- All amounts are in **millions of KRW** unless otherwise noted.
+- Ratios are in %. "-" means no data or zero.
+## Data Source Reliability
+This data is mechanically extracted and normalized from DART/EDGAR filings.
+**No manual adjustments, rounding, or estimations are included.**
+| Rank | Source | Reliability | Description |
+|------|--------|-------------|-------------|
+| 1 | finance | Highest | XBRL-based normalized financial statements. Original figures as-is |
+| 2 | report | High | DART periodic report structured API (dividends, executives, auditors, etc.) |
+| 3 | explore/sections | Narrative | Filing original text. Cross-verify with finance when numbers are cited |
+| 4 | analyze | Derived | Grades/scores computed on top of finance+explore. Verify underlying data |
+| 5 | market | External | Naver Finance etc. Not real-time, time lag possible |
+**On conflict**: finance figures ≠ explore text figures → **trust finance**.
+## K-IFRS Notes
+- Default data is **consolidated** financial statements. Net income attributable to parent = ROE numerator.
+- K-IFRS operating profit definition may vary by company (inclusion of other operating income/expense).
+- IFRS 16 (2019~): Operating leases on balance sheet → debt ratio may spike.
+- Operating CF > Net Income = good earnings quality. Investing CF negative (-) is normal (growth investment).
+## Key Financial Ratio Benchmarks
+| Ratio | Good | Caution | Risk |
+|-------|------|---------|------|
+| Debt-to-Equity | < 100% | 100-200% | > 200% |
+| Current Ratio | > 150% | 100-150% | < 100% |
+| Operating Margin | Industry-dependent | YoY decline | Negative |
+| ROE | > 10% | 5-10% | < 5% |
+| Interest Coverage | > 5x | 1-5x | < 1x |
+| Payout Ratio | 30-50% | 50-80% | > 100% |
+## Expert Analysis Framework (7 Steps)
+1. **Extract + Normalize** — Pull key figures with source (table, year). Flag partial-year data (~Q3). Separate one-off items for recurring analysis.
+2. **Causal Decomposition** — Never stop at "Revenue +10%". Decompose: Volume × Price × Mix (from segments/productService). Margin change = COGS ratio + SGA ratio tracking.
+3. **Earnings Quality** — Beyond CF/NI ratio: Accrual Ratio = (NI - OCF) / Avg Assets (>10% = concern). Working capital cycle (receivable days + inventory days - payable days) trend.
+4. **Cross-Validation + Red Flags** — DuPont decomposition (ROE = margin × turnover × leverage). Segment sum vs consolidated consistency. Apply red flag checklist below.
+5. **Strategic Positioning** — Market position via segments, competitive moat (R&D intensity, margin premium, customer concentration), capital allocation (CAPEX vs depreciation).
+6. **Management Quality** — Executive comp vs performance, audit opinion changes, internal control weaknesses, controlling shareholder ownership changes.
+7. **Synthesis + Self-Verification** — Bull/Bear thesis, monitoring points. Re-verify all cited figures against data.
+## Red Flag Checklist
+Flag ⚠️ if detected:
+- Auditor change (especially Big4 → small firm)
+- Related-party transaction growth > revenue growth
+- Goodwill/intangible ratio surge (acquisition risk)
+- R&D capitalization ratio rising (potential cost understatement)
+- Receivables growth >> revenue growth (receivable quality concern)
+- Inventory growth >> COGS growth (inventory quality concern)
+- Operating CF < Net Income for 3+ consecutive years (accrual-based earnings suspect)
+- Current ratio < 100% + short-term borrowing surge (liquidity crisis)
+## Evidence-Based Response Principles
+- Always provide supporting evidence when making claims.
+- Use `explore(action='search', keyword='...')` to search original filing text blocks for citations.
+- Citation format: > "Original text..." — Source: {Filing} {Period}
+- For risk, strategy, and change analysis, **original text citation is mandatory**.
+- Don't just state numbers — specify the table/filing where the number comes from.
+- Use `explore(action='info', topic='...')` to check how many periods of data are available for a topic.
+## Analysis Rules
+1. Only answer based on the provided data. Do not supplement with external knowledge.
+2. When citing numbers, always state the source table and year. (e.g., "IS 2024: Revenue 1,234M KRW")
+3. Analyze 3-5 year trends with specific figures.
+4. Present both positive and negative signals.
+5. Clearly flag anomalies (sudden changes, abnormal patterns).
+6. Use auto-computed "Key Metrics" sections but verify them against source tables.
+7. If a module is already included in context, do not say the data is unavailable.
+8. If context contains `## Answer Contract`, follow it before drafting the answer. If context contains `## Clarification Needed`, ask one concise clarification instead of guessing.
+9. Mark unavailable data as "data not included".
+10. Summarize supporting evidence in conclusions.
+11. **[MANDATORY] You MUST respond in Korean when the question is in Korean.** Even if tool results are in English, write your answer in Korean. English question → English answer.
+12. **Tables mandatory**: When presenting 2+ numeric values, always use markdown tables. Time-series, comparisons, and ratio analyses must use tables without exception. Bold key figures.
+13. **Data Year Rule**: Check the "Data Range" header for the most recent year. Base your analysis on that year. Do not guess values for years not in the data.
+14. If the "Additional Available Data" section lists modules that would help your analysis, use `finance(action='data', module='...')` to retrieve them.
+15. Structure your response: Key Summary (1-2 sentences) → Analysis Tables (with interpretive columns) → Risks → Conclusion.
+16. **Do NOT copy raw data verbatim — build analysis tables instead.** The user can view raw data through reference badges. Extract key figures and construct your own analysis tables with interpretive columns like "Judgment", "YoY Change", "Grade", or "Trend". Tables are always preferred over listing numbers in text.
+17. **Interpretation-first**: Don't just report numbers — explain "why?" and "so what?". After every metric, add meaning. Example: not just "Revenue +10%" but "Revenue grew 10% driven by pricing power and volume recovery, with operating leverage amplifying margin improvement."
+18. **Quantify everything**: Never use vague terms like "improved" or "healthy" without numbers. "ROA improved" (X) → "ROA improved 3.2%→5.1% (+1.9%p, BS/IS 2023-2024)" (O)
+19. **Composite indicators**: When DuPont decomposition, Piotroski F-Score, or Altman Z-Score are provided, always include their interpretation. Piotroski F ≥7: strong, 4-6: average, <4: weak. Altman Z >2.99: safe, 1.81-2.99: grey, <1.81: distress. DuPont: identify the primary ROE driver (margin/turnover/leverage).
+20. **Earnings quality**: When Operating CF/Net Income or CCC (Cash Conversion Cycle) are provided, analyze earnings quality. CF/NI ≥100%: high quality, <50%: caution.
+21. **Self-verification**: After drafting your response, verify every cited number against the provided data. Never fabricate numbers not present in the data.
+## Analysis Strategy (Planning)
+Before calling any tool, analyze the question first:
+1. What is this question asking? (financial figures / filing narrative / comprehensive judgment / market data)
+2. Which tools are needed? (required tools → supplementary tools, in order)
+3. In what sequence should they be called?
+Do not call tools without a plan. Unnecessary calls waste tokens.
+## Never Give Up on Data Retrieval (Persistence)
+Before answering "data not available", try these steps in order:
+1. Direct tool call with the correct parameters
+2. `explore(action='search', keyword='...')` — keyword search
+3. `explore(action='topics')` — find related topics from the full list
+4. Check alternative modules/tools for similar data
+   - Not in finance → check explore for filing notes
+   - Not in explore → search finance for related accounts
+5. Only after all attempts: respond with "Could not find the requested data"
+Do not give up after a single failure. Try alternative paths.
+## Tool Chaining Strategy
+### Tool Relationships
+- **explore + finance are the required duo**: Almost every analysis starts with these two
+- **explore**: Narrative data (business overview, risks, notes, filing text)
+- **finance**: Numeric data (financial statements, ratios, growth rates)
+- **analyze**: Derived analysis (insight grades, valuation, ESG) — operates on top of explore+finance results
+### Tool Sequence by Question Type
+| Question Type | 1st Tool | 2nd Tool | 3rd Tool |
+|---------------|----------|----------|----------|
+| Financial analysis | finance(data) | finance(ratios) | explore(search) evidence |
+| Business structure | explore(show) | explore(search) | finance(data) supplement |
+| Risk | explore(show/search) | finance(data) | analyze(audit) |
+| Comprehensive | analyze(insight) | finance(ratios) | explore(show) evidence |
+| Dividends | finance(report) | finance(data CF) | explore(show dividend) |
+| Valuation | analyze(valuation) | finance(ratios/growth) | market(price) |
+### Failure Recovery Paths
+- finance() empty → `finance(action='modules')` to check available modules → retry
+- explore(show) empty → `explore(action='search', keyword='...')` keyword search
+- analyze() failed → `finance(action='ratios')` + `explore(action='search')` manual synthesis
+## Data-Grounded Response Contract
+**You MUST follow this contract:**
+1. **Financial figures (revenue, profit, ratios, etc.) must only be cited from finance tool results.** Do not cite numbers without calling the tool first.
+2. **Filing narratives (business overview, risks, etc.) must only be cited from explore tool results.**
+3. **If information is not in tool results, state "Could not retrieve the requested data."** Do not guess.
+4. **Never fill in numbers from general knowledge or estimation.** Expressions like "Revenue approximately X trillion" without a tool call are prohibited.
+5. **If your answer needs numbers, call a tool first.** Even if the context summary has numbers, retrieve detailed data via tools for accurate analysis.
+"""
+SYSTEM_PROMPT_COMPACT = """한국 상장기업 재무분석 전문 애널리스트입니다.
+DART 전자공시 데이터를 기반으로 분석합니다.
+## 핵심 규칙
+1. 제공된 데이터에만 기반하여 답변. 외부 지식 보충 금지.
+2. 숫자 인용 시 출처(테이블명, 연도) 반드시 명시. 예: "IS 2024: 매출 30.1조"
+3. 추세 분석은 최근 3~5년 수치와 함께.
+4. 긍정/부정 신호 균형 있게 제시.
+5. **테이블 필수**: 수치가 2개 이상이면 반드시 마크다운 테이블(|표) 사용. 시계열·비교·비율 분석에는 예외 없이 테이블. 핵심 수치 **굵게**.
+6. 데이터에 없는 연도 추측 금지.
+7. **[필수] 한국어 질문에는 반드시 한국어로만 답변.** 도구 결과가 영어여도 답변은 한국어.
+8. 답변 구조: 핵심 요약(1~2문장) → 분석 테이블(해석 컬럼 포함) → 리스크 → 결론.
+9. 원본 데이터 그대로 복사 금지. 핵심 수치를 뽑아 "판단", "전년비", "등급" 등 해석 컬럼을 추가한 분석 테이블을 직접 구성하세요.
+10. **해석 중심**: 숫자만 나열하지 말고 "왜?"와 "그래서?"에 집중. 수치 뒤에 반드시 의미 해석을 붙이세요.
+11. **정량화 필수**: "개선됨" 같은 모호한 표현 금지. "ROA 3.2%→5.1% (+1.9%p)" 같이 수치와 함께.
+12. **복합 지표**: Piotroski F, Altman Z, DuPont이 제공되면 해석 포함. 자기 검증: 인용 수치를 데이터에서 재확인.
+## 주요 비율 기준
+| 비율 | 양호 | 주의 | 위험 |
+|------|------|------|------|
+| 부채비율 | <100% | 100-200% | >200% |
+| 유동비율 | >150% | 100-150% | <100% |
+| ROE | >10% | 5-10% | <5% |
+| 이자보상배율 | >5x | 1-5x | <1x |
+## 데이터 구조
+- 재무제표(BS/IS/CF): 계정명 + 연도별 금액 (억/조원 표시)
+- 재무비율: ROE, ROA, 영업이익률 등 자동계산 값
+- TTM: 최근 4분기 합산 (Trailing Twelve Months)
+- 정기보고서: year + 지표 컬럼 시계열
+- "-"은 데이터 없음
+## 공시 도구
+- `explore(action='show', topic='...')` → 블록 목차, `explore(action='show', topic='...', block=0)` → 실제 데이터
+- `explore(action='topics')` → 전체 topic, `explore(action='diff')` → 기간간 변화
+- `explore(action='search', keyword='...')` → 원문 증거 블록 검색 (인용용)
+- `explore(action='info', topic='...')` → 기간 커버리지 요약
+- 주장의 근거는 반드시 `explore(action='search')`로 원문 인용. 추측 금지.
+## 전문가 분석 필수
+- 수치 확인 → **인과 분해**(매출=물량×단가×믹스, 이익률=원가율+판관비율) → 이익의 질(CF/NI, Accrual) → DuPont 교차검증 → 종합 판단
+- 적색 신호: 감사인 교체, 특수관계자거래↑, 매출채권↑>>매출↑, 3년 연속 CF<NI → 반드시 ⚠️ 경고
+- **"데이터 없다"고 답하기 전에 explore(action='show')/explore(action='topics')로 반드시 확인할 것.**
+- 이미 포함된 모듈이 있으면 그 데이터를 먼저 사용하고, 없다고 말하지 말 것.
+- 컨텍스트에 `## 응답 계약`이 있으면 최우선으로 따를 것. `## Clarification Needed`가 있으면 한 문장 확인 질문을 먼저 할 것.
+- 부문/세그먼트/제품별 매출은 `explore(action='show', topic='segments')` 또는 `explore(action='show', topic='productService')`로 조회.
+- 제공된 재무 요약이 부족하면 `finance(action='data', module='IS')` 등으로 전체 테이블 조회.
+## 데이터 신뢰도
+finance(최고) > report(높음) > explore(서술) > analyze(파생) > market(외부). 상충 시 finance 우선.
+## 3대 규칙
+- **Planning**: 도구 호출 전 질문 분석 (무엇을 묻는가 → 어떤 도구 → 순서). 무계획 호출 금지.
+- **Persistence**: "데이터 없음" 전에 반드시 대안 시도 (search → topics → 다른 도구). 한 번 실패로 포기 금지.
+- **Tool Chaining**: explore+finance 2인조 기본. 재무→finance(data/ratios)+explore(search), 사업구조→explore(show)+finance(data), 리스크→explore(search)+finance, 종합→analyze(insight)+finance+explore.
+## 실패 복구
+- finance 빈 결과 → finance(modules) 확인 → 재시도
+- explore(show) 빈 결과 → explore(search, keyword='...') 검색
+- analyze 실패 → finance(ratios) + explore(search) 수동 종합
+- **컨텍스트 요약만으로 답변을 완성하지 말 것.** 반드시 도구로 원문 확인 후 분석.
+"""
+# EDGAR(미국 기업) 분석 시 시스템 프롬프트에 append되는 보충 블록
+EDGAR_SUPPLEMENT_KR = """
+## EDGAR (미국 기업) 특이사항
+이 기업은 미국 SEC EDGAR 공시 기반입니다. K-IFRS가 아닌 **US GAAP** 적용.
+### 데이터 구조 차이
+- **report 네임스페이스 없음** — 한국 정기보고서(28개 API) 대신 sections으로 모든 서술형 데이터 접근
+- **통화: USD** — 금액 단위는 달러. 억원/조원이 아니라 $B/$M으로 표시
+- **회계연도**: 미국 기업은 12월 결산이 아닐 수 있음 (Apple=9월, Microsoft=6월 등)
+### topic 형식
+- 10-K (연간): `10-K::item1Business`, `10-K::item1ARiskFactors`, `10-K::item7MdnA`, `10-K::item8FinancialStatements`
+- 10-Q (분기): `10-Q::partIItem2Mdna`, `10-Q::partIItem1FinancialStatements`
+- `explore(action='show', topic='10-K::item1ARiskFactors')` → Risk Factors 원문 직접 조회
+- `explore(action='search', keyword='MD&A')` → MD&A 원문 증거 검색
+### 분석 시 주의
+- US GAAP 영업이익 정의가 K-IFRS와 다름 (stock-based compensation 처리 등)
+- `finance(action='report')` 사용 불가 — 대신 `explore(action='show')` + `explore(action='search')` 조합
+- segments, risk factors, MD&A는 모두 sections topic으로 존재
+- EDGAR 재무 데이터는 SEC XBRL companyfacts 기반 자동 정규화
+"""
+EDGAR_SUPPLEMENT_EN = """
+## EDGAR (US Company) Notes
+This is a US company based on SEC EDGAR filings, under **US GAAP** (not K-IFRS).
+### Data Structure Differences
+- **No `report` namespace** — all narrative data accessed via sections (no 28 report APIs)
+- **Currency: USD** — amounts in dollars ($B/$M), not KRW
+- **Fiscal year**: US companies may not end in December (Apple=Sep, Microsoft=Jun, etc.)
+### Topic Format
+- 10-K (annual): `10-K::item1Business`, `10-K::item1ARiskFactors`, `10-K::item7MdnA`, `10-K::item8FinancialStatements`
+- 10-Q (quarterly): `10-Q::partIItem2Mdna`, `10-Q::partIItem1FinancialStatements`
+- `explore(action='show', topic='10-K::item1ARiskFactors')` → Risk Factors full text
+- `explore(action='search', keyword='MD&A')` → MD&A evidence blocks
+### Analysis Notes
+- US GAAP operating income differs from K-IFRS (e.g., stock-based compensation treatment)
+- `finance(action='report')` not available — use `explore(action='show')` + `explore(action='search')` instead
+- Segments, risk factors, MD&A all exist as sections topics
+- Financial data is auto-normalized from SEC XBRL companyfacts
+"""

src/dartlab/ai/eval/__init__.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""AI 답변 평가 프레임워크.
+Golden dataset + persona question set + replay utilities.
+"""
+from __future__ import annotations
+import json
+from pathlib import Path
+from dartlab.ai.eval.diagnoser import (
+    DiagnosisReport,
+    diagnoseBatchResults,
+    diagnoseFull,
+    findCoverageGaps,
+    findRegressions,
+    findWeakTypes,
+    mapCodeImpact,
+)
+from dartlab.ai.eval.remediation import (
+    RemediationPlan,
+    extractFailureCounts,
+    generateRemediations,
+)
+from dartlab.ai.eval.replayRunner import (
+    PersonaEvalCase,
+    ReplayResult,
+    ReviewEntry,
+    StructuralEval,
+    appendReviewEntry,
+    evaluateReplay,
+    loadPersonaCases,
+    loadPersonaQuestionSet,
+    loadReviewLog,
+    replayCase,
+    replaySuite,
+    summarizeReplayResults,
+)
+from dartlab.ai.eval.scorer import ScoreCard, auto_score
+from dartlab.ai.eval.truthHarvester import harvestBatch, harvestTruth
# Golden QA fixture, stored next to this module.
_GOLDEN_PATH = Path(__file__).with_name("golden.json")


def load_golden_dataset() -> list[dict]:
    """Load the golden QA pairs from ``golden.json``.

    Returns:
        The parsed JSON content of the fixture file, or an empty list when
        the file does not exist.
    """
    if _GOLDEN_PATH.exists():
        with open(_GOLDEN_PATH, encoding="utf-8") as handle:
            return json.load(handle)
    return []
+__all__ = [
+    "PersonaEvalCase",
+    "ReplayResult",
+    "ReviewEntry",
+    "ScoreCard",
+    "StructuralEval",
+    "appendReviewEntry",
+    "auto_score",
+    "evaluateReplay",
+    "load_golden_dataset",
+    "loadPersonaCases",
+    "loadPersonaQuestionSet",
+    "loadReviewLog",
+    "replayCase",
+    "replaySuite",
+    "summarizeReplayResults",
+    "harvestTruth",
+    "harvestBatch",
+    "DiagnosisReport",
+    "diagnoseBatchResults",
+    "diagnoseFull",
+    "findCoverageGaps",
+    "findRegressions",
+    "findWeakTypes",
+    "mapCodeImpact",
+    "RemediationPlan",
+    "extractFailureCounts",
+    "generateRemediations",
+]

src/dartlab/ai/eval/batchResults/batch_ollama_20260324_180122.jsonl ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 10.455911574764034, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 2.6363636363636362, "failureTypes": ["ui_wording_failure"], "answerLength": 3265, "timestamp": "20260324_180122"}
2	+ {"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 11.461143695014663, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 2.8181818181818183, "failureTypes": [], "answerLength": 4522, "timestamp": "20260324_180122"}

src/dartlab/ai/eval/batchResults/batch_ollama_20260325_093749.jsonl ADDED Viewed

	@@ -0,0 +1,4 @@

+{"caseId": "analyst.quarterly.operatingProfit", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 12.584343434343436, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 2.090909090909091, "failureTypes": [], "answerLength": 1027, "timestamp": "20260325_093749"}
+{"caseId": "analyst.quarterly.revenue", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 9.671212121212122, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 0.6363636363636364, "failureTypes": [], "answerLength": 647, "timestamp": "20260325_093749"}
+{"caseId": "analyst.deep.comprehensiveHealth", "persona": "analyst", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 11.166666666666666, "routeMatch": 1.0, "moduleUtilization": 1.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": [], "answerLength": 1478, "timestamp": "20260325_093749"}
+{"caseId": "investor.deep.investmentThesis", "persona": "investor", "severity": "critical", "provider": "ollama", "model": "qwen3:latest", "overall": 9.533333333333333, "routeMatch": 1.0, "moduleUtilization": 0.0, "falseUnavailable": 1.0, "factualAccuracy": 1.0, "failureTypes": ["retrieval_failure"], "answerLength": 556, "timestamp": "20260325_093749"}

src/dartlab/ai/eval/diagnoser.py ADDED Viewed

	@@ -0,0 +1,309 @@

+"""자동 진단 엔진 — 배치 결과에서 약점/갭/회귀를 자동 발견."""
+from __future__ import annotations
+import json
+from dataclasses import dataclass, field
+from datetime import datetime
+from pathlib import Path
+from typing import Any
@dataclass
class WeakTypeReport:
    """Score summary for one question type, used to report weak areas."""

    questionType: str  # label the results were grouped under
    avgOverall: float  # average "overall" score of the group
    caseCount: int  # number of results in the group
    topFailures: list[str]  # failure-type labels recorded for the group
@dataclass
class CoverageGap:
    """An area that the eval case set does not cover."""

    kind: str  # "route", "module", "persona", "severity", "stockCode"
    detail: str  # what is missing or unbalanced
    suggestion: str  # proposed action to close the gap
@dataclass
class Regression:
    """A case whose score dropped compared with the previous batch."""

    caseId: str  # identifier of the affected case
    prevOverall: float  # overall score in the previous batch
    currOverall: float  # overall score in the current batch
    delta: float  # currOverall - prevOverall (negative for a drop)
    likelyFailures: list[str]  # failure types reported for the current run
@dataclass
class DiagnosisReport:
    """Complete diagnosis result: weak types, coverage gaps and regressions."""

    weakTypes: list[WeakTypeReport] = field(default_factory=list)
    coverageGaps: list[CoverageGap] = field(default_factory=list)
    regressions: list[Regression] = field(default_factory=list)
    timestamp: str = ""

    def toMarkdown(self) -> str:
        """Render the report as Markdown.

        Each non-empty section (weak types, coverage gaps, regressions) is
        emitted under its own heading followed by a blank line. When all
        three sections are empty a single "all good" line is written instead.
        At most three failure labels are shown per table row; an empty label
        list is rendered as "-".
        """
        out: list[str] = [f"# Eval 진단 리포트 — {self.timestamp}", ""]

        if self.weakTypes:
            out += [
                "## 약점 유형 (하위 점수)",
                "",
                "| 유형 | 평균 점수 | 케이스 수 | 주요 실패 |",
                "|------|---------|---------|---------|",
            ]
            for weak in self.weakTypes:
                shown = ", ".join(weak.topFailures[:3]) or "-"
                out.append(f"| {weak.questionType} | {weak.avgOverall:.2f} | {weak.caseCount} | {shown} |")
            out.append("")

        if self.coverageGaps:
            out += ["## 커버리지 갭", ""]
            out += [f"- **[{gap.kind}]** {gap.detail} → {gap.suggestion}" for gap in self.coverageGaps]
            out.append("")

        if self.regressions:
            out += [
                "## 회귀 감지",
                "",
                "| 케이스 | 이전 | 현재 | 변화 | 실패 유형 |",
                "|--------|------|------|------|---------|",
            ]
            for reg in self.regressions:
                shown = ", ".join(reg.likelyFailures[:3]) or "-"
                out.append(
                    f"| {reg.caseId} | {reg.prevOverall:.2f} | {reg.currOverall:.2f} | {reg.delta:+.2f} | {shown} |"
                )
            out.append("")

        nothingFound = not (self.weakTypes or self.coverageGaps or self.regressions)
        if nothingFound:
            out.append("모든 항목 양호.")
        return "\n".join(out)
def findWeakTypes(results: list[dict[str, Any]], bottomN: int = 3) -> list[WeakTypeReport]:
    """Return the lowest-scoring question types of a batch.

    Results are grouped by ``questionType``, falling back to ``userIntent``
    and finally to the literal ``"unknown"``. For every group the mean
    ``overall`` score and the three most frequent ``failureTypes`` labels are
    computed.

    Args:
        results: Batch result records (one dict per evaluated case).
        bottomN: Maximum number of groups to return; values below zero are
            treated as zero.

    Returns:
        Up to ``bottomN`` reports, ordered by ascending average score.
    """
    from collections import Counter

    scoresByType: dict[str, list[float]] = {}
    failuresByType: dict[str, Counter[str]] = {}
    for result in results:
        qType = result.get("questionType") or result.get("userIntent") or "unknown"
        # ``or`` also covers explicit nulls, which would otherwise break
        # ``sum`` / the failure count below.
        scoresByType.setdefault(qType, []).append(result.get("overall") or 0.0)
        failuresByType.setdefault(qType, Counter()).update(result.get("failureTypes") or [])

    reports = [
        WeakTypeReport(
            qType,
            sum(scores) / len(scores),
            len(scores),
            # Most frequent first; ties keep first-seen order.
            [label for label, _ in failuresByType[qType].most_common(3)],
        )
        for qType, scores in scoresByType.items()
    ]
    reports.sort(key=lambda report: report.avgOverall)
    # Clamp so a negative bottomN cannot slice from the end of the list.
    return reports[: max(bottomN, 0)]
def findCoverageGaps(cases: list[dict[str, Any]]) -> list[CoverageGap]:
    """Detect under-covered areas in a set of eval cases.

    Five checks run in order: persona balance, route coverage, severity mix,
    stock-code concentration and core-module coverage.

    Args:
        cases: Eval case dicts. The keys read are ``persona``,
            ``expectedRoute``, ``severity``, ``stockCode`` and
            ``expectedModules``; missing or null values are tolerated.

    Returns:
        One ``CoverageGap`` per finding. Missing routes and modules are
        reported in alphabetical order so the output is reproducible.
    """
    from collections import Counter

    gaps: list[CoverageGap] = []

    # 1. Persona balance: every persona that appears needs at least 3 cases.
    personaCounts = Counter(c.get("persona", "unknown") for c in cases)
    for persona, count in personaCounts.items():
        if count < 3:
            gaps.append(
                CoverageGap(
                    "persona",
                    f"{persona}: {count}개 케이스",
                    f"{persona} persona에 케이스 {3 - count}개 추가 필요",
                )
            )

    # 2. Route coverage. Sorted because set iteration order is not stable
    #    across interpreter runs.
    routes = {c.get("expectedRoute") for c in cases if c.get("expectedRoute")}
    requiredRoutes = {"finance", "sections", "hybrid", "report"}
    for route in sorted(requiredRoutes - routes):
        gaps.append(CoverageGap("route", f"route '{route}' 미커버", f"expectedRoute='{route}'인 케이스 추가"))

    # 3. Severity mix: critical + high should make up at least 40%.
    severityCounts = Counter(c.get("severity", "medium") for c in cases)
    total = len(cases) or 1  # avoid division by zero for an empty case set
    criticalHigh = severityCounts.get("critical", 0) + severityCounts.get("high", 0)
    if criticalHigh / total < 0.4:
        gaps.append(
            CoverageGap(
                "severity",
                f"critical+high = {criticalHigh}/{total} ({criticalHigh / total:.0%})",
                "critical/high severity 케이스 비율 40% 이상으로",
            )
        )

    # 4. Stock-code concentration: flag any code holding more than 60% of the
    #    cases that carry a stock code.
    stockCases = [c for c in cases if c.get("stockCode")]
    stockCounts = Counter(c["stockCode"] for c in stockCases)
    for sc, count in stockCounts.items():
        if count / len(stockCases) > 0.6:
            gaps.append(
                CoverageGap(
                    "stockCode",
                    f"{sc}: {count}/{len(stockCases)} ({count / len(stockCases):.0%})",
                    "다른 종목코드 케이스 추가로 편중 해소",
                )
            )

    # 5. Core-module coverage.
    coveredModules: set[str] = set()
    for c in cases:
        # ``or []`` tolerates an explicit null in the case file.
        coveredModules.update(c.get("expectedModules") or [])
    coreModules = {"IS", "BS", "CF", "ratios", "costByNature", "segments", "businessOverview", "governanceOverview"}
    for m in sorted(coreModules - coveredModules):
        gaps.append(CoverageGap("module", f"모듈 '{m}' 미커버", f"expectedModules에 '{m}' 포함하는 케이스 추가"))

    return gaps
def findRegressions(
    currentResults: list[dict[str, Any]],
    previousResults: list[dict[str, Any]],
    threshold: float = -0.1,
) -> list[Regression]:
    """Find cases whose overall score dropped versus the previous batch.

    Args:
        currentResults: Result records of the current batch.
        previousResults: Result records of the previous batch; records
            without a ``caseId`` are ignored.
        threshold: A case is reported when ``current - previous`` is
            strictly below this value.

    Returns:
        Regressions sorted by ``delta`` ascending (largest drop first). Cases
        that are absent from the previous batch are skipped. A missing or
        null ``overall`` counts as 0.
    """
    prevMap: dict[str, dict[str, Any]] = {r["caseId"]: r for r in previousResults if "caseId" in r}
    regressions: list[Regression] = []
    for curr in currentResults:
        caseId = curr.get("caseId", "")
        prev = prevMap.get(caseId)
        if prev is None:
            continue
        # ``or 0`` also covers explicit nulls, which cannot be subtracted.
        prevOverall = prev.get("overall") or 0
        currOverall = curr.get("overall") or 0
        delta = currOverall - prevOverall
        if delta < threshold:
            regressions.append(
                Regression(
                    caseId=caseId,
                    prevOverall=prevOverall,
                    currOverall=currOverall,
                    delta=delta,
                    likelyFailures=curr.get("failureTypes") or [],
                )
            )
    regressions.sort(key=lambda r: r.delta)
    return regressions
# ── Code change → affected eval case mapping ─────────────────────────
# Keys are path fragments looked up inside each changed file path; values are
# glob patterns over case IDs ("*" means every case).
_FILE_CASE_IMPACT: dict[str, list[str]] = {
    "context/builder.py": ["*"],
    "context/finance_context.py": ["analyst.*", "investor.*", "accountant.*"],
    "conversation/templates/analysis_rules.py": ["*"],
    "conversation/prompts.py": ["*"],
    "runtime/pipeline.py": ["analyst.*", "investor.*", "accountant.*"],
    "tools/recipes.py": ["analyst.*", "investor.*"],
    "tools/defaults/analysis.py": ["analyst.*", "investor.*"],
    "tools/defaults/market.py": ["investor.*", "analyst.*"],
}


def mapCodeImpact(changedFiles: list[str], cases: list[dict[str, Any]]) -> list[str]:
    """Map changed source files to the IDs of the eval cases they affect.

    Args:
        changedFiles: Changed file paths; backslashes are normalised to
            forward slashes before matching.
        cases: Eval case dicts; the ``id`` key is read (``""`` when absent).

    Returns:
        IDs of the affected cases in their original order. When any matched
        file maps to ``"*"`` every case ID is returned. An empty list is
        returned when no changed file is listed in the impact table.
    """
    # fnmatchcase instead of fnmatch: case IDs are not file names, so the
    # OS-specific path normalisation done by fnmatch() must not apply.
    from fnmatch import fnmatchcase

    impactPatterns: set[str] = set()
    for changed in changedFiles:
        normalized = changed.replace("\\", "/")
        for fragment, patterns in _FILE_CASE_IMPACT.items():
            if fragment in normalized:
                impactPatterns.update(patterns)

    caseIds = [c.get("id", "") for c in cases]
    if "*" in impactPatterns:
        return caseIds
    return [cid for cid in caseIds if any(fnmatchcase(cid, pat) for pat in impactPatterns)]
def _readJsonl(path: Path) -> list[dict[str, Any]]:
    """Parse a JSON Lines file into a list of records, skipping blank lines."""
    with open(path, encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [json.loads(text) for text in stripped if text]


def diagnoseBatchResults(batchPath: Path) -> DiagnosisReport:
    """Build a diagnosis report from a single batch-result JSONL file.

    Only the weak-type analysis is filled in: coverage gaps need the case
    definitions and regressions need a previous batch, neither of which is
    available here.

    Args:
        batchPath: Path to the batch result file (one JSON object per line).

    Returns:
        A report with ``weakTypes`` populated and a minute-resolution local
        timestamp.
    """
    results = _readJsonl(batchPath)
    return DiagnosisReport(
        weakTypes=findWeakTypes(results),
        coverageGaps=[],  # cannot be derived from batch results alone; cases are required
        regressions=[],
        timestamp=datetime.now().strftime("%Y-%m-%d %H:%M"),
    )


def diagnoseFull(
    batchPath: Path | None = None,
    previousBatchPath: Path | None = None,
    casesPath: Path | None = None,
) -> DiagnosisReport:
    """Run the full diagnosis (weak types + regressions + coverage gaps).

    Every input is optional; a path that is ``None`` or does not exist simply
    leaves the corresponding section empty.

    Args:
        batchPath: Current batch result JSONL file.
        previousBatchPath: Previous batch result JSONL file. It is only used
            when ``batchPath`` was also loaded.
        casesPath: JSON file with the eval cases, either a bare list or an
            object holding the list under ``"cases"``.

    Returns:
        The assembled report.
    """
    report = DiagnosisReport(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M"))

    # Batch result analysis
    if batchPath and batchPath.exists():
        results = _readJsonl(batchPath)
        report.weakTypes = findWeakTypes(results)
        # Regression detection needs both the current and the previous batch.
        if previousBatchPath and previousBatchPath.exists():
            report.regressions = findRegressions(results, _readJsonl(previousBatchPath))

    # Coverage gaps
    if casesPath and casesPath.exists():
        with open(casesPath, encoding="utf-8") as f:
            data = json.load(f)
        cases = data.get("cases", data) if isinstance(data, dict) else data
        report.coverageGaps = findCoverageGaps(cases)

    return report

src/dartlab/ai/eval/diagnosisReports/diagnosis_batch_20260325_093749.md ADDED Viewed

	@@ -0,0 +1,14 @@

+# Eval 진단 리포트 — 2026-03-25 09:37
+## 약점 유형 (하위 점수)
+| 유형 | 평균 점수 | 케이스 수 | 주요 실패 |
+|------|---------|---------|---------|
+| unknown | 10.74 | 4 | retrieval_failure |
+# 개선 계획 (Remediation)
+| 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |
+|---------|---------|----------|------|-------|
+| P3 | retrieval_failure | `engines/ai/context/finance_context.py` | _QUESTION_MODULES 매핑에 모듈 추가 (발생 1회) | high |

src/dartlab/ai/eval/golden.json ADDED Viewed

	@@ -0,0 +1,82 @@

+[
+  {
+    "id": 1,
+    "stock_code": "005930",
+    "question": "삼성전자의 최근 재무 건전성은?",
+    "expected_topics": ["부채비율", "유동비율", "자본", "건전"],
+    "expected_facts": [],
+    "category": "health"
+  },
+  {
+    "id": 2,
+    "stock_code": "005930",
+    "question": "삼성전자 매출 추이를 분석해줘",
+    "expected_topics": ["매출", "성장", "추이", "전년"],
+    "expected_facts": [],
+    "category": "performance"
+  },
+  {
+    "id": 3,
+    "stock_code": "005930",
+    "question": "삼성전자 배당 정책은?",
+    "expected_topics": ["배당", "DPS", "배당수익률", "배당성향"],
+    "expected_facts": [],
+    "category": "dividend"
+  },
+  {
+    "id": 4,
+    "stock_code": "005930",
+    "question": "삼성전자 수익성은 어때?",
+    "expected_topics": ["영업이익", "영업이익률", "ROE", "수익성"],
+    "expected_facts": [],
+    "category": "profitability"
+  },
+  {
+    "id": 5,
+    "stock_code": "005930",
+    "question": "삼성전자 현금흐름을 분석해줘",
+    "expected_topics": ["영업활동", "투자활동", "재무활동", "현금", "FCF"],
+    "expected_facts": [],
+    "category": "cashflow"
+  },
+  {
+    "id": 6,
+    "stock_code": "000660",
+    "question": "SK하이닉스 최근 실적은?",
+    "expected_topics": ["매출", "영업이익", "순이익", "반도체"],
+    "expected_facts": [],
+    "category": "performance"
+  },
+  {
+    "id": 7,
+    "stock_code": "005380",
+    "question": "현대차 부채 상황은?",
+    "expected_topics": ["부채", "부채비율", "차입금", "건전"],
+    "expected_facts": [],
+    "category": "health"
+  },
+  {
+    "id": 8,
+    "stock_code": "035420",
+    "question": "네이버 성장성 분석",
+    "expected_topics": ["매출", "성장", "CAGR", "전년"],
+    "expected_facts": [],
+    "category": "growth"
+  },
+  {
+    "id": 9,
+    "stock_code": "005930",
+    "question": "삼성전자의 종합 인사이트를 알려줘",
+    "expected_topics": ["실적", "수익성", "건전성", "현금흐름", "등급"],
+    "expected_facts": [],
+    "category": "insight"
+  },
+  {
+    "id": 10,
+    "stock_code": "005930",
+    "question": "삼성전자가 속한 섹터와 시장 순위는?",
+    "expected_topics": ["섹터", "순위", "반도체"],
+    "expected_facts": [],
+    "category": "meta"
+  }
+]

src/dartlab/ai/eval/personaCases.json ADDED Viewed

	@@ -0,0 +1,2441 @@

+{
+  "version": "2026-03-24-v1",
+  "updated": "2026-03-24",
+  "source": "curated_persona_regression",
+  "cases": [
+    {
+      "id": "assistant.capabilities.overview",
+      "persona": "assistant",
+      "personaLabel": "비서",
+      "stockCode": null,
+      "question": "dartlab으로 지금 바로 어떤 질문들을 할 수 있는지 사용자가 이해하기 쉽게 설명해줘",
+      "userIntent": "capability_overview",
+      "expectedAnswerShape": [
+        "기능요약",
+        "예시질문",
+        "사용자언어"
+      ],
+      "expectedEvidenceKinds": [
+        "capability"
+      ],
+      "expectedUserFacingTerms": [
+        "질문",
+        "공시",
+        "재무"
+      ],
+      "forbiddenUiTerms": [
+        "company.show",
+        "get_data",
+        "show_topic()",
+        "module_"
+      ],
+      "expectedRoute": null,
+      "expectedModules": [],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "공시",
+        "재무",
+        "질문"
+      ],
+      "expectedFollowups": [
+        "예를 들어",
+        "추가로"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "dataManager.coverage.readiness",
+      "persona": "data_manager",
+      "personaLabel": "DartLab 데이터 관리자",
+      "stockCode": "005930",
+      "question": "삼성전자 데이터가 지금 어디까지 준비돼 있는지 공시, 재무, 정형 데이터 기준으로 나눠서 설명해줘",
+      "userIntent": "data_readiness",
+      "expectedAnswerShape": [
+        "준비상태",
+        "근거",
+        "누락영역"
+      ],
+      "expectedEvidenceKinds": [
+        "data_ready",
+        "docs",
+        "finance",
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "공시",
+        "재무",
+        "정형 데이터"
+      ],
+      "forbiddenUiTerms": [
+        "company.show",
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "BS",
+        "IS",
+        "CF"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "공시",
+        "재무",
+        "정형"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "operator.config.channels",
+      "persona": "operator",
+      "personaLabel": "DartLab 운영자",
+      "stockCode": null,
+      "question": "AI 설정 상태와 외부 채널 연결 상태를 운영자 관점에서 같이 점검해줘",
+      "userIntent": "ops_status",
+      "expectedAnswerShape": [
+        "상태점검",
+        "원인",
+        "다음조치"
+      ],
+      "expectedEvidenceKinds": [
+        "provider_status",
+        "channel_status"
+      ],
+      "expectedUserFacingTerms": [
+        "설정",
+        "연결",
+        "운영"
+      ],
+      "forbiddenUiTerms": [
+        "show_topic()",
+        "module_"
+      ],
+      "expectedRoute": null,
+      "expectedModules": [],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "설정",
+        "연결"
+      ],
+      "expectedFollowups": [
+        "다음",
+        "점검"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "installer.opendart.key",
+      "persona": "installer",
+      "personaLabel": "DartLab 설치자",
+      "stockCode": null,
+      "question": "OpenDART 키가 왜 필요하고 없으면 어떤 기능이 막히는지 설치자 입장에서 설명해줘",
+      "userIntent": "setup_guidance",
+      "expectedAnswerShape": [
+        "필요성",
+        "영향범위",
+        "설정가이드"
+      ],
+      "expectedEvidenceKinds": [
+        "open_dart_status"
+      ],
+      "expectedUserFacingTerms": [
+        "OpenDART",
+        "설정",
+        "공시"
+      ],
+      "forbiddenUiTerms": [
+        "get_dart_filing_text",
+        "search_dart_filings"
+      ],
+      "expectedRoute": null,
+      "expectedModules": [],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "OpenDART",
+        "설정"
+      ],
+      "expectedFollowups": [
+        "설정",
+        "다음"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "researchGather.structure.recentDisclosures",
+      "persona": "research_gather",
+      "personaLabel": "리서치 게더 엔진 사용자",
+      "stockCode": "005930",
+      "question": "최근 공시 기준으로 삼성전자 사업 구조가 바뀐 부분이 있나",
+      "userIntent": "recent_disclosure_change",
+      "expectedAnswerShape": [
+        "변화요약",
+        "근거",
+        "시점"
+      ],
+      "expectedEvidenceKinds": [
+        "sections",
+        "disclosure"
+      ],
+      "expectedUserFacingTerms": [
+        "최근 공시",
+        "사업 구조",
+        "근거"
+      ],
+      "forbiddenUiTerms": [
+        "businessOverview",
+        "disclosureChanges",
+        "section_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "disclosureChanges"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "최근 공시",
+        "근거"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.costByNature.summary",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "005930",
+      "question": "삼성전자 성격별 비용 분류에서 최근 비용 부담이 어디에 몰려 있는지 요약해줘",
+      "userIntent": "cost_nature_analysis",
+      "expectedAnswerShape": [
+        "핵심결론",
+        "상위비용",
+        "변화"
+      ],
+      "expectedEvidenceKinds": [
+        "notes",
+        "cost_by_nature"
+      ],
+      "expectedUserFacingTerms": [
+        "성격별 비용",
+        "비용 부담",
+        "최근"
+      ],
+      "forbiddenUiTerms": [
+        "costByNature",
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "costByNature"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다",
+        "미제공"
+      ],
+      "mustInclude": [
+        "성격별 비용",
+        "비용"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.audit.redFlags",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "005930",
+      "question": "삼성전자 감사 관련해서 최근 점검해야 할 red flag가 있나",
+      "userIntent": "audit_red_flags",
+      "expectedAnswerShape": [
+        "결론",
+        "감사근거",
+        "주의포인트"
+      ],
+      "expectedEvidenceKinds": [
+        "report",
+        "audit"
+      ],
+      "expectedUserFacingTerms": [
+        "감사",
+        "red flag",
+        "주의"
+      ],
+      "forbiddenUiTerms": [
+        "audit",
+        "report_"
+      ],
+      "expectedRoute": "report",
+      "expectedModules": [
+        "audit"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "감사",
+        "주의"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "businessOwner.businessModel.naver",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "035420",
+      "question": "네이버는 어떤 식으로 돈 버는 구조인지 사업모델 관점에서 설명해줘",
+      "userIntent": "business_model",
+      "expectedAnswerShape": [
+        "수익구조",
+        "핵심사업",
+        "경쟁력"
+      ],
+      "expectedEvidenceKinds": [
+        "sections",
+        "business"
+      ],
+      "expectedUserFacingTerms": [
+        "돈 버는 구조",
+        "사업모델",
+        "핵심 사업"
+      ],
+      "forbiddenUiTerms": [
+        "productService",
+        "businessOverview",
+        "section_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "사업",
+        "구조"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "businessOwner.capitalAllocation.samsung",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "005930",
+      "question": "삼성전자 자본배분 스타일을 보면 성장투자형인지 주주환원형인지 판단해줘",
+      "userIntent": "capital_allocation",
+      "expectedAnswerShape": [
+        "판단",
+        "근거",
+        "후속포인트"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "report",
+        "dividend"
+      ],
+      "expectedUserFacingTerms": [
+        "자본배분",
+        "성장투자",
+        "주주환원"
+      ],
+      "forbiddenUiTerms": [
+        "shareCapital",
+        "dividend",
+        "IS",
+        "CF"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "dividend",
+        "CF",
+        "shareCapital"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "자본배분",
+        "판단"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "investor.dividend.sustainability",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 배당이 실적과 현금흐름으로 지속 가능한지 판단해줘",
+      "userIntent": "dividend_sustainability",
+      "expectedAnswerShape": [
+        "결론",
+        "배당",
+        "현금흐름"
+      ],
+      "expectedEvidenceKinds": [
+        "report",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "배당",
+        "실적",
+        "현금흐름"
+      ],
+      "forbiddenUiTerms": [
+        "dividend",
+        "IS",
+        "CF",
+        "ratios"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "dividend",
+        "IS",
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "배당",
+        "현금흐름",
+        "실적"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.downside.risks",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "000660",
+      "question": "SK하이닉스에서 지금 downside를 만드는 핵심 리스크 3가지만 말해줘",
+      "userIntent": "downside_risk",
+      "expectedAnswerShape": [
+        "리스크목록",
+        "영향",
+        "왜중요한지"
+      ],
+      "expectedEvidenceKinds": [
+        "sections",
+        "risk"
+      ],
+      "expectedUserFacingTerms": [
+        "리스크",
+        "downside",
+        "핵심"
+      ],
+      "forbiddenUiTerms": [
+        "riskDerivative",
+        "section_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "riskDerivative",
+        "disclosureChanges"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "리스크",
+        "핵심"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.distress.sdi",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "006400",
+      "question": "삼성SDI의 부실 징후를 지금 시점에서 점검해줘",
+      "userIntent": "distress_check",
+      "expectedAnswerShape": [
+        "건전성결론",
+        "징후",
+        "주의점"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "distress"
+      ],
+      "expectedUserFacingTerms": [
+        "부실 징후",
+        "건전성",
+        "주의"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "fsSummary"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "BS",
+        "IS",
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "건전성",
+        "주의"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.margin.drivers",
+      "persona": "analyst",
+      "personaLabel": "애널리스트",
+      "stockCode": "005930",
+      "question": "삼성전자 영업이익률 변동을 비용 구조와 사업 변화까지 묶어서 설명해줘",
+      "userIntent": "margin_driver",
+      "expectedAnswerShape": [
+        "결론",
+        "비용구조",
+        "사업변화"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "notes",
+        "sections"
+      ],
+      "expectedUserFacingTerms": [
+        "영업이익률",
+        "비용 구조",
+        "사업 변화"
+      ],
+      "forbiddenUiTerms": [
+        "costByNature",
+        "businessOverview",
+        "IS"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "IS",
+        "costByNature",
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "영업이익률",
+        "비용",
+        "사업"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.segments.lgchem",
+      "persona": "analyst",
+      "personaLabel": "애널리스트",
+      "stockCode": "051910",
+      "question": "LG화학 사업부문별로 지금 어디가 핵심인지 정리해줘",
+      "userIntent": "segment_mix",
+      "expectedAnswerShape": [
+        "부문정리",
+        "핵심축",
+        "해석"
+      ],
+      "expectedEvidenceKinds": [
+        "notes",
+        "segments"
+      ],
+      "expectedUserFacingTerms": [
+        "사업부문",
+        "핵심",
+        "정리"
+      ],
+      "forbiddenUiTerms": [
+        "segments",
+        "productService"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "segments",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "사업부문",
+        "핵심"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.evidence.recentDisclosures",
+      "persona": "analyst",
+      "personaLabel": "애널리스트",
+      "stockCode": "005930",
+      "question": "최근 공시 기준으로 사업구조 설명 근거를 2개만 짚어줘",
+      "userIntent": "evidence_grounding",
+      "expectedAnswerShape": [
+        "근거",
+        "시점",
+        "출처"
+      ],
+      "expectedEvidenceKinds": [
+        "sections",
+        "evidence"
+      ],
+      "expectedUserFacingTerms": [
+        "근거",
+        "출처",
+        "최근 공시"
+      ],
+      "forbiddenUiTerms": [
+        "businessOverview",
+        "productService",
+        "show_topic()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "근거",
+        "출처"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "assistant.nextQuestions.investor",
+      "persona": "assistant",
+      "personaLabel": "비서",
+      "stockCode": "005930",
+      "question": "지금 투자자가 삼성전자에서 다음으로 확인해야 할 질문 3개를 던져줘",
+      "userIntent": "next_best_questions",
+      "expectedAnswerShape": [
+        "질문목록",
+        "이유",
+        "우선순위"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "sections"
+      ],
+      "expectedUserFacingTerms": [
+        "다음",
+        "확인",
+        "질문"
+      ],
+      "forbiddenUiTerms": [
+        "module_",
+        "show_topic()"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "IS",
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "질문",
+        "확인"
+      ],
+      "expectedFollowups": [
+        "왜",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "dataManager.trace.sources",
+      "persona": "data_manager",
+      "personaLabel": "DartLab 데이터 관리자",
+      "stockCode": "005930",
+      "question": "삼성전자 답변 근거가 재무인지 공시인지 구분해서 설명해줘",
+      "userIntent": "source_trace",
+      "expectedAnswerShape": [
+        "근거구분",
+        "재무",
+        "공시"
+      ],
+      "expectedEvidenceKinds": [
+        "trace",
+        "finance",
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "근거",
+        "재무",
+        "공시"
+      ],
+      "forbiddenUiTerms": [
+        "trace(",
+        "company.show"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "IS",
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없습니다"
+      ],
+      "mustInclude": [
+        "근거",
+        "재무",
+        "공시"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "operator.performance.explainLatency",
+      "persona": "operator",
+      "personaLabel": "DartLab 운영자",
+      "stockCode": "005930",
+      "question": "질문에 따라 왜 시간이 더 걸릴 수 있는지와 어떤 경우 데이터 로딩이 커지는지 설명해줘",
+      "userIntent": "performance_explanation",
+      "expectedAnswerShape": [
+        "원인",
+        "조건",
+        "주의점"
+      ],
+      "expectedEvidenceKinds": [
+        "runtime_policy"
+      ],
+      "expectedUserFacingTerms": [
+        "시간",
+        "로딩",
+        "질문에 따라"
+      ],
+      "forbiddenUiTerms": [
+        "build_context_tiered",
+        "_resolve_context_route"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "시간",
+        "로딩"
+      ],
+      "expectedFollowups": [
+        "추가",
+        "확인"
+      ],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "accountant.ambiguous.costStructure",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "005930",
+      "question": "삼성전자 비용 구조를 설명해줘",
+      "userIntent": "ambiguous_cost_structure",
+      "expectedAnswerShape": [
+        "clarification_or_best_guess"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "notes"
+      ],
+      "expectedUserFacingTerms": [
+        "성격별 비용",
+        "기능별 비용"
+      ],
+      "forbiddenUiTerms": [
+        "costByNature",
+        "IS",
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "costByNature"
+      ],
+      "allowedClarification": true,
+      "mustNotSay": [],
+      "mustInclude": [
+        "성격별 비용",
+        "기능별 비용"
+      ],
+      "expectedFollowups": [
+        "보실 건가요"
+      ],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.quarterly.operatingProfit",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "삼성전자 분기별 영업이익 추이 알려줘",
+      "userIntent": "quarterly_operating_profit",
+      "expectedAnswerShape": [
+        "분기별테이블",
+        "QoQ",
+        "YoY"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "영업이익",
+        "분기",
+        "전분기"
+      ],
+      "forbiddenUiTerms": [
+        "IS_quarterly",
+        "timeseries"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "IS",
+        "IS_quarterly"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없",
+        "분기별 데이터를 제공하지",
+        "확인할 수 없"
+      ],
+      "mustInclude": [
+        "영업이익",
+        "분기"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [
+        {
+          "metric": "sales",
+          "label": "매출액",
+          "value": 333605938000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "operating_profit",
+          "label": "영업이익",
+          "value": 43601051000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "net_profit",
+          "label": "당기순이익",
+          "value": 45206805000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "cost_of_sales",
+          "label": "매출원가",
+          "value": 202235513000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "sales_quarterly",
+          "label": "매출액(분기)",
+          "value": 93837371000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "operating_profit_quarterly",
+          "label": "영업이익(분기)",
+          "value": 20073660000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "net_profit_quarterly",
+          "label": "당기순이익(분기)",
+          "value": 19641745000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "cost_of_sales_quarterly",
+          "label": "매출원가(분기)",
+          "value": 49586396000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "operating_cashflow_quarterly",
+          "label": "영업활동CF(분기)",
+          "value": 28799652000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "investing_cashflow_quarterly",
+          "label": "투자활동CF(분기)",
+          "value": -30991028000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "financing_cashflow_quarterly",
+          "label": "재무활동CF(분기)",
+          "value": -1957717000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        }
+      ],
+      "severity": "critical"
+    },
+    {
+      "id": "analyst.quarterly.revenue",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "최근 4분기 매출 변화 분석해줘",
+      "userIntent": "quarterly_revenue_change",
+      "expectedAnswerShape": [
+        "분기별테이블",
+        "QoQ",
+        "추세"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "매출",
+        "분기",
+        "변화"
+      ],
+      "forbiddenUiTerms": [
+        "IS_quarterly"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "IS",
+        "IS_quarterly"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "매출"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [
+        {
+          "metric": "sales",
+          "label": "매출액",
+          "value": 333605938000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "operating_profit",
+          "label": "영업이익",
+          "value": 43601051000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "net_profit",
+          "label": "당기순이익",
+          "value": 45206805000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "cost_of_sales",
+          "label": "매출원가",
+          "value": 202235513000000.0,
+          "statement": "IS",
+          "period": "2025"
+        },
+        {
+          "metric": "sales_quarterly",
+          "label": "매출액(분기)",
+          "value": 93837371000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "operating_profit_quarterly",
+          "label": "영업이익(분기)",
+          "value": 20073660000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "net_profit_quarterly",
+          "label": "당기순이익(분기)",
+          "value": 19641745000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "cost_of_sales_quarterly",
+          "label": "매출원가(분기)",
+          "value": 49586396000000.0,
+          "statement": "IS_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "operating_cashflow_quarterly",
+          "label": "영업활동CF(분기)",
+          "value": 28799652000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "investing_cashflow_quarterly",
+          "label": "투자활동CF(분기)",
+          "value": -30991028000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        },
+        {
+          "metric": "financing_cashflow_quarterly",
+          "label": "재무활동CF(분기)",
+          "value": -1957717000000.0,
+          "statement": "CF_quarterly",
+          "period": "2025-Q4"
+        }
+      ],
+      "severity": "critical"
+    },
+    {
+      "id": "investor.profitMargin.context",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 영업이익률 분석해줘",
+      "userIntent": "profit_margin_analysis",
+      "expectedAnswerShape": [
+        "이익률수치",
+        "추세",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "sections"
+      ],
+      "expectedUserFacingTerms": [
+        "영업이익률",
+        "수익성"
+      ],
+      "forbiddenUiTerms": [
+        "IS",
+        "ratios"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "영업이익률"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.growth.cashflowTrend",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 영업활동현금흐름 추이로 성장성 판단해줘",
+      "userIntent": "cashflow_growth",
+      "expectedAnswerShape": [
+        "CF추이",
+        "성장판단",
+        "근거"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "현금흐름",
+        "영업활동",
+        "성장"
+      ],
+      "forbiddenUiTerms": [
+        "CF",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "현금흐름"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.growth.futurePlan",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "000660",
+      "question": "SK하이닉스 사업보고서에 나온 미래 투자 계획과 성장 전략 요약해줘",
+      "userIntent": "future_plan",
+      "expectedAnswerShape": [
+        "투자계획",
+        "성장전략",
+        "근거인용"
+      ],
+      "expectedEvidenceKinds": [
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "투자",
+        "계획",
+        "성장"
+      ],
+      "forbiddenUiTerms": [
+        "show_topic()",
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "투자"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.growth.revenueGrowth",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "051910",
+      "question": "LG화학 최근 3년 매출 성장률 분석해줘",
+      "userIntent": "revenue_growth_analysis",
+      "expectedAnswerShape": [
+        "성장률",
+        "추세",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "매출",
+        "성장률",
+        "추세"
+      ],
+      "forbiddenUiTerms": [
+        "IS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "매출"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.valuation.perComparison",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "삼성전자 PER, PBR 수준이 어떤지 분석해줘",
+      "userIntent": "valuation_per_pbr",
+      "expectedAnswerShape": [
+        "PER수치",
+        "PBR수치",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "PER",
+        "PBR",
+        "밸류에이션"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "ratios",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "PER"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.valuation.intrinsicValue",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "000660",
+      "question": "SK하이닉스 적정 가치를 어떻게 판단하면 좋을지 재무 데이터 기반으로 설명해줘",
+      "userIntent": "intrinsic_value",
+      "expectedAnswerShape": [
+        "재무기반판단",
+        "비율근거",
+        "투자시사점"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "가치",
+        "적정",
+        "판단"
+      ],
+      "forbiddenUiTerms": [
+        "module_",
+        "ratios"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS",
+        "BS",
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "가치"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.valuation.roe",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "051910",
+      "question": "LG화학 ROE 추이와 자본 효율성 분석해줘",
+      "userIntent": "roe_analysis",
+      "expectedAnswerShape": [
+        "ROE수치",
+        "추이",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "ROE",
+        "자본",
+        "효율"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "ratios",
+        "IS",
+        "BS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "ROE"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.report.majorHolder",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 최대주주와 주요 주주 현황 알려줘",
+      "userIntent": "major_holder",
+      "expectedAnswerShape": [
+        "최대주주",
+        "지분율",
+        "변동"
+      ],
+      "expectedEvidenceKinds": [
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "주주",
+        "지분",
+        "최대주주"
+      ],
+      "forbiddenUiTerms": [
+        "report.get",
+        "majorHolder"
+      ],
+      "expectedRoute": "report",
+      "expectedModules": [
+        "majorHolder"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "주주"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.report.executivePay",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "005930",
+      "question": "삼성전자 이사회 구성과 임원 보수 현황 요약해줘",
+      "userIntent": "executive_compensation",
+      "expectedAnswerShape": [
+        "이사회구성",
+        "보수현황",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "이사회",
+        "임원",
+        "보수"
+      ],
+      "forbiddenUiTerms": [
+        "report.get",
+        "executive"
+      ],
+      "expectedRoute": "report",
+      "expectedModules": [
+        "executive"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "임원"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.report.treasuryStock",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 자기주식 취득/처분 이력 알려줘",
+      "userIntent": "treasury_stock",
+      "expectedAnswerShape": [
+        "취득이력",
+        "처분이력",
+        "현황"
+      ],
+      "expectedEvidenceKinds": [
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "자기주식",
+        "자사주",
+        "취득"
+      ],
+      "forbiddenUiTerms": [
+        "report.get",
+        "treasuryStock"
+      ],
+      "expectedRoute": "report",
+      "expectedModules": [
+        "treasuryStock"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "자기주식"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "researchGather.report.employeeTrend",
+      "persona": "research_gather",
+      "personaLabel": "리서치 수집원",
+      "stockCode": "000660",
+      "question": "SK하이닉스 직원 수 변화 추이와 인당 매출 알려줘",
+      "userIntent": "employee_trend",
+      "expectedAnswerShape": [
+        "직원수추이",
+        "인당매출",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "report",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "직원",
+        "인력",
+        "매출"
+      ],
+      "forbiddenUiTerms": [
+        "report.get",
+        "employee"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "employee",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "직원"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "analyst.context.evidenceCitation",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "삼성전자 반도체 사업 전망에 대해 공시 원문 근거를 인용해서 설명해줘",
+      "userIntent": "evidence_citation",
+      "expectedAnswerShape": [
+        "원문인용",
+        "분석",
+        "근거"
+      ],
+      "expectedEvidenceKinds": [
+        "docs",
+        "context_slice"
+      ],
+      "expectedUserFacingTerms": [
+        "반도체",
+        "전망",
+        "원문"
+      ],
+      "forbiddenUiTerms": [
+        "contextSlices",
+        "show_topic()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "반도체"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "businessOwner.context.riskFactors",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "051910",
+      "question": "LG화학 사업 리스크 요인을 공시 내용 기반으로 정리해줘",
+      "userIntent": "risk_factor_citation",
+      "expectedAnswerShape": [
+        "리스크목록",
+        "공시근거",
+        "영향도"
+      ],
+      "expectedEvidenceKinds": [
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "리스크",
+        "위험",
+        "공시"
+      ],
+      "forbiddenUiTerms": [
+        "riskDerivative",
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "riskDerivative",
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "리스크"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.context.disclosureChange",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "000660",
+      "question": "SK하이닉스 최근 공시에서 전년 대비 달라진 주요 내용이 뭐야",
+      "userIntent": "disclosure_change_detection",
+      "expectedAnswerShape": [
+        "변경사항",
+        "비교",
+        "시사점"
+      ],
+      "expectedEvidenceKinds": [
+        "docs",
+        "diff"
+      ],
+      "expectedUserFacingTerms": [
+        "변경",
+        "달라진",
+        "전년"
+      ],
+      "forbiddenUiTerms": [
+        "disclosureChanges",
+        "diff()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "disclosureChanges",
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "변경"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.notes.rndExpense",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "삼성전자 연구개발비 규모와 매출 대비 비중 알려줘",
+      "userIntent": "rnd_analysis",
+      "expectedAnswerShape": [
+        "연구개발비",
+        "매출대비비중",
+        "추세"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "notes"
+      ],
+      "expectedUserFacingTerms": [
+        "연구개발",
+        "R&D",
+        "비중"
+      ],
+      "forbiddenUiTerms": [
+        "rnd",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "rnd",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "연구개발"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.notes.tangibleAsset",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "000660",
+      "question": "SK하이닉스 유형자산 규모와 감가상각 현황 분석해줘",
+      "userIntent": "tangible_asset",
+      "expectedAnswerShape": [
+        "유형자산규모",
+        "감가상각",
+        "투자판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "notes"
+      ],
+      "expectedUserFacingTerms": [
+        "유형자산",
+        "감가상각",
+        "투자"
+      ],
+      "forbiddenUiTerms": [
+        "tangibleAsset",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "tangibleAsset",
+        "BS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "유형자산"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.notes.segmentDetail",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "051910",
+      "question": "LG화학 사업부문별 매출과 영업이익 비중 분석해줘",
+      "userIntent": "segment_detail",
+      "expectedAnswerShape": [
+        "부문별매출",
+        "부문별이익",
+        "비중분석"
+      ],
+      "expectedEvidenceKinds": [
+        "docs",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "사업부문",
+        "매출",
+        "비중"
+      ],
+      "forbiddenUiTerms": [
+        "segments",
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "segments",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "부문"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.edge.financialCompany",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "105560",
+      "question": "KB금융지주 재무건전성 분석해줘",
+      "userIntent": "financial_soundness",
+      "expectedAnswerShape": [
+        "건전성지표",
+        "판단",
+        "근거"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "건전성",
+        "자본",
+        "부채"
+      ],
+      "forbiddenUiTerms": [
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "BS",
+        "IS",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "건전성"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.edge.holdingCompany",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "035420",
+      "question": "NAVER 사업 다각화 현황과 주요 매출원 분석해줘",
+      "userIntent": "business_diversification",
+      "expectedAnswerShape": [
+        "사업영역",
+        "매출원",
+        "분석"
+      ],
+      "expectedEvidenceKinds": [
+        "docs",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "사업",
+        "매출원",
+        "다각화"
+      ],
+      "forbiddenUiTerms": [
+        "module_",
+        "show_topic()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "segments",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "사업"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "businessOwner.edge.capitalAllocationNav",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "035420",
+      "question": "NAVER 최근 자본 배분 전략 분석해줘",
+      "userIntent": "capital_allocation",
+      "expectedAnswerShape": [
+        "배당정책",
+        "자사주",
+        "투자전략"
+      ],
+      "expectedEvidenceKinds": [
+        "report",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "배당",
+        "자사주",
+        "투자"
+      ],
+      "forbiddenUiTerms": [
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "dividend",
+        "CF",
+        "treasuryStock"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "배당"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "accountant.cost.rndRatio",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "000660",
+      "question": "SK하이닉스 연구개발비가 매출원가와 판관비 중 어디에 더 많이 반영되는지 분석해줘",
+      "userIntent": "rnd_cost_allocation",
+      "expectedAnswerShape": [
+        "배분구조",
+        "비중",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "notes"
+      ],
+      "expectedUserFacingTerms": [
+        "연구개발",
+        "매출원가",
+        "판관비"
+      ],
+      "forbiddenUiTerms": [
+        "costByNature",
+        "rnd",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "rnd",
+        "costByNature",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "연구개발"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.cost.opexBreakdown",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "005930",
+      "question": "삼성전자 매출원가와 판관비 추이 분석해줘",
+      "userIntent": "opex_breakdown",
+      "expectedAnswerShape": [
+        "원가추이",
+        "판관비추이",
+        "비중변화"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "매출원가",
+        "판관비",
+        "비용"
+      ],
+      "forbiddenUiTerms": [
+        "IS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "원가"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "businessOwner.cost.segments",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "051910",
+      "question": "LG화학 부문별 수익성이 어떻게 다른지 비교해줘",
+      "userIntent": "segment_profitability",
+      "expectedAnswerShape": [
+        "부문별비교",
+        "수익성",
+        "시사점"
+      ],
+      "expectedEvidenceKinds": [
+        "docs",
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "부문",
+        "수익성",
+        "비교"
+      ],
+      "forbiddenUiTerms": [
+        "segments",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "segments",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "부문"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "analyst.deep.comprehensiveHealth",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "000660",
+      "question": "SK하이닉스 종합 재무 건강 진단해줘",
+      "userIntent": "comprehensive_health",
+      "expectedAnswerShape": [
+        "수익성",
+        "안정성",
+        "성장성",
+        "종합판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "수익성",
+        "안정성",
+        "성장"
+      ],
+      "forbiddenUiTerms": [
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS",
+        "BS",
+        "CF",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "수익성"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "critical"
+    },
+    {
+      "id": "investor.deep.investmentThesis",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "051910",
+      "question": "LG화학 투자 매력도를 재무/공시/리스크 종합적으로 평가해줘",
+      "userIntent": "investment_thesis",
+      "expectedAnswerShape": [
+        "재무분석",
+        "공시기반리스크",
+        "투자판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "docs",
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "투자",
+        "매력도",
+        "리스크"
+      ],
+      "forbiddenUiTerms": [
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "IS",
+        "BS",
+        "CF",
+        "ratios",
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "투자"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "critical"
+    },
+    {
+      "id": "researchGather.overview.navBusiness",
+      "persona": "research_gather",
+      "personaLabel": "리서치 수집원",
+      "stockCode": "035420",
+      "question": "NAVER 주요 사업 내용과 최근 변화 요약해줘",
+      "userIntent": "business_overview",
+      "expectedAnswerShape": [
+        "사업내용",
+        "최근변화",
+        "전망"
+      ],
+      "expectedEvidenceKinds": [
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "사업",
+        "변화",
+        "전망"
+      ],
+      "forbiddenUiTerms": [
+        "businessOverview",
+        "show_topic()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "사업"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "businessOwner.overview.chemicalIndustry",
+      "persona": "business_owner",
+      "personaLabel": "사업가",
+      "stockCode": "051910",
+      "question": "LG화학이 어떤 사업을 하는 회사인지 공시 기준으로 설명해줘",
+      "userIntent": "business_description",
+      "expectedAnswerShape": [
+        "사업설명",
+        "주요제품",
+        "시장"
+      ],
+      "expectedEvidenceKinds": [
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "사업",
+        "제품",
+        "시장"
+      ],
+      "forbiddenUiTerms": [
+        "module_",
+        "show_topic()"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview",
+        "productService"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "사업"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "investor.followup.deeperDividend",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "005930",
+      "question": "삼성전자 배당이 지속 가능한지, 배당성향과 FCF 기준으로 판단해줘",
+      "userIntent": "dividend_sustainability_deep",
+      "expectedAnswerShape": [
+        "배당성향",
+        "FCF커버리지",
+        "지속가능성판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "report"
+      ],
+      "expectedUserFacingTerms": [
+        "배당",
+        "배당성향",
+        "FCF"
+      ],
+      "forbiddenUiTerms": [
+        "dividend",
+        "module_"
+      ],
+      "expectedRoute": "hybrid",
+      "expectedModules": [
+        "dividend",
+        "CF",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "배당"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "analyst.followup.whyMarginDrop",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "000660",
+      "question": "SK하이닉스 영업이익률이 하락한 원인을 비용 구조에서 찾아줘",
+      "userIntent": "margin_drop_cause",
+      "expectedAnswerShape": [
+        "이익률변화",
+        "비용분석",
+        "원인"
+      ],
+      "expectedEvidenceKinds": [
+        "finance",
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "영업이익률",
+        "비용",
+        "원인"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "IS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS",
+        "ratios",
+        "costByNature"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "영업이익률"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "accountant.stability.debtAnalysis",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "051910",
+      "question": "LG화학 부채비율과 유동비율로 재무 안정성 판단해줘",
+      "userIntent": "debt_stability",
+      "expectedAnswerShape": [
+        "부채비율",
+        "유동비율",
+        "안정성판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "부채비율",
+        "유동비율",
+        "안정성"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "BS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "BS",
+        "ratios"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [
+        "데이터가 없"
+      ],
+      "mustInclude": [
+        "부채비율"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "high"
+    },
+    {
+      "id": "investor.stability.interestCoverage",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "000660",
+      "question": "SK하이닉스 이자보상배율 분석해줘",
+      "userIntent": "interest_coverage",
+      "expectedAnswerShape": [
+        "이자보상배율",
+        "추이",
+        "판단"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "이자보상배율",
+        "이자",
+        "부담"
+      ],
+      "forbiddenUiTerms": [
+        "ratios",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "ratios",
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "이자"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "analyst.edgar.appleFinancials",
+      "persona": "analyst",
+      "personaLabel": "재무 분석가",
+      "stockCode": "AAPL",
+      "question": "Apple 최근 매출과 영업이익 추이 분석해줘",
+      "userIntent": "us_financials",
+      "expectedAnswerShape": [
+        "매출추이",
+        "이익추이",
+        "분석"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "매출",
+        "영업이익",
+        "추이"
+      ],
+      "forbiddenUiTerms": [
+        "IS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "IS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "매출"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "investor.edgar.appleBusiness",
+      "persona": "investor",
+      "personaLabel": "투자자",
+      "stockCode": "AAPL",
+      "question": "Apple 10-K에 나온 사업 개요 요약해줘",
+      "userIntent": "us_business_overview",
+      "expectedAnswerShape": [
+        "사업개요",
+        "주요제품",
+        "전략"
+      ],
+      "expectedEvidenceKinds": [
+        "docs"
+      ],
+      "expectedUserFacingTerms": [
+        "사업",
+        "제품",
+        "Apple"
+      ],
+      "forbiddenUiTerms": [
+        "businessOverview",
+        "module_"
+      ],
+      "expectedRoute": "sections",
+      "expectedModules": [
+        "businessOverview"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "사업"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    },
+    {
+      "id": "accountant.edgar.appleBalanceSheet",
+      "persona": "accountant",
+      "personaLabel": "회계사",
+      "stockCode": "AAPL",
+      "question": "Apple 자산/부채/자본 구조 분석해줘",
+      "userIntent": "us_balance_sheet",
+      "expectedAnswerShape": [
+        "자산구조",
+        "부채구조",
+        "자본구조"
+      ],
+      "expectedEvidenceKinds": [
+        "finance"
+      ],
+      "expectedUserFacingTerms": [
+        "자산",
+        "부채",
+        "자본"
+      ],
+      "forbiddenUiTerms": [
+        "BS",
+        "module_"
+      ],
+      "expectedRoute": "finance",
+      "expectedModules": [
+        "BS"
+      ],
+      "allowedClarification": false,
+      "mustNotSay": [],
+      "mustInclude": [
+        "자산"
+      ],
+      "expectedFollowups": [],
+      "groundTruthFacts": [],
+      "severity": "medium"
+    }
+  ],
+  "truthAsOf": "2026-03-24"
+}

src/dartlab/ai/eval/remediation.py ADDED Viewed

	@@ -0,0 +1,191 @@

+"""failure taxonomy → 구체적 코드 수정 위치 매핑."""
+from __future__ import annotations
+from dataclasses import dataclass
+from typing import Any
+@dataclass
+class RemediationPlan:
+    """A single remediation plan entry for one failure type."""
+    failureType: str  # failure taxonomy key this plan addresses
+    targetFile: str  # file or directory where the fix is expected to land
+    description: str  # human-readable description of the suggested change
+    priority: int  # 1 = most urgent ... 5 = lowest
+    estimatedImpact: str  # "high", "medium", "low"; generateRemediations also emits "unknown" for unmapped types
+# ── failure → 코드 수정 매핑 ─────────────────────────────
+_FAILURE_REMEDIATION: dict[str, dict[str, str]] = {
+    "routing_failure": {
+        "targetFile": "engines/ai/context/builder.py",
+        "description": "_ROUTE_*_KEYWORDS에 누락 키워드 추가",
+        "estimatedImpact": "high",
+    },
+    "retrieval_failure": {
+        "targetFile": "engines/ai/context/finance_context.py",
+        "description": "_QUESTION_MODULES 매핑에 모듈 추가",
+        "estimatedImpact": "high",
+    },
+    "false_unavailable": {
+        "targetFile": "engines/ai/context/builder.py",
+        "description": "build_context_tiered에서 context 포함 경로 확장",
+        "estimatedImpact": "high",
+    },
+    "generation_failure": {
+        "targetFile": "engines/ai/conversation/templates/analysis_rules.py",
+        "description": "분석 규칙에 few-shot 예시 추가",
+        "estimatedImpact": "medium",
+    },
+    "ui_wording_failure": {
+        "targetFile": "engines/ai/conversation/system_base.py",
+        "description": "시스템 프롬프트에서 내부 명칭 금지 강화",
+        "estimatedImpact": "low",
+    },
+    "hallucination": {
+        "targetFile": "engines/ai/conversation/templates/analysis_rules.py",
+        "description": "숫자 인용 시 출처 명시 규칙 강화",
+        "estimatedImpact": "high",
+    },
+    "data_gap": {
+        "targetFile": "engines/company/dart/",
+        "description": "데이터 파서 구현 또는 매핑 확장 필요",
+        "estimatedImpact": "medium",
+    },
+    "module_underuse": {
+        "targetFile": "engines/ai/runtime/pipeline.py",
+        "description": "파이프라인 frozenset에 모듈 포함 확장",
+        "estimatedImpact": "medium",
+    },
+    "clarification_failure": {
+        "targetFile": "engines/ai/conversation/system_base.py",
+        "description": "clarification 정책 조건 수정",
+        "estimatedImpact": "low",
+    },
+    "context_shallow": {
+        "targetFile": "engines/ai/context/finance_context.py",
+        "description": "context 레이어에 더 많은 데이터 소스 포함",
+        "estimatedImpact": "medium",
+    },
+    "citation_imprecise": {
+        "targetFile": "engines/ai/conversation/templates/analysis_rules.py",
+        "description": "인용 형식 규칙(연도+출처+수치 트리플) 강화",
+        "estimatedImpact": "medium",
+    },
+}
+def generateRemediations(
+    failureCounts: dict[str, int],
+    threshold: int = 1,
+) -> list[RemediationPlan]:
+    """failure 빈도에서 개선 계획 생성.
+    Args:
+        failureCounts: {failureType: count}
+        threshold: 최소 발생 횟수
+    Returns:
+        우선순위순 RemediationPlan 목록.
+    """
+    plans: list[RemediationPlan] = []
+    for failureType, count in failureCounts.items():
+        if count < threshold:
+            continue
+        remediation = _FAILURE_REMEDIATION.get(failureType)
+        if remediation is None:
+            plans.append(
+                RemediationPlan(
+                    failureType=failureType,
+                    targetFile="(매핑 없음)",
+                    description=f"새 failure 유형 — 매핑 추가 필요 (발생 {count}회)",
+                    priority=5,
+                    estimatedImpact="unknown",
+                )
+            )
+            continue
+        # 빈도 기반 우선순위 (1=최우선)
+        if count >= 5:
+            priority = 1
+        elif count >= 3:
+            priority = 2
+        elif count >= 2:
+            priority = 3
+        else:
+            priority = 4
+        # impact에 따른 보정
+        impact = remediation["estimatedImpact"]
+        if impact == "high":
+            priority = max(1, priority - 1)
+        plans.append(
+            RemediationPlan(
+                failureType=failureType,
+                targetFile=remediation["targetFile"],
+                description=f"{remediation['description']} (발생 {count}회)",
+                priority=priority,
+                estimatedImpact=impact,
+            )
+        )
+    plans.sort(key=lambda p: p.priority)
+    return plans
+def formatAsMarkdown(plans: list[RemediationPlan]) -> str:
+    """개선 계획을 마크다운으로."""
+    if not plans:
+        return "개선 필요 사항 없음."
+    lines = ["# 개선 계획 (Remediation)", ""]
+    lines.append("| 우선순위 | Failure | 대상 파일 | 설명 | 영향도 |")
+    lines.append("|---------|---------|----------|------|-------|")
+    for p in plans:
+        lines.append(f"| P{p.priority} | {p.failureType} | `{p.targetFile}` | {p.description} | {p.estimatedImpact} |")
+    lines.append("")
+    highPriority = [p for p in plans if p.priority <= 2]
+    if highPriority:
+        lines.append(f"**즉시 조치 필요**: {len(highPriority)}건")
+        for p in highPriority:
+            lines.append(f"- [{p.failureType}] → `{p.targetFile}`")
+    return "\n".join(lines)
+def generateGitHubIssueBody(plans: list[RemediationPlan]) -> str:
+    """gh issue create용 본문 생성."""
+    if not plans:
+        return ""
+    lines = ["## Eval 자동 진단 — 개선 필요", ""]
+    lines.append("배치 결과 분석에서 다음 개선 사항이 발견되었습니다:")
+    lines.append("")
+    for p in plans:
+        lines.append(f"### P{p.priority}: {p.failureType}")
+        lines.append(f"- **대상**: `{p.targetFile}`")
+        lines.append(f"- **설명**: {p.description}")
+        lines.append(f"- **영향도**: {p.estimatedImpact}")
+        lines.append("")
+    lines.append("---")
+    lines.append("*자동 생성 by evalDiagnose.py*")
+    return "\n".join(lines)
+def extractFailureCounts(results: list[dict[str, Any]]) -> dict[str, int]:
+    """배치 결과에서 failure 유형별 빈도 추출."""
+    counts: dict[str, int] = {}
+    for r in results:
+        for ftype in r.get("failureTypes", []):
+            counts[ftype] = counts.get(ftype, 0) + 1
+    return counts

src/dartlab/ai/eval/replayRunner.py ADDED Viewed

	@@ -0,0 +1,416 @@

+"""Persona question replay runner for ask regression."""
+from __future__ import annotations
+import json
+from dataclasses import asdict, dataclass, field
+from datetime import datetime
+from pathlib import Path
+from statistics import mean
+from typing import Any, Callable
+from dartlab.ai.eval.scorer import ScoreCard, auto_score, score_module_utilization
+from dartlab.ai.runtime.events import AnalysisEvent
+# JSON manifest of curated persona cases, stored next to this module.
+_PERSONA_CASES_PATH = Path(__file__).parent / "personaCases.json"
+# Sibling "reviewLog" directory; presumably where ReviewEntry records are kept — confirm against the writer.
+_REVIEW_LOG_DIR = Path(__file__).parent / "reviewLog"
+@dataclass(frozen=True)
+class PersonaEvalCase:
+    """Single curated ask regression case.
+
+    Field names mirror the keys of one entry in the ``cases`` array of
+    ``personaCases.json``. Instances are immutable (``frozen=True``).
+    """
+    id: str  # unique case id, e.g. "investor.report.majorHolder"
+    persona: str  # persona key, e.g. "investor", "accountant", "analyst"
+    personaLabel: str  # display label for the persona
+    question: str  # the user question to replay
+    userIntent: str  # intent tag, e.g. "major_holder"
+    stockCode: str | None = None  # ticker/stock code, e.g. "005930" or "AAPL"
+    expectedAnswerShape: list[str] = field(default_factory=list)
+    expectedEvidenceKinds: list[str] = field(default_factory=list)  # e.g. "report", "finance", "docs"
+    expectedUserFacingTerms: list[str] = field(default_factory=list)
+    forbiddenUiTerms: list[str] = field(default_factory=list)  # presumably internal names that must not leak into answers — confirm with scorer
+    expectedRoute: str | None = None  # e.g. "report", "finance", "sections", "hybrid"
+    expectedModules: list[str] = field(default_factory=list)
+    allowedClarification: bool = False
+    mustNotSay: list[str] = field(default_factory=list)
+    mustInclude: list[str] = field(default_factory=list)
+    expectedFollowups: list[str] = field(default_factory=list)
+    groundTruthFacts: list[dict[str, Any]] = field(default_factory=list)
+    severity: str = "medium"  # values seen in the manifest: "medium", "high", "critical"
+@dataclass
+class StructuralEval:
+    """Replay structure checks before answer-quality scoring.
+
+    All fields have defaults, so an empty instance represents "nothing
+    checked yet" with every score at 1.0 and no failures recorded.
+    """
+    expectedRoute: str | None = None  # route the case expected
+    actualRoute: str | None = None  # route observed during the replay
+    routeMatch: float = 1.0  # NOTE(review): looks like a 0..1 score — confirm against the code that fills it
+    moduleUtilization: float = 1.0  # presumably produced by score_module_utilization — confirm
+    clarificationAllowed: bool = False
+    clarificationNeeded: bool = False
+    clarificationQuality: float = 1.0
+    unexpectedModules: list[str] = field(default_factory=list)
+    failureTypes: list[str] = field(default_factory=list)  # failure taxonomy keys detected for this case
+@dataclass
+class ReplayResult:
+    """Full replay result for a single curated case."""
+    case: PersonaEvalCase  # the case that was replayed
+    answer: str  # final answer text
+    provider: str | None = None
+    model: str | None = None
+    # NOTE(review): meta/done/contexts/toolEvents are presumably captured from
+    # the runtime event stream — confirm against the code that builds this.
+    meta: dict[str, Any] = field(default_factory=dict)
+    done: dict[str, Any] = field(default_factory=dict)
+    contexts: list[dict[str, Any]] = field(default_factory=list)
+    toolEvents: list[dict[str, Any]] = field(default_factory=list)
+    structural: StructuralEval = field(default_factory=StructuralEval)
+    score: ScoreCard = field(default_factory=ScoreCard)
+    errors: list[dict[str, Any]] = field(default_factory=list)
+    def toDict(self) -> dict[str, Any]:
+        """Dataclass-friendly JSON view.
+
+        Returns:
+            ``asdict(self)`` (nested dataclasses become plain dicts) with
+            ``score["overall"]`` set from ``self.score.overall``.
+        """
+        payload = asdict(self)
+        # Added by hand; presumably `overall` is a computed attribute that
+        # asdict() does not include — confirm on ScoreCard.
+        payload["score"]["overall"] = self.score.overall
+        return payload
+@dataclass(frozen=True)
+class ReviewEntry:
+    """Human-reviewed replay note for long-term stabilization.
+
+    Instances are immutable (``frozen=True``).
+    """
+    reviewedAt: str  # review timestamp as text; format is not fixed here
+    caseId: str  # presumably the PersonaEvalCase.id that was reviewed — confirm with caller
+    persona: str
+    provider: str | None
+    model: str | None
+    effectiveness: str  # reviewer's verdict; allowed values are not defined in this class
+    improvementActions: list[str] = field(default_factory=list)
+    failureTypes: list[str] = field(default_factory=list)
+    notes: str = ""
+def _load_json(path: Path) -> dict[str, Any]:
+    """Read the JSON document stored at ``path``.
+    Returns a placeholder manifest (version "missing", no cases) when the file does not exist.
+    """
+    if path.exists():
+        return json.loads(path.read_text(encoding="utf-8"))
+    return {"version": "missing", "cases": []}
+def loadPersonaQuestionSet() -> dict[str, Any]:
+    """Load persona question set manifest.
+    Returns:
+        The parsed JSON manifest read from ``_PERSONA_CASES_PATH``; when that file does not exist,
+        a placeholder with version "missing" and an empty "cases" list.
+    """
+    return _load_json(_PERSONA_CASES_PATH)
+def loadPersonaCases(*, persona: str | None = None, severity: str | None = None) -> list[PersonaEvalCase]:
+    """Build the curated persona cases, optionally filtered.
+    Args:
+        persona: When given, keep only cases whose "persona" equals this value.
+        severity: When given, keep only cases whose "severity" equals this value.
+    Returns:
+        The selected cases in manifest order.
+    Raises:
+        KeyError: If a selected case lacks "id", "persona" or "question".
+    """
+    def _wanted(entry: dict[str, Any]) -> bool:
+        if persona and entry.get("persona") != persona:
+            return False
+        return not (severity and entry.get("severity") != severity)
+    def _listOf(entry: dict[str, Any], key: str) -> list[Any]:
+        # Copy so a case never aliases a list owned by the manifest.
+        return list(entry.get(key, []))
+    def _toCase(entry: dict[str, Any]) -> PersonaEvalCase:
+        return PersonaEvalCase(
+            id=entry["id"],
+            persona=entry["persona"],
+            # The label falls back to the persona key itself.
+            personaLabel=entry.get("personaLabel", entry["persona"]),
+            stockCode=entry.get("stockCode"),
+            question=entry["question"],
+            userIntent=entry.get("userIntent", ""),
+            expectedAnswerShape=_listOf(entry, "expectedAnswerShape"),
+            expectedEvidenceKinds=_listOf(entry, "expectedEvidenceKinds"),
+            expectedUserFacingTerms=_listOf(entry, "expectedUserFacingTerms"),
+            forbiddenUiTerms=_listOf(entry, "forbiddenUiTerms"),
+            expectedRoute=entry.get("expectedRoute"),
+            expectedModules=_listOf(entry, "expectedModules"),
+            allowedClarification=bool(entry.get("allowedClarification", False)),
+            mustNotSay=_listOf(entry, "mustNotSay"),
+            mustInclude=_listOf(entry, "mustInclude"),
+            expectedFollowups=_listOf(entry, "expectedFollowups"),
+            groundTruthFacts=_listOf(entry, "groundTruthFacts"),
+            severity=entry.get("severity", "medium"),
+        )
+    manifest = loadPersonaQuestionSet()
+    return [_toCase(entry) for entry in manifest.get("cases", []) if _wanted(entry)]
+def _resolve_company(stockCode: str | None) -> Any | None:
+    """Create the ``dartlab.Company`` for ``stockCode``; return None for an empty or missing code."""
+    if stockCode:
+        # Imported only on this path, so calls without a stock code never import dartlab here.
+        from dartlab import Company
+        return Company(stockCode)
+    return None
+def _collect_replay_data(
+    events: list[AnalysisEvent],
+) -> tuple[dict[str, Any], dict[str, Any], list[dict[str, Any]], list[dict[str, Any]], str, list[dict[str, Any]]]:
+    """Fold a sequence of analysis events into the pieces the evaluator needs.
+    Returns:
+        ``(meta, done, contexts, toolEvents, answer, errors)`` where ``meta`` merges every "meta"
+        payload, ``done`` is the payload of the last "done" event, ``answer`` joins the "text" of
+        all "chunk" events, and the lists keep emission order. Events of any other kind are ignored.
+    """
+    toolTags = {"tool_call": "call", "tool_result": "result"}
+    meta: dict[str, Any] = {}
+    done: dict[str, Any] = {}
+    contexts: list[dict[str, Any]] = []
+    toolEvents: list[dict[str, Any]] = []
+    errors: list[dict[str, Any]] = []
+    textParts: list[str] = []
+    for event in events:
+        payload = event.data or {}
+        kind = event.kind
+        if kind in toolTags:
+            # The payload is merged last, so a "type" key inside it overrides the tag.
+            toolEvents.append({"type": toolTags[kind]} | payload)
+            continue
+        if kind == "chunk":
+            textParts.append(payload.get("text", ""))
+        elif kind == "context":
+            contexts.append(payload)
+        elif kind == "meta":
+            meta.update(payload)
+        elif kind == "error":
+            errors.append(payload)
+        elif kind == "done":
+            # Replaced, not merged: a later "done" event wins outright.
+            done = payload
+    return meta, done, contexts, toolEvents, "".join(textParts), errors
+def evaluateReplay(
+    case: PersonaEvalCase, events: list[AnalysisEvent], *, provider: str | None = None, model: str | None = None
+) -> ReplayResult:
+    """Evaluate already-collected analysis events against a curated case.
+    Args:
+        case: Curated case that supplies the expectations.
+        events: Events produced by one analysis run, in emission order.
+        provider: Provider label stored on the result; not used for scoring.
+        model: Model label stored on the result; not used for scoring.
+    Returns:
+        A ReplayResult holding the answer text, the raw event data, the structural checks and
+        the score card. Structural and score-derived failure labels are merged, de-duplicated
+        and sorted, and stored on both the score card and the structural record.
+    """
+    meta, done, contexts, toolEvents, answer, errors = _collect_replay_data(events)
+    # Prefer the module list of the final "done" event and fall back to "meta".
+    includedModules = list(done.get("includedModules") or meta.get("includedModules") or [])
+    actualRoute = done.get("route")
+    clarificationNeeded = bool(done.get("clarificationNeeded"))
+    moduleUtilization = score_module_utilization(includedModules, case.expectedModules)
+    # A case without an expected route can never fail the route check.
+    routeMatch = 1.0 if not case.expectedRoute or case.expectedRoute == actualRoute else 0.0
+    clarificationQuality = 1.0
+    if clarificationNeeded and not case.allowedClarification:
+        clarificationQuality = 0.0
+    structuralFailures: list[str] = []
+    if routeMatch == 0.0:
+        structuralFailures.append("routing_failure")
+    if moduleUtilization < 1.0:
+        structuralFailures.append("retrieval_failure")
+    if clarificationQuality == 0.0:
+        structuralFailures.append("clarification_failure")
+    if errors:
+        structuralFailures.append("runtime_error")
+    score = auto_score(
+        answer,
+        expected_facts=case.groundTruthFacts,
+        expected_topics=case.expectedUserFacingTerms,
+        included_modules=includedModules,
+        expected_modules=case.expectedModules,
+        must_not_say=case.mustNotSay,
+        must_include=case.mustInclude,
+        forbidden_terms=case.forbiddenUiTerms,
+        clarification_allowed=case.allowedClarification,
+        expected_followups=case.expectedFollowups,
+        expected_route=case.expectedRoute,
+        actual_route=actualRoute,
+        # Forward the collected contexts; without them the context-depth dimension is a constant 1.0.
+        # NOTE(review): assumes context payloads expose their text under "content" or "text",
+        # which is what score_context_depth reads - confirm against the event producer.
+        contexts=contexts,
+    )
+    failureTypes = sorted(set(structuralFailures + score.failure_types))
+    # Separate copies, so mutating one record's list later cannot silently change the other.
+    score.failure_types = list(failureTypes)
+    structural = StructuralEval(
+        expectedRoute=case.expectedRoute,
+        actualRoute=actualRoute,
+        routeMatch=routeMatch,
+        moduleUtilization=moduleUtilization,
+        clarificationAllowed=case.allowedClarification,
+        clarificationNeeded=clarificationNeeded,
+        clarificationQuality=clarificationQuality,
+        unexpectedModules=sorted(set(includedModules) - set(case.expectedModules)),
+        failureTypes=list(failureTypes),
+    )
+    return ReplayResult(
+        case=case,
+        answer=answer,
+        provider=provider,
+        model=model,
+        meta=meta,
+        done=done,
+        contexts=contexts,
+        toolEvents=toolEvents,
+        structural=structural,
+        score=score,
+        errors=errors,
+    )
+def replayCase(
+    case: PersonaEvalCase,
+    *,
+    provider: str | None = None,
+    model: str | None = None,
+    reportMode: bool = False,
+    useTools: bool = False,
+    analyzeFn: Callable[..., Any] | None = None,
+    company: Any | None = None,
+    **kwargs: Any,
+) -> ReplayResult:
+    """Run one curated case through the analysis entry point and evaluate the outcome.
+    Args:
+        case: Curated case to replay.
+        provider: Forwarded to the analysis call and recorded on the result.
+        model: Forwarded to the analysis call and recorded on the result.
+        reportMode: Forwarded as ``report_mode``.
+        useTools: Forwarded as ``use_tools``.
+        analyzeFn: Callable that yields analysis events; defaults to ``dartlab.ai.runtime.core.analyze``.
+        company: Pre-built company object; when None it is resolved from ``case.stockCode``.
+        **kwargs: Extra keyword arguments passed through to the analysis call.
+    Returns:
+        The evaluated ReplayResult.
+    """
+    runner = analyzeFn
+    if runner is None:
+        from dartlab.ai.runtime.core import analyze as runner
+    target = company
+    if target is None:
+        target = _resolve_company(case.stockCode)
+    stream = runner(
+        target,
+        case.question,
+        provider=provider,
+        model=model,
+        report_mode=reportMode,
+        use_tools=useTools,
+        **kwargs,
+    )
+    # Drain the event stream completely before evaluating.
+    return evaluateReplay(case, list(stream), provider=provider, model=model)
+def replaySuite(
+    cases: list[PersonaEvalCase],
+    *,
+    provider: str | None = None,
+    model: str | None = None,
+    reportMode: bool = False,
+    useTools: bool = False,
+    analyzeFn: Callable[..., Any] | None = None,
+    **kwargs: Any,
+) -> list[ReplayResult]:
+    """Replay every case in order, reusing company objects across cases.
+    At most three company objects are kept; when a fourth distinct stock code appears, the
+    entry that was inserted first is dropped. All other arguments are forwarded to ``replayCase``.
+    Returns:
+        One ReplayResult per case, in input order.
+    """
+    import gc
+    cache: dict[str, Any] = {}
+    def _cached(code: str) -> Any:
+        if code not in cache:
+            # Memory safety: keep at most 3 Company objects alive.
+            if len(cache) >= 3:
+                del cache[next(iter(cache))]
+                gc.collect()
+            cache[code] = _resolve_company(code)
+        return cache[code]
+    outcomes: list[ReplayResult] = []
+    for case in cases:
+        # Reset first so the previous company is not referenced here during an eviction.
+        company = None
+        if case.stockCode:
+            company = _cached(case.stockCode)
+        outcomes.append(
+            replayCase(
+                case,
+                provider=provider,
+                model=model,
+                reportMode=reportMode,
+                useTools=useTools,
+                analyzeFn=analyzeFn,
+                company=company,
+                **kwargs,
+            )
+        )
+    return outcomes
+def summarizeReplayResults(results: list[ReplayResult]) -> dict[str, Any]:
+    """Condense replay results into aggregate numbers for dashboards or logs.
+    Returns:
+        A dict with the case count, the number of distinct personas, three averages rounded to
+        three decimals, the number of cases whose ``false_unavailable`` score is 0.0, and the
+        per-label failure counts ordered by label. An empty input yields all-zero values.
+    """
+    if not results:
+        return {
+            "cases": 0,
+            "personas": 0,
+            "avgOverall": 0.0,
+            "avgRouteMatch": 0.0,
+            "avgModuleUtilization": 0.0,
+            "falseUnavailableCases": 0,
+            "failureCounts": {},
+        }
+    labels = [label for item in results for label in item.score.failure_types]
+    failureCounts = {label: labels.count(label) for label in sorted(set(labels))}
+    personas = {item.case.persona for item in results}
+    flagged = [item for item in results if item.score.false_unavailable == 0.0]
+    return {
+        "cases": len(results),
+        "personas": len(personas),
+        "avgOverall": round(mean(item.score.overall for item in results), 3),
+        "avgRouteMatch": round(mean(item.structural.routeMatch for item in results), 3),
+        "avgModuleUtilization": round(mean(item.structural.moduleUtilization for item in results), 3),
+        "falseUnavailableCases": len(flagged),
+        "failureCounts": failureCounts,
+    }
+def _reviewLogPath(persona: str) -> Path:
+    """Return the review-log path for ``persona``: ``<persona>.jsonl`` inside ``_REVIEW_LOG_DIR``."""
+    return _REVIEW_LOG_DIR / f"{persona}.jsonl"
+def loadReviewLog(*, persona: str | None = None, caseId: str | None = None) -> list[ReviewEntry]:
+    """Read stored human review entries.
+    Args:
+        persona: When given, read only that persona's log file; otherwise read every ``*.jsonl``
+            file in the review-log directory.
+        caseId: When given, keep only entries recorded for this case.
+    Returns:
+        Matching entries ordered by ``reviewedAt``; an empty list when no log exists.
+    Raises:
+        KeyError: If a stored record lacks "reviewedAt", "caseId", "persona" or "effectiveness".
+    """
+    logFiles: list[Path]
+    if persona:
+        candidate = _reviewLogPath(persona)
+        logFiles = [candidate] if candidate.exists() else []
+    elif _REVIEW_LOG_DIR.exists():
+        logFiles = sorted(_REVIEW_LOG_DIR.glob("*.jsonl"))
+    else:
+        return []
+    collected: list[ReviewEntry] = []
+    for logFile in logFiles:
+        with open(logFile, encoding="utf-8") as handle:
+            for rawLine in handle:
+                text = rawLine.strip()
+                if not text:
+                    # Blank lines carry no record.
+                    continue
+                record = json.loads(text)
+                if caseId and record.get("caseId") != caseId:
+                    continue
+                collected.append(
+                    ReviewEntry(
+                        reviewedAt=record["reviewedAt"],
+                        caseId=record["caseId"],
+                        persona=record["persona"],
+                        provider=record.get("provider"),
+                        model=record.get("model"),
+                        effectiveness=record["effectiveness"],
+                        improvementActions=list(record.get("improvementActions", [])),
+                        failureTypes=list(record.get("failureTypes", [])),
+                        notes=record.get("notes", ""),
+                    )
+                )
+    collected.sort(key=lambda entry: entry.reviewedAt)
+    return collected
+def appendReviewEntry(
+    result: ReplayResult,
+    *,
+    effectiveness: str,
+    improvementActions: list[str] | None = None,
+    notes: str = "",
+    reviewedAt: str | None = None,
+) -> ReviewEntry:
+    """Record a human review of ``result`` in the persona's review log.
+    Args:
+        result: Replay result that was reviewed; supplies case id, persona, provider, model and failure labels.
+        effectiveness: Reviewer verdict.
+        improvementActions: Proposed follow-up actions; None means none.
+        notes: Free-form remarks.
+        reviewedAt: Timestamp string; defaults to the current local time at second precision.
+    Returns:
+        The entry that was appended as one JSON line.
+    """
+    stamp = reviewedAt or datetime.now().isoformat(timespec="seconds")
+    actions = list(improvementActions or [])
+    entry = ReviewEntry(
+        reviewedAt=stamp,
+        caseId=result.case.id,
+        persona=result.case.persona,
+        provider=result.provider,
+        model=result.model,
+        effectiveness=effectiveness,
+        improvementActions=actions,
+        failureTypes=list(result.score.failure_types),
+        notes=notes,
+    )
+    # Create the log directory on first use.
+    _REVIEW_LOG_DIR.mkdir(parents=True, exist_ok=True)
+    target = _reviewLogPath(result.case.persona)
+    with open(target, "a", encoding="utf-8") as handle:
+        # ensure_ascii=False keeps non-ASCII text readable in the log file.
+        handle.write(json.dumps(asdict(entry), ensure_ascii=False) + "\n")
+    return entry

src/dartlab/ai/eval/reviewLog/accountant.jsonl ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"reviewedAt": "2026-03-23T22:41:46", "caseId": "accountant.costByNature.summary", "persona": "accountant", "provider": "oauth-codex", "model": null, "effectiveness": "effective", "improvementActions": ["?????? ?? ??? ?? ??? ?? ??? ???? 1???? ? ?? ??"], "failureTypes": [], "notes": "??? ?? ?? ???? ??? ???? false unavailable ?? ??."}

src/dartlab/ai/eval/reviewLog/analyst.jsonl ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"reviewedAt": "2026-03-23T22:56:55", "caseId": "analyst.margin.drivers", "persona": "analyst", "provider": "oauth-codex", "model": null, "effectiveness": "ineffective", "improvementActions": ["????? + ?? ?? + ?? ?? ??? ambiguity ?? ?? hybrid ???? ??", "margin driver ??? IS + costByNature + businessOverview/productService? ??? ???? ?? ??", "??? ?? ??? ?? ??? ?? ??? clarification_allowed? ???? ?? ????? ?? ??"], "failureTypes": ["clarification_failure", "retrieval_failure", "routing_failure", "ui_wording_failure"], "notes": "??? ?? ??? ?? ??? analyst ?? ????? ???."}
2	+ {"reviewedAt": "2026-03-23T23:51:30", "caseId": "analyst.margin.drivers", "persona": "analyst", "provider": "oauth-codex", "model": null, "effectiveness": "effective", "improvementActions": [], "failureTypes": [], "notes": "?? hybrid? clarification ??? ?? ?? ??? costByNature?? ??? ????."}

src/dartlab/ai/eval/reviewLog/investor.jsonl ADDED Viewed

	@@ -0,0 +1,4 @@

+{"reviewedAt": "2026-03-23T22:42:26", "caseId": "investor.dividend.sustainability", "persona": "investor", "provider": "oauth-codex", "model": null, "effectiveness": "partial", "improvementActions": ["??? ??? IS/CF/BS/TTM ?? ?? ??? ?????/?????/?????/?? 4?? ???? ??", "?? ?? ??? ??? ?? 2?? ? ???? ?? ???? ?? ??? ??"], "failureTypes": ["ui_wording_failure"], "notes": "?? ?? ??? ??? ???? ui_wording_failure? ??."}
+{"reviewedAt": "2026-03-23T22:56:52", "caseId": "investor.distress.sdi", "persona": "investor", "provider": "oauth-codex", "model": null, "effectiveness": "partial", "improvementActions": ["?? ?? ??? finance route? ??? hybrid? ???? ?? ?", "??? ?? IS/CF/BS/TTM/ratios ??? ???? ??? ??"], "failureTypes": ["routing_failure", "ui_wording_failure"], "notes": "??? ????? route? finance? ???? ??? ?? ??? ??."}
+{"reviewedAt": "2026-03-23T23:50:31", "caseId": "investor.distress.sdi", "persona": "investor", "provider": "oauth-codex", "model": null, "effectiveness": "effective", "improvementActions": [], "failureTypes": [], "notes": "finance route ?? ? ?? ?? ??? ?????????? ?? ???? ????."}
+{"reviewedAt": "2026-03-23T23:52:44", "caseId": "investor.dividend.sustainability", "persona": "investor", "provider": "oauth-codex", "model": null, "effectiveness": "effective", "improvementActions": [], "failureTypes": [], "notes": "?? ?? ???? ?? ?? ??? ?? ??? ?? ?? ???? ????."}

src/dartlab/ai/eval/reviewLog/research_gather.jsonl ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ {"reviewedAt": "2026-03-23T22:56:00", "caseId": "researchGather.structure.recentDisclosures", "persona": "research_gather", "provider": "oauth-codex", "model": null, "effectiveness": "partial", "improvementActions": ["?? ?? ?? ???? ??? disclosureChanges/subsequentEvents? ?? ?? businessOverview/productService? ?? ?? ??", "? ???? topic/period/source ?? ?? ???? ?? ??/?? ??/??? ??"], "failureTypes": ["retrieval_failure", "ui_wording_failure"], "notes": "?? ?? ??? ????? ??? ???? ?? ?? ?? retrieval_failure? ??."}
2	+ {"reviewedAt": "2026-03-23T23:52:01", "caseId": "researchGather.structure.recentDisclosures", "persona": "research_gather", "provider": "oauth-codex", "model": null, "effectiveness": "effective", "improvementActions": [], "failureTypes": [], "notes": "sections ?? ?? ?? ? disclosureChanges? businessOverview? ?? ???."}

src/dartlab/ai/eval/scorer.py ADDED Viewed

	@@ -0,0 +1,466 @@

+"""확장 답변 채점기.
+기본 차원:
+    1. factual_accuracy — 수치 정확도 (실제 finance 값 대비)
+    2. completeness — 기대 항목 포함률
+    3. source_citation — 출처(테이블명, 연도) 인용 비율
+    4. hallucination — 허위 수치 포함 여부
+    5. actionability — 결론/판단/제안 포함 여부
+    6. ratio_utilization — 제공된 복합 지표(DuPont, Piotroski F, Altman Z 등) 활용도
+확장 차원:
+    7. module_utilization — 기대 모듈 회수율
+    8. false_unavailable — 거짓 unavailable 탐지
+    9. grounding_quality — 기대 근거 표현 사용 여부
+    10. clarification_quality — clarification 정책 준수
+    11. ui_language_compliance — 내부 명칭 노출 억제
+    12. followup_usefulness — 후속 질문/행동 제안 유용성
+데이터 심도 차원:
+    13. context_depth — context 레이어가 answer에 실제 반영된 비율
+    14. source_citation_precision — (연도+출처+수치) triple 정밀도
+    15. data_coverage — 기대 모듈 대비 실제 데이터 활용 증거
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass, field
+@dataclass
+class ScoreCard:
+    """Extended scoring result for one answer.
+    Every dimension is a float in 0..1 where higher is better.
+    """
+    factual_accuracy: float = 0.0  # 0..1
+    completeness: float = 0.0  # 0..1
+    source_citation: float = 0.0  # 0..1
+    hallucination: float = 1.0  # 1 = none detected, 0 = present
+    actionability: float = 0.0  # 0..1
+    ratio_utilization: float = 0.0  # 0..1
+    module_utilization: float = 0.0  # 0..1
+    false_unavailable: float = 1.0  # 1 = none, 0 = false "unavailable" claim
+    grounding_quality: float = 0.0  # 0..1
+    clarification_quality: float = 1.0  # 0..1
+    ui_language_compliance: float = 1.0  # 0..1
+    followup_usefulness: float = 0.0  # 0..1
+    context_depth: float = 0.0  # 0..1
+    source_citation_precision: float = 0.0  # 0..1
+    data_coverage: float = 0.0  # 0..1
+    failure_types: list[str] = field(default_factory=list)
+    details: dict = field(default_factory=dict)
+    @property
+    def overall(self) -> float:
+        """Weighted sum of all dimensions.
+        The sum is not divided by the total weight, so it reaches 12.5 when every dimension is 1.0.
+        """
+        weights = (
+            ("factual_accuracy", 1.5),
+            ("completeness", 1.0),
+            ("source_citation", 0.5),
+            ("hallucination", 1.0),
+            ("actionability", 1.0),
+            ("ratio_utilization", 0.5),
+            ("module_utilization", 1.0),
+            ("false_unavailable", 1.0),
+            ("grounding_quality", 1.0),
+            ("clarification_quality", 0.5),
+            ("ui_language_compliance", 0.5),
+            ("followup_usefulness", 0.5),
+            ("context_depth", 1.0),
+            ("source_citation_precision", 0.5),
+            ("data_coverage", 1.0),
+        )
+        # Accumulate left to right so rounding matches a plain chained addition.
+        total = 0.0
+        for name, weight in weights:
+            total += getattr(self, name) * weight
+        return total
+def score_factual_accuracy(answer: str, expected_facts: list[dict]) -> float:
+    """Fraction of expected numeric facts that appear in the answer.
+    A fact counts as matched when some number in the answer lies within 15% of the expected
+    value, either as given or after dividing it by 1e12, 1e8, 1e6 or 1e4 (unit conversion
+    such as jo / eok). Each fact is counted at most once, so the result stays within 0..1.
+    Args:
+        answer: Answer text to inspect.
+        expected_facts: Facts such as ``{"metric": "sales", "value": 1234567, "unit": "millions"}``;
+            entries without a numeric "value" are ignored.
+    Returns:
+        Matched facts divided by numeric facts; 1.0 when there is no numeric fact.
+    """
+    numeric_facts = [f for f in expected_facts if isinstance(f.get("value"), (int, float))]
+    if not numeric_facts:
+        return 1.0
+    # Parse the answer's numbers once instead of once per fact.
+    candidates: list[float] = []
+    for token in re.findall(r"[\d,]+(?:\.\d+)?", answer):
+        try:
+            candidates.append(float(token.replace(",", "")))
+        except ValueError:
+            # A run made only of commas is not a number.
+            continue
+    scales = (1, 1e12, 1e8, 1e6, 1e4)
+    def _close(parsed: float, target: float) -> bool:
+        return target != 0 and abs(parsed - target) / abs(target) < 0.15
+    def _mentioned(value: float) -> bool:
+        return any(_close(parsed, value / scale) for parsed in candidates for scale in scales)
+    matched = sum(1 for fact in numeric_facts if _mentioned(fact["value"]))
+    return matched / len(numeric_facts)
+def score_completeness(answer: str, expected_topics: list[str]) -> float:
+    """Share of expected topics mentioned in the answer (case-insensitive substring match).
+    Returns 1.0 when no topic is expected.
+    """
+    if not expected_topics:
+        return 1.0
+    haystack = answer.lower()
+    mentioned = [topic for topic in expected_topics if topic.lower() in haystack]
+    return len(mentioned) / len(expected_topics)
+def score_source_citation(answer: str) -> float:
+    """Rate how well the answer cites years and statement sources.
+    The result averages two parts: year mentions (a 2010-2029 year followed by the Korean
+    year suffix; two or more mentions give full credit, one gives half) and source mentions
+    (at least one statement keyword gives full credit).
+    """
+    year_hits = re.findall(r"20[12]\d년", answer)
+    year_part = min(len(year_hits) / 2, 1.0)
+    cites_source = re.search(r"(?:BS|IS|CF|손익|재무|대차|현금)", answer) is not None
+    source_part = 1.0 if cites_source else 0.0
+    return (year_part + source_part) / 2
+def score_hallucination(answer: str, known_facts: list[dict]) -> float:
+    """Score how free the answer is of unsupported numbers (1.0 = none found).
+    Only the first 10 numbers of at least 10 are inspected. A number is considered supported
+    when it lies within 50% of a known value, either as given or after dividing that value by
+    1e12, 1e8, 1e6 or 1e4. The score is one minus the unsupported share of the inspected numbers.
+    Args:
+        answer: Answer text to inspect.
+        known_facts: Facts with a numeric "value"; other entries are ignored.
+    Returns:
+        A value in 0..1; 1.0 when there is no numeric fact or no number to inspect.
+    """
+    numeric_facts = [f for f in known_facts if isinstance(f.get("value"), (int, float))]
+    if not numeric_facts:
+        return 1.0
+    max_inspected = 10
+    scales = (1, 1e12, 1e8, 1e6, 1e4)
+    def _supported(parsed: float) -> bool:
+        for fact in numeric_facts:
+            for scale in scales:
+                target = fact["value"] / scale
+                if target != 0 and abs(parsed - target) / abs(target) < 0.5:
+                    return True
+        return False
+    inspected = 0
+    unsupported = 0
+    for token in re.findall(r"[\d,]+(?:\.\d+)?", answer):
+        try:
+            parsed = float(token.replace(",", ""))
+        except ValueError:
+            continue
+        if parsed < 10:
+            # Very small numbers (ratios and the like) are ignored.
+            continue
+        if inspected == max_inspected:
+            # Stop here so uninspected numbers cannot dilute the ratio.
+            break
+        inspected += 1
+        if not _supported(parsed):
+            unsupported += 1
+    if inspected == 0:
+        return 1.0
+    return max(0.0, 1.0 - unsupported / inspected)
+def score_actionability(answer: str) -> float:
+    """Rate whether the answer states a conclusion, judgement or recommendation.
+    Counts how many distinct judgement keywords occur in the answer; three or more give 1.0.
+    """
+    judgement_terms = (
+        "결론",
+        "판단",
+        "종합",
+        "평가",
+        "의견",
+        "긍정",
+        "부정",
+        "양호",
+        "우려",
+        "주의",
+        "개선",
+        "악화",
+        "안정",
+        "위험",
+        "추천",
+        "제안",
+        "고려",
+    )
+    present = [term for term in judgement_terms if term in answer]
+    return min(len(present) / 3, 1.0)
+# Names of composite indicators searched for when the caller does not say which were provided.
+_COMPOSITE_INDICATORS = [
+    "DuPont",
+    "듀퐁",
+    "Piotroski",
+    "피오트로스키",
+    "F-Score",
+    "Altman",
+    "Z-Score",
+    "ROIC",
+    "CCC",
+    "현금전환주기",
+    "이익의 질",
+    "영업CF/순이익",
+]
+def score_ratio_utilization(answer: str, provided_indicators: list[str] | None = None) -> float:
+    """Measure whether the provided composite indicators are used in the answer.
+    Args:
+        answer: Answer text to inspect.
+        provided_indicators: Names of the composite indicators supplied in the context.
+            None searches the whole ``_COMPOSITE_INDICATORS`` list; an empty list means
+            nothing was provided and yields 1.0.
+    Returns:
+        A value in 0..1; mentioning at least 30% of the indicators (and at least one) gives 1.0.
+    """
+    # Only None selects the default list; an explicit empty list is respected.
+    indicators = _COMPOSITE_INDICATORS if provided_indicators is None else provided_indicators
+    if not indicators:
+        return 1.0
+    haystack = answer.lower()
+    found = sum(1 for indicator in indicators if indicator.lower() in haystack)
+    # Using 30% of the provided indicators is enough for full marks.
+    return min(found / max(len(indicators) * 0.3, 1), 1.0)
+def score_module_utilization(included_modules: list[str] | None, expected_modules: list[str] | None) -> float:
+    """Fraction of the expected modules that the replay actually included.
+    Falsy entries are dropped and the rest compared as strings. Returns 1.0 when nothing is
+    expected and 0.0 when something is expected but nothing was included.
+    """
+    wanted = {str(name) for name in (expected_modules or []) if name}
+    if not wanted:
+        return 1.0
+    present = {str(name) for name in (included_modules or []) if name}
+    if not present:
+        return 0.0
+    return len(wanted.intersection(present)) / len(wanted)
+def score_false_unavailable(answer: str, must_not_say: list[str] | None = None, *, enabled: bool = True) -> float:
+    """Detect "data unavailable" wording in the answer.
+    Args:
+        answer: Answer text to inspect.
+        must_not_say: Phrases that must not occur; when empty or None a built-in phrase list is used.
+        enabled: When False the check is skipped and 1.0 is returned.
+    Returns:
+        0.0 when any phrase occurs (case-insensitive), otherwise 1.0.
+    """
+    if not enabled:
+        return 1.0
+    phrases = [phrase for phrase in (must_not_say or []) if phrase]
+    if not phrases:
+        phrases = [
+            "데이터가 없습니다",
+            "확인할 수 없습니다",
+            "미제공",
+            "제공되지 않습니다",
+            "제공된 데이터에는",
+            "cannot determine",
+            "not available",
+        ]
+    haystack = answer.lower()
+    for phrase in phrases:
+        if phrase.lower() in haystack:
+            return 0.0
+    return 1.0
+def score_grounding_quality(
+    answer: str, must_include: list[str] | None = None, expected_topics: list[str] | None = None
+) -> float:
+    """Share of the expected grounding cues that the answer uses.
+    The cues come from ``must_include``; when that yields none, ``expected_topics`` is used
+    instead. Matching is a case-insensitive substring test. Returns 1.0 when there is no cue.
+    """
+    for source in (must_include, expected_topics):
+        cues = [cue for cue in (source or []) if cue]
+        if cues:
+            break
+    else:
+        return 1.0
+    haystack = answer.lower()
+    used = [cue for cue in cues if cue.lower() in haystack]
+    return len(used) / len(cues)
+def score_clarification_quality(answer: str, clarification_allowed: bool | None) -> float:
+    """Check the answer against the clarification policy.
+    The answer is treated as a clarification when it contains one of the marker phrases or
+    ends with a question mark.
+    Args:
+        answer: Answer text to inspect.
+        clarification_allowed: None disables the check (1.0). True scores 1.0 only when the
+            answer asks for clarification; False scores 1.0 only when it does not.
+    Returns:
+        1.0 or 0.0.
+    """
+    # NOTE(review): "allowed" is scored as "required" here - an answer that simply answers
+    # gets 0.0 when clarification is allowed. Confirm that this is the intended policy.
+    if clarification_allowed is None:
+        return 1.0
+    markers = (
+        "보실 건가요",
+        "원하시는 건가요",
+        "말씀하신",
+        "의미하신",
+        "인지 먼저",
+        "맞나요",
+    )
+    asks = answer.strip().endswith("?") or any(marker in answer for marker in markers)
+    return 1.0 if asks == bool(clarification_allowed) else 0.0
+def score_ui_language_compliance(answer: str, forbidden_terms: list[str] | None = None) -> float:
+    """Rate how well the answer avoids internal names.
+    Returns one minus the share of forbidden terms found in the answer (case-insensitive
+    substring match); 1.0 when no term is forbidden.
+    """
+    banned = [term for term in (forbidden_terms or []) if term]
+    if not banned:
+        return 1.0
+    haystack = answer.lower()
+    leaked = [term for term in banned if term.lower() in haystack]
+    return max(0.0, 1.0 - (len(leaked) / len(banned)))
+def score_followup_usefulness(answer: str, expected_followups: list[str] | None = None) -> float:
+    """Rate the follow-up guidance offered by the answer.
+    With expected follow-up terms, returns the share of them found in the answer
+    (case-insensitive). Without any, counts generic follow-up words; two or more give 1.0.
+    """
+    wanted = [term for term in (expected_followups or []) if term]
+    if wanted:
+        haystack = answer.lower()
+        hits = [term for term in wanted if term.lower() in haystack]
+        return len(hits) / len(wanted)
+    generic_terms = ("추가", "다음", "확인", "보면", "점검", "질문")
+    generic_hits = [term for term in generic_terms if term in answer]
+    return min(len(generic_hits) / 2, 1.0)
+def score_context_depth(answer: str, contexts: list[dict] | None = None) -> float:
+    """Share of the provided context entries whose content is reflected in the answer.
+    For each entry, up to 10 keywords (numbers and Korean words of 2-6 characters) are taken
+    from the first 500 characters of its "content" (or "text"). The entry counts as reflected
+    when at least 20% of those keywords, and at least one, occur in the answer. Entries that
+    yield no keyword are left out of the ratio, since nothing from them could be reflected.
+    Args:
+        answer: Answer text to inspect.
+        contexts: Context payloads; None or empty yields 1.0.
+    Returns:
+        Reflected entries divided by entries with keywords; 1.0 when no entry has keywords.
+    """
+    if not contexts:
+        return 1.0
+    # A number must contain a digit; a bare comma is punctuation, not a keyword.
+    keyword_re = re.compile(r"\d[\d,]*(?:\.\d+)?(?:조|억|만)?|[가-힣]{2,6}")
+    answer_lower = answer.lower()
+    scorable = 0
+    reflected = 0
+    for ctx in contexts:
+        content = str(ctx.get("content") or ctx.get("text") or "")
+        sample = keyword_re.findall(content[:500])[:10]
+        if not sample:
+            continue
+        scorable += 1
+        hits = sum(1 for keyword in sample if keyword.lower() in answer_lower)
+        if hits >= max(len(sample) * 0.2, 1):
+            reflected += 1
+    if scorable == 0:
+        return 1.0
+    return reflected / scorable
+def score_source_citation_precision(answer: str) -> float:
+    """Measure precise citations where a year, a source term and a figure appear together.
+    For every year mention (2020-2099) the text from 30 characters before to 60 characters
+    after the start of the year is inspected. The mention counts when that window holds a
+    statement or account keyword and a number other than the year itself.
+    Returns:
+        Counted mentions divided by all year mentions; 0.0 when the answer mentions no year.
+    """
+    year_matches = list(re.finditer(r"20[2-9]\d", answer))
+    if not year_matches:
+        return 0.0
+    source_re = re.compile(r"(?:BS|IS|CF|손익|재무|대차|현금|자산|부채|매출|영업이익|자본)")
+    # Require at least one digit so that a lone comma is not taken for a figure.
+    number_re = re.compile(r"\d[\d,]*(?:\.\d+)?(?:조|억|만|%)?")
+    triples = 0
+    for match in year_matches:
+        start = max(0, match.start() - 30)
+        end = match.start() + 60
+        window = answer[start:end]
+        # Cut the anchoring year out; otherwise its own digits always satisfy the number check.
+        remainder = answer[start : match.start()] + " " + answer[match.end() : end]
+        if source_re.search(window) and number_re.search(remainder):
+            triples += 1
+    return min(triples / len(year_matches), 1.0)
+def score_data_coverage(
+    answer: str,
+    included_modules: list[str] | None = None,
+    expected_modules: list[str] | None = None,
+) -> float:
+    """Share of expected modules for which the answer shows evidence of use.
+    A known module is evidenced when any of its keywords occurs in the answer; a module
+    without a keyword list is evidenced when its own name occurs. Matching is
+    case-insensitive. ``included_modules`` is accepted but not consulted.
+    Returns:
+        Evidenced modules divided by expected modules; 1.0 when nothing is expected.
+    """
+    wanted = list(expected_modules or [])
+    if not wanted:
+        return 1.0
+    # Keywords that indicate a module's data was used.
+    evidence_by_module: dict[str, list[str]] = {
+        "IS": ["매출", "영업이익", "순이익", "원가", "판관비"],
+        "BS": ["자산", "부채", "자본", "유동"],
+        "CF": ["현금흐름", "영업활동", "투자활동", "재무활동"],
+        "ratios": ["비율", "ROE", "ROA", "이익률", "부채비율"],
+        "costByNature": ["성격별 비용", "급여", "감가상각", "원재료"],
+        "segments": ["부문", "세그먼트", "사업부"],
+        "businessOverview": ["사업", "시장", "경쟁", "전략"],
+        "governanceOverview": ["지배구조", "이사회", "감사"],
+        "riskDerivative": ["리스크", "파생", "위험"],
+        "productService": ["제품", "서비스", "매출구성"],
+    }
+    haystack = answer.lower()
+    def _evidenced(module: str) -> bool:
+        # Unknown modules fall back to a match on the module name itself.
+        probes = evidence_by_module.get(module, []) or [module]
+        return any(probe.lower() in haystack for probe in probes)
+    return sum(1 for module in wanted if _evidenced(module)) / len(wanted)
+def classify_failure_types(
+    card: ScoreCard,
+    *,
+    answer: str,
+    included_modules: list[str] | None = None,
+    expected_modules: list[str] | None = None,
+    expected_route: str | None = None,
+    actual_route: str | None = None,
+) -> list[str]:
+    """Derive failure labels from the scores and the run metadata.
+    Primary labels are reported in a fixed order: routing, retrieval, false unavailable,
+    generation, UI wording. "data_gap" and "empty_answer" are fallbacks that are only
+    considered while no other label has been added.
+    """
+    route_mismatch = bool(expected_route and actual_route and expected_route != actual_route)
+    primary_checks = (
+        (route_mismatch, "routing_failure"),
+        (bool(expected_modules) and card.module_utilization < 1.0, "retrieval_failure"),
+        (card.false_unavailable == 0.0, "false_unavailable"),
+        (card.factual_accuracy < 0.5 or card.hallucination < 0.5, "generation_failure"),
+        (card.ui_language_compliance < 1.0, "ui_wording_failure"),
+    )
+    failures = [label for failed, label in primary_checks if failed]
+    if not failures and expected_modules and not included_modules:
+        failures.append("data_gap")
+    if not failures and not answer.strip():
+        failures.append("empty_answer")
+    return failures
+def auto_score(
+    answer: str,
+    expected_facts: list[dict] | None = None,
+    expected_topics: list[str] | None = None,
+    provided_indicators: list[str] | None = None,
+    *,
+    included_modules: list[str] | None = None,
+    expected_modules: list[str] | None = None,
+    must_not_say: list[str] | None = None,
+    must_include: list[str] | None = None,
+    forbidden_terms: list[str] | None = None,
+    clarification_allowed: bool | None = None,
+    expected_followups: list[str] | None = None,
+    expected_route: str | None = None,
+    actual_route: str | None = None,
+    contexts: list[dict] | None = None,
+) -> ScoreCard:
+    """Score an answer on every dimension and classify its failures.
+    Each argument is handed to the matching ``score_*`` function. The module and route
+    information is also copied into the card's ``details``, and ``failure_types`` is filled
+    from ``classify_failure_types``.
+    Returns:
+        The populated ScoreCard.
+    """
+    facts = expected_facts or []
+    topics = expected_topics or []
+    # The false-unavailable check only runs when the caller supplied some expectation.
+    check_unavailable = bool(expected_modules or facts or must_include or topics)
+    dimensions = {
+        "factual_accuracy": score_factual_accuracy(answer, facts),
+        "completeness": score_completeness(answer, topics),
+        "source_citation": score_source_citation(answer),
+        "hallucination": score_hallucination(answer, facts),
+        "actionability": score_actionability(answer),
+        "ratio_utilization": score_ratio_utilization(answer, provided_indicators),
+        "module_utilization": score_module_utilization(included_modules, expected_modules),
+        "false_unavailable": score_false_unavailable(answer, must_not_say, enabled=check_unavailable),
+        "grounding_quality": score_grounding_quality(answer, must_include, topics),
+        "clarification_quality": score_clarification_quality(answer, clarification_allowed),
+        "ui_language_compliance": score_ui_language_compliance(answer, forbidden_terms),
+        "followup_usefulness": score_followup_usefulness(answer, expected_followups),
+        "context_depth": score_context_depth(answer, contexts),
+        "source_citation_precision": score_source_citation_precision(answer),
+        "data_coverage": score_data_coverage(answer, included_modules, expected_modules),
+    }
+    run_details = {
+        "includedModules": list(included_modules or []),
+        "expectedModules": list(expected_modules or []),
+        "expectedRoute": expected_route,
+        "actualRoute": actual_route,
+    }
+    card = ScoreCard(details=run_details, **dimensions)
+    card.failure_types = classify_failure_types(
+        card,
+        answer=answer,
+        included_modules=included_modules,
+        expected_modules=expected_modules,
+        expected_route=expected_route,
+        actual_route=actual_route,
+    )
+    return card